Bugfix/non azure open ai prompty (#3621)
# Description

Fix evaluators to accept model configs of type `OpenAIModelConfiguration`. Highlights of this change include:
- A number of type-hint updates.
- The actual bugfix: the common azure-openai-config check now applies only to configs of type `AzureOpenAIModelConfiguration`, instead of to every config passed in.
- Secondary bugfix: the `validate_interface` method now properly handles both singleton and list outputs of the `resolve_annotation` method.
- Added a test to the QA evaluator (which implicitly exercises all the other evaluators). The test is currently disabled because we don't yet have a team-wide OpenAI API key for general testing, but I had it passing locally with a personal API key. A usage sketch of the enabled scenario follows this list.
- Added an OpenAIConfiguration fixture that is also currently unusable for the same reason.
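
For context, here is a minimal usage sketch of the scenario this change enables: constructing a non-Azure `OpenAIModelConfiguration` and handing it to an evaluator. The configuration field names used below (`model`, `api_key`, `base_url`) are assumptions for illustration and are not taken from this diff.

```python
import os

from promptflow.core import OpenAIModelConfiguration
from promptflow.evals.evaluators import QAEvaluator

# A non-Azure OpenAI config; before this fix it tripped over an Azure-only api_version check.
model_config = OpenAIModelConfiguration(
    model="gpt-4o",                         # assumed field name
    api_key=os.environ["OPENAI_API_KEY"],   # assumed field name
    base_url="https://api.openai.com/v1",   # assumed field name / endpoint
)

qa_eval = QAEvaluator(model_config=model_config)
result = qa_eval(
    question="What is the capital of France?",
    answer="Paris is the capital of France.",
    context="France's capital city is Paris.",
    ground_truth="Paris",
)
print(result)  # dict of per-evaluator metric scores
```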
MilesHolland authored Aug 8, 2024
1 parent 4ff9706 commit 1be5cb8
Showing 12 changed files with 159 additions and 47 deletions.
6 changes: 5 additions & 1 deletion src/promptflow-core/promptflow/_core/tool_meta_generator.py
```diff
@@ -516,7 +516,11 @@ def validate_interface(ports, fields, tool_name, port_type, support_model_config
     for k, v in ports.items():
         if ValueType.OBJECT not in v.type:
             continue
-        if resolve_annotation(fields[k]) not in supported_types:
+        annotations = resolve_annotation(fields[k])
+
+        if not isinstance(annotations, list):
+            annotations = [annotations]
+        if any([note not in supported_types for note in annotations]):
             raise BadFunctionInterface(
                 message_format=(
                     "Parse interface for tool '{tool_name}' failed: "
```
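
To illustrate the `validate_interface` change above, here is a small standalone sketch of the singleton-vs-list normalization. The toy `resolve_annotation_sketch` helper is a stand-in for illustration only, not the real `resolve_annotation` implementation.

```python
from typing import Union, get_args, get_origin


def resolve_annotation_sketch(annotation):
    """Toy stand-in: a Union annotation resolves to a list of types, anything else stays a single type."""
    if get_origin(annotation) is Union:
        return list(get_args(annotation))
    return annotation


SUPPORTED_TYPES = {int, str}


def is_supported(annotation) -> bool:
    annotations = resolve_annotation_sketch(annotation)
    # Normalize so the membership test below works for both a single type and a list of types.
    if not isinstance(annotations, list):
        annotations = [annotations]
    return all(note in SUPPORTED_TYPES for note in annotations)


assert is_supported(str)
assert is_supported(Union[int, str])
assert not is_supported(Union[int, float])
```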
1 change: 1 addition & 0 deletions src/promptflow-evals/CHANGELOG.md
```diff
@@ -9,6 +9,7 @@
 ### Bugs Fixed
 - Large simulation was causing a jinja exception, this has been fixed.
 - Fixed the issue where the relative data path was not working with the evaluate API when using multiple evaluators.
+- Fixed evaluators to accept (non-Azure) Open AI Configs.
 
 ### Improvements
 - Converted built-in evaluators to async-based implementation, leveraging async batch run for performance improvement. Introduced `PF_EVALS_BATCH_USE_ASYNC` environment variable to enable/disable async batch run, with the default set to False.
```
17 changes: 12 additions & 5 deletions src/promptflow-evals/promptflow/evals/evaluators/_chat/_chat.py
```diff
@@ -4,12 +4,12 @@
 import asyncio
 import json
 import logging
-from typing import Dict, List
+from typing import Dict, List, Union
 
 import numpy as np
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
-from promptflow.core import AzureOpenAIModelConfiguration
+from promptflow.core import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 from .._coherence import CoherenceEvaluator
 from .._fluency import FluencyEvaluator
@@ -22,7 +22,10 @@
 
 class _AsyncChatEvaluator:
     def __init__(
-        self, model_config: AzureOpenAIModelConfiguration, eval_last_turn: bool = False, parallel: bool = True
+        self,
+        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+        eval_last_turn: bool = False,
+        parallel: bool = True,
     ):
         self._eval_last_turn = eval_last_turn
         self._parallel = parallel
@@ -242,7 +245,8 @@ class ChatEvaluator:
     Initialize a chat evaluator configured for a specific Azure OpenAI model.
 
     :param model_config: Configuration for the Azure OpenAI model.
-    :type model_config: ~promptflow.core.AzureOpenAIModelConfiguration
+    :type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
+        ~promptflow.core.OpenAIModelConfiguration]
     :param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
         focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
     :type eval_last_turn: bool
@@ -289,7 +293,10 @@ class ChatEvaluator:
     """
 
     def __init__(
-        self, model_config: AzureOpenAIModelConfiguration, eval_last_turn: bool = False, parallel: bool = True
+        self,
+        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
+        eval_last_turn: bool = False,
+        parallel: bool = True,
    ):
         self._async_evaluator = _AsyncChatEvaluator(model_config, eval_last_turn, parallel)
 
```
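
A hypothetical sketch of the widened `ChatEvaluator` signature in use follows. The conversation shape and the `OpenAIModelConfiguration` fields are illustrative assumptions, not taken from this diff.

```python
import os

from promptflow.core import OpenAIModelConfiguration
from promptflow.evals.evaluators import ChatEvaluator

# Non-Azure OpenAI config (field names assumed for illustration).
model_config = OpenAIModelConfiguration(model="gpt-4o", api_key=os.environ["OPENAI_API_KEY"])

chat_eval = ChatEvaluator(model_config=model_config, eval_last_turn=False, parallel=True)

# Assumed conversation shape: a list of role/content turns, with optional
# retrieval context attached to assistant turns.
conversation = [
    {"role": "user", "content": "What is the value of 2 + 2?"},
    {
        "role": "assistant",
        "content": "2 + 2 = 4",
        "context": {"citations": [{"id": "math_doc.md", "content": "Basic arithmetic: 2 + 2 = 4."}]},
    },
]

print(chat_eval(conversation=conversation))
```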
```diff
@@ -6,11 +6,12 @@
 import logging
 import os
 import re
+from typing import Union
 
 import numpy as np
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
-from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration
+from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 logger = logging.getLogger(__name__)
 
@@ -21,12 +22,17 @@
 
 
 class _AsyncRetrievalChatEvaluator:
+    # Constants must be defined within eval's directory to be save/loadable
     PROMPTY_FILE = "retrieval.prompty"
     LLM_CALL_TIMEOUT = 600
+    DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
-        if model_config.api_version is None:
-            model_config.api_version = "2024-02-15-preview"
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+        if (
+            isinstance(model_config, AzureOpenAIModelConfiguration)
+            and (not hasattr(model_config, "api_version") or model_config.api_version) is None
+        ):
+            model_config.api_version = self.DEFAULT_OPEN_API_VERSION
 
         prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
 
@@ -101,7 +107,8 @@ class RetrievalChatEvaluator:
     Initialize an evaluator configured for a specific Azure OpenAI model.
 
     :param model_config: Configuration for the Azure OpenAI model.
-    :type model_config: ~promptflow.core.AzureOpenAIModelConfiguration
+    :type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
+        ~promptflow.core.OpenAIModelConfiguration]
     :return: A function that evaluates and generates metrics for "chat" scenario.
     :rtype: Callable
     **Usage**
@@ -134,7 +141,7 @@ class RetrievalChatEvaluator:
         }
     """
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
         self._async_evaluator = _AsyncRetrievalChatEvaluator(model_config)
 
     def __call__(self, *, conversation, **kwargs):
```
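
The same constructor pattern repeats in each prompty-based evaluator below. As a standalone sketch (with a hypothetical helper name), the intended behavior is: leave non-Azure configs untouched, and only fill in a default `api_version` on an `AzureOpenAIModelConfiguration` that does not already carry one.

```python
from typing import Union

from promptflow.core import AzureOpenAIModelConfiguration, OpenAIModelConfiguration

DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"


def ensure_api_version(  # hypothetical helper name, not part of the diff
    model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
) -> None:
    # OpenAIModelConfiguration has no api_version field, so only Azure configs are touched.
    is_azure = isinstance(model_config, AzureOpenAIModelConfiguration)
    if is_azure and getattr(model_config, "api_version", None) is None:
        model_config.api_version = DEFAULT_OPEN_API_VERSION
```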
```diff
@@ -4,11 +4,12 @@
 
 import os
 import re
+from typing import Union
 
 import numpy as np
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
-from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration
+from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 try:
     from ..._user_agent import USER_AGENT
@@ -17,12 +18,17 @@
 
 
 class _AsyncCoherenceEvaluator:
+    # Constants must be defined within eval's directory to be save/loadable
     PROMPTY_FILE = "coherence.prompty"
     LLM_CALL_TIMEOUT = 600
+    DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
-        if model_config.api_version is None:
-            model_config.api_version = "2024-02-15-preview"
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+        if (
+            isinstance(model_config, AzureOpenAIModelConfiguration)
+            and (not hasattr(model_config, "api_version") or model_config.api_version) is None
+        ):
+            model_config.api_version = self.DEFAULT_OPEN_API_VERSION
 
         prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
 
@@ -62,7 +68,8 @@ class CoherenceEvaluator:
     Initialize a coherence evaluator configured for a specific Azure OpenAI model.
 
     :param model_config: Configuration for the Azure OpenAI model.
-    :type model_config: ~promptflow.core.AzureOpenAIModelConfiguration
+    :type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
+        ~promptflow.core.OpenAIModelConfiguration]
 
     **Usage**
@@ -82,7 +89,7 @@
         }
     """
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
         self._async_evaluator = _AsyncCoherenceEvaluator(model_config)
 
     def __call__(self, *, question: str, answer: str, **kwargs):
```
```diff
@@ -4,11 +4,12 @@
 
 import os
 import re
+from typing import Union
 
 import numpy as np
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
-from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration
+from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 try:
     from ..._user_agent import USER_AGENT
@@ -17,12 +18,17 @@
 
 
 class _AsyncFluencyEvaluator:
+    # Constants must be defined within eval's directory to be save/loadable
     PROMPTY_FILE = "fluency.prompty"
     LLM_CALL_TIMEOUT = 600
+    DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
-        if model_config.api_version is None:
-            model_config.api_version = "2024-02-15-preview"
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+        if (
+            isinstance(model_config, AzureOpenAIModelConfiguration)
+            and (not hasattr(model_config, "api_version") or model_config.api_version) is None
+        ):
+            model_config.api_version = self.DEFAULT_OPEN_API_VERSION
 
         prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
 
@@ -62,7 +68,8 @@ class FluencyEvaluator:
     Initialize a fluency evaluator configured for a specific Azure OpenAI model.
 
     :param model_config: Configuration for the Azure OpenAI model.
-    :type model_config: ~promptflow.core.AzureOpenAIModelConfiguration
+    :type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
+        ~promptflow.core.OpenAIModelConfiguration]
 
     **Usage**
@@ -82,7 +89,7 @@
         }
     """
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
         self._async_evaluator = _AsyncFluencyEvaluator(model_config)
 
     def __call__(self, *, question: str, answer: str, **kwargs):
```
```diff
@@ -4,11 +4,12 @@
 
 import os
 import re
+from typing import Union
 
 import numpy as np
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
-from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration
+from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 try:
     from ..._user_agent import USER_AGENT
@@ -17,12 +18,17 @@
 
 
 class _AsyncGroundednessEvaluator:
+    # Constants must be defined within eval's directory to be save/loadable
     PROMPTY_FILE = "groundedness.prompty"
     LLM_CALL_TIMEOUT = 600
+    DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
-        if model_config.api_version is None:
-            model_config.api_version = "2024-02-15-preview"
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+        if (
+            isinstance(model_config, AzureOpenAIModelConfiguration)
+            and (not hasattr(model_config, "api_version") or model_config.api_version) is None
+        ):
+            model_config.api_version = self.DEFAULT_OPEN_API_VERSION
 
         prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
 
@@ -62,7 +68,8 @@ class GroundednessEvaluator:
     Initialize a groundedness evaluator configured for a specific Azure OpenAI model.
 
     :param model_config: Configuration for the Azure OpenAI model.
-    :type model_config: ~promptflow.core.AzureOpenAIModelConfiguration
+    :type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
+        ~promptflow.core.OpenAIModelConfiguration]
 
     **Usage**
@@ -83,7 +90,7 @@
         }
     """
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
         self._async_evaluator = _AsyncGroundednessEvaluator(model_config)
 
     def __call__(self, *, answer: str, context: str, **kwargs):
```
14 changes: 10 additions & 4 deletions src/promptflow-evals/promptflow/evals/evaluators/_qa/_qa.py
```diff
@@ -3,9 +3,10 @@
 # ---------------------------------------------------------
 
 import asyncio
+from typing import Union
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
-from promptflow.core import AzureOpenAIModelConfiguration
+from promptflow.core import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 from .._coherence import CoherenceEvaluator
 from .._f1_score import F1ScoreEvaluator
@@ -16,7 +17,9 @@
 
 
 class _AsyncQAEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration, parallel: bool = True):
+    def __init__(
+        self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], parallel: bool = True
+    ):
         self._parallel = parallel
 
         self._evaluators = [
@@ -59,7 +62,8 @@ class QAEvaluator:
     Initialize a question-answer evaluator configured for a specific Azure OpenAI model.
 
     :param model_config: Configuration for the Azure OpenAI model.
-    :type model_config: ~promptflow.core.AzureOpenAIModelConfiguration
+    :type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
+        ~promptflow.core.OpenAIModelConfiguration]
     :return: A function that evaluates and generates metrics for "question-answering" scenario.
     :rtype: Callable
 
@@ -89,7 +93,9 @@ class QAEvaluator:
         }
     """
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration, parallel: bool = True):
+    def __init__(
+        self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], parallel: bool = True
+    ):
         self._async_evaluator = _AsyncQAEvaluator(model_config, parallel)
 
     def __call__(self, *, question: str, answer: str, context: str, ground_truth: str, **kwargs):
```
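
The description mentions a currently disabled test of the QA evaluator against a real (non-Azure) OpenAI endpoint. That test is not part of the hunks shown here; a hypothetical sketch of such a test, skipped whenever no API key is available, might look like this (the fixture, test name, and config fields are all illustrative).

```python
import os

import pytest

from promptflow.core import OpenAIModelConfiguration
from promptflow.evals.evaluators import QAEvaluator


@pytest.fixture
def non_azure_openai_model_config():
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        pytest.skip("No team-wide OpenAI API key is available for general testing yet.")
    return OpenAIModelConfiguration(model="gpt-4o", api_key=api_key)  # field names assumed


def test_qa_evaluator_accepts_openai_config(non_azure_openai_model_config):
    qa_eval = QAEvaluator(model_config=non_azure_openai_model_config)
    result = qa_eval(
        question="What is the capital of France?",
        answer="Paris is the capital of France.",
        context="France's capital city is Paris.",
        ground_truth="Paris",
    )
    # Expect a non-empty dict of metric scores (e.g. gpt_coherence, f1_score).
    assert result
```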
```diff
@@ -4,11 +4,12 @@
 
 import os
 import re
+from typing import Union
 
 import numpy as np
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
-from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration
+from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 try:
     from ..._user_agent import USER_AGENT
@@ -17,12 +18,17 @@
 
 
 class _AsyncRelevanceEvaluator:
+    # Constants must be defined within eval's directory to be save/loadable
     PROMPTY_FILE = "relevance.prompty"
     LLM_CALL_TIMEOUT = 600
+    DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
-        if model_config.api_version is None:
-            model_config.api_version = "2024-02-15-preview"
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+        if (
+            isinstance(model_config, AzureOpenAIModelConfiguration)
+            and (not hasattr(model_config, "api_version") or model_config.api_version) is None
+        ):
+            model_config.api_version = self.DEFAULT_OPEN_API_VERSION
 
         prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
 
@@ -65,7 +71,8 @@ class RelevanceEvaluator:
     Initialize a relevance evaluator configured for a specific Azure OpenAI model.
 
     :param model_config: Configuration for the Azure OpenAI model.
-    :type model_config: ~promptflow.core.AzureOpenAIModelConfiguration
+    :type model_config: Union[~promptflow.core.AzureOpenAIModelConfiguration,
+        ~promptflow.core.OpenAIModelConfiguration]
 
     **Usage**
@@ -87,7 +94,7 @@
         }
     """
 
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
         self._async_evaluator = _AsyncRelevanceEvaluator(model_config)
 
     def __call__(self, *, question: str, answer: str, context: str, **kwargs):
```