From 1deab1b857d97a6eaa68c88634693d8b2dba93c0 Mon Sep 17 00:00:00 2001 From: Yann Dubois Date: Fri, 16 Aug 2024 16:46:56 -0700 Subject: [PATCH] [ENH] OpenAI use tools instead of functions (#391) * [ENH] more general `_requires_chatml` * [ENH] update OpenAI to using tools instead of functions --- src/alpaca_eval/decoders/openai.py | 8 ++- .../configs.yaml | 55 +++++++++++-------- .../alpaca_eval_gpt4_fn/configs.yaml | 49 ++++++++++------- .../alpaca_eval_gpt4_turbo_fn/configs.yaml | 49 ++++++++++------- .../chatgpt_fn/configs.yaml | 29 ++++++---- 5 files changed, 111 insertions(+), 79 deletions(-) diff --git a/src/alpaca_eval/decoders/openai.py b/src/alpaca_eval/decoders/openai.py index fb1cb46e..a61900b4 100644 --- a/src/alpaca_eval/decoders/openai.py +++ b/src/alpaca_eval/decoders/openai.py @@ -237,10 +237,15 @@ def _openai_completion_helper( else: choices[i]["text"] = choice.message.content + # backward compatibility for function calls # TODO: remove once function calls are removed if choice.message.function_call: # currently we only use function calls to get a JSON object => return raw text of json choices[i]["text"] = choice.message.function_call.arguments + if choice.message.tool_calls is not None: + # currently we only use function calls to get a JSON object => return raw text of json + choices[i]["text"] = choice.message.tool_calls[0].function.arguments + else: completion_batch = client.completions.create(prompt=prompt_batch, **curr_kwargs) choices = completion_batch.choices @@ -290,7 +295,8 @@ def _openai_completion_helper( def _requires_chatml(model: str) -> bool: """Whether a model requires the ChatML format.""" # TODO: this should ideally be an OpenAI function... Maybe it already exists? - return ("turbo" in model or "gpt-4" in model) and "instruct" not in model + not_chatml = ("instruct" in model) or ("gpt-3" in model and "turbo" not in model) or (model.startswith("text-")) + return not not_chatml def _get_price_per_token(model, price_per_token=None): diff --git a/src/alpaca_eval/evaluators_configs/alpaca_eval_cot_gpt4_turbo_fn/configs.yaml b/src/alpaca_eval/evaluators_configs/alpaca_eval_cot_gpt4_turbo_fn/configs.yaml index f6979d14..20615b68 100644 --- a/src/alpaca_eval/evaluators_configs/alpaca_eval_cot_gpt4_turbo_fn/configs.yaml +++ b/src/alpaca_eval/evaluators_configs/alpaca_eval_cot_gpt4_turbo_fn/configs.yaml @@ -5,30 +5,37 @@ alpaca_eval_cot_gpt4_turbo_fn: model_name: "gpt-4-1106-preview" max_tokens: 300 temperature: 0 - function_call: - name: "make_partial_leaderboard" - functions: - - name: "make_partial_leaderboard" - description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs." - parameters: - type: "object" - properties: - concise_explanation: - type: "string" - description: "A concise explanation for the ranking of the current models." - ordered_models: - type: "array" - description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output." - items: - type: "object" - properties: - model: - type: "string" - description: "The name of the model" - rank: - type: "number" - description: "Order of preference of the model, 1 has the best output" - "required": [ "ordered_models" ] + tool_choice: + type: function + function: + name: "make_partial_leaderboard" + tools: + - type: function + function: + name: "make_partial_leaderboard" + description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs." + parameters: + type: "object" + properties: + concise_explanation: + type: "string" + description: "A concise explanation for the ranking of the current models." + ordered_models: + type: "array" + description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output." + items: + type: "object" + properties: + model: + type: "string" + description: "The name of the model" + rank: + type: "number" + description: "Order of preference of the model, 1 has the best output" + additionalProperties: false + required: [ "model", "rank" ] + additionalProperties: false + required: [ "ordered_models" ] fn_completion_parser: "pipeline_meta_parser" completion_parser_kwargs: parsers_to_kwargs: diff --git a/src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/configs.yaml b/src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/configs.yaml index 1aa2b91d..f16b20e5 100644 --- a/src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/configs.yaml +++ b/src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/configs.yaml @@ -5,27 +5,34 @@ alpaca_eval_gpt4_fn: model_name: "gpt-4" max_tokens: 100 temperature: 0 - function_call: - name: "make_leaderboard" - functions: - - name: "make_leaderboard" - description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs." - parameters: - type: "object" - properties: - ordered_models: - type: "array" - description: "A list of models ordered by the preference of their outputs" - items: - type: "object" - properties: - model: - type: "string" - description: "The name of the model" - rank: - type: "number" - description: "Order of preference of the model, 1 has the best output" - "required": [ "ordered_models" ] + tool_choice: + type: function + function: + name: "make_leaderboard" + tools: + - type: function + function: + name: "make_leaderboard" + description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs." + parameters: + type: "object" + properties: + ordered_models: + type: "array" + description: "A list of models ordered by the preference of their outputs" + items: + type: "object" + properties: + model: + type: "string" + description: "The name of the model" + rank: + type: "number" + description: "Order of preference of the model, 1 has the best output" + additionalProperties: false + required: [ "model", "rank" ] + additionalProperties: false + required: [ "ordered_models" ] fn_completion_parser: "pipeline_meta_parser" completion_parser_kwargs: parsers_to_kwargs: diff --git a/src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_turbo_fn/configs.yaml b/src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_turbo_fn/configs.yaml index b7f6be3a..9896a327 100644 --- a/src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_turbo_fn/configs.yaml +++ b/src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_turbo_fn/configs.yaml @@ -5,27 +5,34 @@ alpaca_eval_gpt4_turbo_fn: model_name: "gpt-4-1106-preview" max_tokens: 100 temperature: 0 - function_call: - name: "make_partial_leaderboard" - functions: - - name: "make_partial_leaderboard" - description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs." - parameters: - type: "object" - properties: - ordered_models: - type: "array" - description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output." - items: - type: "object" - properties: - model: - type: "string" - description: "The name of the model" - rank: - type: "number" - description: "Order of preference of the model, 1 has the best output" - "required": [ "ordered_models" ] + tool_choice: + type: function + function: + name: "make_partial_leaderboard" + tools: + - type: function + function: + name: "make_partial_leaderboard" + description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs." + parameters: + type: "object" + properties: + ordered_models: + type: "array" + description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output." + items: + type: "object" + properties: + model: + type: "string" + description: "The name of the model" + rank: + type: "number" + description: "Order of preference of the model, 1 has the best output" + additionalProperties: false + required: [ "model", "rank" ] + additionalProperties: false + required: [ "ordered_models" ] fn_completion_parser: "pipeline_meta_parser" completion_parser_kwargs: parsers_to_kwargs: diff --git a/src/alpaca_eval/evaluators_configs/chatgpt_fn/configs.yaml b/src/alpaca_eval/evaluators_configs/chatgpt_fn/configs.yaml index b857a327..d200aa7b 100644 --- a/src/alpaca_eval/evaluators_configs/chatgpt_fn/configs.yaml +++ b/src/alpaca_eval/evaluators_configs/chatgpt_fn/configs.yaml @@ -5,18 +5,23 @@ chatgpt_fn: model_name: "gpt-3.5-turbo-16k-0613" max_tokens: 50 temperature: 0 - function_call: - name: "print_best_model" - functions: - - name: "print_best_model" - description: "Print the best model given the preferred output." - parameters: - type: "object" - properties: - best_output: - type: "string" - description: "Name of the best output, should be 'Output (a)' or 'Output (b)'" - "required": [ "best_output" ] + tool_choice: + type: function + function: + name: "print_best_model" + tools: + - type: function + function: + name: "print_best_model" + description: "Print the best model given the preferred output." + parameters: + type: "object" + properties: + best_output: + type: "string" + description: "Name of the best output, should be 'Output (a)' or 'Output (b)'" + additionalProperties: false + required: [ "best_output" ] completion_parser_kwargs: outputs_to_match: 1: '(?i)output \(a\)'