Skip to content

Commit

Permalink
[ENH] OpenAI use tools instead of functions (#391)
Browse files Browse the repository at this point in the history
* [ENH] more general `_requires_chatml`

* [ENH] update OpenAI to use tools instead of functions
  • Loading branch information
YannDubs authored Aug 16, 2024
1 parent 9c46d20 commit 1deab1b
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 79 deletions.
8 changes: 7 additions & 1 deletion src/alpaca_eval/decoders/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,15 @@ def _openai_completion_helper(
else:
choices[i]["text"] = choice.message.content

# backward compatibility for function calls # TODO: remove once function calls are removed
if choice.message.function_call:
# currently we only use function calls to get a JSON object => return raw text of json
choices[i]["text"] = choice.message.function_call.arguments

if choice.message.tool_calls is not None:
# currently we only use tool calls to get a JSON object => return raw text of json
choices[i]["text"] = choice.message.tool_calls[0].function.arguments

else:
completion_batch = client.completions.create(prompt=prompt_batch, **curr_kwargs)
choices = completion_batch.choices
Expand Down Expand Up @@ -290,7 +295,8 @@ def _openai_completion_helper(
def _requires_chatml(model: str) -> bool:
"""Whether a model requires the ChatML format."""
# TODO: this should ideally be an OpenAI function... Maybe it already exists?
return ("turbo" in model or "gpt-4" in model) and "instruct" not in model
not_chatml = ("instruct" in model) or ("gpt-3" in model and "turbo" not in model) or (model.startswith("text-"))
return not not_chatml


def _get_price_per_token(model, price_per_token=None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,37 @@ alpaca_eval_cot_gpt4_turbo_fn:
model_name: "gpt-4-1106-preview"
max_tokens: 300
temperature: 0
function_call:
name: "make_partial_leaderboard"
functions:
- name: "make_partial_leaderboard"
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
parameters:
type: "object"
properties:
concise_explanation:
type: "string"
description: "A concise explanation for the ranking of the current models."
ordered_models:
type: "array"
description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output."
items:
type: "object"
properties:
model:
type: "string"
description: "The name of the model"
rank:
type: "number"
description: "Order of preference of the model, 1 has the best output"
"required": [ "ordered_models" ]
tool_choice:
type: function
function:
name: "make_partial_leaderboard"
tools:
- type: function
function:
name: "make_partial_leaderboard"
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
parameters:
type: "object"
properties:
concise_explanation:
type: "string"
description: "A concise explanation for the ranking of the current models."
ordered_models:
type: "array"
description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output."
items:
type: "object"
properties:
model:
type: "string"
description: "The name of the model"
rank:
type: "number"
description: "Order of preference of the model, 1 has the best output"
additionalProperties: false
required: [ "model", "rank" ]
additionalProperties: false
required: [ "ordered_models" ]
fn_completion_parser: "pipeline_meta_parser"
completion_parser_kwargs:
parsers_to_kwargs:
Expand Down
49 changes: 28 additions & 21 deletions src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,34 @@ alpaca_eval_gpt4_fn:
model_name: "gpt-4"
max_tokens: 100
temperature: 0
function_call:
name: "make_leaderboard"
functions:
- name: "make_leaderboard"
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
parameters:
type: "object"
properties:
ordered_models:
type: "array"
description: "A list of models ordered by the preference of their outputs"
items:
type: "object"
properties:
model:
type: "string"
description: "The name of the model"
rank:
type: "number"
description: "Order of preference of the model, 1 has the best output"
"required": [ "ordered_models" ]
tool_choice:
type: function
function:
name: "make_leaderboard"
tools:
- type: function
function:
name: "make_leaderboard"
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
parameters:
type: "object"
properties:
ordered_models:
type: "array"
description: "A list of models ordered by the preference of their outputs"
items:
type: "object"
properties:
model:
type: "string"
description: "The name of the model"
rank:
type: "number"
description: "Order of preference of the model, 1 has the best output"
additionalProperties: false
required: [ "model", "rank" ]
additionalProperties: false
required: [ "ordered_models" ]
fn_completion_parser: "pipeline_meta_parser"
completion_parser_kwargs:
parsers_to_kwargs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,34 @@ alpaca_eval_gpt4_turbo_fn:
model_name: "gpt-4-1106-preview"
max_tokens: 100
temperature: 0
function_call:
name: "make_partial_leaderboard"
functions:
- name: "make_partial_leaderboard"
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
parameters:
type: "object"
properties:
ordered_models:
type: "array"
description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output."
items:
type: "object"
properties:
model:
type: "string"
description: "The name of the model"
rank:
type: "number"
description: "Order of preference of the model, 1 has the best output"
"required": [ "ordered_models" ]
tool_choice:
type: function
function:
name: "make_partial_leaderboard"
tools:
- type: function
function:
name: "make_partial_leaderboard"
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
parameters:
type: "object"
properties:
ordered_models:
type: "array"
description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output."
items:
type: "object"
properties:
model:
type: "string"
description: "The name of the model"
rank:
type: "number"
description: "Order of preference of the model, 1 has the best output"
additionalProperties: false
required: [ "model", "rank" ]
additionalProperties: false
required: [ "ordered_models" ]
fn_completion_parser: "pipeline_meta_parser"
completion_parser_kwargs:
parsers_to_kwargs:
Expand Down
29 changes: 17 additions & 12 deletions src/alpaca_eval/evaluators_configs/chatgpt_fn/configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,23 @@ chatgpt_fn:
model_name: "gpt-3.5-turbo-16k-0613"
max_tokens: 50
temperature: 0
function_call:
name: "print_best_model"
functions:
- name: "print_best_model"
description: "Print the best model given the preferred output."
parameters:
type: "object"
properties:
best_output:
type: "string"
description: "Name of the best output, should be 'Output (a)' or 'Output (b)'"
"required": [ "best_output" ]
tool_choice:
type: function
function:
name: "print_best_model"
tools:
- type: function
function:
name: "print_best_model"
description: "Print the best model given the preferred output."
parameters:
type: "object"
properties:
best_output:
type: "string"
description: "Name of the best output, should be 'Output (a)' or 'Output (b)'"
additionalProperties: false
required: [ "best_output" ]
completion_parser_kwargs:
outputs_to_match:
1: '(?i)output \(a\)'
Expand Down

0 comments on commit 1deab1b

Please sign in to comment.