Add Tulu 2 models to AlpacaEval (#171)
Co-authored-by: Hamish Ivison <hamishi@allennlp-cirrascale-20.reviz.ai2.in>
hamishivi and Hamish Ivison authored Nov 18, 2023
1 parent 3388c5a commit 90506bf
Showing 11 changed files with 43,533 additions and 0 deletions.
9,662 changes: 9,662 additions & 0 deletions results/tulu-2-dpo-13b/annotation_alpaca_eval_gpt4.json
4,832 changes: 4,832 additions & 0 deletions results/tulu-2-dpo-13b/model_outputs.json
9,662 changes: 9,662 additions & 0 deletions results/tulu-2-dpo-70b/annotation_alpaca_eval_gpt4.json
4,832 changes: 4,832 additions & 0 deletions results/tulu-2-dpo-70b/model_outputs.json
9,662 changes: 9,662 additions & 0 deletions results/tulu-2-dpo-7b/annotation_alpaca_eval_gpt4.json
4,832 changes: 4,832 additions & 0 deletions results/tulu-2-dpo-7b/model_outputs.json

(Large diffs are not rendered by default.)
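Each model contributes two result files: model_outputs.json (the model's generated responses for the AlpacaEval prompts) and annotation_alpaca_eval_gpt4.json (the GPT-4 judge annotations). A minimal sketch for inspecting one of them is below; the field names are assumptions based on AlpacaEval's usual output format, not read from this diff.

```python
import json

# Hedged sketch: field names ("instruction", "output", "generator") follow the
# usual AlpacaEval output format and may differ slightly in these files.
with open("results/tulu-2-dpo-70b/model_outputs.json") as f:
    outputs = json.load(f)

print(len(outputs))               # should match the n_total column in the leaderboard (805 here)
print(outputs[0].keys())          # e.g. instruction, output, generator
print(outputs[0]["output"][:200]) # first generated answer, truncated
```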

@@ -2,6 +2,7 @@
gpt4_turbo,97.69900497512438,0.5104849118311993,783,16,5,804,minimal,2049
xwinlm-70b-v0.1,95.56803995,0.724941926,765,35,1,801,community,1775
gpt4,95.27950311,0.71628144,761,32,12,805,minimal,1365
tulu-2-dpo-70b,95.03105590062113,0.7613100978662208,764,39,2,805,community,1418
llama-2-70b-chat-hf,92.66169154,0.911762258,743,57,4,804,minimal,1790
ultralm-13b-v2.0-best-of-16,92.29813665,0.940299807,743,62,0,805,community,1720
xwinlm-13b-v0.1,91.76029963,0.968139439,734,65,2,801,community,1894
@@ -16,6 +17,7 @@ wizardlm-13b-v1.2,89.16562889,1.090425466,714,85,4,803,community,1635
vicuna-33b-v1.3,88.99253731,1.095692216,713,86,5,804,verified,1479
claude,88.38509317,1.11448754,707,89,9,805,minimal,1082
causallm-14b,88.26086956521739,1.1163330437039891,705,89,11,805,community,1391
tulu-2-dpo-13b,88.12189054726367,1.1366163670057372,707,94,3,804,community,1614
humpback-llama2-70b,87.93532338,1.154547675,706,96,2,804,community,1822
xwinlm-7b-v0.1,87.82771536,1.154308696,703,97,1,801,community,1894
openbuddy-llama2-70b-v10.1,87.67123288,1.150841752,701,96,6,803,community,1077
@@ -24,6 +26,7 @@ openbuddy-llama-65b-v8,86.53366584,1.20291824,693,107,2,802,community,1162
wizardlm-13b-v1.1,86.31840796,1.206321783,692,108,4,804,community,1525
zephyr-7b-alpha,85.7587064676617,1.2285451680042003,688,113,3,804,community,1302
openchat-v2-13b,84.9689441,1.257297984,683,120,2,805,community,1564
tulu-2-dpo-7b,84.22360248447205,1.2855613371106336,678,127,0,805,community,1663
humpback-llama-65b,83.70646766,1.307103474,672,130,2,804,community,1269
ultralm-13b-v2.0,83.60248447,1.305781745,673,132,0,805,community,1399
recycled-wizardlm-7b-v2.0,83.47826086956522,1.3097444061303425,672,133,0,805,community,1583
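For reference, the win-rate column in the rows added above is consistent with counting a draw as half a win: (764 + 0.5 * 2) / 805 ≈ 95.031 for tulu-2-dpo-70b. A hedged sketch, assuming the unlabeled count columns are wins, losses, draws, and total (the CSV header row is not shown in this hunk):

```python
# Assumed column interpretation: wins, losses (baseline wins), draws, total.
def win_rate(wins: int, draws: int, total: int) -> float:
    """Win rate in percent, with draws counted as half a win."""
    return 100.0 * (wins + 0.5 * draws) / total

print(win_rate(764, 2, 805))  # ~95.031, matches tulu-2-dpo-70b
print(win_rate(707, 3, 804))  # ~88.122, matches tulu-2-dpo-13b
print(win_rate(678, 0, 805))  # ~84.224, matches tulu-2-dpo-7b
```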
15 changes: 15 additions & 0 deletions src/alpaca_eval/models_configs/tulu-2-dpo-13b/configs.yaml
@@ -0,0 +1,15 @@
tulu-2-dpo-13b:
  prompt_template: "tulu-2-dpo-70b/prompt.txt"
  fn_completions: "vllm_local_completions"
  completions_kwargs:
    model_name: "allenai/tulu-2-dpo-13b"
    model_kwargs:
      torch_dtype: 'bfloat16'
    tp: 2
    max_new_tokens: 7500
    temperature: 0.0
    top_p: 1.0
    do_sample: False
    batch_size: 800
  pretty_name: "Tulu 2+DPO 13B"
  link: "https://huggingface.co/allenai/tulu-2-dpo-13b"
15 changes: 15 additions & 0 deletions src/alpaca_eval/models_configs/tulu-2-dpo-70b/configs.yaml
@@ -0,0 +1,15 @@
tulu-2-dpo-70b:
  prompt_template: "tulu-2-dpo-70b/prompt.txt"
  fn_completions: "vllm_local_completions"
  completions_kwargs:
    model_name: "allenai/tulu-2-dpo-70b"
    model_kwargs:
      torch_dtype: 'bfloat16'
    tp: 2 # you need at least 2 A100 80GB GPUs to run this model
    max_new_tokens: 7500
    temperature: 0.0
    top_p: 1.0
    do_sample: False
    batch_size: 800
  pretty_name: "Tulu 2+DPO 70B"
  link: "https://huggingface.co/allenai/tulu-2-dpo-70b"
3 changes: 3 additions & 0 deletions src/alpaca_eval/models_configs/tulu-2-dpo-70b/prompt.txt
@@ -0,0 +1,3 @@
<|user|>
{instruction}
<|assistant|>
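All three configs point at this single template, with {instruction} substituted per example. A minimal sketch of the substitution (the exact formatting call inside AlpacaEval is assumed):

```python
# Hedged sketch of applying the shared Tulu chat template to one instruction.
template = "<|user|>\n{instruction}\n<|assistant|>\n"

prompt = template.format(instruction="What is the capital of France?")
print(prompt)
# <|user|>
# What is the capital of France?
# <|assistant|>
```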
15 changes: 15 additions & 0 deletions src/alpaca_eval/models_configs/tulu-2-dpo-7b/configs.yaml
@@ -0,0 +1,15 @@
tulu-2-dpo-7b:
  prompt_template: "tulu-2-dpo-70b/prompt.txt"
  fn_completions: "vllm_local_completions"
  completions_kwargs:
    model_name: "allenai/tulu-2-dpo-7b"
    model_kwargs:
      torch_dtype: 'bfloat16'
    tp: 2
    max_new_tokens: 7500
    temperature: 0.0
    top_p: 1.0
    do_sample: False
    batch_size: 800
  pretty_name: "Tulu 2+DPO 7B"
  link: "https://huggingface.co/allenai/tulu-2-dpo-7b"

