Skip to content

Commit

Permalink
Add FuseChat-3.0 models to AlpacaEval (#426)
Browse files Browse the repository at this point in the history
  • Loading branch information
yangzy39 authored Dec 27, 2024
1 parent 0b4af76 commit 8bb6e57
Show file tree
Hide file tree
Showing 20 changed files with 64,540 additions and 0 deletions.
4,832 changes: 4,832 additions & 0 deletions results/FuseChat-Gemma-2-9B-Instruct/model_outputs.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

4,832 changes: 4,832 additions & 0 deletions results/FuseChat-Llama-3.1-8B-Instruct/model_outputs.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

4,832 changes: 4,832 additions & 0 deletions results/FuseChat-Llama-3.2-1B-Instruct/model_outputs.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

4,832 changes: 4,832 additions & 0 deletions results/FuseChat-Llama-3.2-3B-Instruct/model_outputs.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

4,832 changes: 4,832 additions & 0 deletions results/FuseChat-Qwen-2.5-7B-Instruct/model_outputs.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,20 @@ gemma-2-9b-it-WPO-HB,77.82503168985093,1.2355857177790277,640,163,2,805,79.62732
SelfMoA_gemma-2-9b-it-SimPO,71.9958856144492,1.3495341826849294,597,208,0,805,74.16149068322981,community,1930,75.04950944068965,0.4428706876009843
blendaxai-gm-l3-v35,73.41035740244067,1.254951147343878,607,196,2,805,75.527950310559,community,2186,73.37270365010379,0.6163911450738288
gemma-2-9b-it-SimPO,65.86422561532919,1.423459922555078,540,264,1,805,67.14285714285714,community,1833,72.3508446939842,0.5167873784867067
FuseChat-Gemma-2-9B-Instruct,70.49713534560247,1.3426390784895994,575,225,5,805,71.73913043478261,community,2155,70.18106263911686,0.5941187965717439
openpipe-moa-gpt-4-turbo-v1,63.15493451236265,1.422980098799326,515,283,7,805,64.40993788819875,community,1856,68.37866250336802,0.7309418614587613
gemma-2-9b-it-DPO,65.35922380122982,1.402802336467638,536,268,1,805,66.64596273291924,community,2016,67.6620382198043,0.6605613085864308
FuseChat-Llama-3.1-8B-Instruct,63.33158292362734,1.4225069834256892,518,286,1,805,64.40993788819875,community,2033,65.38623116037492,0.6668876066398686
Together-MoA,59.8688062333292,1.434305604543079,490,314,1,805,60.93167701863354,community,1825,65.37996976852163,0.7392392836781445
FuseChat-Qwen-2.5-7B-Instruct,64.64069997299381,1.4301369533298258,531,273,1,805,66.02484472049689,community,2173,63.58298649463735,0.6161348916427868
Llama3-PBM-Nova-70B,62.95129983494411,1.3965649883206293,512,293,0,805,63.60248447204969,community,2207,62.39078292806358,0.7630318008010619
Storm-7B-best-of-64,63.04099075186919,1.4253258915161846,519,286,0,805,64.472049689441,community,2340,61.63789557199839,
Together-MoA-Lite,56.593045622273294,1.4464848562244548,456,347,2,805,56.77018633540373,community,1968,59.1415240989275,0.7580510219326322
aligner-2b_gpt-4-turbo-2024-04-09,46.77089325668323,1.3378060774476594,371,417,17,805,40.18633540372671,community,1370,58.33130206276722,
gpt-4o-2024-05-13,51.32757578249279,1.4700094589795554,429,369,7,805,53.72670807453416,minimal,1873,57.45682883335095,
higgs-llama-3-70b-v2,68.63519246435168,1.3151765652301792,563,240,2,805,70.06211180124224,community,2657,56.76317433000503,0.8571649314205525
gpt-4-turbo-2024-04-09,46.11526538763708,1.474073957743638,370,426,9,805,46.52173913043478,minimal,1802,55.01530093647852,
FuseChat-Llama-3.2-3B-Instruct,51.29667710101864,1.482579367297701,424,378,3,805,52.85714285714286,community,1976,53.99883748344241,0.6945055858828797
SPPO-Gemma-2-9B-It-PairRM,48.23404468746583,1.4568887170812033,386,418,1,805,48.01242236024844,community,1803,53.96983730150777,0.6516507968188552
Llama-3-Instruct-8B-WPO-HB-v2,57.33198613024009,1.4953200715726744,469,336,0,805,58.26086956521739,community,2472,53.37264268894168,0.7120573420060313
claude-3-5-sonnet-20240620,40.56021409682828,1.4679655403720542,312,493,0,805,38.75776397515528,community,1488,52.36675427146999,
Expand Down Expand Up @@ -82,6 +86,7 @@ tulu-2-dpo-70b-ExPO,22.98061970610497,1.3591734082562228,184,620,1,805,22.919254
claude-instant-1.2,16.12739962159006,1.1341036838301686,120,682,3,805,15.093167701863356,community,1112,25.61225902543337,
Infinity-Instruct-3M-0613-Mistral-7B,15.747828130770788,1.1194852005680405,118,687,0,805,14.658385093167702,community,1180,25.501557794727287,
dbrx-instruct,18.44834898407453,1.255388020324377,150,655,0,805,18.633540372670808,verified,1450,25.37544974044448,
FuseChat-Llama-3.2-1B-Instruct,29.9219322658882,1.3934584328741797,233,570,2,805,29.068322981366464,community,2259,25.27098247880791,0.6406636457433237
claude-2.1,15.733506736409938,1.120315865445773,115,688,2,805,14.409937888198757,verified,1096,25.251943886133027,
Nanbeige2-8B-Chat,39.35450207219922,1.4524224245579649,323,480,2,805,40.24844720496895,community,2709,25.24207090175315,
xwinlm-70b-v0.1,21.812957073875776,1.230327447605842,166,635,4,805,20.869565217391305,community,1775,24.649686057119272,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,8 @@ NullModel,-1.0518971527519405,0.2538445948493148,1.9057926500734572
GPO-Llama-3-8B-Instruct-GPM-2B,-1.1688688988236986,0.7678817822697138,-0.4997466376902971
SPPO-Llama-3-8B-Instruct-GPM-2B,-1.2289746990068291,0.8046474033904255,-0.6767509934260389
Llama-3-Instruct-8B-RainbowPO,-1.3587935106099684,0.7600298380500641,0.1779421196386809
FuseChat-Gemma-2-9B-Instruct,-1.1543337259190067,0.6937176687992737,1.0978693063596836
FuseChat-Llama-3.1-8B-Instruct,-1.1271666967241551,0.7265708972653502,0.8583100053446140
FuseChat-Qwen-2.5-7B-Instruct,-0.9656160039317526,0.6283493169692116,0.7193794241118705
FuseChat-Llama-3.2-1B-Instruct,-1.3081165824298635,0.6708544960650634,-1.2396285670785026
FuseChat-Llama-3.2-3B-Instruct,-1.1236209478665597,0.6516838121662227,0.2637504763960864
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# AlpacaEval generation config for FuseAI/FuseChat-Gemma-2-9B-Instruct.
# NOTE(review): nesting is reconstructed from a flattened listing — confirm
# against the repository copy before merging.
FuseChat-Gemma-2-9B-Instruct:
  completions_kwargs:
    model_name: FuseAI/FuseChat-Gemma-2-9B-Instruct
    model_kwargs:
      dtype: bfloat16
    max_new_tokens: 4096
    temperature: 0.5
    top_p: 1.0
    batch_size: 1000
    # Stochastic sampling, not beam search: vLLM's beam search requires
    # temperature == 0, so `use_beam_search: true` conflicts with the
    # temperature configured above.
    do_sample: true
    # presumably the Gemma-2 <eos> and <end_of_turn> ids — verify against the tokenizer
    stop_token_ids:
      - 1
      - 107
  fn_completions: vllm_local_completions
  pretty_name: FuseChat-Gemma-2-9B-Instruct
  prompt_template: FuseChat-Gemma-2-9B-Instruct/prompt.txt
  link: https://huggingface.co/FuseAI/FuseChat-Gemma-2-9B-Instruct
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<start_of_turn>user
{instruction}<end_of_turn>
<start_of_turn>model
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# AlpacaEval generation config for FuseAI/FuseChat-Llama-3.1-8B-Instruct.
# NOTE(review): nesting is reconstructed from a flattened listing — confirm
# against the repository copy before merging.
FuseChat-Llama-3.1-8B-Instruct:
  completions_kwargs:
    model_name: FuseAI/FuseChat-Llama-3.1-8B-Instruct
    model_kwargs:
      dtype: bfloat16
    max_new_tokens: 4096
    temperature: 0.6
    top_k: 50
    top_p: 0.9
    presence_penalty: 0.1
    frequency_penalty: 0.1
    batch_size: 1000
    # Stochastic sampling, not beam search: vLLM's beam search requires
    # temperature == 0, top_p == 1 and no top_k, so `use_beam_search: true`
    # conflicts with the sampling settings above.
    do_sample: true
    # presumably <|end_of_text|> and <|eot_id|> — verify against the tokenizer
    stop_token_ids:
      - 128001
      - 128009
  fn_completions: vllm_local_completions
  pretty_name: FuseChat-Llama-3.1-8B-Instruct
  prompt_template: FuseChat-Llama-3.1-8B-Instruct/prompt.txt
  link: https://huggingface.co/FuseAI/FuseChat-Llama-3.1-8B-Instruct

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# AlpacaEval generation config for FuseAI/FuseChat-Llama-3.2-1B-Instruct.
# NOTE(review): nesting is reconstructed from a flattened listing — confirm
# against the repository copy before merging.
FuseChat-Llama-3.2-1B-Instruct:
  completions_kwargs:
    model_name: FuseAI/FuseChat-Llama-3.2-1B-Instruct
    model_kwargs:
      dtype: bfloat16
    max_new_tokens: 4096
    temperature: 0.6
    top_k: 50
    top_p: 0.9
    presence_penalty: 0.1
    frequency_penalty: 0.1
    batch_size: 1000
    # Stochastic sampling, not beam search: vLLM's beam search requires
    # temperature == 0, top_p == 1 and no top_k, so `use_beam_search: true`
    # conflicts with the sampling settings above.
    do_sample: true
    # presumably <|end_of_text|> and <|eot_id|> — verify against the tokenizer
    stop_token_ids:
      - 128001
      - 128009
  fn_completions: vllm_local_completions
  pretty_name: FuseChat-Llama-3.2-1B-Instruct
  # Points at the FuseChat-Llama-3.1-8B-Instruct template rather than a per-model file.
  prompt_template: FuseChat-Llama-3.1-8B-Instruct/prompt.txt
  link: https://huggingface.co/FuseAI/FuseChat-Llama-3.2-1B-Instruct

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# AlpacaEval generation config for FuseAI/FuseChat-Llama-3.2-3B-Instruct.
# NOTE(review): nesting is reconstructed from a flattened listing — confirm
# against the repository copy before merging.
FuseChat-Llama-3.2-3B-Instruct:
  completions_kwargs:
    model_name: FuseAI/FuseChat-Llama-3.2-3B-Instruct
    model_kwargs:
      dtype: bfloat16
    max_new_tokens: 4096
    temperature: 0.6
    top_k: 50
    top_p: 0.9
    presence_penalty: 0.1
    frequency_penalty: 0.1
    batch_size: 1000
    # Stochastic sampling, not beam search: vLLM's beam search requires
    # temperature == 0, top_p == 1 and no top_k, so `use_beam_search: true`
    # conflicts with the sampling settings above.
    do_sample: true
    # presumably <|end_of_text|> and <|eot_id|> — verify against the tokenizer
    stop_token_ids:
      - 128001
      - 128009
  fn_completions: vllm_local_completions
  pretty_name: FuseChat-Llama-3.2-3B-Instruct
  # Points at the FuseChat-Llama-3.1-8B-Instruct template rather than a per-model file.
  prompt_template: FuseChat-Llama-3.1-8B-Instruct/prompt.txt
  link: https://huggingface.co/FuseAI/FuseChat-Llama-3.2-3B-Instruct

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# AlpacaEval generation config for FuseAI/FuseChat-Qwen-2.5-7B-Instruct.
# NOTE(review): nesting is reconstructed from a flattened listing — confirm
# against the repository copy before merging.
FuseChat-Qwen-2.5-7B-Instruct:
  pretty_name: FuseChat-Qwen-2.5-7B-Instruct
  link: https://huggingface.co/FuseAI/FuseChat-Qwen-2.5-7B-Instruct
  fn_completions: vllm_local_completions
  prompt_template: FuseChat-Qwen-2.5-7B-Instruct/prompt.txt
  completions_kwargs:
    # Model to load and how to load it.
    model_name: FuseAI/FuseChat-Qwen-2.5-7B-Instruct
    model_kwargs:
      dtype: bfloat16
    batch_size: 1000
    # Decoding settings.
    max_new_tokens: 4096
    temperature: 0.7
    top_k: 20
    top_p: 0.8
    repetition_penalty: 1.05
    # presumably <|im_end|> and <|endoftext|> — verify against the tokenizer
    stop_token_ids: [151645, 151643]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
{instruction}<|im_end|>
<|im_start|>assistant

0 comments on commit 8bb6e57

Please sign in to comment.