Skip to content

Commit

Permalink
[Datasets] Add datasets CMO&AIME (#1610)
Browse files Browse the repository at this point in the history
* add datasets cmo&aime

* delete unused modules

* modify prompt

* update __init__

* update data load and add README

* update data load

* update performance

* update md5

* remove indents

* add indent

* fix log for debug mode
  • Loading branch information
jnanliu authored Oct 28, 2024
1 parent 9c39cb6 commit 645c5f3
Show file tree
Hide file tree
Showing 11 changed files with 183 additions and 1 deletion.
13 changes: 13 additions & 0 deletions opencompass/configs/datasets/aime2024/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
### Description

Math dataset composed of problems from AIME2024 (American Invitational Mathematics Examination 2024).

### Performance

| Qwen2.5-Math-72B-Instruct | Qwen2.5-Math-7B-Instruct | Qwen2-Math-7B-Instruct | Qwen2-Math-1.5B-Instruct | internlm2-math-7b |
| ----------- | ----------- | ----------- | ----------- | ----------- |
| 20.00 | 16.67 | 16.67 | 13.33 | 3.33 |

| Qwen2.5-72B-Instruct | Qwen2.5-7B-Instruct | internlm2_5-7b-chat |
| ----------- | ----------- | ----------- |
| 20.00 | 16.67 | 6.67 |
4 changes: 4 additions & 0 deletions opencompass/configs/datasets/aime2024/aime2024_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Import `aime2024_datasets` from the sibling config module
# `aime2024_gen_6e39a4` inside mmengine's `read_base()` context so this file
# exposes it under the stable name `aime2024_gen`.
# NOTE(review): presumably `read_base()` marks the import as a base-config
# inheritance for mmengine -- confirm against the mmengine config docs.
with read_base():
    from .aime2024_gen_6e39a4 import aime2024_datasets  # noqa: F401, F403
39 changes: 39 additions & 0 deletions opencompass/configs/datasets/aime2024/aime2024_gen_6e39a4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import Aime2024Dataset, MATHEvaluator, math_postprocess_v2


# Reader: the prompt is filled from the `question` column and predictions are
# compared against the `answer` column.
aime2024_reader_cfg = dict(input_columns=['question'], output_column='answer')

# Inference: a single HUMAN turn asking for step-by-step reasoning with the
# final answer inside \boxed{}; uses ZeroRetriever and a GenInferencer capped
# at 2048 output tokens.
aime2024_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    # Adjacent literals are concatenated at compile time, so
                    # the resulting prompt is a single unchanged string.
                    prompt=('{question}\n'
                            'Please reason step by step, and put your final '
                            'answer within \\boxed{}.'),
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=2048),
)

# Evaluation: predictions go through `math_postprocess_v2` and are then scored
# by `MATHEvaluator` with version 'v2'.
aime2024_eval_cfg = dict(
    evaluator=dict(type=MATHEvaluator, version='v2'),
    pred_postprocessor=dict(type=math_postprocess_v2),
)

# Dataset entries exported by this config module.
aime2024_datasets = [
    dict(
        abbr='aime2024',
        type=Aime2024Dataset,
        path='opencompass/aime2024',
        reader_cfg=aime2024_reader_cfg,
        infer_cfg=aime2024_infer_cfg,
        eval_cfg=aime2024_eval_cfg,
    ),
]
13 changes: 13 additions & 0 deletions opencompass/configs/datasets/cmo_fib/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
### Description

Math dataset composed of problems from CMO (Chinese Mathematical Olympiad) 2009–2022.

### Performance

| Qwen2.5-Math-72B-Instruct | Qwen2.5-Math-7B-Instruct | Qwen2-Math-7B-Instruct | Qwen2-Math-1.5B-Instruct | internlm2-math-7b |
| ----------- | ----------- | ----------- | ----------- | ----------- |
| 46.15 | 42.79 | 31.73 | 23.56 | 3.37 |

| Qwen2.5-72B-Instruct | Qwen2.5-7B-Instruct | internlm2_5-7b-chat |
| ----------- | ----------- | ----------- |
| 31.25 | 26.44 | 9.13 |
4 changes: 4 additions & 0 deletions opencompass/configs/datasets/cmo_fib/cmo_fib_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mmengine.config import read_base

# Import `cmo_fib_datasets` from the sibling config module
# `cmo_fib_gen_ace24b` inside mmengine's `read_base()` context so this file
# exposes it under the stable name `cmo_fib_gen`.
# NOTE(review): presumably `read_base()` marks the import as a base-config
# inheritance for mmengine -- confirm against the mmengine config docs.
with read_base():
    from .cmo_fib_gen_ace24b import cmo_fib_datasets  # noqa: F401, F403
39 changes: 39 additions & 0 deletions opencompass/configs/datasets/cmo_fib/cmo_fib_gen_ace24b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import CMOFibDataset, MATHEvaluator, math_postprocess_v2


# Reader: the prompt is filled from the `question` column and predictions are
# compared against the `answer` column.
cmo_fib_reader_cfg = dict(input_columns=['question'], output_column='answer')

# Inference: a single HUMAN turn with a Chinese instruction asking for
# step-by-step reasoning and the final answer inside \boxed{}; uses
# ZeroRetriever and a GenInferencer capped at 2048 output tokens.
cmo_fib_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    # Adjacent literals are concatenated at compile time, so
                    # the resulting prompt is a single unchanged string.
                    prompt=('{question}\n'
                            '请一步一步地推理,并将最终答案写入\\boxed{}.'),
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=2048),
)

# Evaluation: predictions go through `math_postprocess_v2` and are then scored
# by `MATHEvaluator` with version 'v2'.
cmo_fib_eval_cfg = dict(
    evaluator=dict(type=MATHEvaluator, version='v2'),
    pred_postprocessor=dict(type=math_postprocess_v2),
)

# Dataset entries exported by this config module.
cmo_fib_datasets = [
    dict(
        abbr='cmo_fib',
        type=CMOFibDataset,
        path='opencompass/cmo_fib',
        reader_cfg=cmo_fib_reader_cfg,
        infer_cfg=cmo_fib_infer_cfg,
        eval_cfg=cmo_fib_eval_cfg,
    ),
]
2 changes: 2 additions & 0 deletions opencompass/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .advglue import * # noqa: F401, F403
from .afqmcd import * # noqa: F401, F403
from .agieval import * # noqa: F401, F403
from .aime2024 import * # noqa: F401, F403
from .anli import AnliDataset # noqa: F401, F403
from .anthropics_evals import * # noqa: F401, F403
from .apps import * # noqa: F401, F403
Expand All @@ -24,6 +25,7 @@
from .cmb import * # noqa: F401, F403
from .cmmlu import * # noqa: F401, F403
from .cmnli import * # noqa: F401, F403
from .cmo_fib import * # noqa: F401, F403
from .cmrc import * # noqa: F401, F403
from .commonsenseqa import * # noqa: F401, F403
from .commonsenseqa_cn import * # noqa: F401, F403
Expand Down
25 changes: 25 additions & 0 deletions opencompass/datasets/aime2024.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import json

from datasets import Dataset

from opencompass.registry import LOAD_DATASET
from opencompass.utils import get_data_path

from .base import BaseDataset


@LOAD_DATASET.register_module()
class Aime2024Dataset(BaseDataset):
    """Loader for the AIME 2024 problems stored as a JSON Lines file."""

    @staticmethod
    def load(path):
        """Read the JSONL file behind ``path`` into a ``Dataset``.

        Args:
            path: Dataset identifier or location; it is resolved with
                ``get_data_path`` before the file is opened.

        Returns:
            A ``datasets.Dataset`` with one row per non-blank line. Each row
            keeps every field of the source JSON object and gains
            ``question`` (copied from ``origin_prompt``) and ``answer``
            (copied from ``gold_answer``).

        Raises:
            KeyError: If a record lacks ``origin_prompt`` or ``gold_answer``.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        path = get_data_path(path)
        records = []
        # Pin the encoding: without it open() falls back to the locale's
        # preferred encoding, which is not UTF-8 on every platform.
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of letting json.loads fail on an empty string.
                if not raw_line.strip():
                    continue
                record = json.loads(raw_line)
                record['question'] = record['origin_prompt'][:]
                record['answer'] = record['gold_answer']
                records.append(record)
        return Dataset.from_list(records)
25 changes: 25 additions & 0 deletions opencompass/datasets/cmo_fib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import json

from datasets import Dataset

from opencompass.registry import LOAD_DATASET
from opencompass.utils import get_data_path

from .base import BaseDataset


@LOAD_DATASET.register_module()
class CMOFibDataset(BaseDataset):
    """Loader for the CMO fill-in-the-blank problems stored as JSON Lines."""

    @staticmethod
    def load(path):
        """Read the JSONL file behind ``path`` into a ``Dataset``.

        Args:
            path: Dataset identifier or location; it is resolved with
                ``get_data_path`` before the file is opened.

        Returns:
            A ``datasets.Dataset`` with one row per non-blank line. Each row
            keeps every field of the source JSON object and gains
            ``question`` (copied from ``origin_prompt``) and ``answer``
            (copied from ``gold_answer``).

        Raises:
            KeyError: If a record lacks ``origin_prompt`` or ``gold_answer``.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        path = get_data_path(path)
        records = []
        # Pin the encoding: the problems contain non-ASCII (Chinese) text,
        # and without it open() falls back to the locale's preferred
        # encoding, which is not UTF-8 on every platform.
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of letting json.loads fail on an empty string.
                if not raw_line.strip():
                    continue
                record = json.loads(raw_line)
                record['question'] = record['origin_prompt'][:]
                record['answer'] = record['gold_answer']
                records.append(record)
        return Dataset.from_list(records)
2 changes: 1 addition & 1 deletion opencompass/runners/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def launch(self, tasks: List[Dict[str, Any]]) -> List[Tuple[str, int]]:
task.run()
else:
tmp_logs = f'tmp/{os.getpid()}_debug.log'
get_logger().debug(
get_logger().warning(
f'Debug mode, log will be saved to {tmp_logs}')
with open(tmp_logs, 'a') as log_file:
subprocess.run(cmd,
Expand Down
18 changes: 18 additions & 0 deletions opencompass/utils/datasets_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,16 @@
"ms_id": "",
"hf_id": "",
"local": "./data/test_generation",
},
"opencompass/aime2024": {
"ms_id": "",
"hf_id": "",
"local": "./data/aime.jsonl",
},
"opencompass/cmo_fib": {
"ms_id": "",
"hf_id": "",
"local": "./data/cmo.jsonl",
}
}

Expand Down Expand Up @@ -455,4 +465,12 @@
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/test_generation.zip",
"md5": "918a6ea2b1eee6f2b1314db3c21cb4c7",
},
"/aime": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/aime.zip",
"md5": "fbe2d0577fc210962a549f8cea1a00c8"
},
"/cmo": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/cmo.zip",
"md5": "fad52c81290506a8ca74f46b5400d8fc"
}
}

0 comments on commit 645c5f3

Please sign in to comment.