forked from open-compass/opencompass
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fbc966b
commit 0d5cac4
Showing
8 changed files
with
348 additions
and
0 deletions.
There are no files selected for viewing
4 changes: 4 additions & 0 deletions
4
configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .compassbench_v1_3_objective_gen_068af0 import compassbench_aug_datasets # noqa: F401, F403 |
74 changes: 74 additions & 0 deletions
74
configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator | ||
from opencompass.datasets.compassbench_obj import CompassBenchObjectiveV1_3, compassbench_objective_v1_3_postprocess | ||
from opencompass.utils.text_postprocessors import first_option_postprocess | ||
|
||
|
||
# Chinese prompts for objective questions (single choice / cloze).
prompt_cn = {
    'single_choice_cn': '以下是一道单项选择题,请你根据你了解的知识给出正确的答案选项。请你一步步推理并在最后用“答案选项为X”来回答,其中X是ABCD中你认为正确的选项序号\n下面是你要回答的题目:\n{question}\n让我们一步步解决这个问题:',
    'cloze_cn': '以下是一道填空题,请你根据你了解的知识一步步思考后把你的最终答案放到\\boxed{}中。\n下面是你要回答的题目:\n{question}\n让我们一步步解决这个问题:',
}

# English prompts for objective questions (single choice / cloze).
prompt_en = {
    'single_choice_en': "Here is a single-choice question. Please give the correct answer based on your knowledge. Please reason step by step and answer with 'The answer is X' at the end, where X is the option number you think is correct.\nHere is the question you need to answer:\n{question}\nLet's solve this problem step by step:",
    'cloze_en': "Here is a fill-in-the-blank question. Please think step by step based on your knowledge and put your final answer in \\boxed{}. Here is the question you need to answer:\n{question}\nLet's solve this problem step by step:",
}

# Dataset split -> question types evaluated for that split.
douknow_sets = {
    'knowledge': ['single_choice_cn'],
    'math': ['single_choice_cn'],
}

# Set up the prompts.
# CircularEval: evaluate each choice question under rotated option orders
# (CircularEvaluator) instead of plain accuracy.
CircularEval = True

compassbench_aug_datasets = []

for _split in list(douknow_sets.keys()):
    for _name in douknow_sets[_split]:
        # Pick the prompt set matching the question language.
        if 'cn' in _name:
            single_choice_prompts = prompt_cn
            cloze_prompts = prompt_cn
        else:
            single_choice_prompts = prompt_en
            cloze_prompts = prompt_en
        douknow_infer_cfg = dict(
            ice_template=dict(
                type=PromptTemplate,
                template=dict(
                    begin='</E>',
                    # Choice questions use a HUMAN/BOT round; cloze questions
                    # use the bare prompt string.
                    round=[
                        dict(
                            role='HUMAN',
                            prompt=single_choice_prompts[_name],
                        ),
                        dict(role='BOT', prompt='{answer}'),
                    ] if 'choice' in _name else cloze_prompts[_name],
                ),
                ice_token='</E>',
            ),
            retriever=dict(type=ZeroRetriever),
            inferencer=dict(type=GenInferencer),
        )
        douknow_eval_cfg = dict(
            evaluator=dict(type=CircularEvaluator if CircularEval else AccEvaluator) if 'single_choice' in _name else dict(type=AccEvaluator),
            pred_postprocessor=dict(type=first_option_postprocess, options='ABCD') if 'single_choice' in _name else dict(type=compassbench_objective_v1_3_postprocess, name=_name))

        compassbench_aug_datasets.append(
            dict(
                type=CompassBenchObjectiveV1_3,
                path=f'./data/compassbench_v1_3/{_split}/{_name}.jsonl',
                name='circular_' + _name if CircularEval else _name,
                # FIX: the original fell back to an EMPTY abbr when
                # CircularEval was False; use the plain name instead.
                abbr='compassbench-' + _split + '-' + _name + 'circular' if CircularEval
                else 'compassbench-' + _split + '-' + _name,
                reader_cfg=dict(
                    input_columns=['question'],
                    output_column='answer'
                ),
                infer_cfg=douknow_infer_cfg,
                eval_cfg=douknow_eval_cfg,
            ))

del _split, _name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import FixKRetriever | ||
from opencompass.openicl.icl_inferencer import PPLInferencer | ||
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator | ||
from opencompass.datasets import GPQADataset, GPQAEvaluator | ||
from opencompass.utils import first_option_postprocess | ||
|
||
gpqa_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D'],
    output_column='answer')

# Instruction prepended to each scored prompt (Chinese: "For the following
# single-choice question, give the option of the correct answer directly.").
# FIX: dropped the useless f-prefix — the string has no placeholders (F541).
hint = '对下面的单项选择题,请直接给出正确答案的选项。'
question_and_options = 'Question: {question}\n(A){A}\n(B){B}\n(C){C}\n(D){D}\n'
gpqa_infer_cfg = dict(
    # In-context examples: question followed by the candidate answer letter.
    ice_template=dict(
        type=PromptTemplate,
        template={
            opt: f'{question_and_options}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']},
    ),
    # Scored prompt: one PPL template per candidate option.
    prompt_template=dict(
        type=PromptTemplate,
        template={
            opt: f'{hint}\n</E>{question_and_options}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']
        },
        ice_token='</E>'
    ),
    # 5-shot: always use the first five examples as in-context demos.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
    inferencer=dict(type=PPLInferencer))

gpqa_eval_cfg = dict(evaluator=dict(type=AccwithDetailsEvaluator))

gpqa_datasets = []
gpqa_subsets = {
    # 'extended': 'gpqa_extended.csv',
    # 'main': 'gpqa_main.csv',
    'diamond': 'gpqa_diamond.csv'
}

for split in list(gpqa_subsets.keys()):
    gpqa_datasets.append(
        dict(
            abbr='GPQA_' + split,
            type=GPQADataset,
            path='./data/gpqa/',
            name=gpqa_subsets[split],
            reader_cfg=gpqa_reader_cfg,
            infer_cfg=gpqa_infer_cfg,
            eval_cfg=gpqa_eval_cfg)
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from mmengine.config import read_base | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import FixKRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import MMLUProDataset, MMLUProBaseEvaluator | ||
|
||
with read_base():
    from .mmlu_pro_categories import categories

mmlu_pro_datasets = []

# One dataset config per MMLU-Pro subject category (5-shot, generative).
for category in categories:
    # Per-category instruction placed before the scored question.
    hint = (f'Answer the following multiple choice question about {category}, '
            'and give your answer option directly.')
    question_and_options = 'Question:\n{question}\nOptions:\n{options_str}'

    mmlu_pro_reader_cfg = dict(
        input_columns=['question', 'cot_content', 'options_str'],
        output_column='answer_string',
        train_split='validation',
        test_split='test',
    )

    mmlu_pro_infer_cfg = dict(
        # Demo format: question plus its gold answer.
        ice_template=dict(
            type=PromptTemplate,
            template=f'{question_and_options}\nAnswer: {{answer}}',
        ),
        # Scored prompt: instruction, demos, then the test question.
        prompt_template=dict(
            type=PromptTemplate,
            template=f'{hint}\n</E>{question_and_options}\nAnswer: ',
            ice_token='</E>',
        ),
        # Fixed 5-shot demos drawn from the validation split.
        retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
        inferencer=dict(type=GenInferencer, max_out_len=100),
    )

    mmlu_pro_eval_cfg = dict(evaluator=dict(type=MMLUProBaseEvaluator))

    mmlu_pro_datasets.append(
        dict(
            abbr=f'mmlu_pro_{category.replace(" ", "_")}',
            type=MMLUProDataset,
            path='opencompass/mmlu_pro',
            category=category,
            reader_cfg=mmlu_pro_reader_cfg,
            infer_cfg=mmlu_pro_infer_cfg,
            eval_cfg=mmlu_pro_eval_cfg,
        ))
4 changes: 4 additions & 0 deletions
4
opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .compassbench_v1_3_objective_gen_068af0 import compassbench_aug_datasets # noqa: F401, F403 |
74 changes: 74 additions & 0 deletions
74
opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator | ||
from opencompass.datasets.compassbench_obj import CompassBenchObjectiveV1_3, compassbench_objective_v1_3_postprocess | ||
from opencompass.utils.text_postprocessors import first_option_postprocess | ||
|
||
|
||
# Chinese prompts for objective questions (single choice / cloze).
prompt_cn = {
    'single_choice_cn': '以下是一道单项选择题,请你根据你了解的知识给出正确的答案选项。请你一步步推理并在最后用“答案选项为X”来回答,其中X是ABCD中你认为正确的选项序号\n下面是你要回答的题目:\n{question}\n让我们一步步解决这个问题:',
    'cloze_cn': '以下是一道填空题,请你根据你了解的知识一步步思考后把你的最终答案放到\\boxed{}中。\n下面是你要回答的题目:\n{question}\n让我们一步步解决这个问题:',
}

# English prompts for objective questions (single choice / cloze).
prompt_en = {
    'single_choice_en': "Here is a single-choice question. Please give the correct answer based on your knowledge. Please reason step by step and answer with 'The answer is X' at the end, where X is the option number you think is correct.\nHere is the question you need to answer:\n{question}\nLet's solve this problem step by step:",
    'cloze_en': "Here is a fill-in-the-blank question. Please think step by step based on your knowledge and put your final answer in \\boxed{}. Here is the question you need to answer:\n{question}\nLet's solve this problem step by step:",
}

# Dataset split -> question types evaluated for that split.
douknow_sets = {
    'knowledge': ['single_choice_cn'],
    'math': ['single_choice_cn'],
}

# Set up the prompts.
# CircularEval: evaluate each choice question under rotated option orders
# (CircularEvaluator) instead of plain accuracy.
CircularEval = True

compassbench_aug_datasets = []

for _split in list(douknow_sets.keys()):
    for _name in douknow_sets[_split]:
        # Pick the prompt set matching the question language.
        if 'cn' in _name:
            single_choice_prompts = prompt_cn
            cloze_prompts = prompt_cn
        else:
            single_choice_prompts = prompt_en
            cloze_prompts = prompt_en
        douknow_infer_cfg = dict(
            ice_template=dict(
                type=PromptTemplate,
                template=dict(
                    begin='</E>',
                    # Choice questions use a HUMAN/BOT round; cloze questions
                    # use the bare prompt string.
                    round=[
                        dict(
                            role='HUMAN',
                            prompt=single_choice_prompts[_name],
                        ),
                        dict(role='BOT', prompt='{answer}'),
                    ] if 'choice' in _name else cloze_prompts[_name],
                ),
                ice_token='</E>',
            ),
            retriever=dict(type=ZeroRetriever),
            inferencer=dict(type=GenInferencer),
        )
        douknow_eval_cfg = dict(
            evaluator=dict(type=CircularEvaluator if CircularEval else AccEvaluator) if 'single_choice' in _name else dict(type=AccEvaluator),
            pred_postprocessor=dict(type=first_option_postprocess, options='ABCD') if 'single_choice' in _name else dict(type=compassbench_objective_v1_3_postprocess, name=_name))

        compassbench_aug_datasets.append(
            dict(
                type=CompassBenchObjectiveV1_3,
                path=f'./data/compassbench_v1_3/{_split}/{_name}.jsonl',
                name='circular_' + _name if CircularEval else _name,
                # FIX: the original fell back to an EMPTY abbr when
                # CircularEval was False; use the plain name instead.
                abbr='compassbench-' + _split + '-' + _name + 'circular' if CircularEval
                else 'compassbench-' + _split + '-' + _name,
                reader_cfg=dict(
                    input_columns=['question'],
                    output_column='answer'
                ),
                infer_cfg=douknow_infer_cfg,
                eval_cfg=douknow_eval_cfg,
            ))

del _split, _name
49 changes: 49 additions & 0 deletions
49
opencompass/configs/datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import FixKRetriever | ||
from opencompass.openicl.icl_inferencer import PPLInferencer | ||
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator | ||
from opencompass.datasets import GPQADataset, GPQAEvaluator | ||
from opencompass.utils import first_option_postprocess | ||
|
||
gpqa_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D'],
    output_column='answer')

# Instruction prepended to each scored prompt (Chinese: "For the following
# single-choice question, give the option of the correct answer directly.").
# FIX: dropped the useless f-prefix — the string has no placeholders (F541).
hint = '对下面的单项选择题,请直接给出正确答案的选项。'
question_and_options = 'Question: {question}\n(A){A}\n(B){B}\n(C){C}\n(D){D}\n'
gpqa_infer_cfg = dict(
    # In-context examples: question followed by the candidate answer letter.
    ice_template=dict(
        type=PromptTemplate,
        template={
            opt: f'{question_and_options}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']},
    ),
    # Scored prompt: one PPL template per candidate option.
    prompt_template=dict(
        type=PromptTemplate,
        template={
            opt: f'{hint}\n</E>{question_and_options}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']
        },
        ice_token='</E>'
    ),
    # 5-shot: always use the first five examples as in-context demos.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
    inferencer=dict(type=PPLInferencer))

gpqa_eval_cfg = dict(evaluator=dict(type=AccwithDetailsEvaluator))

gpqa_datasets = []
gpqa_subsets = {
    # 'extended': 'gpqa_extended.csv',
    # 'main': 'gpqa_main.csv',
    'diamond': 'gpqa_diamond.csv'
}

for split in list(gpqa_subsets.keys()):
    gpqa_datasets.append(
        dict(
            abbr='GPQA_' + split,
            type=GPQADataset,
            path='./data/gpqa/',
            name=gpqa_subsets[split],
            reader_cfg=gpqa_reader_cfg,
            infer_cfg=gpqa_infer_cfg,
            eval_cfg=gpqa_eval_cfg)
    )
47 changes: 47 additions & 0 deletions
47
opencompass/configs/datasets/mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from mmengine.config import read_base | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import FixKRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import MMLUProDataset, MMLUProBaseEvaluator | ||
|
||
with read_base():
    from .mmlu_pro_categories import categories

mmlu_pro_datasets = []

# One dataset config per MMLU-Pro subject category (5-shot, generative).
for category in categories:
    # Per-category instruction placed before the scored question.
    hint = (f'Answer the following multiple choice question about {category}, '
            'and give your answer option directly.')
    question_and_options = 'Question:\n{question}\nOptions:\n{options_str}'

    mmlu_pro_reader_cfg = dict(
        input_columns=['question', 'cot_content', 'options_str'],
        output_column='answer_string',
        train_split='validation',
        test_split='test',
    )

    mmlu_pro_infer_cfg = dict(
        # Demo format: question plus its gold answer.
        ice_template=dict(
            type=PromptTemplate,
            template=f'{question_and_options}\nAnswer: {{answer}}',
        ),
        # Scored prompt: instruction, demos, then the test question.
        prompt_template=dict(
            type=PromptTemplate,
            template=f'{hint}\n</E>{question_and_options}\nAnswer: ',
            ice_token='</E>',
        ),
        # Fixed 5-shot demos drawn from the validation split.
        retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
        inferencer=dict(type=GenInferencer, max_out_len=100),
    )

    mmlu_pro_eval_cfg = dict(evaluator=dict(type=MMLUProBaseEvaluator))

    mmlu_pro_datasets.append(
        dict(
            abbr=f'mmlu_pro_{category.replace(" ", "_")}',
            type=MMLUProDataset,
            path='opencompass/mmlu_pro',
            category=category,
            reader_cfg=mmlu_pro_reader_cfg,
            infer_cfg=mmlu_pro_infer_cfg,
            eval_cfg=mmlu_pro_eval_cfg,
        ))