From 0d5cac4285b1795767b1c3b5ef1760ee1648aa54 Mon Sep 17 00:00:00 2001
From: bittersweet1999 <1487910649@qq.com>
Date: Thu, 17 Oct 2024 16:14:57 +0800
Subject: [PATCH] compati old version

---
Reviewer notes (commentary placed after the three-dash marker; it is not
part of the commit message and not part of any hunk):

* Purpose: adds four dataset configs (compassbench_v1_3 objective gen and
  its `_068af0` variant, gpqa few-shot PPL, mmlu_pro few-shot gen) twice,
  once under configs/ and once under opencompass/configs/. The two copies
  of each file are identical.
* Layout: every patch line is back on its own physical line and the Python
  bodies are re-indented. Added-line totals match the hunk headers
  (4 / 74 / 49 / 47 per file, 348 overall).
* `</E>` placeholder: the flattened text carried `begin=''` and
  `ice_token=''`. An empty token cannot mark a position in a template, so
  the in-context-example placeholder `</E>` is restored in `begin`, in the
  few-shot prompt templates and in `ice_token`.
  NOTE(review): the placeholder is placed right after the hint line in the
  gpqa and mmlu_pro prompt templates - confirm against the upstream blobs.
* `abbr` in compassbench_v1_3_objective_gen_068af0.py: the conditional
  expression bound looser than `+`, so `CircularEval = False` produced
  `abbr=''` for every dataset. The optional 'circular' suffix is now
  parenthesised; the value is unchanged while `CircularEval` is True.
* NOTE(review): `GPQAEvaluator` and `first_option_postprocess` are imported
  but not referenced in gpqa_few_shot_ppl_2c9cd6.py; left untouched here.
* The `index` blob ids below are carried over verbatim from the original
  mail and were not recomputed after the edits listed above.

 .../compassbench_v1_3_objective_gen.py        |  4 +
 .../compassbench_v1_3_objective_gen_068af0.py | 74 +++++++++++++++++++
 .../datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py | 49 ++++++++++++
 .../mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py  | 47 ++++++++++++
 .../compassbench_v1_3_objective_gen.py        |  4 +
 .../compassbench_v1_3_objective_gen_068af0.py | 74 +++++++++++++++++++
 .../datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py | 49 ++++++++++++
 .../mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py  | 47 ++++++++++++
 8 files changed, 348 insertions(+)
 create mode 100644 configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py
 create mode 100644 configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py
 create mode 100644 configs/datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py
 create mode 100644 configs/datasets/mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py
 create mode 100644 opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py
 create mode 100644 opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py
 create mode 100644 opencompass/configs/datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py
 create mode 100644 opencompass/configs/datasets/mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py

diff --git a/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py b/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py
new file mode 100644
index 000000000..dc1871b1f
--- /dev/null
+++ b/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .compassbench_v1_3_objective_gen_068af0 import compassbench_aug_datasets  # noqa: F401, F403
\ No newline at end of file
diff --git a/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py b/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py
new file mode 100644
index 000000000..d54e0878e
--- /dev/null
+++ b/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py
@@ -0,0 +1,74 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
+from opencompass.datasets.compassbench_obj import CompassBenchObjectiveV1_3, compassbench_objective_v1_3_postprocess
+from opencompass.utils.text_postprocessors import first_option_postprocess
+
+
+prompt_cn = {
+    'single_choice_cn': '以下是一道单项选择题,请你根据你了解的知识给出正确的答案选项。请你一步步推理并在最后用“答案选项为X”来回答,其中X是ABCD中你认为正确的选项序号\n下面是你要回答的题目:\n{question}\n让我们一步步解决这个问题:',
+    'cloze_cn': '以下是一道填空题,请你根据你了解的知识一步步思考后把你的最终答案放到\\boxed{}中。\n下面是你要回答的题目:\n{question}\n让我们一步步解决这个问题:',
+}
+
+prompt_en = {
+    'single_choice_en': "Here is a single-choice question. Please give the correct answer based on your knowledge. Please reason step by step and answer with 'The answer is X' at the end, where X is the option number you think is correct.\nHere is the question you need to answer:\n{question}\nLet's solve this problem step by step:",
+    'cloze_en': "Here is a fill-in-the-blank question. Please think step by step based on your knowledge and put your final answer in \\boxed{}. Here is the question you need to answer:\n{question}\nLet's solve this problem step by step:",
+}
+
+
+douknow_sets = {
+    'knowledge': ['single_choice_cn'],
+    'math': ['single_choice_cn'],
+}
+
+# Set up the prompts
+CircularEval = True
+
+
+compassbench_aug_datasets = []
+
+for _split in list(douknow_sets.keys()):
+    for _name in douknow_sets[_split]:
+        if 'cn' in _name:
+            single_choice_prompts = prompt_cn
+            cloze_prompts = prompt_cn
+        else:
+            single_choice_prompts = prompt_en
+            cloze_prompts = prompt_en
+        douknow_infer_cfg = dict(
+            ice_template=dict(
+                type=PromptTemplate,
+                template=dict(
+                    begin='</E>',
+                    round=[
+                        dict(
+                            role='HUMAN',
+                            prompt= single_choice_prompts[_name],
+                        ),
+                        dict(role='BOT', prompt='{answer}'),] if 'choice' in _name else cloze_prompts[_name],
+                ),
+                ice_token='</E>',
+            ),
+            retriever=dict(type=ZeroRetriever),
+            inferencer=dict(type=GenInferencer),
+        )
+        douknow_eval_cfg = dict(
+            evaluator=dict(type=CircularEvaluator if CircularEval else AccEvaluator) if 'single_choice' in _name else dict(type=AccEvaluator),
+            pred_postprocessor=dict(type=first_option_postprocess, options='ABCD' ) if 'single_choice' in _name else dict(type=compassbench_objective_v1_3_postprocess, name=_name))
+
+        compassbench_aug_datasets.append(
+            dict(
+                type=CompassBenchObjectiveV1_3,
+                path=f'./data/compassbench_v1_3/{_split}/{_name}.jsonl',
+                name='circular_' + _name if CircularEval else _name,
+                abbr='compassbench-' + _split + '-' + _name + ('circular' if CircularEval else ''),
+                reader_cfg=dict(
+                    input_columns=['question'],
+                    output_column='answer'
+                ),
+                infer_cfg=douknow_infer_cfg,
+                eval_cfg=douknow_eval_cfg,
+            ))
+
+del _split, _name
\ No newline at end of file
diff --git a/configs/datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py b/configs/datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py
new file mode 100644
index 000000000..a32ceb36e
--- /dev/null
+++ b/configs/datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py
@@ -0,0 +1,49 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
+from opencompass.datasets import GPQADataset, GPQAEvaluator
+from opencompass.utils import first_option_postprocess
+
+gpqa_reader_cfg = dict(
+    input_columns=['question', 'A', 'B', 'C', 'D'],
+    output_column='answer')
+
+hint = f'对下面的单项选择题,请直接给出正确答案的选项。'
+question_and_options = 'Question: {question}\n(A){A}\n(B){B}\n(C){C}\n(D){D}\n'
+gpqa_infer_cfg = dict(
+    ice_template=dict(
+        type=PromptTemplate,
+        template={
+            opt: f'{question_and_options}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']},
+    ),
+    prompt_template=dict(
+        type=PromptTemplate,
+        template={
+            opt: f'{hint}\n</E>{question_and_options}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']
+        },
+        ice_token='</E>'
+    ),
+    retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
+    inferencer=dict(type=PPLInferencer))
+
+gpqa_eval_cfg = dict(evaluator=dict(type=AccwithDetailsEvaluator))
+
+gpqa_datasets = []
+gpqa_subsets = {
+    # 'extended': 'gpqa_extended.csv',
+    # 'main': 'gpqa_main.csv',
+    'diamond': 'gpqa_diamond.csv'
+}
+
+for split in list(gpqa_subsets.keys()):
+    gpqa_datasets.append(
+        dict(
+            abbr='GPQA_' + split,
+            type=GPQADataset,
+            path='./data/gpqa/',
+            name=gpqa_subsets[split],
+            reader_cfg=gpqa_reader_cfg,
+            infer_cfg=gpqa_infer_cfg,
+            eval_cfg=gpqa_eval_cfg)
+    )
\ No newline at end of file
diff --git a/configs/datasets/mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py b/configs/datasets/mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py
new file mode 100644
index 000000000..2dea8af7f
--- /dev/null
+++ b/configs/datasets/mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py
@@ -0,0 +1,47 @@
+from mmengine.config import read_base
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import MMLUProDataset, MMLUProBaseEvaluator
+
+with read_base():
+    from .mmlu_pro_categories import categories
+
+mmlu_pro_datasets = []
+
+for category in categories:
+    hint = f'Answer the following multiple choice question about {category}, and give your answer option directly.'
+    question_and_options = 'Question:\n{question}\nOptions:\n{options_str}'
+    mmlu_pro_reader_cfg = dict(
+        input_columns=['question', 'cot_content', 'options_str'],
+        output_column='answer_string',
+        train_split='validation',
+        test_split='test',
+    )
+    mmlu_pro_infer_cfg = dict(
+        ice_template=dict(
+            type=PromptTemplate,
+            template=f'{question_and_options}\nAnswer: {{answer}}'),
+        prompt_template=dict(
+            type=PromptTemplate,
+            template=f'{hint}\n</E>{question_and_options}\nAnswer: ',
+            ice_token='</E>'
+        ),
+        retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
+        inferencer=dict(type=GenInferencer, max_out_len=100)
+    )
+
+    mmlu_pro_eval_cfg = dict(
+        evaluator=dict(type=MMLUProBaseEvaluator)
+    )
+
+    mmlu_pro_datasets.append(
+        dict(
+            abbr=f'mmlu_pro_{category.replace(" ", "_")}',
+            type=MMLUProDataset,
+            path='opencompass/mmlu_pro',
+            category=category,
+            reader_cfg=mmlu_pro_reader_cfg,
+            infer_cfg=mmlu_pro_infer_cfg,
+            eval_cfg=mmlu_pro_eval_cfg,
+        ))
\ No newline at end of file
diff --git a/opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py b/opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py
new file mode 100644
index 000000000..dc1871b1f
--- /dev/null
+++ b/opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .compassbench_v1_3_objective_gen_068af0 import compassbench_aug_datasets  # noqa: F401, F403
\ No newline at end of file
diff --git a/opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py b/opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py
new file mode 100644
index 000000000..d54e0878e
--- /dev/null
+++ b/opencompass/configs/datasets/compassbench_v1_3/compassbench_v1_3_objective_gen_068af0.py
@@ -0,0 +1,74 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import CircularEvaluator, AccEvaluator
+from opencompass.datasets.compassbench_obj import CompassBenchObjectiveV1_3, compassbench_objective_v1_3_postprocess
+from opencompass.utils.text_postprocessors import first_option_postprocess
+
+
+prompt_cn = {
+    'single_choice_cn': '以下是一道单项选择题,请你根据你了解的知识给出正确的答案选项。请你一步步推理并在最后用“答案选项为X”来回答,其中X是ABCD中你认为正确的选项序号\n下面是你要回答的题目:\n{question}\n让我们一步步解决这个问题:',
+    'cloze_cn': '以下是一道填空题,请你根据你了解的知识一步步思考后把你的最终答案放到\\boxed{}中。\n下面是你要回答的题目:\n{question}\n让我们一步步解决这个问题:',
+}
+
+prompt_en = {
+    'single_choice_en': "Here is a single-choice question. Please give the correct answer based on your knowledge. Please reason step by step and answer with 'The answer is X' at the end, where X is the option number you think is correct.\nHere is the question you need to answer:\n{question}\nLet's solve this problem step by step:",
+    'cloze_en': "Here is a fill-in-the-blank question. Please think step by step based on your knowledge and put your final answer in \\boxed{}. Here is the question you need to answer:\n{question}\nLet's solve this problem step by step:",
+}
+
+
+douknow_sets = {
+    'knowledge': ['single_choice_cn'],
+    'math': ['single_choice_cn'],
+}
+
+# Set up the prompts
+CircularEval = True
+
+
+compassbench_aug_datasets = []
+
+for _split in list(douknow_sets.keys()):
+    for _name in douknow_sets[_split]:
+        if 'cn' in _name:
+            single_choice_prompts = prompt_cn
+            cloze_prompts = prompt_cn
+        else:
+            single_choice_prompts = prompt_en
+            cloze_prompts = prompt_en
+        douknow_infer_cfg = dict(
+            ice_template=dict(
+                type=PromptTemplate,
+                template=dict(
+                    begin='</E>',
+                    round=[
+                        dict(
+                            role='HUMAN',
+                            prompt= single_choice_prompts[_name],
+                        ),
+                        dict(role='BOT', prompt='{answer}'),] if 'choice' in _name else cloze_prompts[_name],
+                ),
+                ice_token='</E>',
+            ),
+            retriever=dict(type=ZeroRetriever),
+            inferencer=dict(type=GenInferencer),
+        )
+        douknow_eval_cfg = dict(
+            evaluator=dict(type=CircularEvaluator if CircularEval else AccEvaluator) if 'single_choice' in _name else dict(type=AccEvaluator),
+            pred_postprocessor=dict(type=first_option_postprocess, options='ABCD' ) if 'single_choice' in _name else dict(type=compassbench_objective_v1_3_postprocess, name=_name))
+
+        compassbench_aug_datasets.append(
+            dict(
+                type=CompassBenchObjectiveV1_3,
+                path=f'./data/compassbench_v1_3/{_split}/{_name}.jsonl',
+                name='circular_' + _name if CircularEval else _name,
+                abbr='compassbench-' + _split + '-' + _name + ('circular' if CircularEval else ''),
+                reader_cfg=dict(
+                    input_columns=['question'],
+                    output_column='answer'
+                ),
+                infer_cfg=douknow_infer_cfg,
+                eval_cfg=douknow_eval_cfg,
+            ))
+
+del _split, _name
\ No newline at end of file
diff --git a/opencompass/configs/datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py b/opencompass/configs/datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py
new file mode 100644
index 000000000..a32ceb36e
--- /dev/null
+++ b/opencompass/configs/datasets/gpqa/gpqa_few_shot_ppl_2c9cd6.py
@@ -0,0 +1,49 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
+from opencompass.datasets import GPQADataset, GPQAEvaluator
+from opencompass.utils import first_option_postprocess
+
+gpqa_reader_cfg = dict(
+    input_columns=['question', 'A', 'B', 'C', 'D'],
+    output_column='answer')
+
+hint = f'对下面的单项选择题,请直接给出正确答案的选项。'
+question_and_options = 'Question: {question}\n(A){A}\n(B){B}\n(C){C}\n(D){D}\n'
+gpqa_infer_cfg = dict(
+    ice_template=dict(
+        type=PromptTemplate,
+        template={
+            opt: f'{question_and_options}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']},
+    ),
+    prompt_template=dict(
+        type=PromptTemplate,
+        template={
+            opt: f'{hint}\n</E>{question_and_options}\nAnswer: {opt}' for opt in ['A', 'B', 'C', 'D']
+        },
+        ice_token='</E>'
+    ),
+    retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
+    inferencer=dict(type=PPLInferencer))
+
+gpqa_eval_cfg = dict(evaluator=dict(type=AccwithDetailsEvaluator))
+
+gpqa_datasets = []
+gpqa_subsets = {
+    # 'extended': 'gpqa_extended.csv',
+    # 'main': 'gpqa_main.csv',
+    'diamond': 'gpqa_diamond.csv'
+}
+
+for split in list(gpqa_subsets.keys()):
+    gpqa_datasets.append(
+        dict(
+            abbr='GPQA_' + split,
+            type=GPQADataset,
+            path='./data/gpqa/',
+            name=gpqa_subsets[split],
+            reader_cfg=gpqa_reader_cfg,
+            infer_cfg=gpqa_infer_cfg,
+            eval_cfg=gpqa_eval_cfg)
+    )
\ No newline at end of file
diff --git a/opencompass/configs/datasets/mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py b/opencompass/configs/datasets/mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py
new file mode 100644
index 000000000..2dea8af7f
--- /dev/null
+++ b/opencompass/configs/datasets/mmlu_pro/mmlu_pro_few_shot_gen_bfaf90.py
@@ -0,0 +1,47 @@
+from mmengine.config import read_base
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import MMLUProDataset, MMLUProBaseEvaluator
+
+with read_base():
+    from .mmlu_pro_categories import categories
+
+mmlu_pro_datasets = []
+
+for category in categories:
+    hint = f'Answer the following multiple choice question about {category}, and give your answer option directly.'
+    question_and_options = 'Question:\n{question}\nOptions:\n{options_str}'
+    mmlu_pro_reader_cfg = dict(
+        input_columns=['question', 'cot_content', 'options_str'],
+        output_column='answer_string',
+        train_split='validation',
+        test_split='test',
+    )
+    mmlu_pro_infer_cfg = dict(
+        ice_template=dict(
+            type=PromptTemplate,
+            template=f'{question_and_options}\nAnswer: {{answer}}'),
+        prompt_template=dict(
+            type=PromptTemplate,
+            template=f'{hint}\n</E>{question_and_options}\nAnswer: ',
+            ice_token='</E>'
+        ),
+        retriever=dict(type=FixKRetriever, fix_id_list=[0, 1, 2, 3, 4]),
+        inferencer=dict(type=GenInferencer, max_out_len=100)
+    )
+
+    mmlu_pro_eval_cfg = dict(
+        evaluator=dict(type=MMLUProBaseEvaluator)
+    )
+
+    mmlu_pro_datasets.append(
+        dict(
+            abbr=f'mmlu_pro_{category.replace(" ", "_")}',
+            type=MMLUProDataset,
+            path='opencompass/mmlu_pro',
+            category=category,
+            reader_cfg=mmlu_pro_reader_cfg,
+            infer_cfg=mmlu_pro_infer_cfg,
+            eval_cfg=mmlu_pro_eval_cfg,
+        ))
\ No newline at end of file