Commit
Merge branch 'open-compass:main' into main
bittersweet1999 authored Sep 11, 2024
2 parents 42404a5 + 7c7fa36 commit f3f3b20
Showing 86 changed files with 1,471 additions and 456 deletions.
17 changes: 11 additions & 6 deletions .github/workflows/link-check.yml
@@ -5,17 +5,22 @@ on:
     # check links at 01:30 a.m. every day
     - cron: '30 1 * * *'
 
+  workflow_dispatch: # allow manual trigger
+
 jobs:
   link-check:
     runs-on: ubuntu-latest
     steps:
       # - uses: actions/checkout@v3
 
-      - name: linkchecker
+      - name: Install linkchecker
         run: |
           pip install linkchecker
-          linkchecker https://opencompass.readthedocs.io/ --no-robots -t 30 --no-warnings |
-          --ignore-url https://opencompass\.readthedocs\.io/.*/static/images/opencompass_logo\.svg |
-          --ignore-url https://opencompass\.readthedocs\.io/.*/_static/images/icon-menu-dots\.svg |
-          --ignore-url https://opencompass\.readthedocs\.io/policy |
-          --ignore-url https://opencompass\.readthedocs\.io/(en|zh_CN)/[0-9a-f]{40}/.*
+      - name: Run linkchecker
+        run: |
+          linkchecker https://opencompass.readthedocs.io/ --no-robots -t 30 --no-warnings \
+          --ignore-url "https://opencompass.readthedocs.io/.*/static/images/opencompass_logo.svg" \
+          --ignore-url "https://opencompass.readthedocs.io/.*/_static/images/icon-menu-dots.svg" \
+          --ignore-url "https://opencompass.readthedocs.io/policy" \
+          --ignore-url "https://opencompass.readthedocs.io/(en|zh_CN)/[0-9a-f]{40}/.*"
@@ -7,32 +7,37 @@
     input_columns=['context', 'input'],
     output_column='answers',
     train_split='test',
-    test_split='test'
+    test_split='test',
 )
 
 LongBench_2wikimqa_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template=dict(
             round=[
-                dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'),
-            ], )),
+                dict(
+                    role='HUMAN',
+                    prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
+                ),
+            ],
+        ),
+    ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=32)
+    inferencer=dict(type=GenInferencer, max_out_len=32),
 )
 
 LongBench_2wikimqa_eval_cfg = dict(
-    evaluator=dict(type=LongBenchF1Evaluator),
-    pred_role='BOT'
+    evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT'
 )
 
 LongBench_2wikimqa_datasets = [
     dict(
         type=LongBench2wikimqaDataset,
         abbr='LongBench_2wikimqa',
-        path='THUDM/LongBench',
+        path='opencompass/Longbench',
         name='2wikimqa',
         reader_cfg=LongBench_2wikimqa_reader_cfg,
         infer_cfg=LongBench_2wikimqa_infer_cfg,
-        eval_cfg=LongBench_2wikimqa_eval_cfg)
+        eval_cfg=LongBench_2wikimqa_eval_cfg,
+    )
 ]
@@ -7,32 +7,37 @@
     input_columns=['context', 'input'],
     output_column='answers',
     train_split='test',
-    test_split='test'
+    test_split='test',
 )
 
 LongBench_dureader_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template=dict(
             round=[
-                dict(role='HUMAN', prompt='请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:'),
-            ], )),
+                dict(
+                    role='HUMAN',
+                    prompt='请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
+                ),
+            ],
+        ),
+    ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=128)
+    inferencer=dict(type=GenInferencer, max_out_len=128),
 )
 
 LongBench_dureader_eval_cfg = dict(
-    evaluator=dict(type=LongBenchRougeEvaluator, language='zh'),
-    pred_role='BOT'
+    evaluator=dict(type=LongBenchRougeEvaluator, language='zh'), pred_role='BOT'
 )
 
 LongBench_dureader_datasets = [
     dict(
         type=LongBenchdureaderDataset,
         abbr='LongBench_dureader',
-        path='THUDM/LongBench',
+        path='opencompass/Longbench',
         name='dureader',
         reader_cfg=LongBench_dureader_reader_cfg,
         infer_cfg=LongBench_dureader_infer_cfg,
-        eval_cfg=LongBench_dureader_eval_cfg)
+        eval_cfg=LongBench_dureader_eval_cfg,
+    )
 ]
@@ -7,32 +7,37 @@
     input_columns=['context'],
     output_column='answers',
     train_split='test',
-    test_split='test'
+    test_split='test',
 )
 
 LongBench_gov_report_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template=dict(
             round=[
-                dict(role='HUMAN', prompt='You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:'),
-            ], )),
+                dict(
+                    role='HUMAN',
+                    prompt='You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
+                ),
+            ],
+        ),
+    ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=512)
+    inferencer=dict(type=GenInferencer, max_out_len=512),
 )
 
 LongBench_gov_report_eval_cfg = dict(
-    evaluator=dict(type=LongBenchRougeEvaluator),
-    pred_role='BOT'
+    evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT'
 )
 
 LongBench_gov_report_datasets = [
     dict(
         type=LongBenchgov_reportDataset,
         abbr='LongBench_gov_report',
-        path='THUDM/LongBench',
+        path='opencompass/Longbench',
         name='gov_report',
         reader_cfg=LongBench_gov_report_reader_cfg,
         infer_cfg=LongBench_gov_report_infer_cfg,
-        eval_cfg=LongBench_gov_report_eval_cfg)
+        eval_cfg=LongBench_gov_report_eval_cfg,
+    )
 ]
@@ -7,32 +7,37 @@
     input_columns=['context', 'input'],
     output_column='answers',
     train_split='test',
-    test_split='test'
+    test_split='test',
 )
 
 LongBench_hotpotqa_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template=dict(
             round=[
-                dict(role='HUMAN', prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:'),
-            ], )),
+                dict(
+                    role='HUMAN',
+                    prompt='Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
+                ),
+            ],
+        ),
+    ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=32)
+    inferencer=dict(type=GenInferencer, max_out_len=32),
 )
 
 LongBench_hotpotqa_eval_cfg = dict(
-    evaluator=dict(type=LongBenchF1Evaluator),
-    pred_role='BOT'
+    evaluator=dict(type=LongBenchF1Evaluator), pred_role='BOT'
 )
 
 LongBench_hotpotqa_datasets = [
     dict(
         type=LongBenchhotpotqaDataset,
         abbr='LongBench_hotpotqa',
-        path='THUDM/LongBench',
+        path='opencompass/Longbench',
         name='hotpotqa',
         reader_cfg=LongBench_hotpotqa_reader_cfg,
         infer_cfg=LongBench_hotpotqa_infer_cfg,
-        eval_cfg=LongBench_hotpotqa_eval_cfg)
+        eval_cfg=LongBench_hotpotqa_eval_cfg,
+    )
 ]
@@ -7,32 +7,37 @@
     input_columns=['context'],
     output_column='answers',
     train_split='test',
-    test_split='test'
+    test_split='test',
 )
 
 LongBench_lcc_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template=dict(
             round=[
-                dict(role='HUMAN', prompt='Please complete the code given below. \n{context}Next line of code:\n'),
-            ], )),
+                dict(
+                    role='HUMAN',
+                    prompt='Please complete the code given below. \n{context}Next line of code:\n',
+                ),
+            ],
+        ),
+    ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=64)
+    inferencer=dict(type=GenInferencer, max_out_len=64),
 )
 
 LongBench_lcc_eval_cfg = dict(
-    evaluator=dict(type=LongBenchCodeSimEvaluator),
-    pred_role='BOT'
+    evaluator=dict(type=LongBenchCodeSimEvaluator), pred_role='BOT'
 )
 
 LongBench_lcc_datasets = [
     dict(
         type=LongBenchlccDataset,
         abbr='LongBench_lcc',
-        path='THUDM/LongBench',
+        path='opencompass/Longbench',
         name='lcc',
         reader_cfg=LongBench_lcc_reader_cfg,
         infer_cfg=LongBench_lcc_infer_cfg,
-        eval_cfg=LongBench_lcc_eval_cfg)
+        eval_cfg=LongBench_lcc_eval_cfg,
+    )
 ]
@@ -1,24 +1,33 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import LongBenchClassificationEvaluator, LongBenchlshtDataset, lsht_postprocess
+from opencompass.datasets import (
+    LongBenchClassificationEvaluator,
+    LongBenchlshtDataset,
+    lsht_postprocess,
+)
 
 LongBench_lsht_reader_cfg = dict(
     input_columns=['context', 'input'],
     output_column='all_labels',
     train_split='test',
-    test_split='test'
+    test_split='test',
 )
 
 LongBench_lsht_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template=dict(
             round=[
-                dict(role='HUMAN', prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}'),
-            ], )),
+                dict(
+                    role='HUMAN',
+                    prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
+                ),
+            ],
+        ),
+    ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=64)
+    inferencer=dict(type=GenInferencer, max_out_len=64),
 )
 
 LongBench_lsht_eval_cfg = dict(
@@ -31,9 +40,10 @@
     dict(
         type=LongBenchlshtDataset,
         abbr='LongBench_lsht',
-        path='THUDM/LongBench',
+        path='opencompass/Longbench',
         name='lsht',
         reader_cfg=LongBench_lsht_reader_cfg,
         infer_cfg=LongBench_lsht_infer_cfg,
-        eval_cfg=LongBench_lsht_eval_cfg)
+        eval_cfg=LongBench_lsht_eval_cfg,
+    )
 ]
@@ -7,32 +7,37 @@
     input_columns=['context'],
     output_column='answers',
     train_split='test',
-    test_split='test'
+    test_split='test',
 )
 
 LongBench_multi_news_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template=dict(
             round=[
-                dict(role='HUMAN', prompt='You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n'),
-            ], )),
+                dict(
+                    role='HUMAN',
+                    prompt='You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
+                ),
+            ],
+        ),
+    ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=512)
+    inferencer=dict(type=GenInferencer, max_out_len=512),
 )
 
 LongBench_multi_news_eval_cfg = dict(
-    evaluator=dict(type=LongBenchRougeEvaluator),
-    pred_role='BOT'
+    evaluator=dict(type=LongBenchRougeEvaluator), pred_role='BOT'
 )
 
 LongBench_multi_news_datasets = [
     dict(
         type=LongBenchmulti_newsDataset,
         abbr='LongBench_multi_news',
-        path='THUDM/LongBench',
+        path='opencompass/Longbench',
         name='multi_news',
         reader_cfg=LongBench_multi_news_reader_cfg,
         infer_cfg=LongBench_multi_news_infer_cfg,
-        eval_cfg=LongBench_multi_news_eval_cfg)
+        eval_cfg=LongBench_multi_news_eval_cfg,
+    )
 ]
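Each LongBench config above carries the same two changes: the dataset path moves from 'THUDM/LongBench' to 'opencompass/Longbench', and the dicts are reformatted with trailing commas and one argument per line. As a rough, hypothetical usage sketch only (the dataset and model config names below are assumptions, not taken from this commit), configs like these are normally pulled into an evaluation through OpenCompass's `run.py`:

```bash
# Hypothetical smoke test of one updated LongBench config; the --datasets and
# --models names are illustrative assumptions, not part of this commit.
python run.py \
    --datasets longbench_2wikimqa_gen \
    --models hf_internlm2_chat_7b \
    --debug
```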