diff --git a/scripts/auto_eval.py b/scripts/auto_eval.py
new file mode 100644
--- /dev/null
+++ b/scripts/auto_eval.py
@@ -0,0 +1,51 @@
+"""Periodically scan result directories and evaluate unscored prediction files.
+
+For every directory ``<d>`` returned by ``ls(mode='dir')`` and every dataset
+listed below, a prediction file ``<d>/<d>_<dataset>.xlsx`` that has no matching
+score file ``<d>/<d>_<dataset>_<suffix>`` is passed to its evaluation script.
+The scan repeats forever, sleeping 30 seconds between passes.
+"""
+from vlmeval.smp import *
+import subprocess
+import sys
+import time
+from datetime import datetime
+
+# Parallel lists: dataset[i] is evaluated by script[i]; an existing file
+# ending in suffix[i] marks that dataset as already scored.
+dataset = ['MME', 'SEEDBench_IMG', 'MMBench', 'CCBench', 'MMBench_CN']
+suffix = ['score.csv', 'acc.csv', 'acc.csv', 'acc.csv', 'acc.csv']
+script = ['mme_eval.py', 'multiple_choice.py', 'multiple_choice.py', 'multiple_choice.py', 'multiple_choice.py']
+
+N = len(dataset)
+assert N == len(suffix) == len(script)
+
+
+def now():
+    """Return the current local time formatted as ``MM/DD/YYYY, HH:MM:SS``."""
+    return datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
+
+
+cnt = 0
+while True:
+    for f in ls(mode='dir'):
+        for D, suff, scri in zip(dataset, suffix, script):
+            pred_file = f'{f}/{f}_{D}.xlsx'
+            score_file = f'{f}/{f}_{D}_{suff}'
+            if not osp.exists(pred_file) or osp.exists(score_file):
+                continue
+            # Pass an argument list (no shell) so directory names containing
+            # spaces or shell metacharacters reach the script intact, and use
+            # sys.executable so the child runs in this script's interpreter.
+            cmd = [sys.executable, scri, pred_file, '--verbose']
+            if D != 'MME':
+                cmd += ['--dataset', D]
+            print(' '.join(cmd))
+            time.sleep(1)
+            subprocess.run(cmd)
+            time.sleep(5)
+    cnt += 1
+    # Print a heartbeat every 10 passes.
+    if cnt % 10 == 0:
+        print('Looping', now())
+    time.sleep(30)
diff --git a/scripts/report_missing.py b/scripts/report_missing.py
new file mode 100644
--- /dev/null
+++ b/scripts/report_missing.py
@@ -0,0 +1,24 @@
+"""Print every (directory, dataset) pair whose prediction file has no score file.
+
+Uses the same ``<d>/<d>_<dataset>.xlsx`` / ``<d>/<d>_<dataset>_<suffix>``
+naming as ``auto_eval.py`` but only reports; nothing is evaluated.
+"""
+from vlmeval.smp import *
+import time
+from datetime import datetime
+
+# Parallel lists; ``script`` is unused here and kept only so the three tables
+# stay identical to the ones in auto_eval.py.
+dataset = ['MME', 'SEEDBench_IMG', 'MMBench', 'CCBench', 'MMBench_CN']
+suffix = ['score.csv', 'acc.csv', 'acc.csv', 'acc.csv', 'acc.csv']
+script = ['mme_eval.py', 'multiple_choice.py', 'multiple_choice.py', 'multiple_choice.py', 'multiple_choice.py']
+
+N = len(dataset)
+assert N == len(suffix) == len(script)
+
+for f in ls(mode='dir'):
+    for D, suff in zip(dataset, suffix):
+        pred_file = f'{f}/{f}_{D}.xlsx'
+        score_file = f'{f}/{f}_{D}_{suff}'
+        if osp.exists(pred_file) and not osp.exists(score_file):
+            print(f, D)
diff --git a/vlmeval/eval/multiple_choice.py b/vlmeval/eval/multiple_choice.py
index 8abb53d97..f00d8fbde 100644
--- a/vlmeval/eval/multiple_choice.py
+++ b/vlmeval/eval/multiple_choice.py
@@ -322,7 +322,7 @@ def parse_args():
 
 if __name__ == '__main__':
     args = parse_args()
-    assert args.dataset in ['MMBench', 'MMBench_CN', 'MMBench_DEV_EN', 'MMBench_DEV_CN', 'SEEDBench_IMG']
+    assert args.dataset in ['MMBench', 'MMBench_CN', 'MMBench_DEV_EN', 'MMBench_DEV_CN', 'SEEDBench_IMG', 'CCBench']
     suffix = args.data.split('.')[-1]
     log_pth = args.data.replace('.' + suffix, f'_{args.model}_eval.log')
     fout = open(log_pth, 'a')
diff --git a/vlmeval/utils/data_util.py b/vlmeval/utils/data_util.py
index ee7ce0c4f..ac8cfec18 100644
--- a/vlmeval/utils/data_util.py
+++ b/vlmeval/utils/data_util.py
@@ -28,7 +28,7 @@
 }
 
 def DATASET_TYPE(dataset):
-    if 'mmbench' in dataset.lower() or 'seedbench' in dataset.lower():
+    if 'mmbench' in dataset.lower() or 'seedbench' in dataset.lower() or 'ccbench' in dataset.lower():
         return 'multi-choice'
     elif 'MME' in dataset:
         return 'Y/N'