evaluate_flan.py
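"""Evaluate a FLAN-T5 model on MMLU.

Rather than generating free-form text, the predictor runs a single forward
pass and returns the answer choice whose first token receives the highest
logit at the decoder start position (rank classification over CHOICES).
"""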
import argparse
from pathlib import Path
from typing import Callable

import torch
from transformers import (AutoModelForSeq2SeqLM, AutoTokenizer,
                          T5ForConditionalGeneration, T5TokenizerFast)

from mmlu.dataset import CHOICES
from mmlu.evaluation import predict_dataset, evaluate_results

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


class FlanPredictor(Callable[[str], str]):

    def __init__(self, model: T5ForConditionalGeneration, tokenizer: T5TokenizerFast) -> None:
        self._model = model
        self._tokenizer = tokenizer
        self._model.eval()
        # First token id of each answer choice ('A', 'B', ...); prediction is
        # reduced to comparing the logits of these tokens.
        self._choice_tokens = [self._tokenizer(c).input_ids[0] for c in CHOICES]
        self._choice_tokens = torch.tensor(self._choice_tokens).long()

    def __call__(self, prompt: str) -> str:
        input_ids = self._tokenizer(prompt, return_tensors='pt').input_ids.to(device)
        # Tokenizing the empty string yields only the EOS token; shifting it
        # right replaces it with the decoder start token, so the model scores
        # the very first output position.
        decoder_input_ids = self._tokenizer('', return_tensors='pt').input_ids.to(device)
        decoder_input_ids = self._model._shift_right(decoder_input_ids)
        with torch.no_grad():
            logits = self._model(
                input_ids=input_ids, decoder_input_ids=decoder_input_ids
            ).logits.flatten().cpu()
        # Restrict the vocabulary distribution to the answer-choice tokens and
        # pick the most likely one.
        logits = logits[self._choice_tokens]
        pred_index = torch.argmax(logits)
        return CHOICES[int(pred_index)]


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data')
    parser.add_argument('--result_dir', type=str, default='results/flan_small')
    parser.add_argument('--k_shot', type=int, default=0,
                        help='The number of few-shot examples in the prompt.')
    parser.add_argument('--engine', type=str, default='google/flan-t5-small')
    args = parser.parse_args()
    print(args)

    model = AutoModelForSeq2SeqLM.from_pretrained(args.engine).to(device)
    tokenizer = AutoTokenizer.from_pretrained(args.engine)
    predict_function = FlanPredictor(model, tokenizer)

    def token_counter(prompt: str) -> int:
        return tokenizer(prompt, return_tensors='pt').input_ids.shape[-1]

    predict_dataset(data_dir=Path(args.data_dir),
                    result_dir=Path(args.result_dir),
                    predict_function=predict_function,
                    k_shot=args.k_shot,
                    n_workers=0,
                    timeout_s=0,
                    retries=0,
                    token_counter=token_counter,
                    max_tokens=2048)
    evaluate_results(result_dir=Path(args.result_dir))
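
# Example invocation (a sketch, assuming the MMLU data has already been
# downloaded to ./data in the layout expected by mmlu.evaluation):
#   python evaluate_flan.py --engine google/flan-t5-small --k_shot 0 \
#       --data_dir data --result_dir results/flan_small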