-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
98 lines (81 loc) · 2.46 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pprint
def last_boxed_only(sample):
    r"""Reduce the answer of a ``(q, a)`` sample to its last boxed element.

    The answer is passed through ``last_boxed_only_string`` so that only the
    last ``\boxed{...}`` or ``\fbox{...}`` element is kept.

    Args:
        sample: A two-element ``(q, a)`` pair; ``a`` is the answer string.

    Returns:
        ``(q, boxed_answer)``, or ``None`` when the answer contains no
        boxed element.
    """
    # The docstring is a raw string on purpose: in a normal string literal
    # "\b" and "\f" are the backspace and form-feed escapes.
    q, a = sample
    a = last_boxed_only_string(a)
    if a is None:
        return None
    return (q, a)
def last_boxed_only_string(string):
    r"""Extract the last ``\boxed{...}`` element of *string*.

    The last occurrence of ``\boxed`` is looked up first; ``\fbox`` is only
    considered when the string contains no ``\boxed`` at all. Starting at
    that marker, braces are counted until the group opened after the marker
    is balanced again.

    Args:
        string: The text to search.

    Returns:
        The substring running from the marker through its matching closing
        brace (e.g. ``"\\boxed{5}"``), or ``None`` when no marker is found
        or the braces after it never balance.
    """
    start = string.rfind("\\boxed")
    if start < 0:
        start = string.rfind("\\fbox")
        if start < 0:
            return None

    depth = 0
    for pos in range(start, len(string)):
        char = string[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            # Only a closing brace can complete the group, so the balance
            # check lives here and never fires before the first "{".
            if depth == 0:
                return string[start:pos + 1]

    # Ran off the end of the string without the braces balancing.
    return None
def only_until_first_boxed_from_tokens(string, tokens):
    r"""Return the leading tokens that precede the first boxed element.

    The first ``\boxed`` in *string* is located (``\fbox`` is used only when
    there is no ``\boxed``). Token lengths are then accumulated until the
    running total reaches that character offset, and the tokens before the
    one that reached it are returned.

    Args:
        string: The text to search for the marker.
        tokens: A sliceable sequence of sized items.
            NOTE(review): presumably the tokenisation of *string*, so that
            the token lengths add up to character offsets in it; confirm
            against the caller.

    Returns:
        ``tokens[:i]`` where ``i`` is the index of the first token whose
        cumulative length is ``>=`` the marker offset, or ``None`` when
        *string* contains neither marker. A token ending exactly at the
        marker is therefore excluded as well. If the cumulative length never
        reaches the offset, everything but the last token is returned.
        Empty *tokens* yields an empty slice.
    """
    idx = string.find("\\boxed")
    if idx < 0:
        idx = string.find("\\fbox")
        if idx < 0:
            return None

    # Bound up front: with empty ``tokens`` the loop body never runs and the
    # loop variable would otherwise be undefined at the slice below.
    cut = 0
    cum_length = 0
    for cut, token in enumerate(tokens):
        cum_length += len(token)
        if cum_length >= idx:
            break
    return tokens[:cut]
def clean_numbers(sample):
    """Apply ``_clean_numbers`` to every element of *sample*.

    Args:
        sample: An iterable of strings, or a falsy value.

    Returns:
        A tuple holding the cleaned version of each element in order, or
        ``None`` when *sample* is falsy (``None``, empty tuple, ...).
    """
    if not sample:
        return None
    return tuple(_clean_numbers(s) for s in sample)
def _clean_numbers(string):
"""
Clean Numbers in the given string
>>> _clean_numbers(None, "Hello 123")
'Hello 123'
>>> _clean_numbers(None, "Hello 1234")
'Hello 1,234'
>>> _clean_numbers(None, "Hello 1234324asdasd")
'Hello 1,234,324asdasd'
"""
num_prev_digits = 0
new_string = ""
for i, c in enumerate(string):
# isdigit() doesnt work here because of weird unicode chars.
if c in {'1', '2', '3', '4', '5', '6', '7', '8', '9', '0'}:
num_prev_digits += 1
else:
if num_prev_digits > 3:
# Some fixing
string_number = new_string[-num_prev_digits:]
new_string = new_string[:-num_prev_digits] + "{0:,}".format(int(string_number))
num_prev_digits = 0
new_string += c
if num_prev_digits > 3:
# Some fixing
string_number = new_string[-num_prev_digits:]
new_string = new_string[:-num_prev_digits] + "{0:,}".format(int(string_number))
return new_string