forked from princeton-vl/CoqGym
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_synthetic_proofs.py
228 lines (194 loc) · 8.76 KB
/
extract_synthetic_proofs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
import os
import json
from glob import glob
from utils import get_code, SexpCache, set_paths, extract_code, dst_filename
from serapi import SerAPI
from time import time
import sexpdata
from proof_tree import ProofTree
from extract_proof import goal_is_prop, check_topology
import pdb
def close_proof(sexp_cache, serapi):
fg_goals, bg_goals, shelved_goals, given_up_goals = serapi.query_goals()
assert bg_goals + shelved_goals == []
if fg_goals == []:
return []
num_goals_left = len(fg_goals)
cmds = []
for i in range(num_goals_left):
_, ast = serapi.execute('auto.')
cmds.append(('auto.', 'VernacExtend', sexp_cache.dump(ast)))
fg_goals, bg_goals, shelved_goals, given_up_goals = serapi.query_goals()
if fg_goals == []:
return cmds
return None
def subgoals2hypotheses(script, serapi):
'Execute `script` and convert all unsolved new sub-goals into hypotheses in the context of the current goal'
serapi.push()
fg_goals, bg_goals, shelved_goals, given_up_goals = serapi.query_goals()
assert shelved_goals + given_up_goals == []
assert len(fg_goals) == 1
current_goal_id = fg_goals[0]['id']
existing_goal_ids = set([g['id'] for g in fg_goals + bg_goals])
local_context = []
for hyp in fg_goals[0]['hypotheses']:
for ident in hyp['idents'][::-1]:
local_context.append((ident, hyp['type']))
local_context = local_context[::-1]
for cmd, _, _ in script:
serapi.execute(cmd)
fg_goals, bg_goals, shelved_goals, given_up_goals = serapi.query_goals()
assert shelved_goals + given_up_goals == []
unsolved_goal_ids = set([g['id'] for g in fg_goals + bg_goals])
if bg_goals != [] or not existing_goal_ids.issubset(unsolved_goal_ids.union({current_goal_id})):
# consider only the case when there is no unfocused goal left.
# and no existing goal other than the current_goal was decomposed (the sub-tree of the current goal has insufficient height)
serapi.pull()
return None
hypotheses = {}
# convert the focused goals
for n, g in enumerate(fg_goals, start=1):
serapi.push()
serapi.execute('Focus %d.' % n)
local_context_g = []
for hyp in g['hypotheses']:
for ident in hyp['idents'][::-1]:
local_context_g.append((ident, hyp['type']))
local_context_g = local_context_g[::-1]
local_context_diff = [p for p in local_context_g if p not in local_context]
for ident, _ in local_context_diff:
serapi.execute('generalize %s.' % ident)
fg_goals, _, _, _ = serapi.query_goals()
assert len(fg_goals) == 1
hypotheses[g['id']] = fg_goals[0]['type']
serapi.pop()
serapi.pop()
return hypotheses
def set_up_hypotheses(hyps, sexp_cache, serapi):
'Set up the hypotheses converted from the subgoals'
cmds = []
for goal_id, hyp in hyps.items():
H = 'assert (HYPOTHESIS_FROM_SUBGOAL_%d: %s); [> admit | idtac].' % (goal_id, hyp)
_, ast = serapi.execute(H, return_ast=True)
cmds.append((H, 'VernacExtend', sexp_cache.dump(ast)))
return cmds
def goal2subproof(goal, length, line_nb, script, sexp_cache, serapi):
'Extract a synthetic proof a fixed length from a goal'
hypotheses = subgoals2hypotheses(script, serapi)
if hypotheses is None:
return None
cmds = set_up_hypotheses(hypotheses, sexp_cache, serapi)
subproof_data = {
'goal_id': goal['id'],
'length': length,
'line_nb': line_nb,
'entry_cmds': cmds,
'steps': [],
'goals': {},
'proof_tree': None,
}
# execute the proof
for num_executed, (cmd, cmd_type, _) in enumerate(script, start=line_nb + 1):
# keep track of the goals
fg_goals, bg_goals, shelved_goals, given_up_goals = serapi.query_goals()
for g in fg_goals + bg_goals:
g['sexp'] = sexp_cache.dump(g['sexp'])
for i, h in enumerate(g['hypotheses']):
g['hypotheses'][i]['sexp'] = sexp_cache.dump(g['hypotheses'][i]['sexp'])
subproof_data['goals'][g['id']] = g
ast = sexpdata.dumps(serapi.query_ast(cmd))
subproof_data['steps'].append({'command': (cmd, cmd_type, sexp_cache.dump(ast)),
'goal_ids': {'fg': [g['id'] for g in fg_goals],
'bg': [g['id'] for g in bg_goals]}
})
# the proof ends
if cmd_type == 'VernacEndProof':
return None # there is no such proof
# run the tactic
if args.debug:
print('SUBPROOF-id%d-len%d %d: %s' % (goal['id'], length, num_executed, cmd))
serapi.execute(cmd)
if num_executed == line_nb + length: # now, try to close the proof
exit_cmds = close_proof(sexp_cache, serapi)
if exit_cmds is None:
print('failed')
return None
print('succeeded')
subproof_data['exit_cmds'] = exit_cmds
break
assert check_topology(subproof_data['steps'])
subproof_data['proof_tree'] = ProofTree(subproof_data['steps'], subproof_data['goals']).to_dict()
return subproof_data
def record_subproofs(line_nb, script, sexp_cache, serapi):
'For a human-written proof, extract all synthetic proofs from all goals'
assert script[-1][1] == 'VernacEndProof'
subproofs_data = []
# execute the proof
for num_executed, (cmd, cmd_type, _) in enumerate(script, start=line_nb + 1):
# keep track of the goals
fg_goals, bg_goals, shelved_goals, given_up_goals = serapi.query_goals()
if fg_goals != []:
g = fg_goals[0]
if g['id'] in subproofs_data:
continue
if not goal_is_prop(g, serapi):
continue
serapi.push()
serapi.execute('Focus 1.')
for i in range(1, min(args.max_length, len(script) - 2) + 1):
serapi.push()
subprf = goal2subproof(g, i, num_executed - 1, script[num_executed - line_nb - 1 : num_executed - line_nb - 1 + i],
sexp_cache, serapi)
if subprf is not None:
subproofs_data.append(subprf)
serapi.pop()
serapi.pop()
# run the tactic
if args.debug:
print('PROOF %d: %s' % (num_executed, cmd))
serapi.execute(cmd)
return subproofs_data
def get_subproofs(human_proof_file, vernac_cmds, sexp_cache, args):
prf = json.load(open(human_proof_file))
line_nb = prf['line_nb']
with SerAPI(args.timeout, args.debug) as serapi:
for cmd, _, _ in vernac_cmds[:line_nb + 1]:
serapi.execute(cmd)
subproofs_data = record_subproofs(line_nb, vernac_cmds[line_nb + 1 : line_nb + 1 + len(prf['steps'])],
sexp_cache, serapi)
return subproofs_data
def dump(subproofs_data, args):
print('%d synthetic proofs extracted' % len(subproofs_data))
if subproofs_data == []:
return
for i, subprf in enumerate(subproofs_data):
subproofs_data[i]['name'] = args.proof
dirname = dst_filename(args.file, args.data_path) + '-SUBPROOFS/'
json.dump(subproofs_data, open(os.path.join(dirname, args.proof + '.json'), 'wt'))
if __name__ == '__main__':
import sys
sys.setrecursionlimit(100000)
import argparse
arg_parser = argparse.ArgumentParser(description='Extract the synthetic proofs from intermediate goals')
arg_parser.add_argument('--debug', action='store_true')
arg_parser.add_argument('--file', type=str, help='The meta file to process')
arg_parser.add_argument('--proof', type=str, help='The proof to extract')
arg_parser.add_argument('--max_length', type=int, default=4, help='The maximum length for synthetic proofs')
arg_parser.add_argument('--timeout', type=int, default=3600, help='Timeout for SerAPI')
arg_parser.add_argument('--data_path', type=str, default='./data')
args = arg_parser.parse_args()
print(args)
human_proof_file = dst_filename(args.file, args.data_path) + '-PROOFS/' + args.proof + '.json'
if not os.path.exists(human_proof_file):
print('%s does not exist. Exiting..' % human_proof_file)
sys.exit(0)
dirname = dst_filename(args.file, args.data_path) + '-SUBPROOFS/'
try:
os.makedirs(dirname)
except os.error:
pass
db_path = os.path.join(dirname, args.proof + '_sexp_cache')
sexp_cache = SexpCache(db_path)
file_data = json.load(open(dst_filename(args.file, args.data_path) + '.json'))
subproofs_data = get_subproofs(human_proof_file, file_data['vernac_cmds'], sexp_cache, args)
dump(subproofs_data, args)