-
Notifications
You must be signed in to change notification settings - Fork 32
/
compute.py
393 lines (339 loc) · 17.2 KB
/
compute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
import os
from .helpers import package_up_vars as _package_up_vars
from .models import WorkerNodesCounter
import json
import requests
import taxcalc
from requests.exceptions import Timeout, RequestException
from .helpers import arrange_totals_by_row
from ..constants import START_YEAR
import requests_mock
# Override requests_mock's class-decorator method prefix.
# NOTE(review): per the requests_mock docs TEST_PREFIX selects which methods
# get mocked when Mocker decorates a class -- confirm this is still needed.
requests_mock.Mocker.TEST_PREFIX = 'dropq'

# Version string reported by the installed taxcalc package; compared against
# the 'dropq_version' reported by workers when version enforcement is on.
dqversion_info = taxcalc._version.get_versions()
dropq_version = dqversion_info['version']

# Number of budget years for a full run and for a "quick" (small) run.
# Both can be overridden through environment variables of the same name.
NUM_BUDGET_YEARS = int(os.environ.get('NUM_BUDGET_YEARS', 10))
NUM_BUDGET_YEARS_QUICK = int(os.environ.get('NUM_BUDGET_YEARS_QUICK', 1))

# Comma-separated worker hostnames from the DROPQ_WORKERS environment
# variable. NOTE: an unset variable does NOT fail here; it falls back to ''
# and therefore produces the single-element list [''].
dropq_workers = os.environ.get('DROPQ_WORKERS', '')
DROPQ_WORKERS = dropq_workers.split(",")

# URL path to perform the dropq algorithm on the full dataset
DROPQ_URL = "/dropq_start_job"
# URL path to perform the dropq algorithm on a sample of the full dataset
DROPQ_SMALL_URL = "/dropq_small_start_job"

# True only when the ENFORCE_VERSION environment variable is exactly 'True'.
ENFORCE_REMOTE_VERSION_CHECK = os.environ.get('ENFORCE_VERSION', 'False') == 'True'

# Default timeout (passed to requests.post) when submitting a job.
TIMEOUT_IN_SECONDS = 1.0
# Submission of a year is abandoned once its count of failed attempts
# exceeds this value.
MAX_ATTEMPTS_SUBMIT_JOB = 20

# Row keys handed to arrange_totals_by_row for the fiscal-total tables and
# for the GDP-elasticity table respectively.
TAXCALC_RESULTS_TOTAL_ROW_KEYS = taxcalc.dropq.TOTAL_ROW_NAMES
ELASTIC_RESULTS_TOTAL_ROW_KEYS = ["gdp_elasticity"]
class JobFailError(Exception):
    """Raised when a remote job has failed."""
class DropqCompute(object):
    """Submit dropq jobs to remote worker hosts over HTTP and collect results.

    The three ``remote_*`` methods are the only places that touch the
    network, so subclasses can override them to substitute canned responses.
    """

    num_budget_years = NUM_BUDGET_YEARS

    # Result tables merged across worker responses by dropq_get_results.
    _DROPQ_RESULT_KEYS = (
        'mY_dec', 'mX_dec', 'df_dec', 'pdf_dec', 'cdf_dec',
        'mY_bin', 'mX_bin', 'df_bin', 'pdf_bin', 'cdf_bin',
        'fiscal_tot_diffs', 'fiscal_tot_base', 'fiscal_tot_ref',
    )
    # Subset of the tables above that is re-arranged by total row.
    _FISCAL_TOTAL_KEYS = (
        'fiscal_tot_diffs', 'fiscal_tot_base', 'fiscal_tot_ref',
    )

    def package_up_vars(self, *args, **kwargs):
        """Delegate to the module-level ``package_up_vars`` helper."""
        return _package_up_vars(*args, **kwargs)

    def remote_submit_job(self, theurl, data, timeout=TIMEOUT_IN_SECONDS):
        """POST ``data`` to ``theurl`` and return the ``requests`` response."""
        return requests.post(theurl, data=data, timeout=timeout)

    def remote_results_ready(self, theurl, params):
        """GET ``theurl`` with ``params`` and return the response."""
        return requests.get(theurl, params=params)

    def remote_retrieve_results(self, theurl, params):
        """GET ``theurl`` with ``params`` and return the response."""
        return requests.get(theurl, params=params)

    def submit_json_dropq_calculation(self, user_mods, first_budget_year,
                                      additional_data=None):
        """Submit a full dropq run for ``user_mods`` that need no packaging."""
        url_template = "http://{hn}" + DROPQ_URL
        return self.submit_calculation(user_mods, first_budget_year,
                                       url_template,
                                       num_years=NUM_BUDGET_YEARS,
                                       pack_up_user_mods=False,
                                       additional_data=additional_data)

    def submit_dropq_calculation(self, user_mods, first_budget_year,
                                 additional_data=None, is_file=False,
                                 pack_up_user_mods=True):
        """Submit a full dropq run.

        ``is_file`` is accepted for interface compatibility; it is not used
        by this method.
        """
        url_template = "http://{hn}" + DROPQ_URL
        return self.submit_calculation(user_mods, first_budget_year,
                                       url_template,
                                       num_years=NUM_BUDGET_YEARS,
                                       additional_data=additional_data,
                                       pack_up_user_mods=pack_up_user_mods)

    def submit_json_dropq_small_calculation(self, user_mods,
                                            first_budget_year):
        """Submit a quick (small-sample) run for unpackaged ``user_mods``.

        The shared worker offset counter is not advanced.
        """
        url_template = "http://{hn}" + DROPQ_SMALL_URL
        return self.submit_calculation(user_mods, first_budget_year,
                                       url_template,
                                       num_years=NUM_BUDGET_YEARS_QUICK,
                                       increment_counter=False,
                                       pack_up_user_mods=False)

    def submit_dropq_small_calculation(self, user_mods, first_budget_year,
                                       additional_data=None, is_file=False,
                                       pack_up_user_mods=True):
        """Submit a quick (small-sample) dropq run.

        The shared worker offset counter is not advanced. ``is_file`` is
        accepted for interface compatibility; it is not used by this method.
        """
        url_template = "http://{hn}" + DROPQ_SMALL_URL
        return self.submit_calculation(user_mods, first_budget_year,
                                       url_template,
                                       num_years=NUM_BUDGET_YEARS_QUICK,
                                       additional_data=additional_data,
                                       increment_counter=False,
                                       pack_up_user_mods=pack_up_user_mods)

    def submit_elastic_calculation(self, user_mods, first_budget_year,
                                   is_file=False, additional_data=None,
                                   pack_up_user_mods=True):
        """Submit a GDP-elasticity run, starting at budget year index 1.

        ``is_file`` is accepted for interface compatibility; it is not used
        by this method.
        """
        url_template = "http://{hn}/elastic_gdp_start_job"
        return self.submit_calculation(user_mods, first_budget_year,
                                       url_template,
                                       start_budget_year=1,
                                       additional_data=additional_data,
                                       pack_up_user_mods=pack_up_user_mods)

    def submit_calculation(self, user_mods, first_budget_year, url_template,
                           start_budget_year=0, num_years=NUM_BUDGET_YEARS,
                           workers=DROPQ_WORKERS,
                           increment_counter=True,
                           use_wnc_offset=True,
                           pack_up_user_mods=True,
                           additional_data=None):
        """Submit one job per year, round-robin over a slice of ``workers``.

        Parameters:
            user_mods: reform parameters; packaged with ``package_up_vars``
                and keyed by ``first_budget_year`` when ``pack_up_user_mods``
                is true, otherwise sent as given.
            first_budget_year: first year of the budget window.
            url_template: URL with an ``{hn}`` placeholder for the hostname.
            start_budget_year, num_years: forwarded to ``_get_years``.
            workers: candidate worker hostnames.
            increment_counter: advance the persisted worker offset.
            use_wnc_offset: start at the persisted worker offset instead of 0.
            additional_data: optional extra parameters, JSON-encoded under
                ``behavior_params`` if it has a ``behavior`` key, otherwise
                under its first key.

        Returns:
            ``(job_ids, max_queue_length)`` where ``job_ids`` is a list of
            ``(job_id, hostname)`` tuples, or ``False`` when the packaged
            ``user_mods`` is empty.

        Raises:
            IOError: no worker hostname is available, or a year could not be
                submitted within MAX_ATTEMPTS_SUBMIT_JOB failed attempts.
        """
        if pack_up_user_mods:
            user_mods = self.package_up_vars(user_mods, first_budget_year)
            if not user_mods:
                return False
            user_mods = {first_budget_year: user_mods}

        years = self._get_years(start_budget_year, num_years,
                                first_budget_year)

        dropq_worker_offset = 0
        if use_wnc_offset:
            wnc, _created = WorkerNodesCounter.objects.get_or_create(
                singleton_enforce=1)
            dropq_worker_offset = wnc.current_offset
            # ">=": an offset equal to len(workers) would slice out no hosts.
            if dropq_worker_offset >= len(workers):
                dropq_worker_offset = 0
            if increment_counter and workers:
                # Wrap against the list the offset actually indexes into.
                wnc.current_offset = ((dropq_worker_offset + num_years)
                                      % len(workers))
                wnc.save()

        hostnames = workers[dropq_worker_offset:
                            dropq_worker_offset + num_years]
        print("hostnames:  {0}".format(hostnames))
        num_hosts = len(hostnames)
        if years and not hostnames:
            raise IOError("No worker hostnames available to submit jobs to.")

        data = {
            "user_mods": json.dumps(user_mods),
            "first_budget_year": str(first_budget_year),
        }
        if additional_data:
            if "behavior" in additional_data:
                data_key = "behavior_params"
            else:
                # First key in iteration order (same as keys()[0] on py2).
                data_key = next(iter(additional_data))
            data[data_key] = json.dumps(additional_data)

        job_ids = []
        hostname_idx = 0
        max_queue_length = 0
        for y in years:
            data['year'] = str(y)
            year_submitted = False
            attempts = 0
            while not year_submitted:
                hostname = hostnames[hostname_idx]
                theurl = url_template.format(hn=hostname)
                try:
                    response = self.remote_submit_job(
                        theurl, data=data, timeout=TIMEOUT_IN_SECONDS)
                    if response.status_code == 200:
                        print("submitted:  {0}".format(hostname))
                        # Parse before marking the year as done so that a
                        # bad body can never count as a successful submit.
                        response_d = response.json()
                        job_ids.append((response_d['job_id'], hostname))
                        max_queue_length = max(max_queue_length,
                                               response_d['qlength'])
                        year_submitted = True
                    else:
                        print("FAILED:  {0} {1}".format(y, hostname))
                        attempts += 1
                except Timeout:
                    print("Couldn't submit to:  {0}".format(hostname))
                    attempts += 1
                except RequestException as exc:
                    print("Something unexpected happened:  {0}".format(exc))
                    attempts += 1
                # Move to the next host after every attempt, success or not.
                hostname_idx = (hostname_idx + 1) % num_hosts
                if attempts > MAX_ATTEMPTS_SUBMIT_JOB:
                    print("Exceeded max attempts. Bailing out.")
                    raise IOError(
                        "Exceeded max attempts submitting year {0}".format(y))

        return job_ids, max_queue_length

    def _get_years(self, start_budget_year, num_years, first_budget_year):
        """Return the list of year indices to submit.

        ``range(start_budget_year, num_years)`` when a start is given;
        otherwise the single-element list ``[first_budget_year]``.
        """
        if start_budget_year is not None:
            return list(range(start_budget_year, num_years))
        # The following is just a dummy year for btax
        # Btax is not currently running in separate years, I don't think.
        return [first_budget_year]

    def dropq_results_ready(self, job_ids):
        """Query each job's status and return the response texts in order.

        Raises:
            JobFailError: a status query returned a non-200 status code.
        """
        jobs_done = [None] * len(job_ids)
        for idx, (id_, hostname) in enumerate(job_ids):
            result_url = "http://{hn}/dropq_query_result".format(hn=hostname)
            job_response = self.remote_results_ready(
                result_url, params={'job_id': id_})
            if job_response.status_code != 200:
                print("did not expect response with status_code {0}".format(
                    job_response.status_code))
                raise JobFailError(
                    '{0} failed on host: {1}'.format(id_, hostname))
            jobs_done[idx] = job_response.text
        return jobs_done

    def _get_results_base(self, job_ids, job_failure=False):
        """Fetch the result of each job.

        Returns a list with the raw response text (``job_failure`` true) or
        the decoded JSON body (``job_failure`` false) of every job whose
        request returned status 200. Non-200 responses are skipped.

        Raises:
            ValueError: a 200 response body could not be decoded as JSON.
        """
        ans = []
        for id_, hostname in job_ids:
            result_url = "http://{hn}/dropq_get_result".format(hn=hostname)
            job_response = self.remote_retrieve_results(
                result_url, params={'job_id': id_})
            if job_response.status_code != 200:
                continue
            if job_failure:
                ans.append(job_response.text)
                continue
            try:
                ans.append(job_response.json())
            except ValueError:
                # Got back a bad response. Include the text and re-raise.
                msg = u'PROBLEM WITH RESPONSE. TEXT RECEIVED: {0}'
                raise ValueError(msg.format(job_response.text))
        return ans

    def _check_worker_versions(self, ans):
        """Raise IOError if worker-reported versions do not match ours.

        Does nothing unless ENFORCE_REMOTE_VERSION_CHECK is true.
        """
        if not ENFORCE_REMOTE_VERSION_CHECK:
            return
        # NOTE(review): ``taxcalc_version`` and ``same_version`` are not
        # defined or imported in the part of this module visible here, so
        # this path presumably raises NameError -- confirm and import them.
        versions = [r.get('taxcalc_version', None) for r in ans]
        if not all(ver == taxcalc_version for ver in versions):
            msg = "Got different taxcalc versions from workers. Bailing out"
            print(msg)
            raise IOError(msg)
        versions = [r.get('dropq_version', None) for r in ans]
        if not all(same_version(ver, dropq_version) for ver in versions):
            msg = "Got different dropq versions from workers. Bailing out"
            print(msg)
            raise IOError(msg)

    def dropq_get_results(self, job_ids, job_failure=False):
        """Retrieve and merge the results of all ``job_ids``.

        With ``job_failure`` true, returns the list of raw response texts.
        Otherwise returns a dict with one merged table per result key; the
        three ``fiscal_tot_*`` tables are passed through
        ``arrange_totals_by_row``.

        Raises:
            ValueError: a response body could not be decoded as JSON.
            IOError: version enforcement is on and versions disagree.
        """
        ans = self._get_results_base(job_ids, job_failure=job_failure)
        if job_failure:
            return ans

        results = dict((key, {}) for key in self._DROPQ_RESULT_KEYS)
        for result in ans:
            for key in self._DROPQ_RESULT_KEYS:
                results[key].update(result[key])

        self._check_worker_versions(ans)

        for key in self._FISCAL_TOTAL_KEYS:
            results[key] = arrange_totals_by_row(
                results[key], TAXCALC_RESULTS_TOTAL_ROW_KEYS)
        return results

    def elastic_get_results(self, job_ids):
        """Retrieve and merge the GDP-elasticity results of ``job_ids``.

        Returns ``{'elasticity_gdp': ...}`` where the merged table has been
        given an ``'NA'`` entry for ``gdp_elasticity_0`` and passed through
        ``arrange_totals_by_row``.

        Raises:
            ValueError: a response body could not be decoded as JSON.
            IOError: version enforcement is on and versions disagree.
        """
        ans = self._get_results_base(job_ids)

        elasticity_gdp = {}
        for result in ans:
            elasticity_gdp.update(result['elasticity_gdp'])

        self._check_worker_versions(ans)

        # Jobs start at year index 1, so year 0 has no computed value.
        elasticity_gdp[u'gdp_elasticity_0'] = u'NA'
        elasticity_gdp = arrange_totals_by_row(
            elasticity_gdp, ELASTIC_RESULTS_TOTAL_ROW_KEYS)
        return {'elasticity_gdp': elasticity_gdp}
class MockCompute(DropqCompute):
    """DropqCompute whose HTTP calls are answered by ``requests_mock``.

    Job submission always succeeds with a fixed job id, readiness queries
    answer 'NO' ``num_times_to_wait`` times before answering 'YES', and
    results are read from ``tests/response_year_<n>.json`` next to this
    module, where ``n`` counts the results retrieved so far.
    """

    num_budget_years = NUM_BUDGET_YEARS
    __slots__ = ('count', 'num_times_to_wait', 'last_posted')

    def __init__(self, num_times_to_wait=0):
        self.count = 0
        # Number of times to respond 'No' before
        # replying that a job is ready
        self.num_times_to_wait = num_times_to_wait
        # Payload of the most recent submission; None until the first post
        # so that reading it early does not raise AttributeError.
        self.last_posted = None

    def remote_submit_job(self, theurl, data, timeout=TIMEOUT_IN_SECONDS):
        """Record ``data`` and answer the POST with a canned job id.

        ``timeout`` defaults to the same value as in the base class so the
        two signatures are interchangeable.
        """
        submit_paths = (DROPQ_URL, DROPQ_SMALL_URL,
                        '/elastic_gdp_start_job', '/btax_start_job')
        with requests_mock.Mocker() as mock:
            resp = json.dumps({'job_id': '424242', 'qlength': 2})
            for path in submit_paths:
                mock.register_uri('POST', path, text=resp)
            self.last_posted = data
            # Must run inside the Mocker context to be intercepted.
            return DropqCompute.remote_submit_job(self, theurl, data, timeout)

    def remote_results_ready(self, theurl, params):
        """Answer 'NO' while waits remain (consuming one), then 'YES'."""
        with requests_mock.Mocker() as mock:
            if self.num_times_to_wait > 0:
                mock.register_uri('GET', '/dropq_query_result', text='NO')
                self.num_times_to_wait -= 1
            else:
                mock.register_uri('GET', '/dropq_query_result', text='YES')
            return DropqCompute.remote_results_ready(self, theurl, params)

    def remote_retrieve_results(self, theurl, params):
        """Serve the next canned ``response_year_<count>.json`` file."""
        mock_path = os.path.join(os.path.dirname(__file__), "tests",
                                 "response_year_{0}.json")
        with open(mock_path.format(self.count), 'r') as f:
            text = f.read()
        self.count += 1
        with requests_mock.Mocker() as mock:
            mock.register_uri('GET', '/dropq_get_result', text=text)
            return DropqCompute.remote_retrieve_results(self, theurl, params)
class ElasticMockCompute(MockCompute):
    """MockCompute variant that serves a fixed GDP-elasticity result."""

    def remote_retrieve_results(self, theurl, params):
        """Bump the retrieval counter and answer with a canned JSON body."""
        self.count += 1
        canned_body = (u'{"elasticity_gdp": {"gdp_elasticity_1": "0.00310"}, '
                       '"dropq_version": "0.6.a96303", "taxcalc_version": '
                       '"0.6.10d462"}')
        with requests_mock.Mocker() as mocker:
            mocker.register_uri('GET', '/dropq_get_result', text=canned_body)
            # The real GET has to happen while the mocker is still active.
            return DropqCompute.remote_retrieve_results(self, theurl, params)
class MockFailedCompute(MockCompute):
def remote_results_ready(self, theurl, params):
print 'MockFailedCompute remote_results_ready', theurl, params
with requests_mock.Mocker() as mock:
mock.register_uri('GET', '/dropq_query_result', text='FAIL')
return DropqCompute.remote_results_ready(self, theurl, params)
class NodeDownCompute(MockCompute):
    """MockCompute variant whose submit endpoint alternates 502 / success.

    ``switch`` is incremented on every submission; submissions made while it
    is even are answered with status 502, those made while it is odd succeed.
    """

    # 'count' and 'num_times_to_wait' are already declared by the parent
    # class; redeclaring a parent's slot makes the parent slot inaccessible.
    __slots__ = ('switch',)

    def __init__(self, **kwargs):
        """Accept an optional ``switch`` start value (default 0).

        Remaining keyword arguments are forwarded to the parent initialiser,
        which sets ``count`` and ``num_times_to_wait``.
        """
        self.switch = kwargs.pop('switch', 0)
        # Name this class in super(): naming the parent would skip the
        # parent's __init__ and hand the kwargs to object.__init__.
        super(NodeDownCompute, self).__init__(**kwargs)

    def remote_submit_job(self, theurl, data, timeout=TIMEOUT_IN_SECONDS):
        """Answer the POST with 502 or a canned job id, by ``switch`` parity.

        ``timeout`` defaults to the same value as in the base class.
        """
        # NOTE(review): unlike the parent mock, DROPQ_SMALL_URL is not
        # registered here -- confirm whether that omission is intentional.
        submit_paths = (DROPQ_URL, '/elastic_gdp_start_job',
                        '/btax_start_job')
        with requests_mock.Mocker() as mock:
            if self.switch % 2 == 0:
                for path in submit_paths:
                    mock.register_uri('POST', path, status_code=502)
            else:
                resp = json.dumps({'job_id': '424242', 'qlength': 2})
                for path in submit_paths:
                    mock.register_uri('POST', path, text=resp)
            self.switch += 1
            # Must run inside the Mocker context to be intercepted.
            return DropqCompute.remote_submit_job(self, theurl, data, timeout)