# coding: utf-8
import logging
import sys
import os
import gzip
import requests
import simplejson as json
import functools
import warnings
import urllib.parse
from time import time
import numpy
from contextlib import contextmanager
from collections import OrderedDict
# from dozer import Dozer
import boto3
warnings.filterwarnings("ignore", category=UserWarning, module='psycopg2')
import psycopg2
import psycopg2.extras # needed though you wouldn't guess it
from psycopg2.pool import ThreadedConnectionPool
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from flask_compress import Compress
from flask_debugtoolbar import DebugToolbarExtension
from flask_jwt_extended import JWTManager
# from sqlalchemy.pool import NullPool
from util import elapsed
from util import HTTPMethodOverrideMiddleware
HEROKU_APP_NAME = "jump-api"
DEMO_PACKAGE_ID = "658349d9"
JISC_PACKAGE_ID = "package-3WkCDEZTqo6S"
JISC_INSTITUTION_ID = "institution-Afxc4mAYXoJH"
USE_PAPER_GROWTH = False
DATABASE_URL = os.getenv("DATABASE_URL_REDSHIFT_TEST") if os.getenv("TESTING_DB") else os.getenv("DATABASE_URL_REDSHIFT")
OPENALEX_API_KEY = os.getenv("OPENALEX_API_KEY")
# set up logging
# see http://wiki.pylonshq.com/display/pylonscookbook/Alternative+logging+configuration
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    format='%(thread)d: %(message)s'  # tried process but it was always "6" on heroku
)
logger = logging.getLogger("jump-api")
libraries_to_mum_warning = [
    "requests",
    "urllib3",
    "requests.packages.urllib3",
    "stripe",
    "boto",
    "boto3",
    "botocore",
    "s3transfer",
    # "newrelic",
    "RateLimiter",
    "paramiko",
    "chardet",
    "cryptography",
    "pyexcel",
    "lml",
    "pyexcel_io"
]

libraries_to_mum_error = [
    "scipy",
    "psycopg2",
    "matplotlib",
    "numpy"
]
for a_library in libraries_to_mum_warning:
    the_logger = logging.getLogger(a_library)
    the_logger.setLevel(logging.WARNING)
    the_logger.propagate = True
    warnings.filterwarnings("ignore", category=UserWarning, module=a_library)

for a_library in libraries_to_mum_error:
    the_logger = logging.getLogger(a_library)
    the_logger.setLevel(logging.ERROR)
    the_logger.propagate = True
    warnings.filterwarnings("ignore", category=UserWarning, module=a_library)

for name in list(logging.Logger.manager.loggerDict.keys()):
    if ('boto' in name) or ('urllib3' in name) or ('s3transfer' in name) or ('boto3' in name) or ('botocore' in name):
        logging.getLogger(name).setLevel(logging.ERROR)

with warnings.catch_warnings():
    warnings.filterwarnings('ignore', r'RuntimeWarning: overflow encountered in exp')
    numpy.seterr(over="ignore")
# disable extra warnings
requests.packages.urllib3.disable_warnings()
warnings.filterwarnings("ignore", category=DeprecationWarning)
app = Flask(__name__)
# memory profiling
# app.wsgi_app = Dozer(app.wsgi_app, profile_path='./dozer_profiles')
# authorization
app.config['JWT_SECRET_KEY'] = os.getenv('JWT_SECRET_KEY')
app.config['SECRET_KEY'] = os.getenv('JWT_SECRET_KEY')
app.config['JWT_ACCESS_TOKEN_EXPIRES'] = False # doesn't expire
app.config['JWT_REFRESH_TOKEN_EXPIRES'] = False # doesn't expire
app.config['JWT_TOKEN_LOCATION'] = ('headers', 'query_string')
jwt = JWTManager(app)
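# Illustrative sketch, not part of this file: because JWT_TOKEN_LOCATION includes
# "query_string", flask_jwt_extended should accept a token either in an
# "Authorization: Bearer <token>" header or in the query string on protected
# routes. The route below is hypothetical, shown only to indicate how the
# manager above is typically used (decorator call syntax varies by
# flask_jwt_extended version):
#
# from flask_jwt_extended import jwt_required, get_jwt_identity
#
# @app.route("/protected-example")
# @jwt_required()
# def protected_example():
#     return {"logged_in_as": get_jwt_identity()}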
# database stuff
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = True # as instructed, to suppress warning
app.config['SQLALCHEMY_ECHO'] = (os.getenv("SQLALCHEMY_ECHO", False) == "True")
# app.config['SQLALCHEMY_ECHO'] = True
# test or production database
app.config["SQLALCHEMY_DATABASE_URI"] = DATABASE_URL
app.config["SQLALCHEMY_BINDS"] = {
"redshift_db": DATABASE_URL
}
# see https://stackoverflow.com/questions/43594310/redshift-sqlalchemy-long-query-hangs
app.config["SQLALCHEMY_ENGINE_OPTIONS"] = { "pool_pre_ping": True,
"pool_recycle": 300,
"connect_args": {
"keepalives": 1,
"keepalives_idle": 10,
"keepalives_interval": 2,
"keepalives_count": 5
}
}
# from http://stackoverflow.com/a/12417346/596939
# class NullPoolSQLAlchemy(SQLAlchemy):
#     def apply_driver_hacks(self, app, info, options):
#         options['poolclass'] = NullPool
#         return super(NullPoolSQLAlchemy, self).apply_driver_hacks(app, info, options)
#
# db = NullPoolSQLAlchemy(app, session_options={"autoflush": False})
app.config["SQLALCHEMY_POOL_SIZE"] = 200
db = SQLAlchemy(app, session_options={"autoflush": False, "autocommit": False})
# do compression. Has to be set above the Flask-DebugToolbar setup so that setup can override it.
compress_json = os.getenv("COMPRESS_DEBUG", "True") == "True"
# set up Flask-DebugToolbar
if (os.getenv("FLASK_DEBUG", False) == "True"):
logger.info("Setting app.debug=True; Flask-DebugToolbar will display")
compress_json = False
app.debug = True
app.config['DEBUG'] = True
app.config["DEBUG_TB_INTERCEPT_REDIRECTS"] = False
app.config["SQLALCHEMY_RECORD_QUERIES"] = True
app.config["SECRET_KEY"] = os.getenv("SECRET_KEY")
toolbar = DebugToolbarExtension(app)
# gzip responses
Compress(app)
app.config["COMPRESS_DEBUG"] = compress_json
redshift_url = urllib.parse.urlparse(DATABASE_URL)
app.config['postgreSQL_pool'] = ThreadedConnectionPool(2, 200,
    database=redshift_url.path[1:],
    user=redshift_url.username,
    password=redshift_url.password,
    host=redshift_url.hostname,
    port=redshift_url.port,
    keepalives=1,
    keepalives_idle=10,
    keepalives_interval=2,
    keepalives_count=5)
app.config['PROFILE_REQUESTS'] = (os.getenv("PROFILE_REQUESTS", False) == "True")
logger.info("Database URL host: {}".format(redshift_url.hostname))
@contextmanager
def get_db_connection():
    try:
        connection = app.config['postgreSQL_pool'].getconn()
        connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
        connection.autocommit = True
        # connection.readonly = True
        yield connection
    # except Exception as e:
    #     print u"error in get_db_connection", e
    #     raise
    finally:
        app.config['postgreSQL_pool'].putconn(connection)
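# Illustrative usage sketch: check a connection out of the pool, use it in
# autocommit mode, and it is always returned to the pool when the block exits
# (the query here is just an example):
#
# with get_db_connection() as connection:
#     with connection.cursor() as cursor:
#         cursor.execute("select 1")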

@contextmanager
def get_db_cursor(commit=False, use_realdictcursor=False, use_defaultcursor=False):
    with get_db_connection() as connection:
        if use_realdictcursor:
            # RealDictCursor takes more memory, so it isn't the default
            cursor = connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        elif use_defaultcursor:
            cursor = connection.cursor()
        else:
            cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
        try:
            yield cursor
            if commit:
                connection.commit()
        except Exception as e:
            print("Error in get_db_cursor: {}, rolling back".format(e))
            try:
                connection.rollback()
            except Exception:
                pass
        finally:
            cursor.close()
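# Illustrative usage sketch (jump_cache_status is a table used later in this
# module; the query itself is just an example). DictCursor rows can be indexed
# by position or by column name; pass commit=True for writes:
#
# with get_db_cursor() as cursor:
#     cursor.execute("select cache_call, updated from jump_cache_status limit 5")
#     rows = cursor.fetchall()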
s3_client = boto3.client("s3")
print("made s3_client")
use_groups_lookup = OrderedDict()
use_groups_lookup["oa_plus_social_networks"] = {"display": "OA", "free_instant": True}
# use_groups_lookup["social_networks"] = {"display": "ASNs", "free_instant": True}
use_groups_lookup["backfile"] = {"display": "Backfile", "free_instant": True}
use_groups_lookup["subscription"] = {"display": "Subscription", "free_instant": False}
use_groups_lookup["ill"] = {"display": "ILL", "free_instant": False}
use_groups_lookup["other_delayed"] = {"display": "Other", "free_instant": False}
use_groups_lookup["total"] = {"display": "*Total*", "free_instant": False}
use_groups = list(use_groups_lookup.keys())
use_groups_free_instant = [k for k, v in use_groups_lookup.items() if v["free_instant"]]
suny_consortium_package_ids = ["P2NFgz7B", "PN3juRC5", "2k4Qs74v", "uwdhDaJ2"]
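# For reference: use_groups preserves the insertion order above, and
# use_groups_free_instant works out to ["oa_plus_social_networks", "backfile"],
# the only groups flagged free_instant=True.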
app.my_memorycache_dict = {}
def build_cache_key(module_name, function_name, *args):
    # just ignoring kwargs for now
    hashable_args = args
    # Generate unique cache key
    key_raw = (module_name, function_name, hashable_args)
    cache_key = json.dumps(key_raw)
    return cache_key
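# Illustrative sketch (module, function, and arg names here are hypothetical):
# the key is just the JSON-serialized tuple, so
#     build_cache_key("views", "get_package", "package-abc")
# returns the string '["views", "get_package", ["package-abc"]]'
# (json.dumps renders the args tuple as a JSON array).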
def memorycache(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        cache_key = build_cache_key(func.__module__, func.__name__, *args)
        # Return cached version if available
        result = app.my_memorycache_dict.get(cache_key, None)
        if result is not None:
            # print "cache hit on", cache_key
            # print("cache hit")
            return result
        # print("cache miss on", cache_key)
        # print("cache miss")
        # Generate output
        # print("> calling {}.{} with {}".format(func.__module__, func.__name__, args))
        result = func(*args)
        # Cache output if allowed
        if result is not None:
            app.my_memorycache_dict[cache_key] = result
        # reset_cache(func.__module__, func.__name__, *args)
        return result
    return wrapper
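# Illustrative sketch (function name is hypothetical): decorate an expensive
# call so repeat calls with the same positional args are served from
# app.my_memorycache_dict. Note the wrapper keys on and forwards positional
# args only, so keyword arguments are ignored by this cache.
#
# @memorycache
# def lookup_package(package_id):
#     ...  # expensive database or S3 work
#
# lookup_package("package-abc")   # computed and cached
# lookup_package("package-abc")   # returned from the in-memory cache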
def reset_cache(module_name, function_name, *args):
    print("args", args)
    cache_key = build_cache_key(module_name, function_name, *args)
    print("cache_key", cache_key)
    if cache_key in app.my_memorycache_dict:
        del app.my_memorycache_dict[cache_key]
    delete_command = "delete from jump_cache_status where cache_call = %s"
    insert_command = "insert into jump_cache_status (cache_call, updated) values (%s, sysdate)"
    with get_db_cursor() as cursor:
        cursor.execute(delete_command, (cache_key,))
        cursor.execute(insert_command, (cache_key,))
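# Illustrative sketch (names are hypothetical): to invalidate an entry, call
# reset_cache with the same module, function, and positional args that built
# the key, e.g. reset_cache("views", "lookup_package", "package-abc").
# Besides dropping the in-process entry, this records the invalidation time in
# the jump_cache_status table, presumably so other processes can see the change.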
cached_consortium_scenario_ids = ["tGUVWRiN", "scenario-QC2kbHfUhj9W", "EcUvEELe", "CBy9gUC3", "6it6ajJd", "GcAsm5CX", "aAFAuovt"]
@memorycache
def fetch_common_package_data():
    try:
        print("downloading common_package_data_for_all.json.gz")
        s3_clientobj = s3_client.get_object(Bucket="unsub-cache", Key="common_package_data_for_all.json.gz")
        with gzip.open(s3_clientobj["Body"], 'r') as f:
            data_from_s3 = json.loads(f.read().decode('utf-8'))
        return data_from_s3
    except Exception as e:
        print("no S3 data, so computing. Error message: ", e)
        pass
    from common_data import gather_common_data
    return gather_common_data()
common_data_dict = None
def warm_common_data(lst):
    lst.append(fetch_common_package_data())

# NOTE: for now we have to do this when the app loads - revisit later
# if there's a different take on dealing with common data
# if os.getenv('PRELOAD_LARGE_TABLES', False) == 'True':
if not common_data_dict:
    import threading
    import time  # note: rebinds the module-level name "time" (imported above as a function) to the time module
    an_lst = []
    t = threading.Thread(target=warm_common_data, args=[an_lst])
    t.daemon = True
    t.start()
    while t.is_alive():
        time.sleep(0.1)
    common_data_dict = an_lst[0]
    print("warm_common_data done!")
else:
    print("not warming common data cache")