From b75996df4908f5e2d19cc54168e930abc36859a7 Mon Sep 17 00:00:00 2001 From: Darrel Herbst Date: Mon, 18 Jan 2021 04:09:17 -0500 Subject: [PATCH] 2852/refactor/replace urllib with requests ol infobase (#4419) * Resolves lint warnings with reformatting, et al. * Ran isort --profile black on ol_infobase * Replace simplejson with json and urlopen with requests.get * use more descriptive name, and add named param for requests.get * Requests raise_for_status() needs to be called from a response Co-authored-by: Christian Clauss --- openlibrary/plugins/ol_infobase.py | 117 +++++++++++++++++++---------- 1 file changed, 79 insertions(+), 38 deletions(-) diff --git a/openlibrary/plugins/ol_infobase.py b/openlibrary/plugins/ol_infobase.py index 731c413e681..745cb4c462f 100644 --- a/openlibrary/plugins/ol_infobase.py +++ b/openlibrary/plugins/ol_infobase.py @@ -2,30 +2,36 @@ """Open Library plugin for infobase. """ from __future__ import print_function -import os + import datetime -import simplejson +import json import logging import logging.config +import os +import re import sys import traceback -import re import unicodedata +import requests import six -from six.moves import urllib import web -from infogami.infobase import config, common, server, cache, dbstore + +from infogami.infobase import cache, common, config, dbstore, server + +from ..utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10, normalize_isbn # relative import from .openlibrary import schema -from ..utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10, normalize_isbn logger = logging.getLogger("infobase.ol") + def init_plugin(): """Initialize infobase plugin.""" - from infogami.infobase import common, dbstore, server, logger as infobase_logger + from infogami.infobase import common, dbstore + from infogami.infobase import logger as infobase_logger + from infogami.infobase import server dbstore.default_schema = schema.get_schema() # Replace infobase Indexer with OL custom Indexer @@ -45,30 +51,31 @@ def init_plugin(): if ol: # install custom indexer - #XXX-Anand: this might create some trouble. Commenting out. + # XXX-Anand: this might create some trouble. Commenting out. # ol.store.indexer = Indexer() if config.get('http_listeners'): logger.info("setting up http listeners") ol.add_trigger(None, http_notify) - ## memcache invalidator is not required now. It was added for future use. - #_cache = config.get("cache", {}) - #if _cache.get("type") == "memcache": - # logger.info("setting up memcache invalidater") - # ol.add_trigger(None, MemcacheInvalidater()) + # # memcache invalidator is not required now. It was added for future use. + # _cache = config.get("cache", {}) + # if _cache.get("type") == "memcache": + # logger.info("setting up memcache invalidater") + # ol.add_trigger(None, MemcacheInvalidater()) # hook to add count functionality - server.app.add_mapping("/([^/]*)/count_editions_by_author", __name__ + ".count_editions_by_author") - server.app.add_mapping("/([^/]*)/count_editions_by_work", __name__ + ".count_editions_by_work") - server.app.add_mapping("/([^/]*)/count_edits_by_user", __name__ + ".count_edits_by_user") - server.app.add_mapping("/([^/]*)/most_recent", __name__ + ".most_recent") - server.app.add_mapping("/([^/]*)/clear_cache", __name__ + ".clear_cache") - server.app.add_mapping("/([^/]*)/stats/(\d\d\d\d-\d\d-\d\d)", __name__ + ".stats") - server.app.add_mapping("/([^/]*)/has_user", __name__ + ".has_user") - server.app.add_mapping("/([^/]*)/olid_to_key", __name__ + ".olid_to_key") - server.app.add_mapping("/_reload_config", __name__ + ".reload_config") - server.app.add_mapping("/_inspect", __name__ + "._inspect") + server.app.add_mapping(r"/([^/]*)/count_editions_by_author", __name__ + ".count_editions_by_author") + server.app.add_mapping(r"/([^/]*)/count_editions_by_work", __name__ + ".count_editions_by_work") + server.app.add_mapping(r"/([^/]*)/count_edits_by_user", __name__ + ".count_edits_by_user") + server.app.add_mapping(r"/([^/]*)/most_recent", __name__ + ".most_recent") + server.app.add_mapping(r"/([^/]*)/clear_cache", __name__ + ".clear_cache") + server.app.add_mapping(r"/([^/]*)/stats/(\d\d\d\d-\d\d-\d\d)", __name__ + ".stats") + server.app.add_mapping(r"/([^/]*)/has_user", __name__ + ".has_user") + server.app.add_mapping(r"/([^/]*)/olid_to_key", __name__ + ".olid_to_key") + server.app.add_mapping(r"/_reload_config", __name__ + ".reload_config") + server.app.add_mapping(r"/_inspect", __name__ + "._inspect") + def setup_logging(): try: @@ -81,6 +88,7 @@ def setup_logging(): print("Unable to set logging configuration:", str(e), file=sys.stderr) raise + class reload_config: @server.jsonify def POST(self): @@ -88,6 +96,7 @@ def POST(self): setup_logging() return {"ok": "true"} + class _inspect: """Backdoor to inspect the running process. @@ -98,13 +107,15 @@ def GET(self): try: import _inspect return _inspect.inspect() - except Exception as e: + except Exception: return traceback.format_exc() + def get_db(): site = server.get_site('openlibrary.org') return site.store.db + @web.memoize def get_property_id(type, name): db = get_db() @@ -114,12 +125,14 @@ def get_property_id(type, name): except IndexError: return None + def get_thing_id(key): try: return get_db().where('thing', key=key)[0].id except IndexError: return None + def count(table, type, key, value): pid = get_property_id(type, key) @@ -128,18 +141,21 @@ def count(table, type, key, value): return 0 return get_db().query("SELECT count(*) FROM " + table + " WHERE key_id=$pid AND value=$value_id", vars=locals())[0].count + class count_editions_by_author: @server.jsonify def GET(self, sitename): i = server.input('key') return count('edition_ref', '/type/edition', 'authors', i.key) + class count_editions_by_work: @server.jsonify def GET(self, sitename): i = server.input('key') return count('edition_ref', '/type/edition', 'works', i.key) + class count_edits_by_user: @server.jsonify def GET(self, sitename): @@ -147,6 +163,7 @@ def GET(self, sitename): author_id = get_thing_id(i.key) return get_db().query("SELECT count(*) as count FROM transaction WHERE author_id=$author_id", vars=locals())[0].count + class has_user: @server.jsonify def GET(self, sitename): @@ -161,6 +178,7 @@ def GET(self, sitename): d = get_db().query("SELECT * from thing WHERE lower(key) = $key AND type=$type_user", vars=locals()) return bool(d) + class stats: @server.jsonify def GET(self, sitename, today): @@ -203,12 +221,15 @@ def count(self, tables, where, vars): vars=vars )[0].value + most_recent_change = None + def invalidate_most_recent_change(event): global most_recent_change most_recent_change = None + class most_recent: @server.jsonify def GET(self, sitename): @@ -218,6 +239,7 @@ def GET(self, sitename): most_recent_change = site.versions({'limit': 1})[0] return most_recent_change + class clear_cache: @server.jsonify def POST(self, sitename): @@ -225,6 +247,7 @@ def POST(self, sitename): cache.global_cache.clear() return {'done': True} + class olid_to_key: @server.jsonify def GET(self, sitename): @@ -233,6 +256,7 @@ def GET(self, sitename): key = d and d[0].key or None return {'olid': i.olid, 'key': key} + def write(path, data): dir = os.path.dirname(path) if not os.path.exists(dir): @@ -241,19 +265,23 @@ def write(path, data): f.write(data) f.close() + def save_error(dir, prefix): try: logger.error("Error", exc_info=True) error = web.djangoerror() now = datetime.datetime.utcnow() - path = '%s/%04d-%02d-%02d/%s-%02d%02d%02d.%06d.html' % (dir, \ + path = '%s/%04d-%02d-%02d/%s-%02d%02d%02d.%06d.html' % ( + dir, now.year, now.month, now.day, prefix, - now.hour, now.minute, now.second, now.microsecond) + now.hour, now.minute, now.second, now.microsecond + ) logger.error('Error saved to %s', path) write(path, web.safestr(error)) - except: + except Exception: logger.error('Exception in saving the error', exc_info=True) + def get_object_data(site, thing): """Return expanded data of specified object.""" def expand(value): @@ -272,6 +300,7 @@ def expand(value): d[k] = expand(v) return d + def http_notify(site, old, new): """Notify listeners over http.""" if isinstance(new, dict): @@ -280,25 +309,28 @@ def http_notify(site, old, new): # new is a thing. call format_data to get the actual data. data = new.format_data() - json = simplejson.dumps(data) + json_data = json.dumps(data) key = data['key'] # optimize the most common case. # The following prefixes are never cached at the client. Avoid cache invalidation in that case. - not_cached = ['/b/', '/a/', '/books/', '/authors/', '/works/', '/subjects/', '/publishers/', '/user/', '/usergroup/', '/people/'] + not_cached = ['/b/', '/a/', '/books/', '/authors/', '/works/', '/subjects/', '/publishers/', '/user/', + '/usergroup/', '/people/'] for prefix in not_cached: if key.startswith(prefix): return for url in config.http_listeners: try: - response = urllib.request.urlopen(url, json).read() - print('http_notify', repr(url), repr(key), repr(response), file=web.debug) - except: + response = requests.get(url, params=json_data) + response.raise_for_status() + print('http_notify', repr(url), repr(key), repr(response.text), file=web.debug) + except Exception: print('failed to send http_notify', repr(url), repr(key), file=web.debug) import traceback traceback.print_exc() + class MemcacheInvalidater: def __init__(self): self.memcache = self.get_memcache_client() @@ -362,6 +394,7 @@ def invalidate_work(self, site, old): def invalidate_default(self, site, old): yield old.key + # openlibrary.utils can't be imported directly because # openlibrary.plugins.openlibrary masks openlibrary module olmemcache = __import__('openlibrary.utils.olmemcache', None, None, ['x']) @@ -373,8 +406,10 @@ def MemcachedDict(servers=None): client = olmemcache.Client(servers) return cache.MemcachedDict(memcache_client=client) + cache.register_cache('memcache', MemcachedDict) + def _process_key(key): mapping = ( '/l/', '/languages/', @@ -387,6 +422,7 @@ def _process_key(key): return new + key[len(old):] return key + def _process_data(data): if isinstance(data, list): return [_process_data(d) for d in data] @@ -397,6 +433,7 @@ def _process_data(data): else: return data + def safeint(value, default=0): """Convers the value to integer. Returns 0, if the conversion fails.""" try: @@ -404,6 +441,7 @@ def safeint(value, default=0): except Exception: return default + def fix_table_of_contents(table_of_contents): """Some books have bad table_of_contents. This function converts them in to correct format. """ @@ -431,19 +469,21 @@ def row(r): d = [row(r) for r in table_of_contents] return [row for row in d if any(row.values())] -def process_json(key, json): - if key is None or json is None: + +def process_json(key, json_str): + if key is None or json_str is None: return None base = key[1:].split('/')[0] if base in ['authors', 'books', 'works', 'languages', 'people', 'usergroup', 'permission']: - data = simplejson.loads(json) + data = json.loads(json_str) data = _process_data(data) if base == 'books' and 'table_of_contents' in data: data['table_of_contents'] = fix_table_of_contents(data['table_of_contents']) - json = simplejson.dumps(data) - return json + json_str = json.dumps(data) + return json_str + dbstore.process_json = process_json @@ -451,6 +491,7 @@ def process_json(key, json): re_normalize = re.compile('[^[:alphanum:] ]', re.U) + class OLIndexer(_Indexer): """OL custom indexer to index normalized_title etc. """ @@ -489,7 +530,7 @@ def normalize_edition_title(self, title): # http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string def strip_accents(s): - return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')) + return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')) norm = strip_accents(title).lower() norm = norm.replace(' and ', ' ')