Skip to content

Commit

Permalink
2852/refactor/replace urllib with requests ol infobase (#4419)
Browse files Browse the repository at this point in the history
* Resolves lint warnings with reformatting, et al.

* Ran isort --profile black on ol_infobase

* Replace simplejson with json and urlopen with requests.get

* use more descriptive name, and add named param for requests.get

* Requests raise_for_status() needs to be called from a response

Co-authored-by: Christian Clauss <[email protected]>
  • Loading branch information
dherbst and cclauss authored Jan 18, 2021
1 parent 8203f8c commit b75996d
Showing 1 changed file with 79 additions and 38 deletions.
117 changes: 79 additions & 38 deletions openlibrary/plugins/ol_infobase.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,36 @@
"""Open Library plugin for infobase.
"""
from __future__ import print_function
import os

import datetime
import simplejson
import json
import logging
import logging.config
import os
import re
import sys
import traceback
import re
import unicodedata

import requests
import six
from six.moves import urllib
import web
from infogami.infobase import config, common, server, cache, dbstore

from infogami.infobase import cache, common, config, dbstore, server

from ..utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10, normalize_isbn

# relative import
from .openlibrary import schema
from ..utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10, normalize_isbn

logger = logging.getLogger("infobase.ol")


def init_plugin():
"""Initialize infobase plugin."""
from infogami.infobase import common, dbstore, server, logger as infobase_logger
from infogami.infobase import common, dbstore
from infogami.infobase import logger as infobase_logger
from infogami.infobase import server
dbstore.default_schema = schema.get_schema()

# Replace infobase Indexer with OL custom Indexer
Expand All @@ -45,30 +51,31 @@ def init_plugin():

if ol:
# install custom indexer
#XXX-Anand: this might create some trouble. Commenting out.
# XXX-Anand: this might create some trouble. Commenting out.
# ol.store.indexer = Indexer()

if config.get('http_listeners'):
logger.info("setting up http listeners")
ol.add_trigger(None, http_notify)

## memcache invalidator is not required now. It was added for future use.
#_cache = config.get("cache", {})
#if _cache.get("type") == "memcache":
# logger.info("setting up memcache invalidater")
# ol.add_trigger(None, MemcacheInvalidater())
# # memcache invalidator is not required now. It was added for future use.
# _cache = config.get("cache", {})
# if _cache.get("type") == "memcache":
# logger.info("setting up memcache invalidater")
# ol.add_trigger(None, MemcacheInvalidater())

# hook to add count functionality
server.app.add_mapping("/([^/]*)/count_editions_by_author", __name__ + ".count_editions_by_author")
server.app.add_mapping("/([^/]*)/count_editions_by_work", __name__ + ".count_editions_by_work")
server.app.add_mapping("/([^/]*)/count_edits_by_user", __name__ + ".count_edits_by_user")
server.app.add_mapping("/([^/]*)/most_recent", __name__ + ".most_recent")
server.app.add_mapping("/([^/]*)/clear_cache", __name__ + ".clear_cache")
server.app.add_mapping("/([^/]*)/stats/(\d\d\d\d-\d\d-\d\d)", __name__ + ".stats")
server.app.add_mapping("/([^/]*)/has_user", __name__ + ".has_user")
server.app.add_mapping("/([^/]*)/olid_to_key", __name__ + ".olid_to_key")
server.app.add_mapping("/_reload_config", __name__ + ".reload_config")
server.app.add_mapping("/_inspect", __name__ + "._inspect")
server.app.add_mapping(r"/([^/]*)/count_editions_by_author", __name__ + ".count_editions_by_author")
server.app.add_mapping(r"/([^/]*)/count_editions_by_work", __name__ + ".count_editions_by_work")
server.app.add_mapping(r"/([^/]*)/count_edits_by_user", __name__ + ".count_edits_by_user")
server.app.add_mapping(r"/([^/]*)/most_recent", __name__ + ".most_recent")
server.app.add_mapping(r"/([^/]*)/clear_cache", __name__ + ".clear_cache")
server.app.add_mapping(r"/([^/]*)/stats/(\d\d\d\d-\d\d-\d\d)", __name__ + ".stats")
server.app.add_mapping(r"/([^/]*)/has_user", __name__ + ".has_user")
server.app.add_mapping(r"/([^/]*)/olid_to_key", __name__ + ".olid_to_key")
server.app.add_mapping(r"/_reload_config", __name__ + ".reload_config")
server.app.add_mapping(r"/_inspect", __name__ + "._inspect")


def setup_logging():
try:
Expand All @@ -81,13 +88,15 @@ def setup_logging():
print("Unable to set logging configuration:", str(e), file=sys.stderr)
raise


class reload_config:
    """HTTP endpoint that re-reads the logging configuration at runtime."""

    @server.jsonify
    def POST(self):
        # setup_logging() re-applies the config file; it raises (and logs to
        # stderr) when the configuration cannot be loaded.
        logging.info("reloading logging config")
        setup_logging()
        return {"ok": "true"}


class _inspect:
"""Backdoor to inspect the running process.
Expand All @@ -98,13 +107,15 @@ def GET(self):
try:
import _inspect
return _inspect.inspect()
except Exception as e:
except Exception:
return traceback.format_exc()


def get_db():
    """Return the database handle of the 'openlibrary.org' infobase site."""
    return server.get_site('openlibrary.org').store.db


@web.memoize
def get_property_id(type, name):
db = get_db()
Expand All @@ -114,12 +125,14 @@ def get_property_id(type, name):
except IndexError:
return None


def get_thing_id(key):
    """Return the numeric id of the thing with the given *key*.

    Returns None when no row in the ``thing`` table matches the key.
    """
    try:
        matches = get_db().where('thing', key=key)
        return matches[0].id
    except IndexError:
        return None


def count(table, type, key, value):
pid = get_property_id(type, key)

Expand All @@ -128,25 +141,29 @@ def count(table, type, key, value):
return 0
return get_db().query("SELECT count(*) FROM " + table + " WHERE key_id=$pid AND value=$value_id", vars=locals())[0].count


class count_editions_by_author:
    """JSON endpoint: count editions whose ``authors`` reference a key."""

    @server.jsonify
    def GET(self, sitename):
        params = server.input('key')
        return count('edition_ref', '/type/edition', 'authors', params.key)


class count_editions_by_work:
    """JSON endpoint: count editions whose ``works`` reference a key."""

    @server.jsonify
    def GET(self, sitename):
        params = server.input('key')
        return count('edition_ref', '/type/edition', 'works', params.key)


class count_edits_by_user:
    """JSON endpoint: count transactions authored by the given user key."""

    @server.jsonify
    def GET(self, sitename):
        params = server.input('key')
        # Resolve the user key to its numeric thing id; the SQL below binds
        # $author_id from locals().
        author_id = get_thing_id(params.key)
        return get_db().query(
            "SELECT count(*) as count FROM transaction WHERE author_id=$author_id",
            vars=locals(),
        )[0].count


class has_user:
@server.jsonify
def GET(self, sitename):
Expand All @@ -161,6 +178,7 @@ def GET(self, sitename):
d = get_db().query("SELECT * from thing WHERE lower(key) = $key AND type=$type_user", vars=locals())
return bool(d)


class stats:
@server.jsonify
def GET(self, sitename, today):
Expand Down Expand Up @@ -203,12 +221,15 @@ def count(self, tables, where, vars):
vars=vars
)[0].value


# Module-level cache of the latest version/change record; None means
# "not cached yet" (see invalidate_most_recent_change and the most_recent
# endpoint, which repopulates it).
most_recent_change = None


def invalidate_most_recent_change(event):
    """Drop the cached most-recent change so the next lookup refetches it.

    The *event* argument is ignored; the signature matches the trigger
    callback interface.
    """
    global most_recent_change
    most_recent_change = None


class most_recent:
@server.jsonify
def GET(self, sitename):
Expand All @@ -218,13 +239,15 @@ def GET(self, sitename):
most_recent_change = site.versions({'limit': 1})[0]
return most_recent_change


class clear_cache:
    """HTTP endpoint that flushes infobase's global in-memory cache."""

    @server.jsonify
    def POST(self, sitename):
        # NOTE(review): local import — presumably to avoid an import cycle
        # at module load time; confirm before hoisting to the top of file.
        from infogami.infobase import cache
        cache.global_cache.clear()
        return {'done': True}


class olid_to_key:
@server.jsonify
def GET(self, sitename):
Expand All @@ -233,6 +256,7 @@ def GET(self, sitename):
key = d and d[0].key or None
return {'olid': i.olid, 'key': key}


def write(path, data):
dir = os.path.dirname(path)
if not os.path.exists(dir):
Expand All @@ -241,19 +265,23 @@ def write(path, data):
f.write(data)
f.close()


def save_error(dir, prefix):
try:
logger.error("Error", exc_info=True)
error = web.djangoerror()
now = datetime.datetime.utcnow()
path = '%s/%04d-%02d-%02d/%s-%02d%02d%02d.%06d.html' % (dir, \
path = '%s/%04d-%02d-%02d/%s-%02d%02d%02d.%06d.html' % (
dir,
now.year, now.month, now.day, prefix,
now.hour, now.minute, now.second, now.microsecond)
now.hour, now.minute, now.second, now.microsecond
)
logger.error('Error saved to %s', path)
write(path, web.safestr(error))
except:
except Exception:
logger.error('Exception in saving the error', exc_info=True)


def get_object_data(site, thing):
"""Return expanded data of specified object."""
def expand(value):
Expand All @@ -272,6 +300,7 @@ def expand(value):
d[k] = expand(v)
return d


def http_notify(site, old, new):
"""Notify listeners over http."""
if isinstance(new, dict):
Expand All @@ -280,25 +309,28 @@ def http_notify(site, old, new):
# new is a thing. call format_data to get the actual data.
data = new.format_data()

json = simplejson.dumps(data)
json_data = json.dumps(data)
key = data['key']

# optimize the most common case.
# The following prefixes are never cached at the client. Avoid cache invalidation in that case.
not_cached = ['/b/', '/a/', '/books/', '/authors/', '/works/', '/subjects/', '/publishers/', '/user/', '/usergroup/', '/people/']
not_cached = ['/b/', '/a/', '/books/', '/authors/', '/works/', '/subjects/', '/publishers/', '/user/',
'/usergroup/', '/people/']
for prefix in not_cached:
if key.startswith(prefix):
return

for url in config.http_listeners:
try:
response = urllib.request.urlopen(url, json).read()
print('http_notify', repr(url), repr(key), repr(response), file=web.debug)
except:
response = requests.get(url, params=json_data)
response.raise_for_status()
print('http_notify', repr(url), repr(key), repr(response.text), file=web.debug)
except Exception:
print('failed to send http_notify', repr(url), repr(key), file=web.debug)
import traceback
traceback.print_exc()


class MemcacheInvalidater:
def __init__(self):
self.memcache = self.get_memcache_client()
Expand Down Expand Up @@ -362,6 +394,7 @@ def invalidate_work(self, site, old):
def invalidate_default(self, site, old):
yield old.key


# openlibrary.utils can't be imported directly because
# openlibrary.plugins.openlibrary masks openlibrary module
olmemcache = __import__('openlibrary.utils.olmemcache', None, None, ['x'])
Expand All @@ -373,8 +406,10 @@ def MemcachedDict(servers=None):
client = olmemcache.Client(servers)
return cache.MemcachedDict(memcache_client=client)


cache.register_cache('memcache', MemcachedDict)


def _process_key(key):
mapping = (
'/l/', '/languages/',
Expand All @@ -387,6 +422,7 @@ def _process_key(key):
return new + key[len(old):]
return key


def _process_data(data):
if isinstance(data, list):
return [_process_data(d) for d in data]
Expand All @@ -397,13 +433,15 @@ def _process_data(data):
else:
return data


def safeint(value, default=0):
    """Convert *value* to an int, returning *default* if conversion fails.

    The original docstring claimed 0 was always returned on failure; in
    fact the caller-supplied *default* (which defaults to 0) is returned.
    The broad ``except Exception`` is deliberate: this is a best-effort
    conversion that must never raise.
    """
    try:
        return int(value)
    except Exception:
        return default


def fix_table_of_contents(table_of_contents):
"""Some books have bad table_of_contents. This function converts them in to correct format.
"""
Expand Down Expand Up @@ -431,26 +469,29 @@ def row(r):
d = [row(r) for r in table_of_contents]
return [row for row in d if any(row.values())]

def process_json(key, json):
if key is None or json is None:

def process_json(key, json_str):
if key is None or json_str is None:
return None
base = key[1:].split('/')[0]
if base in ['authors', 'books', 'works', 'languages', 'people', 'usergroup', 'permission']:
data = simplejson.loads(json)
data = json.loads(json_str)
data = _process_data(data)

if base == 'books' and 'table_of_contents' in data:
data['table_of_contents'] = fix_table_of_contents(data['table_of_contents'])

json = simplejson.dumps(data)
return json
json_str = json.dumps(data)
return json_str


dbstore.process_json = process_json

_Indexer = dbstore.Indexer

re_normalize = re.compile('[^[:alphanum:] ]', re.U)


class OLIndexer(_Indexer):
"""OL custom indexer to index normalized_title etc.
"""
Expand Down Expand Up @@ -489,7 +530,7 @@ def normalize_edition_title(self, title):

# http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
def strip_accents(s):
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))

norm = strip_accents(title).lower()
norm = norm.replace(' and ', ' ')
Expand Down

0 comments on commit b75996d

Please sign in to comment.