2852/refactor/replace urllib with requests ol infobase #4419

Merged
117 changes: 79 additions & 38 deletions openlibrary/plugins/ol_infobase.py
@@ -2,30 +2,36 @@
"""Open Library plugin for infobase.
"""
from __future__ import print_function
import os

import datetime
import simplejson
import json
import logging
import logging.config
import os
import re
import sys
import traceback
import re
import unicodedata

import requests
import six
from six.moves import urllib
import web
from infogami.infobase import config, common, server, cache, dbstore

from infogami.infobase import cache, common, config, dbstore, server

from ..utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10, normalize_isbn

# relative import
from .openlibrary import schema
from ..utils.isbn import isbn_10_to_isbn_13, isbn_13_to_isbn_10, normalize_isbn

logger = logging.getLogger("infobase.ol")


def init_plugin():
"""Initialize infobase plugin."""
from infogami.infobase import common, dbstore, server, logger as infobase_logger
from infogami.infobase import common, dbstore
from infogami.infobase import logger as infobase_logger
from infogami.infobase import server
dbstore.default_schema = schema.get_schema()

# Replace infobase Indexer with OL custom Indexer
@@ -45,30 +51,31 @@ def init_plugin():

if ol:
# install custom indexer
#XXX-Anand: this might create some trouble. Commenting out.
# XXX-Anand: this might create some trouble. Commenting out.
# ol.store.indexer = Indexer()

if config.get('http_listeners'):
logger.info("setting up http listeners")
ol.add_trigger(None, http_notify)

## memcache invalidator is not required now. It was added for future use.
#_cache = config.get("cache", {})
#if _cache.get("type") == "memcache":
# logger.info("setting up memcache invalidater")
# ol.add_trigger(None, MemcacheInvalidater())
# # memcache invalidator is not required now. It was added for future use.
# _cache = config.get("cache", {})
# if _cache.get("type") == "memcache":
# logger.info("setting up memcache invalidater")
# ol.add_trigger(None, MemcacheInvalidater())

# hook to add count functionality
server.app.add_mapping("/([^/]*)/count_editions_by_author", __name__ + ".count_editions_by_author")
server.app.add_mapping("/([^/]*)/count_editions_by_work", __name__ + ".count_editions_by_work")
server.app.add_mapping("/([^/]*)/count_edits_by_user", __name__ + ".count_edits_by_user")
server.app.add_mapping("/([^/]*)/most_recent", __name__ + ".most_recent")
server.app.add_mapping("/([^/]*)/clear_cache", __name__ + ".clear_cache")
server.app.add_mapping("/([^/]*)/stats/(\d\d\d\d-\d\d-\d\d)", __name__ + ".stats")
server.app.add_mapping("/([^/]*)/has_user", __name__ + ".has_user")
server.app.add_mapping("/([^/]*)/olid_to_key", __name__ + ".olid_to_key")
server.app.add_mapping("/_reload_config", __name__ + ".reload_config")
server.app.add_mapping("/_inspect", __name__ + "._inspect")
server.app.add_mapping(r"/([^/]*)/count_editions_by_author", __name__ + ".count_editions_by_author")
server.app.add_mapping(r"/([^/]*)/count_editions_by_work", __name__ + ".count_editions_by_work")
server.app.add_mapping(r"/([^/]*)/count_edits_by_user", __name__ + ".count_edits_by_user")
server.app.add_mapping(r"/([^/]*)/most_recent", __name__ + ".most_recent")
server.app.add_mapping(r"/([^/]*)/clear_cache", __name__ + ".clear_cache")
server.app.add_mapping(r"/([^/]*)/stats/(\d\d\d\d-\d\d-\d\d)", __name__ + ".stats")
server.app.add_mapping(r"/([^/]*)/has_user", __name__ + ".has_user")
server.app.add_mapping(r"/([^/]*)/olid_to_key", __name__ + ".olid_to_key")
server.app.add_mapping(r"/_reload_config", __name__ + ".reload_config")
server.app.add_mapping(r"/_inspect", __name__ + "._inspect")


def setup_logging():
try:
@@ -81,13 +88,15 @@ def setup_logging():
print("Unable to set logging configuration:", str(e), file=sys.stderr)
raise


class reload_config:
@server.jsonify
def POST(self):
logging.info("reloading logging config")
setup_logging()
return {"ok": "true"}


class _inspect:
"""Backdoor to inspect the running process.

@@ -98,13 +107,15 @@ def GET(self):
try:
import _inspect
return _inspect.inspect()
except Exception as e:
except Exception:
return traceback.format_exc()


def get_db():
site = server.get_site('openlibrary.org')
return site.store.db


@web.memoize
def get_property_id(type, name):
db = get_db()
@@ -114,12 +125,14 @@ def get_property_id(type, name):
except IndexError:
return None


def get_thing_id(key):
try:
return get_db().where('thing', key=key)[0].id
except IndexError:
return None


def count(table, type, key, value):
pid = get_property_id(type, key)

@@ -128,25 +141,29 @@ def count(table, type, key, value):
return 0
return get_db().query("SELECT count(*) FROM " + table + " WHERE key_id=$pid AND value=$value_id", vars=locals())[0].count


class count_editions_by_author:
@server.jsonify
def GET(self, sitename):
i = server.input('key')
return count('edition_ref', '/type/edition', 'authors', i.key)


class count_editions_by_work:
@server.jsonify
def GET(self, sitename):
i = server.input('key')
return count('edition_ref', '/type/edition', 'works', i.key)


class count_edits_by_user:
@server.jsonify
def GET(self, sitename):
i = server.input('key')
author_id = get_thing_id(i.key)
return get_db().query("SELECT count(*) as count FROM transaction WHERE author_id=$author_id", vars=locals())[0].count


class has_user:
@server.jsonify
def GET(self, sitename):
@@ -161,6 +178,7 @@ def GET(self, sitename):
d = get_db().query("SELECT * from thing WHERE lower(key) = $key AND type=$type_user", vars=locals())
return bool(d)


class stats:
@server.jsonify
def GET(self, sitename, today):
@@ -203,12 +221,15 @@ def count(self, tables, where, vars):
vars=vars
)[0].value


most_recent_change = None


def invalidate_most_recent_change(event):
global most_recent_change
most_recent_change = None


class most_recent:
@server.jsonify
def GET(self, sitename):
@@ -218,13 +239,15 @@ def GET(self, sitename):
most_recent_change = site.versions({'limit': 1})[0]
return most_recent_change


class clear_cache:
@server.jsonify
def POST(self, sitename):
from infogami.infobase import cache
cache.global_cache.clear()
return {'done': True}


class olid_to_key:
@server.jsonify
def GET(self, sitename):
@@ -233,6 +256,7 @@ def GET(self, sitename):
key = d and d[0].key or None
return {'olid': i.olid, 'key': key}


def write(path, data):
dir = os.path.dirname(path)
if not os.path.exists(dir):
@@ -241,19 +265,23 @@ def write(path, data):
f.write(data)
f.close()


def save_error(dir, prefix):
try:
logger.error("Error", exc_info=True)
error = web.djangoerror()
now = datetime.datetime.utcnow()
path = '%s/%04d-%02d-%02d/%s-%02d%02d%02d.%06d.html' % (dir, \
path = '%s/%04d-%02d-%02d/%s-%02d%02d%02d.%06d.html' % (
dir,
now.year, now.month, now.day, prefix,
now.hour, now.minute, now.second, now.microsecond)
now.hour, now.minute, now.second, now.microsecond
)
logger.error('Error saved to %s', path)
write(path, web.safestr(error))
except:
except Exception:
logger.error('Exception in saving the error', exc_info=True)


def get_object_data(site, thing):
"""Return expanded data of specified object."""
def expand(value):
@@ -272,6 +300,7 @@ def expand(value):
d[k] = expand(v)
return d


def http_notify(site, old, new):
"""Notify listeners over http."""
if isinstance(new, dict):
@@ -280,25 +309,28 @@ def http_notify(site, old, new):
# new is a thing. call format_data to get the actual data.
data = new.format_data()

json = simplejson.dumps(data)
json_data = json.dumps(data)
key = data['key']

# optimize the most common case.
# The following prefixes are never cached at the client. Avoid cache invalidation in that case.
not_cached = ['/b/', '/a/', '/books/', '/authors/', '/works/', '/subjects/', '/publishers/', '/user/', '/usergroup/', '/people/']
not_cached = ['/b/', '/a/', '/books/', '/authors/', '/works/', '/subjects/', '/publishers/', '/user/',
'/usergroup/', '/people/']
for prefix in not_cached:
if key.startswith(prefix):
return

for url in config.http_listeners:
try:
response = urllib.request.urlopen(url, json).read()
print('http_notify', repr(url), repr(key), repr(response), file=web.debug)
except:
response = requests.get(url, params=json_data)
response.raise_for_status()
print('http_notify', repr(url), repr(key), repr(response.text), file=web.debug)
except Exception:
print('failed to send http_notify', repr(url), repr(key), file=web.debug)
import traceback
traceback.print_exc()
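
One behavioural detail in the hunk above is worth calling out: urllib.request.urlopen(url, data) always issues a POST with the payload in the request body, whereas requests.get(url, params=...) issues a GET with the payload encoded into the query string. If a listener were to depend on the original body-based semantics, the closer translation would be a sketch like this (http_notify_post is a hypothetical name, not part of the PR):

    import requests

    def http_notify_post(url, json_data):
        # Body-based notification, matching urlopen(url, data)'s POST.
        response = requests.post(url, data=json_data)
        response.raise_for_status()
        return response.text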


class MemcacheInvalidater:
def __init__(self):
self.memcache = self.get_memcache_client()
@@ -362,6 +394,7 @@ def invalidate_work(self, site, old):
def invalidate_default(self, site, old):
yield old.key


# openlibrary.utils can't be imported directly because
# openlibrary.plugins.openlibrary masks openlibrary module
olmemcache = __import__('openlibrary.utils.olmemcache', None, None, ['x'])
@@ -373,8 +406,10 @@ def MemcachedDict(servers=None):
client = olmemcache.Client(servers)
return cache.MemcachedDict(memcache_client=client)


cache.register_cache('memcache', MemcachedDict)


def _process_key(key):
mapping = (
'/l/', '/languages/',
@@ -387,6 +422,7 @@ def _process_key(key):
return new + key[len(old):]
return key


def _process_data(data):
if isinstance(data, list):
return [_process_data(d) for d in data]
@@ -397,13 +433,15 @@ def _process_data(data):
else:
return data


def safeint(value, default=0):
"""Convers the value to integer. Returns 0, if the conversion fails."""
try:
return int(value)
except Exception:
return default


def fix_table_of_contents(table_of_contents):
"""Some books have bad table_of_contents. This function converts them in to correct format.
"""
@@ -431,26 +469,29 @@ def row(r):
d = [row(r) for r in table_of_contents]
return [row for row in d if any(row.values())]

def process_json(key, json):
if key is None or json is None:

def process_json(key, json_str):
if key is None or json_str is None:
return None
base = key[1:].split('/')[0]
if base in ['authors', 'books', 'works', 'languages', 'people', 'usergroup', 'permission']:
data = simplejson.loads(json)
data = json.loads(json_str)
data = _process_data(data)

if base == 'books' and 'table_of_contents' in data:
data['table_of_contents'] = fix_table_of_contents(data['table_of_contents'])

json = simplejson.dumps(data)
return json
json_str = json.dumps(data)
return json_str


dbstore.process_json = process_json
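
The parameter rename from json to json_str in process_json is what makes the switch from simplejson to the stdlib json module safe: a parameter named json would shadow the imported module inside the function. A minimal reproduction of the bug the rename avoids (the function names here are illustrative):

    import json

    def process_broken(key, json):  # parameter shadows the json module
        # AttributeError: 'str' object has no attribute 'loads'
        return json.loads(json)

    def process_fixed(key, json_str):
        return json.loads(json_str)  # json still refers to the module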

_Indexer = dbstore.Indexer

re_normalize = re.compile('[^[:alphanum:] ]', re.U)


class OLIndexer(_Indexer):
"""OL custom indexer to index normalized_title etc.
"""
@@ -489,7 +530,7 @@ def normalize_edition_title(self, title):

# http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
def strip_accents(s):
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))

norm = strip_accents(title).lower()
norm = norm.replace(' and ', ' ')
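
For context on the helper whose indentation is corrected above: the NFD-plus-category-filter idiom decomposes each accented character into a base character followed by combining marks (category 'Mn'), then drops the marks. A standalone sketch of the same idiom:

    import unicodedata

    def strip_accents(s):
        # NFD splits 'é' into 'e' + a combining acute accent; dropping
        # category 'Mn' characters removes the accent marks.
        return ''.join(c for c in unicodedata.normalize('NFD', s)
                       if unicodedata.category(c) != 'Mn')

    print(strip_accents('Les Misérables'))  # Les Miserables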