Skip to content

Commit

Permalink
Use hash values to compare versions for ibabs. See #452
Browse files Browse the repository at this point in the history
  • Loading branch information
breyten committed Jan 17, 2024
1 parent c8a0a2c commit e3a588d
Showing 1 changed file with 30 additions and 6 deletions.
36 changes: 30 additions & 6 deletions ocd_backend/extractors/ibabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from collections import OrderedDict
from hashlib import sha1

import redis
from zeep.client import Client, Settings
from zeep.exceptions import Error
from zeep.helpers import serialize_object
Expand All @@ -16,6 +17,8 @@
from ocd_backend.utils.ibabs import (
meeting_to_dict, list_entry_response_to_dict, votes_to_dict)
from ocd_backend.utils.misc import json_encoder
from ocd_backend.settings import SOURCES_CONFIG_FILE, \
DEFAULT_INDEX_PREFIX, DUMPS_DIR, REDIS_HOST, REDIS_PORT

log = get_source_logger('extractor')

Expand Down Expand Up @@ -51,6 +54,12 @@ def __init__(self, *args, **kwargs):
except Error as e:
log.error(f'Unable to instantiate iBabs client: {str(e)}')

try:
self.redis_client = redis.StrictRedis(
host=REDIS_HOST, port=REDIS_PORT, db=1, decode_responses=True)
except Exception as e:
log.error(f'Unable to instantiate Redis client: {str(e)}')

def _make_hash(self, report_dict):
"""
Make a hash value for a dict. This can be used to compare dicts to an
Expand All @@ -61,6 +70,14 @@ def _make_hash(self, report_dict):
h.update(json_encoder.encode(ordered_report_dict).encode('ascii'))
return h.hexdigest()

def check_if_most_recent(self, site_name, id, report_dict):
    """
    Tell whether ``report_dict`` differs from the version recorded earlier.

    The hash of ``report_dict`` (see ``_make_hash``) is compared with the
    hash stored in Redis under the key ``"<site_name>|<id>"``. When the two
    differ, the stored hash is replaced by the new one, so calling this
    method has a side effect: a second call with the same arguments
    returns ``False``.

    :param site_name: name of the iBabs site; first half of the Redis key.
    :param id: identifier of the item within the site; second half of the
        Redis key. (Shadows the builtin, but the name is kept so existing
        keyword callers keep working.)
    :param report_dict: the item's data, hashed with ``_make_hash``.
    :returns: ``True`` when the item is new or has changed and should be
        processed, ``False`` when the stored hash equals the current one.
    """
    # __init__ only logs a failure to build the Redis client and leaves
    # the attribute unset. Without a hash store nothing can be compared,
    # so report every item as changed instead of raising AttributeError.
    redis_client = getattr(self, 'redis_client', None)
    if redis_client is None:
        return True

    redis_key = "%s|%s" % (site_name, id,)
    old_hash = redis_client.get(redis_key)
    new_hash = self._make_hash(report_dict)

    # Presumably a key that was never stored comes back as None, which
    # never equals a hex digest, so unseen items count as changed.
    is_changed = old_hash != new_hash
    if is_changed:
        redis_client.set(redis_key, new_hash)
    return is_changed


class IBabsCommitteesExtractor(IBabsBaseExtractor):
"""
Expand Down Expand Up @@ -193,11 +210,13 @@ def run(self):
continue

meeting_dict['Meetingtype'] = meeting_types[meeting_dict['MeetingtypeId']]
yield 'application/json', \
json.dumps(meeting_dict), \
None, \
'ibabs/' + cached_path,

if self.check_if_most_recent(self.source_definition['ibabs_sitename'], meeting['Id'], meeting_dict):
yield 'application/json', \
json.dumps(meeting_dict), \
None, \
'ibabs/' + cached_path,
else:
log.info('Skipped meeting %s because we have it already' % (meeting['Id'],))
meeting_count += 1

log.info(f'[{self.source_definition["key"]}] Extracted total of {meeting_count} ibabs meetings. Also skipped {meetings_skipped} meetings.')
Expand Down Expand Up @@ -311,8 +330,13 @@ def run(self):
continue

log.info(self._make_hash(report_dict))
is_newer = self.check_if_most_recent(
self.source_definition['ibabs_sitename'], item['id'], report_dict)
# identifier = item['id'][0]
yield 'application/json', json_encoder.encode(report_dict), None, 'ibabs/' + cached_path
if is_newer:
yield 'application/json', json_encoder.encode(report_dict), None, 'ibabs/' + cached_path
else:
log.info('Skipped %s because we already have that one in this version' % (item['id'],))
yield_count += 1
total_yield_count += 1
result_count += 1
Expand Down

0 comments on commit e3a588d

Please sign in to comment.