From 28d909985c76d104084b85f4435a055f61b1f7a8 Mon Sep 17 00:00:00 2001 From: Peter Weber Date: Wed, 24 Aug 2022 21:18:55 +0200 Subject: [PATCH] various: better UTC times Co-Authored-by: Peter Weber --- rero_mef/agents/mef/api.py | 17 ++++++++ rero_mef/api_mef.py | 19 +-------- rero_mef/concepts/mef/api.py | 20 ++++++++- rero_mef/marctojson/do_gnd_agent.py | 8 +--- rero_mef/marctojson/do_idref_agent.py | 8 +--- rero_mef/utils.py | 58 ++++++++++++++------------- 6 files changed, 72 insertions(+), 58 deletions(-) diff --git a/rero_mef/agents/mef/api.py b/rero_mef/agents/mef/api.py index 02d92eb1..7aed7aba 100644 --- a/rero_mef/agents/mef/api.py +++ b/rero_mef/agents/mef/api.py @@ -17,6 +17,8 @@ """API for manipulating MEF records.""" +from datetime import datetime, timezone + import click from elasticsearch_dsl import Q from flask import current_app @@ -166,6 +168,21 @@ def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False, """ return self, Action.ERROR, None, False + @classmethod + def create_deleted(cls, record, dbcommit=False, reindex=False): + """Create a deleted record for an record. + + :param record: Record to create. + :param dbcommit: Commit changes to DB. + :param reindex: Reindex record. + :returns: Created record. 
+ """ + data = { + record.name: {'$ref': build_ref_string(record.pid, record.name)}, + 'deleted': datetime.now(timezone.utc).isoformat() + } + return cls.create(data=data, dbcommit=dbcommit, reindex=reindex) + class AgentMefIndexer(ReroIndexer): """AgentMefIndexer.""" diff --git a/rero_mef/api_mef.py b/rero_mef/api_mef.py index 0c214c80..2e2a9211 100644 --- a/rero_mef/api_mef.py +++ b/rero_mef/api_mef.py @@ -17,10 +17,9 @@ """API for manipulating MEF records.""" -from datetime import datetime +from datetime import datetime, timezone import click -import pytz from elasticsearch_dsl import Q from flask import current_app from invenio_search import current_search @@ -212,24 +211,10 @@ def mark_as_deleted(self, dbcommit=False, reindex=False): :param reindex: Reindex record. :returns: Modified record. """ - self['deleted'] = pytz.utc.localize(datetime.now()).isoformat() + self['deleted'] = datetime.now(timezone.utc).isoformat() self.update(data=self, dbcommit=dbcommit, reindex=reindex) return self - @classmethod - def create_deleted(cls, record, dbcommit=False, reindex=False): - """Create a deleted record for an record. - - :param record: Record to create. - :param dbcommit: Commit changes to DB. - :param reindex: Reindex record. - :returns: Created record. 
- """ - data = {record.name: { - '$ref': build_ref_string(record.pid, record.name)}} - data['deleted'] = pytz.utc.localize(datetime.now()).isoformat() - return cls.create(data=data, dbcommit=dbcommit, reindex=reindex) - @classmethod def flush_indexes(cls): """Update indexes.""" diff --git a/rero_mef/concepts/mef/api.py b/rero_mef/concepts/mef/api.py index 557ddc96..4f2d0d3b 100644 --- a/rero_mef/concepts/mef/api.py +++ b/rero_mef/concepts/mef/api.py @@ -17,6 +17,8 @@ """API for manipulating MEF records.""" +from datetime import datetime, timezone + from flask import current_app from invenio_search import current_search from invenio_search.api import RecordsSearch @@ -95,8 +97,7 @@ def replace_refs(self): for agent in ['rero']: if agent in data and data[agent]: sources.append(agent) - metadata = data[agent].get('metadata') - if metadata: + if metadata := data[agent].get('metadata'): data[agent] = metadata data['sources'] = sources return data @@ -112,6 +113,21 @@ def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False, """ return self, Action.Error, None, False + @classmethod + def create_deleted(cls, record, dbcommit=False, reindex=False): + """Create a deleted record for an record. + + :param record: Record to create. + :param dbcommit: Commit changes to DB. + :param reindex: Reindex record. + :returns: Created record. 
+ """ + data = { + record.name: {'$ref': build_ref_string(record.pid, record.name)}, + 'deleted': datetime.now(timezone.utc).isoformat() + } + return cls.create(data=data, dbcommit=dbcommit, reindex=reindex) + class ConceptMefIndexer(ReroIndexer): """MefIndexer.""" diff --git a/rero_mef/marctojson/do_gnd_agent.py b/rero_mef/marctojson/do_gnd_agent.py index 1cb5c227..7a051a23 100644 --- a/rero_mef/marctojson/do_gnd_agent.py +++ b/rero_mef/marctojson/do_gnd_agent.py @@ -19,9 +19,7 @@ # https://www.dnb.de/EN/Professionell/Metadatendienste/Datenbezug/GND_Aenderungsdienst/gndAenderungsdienst_node.html import re -from datetime import datetime - -import pytz +from datetime import datetime, timezone from rero_mef.marctojson.helper import COUNTRIES, COUNTRY_UNIMARC_MARC21, \ LANGUAGES, build_string_list_from_fields @@ -76,9 +74,7 @@ def trans_gnd_deleted(self): if self.logger and self.verbose: self.logger.info('Call Function', 'trans_gnd_deleted') if self.marc.leader[5] in ['c', 'd', 'x']: - self.json_dict['deleted'] = pytz.utc.localize( - datetime.now() - ).isoformat() + self.json_dict['deleted'] = datetime.now(timezone.utc).isoformat() def trans_gnd_relation_pid(self): """Transformation relation pids 682 $0. 
diff --git a/rero_mef/marctojson/do_idref_agent.py b/rero_mef/marctojson/do_idref_agent.py index 2a4d05c0..b96e52f2 100644 --- a/rero_mef/marctojson/do_idref_agent.py +++ b/rero_mef/marctojson/do_idref_agent.py @@ -17,9 +17,7 @@ """Marctojsons transformer for IDREF records.""" -from datetime import datetime - -import pytz +from datetime import datetime, timezone from rero_mef.marctojson.helper import COUNTRIES, COUNTRY_UNIMARC_MARC21, \ LANGUAGES, build_string_list_from_fields, remove_trailing_punctuation @@ -149,9 +147,7 @@ def trans_idref_deleted(self): if self.logger and self.verbose: self.logger.info('Call Function', 'trans_idref_deleted') if self.marc.leader[5] == 'd': - self.json_dict['deleted'] = pytz.utc.localize( - datetime.now() - ).isoformat() + self.json_dict['deleted'] = datetime.now(timezone.utc).isoformat() def trans_idref_relation_pid(self): """Transformation old pids 035 $a $9 = sudoc.""" diff --git a/rero_mef/utils.py b/rero_mef/utils.py index e1ff8cb2..d20adaec 100644 --- a/rero_mef/utils.py +++ b/rero_mef/utils.py @@ -23,14 +23,13 @@ # under the terms of the MIT License; see LICENSE file for more details. 
"""Utilities.""" -import datetime import gc import hashlib import json import os import traceback from copy import deepcopy -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from functools import wraps from io import StringIO from json import JSONDecodeError, JSONDecoder, dumps @@ -61,6 +60,10 @@ from sickle.iterator import OAIItemIterator from sickle.oaiexceptions import NoRecordsMatch +# Hours can not be retrieved by get_info_by_oai_name +# TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ' +TIME_FORMAT = '%Y-%m-%d' + def add_oai_source(name, baseurl, metadataprefix='marc21', setspecs='', comment='', update=False): @@ -120,7 +123,7 @@ def oai_set_last_run(name, date, verbose=False): oai_source = get_oaiharvest_object(name) lastrun_date = date if isinstance(date, str): - lastrun_date = parser.parse(date) + lastrun_date = parser.isoparse(date).astimezone(timezone.utc) oai_source.update_lastrun(lastrun_date) oai_source.save() db.session.commit() @@ -227,7 +230,7 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator, and dates_inital['from'] > dates_inital['until']: raise WrongDateCombination("'Until' date larger than 'from' date.") - last_run_date = datetime.now() + last_run_date = datetime.now(timezone.utc) # If we don't have specifications for set searches the setspecs will be # set to e list with None to go into the retrieval loop without @@ -245,23 +248,25 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator, } if access_token: params['accessToken'] = access_token - params.update(dates) + params |= dates if spec: params['set'] = spec - my_from_date = parser.parse(dates['from']) + my_from_date = parser.isoparse( + dates['from']).astimezone(timezone.utc) my_until_date = last_run_date if dates['until']: - my_until_date = parser.parse(dates['until']) + my_until_date = parser.isoparse( + dates['until']).astimezone(timezone.utc) while my_from_date <= my_until_date: until_date = my_from_date + 
timedelta(days=days_span) if until_date > my_until_date: until_date = my_until_date dates = { - 'from': my_from_date.strftime("%Y-%m-%d"), - 'until': until_date.strftime("%Y-%m-%d") + 'from': my_from_date.strftime(TIME_FORMAT), + 'until': until_date.strftime(TIME_FORMAT) } - params.update(dates) + params |= dates try: for record in request.ListRecords(**params): @@ -327,12 +332,12 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator, if debug: traceback.print_exc() count = -1 - my_from_date = my_from_date + timedelta(days=days_span + 1) if verbose: - from_date = my_from_date.strftime("%Y-%m-%d") + from_date = my_from_date.strftime(TIME_FORMAT) click.echo( f'OAI {name} {spec}: {from_date} .. +{days_span}' ) + my_from_date = my_from_date + timedelta(days=days_span + 1) if update_last_run: if verbose: @@ -371,7 +376,7 @@ def oai_save_records_from_dates(name, file_name, sickle, oai_item_iterator, and dates_inital['from'] > dates_inital['until']: raise WrongDateCombination("'Until' date larger than 'from' date.") - last_run_date = datetime.now() + last_run_date = datetime.now(timezone.utc) # If we don't have specifications for set searches the setspecs will be # set to e list with None to go into the retrieval loop without @@ -387,30 +392,31 @@ def oai_save_records_from_dates(name, file_name, sickle, oai_item_iterator, } if access_token: params['accessToken'] = access_token - params.update(dates) + params |= dates if spec: params['set'] = spec - my_from_date = parser.parse(dates['from']) + my_from_date = parser.isoparse(dates['from']).astimezone(timezone.utc) my_until_date = last_run_date if dates['until']: - my_until_date = parser.parse(dates['until']) + my_until_date = parser.isoparse( + dates['until']).astimezone(timezone.utc) while my_from_date <= my_until_date: until_date = my_from_date + timedelta(days=days_span) if until_date > my_until_date: until_date = my_until_date dates = { - 'from': my_from_date.strftime("%Y-%m-%d"), - 'until': 
until_date.strftime("%Y-%m-%d") + 'from': my_from_date.strftime(TIME_FORMAT), + 'until': until_date.strftime(TIME_FORMAT) } - params.update(dates) + params |= dates try: for record in request.ListRecords(**params): count += 1 records = parse_xml_to_array(StringIO(record.raw)) if verbose: - from_date = my_from_date.strftime("%Y-%m-%d") + from_date = my_from_date.strftime(TIME_FORMAT) click.echo( f'OAI {name} spec({spec}): {from_date} ' f'count:{count:>10} = {id}' @@ -427,7 +433,7 @@ def oai_save_records_from_dates(name, file_name, sickle, oai_item_iterator, my_from_date = my_from_date + timedelta(days=days_span + 1) if verbose: - from_date = my_from_date.strftime("%Y-%m-%d") + from_date = my_from_date.strftime(TIME_FORMAT) click.echo( f'OAI {name} spec({spec}): ' f'{from_date} .. +{days_span}' @@ -695,7 +701,7 @@ def bulk_load_agent(agent, data, table, columns, bulk_count=0, verbose=False, buffer = StringIO() buffer_uuid = [] index = columns.index('id') if 'id' in columns else -1 - start_time = datetime.now() + start_time = datetime.now(timezone.utc) with open(data, 'r', encoding='utf-8', buffering=1) as input_file: for line in input_file: count += 1 @@ -706,7 +712,7 @@ def bulk_load_agent(agent, data, table, columns, bulk_count=0, verbose=False, buffer.flush() buffer.seek(0) if verbose: - end_time = datetime.now() + end_time = datetime.now(timezone.utc) diff_time = end_time - start_time start_time = end_time click.echo( @@ -730,7 +736,7 @@ def bulk_load_agent(agent, data, table, columns, bulk_count=0, verbose=False, buffer = StringIO() if verbose: - end_time = datetime.now() + end_time = datetime.now(timezone.utc) diff_time = end_time - start_time click.echo( f'{agent} copy from file: {count} {diff_time.seconds}s', @@ -1113,9 +1119,7 @@ def get_timestamp(name): :returns: time of time stamp """ time_stamps = current_cache.get('timestamps') - if not time_stamps: - return None - return time_stamps.get(name) + return time_stamps.get(name) if time_stamps else None def 
settimestamp(func):