Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

various: better UTC times #102

Merged
merged 1 commit into from
Aug 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions rero_mef/agents/mef/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

"""API for manipulating MEF records."""

from datetime import datetime, timezone

import click
from elasticsearch_dsl import Q
from flask import current_app
Expand Down Expand Up @@ -166,6 +168,21 @@ def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False,
"""
return self, Action.ERROR, None, False

@classmethod
def create_deleted(cls, record, dbcommit=False, reindex=False):
    """Create a record flagged as deleted that references ``record``.

    The new record holds a ``$ref`` to ``record`` under the key
    ``record.name`` and a ``deleted`` timestamp (current UTC time in
    ISO 8601 format).

    :param record: Record to reference (its ``name`` and ``pid`` are used).
    :param dbcommit: Commit changes to DB.
    :param reindex: Reindex record.
    :returns: Whatever ``cls.create`` returns for the new data.
    """
    source_name = record.name
    reference = build_ref_string(record.pid, source_name)
    deleted_at = datetime.now(timezone.utc).isoformat()
    return cls.create(
        data={source_name: {'$ref': reference}, 'deleted': deleted_at},
        dbcommit=dbcommit,
        reindex=reindex
    )


class AgentMefIndexer(ReroIndexer):
"""AgentMefIndexer."""
Expand Down
19 changes: 2 additions & 17 deletions rero_mef/api_mef.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@

"""API for manipulating MEF records."""

from datetime import datetime
from datetime import datetime, timezone

import click
import pytz
from elasticsearch_dsl import Q
from flask import current_app
from invenio_search import current_search
Expand Down Expand Up @@ -212,24 +211,10 @@ def mark_as_deleted(self, dbcommit=False, reindex=False):
:param reindex: Reindex record.
:returns: Modified record.
"""
self['deleted'] = pytz.utc.localize(datetime.now()).isoformat()
self['deleted'] = datetime.now(timezone.utc).isoformat()
self.update(data=self, dbcommit=dbcommit, reindex=reindex)
return self

@classmethod
def create_deleted(cls, record, dbcommit=False, reindex=False):
    """Create a deleted record for a record.

    The new record holds a ``$ref`` to ``record`` under the key
    ``record.name`` and a ``deleted`` timestamp in ISO 8601 format.

    :param record: Record to create.
    :param dbcommit: Commit changes to DB.
    :param reindex: Reindex record.
    :returns: Created record.
    """
    data = {record.name: {
        '$ref': build_ref_string(record.pid, record.name)}}
    # Ask for the current time directly in UTC. Localizing a naive
    # ``datetime.now()`` would label the server's local wall-clock time
    # as UTC and produce a wrong timestamp on non-UTC hosts.
    data['deleted'] = datetime.now(pytz.utc).isoformat()
    return cls.create(data=data, dbcommit=dbcommit, reindex=reindex)

@classmethod
def flush_indexes(cls):
"""Update indexes."""
Expand Down
20 changes: 18 additions & 2 deletions rero_mef/concepts/mef/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

"""API for manipulating MEF records."""

from datetime import datetime, timezone

from flask import current_app
from invenio_search import current_search
from invenio_search.api import RecordsSearch
Expand Down Expand Up @@ -95,8 +97,7 @@ def replace_refs(self):
for agent in ['rero']:
if agent in data and data[agent]:
sources.append(agent)
metadata = data[agent].get('metadata')
if metadata:
if metadata := data[agent].get('metadata'):
data[agent] = metadata
data['sources'] = sources
return data
Expand All @@ -112,6 +113,21 @@ def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False,
"""
return self, Action.Error, None, False

@classmethod
def create_deleted(cls, record, dbcommit=False, reindex=False):
    """Create a record flagged as deleted that references ``record``.

    The data passed to ``cls.create`` contains a ``$ref`` to ``record``
    stored under ``record.name`` and a ``deleted`` entry with the
    current UTC time in ISO 8601 format.

    :param record: Record to reference (its ``name`` and ``pid`` are used).
    :param dbcommit: Commit changes to DB.
    :param reindex: Reindex record.
    :returns: Whatever ``cls.create`` returns for the new data.
    """
    ref_string = build_ref_string(record.pid, record.name)
    now_utc = datetime.now(timezone.utc)
    payload = {
        record.name: {'$ref': ref_string},
        'deleted': now_utc.isoformat()
    }
    return cls.create(data=payload, dbcommit=dbcommit, reindex=reindex)


class ConceptMefIndexer(ReroIndexer):
"""MefIndexer."""
Expand Down
8 changes: 2 additions & 6 deletions rero_mef/marctojson/do_gnd_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@
# https://www.dnb.de/EN/Professionell/Metadatendienste/Datenbezug/GND_Aenderungsdienst/gndAenderungsdienst_node.html

import re
from datetime import datetime

import pytz
from datetime import datetime, timezone

from rero_mef.marctojson.helper import COUNTRIES, COUNTRY_UNIMARC_MARC21, \
LANGUAGES, build_string_list_from_fields
Expand Down Expand Up @@ -76,9 +74,7 @@ def trans_gnd_deleted(self):
if self.logger and self.verbose:
self.logger.info('Call Function', 'trans_gnd_deleted')
if self.marc.leader[5] in ['c', 'd', 'x']:
self.json_dict['deleted'] = pytz.utc.localize(
datetime.now()
).isoformat()
self.json_dict['deleted'] = datetime.now(timezone.utc).isoformat()

def trans_gnd_relation_pid(self):
"""Transformation relation pids 682 $0.
Expand Down
8 changes: 2 additions & 6 deletions rero_mef/marctojson/do_idref_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@

"""Marctojsons transformer for IDREF records."""

from datetime import datetime

import pytz
from datetime import datetime, timezone

from rero_mef.marctojson.helper import COUNTRIES, COUNTRY_UNIMARC_MARC21, \
LANGUAGES, build_string_list_from_fields, remove_trailing_punctuation
Expand Down Expand Up @@ -149,9 +147,7 @@ def trans_idref_deleted(self):
if self.logger and self.verbose:
self.logger.info('Call Function', 'trans_idref_deleted')
if self.marc.leader[5] == 'd':
self.json_dict['deleted'] = pytz.utc.localize(
datetime.now()
).isoformat()
self.json_dict['deleted'] = datetime.now(timezone.utc).isoformat()

def trans_idref_relation_pid(self):
"""Transformation old pids 035 $a $9 = sudoc."""
Expand Down
58 changes: 31 additions & 27 deletions rero_mef/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,13 @@
# under the terms of the MIT License; see LICENSE file for more details.

"""Utilities."""
import datetime
import gc
import hashlib
import json
import os
import traceback
from copy import deepcopy
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from functools import wraps
from io import StringIO
from json import JSONDecodeError, JSONDecoder, dumps
Expand Down Expand Up @@ -61,6 +60,10 @@
from sickle.iterator import OAIItemIterator
from sickle.oaiexceptions import NoRecordsMatch

# Hours can not be retrieved by get_info_by_oai_name
# TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
TIME_FORMAT = '%Y-%m-%d'


def add_oai_source(name, baseurl, metadataprefix='marc21',
setspecs='', comment='', update=False):
Expand Down Expand Up @@ -120,7 +123,7 @@ def oai_set_last_run(name, date, verbose=False):
oai_source = get_oaiharvest_object(name)
lastrun_date = date
if isinstance(date, str):
lastrun_date = parser.parse(date)
lastrun_date = parser.isoparse(date).astimezone(timezone.utc)
oai_source.update_lastrun(lastrun_date)
oai_source.save()
db.session.commit()
Expand Down Expand Up @@ -227,7 +230,7 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator,
and dates_inital['from'] > dates_inital['until']:
raise WrongDateCombination("'Until' date larger than 'from' date.")

last_run_date = datetime.now()
last_run_date = datetime.now(timezone.utc)

# If we don't have specifications for set searches the setspecs will be
# set to a list with None to go into the retrieval loop without
Expand All @@ -245,23 +248,25 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator,
}
if access_token:
params['accessToken'] = access_token
params.update(dates)
params |= dates
if spec:
params['set'] = spec

my_from_date = parser.parse(dates['from'])
my_from_date = parser.isoparse(
dates['from']).astimezone(timezone.utc)
my_until_date = last_run_date
if dates['until']:
my_until_date = parser.parse(dates['until'])
my_until_date = parser.isoparse(
dates['until']).astimezone(timezone.utc)
while my_from_date <= my_until_date:
until_date = my_from_date + timedelta(days=days_span)
if until_date > my_until_date:
until_date = my_until_date
dates = {
'from': my_from_date.strftime("%Y-%m-%d"),
'until': until_date.strftime("%Y-%m-%d")
'from': my_from_date.strftime(TIME_FORMAT),
'until': until_date.strftime(TIME_FORMAT)
}
params.update(dates)
params |= dates

try:
for record in request.ListRecords(**params):
Expand Down Expand Up @@ -327,12 +332,12 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator,
if debug:
traceback.print_exc()
count = -1
my_from_date = my_from_date + timedelta(days=days_span + 1)
if verbose:
from_date = my_from_date.strftime("%Y-%m-%d")
from_date = my_from_date.strftime(TIME_FORMAT)
click.echo(
f'OAI {name} {spec}: {from_date} .. +{days_span}'
)
my_from_date = my_from_date + timedelta(days=days_span + 1)

if update_last_run:
if verbose:
Expand Down Expand Up @@ -371,7 +376,7 @@ def oai_save_records_from_dates(name, file_name, sickle, oai_item_iterator,
and dates_inital['from'] > dates_inital['until']:
raise WrongDateCombination("'Until' date larger than 'from' date.")

last_run_date = datetime.now()
last_run_date = datetime.now(timezone.utc)

# If we don't have specifications for set searches the setspecs will be
# set to a list with None to go into the retrieval loop without
Expand All @@ -387,30 +392,31 @@ def oai_save_records_from_dates(name, file_name, sickle, oai_item_iterator,
}
if access_token:
params['accessToken'] = access_token
params.update(dates)
params |= dates
rerowep marked this conversation as resolved.
Show resolved Hide resolved
if spec:
params['set'] = spec

my_from_date = parser.parse(dates['from'])
# Parse the ISO 'from' date and make it timezone-aware in UTC, so it can
# be compared with the aware `my_until_date`. `tzinfos` expects a mapping
# or callable of timezone names, not a tzinfo object.
my_from_date = parser.isoparse(
    dates['from']).astimezone(timezone.utc)
my_until_date = last_run_date
if dates['until']:
my_until_date = parser.parse(dates['until'])
my_until_date = parser.isoparse(
dates['until']).astimezone(timezone.utc)
while my_from_date <= my_until_date:
until_date = my_from_date + timedelta(days=days_span)
if until_date > my_until_date:
until_date = my_until_date
dates = {
'from': my_from_date.strftime("%Y-%m-%d"),
'until': until_date.strftime("%Y-%m-%d")
'from': my_from_date.strftime(TIME_FORMAT),
'until': until_date.strftime(TIME_FORMAT)
}
params.update(dates)
params |= dates
rerowep marked this conversation as resolved.
Show resolved Hide resolved

try:
for record in request.ListRecords(**params):
count += 1
records = parse_xml_to_array(StringIO(record.raw))
if verbose:
from_date = my_from_date.strftime("%Y-%m-%d")
from_date = my_from_date.strftime(TIME_FORMAT)
click.echo(
f'OAI {name} spec({spec}): {from_date} '
f'count:{count:>10} = {id}'
Expand All @@ -427,7 +433,7 @@ def oai_save_records_from_dates(name, file_name, sickle, oai_item_iterator,

my_from_date = my_from_date + timedelta(days=days_span + 1)
if verbose:
from_date = my_from_date.strftime("%Y-%m-%d")
from_date = my_from_date.strftime(TIME_FORMAT)
click.echo(
f'OAI {name} spec({spec}): '
f'{from_date} .. +{days_span}'
Expand Down Expand Up @@ -695,7 +701,7 @@ def bulk_load_agent(agent, data, table, columns, bulk_count=0, verbose=False,
buffer = StringIO()
buffer_uuid = []
index = columns.index('id') if 'id' in columns else -1
start_time = datetime.now()
start_time = datetime.now(timezone.utc)
with open(data, 'r', encoding='utf-8', buffering=1) as input_file:
for line in input_file:
count += 1
Expand All @@ -706,7 +712,7 @@ def bulk_load_agent(agent, data, table, columns, bulk_count=0, verbose=False,
buffer.flush()
buffer.seek(0)
if verbose:
end_time = datetime.now()
end_time = datetime.now(timezone.utc)
diff_time = end_time - start_time
start_time = end_time
click.echo(
Expand All @@ -730,7 +736,7 @@ def bulk_load_agent(agent, data, table, columns, bulk_count=0, verbose=False,
buffer = StringIO()

if verbose:
end_time = datetime.now()
end_time = datetime.now(timezone.utc)
diff_time = end_time - start_time
click.echo(
f'{agent} copy from file: {count} {diff_time.seconds}s',
Expand Down Expand Up @@ -1113,9 +1119,7 @@ def get_timestamp(name):
:returns: time of time stamp
"""
time_stamps = current_cache.get('timestamps')
if not time_stamps:
return None
return time_stamps.get(name)
return time_stamps.get(name) if time_stamps else None


def settimestamp(func):
Expand Down