Skip to content

Commit

Permalink
concepts: add idref
Browse files Browse the repository at this point in the history
Kubernetes OAI harvesting cronjobs `idref` and `gnd` must be changed to
`agents.idref` and `agents.gnd`.
A new Kubernetes OAI harvest cronjob for `concepts.idref` must be created.

Co-Authored-by: Peter Weber <[email protected]>
  • Loading branch information
rerowep committed Aug 31, 2022
1 parent cb3728a commit efada4e
Show file tree
Hide file tree
Showing 58 changed files with 15,195 additions and 725 deletions.
12,608 changes: 12,608 additions & 0 deletions data/cidref.json

Large diffs are not rendered by default.

177 changes: 177 additions & 0 deletions data/cidref_metadata.csv

Large diffs are not rendered by default.

177 changes: 177 additions & 0 deletions data/cidref_pidstore.csv

Large diffs are not rendered by default.

68 changes: 34 additions & 34 deletions data/corero.json
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,10 @@
"name": "Médecine"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Abdomen",
"source": "MeSH"
},
}],
"pid": "A021001001",
"identifiedBy": [
{
Expand Down Expand Up @@ -190,10 +190,10 @@
"name": "Technique"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Storage batteries",
"source": "LCSH"
},
}],
"pid": "A021001003",
"identifiedBy": [
{
Expand Down Expand Up @@ -272,10 +272,10 @@
"name": "Linguistique générale"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Speech acts (Linguistics)",
"source": "LCSH"
},
}],
"pid": "A021001004",
"identifiedBy": [
{
Expand Down Expand Up @@ -345,10 +345,10 @@
"name": "Audiovisuel"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Motion picture actors and actresses",
"source": "LCSH"
},
}],
"pid": "A021001005",
"identifiedBy": [
{
Expand Down Expand Up @@ -457,10 +457,10 @@
"name": "Arts du spectacle"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Actresses",
"source": "LCSH"
},
}],
"pid": "A021001007",
"identifiedBy": [
{
Expand Down Expand Up @@ -533,10 +533,10 @@
"name": "Technique"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Adhesives",
"source": "LCSH"
},
}],
"pid": "A021001008",
"identifiedBy": [
{
Expand Down Expand Up @@ -618,10 +618,10 @@
"name": "Administration publique"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Administration publique",
"source": "RVMLaval"
},
}],
"pid": "A021001009",
"identifiedBy": [
{
Expand Down Expand Up @@ -699,10 +699,10 @@
"name": "Technique"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Aerodynamics",
"source": "LCSH"
},
}],
"pid": "A021001012",
"identifiedBy": [
{
Expand Down Expand Up @@ -821,10 +821,10 @@
"name": "Physique"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Aerodynamics, Supersonic",
"source": "LCSH"
},
}],
"pid": "A021001013",
"identifiedBy": [
{
Expand Down Expand Up @@ -891,10 +891,10 @@
"name": "Physique"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Aerodynamics, Transonic",
"source": "LCSH"
},
}],
"pid": "A021001014",
"identifiedBy": [
{
Expand Down Expand Up @@ -947,10 +947,10 @@
"name": "Technique"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Ground-effect machines",
"source": "LCSH"
},
}],
"pid": "A021001016",
"identifiedBy": [
{
Expand Down Expand Up @@ -1015,10 +1015,10 @@
"name": "Économie domestique"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Lamb (Meat)",
"source": "LCSH"
},
}],
"pid": "A021001017",
"identifiedBy": [
{
Expand Down Expand Up @@ -1073,10 +1073,10 @@
"name": "Psychologie"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Aggressiveness",
"source": "LCSH"
},
}],
"pid": "A021001018",
"identifiedBy": [
{
Expand Down Expand Up @@ -1153,10 +1153,10 @@
"name": "Agriculture"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Organic farming",
"source": "LCSH"
},
}],
"pid": "A021001021",
"identifiedBy": [
{
Expand Down Expand Up @@ -1240,10 +1240,10 @@
"name": "Technique"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Airbus aircraft",
"source": "LCSH"
},
}],
"pid": "A021001023",
"identifiedBy": [
{
Expand Down Expand Up @@ -1311,10 +1311,10 @@
"name": "Parapsychologie, occultisme et ésotérisme"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Alchemy",
"source": "LCSH"
},
}],
"pid": "A021001024",
"identifiedBy": [
{
Expand Down Expand Up @@ -1382,10 +1382,10 @@
"name": "Langues"
}
],
"closeMatch": {
"closeMatch": [{
"authorized_access_point": "Alemannic dialects",
"source": "LCSH"
},
}],
"pid": "A021001025",
"identifiedBy": [
{
Expand Down
34 changes: 17 additions & 17 deletions data/corero_metadata.csv

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions data/oaisources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,21 @@
#
# OAI-PMH connection settings

idref:
agents.idref:
# http://documentation.abes.fr/aideidrefoai/index.html
baseurl: https://www.idref.fr/OAI/oai.jsp
metadataprefix: marc-xml
comment: 'idref'
setspecs: 'a b'
gnd:
agents.gnd:
# https://www.dnb.de/EN/Professionell/Metadatendienste/Datenbezug/OAI/oai_node.html
baseurl: http://services.dnb.de/oai/repository
metadataprefix: MARC21-xml
comment: 'gnd'
setspecs: 'authorities:kongress authorities:koerperschaft authorities:person'
concepts.idref:
# http://documentation.abes.fr/aideidrefoai/index.html
baseurl: https://www.idref.fr/OAI/oai.jsp
metadataprefix: marc-xml
comment: 'idref'
setspecs: 'j l'
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ agents_idref = "rero_mef.agents.idref.models"
agents_rero = "rero_mef.agents.rero.models"
concepts_mef = "rero_mef.concepts.mef.models"
concepts_rero = "rero_mef.concepts.rero.models"
concepts_idref = "rero_mef.concepts.idref.models"

[tool.poetry.plugins."invenio_pidstore.minters"]
mef_id = "rero_mef.agents.mef.minters:mef_id_minter"
Expand All @@ -126,6 +127,7 @@ agent_idref_id = "rero_mef.agents.idref.minters:idref_id_minter"
agent_rero_id = "rero_mef.agents.rero.minters:rero_id_minter"
concept_mef_id = "rero_mef.concepts.mef.minters:mef_id_minter"
concept_rero_id = "rero_mef.concepts.rero.minters:rero_id_minter"
concept_idref_id = "rero_mef.concepts.idref.minters:idref_id_minter"

[tool.poetry.plugins."invenio_pidstore.fetchers"]
mef_id = "rero_mef.agents.mef.fetchers:mef_id_fetcher"
Expand All @@ -135,6 +137,7 @@ agent_idref_id = "rero_mef.agents.idref.fetchers:idref_id_fetcher"
agent_rero_id = "rero_mef.agents.rero.fetchers:rero_id_fetcher"
concept_mef_id = "rero_mef.concepts.mef.fetchers:mef_id_fetcher"
concept_rero_id = "rero_mef.concepts.rero.fetchers:rero_id_fetcher"
concept_idref_id = "rero_mef.concepts.idref.fetchers:idref_id_fetcher"

[tool.poetry.plugins."invenio_jsonschemas.schemas"]
common = "rero_mef.jsonschemas"
Expand All @@ -145,6 +148,7 @@ agents_idref = "rero_mef.agents.idref.jsonschemas"
agents_rero = "rero_mef.agents.rero.jsonschemas"
cocepts_mef = "rero_mef.concepts.mef.jsonschemas"
cocepts_rero = "rero_mef.concepts.rero.jsonschemas"
cocepts_idref = "rero_mef.concepts.idref.jsonschemas"

[tool.poetry.plugins."invenio_search.mappings"]
mef = "rero_mef.agents.mef.mappings"
Expand All @@ -154,6 +158,7 @@ agents_idref = "rero_mef.agents.idref.mappings"
agents_rero = "rero_mef.agents.rero.mappings"
concepts_mef = "rero_mef.concepts.mef.mappings"
concepts_rero = "rero_mef.concepts.rero.mappings"
concepts_idref = "rero_mef.concepts.idref.mappings"

[tool.poetry.plugins."invenio_records.jsonresolver"]
mef = "rero_mef.agents.mef.jsonresolvers.mef_resolver"
Expand All @@ -163,6 +168,7 @@ agents_idref = "rero_mef.agents.idref.jsonresolvers.idref_resolver"
agents_rero = "rero_mef.agents.rero.jsonresolvers.rero_resolver"
concepts_mef = "rero_mef.concepts.mef.jsonresolvers.mef_resolver"
concepts_rero = "rero_mef.concepts.rero.jsonresolvers.rero_resolver"
concepts_idref = "rero_mef.concepts.idref.jsonresolvers.idref_resolver"

[tool.poetry.plugins."invenio_base.api_blueprints"]
api_rero_mef = "rero_mef.theme.views:api_blueprint"
Expand Down
18 changes: 7 additions & 11 deletions rero_mef/agents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ class AgentRecord(ReroMefRecord):
def __init__(self, *args, **kwargs):
"""Init class."""
super().__init__(*args, **kwargs)
self.agent = self.name

@classmethod
def create(cls, data, id_=None, delete_pid=False, dbcommit=False,
Expand Down Expand Up @@ -115,16 +114,13 @@ def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False,
agent=self,
online=online
)

from .mef.api import AgentMefRecord
ref_string = build_ref_string(
agent=self.agent,
agent_pid=self.pid)

mef_data = {self.agent: {'$ref': ref_string}}
mef_record = AgentMefRecord.get_mef_by_entity_pid(
entity_pid=self.pid,
entity_name=self.name
agent=self.name,
agent_pid=self.pid
)
mef_data = {self.name: {'$ref': ref_string}}
mef_record = AgentMefRecord.get_mef_by_entity_pid(self.pid, self.name)
if viaf_record:
mef_data['viaf_pid'] = viaf_record.pid
if not mef_record:
Expand Down Expand Up @@ -172,7 +168,7 @@ def delete_from_mef(self, dbcommit=False, reindex=False, verbose=False):
if mef_record:
old_mef_pid = mef_record.pid
if not mef_record.deleted:
mef_record.pop(self.agent, None)
mef_record.pop(self.name, None)
mef_action = Action.DELETEAGENT
mef_record = mef_record.replace(
data=mef_record,
Expand All @@ -198,7 +194,7 @@ def delete_from_mef(self, dbcommit=False, reindex=False, verbose=False):
AgentMefRecord.flush_indexes()
if verbose:
click.echo(
f'Delete {self.agent}: {self.pid} '
f'Delete {self.name}: {self.pid} '
f'from mef: {old_mef_pid} {mef_action.value} '
f'new mef: {mef_record.pid}'
)
Expand Down
7 changes: 3 additions & 4 deletions rero_mef/agents/gnd/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def process_records_from_dates(from_date=None, until_date=None,
:param until_date: The upper bound date for the harvesting (optional).
"""
return oai_process_records_from_dates(
name='gnd',
name='agents.gnd',
sickle=Sickle,
max_retries=current_app.config.get('RERO_OAI_RETRIES', 0),
oai_item_iterator=MyOAIItemIterator,
Expand Down Expand Up @@ -73,7 +73,7 @@ def save_records_from_dates(file_name, from_date=None, until_date=None,
"""
# data on IDREF Servers starts on 2000-10-01
return oai_save_records_from_dates(
name='gnd',
name='agents.gnd',
file_name=file_name,
sickle=Sickle,
max_retries=current_app.config.get('RERO_OAI_RETRIES', 0),
Expand All @@ -90,9 +90,8 @@ def gnd_get_record(id, verbose=False, debug=False):
"""Get a record from GND OAI repo."""
return oai_get_record(
id=id,
name='gnd',
name='agents.gnd',
transformation=Transformation,
record_cls=AgentGndRecord,
access_token=current_app.config.get('RERO_OAI_GND_TOKEN'),
identifier='oai:dnb.de/authorities/',
verbose=verbose,
Expand Down
7 changes: 3 additions & 4 deletions rero_mef/agents/idref/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def process_records_from_dates(from_date=None, until_date=None,
"""
# data on IDREF Servers starts on 2000-10-01
return oai_process_records_from_dates(
name='idref',
name='agents.idref',
sickle=MySickle,
max_retries=current_app.config.get('RERO_OAI_RETRIES', 0),
oai_item_iterator=MyOAIItemIterator,
Expand Down Expand Up @@ -102,7 +102,7 @@ def save_records_from_dates(file_name, from_date=None, until_date=None,
"""
# data on IDREF Servers starts on 2000-10-01
return oai_save_records_from_dates(
name='idref',
name='agents.idref',
file_name=file_name,
sickle=MySickle,
max_retries=current_app.config.get('RERO_OAI_RETRIES', 0),
Expand All @@ -119,9 +119,8 @@ def idref_get_record(id, verbose=False, debug=False):
"""Get a record from IdRef OAI repo."""
return oai_get_record(
id=id,
name='idref',
name='agents.idref',
transformation=Transformation,
record_cls=AgentIdrefRecord,
identifier='oai:IdRefOAIServer.fr:',
verbose=verbose,
debug=debug
Expand Down
Loading

0 comments on commit efada4e

Please sign in to comment.