Skip to content

Commit

Permalink
merge with upstream master
Browse files Browse the repository at this point in the history
  • Loading branch information
mattiagiupponi committed Feb 4, 2022
2 parents 56a4726 + 326d701 commit 8f07103
Show file tree
Hide file tree
Showing 42 changed files with 747 additions and 306 deletions.
3 changes: 2 additions & 1 deletion .clabot
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
"italogsfernandes",
"jcaceres85",
"luorlandini",
"minardimarcello"
"minardimarcello",
"mikesname"
]
}
2 changes: 1 addition & 1 deletion geonode/base/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ def ratings(self, request, pk=None):
)
if rating_input not in range(NUM_OF_RATINGS + 1):
return HttpResponseForbidden(
"Invalid rating. It must be a value between 0 and {}".format(NUM_OF_RATINGS)
f"Invalid rating. It must be a value between 0 and {NUM_OF_RATINGS}"
)
Rating.update(
rating_object=resource,
Expand Down
2 changes: 2 additions & 0 deletions geonode/base/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,8 @@ class Meta:
'state',
'blob',
'files',
'was_approved',
'was_published'
)


Expand Down
104 changes: 44 additions & 60 deletions geonode/base/management/commands/dump_thesaurus.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#########################################################################
#
# Copyright (C) 2021 OSGeo
# Copyright (C) 2020 OSGeo
# Copyright (C) 2022 King's College London
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand All @@ -17,37 +18,43 @@
#
#########################################################################

from lxml import etree

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from rdflib import Graph, URIRef, Literal
from rdflib.namespace import DC, DCTERMS, RDF, SKOS

from geonode.base.models import Thesaurus, ThesaurusKeyword, ThesaurusKeywordLabel, ThesaurusLabel

RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
XML_URI = 'http://www.w3.org/XML/1998/namespace'
SKOS_URI = 'http://www.w3.org/2004/02/skos/core#'
DC_URI = 'http://purl.org/dc/elements/1.1/'
DCTERMS_URI = 'http://purl.org/dc/terms/'

RDF_NS = f'{{{RDF_URI}}}'
XML_NS = f'{{{XML_URI}}}'
SKOS_NS = f'{{{SKOS_URI}}}'
DC_NS = f'{{{DC_URI}}}'
DCTERMS_NS = f'{{{DCTERMS_URI}}}'


class Command(BaseCommand):

help = 'Dump a thesaurus in RDF format'
formats = sorted(['ttl', 'xml', 'pretty-xml', 'json-ld', 'nt', 'n3', 'trig'])

def add_arguments(self, parser):

# Named (optional) arguments
parser.add_argument(
'-n',
'--name',
dest='name',
help='Dump the thesaurus with the given name')

parser.add_argument(
'-f',
'--format',
dest='format',
default='pretty-xml',
help=f'Format string supported by rdflib, e.g.: pretty-xml (default), {", ".join(self.formats)}'
)

parser.add_argument(
'--default-lang',
dest='lang',
default=getattr(settings, 'THESAURUS_DEFAULT_LANG', None),
help='Default language code for untagged string literals'
)

# Named (optional) arguments
parser.add_argument(
'-l',
Expand All @@ -65,14 +72,17 @@ def handle(self, **options):
if not name and not list:
raise CommandError("Missing identifier name for the thesaurus (--name)")

if options.get('format') not in self.formats:
raise CommandError(f"Invalid output format: supported formats are {', '.join(self.formats)}")

if list:
self.list_thesauri()
return

self.dump_thesaurus(name)
self.dump_thesaurus(name, options.get('format'), options.get('lang'))

def list_thesauri(self):
print('LISTING THESAURI')
self.stderr.write(self.style.SUCCESS('LISTING THESAURI'))
max_id_len = len(max(Thesaurus.objects.values_list('identifier', flat=True), key=len))

for t in Thesaurus.objects.order_by('order').all():
Expand All @@ -82,55 +92,29 @@ def list_thesauri(self):
# DISABLED
# [0..n]
card = f'[{t.card_min}..{t.card_max if t.card_max!=-1 else "N"}] '
print(f'id:{t.id:2} sort:{t.order:3} {card} name={t.identifier.ljust(max_id_len)} title="{t.title}" URI:{t.about}')
self.stdout.write(f'id:{t.id:2} sort:{t.order:3} {card} name={t.identifier.ljust(max_id_len)} title="{t.title}" URI:{t.about}\n')

def dump_thesaurus(self, name: str, fmt: str, default_lang: str):

def dump_thesaurus(self, name):
g = Graph()
thesaurus = Thesaurus.objects.filter(identifier=name).get()
scheme = URIRef(thesaurus.about)
g.add((scheme, RDF.type, SKOS.ConceptScheme))
g.add((scheme, DC.title, Literal(thesaurus.title, lang=default_lang)))
g.add((scheme, DC.description, Literal(thesaurus.description, lang=default_lang)))
g.add((scheme, DCTERMS.issued, Literal(thesaurus.date)))

ns = {
None: SKOS_URI,
'rdf': RDF_URI,
'xml': XML_URI,
'dc': DC_URI,
'dcterms': DCTERMS_URI
}

root = etree.Element(f"{RDF_NS}RDF", nsmap=ns)
concept_scheme = etree.SubElement(root, f"{SKOS_NS}ConceptScheme")
concept_scheme.set(f"{RDF_NS}about", thesaurus.about)

# Default title
# <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/">GEMET - INSPIRE themes, version 1.0</dc:title>
title = etree.SubElement(concept_scheme, f"{DC_NS}title")
title.text = thesaurus.title

# Localized titles
# <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">Limitations on public access</dc:title>
for ltitle in ThesaurusLabel.objects.filter(thesaurus=thesaurus).all():
title = etree.SubElement(concept_scheme, f"{DC_NS}title")
title.set(f"{XML_NS}lang", ltitle.lang)
title.text = ltitle.label

d = etree.SubElement(concept_scheme, f"{DCTERMS_NS}issued")
d.text = thesaurus.date
d = etree.SubElement(concept_scheme, f"{DCTERMS_NS}modified")
d.text = thesaurus.date
for title_label in ThesaurusLabel.objects.filter(thesaurus=thesaurus).all():
g.add((scheme, DC.title, Literal(title_label.label, lang=title_label.lang)))

# Concepts
for keyword in ThesaurusKeyword.objects.filter(thesaurus=thesaurus).all():
concept = etree.SubElement(concept_scheme, f"{SKOS_NS}Concept")
if keyword.about:
concept.set(f"{RDF_NS}about", keyword.about)

concept = URIRef(keyword.about)
g.add((concept, RDF.type, SKOS.Concept))
g.add((concept, SKOS.inScheme, scheme))
if keyword.alt_label:
# <skos:altLabel>cp</skos:altLabel>
label = etree.SubElement(concept, f"{SKOS_NS}altLabel")
label.text = keyword.alt_label

g.add((concept, SKOS.altLabel, Literal(keyword.alt_label, lang=default_lang)))
for label in ThesaurusKeywordLabel.objects.filter(keyword=keyword).all():
# <skos:prefLabel xml:lang="en">Geographical grid systems</skos:prefLabel>
pref_label = etree.SubElement(concept, f"{SKOS_NS}prefLabel")
pref_label.set(f"{XML_NS}lang", label.lang)
pref_label.text = label.label
g.add((concept, SKOS.prefLabel, Literal(label.label, lang=label.lang)))

etree.dump(root, pretty_print=True)
self.stdout.write(g.serialize(format=fmt))
110 changes: 51 additions & 59 deletions geonode/base/management/commands/load_thesaurus.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#########################################################################
#
# Copyright (C) 2016 OSGeo
# Copyright (C) 2022 King's College London
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand All @@ -18,10 +19,13 @@
#########################################################################

from typing import List
from owslib.etree import etree as dlxml
from django.conf import settings

from django.conf import settings
from django.core.files.uploadedfile import UploadedFile
from django.core.management.base import BaseCommand, CommandError
from rdflib import Graph, Literal
from rdflib.namespace import RDF, SKOS, DC, DCTERMS
from rdflib.util import guess_format

from geonode.base.models import Thesaurus, ThesaurusKeyword, ThesaurusKeywordLabel, ThesaurusLabel

Expand Down Expand Up @@ -69,70 +73,63 @@ def handle(self, **options):
self.load_thesaurus(input_file, name, not dryrun)

def load_thesaurus(self, input_file, name, store):
g = Graph()

RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
XML_URI = 'http://www.w3.org/XML/1998/namespace'

ABOUT_ATTRIB = f"{{{RDF_URI}}}about"
LANG_ATTRIB = f"{{{XML_URI}}}lang"
# if the input_file is an UploadedFile object rather than a file path the Graph.parse()
# method may not have enough info to correctly guess the type; in this case supply the
# name, which should include the extension, to guess_format manually...
rdf_format = None
if isinstance(input_file, UploadedFile):
self.stderr.write(self.style.WARNING(f"Guessing RDF format from {input_file.name}..."))
rdf_format = guess_format(input_file.name)

ns = {
'rdf': RDF_URI,
'foaf': 'http://xmlns.com/foaf/0.1/',
'dc': 'http://purl.org/dc/elements/1.1/',
'dcterms': 'http://purl.org/dc/terms/',
'skos': 'http://www.w3.org/2004/02/skos/core#'
}
g.parse(input_file, format=rdf_format)

tfile = dlxml.parse(input_file)
root = tfile.getroot()

scheme = root.find('skos:ConceptScheme', ns)
if not scheme:
# With any=False, an error will be thrown here if there is more than one scheme in the file
scheme = g.value(None, RDF.type, SKOS.ConceptScheme, any=False)
if scheme is None:
raise CommandError("ConceptScheme not found in file")

titles = scheme.findall('dc:title', ns)

default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)
available_lang = get_all_lang_available_with_title(titles, LANG_ATTRIB)
thesaurus_title = determinate_value(available_lang, default_lang)

descr = scheme.find('dc:description', ns).text if scheme.find('dc:description', ns) else thesaurus_title
date_issued = scheme.find('dcterms:issued', ns).text
about = scheme.attrib.get(ABOUT_ATTRIB)
available_titles = [t for t in g.objects(scheme, DC.title) if isinstance(t, Literal)]
thesaurus_title = value_for_language(available_titles, default_lang)
description = g.value(scheme, DC.description, None, default=thesaurus_title)
date_issued = g.value(scheme, DCTERMS.issued, None, default="")

print(f'Thesaurus "{thesaurus_title}" issued at {date_issued}')
self.stderr.write(self.style.SUCCESS(f'Thesaurus "{thesaurus_title}", desc: {description} issued at {date_issued}'))

thesaurus = Thesaurus()
thesaurus.identifier = name

thesaurus.description = description
thesaurus.title = thesaurus_title
thesaurus.description = descr
thesaurus.about = about
thesaurus.about = str(scheme)
thesaurus.date = date_issued

if store:
thesaurus.save()

for lang in available_lang:
if lang[0] is not None:
for lang in available_titles:
if lang.language is not None:
thesaurus_label = ThesaurusLabel()
thesaurus_label.lang = lang[0]
thesaurus_label.label = lang[1]
thesaurus_label.lang = lang.language
thesaurus_label.label = lang.value
thesaurus_label.thesaurus = thesaurus
thesaurus_label.save()

for concept in root.findall('skos:Concept', ns):
about = concept.attrib.get(ABOUT_ATTRIB)
alt_label = concept.find('skos:altLabel', ns)
if store:
thesaurus_label.save()

for concept in g.subjects(RDF.type, SKOS.Concept):
pref = g.preferredLabel(concept, default_lang)[0][1]
about = str(concept)
alt_label = g.value(concept, SKOS.altLabel, object=None, default=None)
if alt_label is not None:
alt_label = alt_label.text
alt_label = str(alt_label)
else:
concepts = concept.findall('skos:prefLabel', ns)
available_lang = get_all_lang_available_with_title(concepts, LANG_ATTRIB)
alt_label = determinate_value(available_lang, default_lang)
available_labels = [t for t in g.objects(concept, SKOS.prefLabel) if isinstance(t, Literal)]
alt_label = value_for_language(available_labels, default_lang)

print(f'Concept {alt_label} ({about})')
self.stderr.write(self.style.SUCCESS(f'Concept {str(pref)}: {alt_label} ({about})'))

tk = ThesaurusKeyword()
tk.thesaurus = thesaurus
Expand All @@ -142,11 +139,10 @@ def load_thesaurus(self, input_file, name, store):
if store:
tk.save()

for pref_label in concept.findall('skos:prefLabel', ns):
lang = pref_label.attrib.get(LANG_ATTRIB)
label = pref_label.text

print(f' Label {lang}: {label}')
for _, pref_label in g.preferredLabel(concept):
lang = pref_label.language
label = str(pref_label)
self.stderr.write(self.style.SUCCESS(f' Label {lang}: {label}'))

tkl = ThesaurusKeywordLabel()
tkl.keyword = tk
Expand Down Expand Up @@ -181,15 +177,11 @@ def create_fake_thesaurus(self, name):
tkl.save()


def get_all_lang_available_with_title(items: List, LANG_ATTRIB: str):
return [(item.attrib.get(LANG_ATTRIB), item.text) for item in items]


def determinate_value(available_lang: List, default_lang: str):
sorted_lang = sorted(available_lang, key=lambda lang: '' if lang[0] is None else lang[0])
def value_for_language(available: List[Literal], default_lang: str) -> str:
sorted_lang = sorted(available, key=lambda literal: '' if literal.language is None else literal.language)
for item in sorted_lang:
if item[0] is None:
return item[1]
elif item[0] == default_lang:
return item[1]
return available_lang[0][1]
if item.language is None:
return str(item)
elif item.language.split("-")[0] == default_lang:
return str(item)
return str(available[0])
17 changes: 17 additions & 0 deletions geonode/base/migrations/0062_resourcebase_extra_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 2.2.24 on 2022-01-26 14:04

from django.db import migrations, models

# Schema migration for the `base` app: adds the `extra_metadata` JSON column
# to the `resourcebase` model.
# NOTE(review): the file header says "Generated by Django 2.2.24", but
# django.db.models.JSONField is only available from Django 3.1 onwards —
# confirm the minimum Django version this migration is expected to run on.
class Migration(migrations.Migration):

# Must be applied after migration 0061 of the `base` app.
dependencies = [
('base', '0061_auto_20211117_1238'),
]

# Single operation: add a new field; no data is rewritten by this migration.
operations = [
migrations.AddField(
model_name='resourcebase',
name='extra_metadata',
# Optional in forms (blank=True) and nullable in the database (null=True);
# default=list is a callable, so each new row gets its own empty list.
field=models.JSONField(blank=True, default=list, null=True),
),
]
Loading

0 comments on commit 8f07103

Please sign in to comment.