Skip to content

Commit

Permalink
fs counter handler easier to maintain
Browse files (browse the repository at this point in the history)
  • Loading branch information
arcangelo7 committed Aug 11, 2024
1 parent 5b1dd62 commit 88b3b82
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 15 deletions.
31 changes: 23 additions & 8 deletions oc_meta/run/find_duplicated_entities.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -53,16 +53,31 @@ def main():
parser = argparse.ArgumentParser(description='Run a SPARQL query and save results to CSV.')
parser.add_argument('endpoint', type=str, help='SPARQL endpoint URL')
parser.add_argument('csv_path', type=str, help='Path to the CSV file to save results')
parser.add_argument('entity_type', type=str, choices=['id', 'br', 'ra'], help='Type of entity: "id" or "br"')
args = parser.parse_args()

sparql_query = """
PREFIX datacite: <http://purl.org/spar/datacite/>
PREFIX literal: <http://www.essepuntato.it/2010/06/literalreification/>
SELECT ?entity1 ?entity2 {
?id ^datacite:hasIdentifier ?entity1, ?entity2.
FILTER(?entity1 != ?entity2 )
}
"""
if args.entity_type == 'id':
sparql_query = """
PREFIX datacite: <http://purl.org/spar/datacite/>
PREFIX literal: <http://www.essepuntato.it/2010/06/literalreification/>
SELECT ?entity1 ?entity2 {
?id ^datacite:hasIdentifier ?entity1, ?entity2.
FILTER(?entity1 != ?entity2 )
}
"""
elif args.entity_type == 'ra':
sparql_query = """
PREFIX datacite: <http://purl.org/spar/datacite/>
PREFIX literal: <http://www.essepuntato.it/2010/06/literalreification/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?entity1 ?entity2 {
?entity1 datacite:hasIdentifier ?id;
a foaf:Agent.
?entity2 datacite:hasIdentifier ?id;
a foaf:Agent.
FILTER(?entity1 != ?entity2 )
}
"""

results = execute_sparql_query(args.endpoint, sparql_query)
final_entities = find_surviving_entities(results)
Expand Down
File renamed without changes.
3 changes: 3 additions & 0 deletions oc_meta/run/upload/on_triplestore.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -82,6 +82,9 @@ def split_queries(file_path, batch_size):
return generate_sparql_queries(quads_to_add, quads_to_remove, batch_size)

def upload_sparql_updates(endpoint, folder, batch_size):
if not os.path.exists(folder):
return

processed_files = load_cache()
failed_files = []

Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,7 +28,7 @@ oc-ds-converter = "1.0.3"
ijson = "^3.2.3"
internetarchive = "^3.7.0"
zenodopy = "^0.3.0"
oc-ocdm = "8.2.1"
oc-ocdm = "8.2.2"
retrying = "^1.3.4"

[tool.poetry.dev-dependencies]
Expand Down
4 changes: 2 additions & 2 deletions test/editor_test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -159,10 +159,10 @@ def read_and_normalize_file(filepath):
graph_storer.upload_all(endpoint)
editor = MetaEditor(META_CONFIG, 'https://orcid.org/0000-0002-8420-0696')
editor.merge(URIRef('https://w3id.org/oc/meta/ra/06107'), URIRef('https://w3id.org/oc/meta/ra/06205'))
expected_lines_0610 = ['1 \n', '1 \n', '1 \n', '1 \n', '1 \n', '1 \n', '2 \n']
expected_lines_0610 = ['1\n', '1\n', '1\n', '1\n', '1\n', '1\n', '2\n']
normalized_lines_0610 = read_and_normalize_file(os.path.join(OUTPUT, 'info_dir', '0610', 'creator', 'prov_file_ra.txt'))
self.assertEqual(normalized_lines_0610, expected_lines_0610)
expected_lines_0620 = [' \n', ' \n', ' \n', ' \n', '2 \n']
expected_lines_0620 = ['\n', '\n', '\n', '\n', '2\n']
normalized_lines_0620 = read_and_normalize_file(os.path.join(OUTPUT, 'info_dir', '0620', 'creator', 'prov_file_ra.txt'))
self.assertEqual(normalized_lines_0620, expected_lines_0620)
for filepath in [
Expand Down

0 comments on commit 88b3b82

Please sign in to comment.