Skip to content

Commit

Permalink
YDA-5829: troubleshooting tool for published data packages
Browse files Browse the repository at this point in the history
(Backport to Yoda 1.9)

Co-authored-by: claravox <[email protected]>
Co-authored-by: Sirjan <[email protected]>
  • Loading branch information
3 people authored and stsnel committed Dec 2, 2024
1 parent 170d8ea commit 53cf5dc
Show file tree
Hide file tree
Showing 14 changed files with 982 additions and 79 deletions.
49 changes: 25 additions & 24 deletions __init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,30 +24,31 @@
# Import all modules containing rules into the package namespace,
# so that they become visible to iRODS.

from browse import *
from folder import *
from groups import *
from json_datacite import *
from json_landing_page import *
from mail import *
from meta import *
from meta_form import *
from provenance import *
from research import *
from resources import *
from schema import *
from schema_transformation import *
from schema_transformations import *
from vault import *
from datacite import *
from epic import *
from publication import *
from policies import *
from replication import *
from revisions import *
from settings import *
from notifications import *
from integration_tests import *
from browse import *
from folder import *
from groups import *
from json_datacite import *
from json_landing_page import *
from mail import *
from meta import *
from meta_form import *
from provenance import *
from research import *
from resources import *
from schema import *
from schema_transformation import *
from schema_transformations import *
from publication_troubleshoot import *
from vault import *
from datacite import *
from epic import *
from publication import *
from policies import *
from replication import *
from revisions import *
from settings import *
from notifications import *
from integration_tests import *

# Import certain modules only when enabled.
from .util.config import config
Expand Down
27 changes: 27 additions & 0 deletions integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,27 @@ def _test_avu_rmw_collection(ctx, rmw_attributes):
return result


def _test_avu_get_attr_val_of_coll(ctx, attr, value):
    """Set an attribute on a temporary collection and read its value back.

    :param ctx:   Combined type of a callback and rei struct
    :param attr:  Attribute name to add to the temporary collection
    :param value: Attribute value to add to the temporary collection

    :returns: Whatever avu.get_attr_val_of_coll returns for the attribute
    """
    tmp_coll = _create_tmp_collection(ctx)
    try:
        # "baz" is passed as the last AVU argument (presumably the unit - verify against msi_add_avu).
        ctx.msi_add_avu('-c', tmp_coll, attr, value, "baz")
        return avu.get_attr_val_of_coll(ctx, tmp_coll, attr)
    finally:
        # Clean up even when adding or reading the AVU fails, so that a failing
        # test does not leave its temporary collection behind.
        collection.remove(ctx, tmp_coll)


def _test_avu_get_attr_val_of_coll_exception(ctx):
    """Check that getting a non-existing attribute of a collection raises an exception.

    :param ctx: Combined type of a callback and rei struct

    :returns: True if an exception was raised, False otherwise
    """
    tmp_coll = _create_tmp_collection(ctx)
    try:
        avu.get_attr_val_of_coll(ctx, tmp_coll, "foo")
    except Exception:
        raised = True
    else:
        # The lookup unexpectedly succeeded. Report this as False rather than
        # returning the looked-up value, which could be truthy and would then
        # make the test pass by accident.
        raised = False
    finally:
        collection.remove(ctx, tmp_coll)
    return raised


def _test_folder_set_retry_avus(ctx):
tmp_coll = _create_tmp_collection(ctx)
folder.folder_secure_set_retry_avus(ctx, tmp_coll, 2)
Expand Down Expand Up @@ -413,6 +434,12 @@ def _test_folder_secure_func(ctx, func):
"check": lambda x: (("aap", "noot", "mies") in x
and len([a for a in x if a[0] not in ["org_replication_scheduled"]]) == 1
)},
{"name": "avu.get_attr_val_of_coll.exists.yes",
"test": lambda ctx: _test_avu_get_attr_val_of_coll(ctx, "foo", "bar"),
"check": lambda x: x == "bar"},
{"name": "avu.get_attr_val_of_coll.exists.no",
"test": lambda ctx: _test_avu_get_attr_val_of_coll_exception(ctx),
"check": lambda x: x},
{"name": "avu.apply_atomic_operations.collection",
"test": lambda ctx: _test_msvc_apply_atomic_operations_collection(ctx),
"check": lambda x: (("foo", "bar", "baz") in x and len(x) == 1)},
Expand Down
50 changes: 49 additions & 1 deletion meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from deepdiff import DeepDiff

import avu_json
import meta_form
import provenance
import publication
import schema as schema_
Expand Down Expand Up @@ -709,4 +710,51 @@ def copy_user_metadata(ctx, source, target):

log.write(ctx, "rule_copy_user_metadata: copied user metadata from <{}> to <{}>".format(source, target))
except Exception:
log.write(ctx, "rule_copy_user_metadata: failed to copy user metadata from <{}> to <{}>".format(source, target))
log.write(ctx, "copy_user_metadata: failed to copy user metadata from <{}> to <{}/original>".format(source, target))


def vault_metadata_matches_schema(ctx, coll_name, schema_cache, report_name, write_stdout):
    """Process a single data package to retrieve and validate that its metadata conforms to the schema.

    :param ctx:          Combined type of a callback and rei struct
    :param coll_name:    String representing the data package collection path
    :param schema_cache: Dictionary storing schema contents by schema short name, can be empty.
                         It is extended in place whenever a schema has to be read.
    :param report_name:  Name of report script (for logging)
    :param write_stdout: A boolean representing whether to write to stdout or rodsLog

    :returns: A dictionary with the schema short name ("schema") and whether the metadata
              matches it ("match_schema"), or None if the metadata could not be found or read
    """
    metadata_path = get_latest_vault_metadata_path(ctx, coll_name)

    if not metadata_path:
        log.write(ctx, "{} skips {}, because metadata could not be found.".format(report_name, coll_name), write_stdout)
        return None

    try:
        metadata = jsonutil.read(ctx, metadata_path)
    except Exception as exc:
        log.write(ctx, "{} skips {}, because of exception while reading metadata file {}: {}".format(report_name, coll_name, metadata_path, str(exc)), write_stdout)
        log.write(ctx, "vault_metadata_matches_schema: Error while reading metadata file {} of data package {}: {}".format(metadata_path, coll_name, str(exc)), write_stdout)
        return None

    # Determine schema; the short name is the second-to-last segment of the schema id.
    schema_id = schema_.get_schema_id(ctx, metadata_path)
    schema_shortname = schema_id.split("/")[-2]

    # Retrieve schema and cache it for future use. The schema path is only
    # resolved on a cache miss, since it is not needed for a cached schema.
    if schema_shortname not in schema_cache:
        schema_path = schema_.get_schema_path_by_id(ctx, metadata_path, schema_id)
        schema_cache[schema_shortname] = jsonutil.read(ctx, schema_path)
    schema_contents = schema_cache[schema_shortname]

    # Check whether metadata matches schema and log any errors
    error_list = get_json_metadata_errors(ctx, metadata_path, metadata=metadata, schema=schema_contents)
    match_schema = len(error_list) == 0
    if not match_schema:
        errors_formatted = [meta_form.humanize_validation_error(e).encode('utf-8') for e in error_list]
        log.write(ctx, "{}: metadata {} did not match schema {}: {}".format(report_name, metadata_path, schema_shortname, str(errors_formatted)), write_stdout)
        log.write(ctx, "vault_metadata_matches_schema: Metadata {} of data package {} did not match the schema {}. Error list: {}".format(metadata_path, coll_name, schema_shortname, str(errors_formatted)), write_stdout)

    return {"schema": schema_shortname, "match_schema": match_schema}
27 changes: 26 additions & 1 deletion publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -1327,7 +1327,32 @@ def rule_update_publication(ctx, vault_package, update_datacite, update_landingp
:returns: "OK" if all went ok
"""
return update_publication(ctx, vault_package, update_datacite == 'Yes', update_landingpage == 'Yes', update_moai == 'Yes')
if user.user_type(ctx) != 'rodsadmin':
log.write(ctx, "User is no rodsadmin", True)
return

log.write(ctx, "[UPDATE PUBLICATIONS] Start for {}".format(vault_package), True)
collections = genquery.row_iterator(
"COLL_NAME",
"COLL_NAME like '%%/home/vault-%%' "
"AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "vault_status' "
"AND META_COLL_ATTR_VALUE = '{}'".format(str(constants.vault_package_state.PUBLISHED)),
genquery.AS_LIST,
ctx
)

packages_found = False
for collection in collections:
coll_name = collection[0]
if ((vault_package == '*' and re.match(r'/[^/]+/home/vault-.*', coll_name)) or (vault_package != '*' and re.match(r'/[^/]+/home/vault-.*', coll_name) and coll_name == vault_package)):
packages_found = True
output = update_publication(ctx, coll_name, update_datacite == 'Yes', update_landingpage == 'Yes', update_moai == 'Yes')
log.write(ctx, coll_name + ': ' + output, True)

if not packages_found:
log.write(ctx, "[UPDATE PUBLICATIONS] No packages found for {}".format(vault_package), True)
else:
log.write(ctx, "[UPDATE PUBLICATIONS] Finished for {}".format(vault_package), True)


def update_publication(ctx, vault_package, update_datacite=False, update_landingpage=False, update_moai=False):
Expand Down
Loading

0 comments on commit 53cf5dc

Please sign in to comment.