Skip to content

Commit

Permalink
refactor: prefix doi management commands
Browse files Browse the repository at this point in the history
- prefix all one-off destructive DOI commands with `doi_`
- add reset_staging to mint new DOIs on staging using the datacite
  sandbox, doi_reset_staging -> step 3, doi_mint_parent_codebase_dois
- bump deps for datacite schema 4.5 and django cve
  • Loading branch information
alee committed Nov 14, 2024
1 parent 9c2678f commit 60340cf
Show file tree
Hide file tree
Showing 15 changed files with 464 additions and 310 deletions.
63 changes: 43 additions & 20 deletions django/library/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
DataCiteRegistrationLog,
)

from datacite import DataCiteRESTClient, schema43
from datacite import DataCiteRESTClient, schema45
from datacite.errors import (
DataCiteError,
DataCiteNoContentError,
Expand Down Expand Up @@ -183,8 +183,12 @@ def _datacite_heartbeat_url(self):

def _validate_metadata(self, datacite_metadata: DataCiteSchema):
    """Validate DataCite metadata against the DataCite 4.5 JSON schema.

    Args:
        datacite_metadata: metadata object exposing ``to_dict()``.

    Returns:
        A ``(datacite_metadata, metadata_dict)`` tuple, where ``metadata_dict``
        is the dictionary that was validated.

    Raises:
        DataCiteError: if ``schema45.validator.validate`` raises for the
            metadata dictionary. The original exception is chained as the cause.
    """
    metadata_dict = datacite_metadata.to_dict()
    try:
        schema45.validator.validate(metadata_dict)
    except Exception as e:
        # One placeholder per argument: with a single %s and two arguments the
        # logging module fails to format the record and the message is lost.
        logger.error(
            "Invalid DataCite metadata: %s (%s)",
            schema45.tostring(metadata_dict),
            e,
        )
        raise DataCiteError(f"Invalid DataCite metadata: {metadata_dict}") from e
    return datacite_metadata, metadata_dict

Expand All @@ -202,17 +206,22 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease):
return "XX.DRYXX/XXXX-XRUN", True
if hasattr(codebase_or_release, "datacite"):
del codebase_or_release.datacite
datacite_metadata, metadata_dict = self._validate_metadata(
codebase_or_release.datacite
)

doi = "Unassigned"
http_status = 200
message = "Minted new DOI successfully."

datacite_metadata = codebase_or_release.datacite

try:
datacite_metadata, metadata_dict = self._validate_metadata(
datacite_metadata
)
doi = self.datacite_client.public_doi(
metadata_dict, url=codebase_or_release.permanent_url
)
codebase_or_release.doi = doi
codebase_or_release.save()
except DataCiteError as e:
logger.error(e)
message = str(e)
Expand All @@ -235,7 +244,27 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease):
self._save_log_record(**log_record_dict)
return doi, http_status == 200

@classmethod
def is_metadata_fresh(cls, codebase_or_release: Codebase | CodebaseRelease):
    """Report whether the latest registration log matches the item's current metadata.

    Looks up the newest ``DataCiteRegistrationLog`` entry for the item, drops
    any ``datacite`` attribute already present on it so the metadata is
    re-read, and compares the logged ``metadata_hash`` with the hash of the
    item's current ``datacite`` metadata.

    Returns:
        The result of the hash comparison, or ``False`` when no registration
        log exists for the item.
    """
    try:
        latest_log = DataCiteRegistrationLog.objects.latest_entry(codebase_or_release)
        # discard any datacite metadata already held on the item before hashing
        if hasattr(codebase_or_release, "datacite"):
            del codebase_or_release.datacite
        logged_hash = latest_log.metadata_hash
        current_hash = codebase_or_release.datacite.hash()
        return logged_hash == current_hash
    except DataCiteRegistrationLog.DoesNotExist:
        # nothing has been registered for this item yet, so treat it as stale
        logger.info("No registration logs available for %s", codebase_or_release)
        return False

def update_doi_metadata(self, codebase_or_release: Codebase | CodebaseRelease):
if self.is_metadata_fresh(codebase_or_release):
logger.info("No need to update DOI metadata for %s", codebase_or_release)
return True
doi = codebase_or_release.doi
if self.dry_run:
logger.debug("DRY RUN")
Expand Down Expand Up @@ -278,16 +307,10 @@ def update_doi_metadata(self, codebase_or_release: Codebase | CodebaseRelease):
self._save_log_record(**log_record_dict)
return http_status == 200

def mint_new_doi_for_codebase(self, codebase: Codebase) -> str:
return self.mint_public_doi(codebase)

def mint_new_doi_for_release(self, release: CodebaseRelease) -> str:
return self.mint_public_doi(release)

def update_metadata_for_codebase(self, codebase: Codebase) -> bool:
def update_codebase_metadata(self, codebase: Codebase) -> bool:
return self.update_doi_metadata(codebase)

def update_metadata_for_release(self, release: CodebaseRelease) -> bool:
def update_release_metadata(self, release: CodebaseRelease) -> bool:
return self.update_doi_metadata(release)

@staticmethod
Expand Down Expand Up @@ -524,7 +547,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run=
"""
if not codebase_doi:
# request to DataCite API
codebase_doi = datacite_api.mint_new_doi_for_codebase(codebase)
codebase_doi = datacite_api.mint_public_doi(codebase)

if not codebase_doi:
logger.error(
Expand All @@ -544,7 +567,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run=
Mint DOI for release
"""
# request to DataCite API
release_doi = datacite_api.mint_new_doi_for_release(release)
release_doi = datacite_api.mint_public_doi(release)
if not release_doi:
logger.error("Could not mint DOI for release %s. Skipping.", release.pk)
if interactive:
Expand All @@ -559,7 +582,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run=
"""
Since a new DOI has been minted for the release, we need to update its parent's metadata (HasVersion)
"""
ok = datacite_api.update_metadata_for_codebase(codebase)
ok = datacite_api.update_codebase_metadata(codebase)
if not ok:
logger.error("Failed to update metadata for codebase %s", codebase.pk)

Expand All @@ -572,15 +595,15 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run=
next_release = release.get_next_release()

if previous_release and previous_release.doi:
ok = datacite_api.update_metadata_for_release(previous_release)
ok = datacite_api.update_release_metadata(previous_release)
if not ok:
logger.error(
"Failed to update metadata for previous_release %s",
previous_release.pk,
)

if next_release and next_release.doi:
ok = datacite_api.update_metadata_for_release(next_release)
ok = datacite_api.update_release_metadata(next_release)
if not ok:
logger.error(
"Failed to update metadata for next_release %s", next_release.pk
Expand Down Expand Up @@ -619,7 +642,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run=
if invalid_codebases:
logger.error(
"FAILURE: %s Codebases with invalid or missing DOIs: %s",
invalid_codebases.count(),
len(invalid_codebases),
invalid_codebases,
)
else:
Expand Down
64 changes: 0 additions & 64 deletions django/library/management/commands/clean_peer_reviewed_dois_02.py

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def update_existing_dois(interactive=True, dry_run=True):
total_peer_reviewed_releases_count = peer_reviewed_releases.count()

logger.info(
"Updating DOIs for %s peer reviewed CodebaseReleases with DOIs",
"Updating DOIs for parent Codebases of %s peer reviewed CodebaseReleases with DOIs",
total_peer_reviewed_releases_count,
)

Expand All @@ -56,7 +56,7 @@ def update_existing_dois(interactive=True, dry_run=True):
if not codebase_doi:
# request to DataCite API
logger.debug("Minting DOI for parent codebase: %s", codebase.pk)
codebase_doi, success = datacite_api.mint_new_doi_for_codebase(codebase)
codebase_doi, success = datacite_api.mint_public_doi(codebase)

if not success:
logger.error(
Expand Down Expand Up @@ -108,7 +108,7 @@ def update_existing_dois(interactive=True, dry_run=True):
release_doi,
)
# set up DataCite API request to mint new DOI
release_doi, success = datacite_api.mint_new_doi_for_release(release)
release_doi, success = datacite_api.mint_public_doi(release)
if not success:
logger.error(
"Could not mint DOI for release %s. DOI: %s. Skipping.",
Expand Down Expand Up @@ -139,7 +139,7 @@ def update_existing_dois(interactive=True, dry_run=True):
release_doi,
)
# request to DataCite API: mint new DOI!
release_doi, success = datacite_api.mint_new_doi_for_release(release)
release_doi, success = datacite_api.mint_public_doi(release)
if not success:
logger.error(
"Could not mint DOI for release %s. DOI: %s. Skipping.",
Expand Down Expand Up @@ -214,7 +214,7 @@ def add_arguments(self, parser):
"--interactive",
action="store_true",
help="Wait for user to press enter to continue.",
default=True,
default=False,
)
parser.add_argument(
"--dry-run", action="store_true", help="Output what would have happened."
Expand Down
Loading

0 comments on commit 60340cf

Please sign in to comment.