diff --git a/django/library/doi.py b/django/library/doi.py index 1ede754a0..ae9c60ab1 100644 --- a/django/library/doi.py +++ b/django/library/doi.py @@ -18,7 +18,7 @@ DataCiteRegistrationLog, ) -from datacite import DataCiteRESTClient, schema43 +from datacite import DataCiteRESTClient, schema45 from datacite.errors import ( DataCiteError, DataCiteNoContentError, @@ -183,7 +183,7 @@ def _datacite_heartbeat_url(self): def _validate_metadata(self, datacite_metadata: DataCiteSchema): metadata_dict = datacite_metadata.to_dict() - if not schema43.validate(metadata_dict): + if not schema45.validate(metadata_dict): logger.error("Invalid DataCite metadata: %s", metadata_dict) raise DataCiteError(f"Invalid DataCite metadata: {metadata_dict}") return datacite_metadata, metadata_dict @@ -202,16 +202,16 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease): return "XX.DRYXX/XXXX-XRUN", True if hasattr(codebase_or_release, "datacite"): del codebase_or_release.datacite - datacite_metadata, metadata_dict = self._validate_metadata( - codebase_or_release.datacite - ) + doi = "Unassigned" http_status = 200 message = "Minted new DOI successfully." 
+ datacite_metadata = codebase_or_release.datacite + try: doi = self.datacite_client.public_doi( - metadata_dict, url=codebase_or_release.permanent_url + datacite_metadata.to_dict(), url=codebase_or_release.permanent_url ) except DataCiteError as e: logger.error(e) diff --git a/django/library/management/commands/fix_existing_dois_03.py b/django/library/management/commands/doi_mint_parent_codebase_dois_03.py similarity index 100% rename from django/library/management/commands/fix_existing_dois_03.py rename to django/library/management/commands/doi_mint_parent_codebase_dois_03.py diff --git a/django/library/management/commands/delete_all_existing_codebase_dois_01.py b/django/library/management/commands/doi_remove_codebase_dois_01.py similarity index 84% rename from django/library/management/commands/delete_all_existing_codebase_dois_01.py rename to django/library/management/commands/doi_remove_codebase_dois_01.py index 613bc02e1..1f732bc16 100644 --- a/django/library/management/commands/delete_all_existing_codebase_dois_01.py +++ b/django/library/management/commands/doi_remove_codebase_dois_01.py @@ -10,11 +10,9 @@ def remove_existing_codebase_dois(interactive=True, dry_run=True): print(get_welcome_message(dry_run)) - codebases_with_dois = Codebase.objects.exclude(doi__isnull=True) + codebases_with_dois = Codebase.objects.with_doi() - logger.info( - f"Removing DOIs for {len(codebases_with_dois)} Codebases. Query: Codebase.objects.exclude(doi__isnull=True) ..." - ) + logger.info("Removing all Codebase DOIs") if interactive and codebases_with_dois.exists(): confirm = input( "WARNING: this will remove all existing codebase DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: " @@ -30,10 +28,6 @@ def remove_existing_codebase_dois(interactive=True, dry_run=True): logger.info("Aborting.") sys.exit() - logger.info( - "All DOIs from {len(codebases_with_dois)} codebases deleted successfully." 
- ) - """ assert correctness """ diff --git a/django/library/management/commands/clean_peer_reviewed_dois_02.py b/django/library/management/commands/doi_remove_unreviewed_dois_02.py similarity index 77% rename from django/library/management/commands/clean_peer_reviewed_dois_02.py rename to django/library/management/commands/doi_remove_unreviewed_dois_02.py index 44ca82a29..cfbac1402 100644 --- a/django/library/management/commands/clean_peer_reviewed_dois_02.py +++ b/django/library/management/commands/doi_remove_unreviewed_dois_02.py @@ -1,4 +1,5 @@ import logging +import sys from django.core.management.base import BaseCommand from library.doi import VERIFICATION_MESSAGE, get_welcome_message from library.models import CodebaseRelease @@ -9,9 +10,7 @@ def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True): print(get_welcome_message(dry_run)) - unreviewed_releases_with_dois = CodebaseRelease.objects.filter( - peer_reviewed=False, doi__isnull=False - ) + unreviewed_releases_with_dois = CodebaseRelease.objects.unreviewed().with_doi() total_unreviewed_releases_with_dois = unreviewed_releases_with_dois.count() logger.info( @@ -24,6 +23,9 @@ def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True): ) if confirm.lower() == "delete": unreviewed_releases_with_dois.update(doi=None) + else: + logger.debug("Aborting...") + sys.exit() """ assert correctness @@ -31,16 +33,11 @@ def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True): if not dry_run: print(VERIFICATION_MESSAGE) logger.info( - "Checking that DOIs for all not peer reviewed releases have been deleted..." - ) - assert ( - CodebaseRelease.objects.filter( - peer_reviewed=False, doi__isnull=False - ).count() - == 0 + "Checking that DOIs for all unreviewed releases have been deleted..." 
) + assert not CodebaseRelease.objects.unreviewed().with_doi().exists() logger.info( - "All DOIs from not peer_reviewed CodebaseReleases %s with DOIs deleted successfully.", + "%s unreviewed CodebaseReleases with DOIs updated successfully.", total_unreviewed_releases_with_dois, ) diff --git a/django/library/management/commands/doi_reset_staging.py b/django/library/management/commands/doi_reset_staging.py new file mode 100644 index 000000000..169d5ddcb --- /dev/null +++ b/django/library/management/commands/doi_reset_staging.py @@ -0,0 +1,73 @@ +import csv +import logging +import sys +from django.conf import settings +from django.core.management.base import BaseCommand +from library.doi import VERIFICATION_MESSAGE, get_welcome_message, DataCiteApi +from library.models import Codebase, CodebaseRelease + +logger = logging.getLogger(__name__) + + +def reset_all_dois(interactive=True, dry_run=True): + print(get_welcome_message(dry_run)) + if settings.DEPLOY_ENVIRONMENT.is_production: + logger.error("This command is not allowed in production.") + sys.exit() + logger.info("(ENV: %s) Removing all DOIs", settings.DEPLOY_ENVIRONMENT) + releases_with_dois = CodebaseRelease.objects.with_doi() + codebases_with_dois = Codebase.objects.with_doi() + confirm = input( + "WARNING: this will remove ALL existing DOIs and is unrecoverable. 
Type 'DELETE' to continue or Ctrl+C to quit: " + ) + if confirm.lower() == "delete": + with open("codebase_dois.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["Codebase ID", "Codebase DOI"]) + for codebase in codebases_with_dois: + writer.writerow([codebase.pk, codebase.doi]) + Codebase.objects.update(doi=None) + with open("release_doi.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "CodebaseRelease DOI"]) + for release in releases_with_dois: + writer.writerow([release.pk, release.doi]) + # clear release DOIs only after the CSV backup above has been written; the verification below expects none to remain + CodebaseRelease.objects.update(doi=None) + else: + logger.info("Aborting.") + sys.exit() + + """ + assert correctness + """ + if not dry_run: + print(VERIFICATION_MESSAGE) + assert Codebase.objects.with_doi().count() == 0 + assert CodebaseRelease.objects.with_doi().count() == 0 + logger.info("Success. All existing codebase DOIs deleted.") + + """ Mint DOIs for all new Peer Reviewed Releases""" + peer_reviewed_releases = CodebaseRelease.objects.reviewed() + datacite_api = DataCiteApi(dry_run=dry_run) + for release in peer_reviewed_releases: + datacite_api.mint_new_doi_for_release(release) + + +class Command(BaseCommand): + + def add_arguments(self, parser): + parser.add_argument( + "--interactive", + action="store_true", + help="Wait for user to press enter to continue.", + default=True, + ) + parser.add_argument( + "--dry-run", action="store_true", help="Output what would have happened." 
+ ) + + def handle(self, *args, **options): + interactive = options["interactive"] + dry_run = options["dry_run"] + reset_all_dois(interactive, dry_run) diff --git a/django/library/management/commands/update_metadata_for_all_existing_dois_04.py b/django/library/management/commands/doi_update_doi_metadata_04.py similarity index 81% rename from django/library/management/commands/update_metadata_for_all_existing_dois_04.py rename to django/library/management/commands/doi_update_doi_metadata_04.py index 1a691ca24..d43ae0bfc 100644 --- a/django/library/management/commands/update_metadata_for_all_existing_dois_04.py +++ b/django/library/management/commands/doi_update_doi_metadata_04.py @@ -12,10 +12,11 @@ def update_doi_metadata(interactive=True, dry_run=True): datacite_api = DataCiteApi(dry_run=dry_run) all_codebases_with_dois = Codebase.objects.with_doi() + total_number_of_codebases_with_dois = all_codebases_with_dois.count() logger.info( "Updating metadata for all codebases (%s) with DOIs and their releases with DOIs. 
...", - all_codebases_with_dois.count(), + total_number_of_codebases_with_dois, ) for i, codebase in enumerate(all_codebases_with_dois): @@ -23,7 +24,7 @@ def update_doi_metadata(interactive=True, dry_run=True): "Processing codebase %s - %s/%s", codebase.pk, i + 1, - all_codebases_with_dois.count(), + total_number_of_codebases_with_dois, ) if interactive: input("Press Enter to continue or CTRL+C to quit...") @@ -68,39 +69,38 @@ def update_doi_metadata(interactive=True, dry_run=True): """ if not dry_run: print(VERIFICATION_MESSAGE) - logger.info("Checking that Comses metadata is in sync with DataCite...") - invalid_codebases = [] + logger.info("Checking that local metadata is in sync with DataCite...") invalid_releases = [] results = datacite_api.threaded_metadata_check(all_codebases_with_dois) - for pk, is_meta_valid in results: - if not is_meta_valid: - invalid_codebases.append(pk) - + invalid_codebases = [ + pk for pk, is_valid_metadata in results if not is_valid_metadata + ] if invalid_codebases: logger.error( - "Failure. Metadata not in sync with DataCite for %s codebases: %s", + "FAILURE: Metadata not in sync with DataCite for %s codebases: %s", - invalid_codebases.count(), + len(invalid_codebases), invalid_codebases, ) else: logger.info( - "Success. Metadata in sync with DataCite for all codebases with DOI." + "SUCCESS: Metadata in sync with DataCite for all codebases with DOI." ) all_releases_with_dois = CodebaseRelease.objects.with_doi() results = datacite_api.threaded_metadata_check(all_releases_with_dois) - for pk, is_meta_valid in results: - if not is_meta_valid: - invalid_releases.append(pk) - + invalid_releases = [ + pk for pk, is_valid_metadata in results if not is_valid_metadata + ] if invalid_releases: logger.error( - f"Failure. Metadata not in sync with DataCite for {len(invalid_releases)} releases: {invalid_releases}" + "FAILURE: Metadata not in sync with DataCite for %s releases: %s", + len(invalid_releases), + invalid_releases, ) else: logger.info( - f"Success. 
Metadata in sync with DataCite for all releases with DOI." + "SUCCESS: Metadata in sync with DataCite for all releases with DOI." ) diff --git a/django/library/migrations/0031_dataciteregistrationlog_and_more.py b/django/library/migrations/0031_dataciteregistrationlog_and_more.py new file mode 100644 index 000000000..ef45abe60 --- /dev/null +++ b/django/library/migrations/0031_dataciteregistrationlog_and_more.py @@ -0,0 +1,96 @@ +# Generated by Django 4.2.16 on 2024-10-29 21:28 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("library", "0030_peerreviewinvitation"), + ] + + operations = [ + migrations.CreateModel( + name="DataCiteRegistrationLog", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "action", + models.CharField( + choices=[ + ("CREATE_RELEASE_DOI", "create release DOI"), + ("CREATE_CODEBASE_DOI", "create codebase DOI"), + ("UPDATE_RELEASE_METADATA", "update release metadata"), + ("UPDATE_CODEBASE_METADATA", "update codebase metadata"), + ], + max_length=50, + ), + ), + ("timestamp", models.DateTimeField(auto_now_add=True)), + ("http_status", models.IntegerField(default=None, null=True)), + ("message", models.TextField(default=None, null=True)), + ("metadata_hash", models.CharField(max_length=255)), + ("doi", models.CharField(blank=True, max_length=255, null=True)), + ], + ), + migrations.AlterModelOptions( + name="peerreviewinvitation", + options={"ordering": ["-date_sent"]}, + ), + migrations.RemoveField( + model_name="contributor", + name="affiliations", + ), + migrations.AlterField( + model_name="codebase", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="codebaserelease", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + 
model_name="codebasereleasedownload", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="peerreviewinvitation", + name="date_sent", + field=models.DateTimeField(auto_now=True), + ), + migrations.DeleteModel( + name="ContributorAffiliation", + ), + migrations.AddField( + model_name="dataciteregistrationlog", + name="codebase", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="datacite_logs", + to="library.codebase", + ), + ), + migrations.AddField( + model_name="dataciteregistrationlog", + name="release", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="datacite_logs", + to="library.codebaserelease", + ), + ), + ] diff --git a/django/library/models.py b/django/library/models.py index da4d784b7..461a050c5 100644 --- a/django/library/models.py +++ b/django/library/models.py @@ -1132,6 +1132,9 @@ def accessible(self, user): def reviewed(self, **kwargs): return self.filter(peer_reviewed=True, **kwargs) + def unreviewed(self, **kwargs): + return self.exclude(peer_reviewed=True).filter(**kwargs) + def with_doi(self, **kwargs): return self.exclude(Q(doi__isnull=True) | Q(doi="")).filter(**kwargs) diff --git a/django/requirements.txt b/django/requirements.txt index b5a7e1356..d67dbd045 100644 --- a/django/requirements.txt +++ b/django/requirements.txt @@ -1,6 +1,6 @@ bagit==1.8.1 bleach==6.1.0 -datacite==1.1.4 +datacite==1.2.0 dedupe==3.0.2 django-allauth==0.63.6 django-anymail[mailgun]==10.3 @@ -24,7 +24,7 @@ django-vite==2.1.3 # latest is 3.0.4 django-waffle==4.1.0 djangorestframework==3.15.2 djangorestframework-camel-case==1.4.2 -Django==4.2.15 +Django==4.2.16 elasticsearch-dsl>=7.0.0,<8.0.0 elasticsearch>=7.0.0,<8.0.0 html2text>=2016.9.19