From a475f0803dc4ff9ef395f5d68d1377c78807d40b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 12:12:30 -0400 Subject: [PATCH 01/23] Redefine semantic uri for metadata terms with no explicit uri, refactor --- .../iq/dataverse/DatasetFieldType.java | 13 +++- .../harvard/iq/dataverse/MetadataBlock.java | 19 ++++++ .../iq/dataverse/util/bagit/OREMap.java | 60 +++---------------- .../iq/dataverse/util/json/JSONLDUtil.java | 30 +++------- 4 files changed, 44 insertions(+), 78 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java index a092cdad784..81b763089b3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.search.SolrField; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import java.util.Collection; @@ -54,7 +55,7 @@ public void setId(Long id) { /** * The internal, DDI-like name, no spaces, etc. 
*/ - @Column(name = "name", columnDefinition = "TEXT", nullable = false) + @Column(name = "name", columnDefinition = "TEXT", nullable = false, unique=true) private String name; /** @@ -303,12 +304,20 @@ public void setMetadataBlock(MetadataBlock metadataBlock) { /** * A formal URI for the field used in json-ld exports */ - @Column(name = "uri", columnDefinition = "TEXT") + @Column(name = "uri", columnDefinition = "TEXT", unique=true) private String uri; public String getUri() { return uri; } + + public JsonLDTerm getJsonLDTerm() { + if(uri!=null) { + return new JsonLDTerm(name,uri); + } else { + return new JsonLDTerm(metadataBlock.getJsonLDNamespace(), name); + } + } public void setUri(String uri) { this.uri=uri; diff --git a/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java b/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java index 1a1a87b1b87..039915c7201 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java +++ b/src/main/java/edu/harvard/iq/dataverse/MetadataBlock.java @@ -1,6 +1,8 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonLDNamespace; import java.io.Serializable; import java.util.List; @@ -66,9 +68,26 @@ public void setName(String name) { public String getNamespaceUri() { return namespaceUri; } + public void setNamespaceUri(String namespaceUri) { this.namespaceUri = namespaceUri; } + + private String getAssignedNamespaceUri() { + String nsUri = getNamespaceUri(); + // Standard blocks will have a namespaceUri + if (nsUri == null) { + // Locally created/edited blocks, legacy blocks may not have a defined + // namespaceUri, so generate one that indicates that this is a locally defined + // term + nsUri = SystemConfig.getDataverseSiteUrlStatic() + "/schema/" + name + "#"; + } + return nsUri; + } + + public JsonLDNamespace getJsonLDNamespace() { + return JsonLDNamespace.defineNamespace(name, 
getAssignedNamespaceUri()); + } @OneToMany(mappedBy = "metadataBlock", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) @OrderBy("displayOrder") diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 38a04b36314..9f190bd605b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -98,7 +98,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except if (excludeEmail && DatasetFieldType.FieldType.EMAIL.equals(dfType.getFieldType())) { continue; } - JsonLDTerm fieldName = getTermFor(dfType); + JsonLDTerm fieldName = dfType.getJsonLDTerm(); if (fieldName.inNamespace()) { localContext.putIfAbsent(fieldName.getNamespace().getPrefix(), fieldName.getNamespace().getUrl()); } else { @@ -144,7 +144,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except if (!dsf.isEmpty()) { // Add context entry // ToDo - also needs to recurse here? 
- JsonLDTerm subFieldName = getTermFor(dfType, dsft); + JsonLDTerm subFieldName = dsft.getJsonLDTerm(); if (subFieldName.inNamespace()) { localContext.putIfAbsent(subFieldName.getNamespace().getPrefix(), subFieldName.getNamespace().getUrl()); @@ -359,11 +359,11 @@ public JsonLDTerm getContactTerm() { } public JsonLDTerm getContactNameTerm() { - return getTermFor(DatasetFieldConstant.datasetContact, DatasetFieldConstant.datasetContactName); + return getTermFor(DatasetFieldConstant.datasetContactName); } public JsonLDTerm getContactEmailTerm() { - return getTermFor(DatasetFieldConstant.datasetContact, DatasetFieldConstant.datasetContactEmail); + return getTermFor(DatasetFieldConstant.datasetContactEmail); } public JsonLDTerm getDescriptionTerm() { @@ -371,61 +371,15 @@ public JsonLDTerm getDescriptionTerm() { } public JsonLDTerm getDescriptionTextTerm() { - return getTermFor(DatasetFieldConstant.description, DatasetFieldConstant.descriptionText); + return getTermFor(DatasetFieldConstant.descriptionText); } private JsonLDTerm getTermFor(String fieldTypeName) { + //Could call datasetFieldService.findByName(fieldTypeName) - is that faster/preferable? 
for (DatasetField dsf : version.getDatasetFields()) { DatasetFieldType dsft = dsf.getDatasetFieldType(); if (dsft.getName().equals(fieldTypeName)) { - return getTermFor(dsft); - } - } - return null; - } - - private JsonLDTerm getTermFor(DatasetFieldType dsft) { - if (dsft.getUri() != null) { - return new JsonLDTerm(dsft.getTitle(), dsft.getUri()); - } else { - String namespaceUri = dsft.getMetadataBlock().getNamespaceUri(); - if (namespaceUri == null) { - namespaceUri = SystemConfig.getDataverseSiteUrlStatic() + "/schema/" + dsft.getMetadataBlock().getName() - + "#"; - } - JsonLDNamespace blockNamespace = JsonLDNamespace.defineNamespace(dsft.getMetadataBlock().getName(), namespaceUri); - return new JsonLDTerm(blockNamespace, dsft.getTitle()); - } - } - - private JsonLDTerm getTermFor(DatasetFieldType dfType, DatasetFieldType dsft) { - if (dsft.getUri() != null) { - return new JsonLDTerm(dsft.getTitle(), dsft.getUri()); - } else { - // Use metadatablock URI or custom URI for this field based on the path - String subFieldNamespaceUri = dfType.getMetadataBlock().getNamespaceUri(); - if (subFieldNamespaceUri == null) { - subFieldNamespaceUri = SystemConfig.getDataverseSiteUrlStatic() + "/schema/" - + dfType.getMetadataBlock().getName() + "/"; - } - subFieldNamespaceUri = subFieldNamespaceUri + dfType.getName() + "#"; - JsonLDNamespace fieldNamespace = JsonLDNamespace.defineNamespace(dfType.getName(), subFieldNamespaceUri); - return new JsonLDTerm(fieldNamespace, dsft.getTitle()); - } - } - - private JsonLDTerm getTermFor(String type, String subType) { - for (DatasetField dsf : version.getDatasetFields()) { - DatasetFieldType dsft = dsf.getDatasetFieldType(); - if (dsft.getName().equals(type)) { - for (DatasetFieldCompoundValue dscv : dsf.getDatasetFieldCompoundValues()) { - for (DatasetField subField : dscv.getChildDatasetFields()) { - DatasetFieldType subFieldType = subField.getDatasetFieldType(); - if (subFieldType.getName().equals(subType)) { - return 
getTermFor(dsft, subFieldType); - } - } - } + return dsft.getJsonLDTerm(); } } return null; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java index 3fdacbdc8de..2417bebecd1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java @@ -74,7 +74,8 @@ public static JsonObject getContext(Map contextMap) { } public static Dataset updateDatasetMDFromJsonLD(Dataset ds, String jsonLDBody, - MetadataBlockServiceBean metadataBlockSvc, DatasetFieldServiceBean datasetFieldSvc, boolean append, boolean migrating, LicenseServiceBean licenseSvc) { + MetadataBlockServiceBean metadataBlockSvc, DatasetFieldServiceBean datasetFieldSvc, boolean append, + boolean migrating, LicenseServiceBean licenseSvc) { DatasetVersion dsv = new DatasetVersion(); @@ -488,27 +489,13 @@ private static JsonArray getValues(JsonValue val, boolean allowMultiples, String static Map localContext = new TreeMap(); static Map dsftMap = new TreeMap(); + //A map of DatasetFieldTypes by decontextualized URL private static void populateFieldTypeMap(MetadataBlockServiceBean metadataBlockSvc) { if (dsftMap.isEmpty()) { - List mdbList = metadataBlockSvc.listMetadataBlocks(); - for (MetadataBlock mdb : mdbList) { - boolean blockHasUri = mdb.getNamespaceUri() != null; for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) { - if (dsft.getUri() != null) { - dsftMap.put(dsft.getUri(), dsft); - } - if (blockHasUri) { - if (dsft.getParentDatasetFieldType() != null) { - // ToDo - why not getName for child type? 
Would have to fix in ORE generation - // code and handle legacy bags - dsftMap.put(mdb.getNamespaceUri() + dsft.getParentDatasetFieldType().getName() + "#" - + dsft.getTitle(), dsft); - } else { - dsftMap.put(mdb.getNamespaceUri() + dsft.getTitle(), dsft); - } - } + dsftMap.put(dsft.getJsonLDTerm().getUrl(), dsft); } } logger.fine("DSFT Map: " + String.join(", ", dsftMap.keySet())); @@ -519,15 +506,12 @@ public static void populateContext(MetadataBlockServiceBean metadataBlockSvc) { if (localContext.isEmpty()) { List mdbList = metadataBlockSvc.listMetadataBlocks(); - for (MetadataBlock mdb : mdbList) { - boolean blockHasUri = mdb.getNamespaceUri() != null; - if (blockHasUri) { - JsonLDNamespace.defineNamespace(mdb.getName(), mdb.getNamespaceUri()); - } + //Assures the mdb's namespace is in the list checked by JsonLDNamespace.isInNamespace() below + mdb.getJsonLDNamespace(); for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) { if ((dsft.getUri() != null) && !JsonLDNamespace.isInNamespace(dsft.getUri())) { - //Add term if uri exists and it's not in one of the namespaces already defined + // Add term if uri exists and it's not in one of the namespaces already defined localContext.putIfAbsent(dsft.getName(), dsft.getUri()); } } From cc9a5f96713514e618ec01c35da0db6656a7d6a4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 14:17:14 -0400 Subject: [PATCH 02/23] update examples, remove obsolete oai-ore file --- .../source/_static/api/dataset-create.jsonld | 14 +++--- .../source/_static/api/dataset-migrate.jsonld | 48 +++++++++---------- .../search/tests/data/dataset-finch1.jsonld | 14 +++--- src/test/resources/oai-ore.jsonld | 1 - 4 files changed, 36 insertions(+), 41 deletions(-) delete mode 100644 src/test/resources/oai-ore.jsonld diff --git a/doc/sphinx-guides/source/_static/api/dataset-create.jsonld b/doc/sphinx-guides/source/_static/api/dataset-create.jsonld index 16861ff64ad..a010e30bf7b 100644 --- 
a/doc/sphinx-guides/source/_static/api/dataset-create.jsonld +++ b/doc/sphinx-guides/source/_static/api/dataset-create.jsonld @@ -2,14 +2,14 @@ "http://purl.org/dc/terms/title": "Darwin's Finches", "http://purl.org/dc/terms/subject": "Medicine, Health and Life Sciences", "http://purl.org/dc/terms/creator": { - "https://dataverse.org/schema/citation/author#Name": "Finch, Fiona", - "https://dataverse.org/schema/citation/author#Affiliation": "Birds Inc." + "https://dataverse.org/schema/citation/authorName": "Finch, Fiona", + "https://dataverse.org/schema/citation/authorAffiliation": "Birds Inc." }, - "https://dataverse.org/schema/citation/Contact": { - "https://dataverse.org/schema/citation/datasetContact#E-mail": "finch@mailinator.com", - "https://dataverse.org/schema/citation/datasetContact#Name": "Finch, Fiona" + "https://dataverse.org/schema/citation/datasetContact": { + "https://dataverse.org/schema/citation/datasetContactEmail": "finch@mailinator.com", + "https://dataverse.org/schema/citation/datasetContactName": "Finch, Fiona" }, - "https://dataverse.org/schema/citation/Description": { - "https://dataverse.org/schema/citation/dsDescription#Text": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds." + "https://dataverse.org/schema/citation/dsDescription": { + "https://dataverse.org/schema/citation/dsDescriptionValue": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds." 
} } \ No newline at end of file diff --git a/doc/sphinx-guides/source/_static/api/dataset-migrate.jsonld b/doc/sphinx-guides/source/_static/api/dataset-migrate.jsonld index f79dbd30d8f..8f43d1dd6e9 100644 --- a/doc/sphinx-guides/source/_static/api/dataset-migrate.jsonld +++ b/doc/sphinx-guides/source/_static/api/dataset-migrate.jsonld @@ -1,25 +1,25 @@ { -"citation:Depositor": "Admin, Dataverse", -"Title": "Test Dataset", -"Subject": "Computer and Information Science", -"Creator": { - "author:Name": "Admin, Dataverse", - "author:Affiliation": "GDCC" +"citation:depositor": "Admin, Dataverse", +"title": "Test Dataset", +"subject": "Computer and Information Science", +"author": { + "citation:authorName": "Admin, Dataverse", + "citation:authorAffiliation": "GDCC" }, -"Deposit Date": "2020-10-08", -"citation:Distributor": { - "distributor:Name": "Demo Dataverse Repository", - "distributor:Affiliation": "Dataverse Community", - "distributor:Abbreviation": "GDCC", - "distributor:URL": "https://dataverse.org/global-dataverse-community-consortium" +"dateOfDeposit": "2020-10-08", +"citation:distributor": { + "citation:distributorName": "Demo Dataverse Repository", + "citation:distributorAffiliation": "Dataverse Community", + "citation:distributorAbbreviation": "GDCC", + "citation:distributorURL": "https://dataverse.org/global-dataverse-community-consortium" }, -"citation:Contact": { -"datasetContact:Name": "Admin, Dataverse", -"datasetContact:Affiliation": "GDCC", -"datasetContact:E-mail": "admin@demo.dataverse.org" +"citation:datasetContact": { +"citation:datasetContactName": "Admin, Dataverse", +"citation:datasetContactAffiliation": "GDCC", +"citation:datasetContactEmail": "admin@demo.dataverse.org" }, -"citation:Description": { - "dsDescription:Text": "A short description" +"citation:dsDescription": { + "citation:dsDescriptionValue": "A short description" }, "@id": "doi:10.33564/FK27U7YBV", "schema:version": "1.0", @@ -29,15 +29,11 @@ "dvcore:fileRequestAccess": false }, 
"@context": { - "Creator": "http://purl.org/dc/terms/creator", - "Deposit Date": "http://purl.org/dc/terms/dateSubmitted", - "Subject": "http://purl.org/dc/terms/subject", - "Title": "http://purl.org/dc/terms/title", - "author": "https://dataverse.org/schema/citation/author#", + "author": "http://purl.org/dc/terms/creator", + "dateOfDeposit": "http://purl.org/dc/terms/dateSubmitted", + "subject": "http://purl.org/dc/terms/subject", + "title": "http://purl.org/dc/terms/title", "citation": "https://dataverse.org/schema/citation/", - "datasetContact": "https://dataverse.org/schema/citation/datasetContact#", - "distributor": "https://dataverse.org/schema/citation/distributor#", - "dsDescription": "https://dataverse.org/schema/citation/dsDescription#", "dvcore": "https://dataverse.org/schema/core#", "schema": "http://schema.org/" }} diff --git a/scripts/search/tests/data/dataset-finch1.jsonld b/scripts/search/tests/data/dataset-finch1.jsonld index be39c9f14b2..c49eed74749 100644 --- a/scripts/search/tests/data/dataset-finch1.jsonld +++ b/scripts/search/tests/data/dataset-finch1.jsonld @@ -3,15 +3,15 @@ "http://purl.org/dc/terms/title": "Darwin's Finches", "http://purl.org/dc/terms/subject": "Medicine, Health and Life Sciences", "http://purl.org/dc/terms/creator": { - "https://dataverse.org/schema/citation/author#Name": "Finch, Fiona", - "https://dataverse.org/schema/citation/author#Affiliation": "Birds Inc." + "https://dataverse.org/schema/citation/authorName": "Finch, Fiona", + "https://dataverse.org/schema/citation/authorAffiliation": "Birds Inc." 
}, - "https://dataverse.org/schema/citation/Contact": { - "https://dataverse.org/schema/citation/datasetContact#E-mail": "finch@mailinator.com", - "https://dataverse.org/schema/citation/datasetContact#Name": "Finch, Fiona" + "https://dataverse.org/schema/citation/datasetContact": { + "https://dataverse.org/schema/citation/datasetContactEmail": "finch@mailinator.com", + "https://dataverse.org/schema/citation/datasetContactName": "Finch, Fiona" }, - "https://dataverse.org/schema/citation/Description": { - "https://dataverse.org/schema/citation/dsDescription#Text": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds." + "https://dataverse.org/schema/citation/dsDescription": { + "https://dataverse.org/schema/citation/dsDescriptionValue": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds." }, "@type": [ "http://www.openarchives.org/ore/terms/Aggregation", diff --git a/src/test/resources/oai-ore.jsonld b/src/test/resources/oai-ore.jsonld deleted file mode 100644 index 08a524e6750..00000000000 --- a/src/test/resources/oai-ore.jsonld +++ /dev/null @@ -1 +0,0 @@ -{"dcterms:modified":"2018-09-26","dcterms:creator":"Qualitative Data Repository","@type":"ore:ResourceMap","@id":"https://dv.dev-aws.qdr.org/api/datasets/export?exporter=OAI_ORE&persistentId=doi:10.5072/FK2HYIXMY","ore:describes":{"citation:Contact":{"datasetContact:Name":"Myers, Jim","datasetContact:Affiliation":"QDR","datasetContact:E-mail":"qqmyers@hotmail.com"},"citation:Description":{"dsDescription:Text":"This is a sample dataset with a few files and lots of metadata designed to show how QDR's OAI-ORE and BagIT capabilities work.\r\nThe metadata is solely for demonstration purposes and does not accurately describe the 
content!","dsDescription:NoLongerUsedDate":"2018-05-22"},"citation:Keywords":[{"keyword:Term":"preservation"},{"keyword:Term":"documentation"}],"Creator":{"author:Name":"Myers, Jim","Identifier Scheme":"ORCID","ORCID":"https://orcid.org/0000-0001-8462-650X"},"Related Publication":{"Citation":" J. Myers, M. Hedstrom, D. Akmon, S. Payette, B. A. Plale, I. Kouper, S. McCaulay, R. McDonald, I. Suriarachchi, A. Varadharaju et al., \"Towards sustainable curation and preservation: The sead project's data services approach\", e-Science (e-Science) 2015 IEEE 11th International Conference on. IEEE, pp. 485-494, 2015.","Identifier Type":"doi","Identifier":"doi:10.1109/eScience.2015.56","URL":"https://doi.org/10.1109/eScience.2015.56"},"Time Period Covered":{"timePeriodCovered:Start":"2018-05-01","timePeriodCovered:End":"2018-05-22"},"citation:Notes":"QDR has adapted some of the OAI-ORE and BagIT conventions from SEAD and DataOne.","Subject":"Computer and Information Science","Title":"OAI-ORE and BagIt demonstration","citation:Depositor":"Myers, Jim","Deposit Date":"2018-05-22","citation:Series":{"series:Name":"Use of Standards in the Social Sciences"},"Grant Information":{"grantNumber:Grant Agency":"NSF","grantNumber:Grant Number":"1424191 "},"Software":{"software:Name":"Chrome","software:Version":"current"},"citation:Distributor":{"distributor:Name":"Qualitative Data Repository","distributor:Affiliation":"Syracuse University","distributor:Abbreviation":"QDR","distributor:URL":"https://qdr.syr.edu"},"citation:Producer":{"producer:Name":"Bagore, Locoai"},"Geographic Bounding Box":{"geographicBoundingBox:West Longitude":"0","geographicBoundingBox:East Longitude":"100","geographicBoundingBox:North Latitude":"50","geographicBoundingBox:South Latitude":"150"},"citation:QDRID":{"otherId:QDRID":"-00000001"},"Geographic Coverage":{"geographicCoverage:Country / Nation":"Åland Islands"},"Contributor":{"contributor:Type":"Data Curator","contributor:Name":"Myers, 
Jim"},"citation:Production Place":"Albany, NY","citation:Related Material":"This data publication is archived as a single Zip file, formatted according to the BagIT specification (https://tools.ietf.org/html/draft-kunze-bagit-11), with metadata documented according to the Open Archives Initiative Object Reuse and Exchange (ORE, https://www.openarchives.org/ore/) specification (serialized as JSON-LD (http://www.openarchives.org/ore/0.9/jsonld), integrated with BagIT according to the convention (http://jenkins-1.dataone.org/documentation/unstable/API-Documentation-development/design/DataPackage.html#package-serialization-using-bagit) developed within the DataOne project.)","Language":"Nuosu","citation:NoLongerUsedSubtitle":"Preservation of DV content","Type of Data Project":"example","Data Sources":"QDR Website","citation:Characteristic of Sources Noted":"teaching materials","Related Data Projects":"Dataverse","Alternative Title":"How to pack your Bags","Alternative URL":"https://unavailable.org/1234","citation:Production Date":"2018-05-23","@id":"doi:10.5072/FK2HYIXMY","@type":["ore:Aggregation","schema:Dataset"],"schema:version":"1.0","schema:datePublished":"2018-05-22","schema:name":"OAI-ORE and BagIt demonstration","schema:dateModified":"Tue May 22 13:51:34 UTC 2018","schema:license":"http://creativecommons.org/publicdomain/zero/1.0","dvcore:confidentialityDeclaration":"These materials are not confidential.","dvcore:citationRequirements":"Please cite https://github.com/QualitativeDataRepository/dataverse/wiki/Data-and-Metadata-Packaging-for-Archiving .","dvcore:conditions":"Only on Tuesdays.","dvcore:disclaimer":"Caveat browser.","dvcore:fileTermsOfAccess":{"dvcore:termsOfAccess":"Broadly available.","dvcore:fileRequestAccess":false,"dvcore:originalArchive":"qdr.syr.edu","dvcore:availabilityStatus":"posted on 05-22-2018","dvcore:sizeOfCollection":"small"},"schema:includedInDataCatalog":"QDR Main Collection","ore:aggregates":[{"schema:description":"Available at 
qdr.syr.edu","schema:name":"Handout 1 (Management Intervention).pdf","dvcore:restricted":false,"dvcore:directoryLabel":"Handouts","schema:version":2,"dvcore:datasetVersionId":55,"dvcore:categories":["Documentation"],"@id":"doi:10.5072/FK2HYIXMY/ABZMQI","schema:sameAs":"https://dv.dev-aws.qdr.org/api/access/datafile/:persistentId?persistentId=doi:10.5072/FK2HYIXMY/ABZMQI","@type":"ore:AggregatedResource","schema:fileFormat":"application/pdf","dvcore:filesize":99014,"dvcore:storageIdentifier":"163880ce3e3-e73e49dca91e","dvcore:originalFormatLabel":"UNKNOWN","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"SHA-512","@value":"ae249017710fa0b0e62440ee62f43c3f0e44fa4936a42320db3c5b2ede29b31c81bdb793531fb970a7e22ae1c37f81c947ab2bd178dfe121604626e6aae97514"}},{"schema:description":"This is a file from the QDR website qdr.syr.edu.","schema:name":"Handout 2 (Data Management Checklist).pdf","dvcore:restricted":false,"dvcore:directoryLabel":"Handouts","schema:version":1,"dvcore:datasetVersionId":55,"@id":"doi:10.5072/FK2HYIXMY/GNCTTN","schema:sameAs":"https://dv.dev-aws.qdr.org/api/access/datafile/:persistentId?persistentId=doi:10.5072/FK2HYIXMY/GNCTTN","@type":"ore:AggregatedResource","schema:fileFormat":"application/pdf","dvcore:filesize":78408,"dvcore:storageIdentifier":"163880ce3d5-a1941ddb87e6","dvcore:originalFormatLabel":"UNKNOWN","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"SHA-512","@value":"8b3db45486713b1b3a4603e03a0f64c2596afd39bab0ebc429340a85589fb2a0b992a3b833cd02a49b0dc2ec41509fc0a5a1b9f3eaa3d2f76817af2b96244ae7"}},{"schema:name":"QDR - Data Management and Sharing - 
presentation.pptx","dvcore:restricted":false,"schema:version":1,"dvcore:datasetVersionId":55,"@id":"doi:10.5072/FK2HYIXMY/2LRK1L","schema:sameAs":"https://dv.dev-aws.qdr.org/api/access/datafile/:persistentId?persistentId=doi:10.5072/FK2HYIXMY/2LRK1L","@type":"ore:AggregatedResource","schema:fileFormat":"application/vnd.openxmlformats-officedocument.presentationml.presentation","dvcore:filesize":1032481,"dvcore:storageIdentifier":"163880ce687-51cae10bf0c6","dvcore:originalFormatLabel":"UNKNOWN","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"SHA-512","@value":"b057b65cff5cd893ed4b18d0e1414db169b69b03941fcc2f2b5d96efca5932db04968a52ef68c5781415447b141dde2866319f429b10db8533741161fbb5c098"}},{"schema:name":"QDR - Data Management and Sharing - Syllabus insert.docx","dvcore:restricted":false,"schema:version":2,"dvcore:datasetVersionId":55,"dvcore:categories":["Code","Data"],"@id":"doi:10.5072/FK2HYIXMY/RHMACI","schema:sameAs":"https://dv.dev-aws.qdr.org/api/access/datafile/:persistentId?persistentId=doi:10.5072/FK2HYIXMY/RHMACI","@type":"ore:AggregatedResource","schema:fileFormat":"application/vnd.openxmlformats-officedocument.wordprocessingml.document","dvcore:filesize":17889,"dvcore:storageIdentifier":"163880ce80e-f7f8cfdbb10c","dvcore:originalFormatLabel":"UNKNOWN","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"SHA-512","@value":"bde6f69b1db246b51f1c8aa257684007c2067c66aaf8669c4e169d0971cc49ce0517f2a9e3af2128f09ef5696e6fedd7ac6b29e865509fdbd4bd5ea5784d7aa5"}}],"schema:hasPart":["doi:10.5072/FK2HYIXMY/ABZMQI","doi:10.5072/FK2HYIXMY/GNCTTN","doi:10.5072/FK2HYIXMY/2LRK1L","doi:10.5072/FK2HYIXMY/RHMACI"]},"@context":{"Alternative Title":"http://purl.org/dc/terms/alternative","Alternative URL":"https://schema.org/distribution","Citation":"http://purl.org/dc/terms/bibliographicCitation","Contributor":"http://purl.org/dc/terms/contributor","Creator":"http://purl.org/dc/terms/creator","Data Sources":"https://www.w3.org/TR/prov-o/#wasDerivedFrom","Deposit 
Date":"http://purl.org/dc/terms/dateSubmitted","Geographic Bounding Box":"https://schema.org/spatialCoverage","Geographic Coverage":"https://schema.org/contentLocation","Grant Information":"https://schema.org/sponsor","Identifier":"http://purl.org/spar/datacite/ResourceIdentifier","Identifier Scheme":"http://purl.org/spar/datacite/AgentIdentifierScheme","Identifier Type":"http://purl.org/spar/datacite/ResourceIdentifierScheme","Language":"http://purl.org/dc/terms/language","ORCID":"http://purl.org/spar/datacite/AgentIdentifier","Related Data Projects":"http://purl.org/dc/terms/relation","Related Publication":"http://purl.org/dc/terms/isReferencedBy","Software":"https://www.w3.org/TR/prov-o/#wasGeneratedBy","Subject":"http://purl.org/dc/terms/subject","Time Period Covered":"https://schema.org/temporalCoverage","Title":"http://purl.org/dc/terms/title","Type of Data Project":"http://rdf-vocabulary.ddialliance.org/discovery#kindOfData","URL":"https://schema.org/distribution","author":"https://dataverse.org/schema/citation/author#","citation":"https://dataverse.org/schema/citation/","contributor":"https://dataverse.org/schema/citation/contributor#","datasetContact":"https://dataverse.org/schema/citation/datasetContact#","dcterms":"http://purl.org/dc/terms/","distributor":"https://dataverse.org/schema/citation/distributor#","dsDescription":"https://dataverse.org/schema/citation/dsDescription#","dvcore":"https://dv.dev-aws.qdr.org/schema/core#","geographicBoundingBox":"https://dataverse.org/schema/citation/geographicBoundingBox#","geographicCoverage":"https://dataverse.org/schema/citation/geographicCoverage#","grantNumber":"https://dataverse.org/schema/citation/grantNumber#","keyword":"https://dataverse.org/schema/citation/keyword#","ore":"http://www.openarchives.org/ore/terms/","otherId":"https://dataverse.org/schema/citation/otherId#","producer":"https://dataverse.org/schema/citation/producer#","schema":"http://schema.org/","series":"https://dataverse.org/schema/citation
/series#","software":"https://dataverse.org/schema/citation/software#","timePeriodCovered":"https://dataverse.org/schema/citation/timePeriodCovered#"}} \ No newline at end of file From b6376dacedb9bc20440cc776c3b8791cdc68ef37 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 14:37:17 -0400 Subject: [PATCH 03/23] update guides examples, tests, cleanup metadatablock discussion --- .../source/admin/metadatacustomization.rst | 572 +++++++++--------- .../dataset-semantic-metadata-api.rst | 4 +- .../harvard/iq/dataverse/api/DatasetsIT.java | 8 +- 3 files changed, 292 insertions(+), 292 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index b7d0086e221..e59d3d4bc3b 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -76,310 +76,310 @@ Each of the three main sections own sets of properties: #metadataBlock properties ~~~~~~~~~~~~~~~~~~~~~~~~~ -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| **Property** | **Purpose** | **Allowed values and restrictions** | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| name | A user-definable string used to identify a | \• No spaces or punctuation, except underscore. | -| | #metadataBlock | | -| | | \• By convention, should start with a letter, and use | -| | | lower camel case [3]_ | -| | | | -| | | \• Must not collide with a field of the same name in | -| | | the same or any other #datasetField definition, | -| | | including metadata blocks defined elsewhere. 
[4]_ | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| dataverseAlias | If specified, this metadata block will be available | Free text. For an example, see custom_hbgdki.tsv. | -| | only to the Dataverse collection designated here by | | -| | its alias and to children of that Dataverse collection. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| displayName | Acts as a brief label for display related to this | Should be relatively brief. The limit is 256 character, | -| | #metadataBlock. | but very long names might cause display problems. | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| blockURI | Associates the properties in a block with an external | The citation #metadataBlock has the blockURI | -| | URI. 
| https://dataverse.org/schema/citation/ which assigns a | -| | Properties will be assigned the global assigned the | global URI to terms such as | -| | global identifier blockURI in the OAI_ORE | https://dataverse.org/schema/citation/subtitle | -| | metadata and archival Bags | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ ++----------------+---------------------------------------------------------+---------------------------------------------------------+ +| **Property** | **Purpose** | **Allowed values and restrictions** | ++----------------+---------------------------------------------------------+---------------------------------------------------------+ +| name | A user-definable string used to identify a | \• No spaces or punctuation, except underscore. | +| | #metadataBlock | | +| | | \• By convention, should start with a letter, and use | +| | | lower camel case [3]_ | +| | | | +| | | \• Must not collide with a field of the same name in | +| | | the same or any other #datasetField definition, | +| | | including metadata blocks defined elsewhere. [4]_ | ++----------------+---------------------------------------------------------+---------------------------------------------------------+ +| dataverseAlias | If specified, this metadata block will be available | Free text. For an example, see custom_hbgdki.tsv. | +| | only to the Dataverse collection designated here by | | +| | its alias and to children of that Dataverse collection. | | ++----------------+---------------------------------------------------------+---------------------------------------------------------+ +| displayName | Acts as a brief label for display related to this | Should be relatively brief. The limit is 256 character, | +| | #metadataBlock. | but very long names might cause display problems. 
| ++----------------+---------------------------------------------------------+---------------------------------------------------------+ +| blockURI | Associates the properties in a block with an external | The citation #metadataBlock has the blockURI | +| | URI. | https://dataverse.org/schema/citation/ which assigns a | +| | Properties will be assigned the | default global URI to terms such as | +| | global identifier blockURI in the OAI_ORE | https://dataverse.org/schema/citation/subtitle | +| | metadata and archival Bags | | ++----------------+---------------------------------------------------------+---------------------------------------------------------+ #datasetField (field) properties ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| **Property** | **Purpose** | **Allowed values and restrictions** | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| name | A user-definable string used to identify a | \• (from DatasetFieldType.java) The internal DDI-like | -| | #datasetField. Maps directly to field name used by | name, no spaces, etc. | -| | Solr. | | -| | | \• (from Solr) Field names should consist of | -| | | alphanumeric or underscore characters only and not start| -| | | with a digit. This is not currently strictly enforced, | -| | | but other field names will not have first class | -| | | support from all components and back compatibility | -| | | is not guaranteed. | -| | | Names with both leading and trailing underscores | -| | | (e.g. \_version_) are reserved. | -| | | | -| | | \• Must not collide with a field of | -| | | the same same name in another #metadataBlock | -| | | definition or any name already included as a | -| | | field in the Solr index. 
| -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| title | Acts as a brief label for display | Should be relatively brief. | -| | related to this #datasetField. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| description | Used to provide a description of the | Free text | -| | field. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| watermark | A string to initially display in a field | Free text | -| | as a prompt for what the user should enter. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| fieldType | Defines the type of content that the | | \• none | -| | field, if not empty, is meant to contain. | | \• date | -| | | | \• email | -| | | | \• text | -| | | | \• textbox | -| | | | \• url | -| | | | \• int | -| | | | \• float | -| | | | \• See below for | -| | | | fieldtype definitions | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| displayOrder | Controls the sequence in which the fields | Non-negative integer. | -| | are displayed, both for input and | | -| | presentation. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| displayFormat | Controls how the content is displayed | See below for displayFormat | -| | for presentation (not entry). 
The value of | variables | -| | this field may contain one or more | | -| | special variables (enumerated below). | | -| | HTML tags, likely in conjunction with one | | -| | or more of these values, may be used | | -| | to control the display of content in | | -| | the web UI. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| advancedSearchField | Specify whether this field is available in | TRUE (available) or | -| | advanced search. | FALSE (not available) | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| allowControlledVocabulary | Specify whether the possible values of | TRUE (controlled) or FALSE (not | -| | this field are determined by values | controlled) | -| | in the #controlledVocabulary section. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| allowmultiples | Specify whether this field is repeatable. | TRUE (repeatable) or FALSE (not | -| | | repeatable) | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| facetable | Specify whether the field is facetable | TRUE (controlled) or FALSE (not | -| | (i.e., if the expected values for | controlled) | -| | this field are themselves useful | | -| | search terms for this field). If a field is | | -| | "facetable" (able to be faceted on), it | | -| | appears under "Browse/Search | | -| | Facets" when you edit | | -| | "General Information" for a Dataverse | | -| | collection. 
| | -| | Setting this value to TRUE generally makes | | -| | sense for enumerated or controlled | | -| | vocabulary fields, fields representing | | -| | identifiers (IDs, names, email | | -| | addresses), and other fields that are | | -| | likely to share values across | | -| | entries. It is less likely to make sense | | -| | for fields containing descriptions, | | -| | floating point numbers, and other | | -| | values that are likely to be unique. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| displayoncreate [5]_ | Designate fields that should display during | TRUE (display during creation) or FALSE | -| | the creation of a new dataset, even before | (don’t display during creation) | -| | the dataset is saved. | | -| | Fields not so designated will not | | -| | be displayed until the dataset has been | | -| | saved. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| required | For primitive fields, specify whether or not the | For primitive fields, TRUE | -| | field is required. | (required) or FALSE (optional). | -| | | | -| | For compound fields, also specify if one or more | For compound fields: | -| | subfields are required or conditionally required. At | | -| | least one instance of a required field must be | \• To make one or more | -| | present. More than one instance of a field may be | subfields optional, the parent | -| | allowed, depending on the value of allowmultiples. | field and subfield(s) must be | -| | | FALSE (optional). | -| | | | -| | | \• To make one or more subfields | -| | | required, the parent field and | -| | | the required subfield(s) must be | -| | | TRUE (required). 
| -| | | | -| | | \• To make one or more subfields | -| | | conditionally required, make the | -| | | parent field FALSE (optional) | -| | | and make TRUE (required) any | -| | | subfield or subfields that are | -| | | required if any other subfields | -| | | are filled. | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| parent | For subfields, specify the name of the parent or | \• Must not result in a cyclical reference. | -| | containing field. | | -| | | \• Must reference an existing field in the same | -| | | #metadataBlock. | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| metadatablock_id | Specify the name of the #metadataBlock that contains | \• Must reference an existing #metadataBlock. | -| | this field. | | -| | | \• As a best practice, the value should reference the | -| | | #metadataBlock in the current | -| | | definition (it is technically | -| | | possible to reference another | -| | | existing metadata block.) | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| termURI | Specify a global URI identifying this term in an | For example, the existing citation | -| | external community vocabulary. | #metadataBlock defines the property | -| | | names 'title' as http://purl.org/dc/terms/title | -| | This value overrides the default created by appending | - i.e. 
indicating that it can | -| | the property name to the blockURI defined for the | be interpreted as the Dublin Core term 'title' | -| | #metadataBlock | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| **Property** | **Purpose** | **Allowed values and restrictions** | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| name | A user-definable string used to identify a | \• (from DatasetFieldType.java) The internal DDI-like | | +| | #datasetField. Maps directly to field name used by | name, no spaces, etc. | | +| | Solr. | | | +| | | \• (from Solr) Field names should consist of | | +| | | alphanumeric or underscore characters only and not start | | +| | | with a digit. This is not currently strictly enforced, | | +| | | but other field names will not have first class | | +| | | support from all components and back compatibility | | +| | | is not guaranteed. | | +| | | Names with both leading and trailing underscores | | +| | | (e.g. \_version_) are reserved. | | +| | | | | +| | | \• Must not collide with a field of | | +| | | the same name in another #metadataBlock | | +| | | definition or any name already included as a | | +| | | field in the Solr index. | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| title | Acts as a brief label for display | Should be relatively brief. | | +| | related to this #datasetField.
| | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| description | Used to provide a description of the | Free text | | +| | field. | | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| watermark | A string to initially display in a field | Free text | | +| | as a prompt for what the user should enter. | | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| fieldType | Defines the type of content that the | | \• none | +| | field, if not empty, is meant to contain. | | \• date | +| | | | \• email | +| | | | \• text | +| | | | \• textbox | +| | | | \• url | +| | | | \• int | +| | | | \• float | +| | | | \• See below for | +| | | | fieldtype definitions | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| displayOrder | Controls the sequence in which the fields | Non-negative integer. | | +| | are displayed, both for input and | | | +| | presentation. | | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| displayFormat | Controls how the content is displayed | See below for displayFormat | | +| | for presentation (not entry). The value of | variables | | +| | this field may contain one or more | | | +| | special variables (enumerated below). | | | +| | HTML tags, likely in conjunction with one | | | +| | or more of these values, may be used | | | +| | to control the display of content in | | | +| | the web UI. 
| | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| advancedSearchField | Specify whether this field is available in | TRUE (available) or | | +| | advanced search. | FALSE (not available) | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| allowControlledVocabulary | Specify whether the possible values of | TRUE (controlled) or FALSE (not | | +| | this field are determined by values | controlled) | | +| | in the #controlledVocabulary section. | | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| allowmultiples | Specify whether this field is repeatable. | TRUE (repeatable) or FALSE (not | | +| | | repeatable) | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| facetable | Specify whether the field is facetable | TRUE (facetable) or FALSE (not | | +| | (i.e., if the expected values for | facetable) | | +| | this field are themselves useful | | | +| | search terms for this field). If a field is | | | +| | "facetable" (able to be faceted on), it | | | +| | appears under "Browse/Search | | | +| | Facets" when you edit | | | +| | "General Information" for a Dataverse | | | +| | collection. | | | +| | Setting this value to TRUE generally makes | | | +| | sense for enumerated or controlled | | | +| | vocabulary fields, fields representing | | | +| | identifiers (IDs, names, email | | | +| | addresses), and other fields that are | | | +| | likely to share values across | | | +| | entries.
It is less likely to make sense | | | +| | for fields containing descriptions, | | | +| | floating point numbers, and other | | | +| | values that are likely to be unique. | | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| displayoncreate [5]_ | Designate fields that should display during | TRUE (display during creation) or FALSE | | +| | the creation of a new dataset, even before | (don’t display during creation) | | +| | the dataset is saved. | | | +| | Fields not so designated will not | | | +| | be displayed until the dataset has been | | | +| | saved. | | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| required | For primitive fields, specify whether or not the | For primitive fields, TRUE | | +| | field is required. | (required) or FALSE (optional). | | +| | | | | +| | For compound fields, also specify if one or more | For compound fields: | | +| | subfields are required or conditionally required. At | | | +| | least one instance of a required field must be | \• To make one or more | | +| | present. More than one instance of a field may be | subfields optional, the parent | | +| | allowed, depending on the value of allowmultiples. | field and subfield(s) must be | | +| | | FALSE (optional). | | +| | | | | +| | | \• To make one or more subfields | | +| | | required, the parent field and | | +| | | the required subfield(s) must be | | +| | | TRUE (required). | | +| | | | | +| | | \• To make one or more subfields | | +| | | conditionally required, make the | | +| | | parent field FALSE (optional) | | +| | | and make TRUE (required) any | | +| | | subfield or subfields that are | | +| | | required if any other subfields | | +| | | are filled. 
| | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| parent | For subfields, specify the name of the parent or | \• Must not result in a cyclical reference. | | +| | containing field. | | | +| | | \• Must reference an existing field in the same | | +| | | #metadataBlock. | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| metadatablock_id | Specify the name of the #metadataBlock that contains | \• Must reference an existing #metadataBlock. | | +| | this field. | | | +| | | \• As a best practice, the value should reference the | | +| | | #metadataBlock in the current | | +| | | definition (it is technically | | +| | | possible to reference another | | +| | | existing metadata block.) | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ +| termURI | Specify a global URI identifying this term in an | For example, the existing citation | | +| | external community vocabulary. | #metadataBlock defines the property | | +| | | named 'title' as http://purl.org/dc/terms/title | | +| | This value overrides the default (created by appending | - i.e. 
indicating that it can | | +| | the property name to the blockURI defined for the | be interpreted as the Dublin Core term 'title' | | +| | #metadataBlock) | | | ++---------------------------+--------------------------------------------------------+----------------------------------------------------------+-----------------------+ #controlledVocabulary (enumerated) properties ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| **Property** | **Purpose** | **Allowed values and restrictions** | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| DatasetField | Specifies the #datasetField to which | Must reference an existing | -| | #datasetField to which this entry applies. | #datasetField. | -| | | As a best practice, the value should | -| | | reference a #datasetField in the | -| | | current metadata block definition. (It | -| | | is technically possible to reference | -| | | an existing #datasetField from | -| | | another metadata block.) | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| Value | A short display string, representing | Free text | -| | an enumerated value for this field. If | | -| | the identifier property is empty, | | -| | this value is used as the identifier. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| identifier | A string used to encode the selected | Free text | -| | enumerated value of a field. 
If this | | -| | property is empty, the value of the | | -| | “Value” field is used as the identifier. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ -| displayOrder | Control the order in which the enumerated | Non-negative integer. | -| | values are displayed for selection. | | -+---------------------------------------------------------+---------------------------------------------------------+---------------------------------------------------------+ ++--------------+--------------------------------------------+-----------------------------------------+ +| **Property** | **Purpose** | **Allowed values and restrictions** | ++--------------+--------------------------------------------+-----------------------------------------+ +| DatasetField | Specifies the #datasetField to which | Must reference an existing | +| | this entry applies. | #datasetField. | +| | | As a best practice, the value should | +| | | reference a #datasetField in the | +| | | current metadata block definition. (It | +| | | is technically possible to reference | +| | | an existing #datasetField from | +| | | another metadata block.) | ++--------------+--------------------------------------------+-----------------------------------------+ +| Value | A short display string, representing | Free text | +| | an enumerated value for this field. If | | +| | the identifier property is empty, | | +| | this value is used as the identifier. | | ++--------------+--------------------------------------------+-----------------------------------------+ +| identifier | A string used to encode the selected | Free text | +| | enumerated value of a field. If this | | +| | property is empty, the value of the | | +| | “Value” field is used as the identifier.
| | ++--------------+--------------------------------------------+-----------------------------------------+ +| displayOrder | Control the order in which the enumerated | Non-negative integer. | +| | values are displayed for selection. | | ++--------------+--------------------------------------------+-----------------------------------------+ FieldType definitions ~~~~~~~~~~~~~~~~~~~~~ -+---------------------------------------------------------+---------------------------------------------------------+ -| **Fieldtype** | **Definition** | -+---------------------------------------------------------+---------------------------------------------------------+ -| none | Used for compound fields, in which | -| | case the parent field would have | -| | no value and display no data | -| | entry control. | -+---------------------------------------------------------+---------------------------------------------------------+ -| date | A date, expressed in one of three | -| | resolutions of the form | -| | YYYY-MM-DD, YYYY-MM, or YYYY. | -+---------------------------------------------------------+---------------------------------------------------------+ -| email | A valid email address. Not | -| | indexed for privacy reasons. | -+---------------------------------------------------------+---------------------------------------------------------+ -| text | Any text other than newlines may | -| | be entered into this field. | -+---------------------------------------------------------+---------------------------------------------------------+ -| textbox | Any text may be entered. For | -| | input, the Dataverse Software | -| | presents a | -| | multi-line area that accepts | -| | newlines. While any HTML is | -| | permitted, only a subset of HTML | -| | tags will be rendered in the UI. | -| | See the | -| | :ref:`supported-html-fields` | -| | section of the Dataset + File | -| | Management page in the User Guide. 
| -+---------------------------------------------------------+---------------------------------------------------------+ -| url | If not empty, field must contain | -| | a valid URL. | -+---------------------------------------------------------+---------------------------------------------------------+ -| int | An integer value destined for a | -| | numeric field. | -+---------------------------------------------------------+---------------------------------------------------------+ -| float | A floating point number destined | -| | for a numeric field. | -+---------------------------------------------------------+---------------------------------------------------------+ ++---------------+------------------------------------+ +| **Fieldtype** | **Definition** | ++---------------+------------------------------------+ +| none | Used for compound fields, in which | +| | case the parent field would have | +| | no value and display no data | +| | entry control. | ++---------------+------------------------------------+ +| date | A date, expressed in one of three | +| | resolutions of the form | +| | YYYY-MM-DD, YYYY-MM, or YYYY. | ++---------------+------------------------------------+ +| email | A valid email address. Not | +| | indexed for privacy reasons. | ++---------------+------------------------------------+ +| text | Any text other than newlines may | +| | be entered into this field. | ++---------------+------------------------------------+ +| textbox | Any text may be entered. For | +| | input, the Dataverse Software | +| | presents a | +| | multi-line area that accepts | +| | newlines. While any HTML is | +| | permitted, only a subset of HTML | +| | tags will be rendered in the UI. | +| | See the | +| | :ref:`supported-html-fields` | +| | section of the Dataset + File | +| | Management page in the User Guide. | ++---------------+------------------------------------+ +| url | If not empty, field must contain | +| | a valid URL. 
| ++---------------+------------------------------------+ +| int | An integer value destined for a | +| | numeric field. | ++---------------+------------------------------------+ +| float | A floating point number destined | +| | for a numeric field. | ++---------------+------------------------------------+ displayFormat variables ~~~~~~~~~~~~~~~~~~~~~~~ These are common ways to use the displayFormat to control how values are displayed in the UI. This list is not exhaustive. -+---------------------------------------------------------+---------------------------------------------------------+ -| **Variable** | **Description** | -+---------------------------------------------------------+---------------------------------------------------------+ -| (blank) | The displayFormat is left blank | -| | for primitive fields (e.g. | -| | subtitle) and fields that do not | -| | take values (e.g. author), since | -| | displayFormats do not work for | -| | these fields. | -+---------------------------------------------------------+---------------------------------------------------------+ -| #VALUE | The value of the field (instance level). | -+---------------------------------------------------------+---------------------------------------------------------+ -| #NAME | The name of the field (class level). | -+---------------------------------------------------------+---------------------------------------------------------+ -| #EMAIL | For displaying emails. | -+---------------------------------------------------------+---------------------------------------------------------+ -| #VALUE | For displaying the value as a | -| | link (if the value entered is a | -| | link). | -+---------------------------------------------------------+---------------------------------------------------------+ -| #VALUE | For displaying the value as a | -| | link, with the value included in | -| | the URL (e.g. 
if URL is | -| | \http://emsearch.rutgers.edu/atla\ | -| | \s/#VALUE_summary.html, | -| | and the value entered is 1001, | -| | the field is displayed as | -| | `1001 `__ | -| | (hyperlinked to | -| | http://emsearch.rutgers.edu/atlas/1001_summary.html)). | -+---------------------------------------------------------+---------------------------------------------------------+ -|
| entered image URL (used to | -| | display images in the producer | -| | and distributor logos metadata | -| | fields). | -+---------------------------------------------------------+---------------------------------------------------------+ -| #VALUE: | Appends and/or prepends | -| | characters to the value of the | -| \- #VALUE: | field. e.g. if the displayFormat | -| | for the distributorAffiliation is | -| (#VALUE) | (#VALUE) (wrapped with parens) | -| | and the value entered | -| | is University of North | -| | Carolina, the field is displayed | -| | in the UI as (University of | -| | North Carolina). | -+---------------------------------------------------------+---------------------------------------------------------+ -| ; | Displays the character (e.g. | -| | semicolon, comma) between the | -| : | values of fields within | -| | compound fields. For example, | -| , | if the displayFormat for the | -| | compound field “series” is a | -| | colon, and if the value | -| | entered for seriesName is | -| | IMPs and for | -| | seriesInformation is A | -| | collection of NMR data, the | -| | compound field is displayed in | -| | the UI as IMPs: A | -| | collection of NMR data. | -+---------------------------------------------------------+---------------------------------------------------------+ ++---------------------------------+--------------------------------------------------------+ +| **Variable** | **Description** | ++---------------------------------+--------------------------------------------------------+ +| (blank) | The displayFormat is left blank | +| | for primitive fields (e.g. | +| | subtitle) and fields that do not | +| | take values (e.g. author), since | +| | displayFormats do not work for | +| | these fields. | ++---------------------------------+--------------------------------------------------------+ +| #VALUE | The value of the field (instance level). 
| ++---------------------------------+--------------------------------------------------------+ +| #NAME | The name of the field (class level). | ++---------------------------------+--------------------------------------------------------+ +| #EMAIL | For displaying emails. | ++---------------------------------+--------------------------------------------------------+ +| #VALUE | For displaying the value as a | +| | link (if the value entered is a | +| | link). | ++---------------------------------+--------------------------------------------------------+ +| #VALUE | For displaying the value as a | +| | link, with the value included in | +| | the URL (e.g. if URL is | +| | \http://emsearch.rutgers.edu/atla\ | +| | \s/#VALUE_summary.html, | +| | and the value entered is 1001, | +| | the field is displayed as | +| | `1001 `__ | +| | (hyperlinked to | +| | http://emsearch.rutgers.edu/atlas/1001_summary.html)). | ++---------------------------------+--------------------------------------------------------+ +|
| entered image URL (used to | +| | display images in the producer | +| | and distributor logos metadata | +| | fields). | ++---------------------------------+--------------------------------------------------------+ +| #VALUE: | Appends and/or prepends | +| | characters to the value of the | +| \- #VALUE: | field. e.g. if the displayFormat | +| | for the distributorAffiliation is | +| (#VALUE) | (#VALUE) (wrapped with parens) | +| | and the value entered | +| | is University of North | +| | Carolina, the field is displayed | +| | in the UI as (University of | +| | North Carolina). | ++---------------------------------+--------------------------------------------------------+ +| ; | Displays the character (e.g. | +| | semicolon, comma) between the | +| : | values of fields within | +| | compound fields. For example, | +| , | if the displayFormat for the | +| | compound field “series” is a | +| | colon, and if the value | +| | entered for seriesName is | +| | IMPs and for | +| | seriesInformation is A | +| | collection of NMR data, the | +| | compound field is displayed in | +| | the UI as IMPs: A | +| | collection of NMR data. 
| ++---------------------------------+--------------------------------------------------------+ Metadata Block Setup -------------------- diff --git a/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst b/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst index da28cc60c53..77bbddcb966 100644 --- a/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst +++ b/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst @@ -48,11 +48,11 @@ To add json-ld formatted metadata for a Dataset, specify the Dataset ID (DATASET Example: Change the Dataset title - curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"Title": "Submit menu test", "@context":{"Title": "http://purl.org/dc/terms/title"}}' "$SERVER_URL/api/datasets/$DATASET_ID/metadata?replace=true" + curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"title": "Submit menu test", "@context":{"title": "http://purl.org/dc/terms/title"}}' "$SERVER_URL/api/datasets/$DATASET_ID/metadata?replace=true" Example 2: Add a description using the DATASET PID - curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"citation:Description": {"dsDescription:Text": "New description"}, "@context":{"citation": "https://dataverse.org/schema/citation/","dsDescription": "https://dataverse.org/schema/citation/dsDescription#"}}' "$SERVER_URL/api/datasets/:persistentId/metadata?persistentId=$DATASET_PID" + curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"citation:dsDescription": {"citation:dsDescriptionValue": "New description"}, "@context":{"citation": "https://dataverse.org/schema/citation/"}}' "$SERVER_URL/api/datasets/:persistentId/metadata?persistentId=$DATASET_PID" You should expect a 200 ("OK") response indicating whether a draft Dataset version was created or an existing draft was updated. 
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 23c17c071ff..a6b5bb7c6a7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -2333,7 +2333,7 @@ public void testSemanticMetadataAPIs() { JSONAssert.assertEquals(expectedJsonLD, jsonLD, false); // Now change the title response = UtilIT.updateDatasetJsonLDMetadata(datasetId, apiToken, - "{\"Title\": \"New Title\", \"@context\":{\"Title\": \"http://purl.org/dc/terms/title\"}}", true); + "{\"title\": \"New Title\", \"@context\":{\"title\": \"http://purl.org/dc/terms/title\"}}", true); response.then().assertThat().statusCode(OK.getStatusCode()); response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiToken); @@ -2347,7 +2347,7 @@ public void testSemanticMetadataAPIs() { // Add an additional description (which is multi-valued and compound) // Also add new terms of use (single value so would fail with replace false if a // value existed) - String newDescription = "{\"citation:Description\": {\"dsDescription:Text\": \"New description\"}, \"https://dataverse.org/schema/core#termsOfUse\": \"New terms\", \"@context\":{\"citation\": \"https://dataverse.org/schema/citation/\",\"dsDescription\": \"https://dataverse.org/schema/citation/dsDescription#\"}}"; + String newDescription = "{\"citation:dsDescription\": {\"citation:dsDescriptionValue\": \"New description\"}, \"https://dataverse.org/schema/core#termsOfUse\": \"New terms\", \"@context\":{\"citation\": \"https://dataverse.org/schema/citation/\"}}"; response = UtilIT.updateDatasetJsonLDMetadata(datasetId, apiToken, newDescription, false); response.then().assertThat().statusCode(OK.getStatusCode()); @@ -2358,8 +2358,8 @@ public void testSemanticMetadataAPIs() { jsonLDString = getData(response.getBody().asString()); jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); assertEquals("New 
description", - ((JsonObject) jsonLDObject.getJsonArray("https://dataverse.org/schema/citation/Description").get(1)) - .getString("https://dataverse.org/schema/citation/dsDescription#Text")); + ((JsonObject) jsonLDObject.getJsonArray("https://dataverse.org/schema/citation/dsDescription").get(1)) + .getString("https://dataverse.org/schema/citation/dsDescriptionValue")); // Can't add terms of use with replace=false and a value already set (single // valued field) From 77ced21e6cb8392b0709462f2fa226848d78bb55 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 14:42:26 -0400 Subject: [PATCH 04/23] assure name and uri are unique --- .../V5.10.1.0.1__8533-semantic-updates.sql | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql diff --git a/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql b/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql new file mode 100644 index 00000000000..ee4bf440722 --- /dev/null +++ b/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql @@ -0,0 +1,17 @@ +DO $$ +BEGIN + + BEGIN + ALTER TABLE datasetfieldtype ADD CONSTRAINT datasetfieldtype_name_key UNIQUE(name); + EXCEPTION + WHEN duplicate_object THEN RAISE NOTICE 'Table unique constraint datasetfieldtype_name_key already exists'; + END; + + BEGIN + ALTER TABLE datasetfieldtype ADD CONSTRAINT datasetfieldtype_uri_key UNIQUE(uri); + EXCEPTION + WHEN duplicate_object THEN RAISE NOTICE 'Table unique constraint datasetfieldtype_uri_key already exists'; + END; + +END $$; + From 2797ea9f8b45074573bbcd52e160d89970cb3584 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 15:18:45 -0400 Subject: [PATCH 05/23] release notes --- doc/release-notes/8533_semantic-api-updates.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 doc/release-notes/8533_semantic-api-updates.md diff --git 
a/doc/release-notes/8533_semantic-api-updates.md b/doc/release-notes/8533_semantic-api-updates.md new file mode 100644 index 00000000000..a0391ca0dc3 --- /dev/null +++ b/doc/release-notes/8533_semantic-api-updates.md @@ -0,0 +1,14 @@ + +## Notes for Developers and Integrators + +This release includes an update to the experimental semantic API and the underlying assignment of URIs to metadata block terms that are not explicitly mapped to terms in community vocabularies. The change affects the output of the OAI_ORE metadata export, the OAI_ORE file in archival bags, and the input/output allowed for those terms in the semantic API. For those updating integrating code or existing files intended for input into this release of Dataverse: URIs of the form: + https://dataverse.org/schema/{block name}/{parentField name}#{childField title}, and + https://dataverse.org/schema/{block name}/{Field title} + are both replaced with URIs of the form: + https://dataverse.org/schema/{block name}/{Field name} + +## Additional Release Steps + +Upgrade should include re-export of metadata files (only the OAI_ORE is affected). + +For this PR and other changes coming from DataCommons, it will also be advisable for people archiving Bags to re-archive. More detail on the overall set of changes in those tbd PRs. 
From 6c3fcf6ec296e0cae476788376660906979c2c3e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 18:34:37 -0400 Subject: [PATCH 06/23] Revert flyway to see if it affects tests --- .../V5.10.1.0.1__8533-semantic-updates.sql | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql diff --git a/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql b/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql deleted file mode 100644 index ee4bf440722..00000000000 --- a/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql +++ /dev/null @@ -1,17 +0,0 @@ -DO $$ -BEGIN - - BEGIN - ALTER TABLE datasetfieldtype ADD CONSTRAINT datasetfieldtype_name_key UNIQUE(name); - EXCEPTION - WHEN duplicate_object THEN RAISE NOTICE 'Table unique constraint datasetfieldtype_name_key already exists'; - END; - - BEGIN - ALTER TABLE datasetfieldtype ADD CONSTRAINT datasetfieldtype_uri_key UNIQUE(uri); - EXCEPTION - WHEN duplicate_object THEN RAISE NOTICE 'Table unique constraint datasetfieldtype_uri_key already exists'; - END; - -END $$; - From 68e00858a8fde41916de534ea254791d9253c085 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 21:25:17 -0400 Subject: [PATCH 07/23] Revert "Revert flyway to see if it affects tests" This reverts commit 6c3fcf6ec296e0cae476788376660906979c2c3e. 
--- .../V5.10.1.0.1__8533-semantic-updates.sql | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql diff --git a/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql b/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql new file mode 100644 index 00000000000..ee4bf440722 --- /dev/null +++ b/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql @@ -0,0 +1,17 @@ +DO $$ +BEGIN + + BEGIN + ALTER TABLE datasetfieldtype ADD CONSTRAINT datasetfieldtype_name_key UNIQUE(name); + EXCEPTION + WHEN duplicate_object THEN RAISE NOTICE 'Table unique constraint datasetfieldtype_name_key already exists'; + END; + + BEGIN + ALTER TABLE datasetfieldtype ADD CONSTRAINT datasetfieldtype_uri_key UNIQUE(uri); + EXCEPTION + WHEN duplicate_object THEN RAISE NOTICE 'Table unique constraint datasetfieldtype_uri_key already exists'; + END; + +END $$; + From 2e8492b404d9dbee617b5daf86b07cdcb2eb9680 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sun, 10 Apr 2022 13:40:32 -0400 Subject: [PATCH 08/23] remove unique uri constraint uri only has to be unique within parent - could try to make that constraint --- .../java/edu/harvard/iq/dataverse/DatasetFieldType.java | 2 +- .../db/migration/V5.10.1.0.1__8533-semantic-updates.sql | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java index 81b763089b3..6128e7ffdf0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java @@ -304,7 +304,7 @@ public void setMetadataBlock(MetadataBlock metadataBlock) { /** * A formal URI for the field used in json-ld exports */ - @Column(name = "uri", columnDefinition = "TEXT", unique=true) + @Column(name = "uri", columnDefinition = "TEXT") private String 
uri; public String getUri() { diff --git a/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql b/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql index ee4bf440722..7186adbee3e 100644 --- a/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql +++ b/src/main/resources/db/migration/V5.10.1.0.1__8533-semantic-updates.sql @@ -7,11 +7,5 @@ BEGIN WHEN duplicate_object THEN RAISE NOTICE 'Table unique constraint datasetfieldtype_name_key already exists'; END; - BEGIN - ALTER TABLE datasetfieldtype ADD CONSTRAINT datasetfieldtype_uri_key UNIQUE(uri); - EXCEPTION - WHEN duplicate_object THEN RAISE NOTICE 'Table unique constraint datasetfieldtype_uri_key already exists'; - END; - END $$; From 64091f56bc2c0093052b6762d105aa70de6108dc Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 18 Apr 2022 12:59:18 -0400 Subject: [PATCH 09/23] Large file upload - internalization error message fix --- src/main/webapp/dataset-widgets.xhtml | 3 ++- src/main/webapp/themeAndWidgetsFragment.xhtml | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/webapp/dataset-widgets.xhtml b/src/main/webapp/dataset-widgets.xhtml index a57f144b97a..93072952a36 100644 --- a/src/main/webapp/dataset-widgets.xhtml +++ b/src/main/webapp/dataset-widgets.xhtml @@ -72,7 +72,8 @@ data-toggle="tooltip" data-placement="auto right" data-original-title="#{bundle['dataset.thumbnailsAndWidget.thumbnailImage.uploadNew.title']}">