diff --git a/src/main/java/org/filteredpush/qc/sciname/DwCSciNameDQ.java b/src/main/java/org/filteredpush/qc/sciname/DwCSciNameDQ.java index bae394c..95daef8 100644 --- a/src/main/java/org/filteredpush/qc/sciname/DwCSciNameDQ.java +++ b/src/main/java/org/filteredpush/qc/sciname/DwCSciNameDQ.java @@ -19,6 +19,8 @@ package org.filteredpush.qc.sciname; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -43,6 +45,7 @@ * Implementation of the TDWG TG2 NAME (scientific name) related data quality tests. * * #82 VALIDATION_SCIENTIFICNAME_NOTEMPTY 7c4b9498-a8d9-4ebb-85f1-9f200c788595 + * #120 VALIDATION_TAXONID_NOTEMPTY 401bf207-9a55-4dff-88a5-abcd58ad97fa * * #81 VALIDATION_KINGDOM_FOUND 125b5493-052d-4a0d-a3e1-ed5bf792689e * #22 VALIDATION_PHYLUM_FOUND eaad41c5-1d46-4917-a08b-4fd1d7ff5c0f @@ -76,11 +79,13 @@ public static DQResponse validationPhylumFound(@ActedUpon("dwc: * Provides: #22 VALIDATION_PHYLUM_FOUND * * @param phylum the provided dwc:phylum to evaluate + * @param sourceAuthority the bdq:sourceAuthority to consult, defaults to GBIF Backbone Taxonomy if null * @return DQResponse the response of type ComplianceValue to return */ @Validation(label="VALIDATION_PHYLUM_FOUND", description="Does the value of dwc:phylum occur at rank of Phylum in bdq:sourceAuthority?") @Provides("eaad41c5-1d46-4917-a08b-4fd1d7ff5c0f") - public static DQResponse validationPhylumFound(@ActedUpon("dwc:phylum") String phylum, @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { + public static DQResponse validationPhylumFound(@ActedUpon("dwc:phylum") String phylum, + @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { DQResponse result = new DQResponse(); // Specification @@ -115,11 +120,13 @@ public static DQResponse validationFamilyFound(@ActedUpon("dwc: * Provides: #28 VALIDATION_FAMILY_FOUND * * @param family the 
provided dwc:family to evaluate + * @param sourceAuthority the bdq:sourceAuthority to consult, defaults to GBIF Backbone Taxonomy if null * @return DQResponse the response of type ComplianceValue to return */ @Validation(label="VALIDATION_FAMILY_FOUND", description="Does the value of dwc:family occur at rank of Family in bdq:sourceAuthority?") @Provides("3667556d-d8f5-454c-922b-af8af38f613c") - public static DQResponse validationFamilyFound(@ActedUpon("dwc:family") String family, @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { + public static DQResponse validationFamilyFound(@ActedUpon("dwc:family") String family, + @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { DQResponse result = new DQResponse(); // Specification @@ -536,32 +543,67 @@ public DQResponse validationTaxonAmbiguous(@ActedUpon("dwc:clas return result; } + + public static DQResponse amendmentScientificnameFromTaxonid( + @Consulted("dwc:taxonID") String taxonID, + @ActedUpon("dwc:scientificName") String scientificName + ) { + return amendmentScientificnameFromTaxonid(taxonID, scientificName, null); + } /** - * #71 Amendment SingleRecord Completeness: scientificname from taxonid + * Propose an amendment to the value of dwc:scientificName using the taxonID value from bdq:sourceAuthority. 
* - * Provides: AMENDMENT_SCIENTIFICNAME_FROM_TAXONID + * Provides: #71 AMENDMENT_SCIENTIFICNAME_FROM_TAXONID * * @param taxonID the provided dwc:taxonID to evaluate * @param scientificName the provided dwc:scientificName to evaluate + * @param sourceAuthority the bdq:sourceAuthority to consult, defaults to GBIF Backbone Taxonomy if null * @return DQResponse the response of type AmendmentValue to return */ + @Amendment(label="AMENDMENT_SCIENTIFICNAME_FROM_TAXONID", description="Propose an amendment to the value of dwc:scientificName using the taxonID value from bdq:sourceAuthority.") @Provides("f01fb3f9-2f7e-418b-9f51-adf50f202aea") - public DQResponse amendmentScientificnameFromTaxonid(@ActedUpon("dwc:taxonID") String taxonID, @ActedUpon("dwc:scientificName") String scientificName) { + public static DQResponse amendmentScientificnameFromTaxonid( + @Consulted("dwc:taxonID") String taxonID, + @ActedUpon("dwc:scientificName") String scientificName, + @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority + ) { + DQResponse result = new DQResponse(); //TODO: Implement specification // EXTERNAL_PREREQUISITES_NOT_MET if the bdq:sourceAuthority - // service was not available; INTERNAL_PREREQUISITES_NOT_MET - // if dwc:taxonID is EMPTY, the value of dwc:taxonID is ambiguous - // or dwc:scientificName was not EMPTY; AMENDED if dwc:scientificName - // was added from a successful lookup of dwc:taxonID in the - //bdq:sourceAuthority; otherwise NOT_AMENDED + // is not available; INTERNAL_PREREQUISITES_NOT_MET if dwc:taxonID + // is EMPTY, the value of dwc:taxonID is ambiguous or dwc:scientificName + // was not EMPTY; FILLED_IN the value of dwc:scientificName + // if the value of dwc:taxonID could be unambiguously interpreted + // as a value in bdq:sourceAuthority; otherwise NOT_AMENDED + // bdq:sourceAuthority default = "GBIF Backbone Taxonomy" [https://doi.org/10.15468/39omei], + // "API endpoint" 
[https://api.gbif.org/v1/species?datasetKey=d7dddbf4-2cf0-4f39-9b2a-bb099caae36c&name=] + // //TODO: Parameters. This test is defined as parameterized. - // bdq:sourceAuthority + // bdq:sourceAuthority default="GBIF Backbone Taxonomy" + + if (sourceAuthority==null) { + try { + sourceAuthority = new SciNameSourceAuthority(EnumSciNameSourceAuthority.GBIF_BACKBONE_TAXONOMY); + } catch (SourceAuthorityException e) { + logger.error(e.getMessage(),e); + } + } + if (SciNameUtils.isEmpty(taxonID)) { + result.addComment("dwc:taxonID does not contains a value."); + result.setResultState(ResultState.INTERNAL_PREREQUISITES_NOT_MET); + } else if (!SciNameUtils.isEmpty(scientificName)) { + result.addComment("dwc:scientificName already contains a value ["+ scientificName +"]."); + result.setResultState(ResultState.INTERNAL_PREREQUISITES_NOT_MET); + } else { + + } return result; + } public static DQResponse validationClassFound(@ActedUpon("dwc:class") String taxonomic_class) { @@ -574,11 +616,14 @@ public static DQResponse validationClassFound(@ActedUpon("dwc:c * Provides: #77 VALIDATION_CLASS_FOUND * * @param taxonomic_class the provided dwc:class to evaluate + * @param sourceAuthority the bdq:sourceAuthority to consult, defaults to GBIF Backbone Taxonomy if null * @return DQResponse the response of type ComplianceValue to return */ @Validation(label="VALIDATION_CLASS_FOUND", description="Does the value of dwc:class occur at rank of Class in bdq:sourceAuthority?") @Provides("2cd6884e-3d14-4476-94f7-1191cfff309b") - public static DQResponse validationClassFound(@ActedUpon("dwc:class") String taxonomic_class, @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { + public static DQResponse validationClassFound( + @ActedUpon("dwc:class") String taxonomic_class, + @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { DQResponse result = new DQResponse(); // Specification @@ -614,11 +659,13 @@ public static DQResponse 
validationKingdomFound(@ActedUpon("dwc * Provides: #81 VALIDATION_KINGDOM_FOUND * * @param kingdom the provided dwc:kingdom to evaluate + * @param sourceAuthority the bdq:sourceAuthority to consult, defaults to GBIF Backbone Taxonomy if null * @return DQResponse the response of type ComplianceValue to return */ @Validation(label="VALIDATION_KINGDOM_FOUND", description="Does the value of dwc:kingdom occur at rank of Kingdom in bdq:sourceAuthority?") @Provides("125b5493-052d-4a0d-a3e1-ed5bf792689e") - public static DQResponse validationKingdomFound(@ActedUpon("dwc:kingdom") String kingdom, @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { + public static DQResponse validationKingdomFound(@ActedUpon("dwc:kingdom") String kingdom, + @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { DQResponse result = new DQResponse(); // Specification @@ -682,11 +729,13 @@ public static DQResponse validationOrderFound(@ActedUpon("dwc:o * Provides: #83 VALIDATION_ORDER_FOUND * * @param order the provided dwc:order to evaluate + * @param sourceAuthority the bdq:sourceAuthority to consult, defaults to GBIF Backbone Taxonomy if null * @return DQResponse the response of type ComplianceValue to return */ @Validation(label="VALIDATION_ORDER_FOUND", description="Does the value of dwc:order occur at rank of Order in bdq:sourceAuthority?") @Provides("81cc974d-43cc-4c0f-a5e0-afa23b455aa3") - public static DQResponse validationOrderFound(@ActedUpon("dwc:order") String order, @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { + public static DQResponse validationOrderFound(@ActedUpon("dwc:order") String order, + @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { DQResponse result = new DQResponse(); // Specification @@ -889,17 +938,19 @@ public static DQResponse validationTaxonEmpty( } /** - * #120 Validation SingleRecord Completeness: taxonid empty + * Is there a value in 
dwc:taxonID? * - * Provides: VALIDATION_TAXONID_EMPTY + * Provides: #120 VALIDATION_TAXONID_NOTEMPTY * * @param taxonID the provided dwc:taxonID to evaluate * @return DQResponse the response of type ComplianceValue to return */ - @Provides("urn:uuid:401bf207-9a55-4dff-88a5-abcd58ad97fa") - public static DQResponse validationTaxonidEmpty(@ActedUpon("dwc:taxonID") String taxonID) { + @Validation(label="VALIDATION_TAXONID_NOTEMPTY", description="Is there a value in dwc:taxonID?") + @Provides("401bf207-9a55-4dff-88a5-abcd58ad97fa") + public static DQResponse validationTaxonidNotempty(@ActedUpon("dwc:taxonID") String taxonID) { DQResponse result = new DQResponse(); + // Specification // COMPLIANT if dwc:taxonID is not EMPTY; otherwise NOT_COMPLIANT // @@ -916,28 +967,105 @@ public static DQResponse validationTaxonidEmpty(@ActedUpon("dwc } /** - * #121 Validation SingleRecord Conformance: taxonid ambiguous + * Does the value of dwc:taxonID contain both a URI and namespace indicator? * - * Provides: VALIDATION_TAXONID_AMBIGUOUS + * Provides: #121 VALIDATION_TAXONID_COMPLETE * * @param taxonID the provided dwc:taxonID to evaluate * @return DQResponse the response of type ComplianceValue to return */ + @Validation(label="VALIDATION_TAXONID_COMPLETE", description="Does the value of dwc:taxonID contain both a URI and namespace indicator?") @Provides("a82c7e3a-3a50-4438-906c-6d0fefa9e984") - public DQResponse validationTaxonidAmbiguous(@ActedUpon("dwc:taxonID") String taxonID) { + public static DQResponse validationTaxonidComplete(@ActedUpon("dwc:taxonID") String taxonID) { DQResponse result = new DQResponse(); + //TODO: Specification needs work. 
+ // something like COMPLIANT if taxonID is a validly formed LSID, or taxonID + // is a validly formed URN with at least NID and NSS, or taxonID is a + // validly formed URI with host and path where path consists of + // more than just "/", and if host is www.gbif.org and path begins with + // "/species/", the path contains additional trailing characters, otherwise + // NOT_COMPLIANT + //TODO: Implement specification - // EXTERNAL_PREREQUISITES_NOT_MET if the GBIF backbone taxonomy - // service was not available; INTERNAL_PREREQUISITES_NOT_MET - // if dwc:taxonID is EMPTY or does not include the resolving - // authority; COMPLIANT if the value of dwc:taxonID is resolvable; - //otherwise NOT_COMPLIANT + // INTERNAL_PREREQUISITES_NOT_MET if dwc:taxonID is EMPTY; + // COMPLIANT if dwc:taxonID contains both a URI and a namespace + // indicator; otherwise NOT_COMPLIANT + + if (SciNameUtils.isEmpty(taxonID)) { + result.addComment("No value provided for taxonId."); + result.setResultState(ResultState.RUN_HAS_RESULT); + result.setValue(ComplianceValue.NOT_COMPLIANT); + } else { + try { + RFC8141URN urn = new RFC8141URN(taxonID); + if (urn.getNid().equalsIgnoreCase("lsid")) { + try { + LSID lsid = new LSID(taxonID); + lsid.getAuthority(); + lsid.getNamespace(); + lsid.getObjectID(); + result.addComment("Provided taxonID recognized as an LSID."); + result.setResultState(ResultState.RUN_HAS_RESULT); + result.setValue(ComplianceValue.COMPLIANT); + } catch (URNFormatException e2) { + logger.debug(e2.getMessage()); + result.addComment("Provided value for taxonID ["+taxonID+"] claims to be an lsid, but is not correctly formatted as such."); + result.setResultState(ResultState.RUN_HAS_RESULT); + result.setValue(ComplianceValue.NOT_COMPLIANT); + } + } else { + logger.debug(urn.getNid()); + logger.debug(urn.getNss()); + if (urn.getNid().length()>0 && urn.getNss().length()>0) { + result.addComment("Provided taxonID recognized as an URN."); + 
result.setResultState(ResultState.RUN_HAS_RESULT); + result.setValue(ComplianceValue.COMPLIANT); + } else { + result.addComment("Provided taxonID appears to be a URN, but doesn't have both NID and NSS"); + result.setResultState(ResultState.RUN_HAS_RESULT); + result.setValue(ComplianceValue.NOT_COMPLIANT); + } + } + } catch (URNFormatException e) { + logger.debug(e.getMessage()); + try { + URI uri = new URI(taxonID); + logger.debug(uri.getScheme()); + logger.debug(uri.getAuthority()); + logger.debug(uri.getHost()); + logger.debug(uri.getPath()); + if (uri.getHost()!=null && uri.getPath()!=null + && uri.getHost().length()>0 && uri.getPath().length()>0 + && !uri.getPath().equals("/")) { + if (uri.getHost().equalsIgnoreCase("www.gbif.org") && uri.getPath().equals("/species/")) { + result.addComment("Provided taxonID recognized as GBIF species URL, but lacks the ID ["+taxonID+"]"); + result.setResultState(ResultState.RUN_HAS_RESULT); + result.setValue(ComplianceValue.NOT_COMPLIANT); + } else { + result.addComment("Provided taxonID recognized as an URI with host, and path."); + result.setResultState(ResultState.RUN_HAS_RESULT); + result.setValue(ComplianceValue.COMPLIANT); + } + } else { + result.addComment("Provided taxonID may be a URI, but doesn't have host and path ["+taxonID+"]"); + result.setResultState(ResultState.RUN_HAS_RESULT); + result.setValue(ComplianceValue.NOT_COMPLIANT); + } + } catch (URISyntaxException e1) { + logger.debug(e1); + result.addComment("Provided value for taxonID ["+taxonID+"] is not a URN or a URI."); + result.setResultState(ResultState.RUN_HAS_RESULT); + result.setValue(ComplianceValue.NOT_COMPLIANT); + } + } + + } return result; } - public DQResponse validationGenusFound(@ActedUpon("dwc:genus") String genus) { + public static DQResponse validationGenusFound(@ActedUpon("dwc:genus") String genus) { return validationGenusFound(genus, null); } @@ -947,11 +1075,13 @@ public DQResponse validationGenusFound(@ActedUpon("dwc:genus") * Provides: 
#122 VALIDATION_GENUS_FOUND * * @param genus the provided dwc:genus to evaluate + * @param sourceAuthority the bdq:sourceAuthority to consult, defaults to GBIF Backbone Taxonomy if null * @return DQResponse the response of type ComplianceValue to return */ @Validation(label="VALIDATION_GENUS_FOUND", description="Does the value of dwc:genus occur at the rank of Genus in bdq:sourceAuthority?") @Provides("f2ce7d55-5b1d-426a-b00e-6d4efe3058ec") - public static DQResponse validationGenusFound(@ActedUpon("dwc:genus") String genus, @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { + public static DQResponse validationGenusFound(@ActedUpon("dwc:genus") String genus, + @Parameter(name="bdq:sourceAuthority") SciNameSourceAuthority sourceAuthority) { DQResponse result = new DQResponse(); // Specification @@ -1135,6 +1265,7 @@ public DQResponse validationTaxonrankNotstandard(@ActedUpon("dw * Provides: AMENDMENT_TAXONRANK_STANDARDIZED * * @param taxonRank the provided dwc:taxonRank to evaluate + * @param sourceAuthority the bdq:sourceAuthority to consult, defaults to GBIF Backbone Taxonomy if null * @return DQResponse the response of type AmendmentValue to return */ @Provides("e39098df-ef46-464c-9aef-bcdeee2a88cb") diff --git a/src/main/java/org/filteredpush/qc/sciname/LSID.java b/src/main/java/org/filteredpush/qc/sciname/LSID.java new file mode 100644 index 0000000..75b9b5a --- /dev/null +++ b/src/main/java/org/filteredpush/qc/sciname/LSID.java @@ -0,0 +1,126 @@ +/** + * LSID.java + * + * Copyright 2022 President and Fellows of Harvard College + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.filteredpush.qc.sciname; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Utility class for testing strings that may be LSIDs. + * + * @author mole + * + */ +public class LSID extends RFC8141URN { + + private static final Log logger = LogFactory.getLog(LSID.class); + + private String authority; + private String namespace; + private String objectID; + private String version; + + /** + * @param urn + * @throws URNFormatException + */ + public LSID(String urn) throws URNFormatException { + super(urn); + // LSID specification https://www.omg.org/cgi-bin/doc?dtc/04-05-01.pdf + // has authority:namespace:objectidenfification with optional :revisionidentification + // where authority is usually an internet domain name or is a unique string and, + // namespace, objectidentification, and revisionidentification are + // specified as "alphanumeric sequence", but the examples therin include non-alphanumeric + // characters -., so using PCHAR without : for each._ + String chars = PCHAR.replace(":", ""); + logger.debug(chars); + logger.debug(nss); + String[] bits = nss.split(":"); + if (bits.length<3 || bits.length > 4) { + throw new URNFormatException("Not a validly formatted LSID"); + } + if (bits[0].length()<1 || bits[1].length()<1 || bits[2].length()<1) { + throw new URNFormatException("Not a validly formatted LSID"); + } + authority = bits[0]; + namespace = bits[1]; + objectID = bits[2]; + if (bits.length==4) { + version = bits[3]; + } else { + version = null; + } + } + + /** + * @return 
the authority + */ + public String getAuthority() { + return authority; + } + + /** + * @param authority the authority to set + */ + public void setAuthority(String authority) { + this.authority = authority; + } + + /** + * @return the namespace + */ + public String getNamespace() { + return namespace; + } + + /** + * @param namespace the namespace to set + */ + public void setNamespace(String namespace) { + this.namespace = namespace; + } + + /** + * @return the objectID + */ + public String getObjectID() { + return objectID; + } + + /** + * @param objectID the objectID to set + */ + public void setObjectID(String objectID) { + this.objectID = objectID; + } + + /** + * @return the version + */ + public String getVersion() { + return version; + } + + /** + * @param version the version to set + */ + public void setVersion(String version) { + this.version = version; + } + +} diff --git a/src/main/java/org/filteredpush/qc/sciname/RFC8141URN.java b/src/main/java/org/filteredpush/qc/sciname/RFC8141URN.java new file mode 100644 index 0000000..d9c0ddc --- /dev/null +++ b/src/main/java/org/filteredpush/qc/sciname/RFC8141URN.java @@ -0,0 +1,190 @@ +/** + * RFC8141URN.java + * + * Copyright 2022 President and Fellows of Harvard College + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.filteredpush.qc.sciname; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Utility class for working with URNs + * + * @author mole + * + * Test for RFC8141 compliant URNs https://tools.ietf.org/html/rfc8141 + * + */ +public class RFC8141URN { + + + protected String nid; // namespace id + protected String nss; // namepace specific string + + public RFC8141URN(String urn) throws URNFormatException { + if (RFC8141URN.isRFC8141URN(urn)) { + Pattern urn_pattern = Pattern.compile(URN_RFC8141); + Matcher matcher = urn_pattern.matcher(urn); + if (matcher.matches()) { + setNid(matcher.group(2)); + setNss(matcher.group(3)); + } + } else { + throw new URNFormatException("Not a valid URN"); + } + } + + + /** + * @return the nid Namespace identifier + */ + public String getNid() { + return nid; + } + + /** + * @param nid the namespace identifer to set + */ + public void setNid(String nid) { + this.nid = nid; + } + + + /** + * @return the nss Namespace specific string + */ + public String getNss() { + return nss; + } + + + /** + * @param nss the namespace specific string to set + */ + public void setNss(String nss) { + this.nss = nss; + } + + + private static final Log logger = LogFactory.getLog(RFC8141URN.class); + + /* + namestring = assigned-name + [ rq-components ] + [ "#" f-component ] + assigned-name = "urn" ":" NID ":" NSS + NID = (alphanum) 0*30(ldh) (alphanum) + ldh = alphanum / "-" + NSS = pchar *(pchar / "/") + rq-components = [ "?+" r-component ] + [ "?=" q-component ] + r-component = pchar *( pchar / "/" / "?" ) + q-component = pchar *( pchar / "/" / "?" 
) + f-component = fragment + */ + + /** + * pchar from rfc3986 + */ + public static String PCHAR = "[A-Za-z0-9\\-\\._~!\\$&'\\(\\)\\*\\+,;=:@]|(%[0-9A-Fa-f]{2})"; + + /** + * fragment from rfc3986 + */ + public static String FRAGMENT = "((" + PCHAR + ")|/|\\?)*"; + + /** + * Namespace Specific String (NSS) + * + * pchar *(pchar / "/") + */ + private static String NSS = "(" + PCHAR + ")((" + PCHAR + ")|/)*"; + + /** + * Namespace Identifier (NID) + * + * (alphanum) 0*30(ldh) (alphanum) + */ + private static String NID = "[A-Za-z0-9][A-Za-z0-9\\-]{0,30}[A-Za-z0-9]"; + + /** + * assigned-name + * + * "urn" ":" NID ":" NSS + * (where urn is case insenstive) + */ + private static String ASSIGNED_NAME = "([uU][rR][nN]):(" + NID + "):(" + NSS + ")"; + + /** + * r-component and q-component, share same definition + * + * pchar *( pchar / "/" / "?" ) + */ + private static String RQ_COMPONENT = "(" + PCHAR + ")" + FRAGMENT; + + /** + * rq-components + * + * [ "?+" r-component ] [ "?=" q-component ] + * + */ + private static String RQ_COMPONENTS = + "((\\?\\+)(" + RQ_COMPONENT + "))?((\\?=)(" + RQ_COMPONENT + "))?"; + + /** + * Regular expression to match RFC 8141 URNs. + */ + public static String URN_RFC8141 = "^" + ASSIGNED_NAME + RQ_COMPONENTS + "(#" + FRAGMENT + ")?$"; + + public static boolean isRFC8141URN(String stringToTest) { + boolean result = false; + if (stringToTest!=null) { + Pattern urn_pattern = Pattern.compile(URN_RFC8141); + Matcher matcher = urn_pattern.matcher(stringToTest); + result = matcher.matches(); + String nid = null; + String nss = null; + if (result) { + for (int i=1; i<=matcher.groupCount(); i++) { + logger.debug(matcher.group(i)); + nid = matcher.group(2); + nss = matcher.group(3); + } + } + if (nid==null) { + result = false; + } + if (nss==null) { + result = false; + } + // additional section 51. 
and 5.2 rules for formal and informal namespaces + if (nid!=null && nid.startsWith("X-")) { + result = false; // formerly permitted experimental namespace RFC3406 + } + if (nid!=null && nid.startsWith("urn-")) { + if (!nid.matches("^urn-[1-9][0-9]*$")) { + result = false; // not a correctly formatted informal URN namespace. + } + } + } + return result; + } + +} + diff --git a/src/main/java/org/filteredpush/qc/sciname/URNFormatException.java b/src/main/java/org/filteredpush/qc/sciname/URNFormatException.java new file mode 100644 index 0000000..3f8ef1b --- /dev/null +++ b/src/main/java/org/filteredpush/qc/sciname/URNFormatException.java @@ -0,0 +1,36 @@ +/** + * URNFormatException.java + * + * Copyright 2022 President and Fellows of Harvard College + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Checked exception indicating that a string could not be interpreted as a
 * correctly formatted URN.
 *
 * @author mole
 *
 */
public class URNFormatException extends Exception {

    private static final long serialVersionUID = -9093102475674980588L;

    /**
     * @param message description of why the value is not a correctly formatted URN
     */
    public URNFormatException(String message) {
        super(message);
    }

    /**
     * @param message description of why the value is not a correctly formatted URN
     * @param cause the underlying exception, preserved for diagnostics
     */
    public URNFormatException(String message, Throwable cause) {
        super(message, cause);
    }

}
DwCSciNameDQ.validationTaxonidComplete(taxonID); + logger.debug(result.getComment()); + assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), result.getResultState().getLabel()); + assertEquals(ComplianceValue.NOT_COMPLIANT.getLabel(), result.getValue().getLabel()); + + taxonID = "https://www.gbif.org/species/2529789"; + result = DwCSciNameDQ.validationTaxonidComplete(taxonID); + logger.debug(result.getComment()); + assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), result.getResultState().getLabel()); + assertEquals(ComplianceValue.COMPLIANT.getLabel(), result.getValue().getLabel()); + + taxonID = "https://www.gbif.org/"; + result = DwCSciNameDQ.validationTaxonidComplete(taxonID); + logger.debug(result.getComment()); + assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), result.getResultState().getLabel()); + assertEquals(ComplianceValue.NOT_COMPLIANT.getLabel(), result.getValue().getLabel()); + + taxonID = "https://www.gbif.org/species/"; + result = DwCSciNameDQ.validationTaxonidComplete(taxonID); + logger.debug(result.getComment()); + assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), result.getResultState().getLabel()); + assertEquals(ComplianceValue.NOT_COMPLIANT.getLabel(), result.getValue().getLabel()); + + taxonID = "urn:uuid:c65c3ede-484f-45af-813e-65f606dff750"; + result = DwCSciNameDQ.validationTaxonidComplete(taxonID); + logger.debug(result.getComment()); + assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), result.getResultState().getLabel()); + assertEquals(ComplianceValue.COMPLIANT.getLabel(), result.getValue().getLabel()); + + taxonID = "c65c3ede-484f-45af-813e-65f606dff750"; + result = DwCSciNameDQ.validationTaxonidComplete(taxonID); + logger.debug(result.getComment()); + assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), result.getResultState().getLabel()); + assertEquals(ComplianceValue.NOT_COMPLIANT.getLabel(), result.getValue().getLabel()); + + taxonID = "gbif:2529789"; + result = DwCSciNameDQ.validationTaxonidComplete(taxonID); + 
logger.debug(result.getComment()); + assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), result.getResultState().getLabel()); + assertEquals(ComplianceValue.NOT_COMPLIANT.getLabel(), result.getValue().getLabel()); + } + /** * Test method for {@link org.filteredpush.qc.sciname.DwCSciNameDQ#validationScientificnameNotempty(java.lang.String)}. */ @@ -137,20 +202,20 @@ public void testValidationTaxonEmpty() { } /** - * Test method for {@link org.filteredpush.qc.sciname.DwCSciNameDQ#validationTaxonidEmpty(java.lang.String)}. + * Test method for {@link org.filteredpush.qc.sciname.DwCSciNameDQ#validationTaxonidNotempty(java.lang.String)}. */ @Test public void testValidationTaxonidEmpty() { // COMPLIANT if dwc:taxonID is not EMPTY; otherwise NOT_COMPLIANT String taxonId = "foo"; - DQResponse result = DwCSciNameDQ.validationTaxonidEmpty(taxonId); + DQResponse result = DwCSciNameDQ.validationTaxonidNotempty(taxonId); logger.debug(result.getComment()); assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), result.getResultState().getLabel()); assertEquals(ComplianceValue.COMPLIANT.getLabel(), result.getValue().getLabel()); taxonId = ""; - result = DwCSciNameDQ.validationTaxonidEmpty(taxonId); + result = DwCSciNameDQ.validationTaxonidNotempty(taxonId); logger.debug(result.getComment()); assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), result.getResultState().getLabel()); assertEquals(ComplianceValue.NOT_COMPLIANT.getLabel(), result.getValue().getLabel()); diff --git a/src/test/java/org/filteredpush/qc/sciname/TestRFC8141URN.java b/src/test/java/org/filteredpush/qc/sciname/TestRFC8141URN.java new file mode 100644 index 0000000..eacd013 --- /dev/null +++ b/src/test/java/org/filteredpush/qc/sciname/TestRFC8141URN.java @@ -0,0 +1,96 @@ +/** + * TestRFC8141URN.java + * + * Copyright 2022 President and Fellows of Harvard College + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.filteredpush.qc.sciname; + +import static org.junit.Assert.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.junit.Test; + +/** + * Test RFC8141URN and its subclass LSID + * + * @author mole + * + */ +public class TestRFC8141URN { + + private static final Log logger = LogFactory.getLog(TestRFC8141URN.class); + + @Test + public void testConstructor() { + try { + RFC8141URN test = new RFC8141URN("urn:lsid:marinespecies.org:taxname:148"); + assertEquals("lsid",test.getNid()); + assertEquals("marinespecies.org:taxname:148",test.getNss()); + } catch (URNFormatException e) { + fail("Threw exception for validly formatted URN"); + } + } + + @Test + public void testLSIDConstructor() { + try { + LSID test = new LSID("urn:lsid:marinespecies.org:taxname:148"); + assertEquals("lsid",test.getNid()); + assertEquals("marinespecies.org:taxname:148",test.getNss()); + assertEquals("marinespecies.org",test.getAuthority()); + assertEquals("taxname",test.getNamespace()); + assertEquals("148",test.getObjectID()); + assertNull(test.getVersion()); + } catch (URNFormatException e) { + logger.debug(e.getMessage()); + fail("Threw exception for validly formatted LSID"); + } + try { + LSID test = new LSID("URN:LSID:ebi.ac.uk:SWISS-PROT.accession:P34355:3"); + assertEquals("ebi.ac.uk",test.getAuthority()); + assertEquals("SWISS-PROT.accession",test.getNamespace()); + assertEquals("P34355",test.getObjectID()); + assertEquals("3",test.getVersion()); + } catch (URNFormatException e) { + 
logger.debug(e.getMessage()); + fail("Threw exception for validly formatted LSID"); + } + try { + LSID test = new LSID("urn:lsid:b021cfc4-883b-4f48-9679-47985dd006ef"); + fail("Failed to throw exception for invalidly formatted LSID"); + } catch (URNFormatException e) { + // expected exception thrown + } + } + + /** + * Test method for {@link org.filteredpush.qc.sciname.RFC8141URN#isRFC8141URN(java.lang.String)}. + */ + @Test + public void testIsRFC8141URN() { + + assertEquals(true, RFC8141URN.isRFC8141URN("urn:lsid:marinespecies.org:taxname:148")); + assertEquals(true, RFC8141URN.isRFC8141URN("urn:uuid:c65c3ede-484f-45af-813e-65f606dff750")); + + assertEquals(false, RFC8141URN.isRFC8141URN("c65c3ede-484f-45af-813e-65f606dff750")); + assertEquals(false, RFC8141URN.isRFC8141URN("https://www.gbif.org/species/2529789")); + + assertEquals(true, RFC8141URN.isRFC8141URN("urn:urn-99999:string")); + assertEquals(false, RFC8141URN.isRFC8141URN("urn:urn-09999:string")); + assertEquals(false, RFC8141URN.isRFC8141URN("urn:X-999:string")); + } + +}