Skip to content

Commit

Permalink
Fix forced use of oboInOwl namespace when translating OBO IDs.
Browse files Browse the repository at this point in the history
This commit rewrites part of the OWLAPIObo2Owl#oboIdToIRI_load() method
so that the oboInOwlDefault parameter is only used when translating
unprefixed IDs (where it instructs to create an IRI in the oboInOwl
namespace instead of the ontology's default namespace). When translating
a prefixed ID, the IRI to construct should always use the URL prefix
dictated by the prefix.

closes #1112

2nd commit message:
Ensure longest namespace match is used. Disallow prefixes that are
substrings of OBO namespace.

3rd commit message:
If a prefix is declared in idspaces, don't use # separator for
"non-canonical" ids.

4th commit message:
Drop conflicting id annotations rather than stuff into owl-axioms
header.

5th commit message:
Corrected key comparison in OBOFormatPrefixManager. Exclude OBO IRIs
from idspaces.

7th commit message:
Don't inject default semweb prefixes into OBO document format.

8th commit message:
Test prefixes aren't injected.
  • Loading branch information
gouttegd authored and ignazio1977 committed May 7, 2024
1 parent f3ed3a2 commit 1f4764d
Show file tree
Hide file tree
Showing 9 changed files with 634 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,12 @@

import javax.annotation.Nonnull;

import org.semanticweb.owlapi.model.OWLDocumentFormatImpl;

/**
* @author Matthew Horridge, The University Of Manchester, Bio-Health
* Informatics Group
* @since 2.0.0
*/
public class OBODocumentFormat extends OWLDocumentFormatImpl {
public class OBODocumentFormat extends PrefixDocumentFormatImpl {

/**
* Key for validation parameter. Currently supports Boolean.TRUE and
Expand All @@ -30,6 +28,11 @@ public class OBODocumentFormat extends OWLDocumentFormatImpl {
public static final String VALIDATION = "obo.validation";
private static final long serialVersionUID = 40000L;

/**
 * Creates an OBO document format whose prefix state has been reset with
 * {@code clear()} immediately after the superclass constructor has run.
 */
public OBODocumentFormat() {
// The superclass no-arg constructor runs implicitly before this body.
// NOTE(review): presumably the superclass registers default prefixes that
// must not end up in OBO output, hence the reset - confirm against
// PrefixDocumentFormatImpl.
clear();
}

@Nonnull
@Override
public String getKey() {
Expand All @@ -38,12 +41,11 @@ public String getKey() {

@Override
public boolean isPrefixOWLOntologyFormat() {
return false;
return true;
}

@Override
public PrefixDocumentFormat asPrefixOWLOntologyFormat() {
throw new UnsupportedOperationException(getClass().getName()
+ " is not a PrefixDocumentFormat");
return this;
}
}
112 changes: 112 additions & 0 deletions contract/src/test/java/org/obolibrary/oboformat/PrefixesTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package org.obolibrary.oboformat;

import org.junit.jupiter.api.Test;
import org.semanticweb.owlapi.api.test.baseclasses.TestBase;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.formats.OBODocumentFormat;
import org.semanticweb.owlapi.model.*;
import org.semanticweb.owlapi.search.EntitySearcher;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Map;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Round-trip tests for prefix ({@code idspace}) handling: OBO documents are
 * loaded, optionally have their document format adjusted, are saved again, and
 * the resulting text is inspected.
 */
public class PrefixesTest extends TestBase {

    /** Fixture declaring the {@code ex}, {@code ex1} and {@code obo} idspaces. */
    private static final String OBO_PREFIX_FIXTURE = "obo/test_obo_prefix.obo";

    /**
     * Checks that a declared idspace survives a load/save cycle and that
     * {@code replaced_by} / {@code consider} values are loaded as IRIs.
     */
    @Test
    void testPrefixesRoundtrip() throws OWLOntologyStorageException, IOException {
        OWLDataFactory factory = OWLManager.getOWLDataFactory();
        OWLAnnotationProperty termReplacedBy = factory
            .getOWLAnnotationProperty(IRI.create("http://purl.obolibrary.org/obo/IAO_0100001"));
        OWLAnnotationProperty consider = factory.getOWLAnnotationProperty(
            IRI.create("http://www.geneontology.org/formats/oboInOwl#consider"));

        // Part 1: explicitly declared idspaces.
        OWLOntology oboOnt =
            loadOntology("obo/test_prefixes.obo", OWLManager.createOWLOntologyManager());
        assertTrue(oboOnt
            .containsClassInSignature(IRI.create("http://purl.obolibrary.org/obo/FOO_1234")));
        assertTrue(oboOnt.containsClassInSignature(IRI.create("http://somewhere.org/MyClass")));
        assertFalse(
            oboOnt.containsClassInSignature(IRI.create("https://example.org/myns/#ABC_123")));
        Map<String, String> prefixMap = oboOnt.getOWLOntologyManager().getOntologyFormat(oboOnt)
            .asPrefixOWLOntologyFormat().getPrefixName2PrefixMap();
        assertEquals("http://somewhere.org/", prefixMap.get("sw:"));
        String roundtripOBO = writeWithStoredFormatAsText(oboOnt);
        assertTrue(roundtripOBO.contains("idspace: sw http://somewhere.org/"));
        assertTrue(roundtripOBO.contains("[Term]\nid: FOO:1234\nis_a: sw:MyClass"));

        // Part 2: replacement annotations on obsolete terms and alt_ids.
        OWLOntology replacementsOnt = loadOntology("obo/iris_for_obsoletes_replacements.obo",
            OWLManager.createOWLOntologyManager());
        assertTrue(
            classHasIriAnnotationValue(factory, replacementsOnt,
                "http://purl.obolibrary.org/obo/GO_0000108", termReplacedBy,
                "http://purl.obolibrary.org/obo/GO_0000109"),
            "Values for replaced_by should be IRIs rather than strings");
        assertTrue(
            classHasIriAnnotationValue(factory, replacementsOnt,
                "http://purl.obolibrary.org/obo/GO_0000114", consider,
                "http://purl.obolibrary.org/obo/GO_0000083"),
            "Values for consider should be IRIs rather than strings");
        assertTrue(
            classHasIriAnnotationValue(factory, replacementsOnt,
                "http://purl.obolibrary.org/obo/GO_0010553", termReplacedBy,
                "http://purl.obolibrary.org/obo/GO_0000122"),
            "Values for replaced_by on alt_ids should be IRIs");
        String roundtripReplacementsOnt = writeWithStoredFormatAsText(replacementsOnt);
        assertFalse(roundtripReplacementsOnt.contains("idspace:"));
        assertTrue(roundtripReplacementsOnt.contains("replaced_by: GO:0000109"));
    }

    /**
     * Checks that a declared {@code obo} idspace is not written back, that the
     * longest matching namespace is used, and that no {@code owl-axioms}
     * header is produced.
     */
    @Test
    void testHandlingOfDeclaredOBOPrefix()
        throws OWLOntologyStorageException, IOException, OWLOntologyCreationException {
        OWLOntology oboOnt =
            loadOntology(OBO_PREFIX_FIXTURE, OWLManager.createOWLOntologyManager());
        String roundtripOBO = writeWithStoredFormatAsText(oboOnt);
        assertFalse(roundtripOBO.contains("obo:"));
        assertFalse(roundtripOBO.contains("obo "));
        assertFalse(roundtripOBO.contains("ex:MMyClass"),
            "The longest available namespace match should be used");
        assertFalse(roundtripOBO.contains("owl-axioms:"));
    }

    /**
     * Saves with a freshly constructed {@link OBODocumentFormat} that received
     * the loaded format's prefixes, and checks that no {@code rdf} idspace is
     * written.
     */
    @Test
    void testOBOFormatShouldNotInjectPrefixesInConstructedDocFormat()
        throws OWLOntologyStorageException, IOException {
        OWLOntology oboOnt =
            loadOntology(OBO_PREFIX_FIXTURE, OWLManager.createOWLOntologyManager());
        OWLOntologyManager manager = oboOnt.getOWLOntologyManager();
        OWLDocumentFormat format = new OBODocumentFormat();
        // Remember the new format's default prefix and put it back after the copy.
        String defaultNamespace = format.asPrefixOWLOntologyFormat().getDefaultPrefix();
        format.asPrefixOWLOntologyFormat()
            .copyPrefixesFrom(manager.getOntologyFormat(oboOnt).asPrefixOWLOntologyFormat());
        format.asPrefixOWLOntologyFormat().setDefaultPrefix(defaultNamespace);
        String roundtripOBO = writeWithGivenFormatAsText(oboOnt, format);
        assertFalse(roundtripOBO.contains("idspace: rdf"));
    }

    /**
     * Saves with the format object obtained at load time (after copying its
     * own prefixes onto itself) and checks that no {@code rdf} idspace is
     * written.
     */
    @Test
    void testOBOFormatShouldNotInjectPrefixesInLoadedDocFormat()
        throws OWLOntologyStorageException, IOException {
        OWLOntology oboOnt =
            loadOntology(OBO_PREFIX_FIXTURE, OWLManager.createOWLOntologyManager());
        OWLOntologyManager manager = oboOnt.getOWLOntologyManager();
        OWLDocumentFormat format = manager.getOntologyFormat(oboOnt);
        format.asPrefixOWLOntologyFormat()
            .copyPrefixesFrom(manager.getOntologyFormat(oboOnt).asPrefixOWLOntologyFormat());
        String roundtripOBO = writeWithGivenFormatAsText(oboOnt, format);
        assertFalse(roundtripOBO.contains("idspace: rdf"));
    }

    /**
     * Registers a {@code GO} prefix that points inside the OBO namespace and
     * checks that it is not written out as an idspace.
     */
    @Test
    void testOBOFormatShouldPreventOBOPrefixes() throws OWLOntologyStorageException, IOException {
        OWLOntology oboOnt =
            loadOntology(OBO_PREFIX_FIXTURE, OWLManager.createOWLOntologyManager());
        OWLOntologyManager manager = oboOnt.getOWLOntologyManager();
        OWLDocumentFormat format = manager.getOntologyFormat(oboOnt);
        format.asPrefixOWLOntologyFormat().setPrefix("GO", "http://purl.obolibrary.org/obo/GX_");
        String roundtripOBO = writeWithGivenFormatAsText(oboOnt, format);
        assertFalse(roundtripOBO.contains("idspace: GO"));
    }

    /**
     * Saves {@code ontology} through its own manager, without naming a format,
     * and returns the written bytes decoded as UTF-8.
     */
    private static String writeWithStoredFormatAsText(OWLOntology ontology)
        throws OWLOntologyStorageException, IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        ontology.getOWLOntologyManager().saveOntology(ontology, buffer);
        buffer.close();
        return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
    }

    /**
     * Saves {@code ontology} through its own manager using {@code format} and
     * returns the written bytes decoded as UTF-8.
     */
    private static String writeWithGivenFormatAsText(OWLOntology ontology,
        OWLDocumentFormat format) throws OWLOntologyStorageException, IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        ontology.getOWLOntologyManager().saveOntology(ontology, format, buffer);
        buffer.close();
        return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
    }

    /**
     * Tells whether {@code ontology} holds an annotation assertion on the class
     * {@code classIri} that uses {@code property} and whose value is the IRI
     * {@code expectedValueIri}. The value of every axiom using
     * {@code property} is unwrapped as an IRI without a presence check, so a
     * non-IRI value makes the lookup fail loudly instead of returning false.
     */
    private static boolean classHasIriAnnotationValue(OWLDataFactory factory,
        OWLOntology ontology, String classIri, OWLAnnotationProperty property,
        String expectedValueIri) {
        OWLClass subject = factory.getOWLClass(IRI.create(classIri));
        IRI expected = IRI.create(expectedValueIri);
        return EntitySearcher.getAnnotationAssertionAxioms(subject, ontology).stream()
            .anyMatch(ax -> ax.getProperty().equals(property)
                && ax.getValue().asIRI().get().equals(expected));
    }

}
102 changes: 102 additions & 0 deletions contract/src/test/resources/obo/iris_for_obsoletes_replacements.obo
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
format-version: 1.2
subsetdef: chebi_ph7_3 "Rhea list of ChEBI terms representing the major species at pH 7.3."
subsetdef: gocheck_do_not_annotate "Term not to be used for direct annotation"
subsetdef: gocheck_do_not_manually_annotate "Term not to be used for direct manual annotation"
subsetdef: goslim_agr "AGR slim"
subsetdef: goslim_aspergillus "Aspergillus GO slim"
subsetdef: goslim_candida "Candida GO slim"
subsetdef: goslim_chembl "ChEMBL protein targets summary"
subsetdef: goslim_drosophila "Drosophila GO slim"
subsetdef: goslim_flybase_ribbon "FlyBase Drosophila GO ribbon slim"
subsetdef: goslim_generic "Generic GO slim"
subsetdef: goslim_metagenomics "Metagenomics GO slim"
subsetdef: goslim_mouse "Mouse GO slim"
subsetdef: goslim_pir "PIR GO slim"
subsetdef: goslim_plant "Plant GO slim"
subsetdef: goslim_pombe "Fission yeast GO slim"
subsetdef: goslim_synapse "synapse GO slim"
subsetdef: goslim_yeast "Yeast GO slim"
subsetdef: prokaryote_subset "GO subset for prokaryotes"
synonymtypedef: syngo_official_label "label approved by the SynGO project"
synonymtypedef: systematic_synonym "Systematic synonym" EXACT
default-namespace: gene_ontology
ontology: go
property_value: has_ontology_root_term GO:0003674
property_value: has_ontology_root_term GO:0005575
property_value: has_ontology_root_term GO:0008150
property_value: http://purl.org/dc/elements/1.1/description "The Gene Ontology (GO) provides a framework and set of concepts for describing the functions of gene products from all organisms." xsd:string
property_value: http://purl.org/dc/elements/1.1/title "Gene Ontology" xsd:string
property_value: http://purl.org/dc/terms/license http://creativecommons.org/licenses/by/4.0/

[Term]
id: GO:0000108
name: obsolete repairosome
namespace: cellular_component
def: "OBSOLETE. A stable complex of proteins that carry out the DNA damage recognition and incision reactions characteristic of nucleotide excision repair (NER), such as DNA damage recognition, DNA helix unwinding, and endonucleolytic cleavage at sites flanking damaged DNA; includes TFIIH subunits and additional polypeptides; may form in the absence of DNA damage." [PMID:10681587, PMID:9852079]
comment: This term was made obsolete because 'repairosome' has fallen out of use in the literature, and the large complex described in the definition has not been confirmed to exist. The term has also confused annotators.
synonym: "repairosome" EXACT []
is_obsolete: true
replaced_by: GO:0000109

[Term]
id: GO:0000109
name: nucleotide-excision repair complex
namespace: cellular_component
def: "Any complex formed of proteins that act in nucleotide-excision repair." [PMID:10915862]
comment: Note that process information is included in the term and definition for the purpose of describing and distinguishing the complex.
subset: goslim_pir
synonym: "UvrB-UvrC complex" NARROW [PMID:12145219]
synonym: "UvrBC complex" NARROW [GOC:bhm, PMID:12145219]
is_a: GO:0032991 ! protein-containing complex
intersection_of: GO:0032991 ! protein-containing complex
intersection_of: capable_of_part_of GO:0006289 ! nucleotide-excision repair
relationship: part_of GO:0005634 ! nucleus

[Term]
id: GO:0000114
name: obsolete regulation of transcription involved in G1 phase of mitotic cell cycle
namespace: biological_process
def: "OBSOLETE. Any process that regulates transcription such that the target genes are transcribed as part of the G1 phase of the mitotic cell cycle." [GOC:dph, GOC:mah, GOC:tb]
comment: This term was made obsolete because it is unclear exactly what it means. It could mean either 'regulation of transcription during phase X' or 'regulation of transition between phase X and phase Y'.
synonym: "G1-specific transcription in mitotic cell cycle" RELATED []
synonym: "regulation of transcription from RNA polymerase II promoter during G1 phase of cell cycle" EXACT []
synonym: "regulation of transcription involved in G1 phase of mitotic cell cycle" EXACT []
is_obsolete: true
consider: GO:0000083
consider: GO:0006357

[Term]
id: GO:0000122
name: negative regulation of transcription by RNA polymerase II
namespace: biological_process
alt_id: GO:0010553
alt_id: GO:0045816
def: "Any process that stops, prevents, or reduces the frequency, rate or extent of transcription mediated by RNA polymerase II." [GOC:go_curators, GOC:txnOH]
synonym: "down regulation of global transcription from RNA polymerase II promoter" RELATED []
synonym: "down regulation of transcription from RNA polymerase II promoter" EXACT []
synonym: "down-regulation of global transcription from RNA polymerase II promoter" RELATED []
synonym: "down-regulation of transcription from RNA polymerase II promoter" EXACT []
synonym: "downregulation of global transcription from RNA polymerase II promoter" RELATED []
synonym: "downregulation of transcription from RNA polymerase II promoter" EXACT []
synonym: "inhibition of global transcription from RNA polymerase II promoter" RELATED []
synonym: "inhibition of transcription from RNA polymerase II promoter" EXACT []
synonym: "negative regulation of gene-specific transcription from RNA polymerase II promoter" RELATED []
synonym: "negative regulation of global transcription from Pol II promoter" RELATED []
synonym: "negative regulation of transcription from Pol II promoter" EXACT []
synonym: "negative regulation of transcription from RNA polymerase II promoter" EXACT []
synonym: "negative regulation of transcription from RNA polymerase II promoter, global" RELATED []
intersection_of: GO:0065007 ! biological regulation
intersection_of: negatively_regulates GO:0006366 ! transcription by RNA polymerase II

[Typedef]
id: capable_of_part_of
name: capable of part of
namespace: external
xref: RO:0002216

[Typedef]
id: negatively_regulates
name: negatively regulates
namespace: external
xref: RO:0002212
is_a: regulates ! regulates
25 changes: 25 additions & 0 deletions contract/src/test/resources/obo/test_obo_prefix.obo
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
format-version: 1.2
idspace: ex http://example.org/
idspace: ex1 http://example.org/M
idspace: obo http://purl.obolibrary.org/obo/
ontology: foo

[Term]
id: FOO:1234
name: the 1234 class
def: "A very important concept."
is_a: ex:MyClass

[Term]
id: ex:MyClass
name: my ex class

[Term]
id: ex1:MyClass
name: my ex1 class
relationship: capable_of_part_of FOO:1234

[Typedef]
id: capable_of_part_of
name: capable of part of
xref: RO:0002216
12 changes: 12 additions & 0 deletions contract/src/test/resources/obo/test_prefixes.obo
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
format-version: 1.2
idspace: owl http://www.w3.org/2002/07/owl#
idspace: rdf http://www.w3.org/1999/02/22-rdf-syntax-ns#
idspace: rdfs http://www.w3.org/2000/01/rdf-schema#
idspace: sw http://somewhere.org/
idspace: xml http://www.w3.org/XML/1998/namespace
idspace: xsd http://www.w3.org/2001/XMLSchema#
ontology: foo

[Term]
id: FOO:1234
is_a: sw:MyClass
Loading

0 comments on commit 1f4764d

Please sign in to comment.