From 455f159279f2621ae62c71906ba0c380e2def988 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Thu, 29 Feb 2024 21:40:31 +0100 Subject: [PATCH] #4567 - Add support for a generic CAS RDF export format - Switch from Jena to RDF4J - Added documentation --- .../META-INF/asciidoc/user-guide.adoc | 2 + .../META-INF/asciidoc/user-guide/formats.adoc | 4 + inception/inception-io-rdf/pom.xml | 79 +++++++++++++++---- .../ukp/inception/io/rdf/RdfReader.java | 9 +-- .../ukp/inception/io/rdf/RdfWriter.java | 16 ++-- .../io/rdf/UimaRdfCasFormatSupport.java | 3 +- .../inception/io/rdf/internal/BasicIRI.java | 47 +++++++++++ .../inception/io/rdf/internal/Rdf2Uima.java | 55 +++++++------ .../ukp/inception/io/rdf/internal/RdfCas.java | 24 +++--- .../inception/io/rdf/internal/Uima2Rdf.java | 58 +++++++++----- .../asciidoc/user-guide/formats-rdfcas.adoc | 68 ++++++++++++++++ .../ukp/inception/io/rdf/RdfWriterTest.java | 14 ++-- 12 files changed, 282 insertions(+), 97 deletions(-) create mode 100644 inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java create mode 100644 inception/inception-io-rdf/src/main/resources/META-INF/asciidoc/user-guide/formats-rdfcas.adoc diff --git a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc index e875e943bbd..b2b3c75eb7b 100644 --- a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc +++ b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc @@ -284,6 +284,8 @@ include::{include-dir}formats-uimajson.adoc[leveloffset=+2] include::{include-dir}formats-uimajson-legacy.adoc[leveloffset=+2] +include::{include-dir}formats-rdfcas.adoc[leveloffset=+2] + include::{include-dir}formats-uimaxmi.adoc[leveloffset=+2] include::{include-dir}formats-webannotsv1.adoc[leveloffset=+2] diff --git 
a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats.adoc b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats.adoc index 46cbb7efcf2..7dd38dfb79c 100644 --- a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats.adoc +++ b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats.adoc @@ -140,6 +140,10 @@ data in a particular format. The **feature flag** column shows which flags you c | `json` | `format.json-cas-legacy.enabled` +| <> +| `rdfcas` +| `format.rdf-cas.enabled` + | <> | `xmi` | `format.uima-xmi.enabled` diff --git a/inception/inception-io-rdf/pom.xml b/inception/inception-io-rdf/pom.xml index b813aa68631..41d476d0291 100644 --- a/inception/inception-io-rdf/pom.xml +++ b/inception/inception-io-rdf/pom.xml @@ -31,6 +31,27 @@ INCEpTION - IO - RDF + + de.tudarmstadt.ukp.inception.app + inception-diag + + + de.tudarmstadt.ukp.inception.app + inception-ui-kb + + + de.tudarmstadt.ukp.inception.app + inception-model + + + de.tudarmstadt.ukp.inception.app + inception-schema-api + + + de.tudarmstadt.ukp.inception.app + inception-api-formats + + org.apache.uima uimaj-core @@ -39,6 +60,20 @@ org.apache.uima uimafit-core + + + org.springframework + spring-context + + + org.springframework.boot + spring-boot-autoconfigure + + + + org.apache.commons + commons-lang3 + org.dkpro.core @@ -48,7 +83,19 @@ org.dkpro.core dkpro-core-api-io-asl + + org.dkpro.core + dkpro-core-api-resources-asl + + + org.dkpro.core + dkpro-core-api-metadata-asl + + + org.eclipse.rdf4j + rdf4j-rio-api + org.eclipse.rdf4j rdf4j-model @@ -69,20 +116,6 @@ org.eclipse.rdf4j rdf4j-model-vocabulary - - - org.apache.commons - commons-collections4 - - - - de.tudarmstadt.ukp.inception.app - inception-diag - - - de.tudarmstadt.ukp.inception.app - inception-ui-kb - org.dkpro.core @@ -100,4 +133,22 @@ test + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + + 
org.eclipse.rdf4j:rdf4j-rio-rdfxml + org.eclipse.rdf4j:rdf4j-rio-ntriples + + + + + + \ No newline at end of file diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java index 6cbbd658944..e9aa8be78be 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java @@ -42,13 +42,12 @@ import de.tudarmstadt.ukp.inception.io.rdf.internal.Rdf2Uima; import de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas; -import eu.openminted.share.annotations.api.DocumentationResource; /** * Reads a CAS serialized as RDF. */ @ResourceMetaData(name = "UIMA CAS RDF Reader") -@DocumentationResource("${docbase}/format-reference.html#format-${command}") +// @DocumentationResource("${docbase}/format-reference.html#format-${command}") @MimeTypeCapability({ MimeTypes.APPLICATION_X_UIMA_RDF }) public class RdfReader extends JCasResourceCollectionReader_ImplBase @@ -129,9 +128,9 @@ private void step() throws IOException .stripCompressionExtension(res.getLocation())) .orElse(RDFXML); model = Rio.parse(is, res.getLocation().toString(), format); - } - - contextIterator = model.filter(null, RDF.TYPE, vf.createIRI(RdfCas.TYPE_VIEW)).iterator(); + } + + contextIterator = model.filter(null, RDF.TYPE, RdfCas.TYPE_VIEW).iterator(); } else { // No more files to read diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java index 7e5ce9c676f..e88f97c360d 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java @@ -49,8 +49,6 @@ public class 
RdfWriter /** * Specify the suffix of output files. Default value .ttl. The file format will be * chosen depending on the file suffice. - * - * @see RDFLanguages */ public static final String PARAM_FILENAME_EXTENSION = ComponentParameters.PARAM_FILENAME_EXTENSION; @ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".ttl") @@ -61,20 +59,20 @@ public class RdfWriter private Set iriFeatures; private Uima2Rdf uima2rdf; - + @Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); - + uima2rdf = new Uima2Rdf(iriFeatures); } - + @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { var model = new DynamicModelFactory().createEmptyModel(); - + try { uima2rdf.convert(aJCas, model); } @@ -83,10 +81,8 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException } try (var docOS = getOutputStream(aJCas, filenameSuffix)) { - var format = Rio - .getParserFormatForFileName(filenameSuffix) - .orElse(RDFXML); - Rio.write(model, docOS, format); + var format = Rio.getParserFormatForFileName(filenameSuffix).orElse(RDFXML); + Rio.write(model, docOS, format); } catch (Exception e) { throw new AnalysisEngineProcessException(e); diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java index 607eb066be4..c84ce5dd530 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java @@ -91,8 +91,7 @@ public AnalysisEngineDescription getWriterDescription(Project aProject, { var iriFeatures = schemaService.listAnnotationFeature(aProject).stream() .filter(f -> f.getType().startsWith(ConceptFeatureSupport.PREFIX)) - .map(f 
-> f.getLayer().getName() + ":" + f.getName()) - .collect(toUnmodifiableSet()); + .map(f -> f.getLayer().getName() + ":" + f.getName()).collect(toUnmodifiableSet()); return createEngineDescription(RdfWriter.class, aTSD, // RdfWriter.PARAM_IRI_FEATURES, iriFeatures); diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java new file mode 100644 index 00000000000..e792ce362d6 --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.tudarmstadt.ukp.inception.io.rdf.internal; + +import org.eclipse.rdf4j.model.base.AbstractIRI; + +public class BasicIRI + extends AbstractIRI +{ + private static final long serialVersionUID = 4794310809421877727L; + + private final String namespace; + private final String localName; + + public BasicIRI(String aNamespace, String aLocalName) + { + namespace = aNamespace; + localName = aLocalName; + } + + @Override + public String getNamespace() + { + return namespace; + } + + @Override + public String getLocalName() + { + return localName; + } +} diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java index c8f908eab4c..8039f28bc60 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java @@ -17,10 +17,19 @@ */ package de.tudarmstadt.ukp.inception.io.rdf.internal; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.NS_RDFCAS; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_INDEXED_IN; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_SOFA_ID; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_SOFA_MIME_TYPE; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_SOFA_STRING; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.SCHEME_UIMA; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.TYPE_FEATURE_STRUCTURE; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.TYPE_VIEW; +import static org.apache.commons.lang3.StringUtils.substringAfterLast; + import java.util.HashMap; import java.util.Map; -import org.apache.commons.lang3.StringUtils; import org.apache.uima.cas.CAS; import 
org.apache.uima.cas.CASException; import org.apache.uima.cas.FeatureStructure; @@ -31,7 +40,6 @@ import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Statement; -import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDF; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; @@ -40,19 +48,13 @@ public class Rdf2Uima { public static void convert(Model aModel, Statement aContext, JCas aJCas) throws CASException { - var vf = SimpleValueFactory.getInstance(); var m = aModel; - // Set up names - var tView = vf.createIRI(RdfCas.TYPE_VIEW); - var tFeatureStructure = vf.createIRI(RdfCas.TYPE_FEATURE_STRUCTURE); - var pIndexedIn = vf.createIRI(RdfCas.PROP_INDEXED_IN); - var fsIndex = new HashMap(); // Convert the views/SofAs var viewIndex = new HashMap(); - for (var view : aModel.filter(null, RDF.TYPE, tView).subjects()) { + for (var view : aModel.filter(null, RDF.TYPE, TYPE_VIEW).subjects()) { var viewJCas = convertView(aModel, view, aJCas); viewIndex.put(view, viewJCas); fsIndex.put(view, viewJCas.getSofa()); @@ -60,7 +62,8 @@ public static void convert(Model aModel, Statement aContext, JCas aJCas) throws // Convert the FSes but without setting their feature values yet - we cannot fill // the feature values just set because some of them may point to FSes not yet created - var fses = m.filter(null, RDF.TYPE, tFeatureStructure).subjects(); + var fses = m.filter(null, RDF.TYPE, TYPE_FEATURE_STRUCTURE).subjects() + .toArray(Resource[]::new); for (var fs : fses) { var uimaFS = initFS(aModel, fs, aJCas); fsIndex.put(fs, uimaFS); @@ -73,7 +76,7 @@ public static void convert(Model aModel, Statement aContext, JCas aJCas) throws // Finally add the FSes to the indexes of the respective views for (var fs : fses) { - for (var indexedIn : aModel.filter(fs, pIndexedIn, null).objects()) { + for (var indexedIn : aModel.filter(fs, PROP_INDEXED_IN, null).objects()) { var 
viewJCas = viewIndex.get(indexedIn); viewJCas.addFsToIndexes(fsIndex.get(fs)); } @@ -82,17 +85,13 @@ public static void convert(Model aModel, Statement aContext, JCas aJCas) throws public static JCas convertView(Model aModel, Resource aView, JCas aJCas) throws CASException { - var vf = SimpleValueFactory.getInstance(); - - // Set up names - var pSofaID = vf.createIRI(RdfCas.PROP_SOFA_ID); - var pSofaString = vf.createIRI(RdfCas.PROP_SOFA_STRING); - var pSofaMimeType = vf.createIRI(RdfCas.PROP_SOFA_MIME_TYPE); - // Get the values - var viewName = aModel.filter(aView, pSofaID, null).objects().iterator().next().stringValue(); - var sofaString = aModel.filter(aView, pSofaString, null).objects().iterator().next().stringValue(); - var sofaMimeType = aModel.filter(aView, pSofaMimeType, null).objects().iterator().next().stringValue(); + var viewName = aModel.filter(aView, PROP_SOFA_ID, null).objects().iterator().next() + .stringValue(); + var sofaString = aModel.filter(aView, PROP_SOFA_STRING, null).objects().iterator().next() + .stringValue(); + var sofaMimeType = aModel.filter(aView, PROP_SOFA_MIME_TYPE, null).objects().iterator() + .next().stringValue(); // Instantiate the view/SofA var view = JCasUtil.getView(aJCas, viewName, true); @@ -107,10 +106,10 @@ public static FeatureStructure initFS(Model aModel, Resource aFS, JCas aJCas) // Figure out the UIMA type - there can be only one type per FS var types = aModel.filter(aFS, RDF.TYPE, null).objects(); - types.removeIf(res -> res.stringValue().startsWith(RdfCas.NS_RDFCAS)); + types.removeIf(res -> res.stringValue().startsWith(NS_RDFCAS)); assert types.size() == 1; var type = CasUtil.getType(cas, - types.iterator().next().stringValue().substring(RdfCas.NS_UIMA.length())); + types.iterator().next().stringValue().substring(SCHEME_UIMA.length())); FeatureStructure fs; if (type.getName().equals(DocumentMetaData.class.getName())) { @@ -135,7 +134,7 @@ public static FeatureStructure convertFS(Model aModel, Resource aFS, JCas 
aJCas, continue; } - var featureName = StringUtils.substringAfterLast(stmt.getPredicate().stringValue(), "-"); + var featureName = substringAfterLast(stmt.getPredicate().stringValue(), "-"); var uimaFeat = fs.getType().getFeatureByBaseName(featureName); // Cannot update start/end of document annotation because that FS is already indexed, so @@ -149,9 +148,9 @@ public static FeatureStructure convertFS(Model aModel, Resource aFS, JCas aJCas, if (uimaFeat.getRange().isPrimitive()) { Literal literal = null; if (stmt.getObject().isLiteral()) { - literal = (Literal) stmt; + literal = (Literal) stmt.getObject(); } - + switch (uimaFeat.getRange().getName()) { case CAS.TYPE_NAME_BOOLEAN: fs.setBooleanValue(uimaFeat, literal.booleanValue()); @@ -187,8 +186,8 @@ public static FeatureStructure convertFS(Model aModel, Resource aFS, JCas aJCas, else { var targetUimaFS = aFsIndex.get(stmt.getObject()); if (targetUimaFS == null) { - throw new IllegalStateException("No UIMA FS found for [" - + stmt.getObject().stringValue() + "]"); + throw new IllegalStateException( + "No UIMA FS found for [" + stmt.getObject().stringValue() + "]"); } fs.setFeatureValue(uimaFeat, targetUimaFS); } diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java index afa43cafd75..fe7fa77167f 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java @@ -18,6 +18,7 @@ package de.tudarmstadt.ukp.inception.io.rdf.internal; import org.apache.uima.cas.CAS; +import org.eclipse.rdf4j.model.IRI; /** * RDF CAS vocabulary. 
@@ -27,19 +28,18 @@ public class RdfCas public static final String PREFIX_RDFCAS = "rdfcas"; public static final String NS_RDFCAS = "http://uima.apache.org/rdf/cas#"; - public static final String NS_UIMA = "uima:"; + public static final String SCHEME_UIMA = "uima:"; - public static final String PROP_VIEW = NS_RDFCAS + "view"; - public static final String PROP_INDEXED_IN = NS_RDFCAS + "indexedIn"; + public static final IRI PROP_VIEW = new BasicIRI(NS_RDFCAS, "view"); + public static final IRI PROP_INDEXED_IN = new BasicIRI(NS_RDFCAS, "indexedIn"); - // public static final String TYPE_CAS = NS_RDFCAS + "CAS"; - public static final String TYPE_VIEW = NS_RDFCAS + "View"; - public static final String TYPE_FEATURE_STRUCTURE = NS_RDFCAS + "FeatureStructure"; + public static final IRI TYPE_VIEW = new BasicIRI(NS_RDFCAS, "View"); + public static final IRI TYPE_FEATURE_STRUCTURE = new BasicIRI(NS_RDFCAS, "FeatureStructure"); - public static final String PROP_SOFA_ID = NS_UIMA + CAS.TYPE_NAME_SOFA + '-' - + CAS.FEATURE_BASE_NAME_SOFAID; - public static final String PROP_SOFA_STRING = NS_UIMA + CAS.TYPE_NAME_SOFA + '-' - + CAS.FEATURE_BASE_NAME_SOFASTRING; - public static final String PROP_SOFA_MIME_TYPE = NS_UIMA + CAS.TYPE_NAME_SOFA + '-' - + CAS.FEATURE_BASE_NAME_SOFAMIME; + public static final IRI PROP_SOFA_ID = new BasicIRI(SCHEME_UIMA, + CAS.TYPE_NAME_SOFA + '-' + CAS.FEATURE_BASE_NAME_SOFAID); + public static final IRI PROP_SOFA_STRING = new BasicIRI(SCHEME_UIMA, + CAS.TYPE_NAME_SOFA + '-' + CAS.FEATURE_BASE_NAME_SOFASTRING); + public static final IRI PROP_SOFA_MIME_TYPE = new BasicIRI(SCHEME_UIMA, + CAS.TYPE_NAME_SOFA + '-' + CAS.FEATURE_BASE_NAME_SOFAMIME); } diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java index 108023f073d..36fb9cf08be 100644 --- 
a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java @@ -17,6 +17,8 @@ */ package de.tudarmstadt.ukp.inception.io.rdf.internal; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PREFIX_RDFCAS; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.SCHEME_UIMA; import static java.lang.String.format; import java.util.HashSet; @@ -31,8 +33,10 @@ import org.apache.uima.jcas.JCas; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Namespace; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDF; + import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorUtils; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; @@ -56,9 +60,9 @@ public void convert(JCas aJCas, Model aTarget) throws CASException { // Set up prefix mappings var ts = aJCas.getTypeSystem(); - aTarget.setNamespace("cas", RdfCas.NS_UIMA + "uima.cas."); - aTarget.setNamespace("tcas", RdfCas.NS_UIMA + "uima.tcas."); - aTarget.setNamespace(RdfCas.PREFIX_RDFCAS, RdfCas.NS_RDFCAS); + aTarget.setNamespace("cas", SCHEME_UIMA + "uima.cas."); + aTarget.setNamespace("tcas", SCHEME_UIMA + "uima.tcas."); + aTarget.setNamespace(PREFIX_RDFCAS, RdfCas.NS_RDFCAS); // Additional prefix mappings for DKPro Core typesystems for (var t : ts.getProperlySubsumedTypes(ts.getTopType())) { @@ -73,7 +77,7 @@ public void convert(JCas aJCas, Model aTarget) throws CASException if (nameMatcher.group("INMODULE") != null) { prefix = prefix + "-" + nameMatcher.group("INMODULE"); } - aTarget.setNamespace(prefix, RdfCas.NS_UIMA + nameMatcher.group("LONG")); + aTarget.setNamespace(prefix, SCHEME_UIMA + nameMatcher.group("LONG")); } } @@ -87,11 +91,6 @@ private void convertView(JCas aJCas, Model aTarget) { var vf = 
SimpleValueFactory.getInstance(); - // Set up names - var tView = vf.createIRI(RdfCas.TYPE_VIEW); - var tFeatureStructure = vf.createIRI(RdfCas.TYPE_FEATURE_STRUCTURE); - var pIndexedIn = vf.createIRI(RdfCas.PROP_INDEXED_IN); - // Get a URI for the document var dmd = DocumentMetaData.get(aJCas); var docuri = dmd.getDocumentUri() != null ? dmd.getDocumentUri() @@ -104,23 +103,23 @@ private void convertView(JCas aJCas, Model aTarget) reachable.add(aJCas.getSofa()); // Set up the view itself - var viewUri = format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(aJCas.getSofa())); - var rdfView = vf.createIRI(viewUri); - aTarget.add(rdfView, RDF.TYPE, tView); + var rdfView = vf.createIRI( + format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(aJCas.getSofa()))); + aTarget.add(rdfView, RDF.TYPE, RdfCas.TYPE_VIEW); for (var uimaFS : reachable) { var uri = format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(uimaFS)); var rdfFS = vf.createIRI(uri); - aTarget.add(rdfFS, RDF.TYPE, vf.createIRI(rdfType(uimaFS.getType()))); + aTarget.add(rdfFS, RDF.TYPE, rdfType(aTarget, uimaFS.getType())); // The SoFa is not a regular FS - do not mark it as such if (uimaFS != aJCas.getSofa()) { - aTarget.add(rdfFS, RDF.TYPE, tFeatureStructure); + aTarget.add(rdfFS, RDF.TYPE, RdfCas.TYPE_FEATURE_STRUCTURE); } // Internal UIMA information if (indexed.contains(uimaFS)) { - aTarget.add(rdfFS, pIndexedIn, rdfView); + aTarget.add(rdfFS, RdfCas.PROP_INDEXED_IN, rdfView); } // Convert features @@ -133,7 +132,7 @@ private void convertFeatures(Model aTarget, String docuri, FeatureStructure uima var vf = SimpleValueFactory.getInstance(); for (var uimaFeat : uimaFS.getType().getFeatures()) { - var rdfFeat = vf.createIRI(rdfFeature(uimaFeat)); + var rdfFeat = rdfFeature(aTarget, uimaFeat); if (uimaFeat.getRange().isPrimitive()) { switch (uimaFeat.getRange().getName()) { case CAS.TYPE_NAME_BOOLEAN: @@ -189,13 +188,32 @@ private static String rdfUri(String docuri, FeatureStructure uimaFS) 
return format("%s#%d", docuri, uimaFS.getCAS().getLowLevelCAS().ll_getFSRef(uimaFS)); } - private static String rdfFeature(Feature aUimaFeature) + private static IRI rdfFeature(Model aModel, Feature aUimaFeature) { - return rdfType(aUimaFeature.getDomain()) + "-" + aUimaFeature.getShortName(); + var typeIri = rdfType(aModel, aUimaFeature.getDomain()); + return new BasicIRI(typeIri.getNamespace(), + typeIri.getLocalName() + "-" + aUimaFeature.getShortName()); } - private static String rdfType(Type aUimaType) + private static IRI rdfType(Model aModel, Type aUimaType) { - return RdfCas.NS_UIMA + aUimaType.getName(); + Namespace bestNs = null; + for (var ns : aModel.getNamespaces()) { + var nsName = ns.getName().substring(SCHEME_UIMA.length()); + if (aUimaType.getName().startsWith(nsName) + && (bestNs == null || nsName.length() > bestNs.getName().length())) { + bestNs = ns; + } + } + + var vf = SimpleValueFactory.getInstance(); + if (bestNs != null) { + var namespace = bestNs.getName(); + var localName = aUimaType.getName() + .substring(bestNs.getName().length() - SCHEME_UIMA.length()); + return new BasicIRI(namespace, localName); + } + + return vf.createIRI(SCHEME_UIMA + aUimaType.getName()); } } diff --git a/inception/inception-io-rdf/src/main/resources/META-INF/asciidoc/user-guide/formats-rdfcas.adoc b/inception/inception-io-rdf/src/main/resources/META-INF/asciidoc/user-guide/formats-rdfcas.adoc new file mode 100644 index 00000000000..335d0d9ddb7 --- /dev/null +++ b/inception/inception-io-rdf/src/main/resources/META-INF/asciidoc/user-guide/formats-rdfcas.adoc @@ -0,0 +1,68 @@ +// Licensed to the Technische Universität Darmstadt under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The Technische Universität Darmstadt +// licenses this file to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +[[sect_formats_rdfcas]] += UIMA CAS RDF + +This format provides a representation of the annotated document in RDF using the design model of the UIMA CAS. This format is not an official Apache UIMA file format but rather a facility provided by {product-name} for the benefit of users who want to interact with their annotated data using Semantic Web technology. + +[cols="2,1,1,1,3"] +|==== +| Format | Read | Write | Custom Layers | Description + +| UIMA CAS RDF (`rdfcas`) +| yes +| yes +| yes +| +|==== + +.Example +[source,turtle] +---- +{ + + a cas:Sofa , rdfcas:View ; + cas:Sofa-mimeType "text" ; + cas:Sofa-sofaID "_InitialView" ; + cas:Sofa-sofaNum "1"^^xsd:int ; + cas:Sofa-sofaString "... here be document text ..." . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . 
+---- diff --git a/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java index a3934d08193..528ac4c715f 100644 --- a/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java +++ b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java @@ -31,6 +31,7 @@ import java.io.File; import java.io.FileInputStream; import java.util.ArrayList; + import org.apache.uima.fit.factory.JCasFactory; import org.dkpro.core.io.conll.Conll2006Reader; import org.dkpro.core.io.conll.Conll2006Writer; @@ -90,8 +91,7 @@ void readWriteWithIriFeatures(@TempDir File aTemp) throws Exception var targetFile = new File(aTemp, "test.ttl"); assertThat(contentOf(targetFile, UTF_8)) // - .contains("ner:NamedEntity-value \"PER\" ;") - .contains("ner:NamedEntity-identifier ;"); + .contains("\"PER\"").contains(""); cas.reset(); @@ -121,16 +121,18 @@ private void assertModelEquals(File expected, File actual) try { var sExpected = new ArrayList(); try (var is = new FileInputStream(expected)) { - Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sExpected.add(s.toString()));; + Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sExpected.add(s.toString())); + ; } sort(sExpected); - + var sActual = new ArrayList(); try (var is = new FileInputStream(actual)) { - Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sActual.add(s.toString()));; + Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sActual.add(s.toString())); + ; } sort(sActual); - + assertThat(join("\n", sActual)).isEqualTo(join("\n", sExpected)); } catch (Exception e) {