diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java index 6cbbd658944..e7795407d7e 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java @@ -129,9 +129,9 @@ private void step() throws IOException .stripCompressionExtension(res.getLocation())) .orElse(RDFXML); model = Rio.parse(is, res.getLocation().toString(), format); - } - - contextIterator = model.filter(null, RDF.TYPE, vf.createIRI(RdfCas.TYPE_VIEW)).iterator(); + } + + contextIterator = model.filter(null, RDF.TYPE, RdfCas.TYPE_VIEW).iterator(); } else { // No more files to read diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java index 7e5ce9c676f..e88f97c360d 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java @@ -49,8 +49,6 @@ public class RdfWriter /** * Specify the suffix of output files. Default value .ttl. The file format will be * chosen depending on the file suffice. - * - * @see RDFLanguages */ public static final String PARAM_FILENAME_EXTENSION = ComponentParameters.PARAM_FILENAME_EXTENSION; @ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".ttl") @@ -61,20 +59,20 @@ public class RdfWriter private Set iriFeatures; private Uima2Rdf uima2rdf; - + @Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); - + uima2rdf = new Uima2Rdf(iriFeatures); } - + @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { var model = new DynamicModelFactory().createEmptyModel(); - + try { uima2rdf.convert(aJCas, model); } @@ -83,10 +81,8 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException } try (var docOS = getOutputStream(aJCas, filenameSuffix)) { - var format = Rio - .getParserFormatForFileName(filenameSuffix) - .orElse(RDFXML); - Rio.write(model, docOS, format); + var format = Rio.getParserFormatForFileName(filenameSuffix).orElse(RDFXML); + Rio.write(model, docOS, format); } catch (Exception e) { throw new AnalysisEngineProcessException(e); diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java new file mode 100644 index 00000000000..a94176c646b --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.rdf.internal; + +import org.eclipse.rdf4j.model.base.AbstractIRI; + +public class BasicIRI extends AbstractIRI +{ + private static final long serialVersionUID = 4794310809421877727L; + + private final String namespace; + private final String localName; + + public BasicIRI(String aNamespace, String aLocalName) + { + namespace = aNamespace; + localName = aLocalName; + } + + @Override + public String getNamespace() + { + return namespace; + } + + @Override + public String getLocalName() + { + return localName; + } +} diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java index c8f908eab4c..94892f9f4f0 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java @@ -17,10 +17,19 @@ */ package de.tudarmstadt.ukp.inception.io.rdf.internal; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.NS_RDFCAS; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.SCHEME_UIMA; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_INDEXED_IN; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_SOFA_ID; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_SOFA_MIME_TYPE; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_SOFA_STRING; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.TYPE_FEATURE_STRUCTURE; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.TYPE_VIEW; +import static org.apache.commons.lang3.StringUtils.substringAfterLast; + import java.util.HashMap; import java.util.Map; -import org.apache.commons.lang3.StringUtils; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; import org.apache.uima.cas.FeatureStructure; @@ -31,7 +40,6 @@ import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Statement; -import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDF; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; @@ -40,19 +48,13 @@ public class Rdf2Uima { public static void convert(Model aModel, Statement aContext, JCas aJCas) throws CASException { - var vf = SimpleValueFactory.getInstance(); var m = aModel; - // Set up names - var tView = vf.createIRI(RdfCas.TYPE_VIEW); - var tFeatureStructure = vf.createIRI(RdfCas.TYPE_FEATURE_STRUCTURE); - var pIndexedIn = vf.createIRI(RdfCas.PROP_INDEXED_IN); - var fsIndex = new HashMap(); // Convert the views/SofAs var viewIndex = new HashMap(); - for (var view : aModel.filter(null, RDF.TYPE, tView).subjects()) { + for (var view : aModel.filter(null, RDF.TYPE, TYPE_VIEW).subjects()) { var viewJCas = convertView(aModel, view, aJCas); viewIndex.put(view, viewJCas); fsIndex.put(view, viewJCas.getSofa()); @@ -60,7 +62,7 @@ public static void convert(Model aModel, Statement aContext, JCas aJCas) throws // Convert the FSes but without setting their feature values yet - we cannot fill // the feature values just set because some of them may point to FSes not yet created - var fses = m.filter(null, RDF.TYPE, tFeatureStructure).subjects(); + var fses = m.filter(null, RDF.TYPE, TYPE_FEATURE_STRUCTURE).subjects().toArray(Resource[]::new); for (var fs : fses) { var uimaFS = initFS(aModel, fs, aJCas); fsIndex.put(fs, uimaFS); @@ -73,7 +75,7 @@ public static void convert(Model aModel, Statement aContext, JCas aJCas) throws // Finally add the FSes to the indexes of the respective views for (var fs : fses) { - for (var indexedIn : aModel.filter(fs, pIndexedIn, null).objects()) { + for (var indexedIn : aModel.filter(fs, PROP_INDEXED_IN, null).objects()) { var viewJCas = viewIndex.get(indexedIn); viewJCas.addFsToIndexes(fsIndex.get(fs)); } @@ -82,17 +84,10 @@ public static void convert(Model aModel, Statement aContext, JCas aJCas) throws public static JCas convertView(Model aModel, Resource aView, JCas aJCas) throws CASException { - var vf = SimpleValueFactory.getInstance(); - - // Set up names - var pSofaID = vf.createIRI(RdfCas.PROP_SOFA_ID); - var pSofaString = vf.createIRI(RdfCas.PROP_SOFA_STRING); - var pSofaMimeType = vf.createIRI(RdfCas.PROP_SOFA_MIME_TYPE); - // Get the values - var viewName = aModel.filter(aView, pSofaID, null).objects().iterator().next().stringValue(); - var sofaString = aModel.filter(aView, pSofaString, null).objects().iterator().next().stringValue(); - var sofaMimeType = aModel.filter(aView, pSofaMimeType, null).objects().iterator().next().stringValue(); + var viewName = aModel.filter(aView, PROP_SOFA_ID, null).objects().iterator().next().stringValue(); + var sofaString = aModel.filter(aView, PROP_SOFA_STRING, null).objects().iterator().next().stringValue(); + var sofaMimeType = aModel.filter(aView, PROP_SOFA_MIME_TYPE, null).objects().iterator().next().stringValue(); // Instantiate the view/SofA var view = JCasUtil.getView(aJCas, viewName, true); @@ -107,10 +102,10 @@ public static FeatureStructure initFS(Model aModel, Resource aFS, JCas aJCas) // Figure out the UIMA type - there can be only one type per FS var types = aModel.filter(aFS, RDF.TYPE, null).objects(); - types.removeIf(res -> res.stringValue().startsWith(RdfCas.NS_RDFCAS)); + types.removeIf(res -> res.stringValue().startsWith(NS_RDFCAS)); assert types.size() == 1; var type = CasUtil.getType(cas, - types.iterator().next().stringValue().substring(RdfCas.NS_UIMA.length())); + types.iterator().next().stringValue().substring(SCHEME_UIMA.length())); FeatureStructure fs; if (type.getName().equals(DocumentMetaData.class.getName())) { @@ -135,7 +130,7 @@ public static FeatureStructure convertFS(Model aModel, Resource aFS, JCas aJCas, continue; } - var featureName = StringUtils.substringAfterLast(stmt.getPredicate().stringValue(), "-"); + var featureName = substringAfterLast(stmt.getPredicate().stringValue(), "-"); var uimaFeat = fs.getType().getFeatureByBaseName(featureName); // Cannot update start/end of document annotation because that FS is already indexed, so @@ -149,7 +144,7 @@ public static FeatureStructure convertFS(Model aModel, Resource aFS, JCas aJCas, if (uimaFeat.getRange().isPrimitive()) { Literal literal = null; if (stmt.getObject().isLiteral()) { - literal = (Literal) stmt; + literal = (Literal) stmt.getObject(); } switch (uimaFeat.getRange().getName()) { diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java index afa43cafd75..120eb89d1cc 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java @@ -18,6 +18,7 @@ package de.tudarmstadt.ukp.inception.io.rdf.internal; import org.apache.uima.cas.CAS; +import org.eclipse.rdf4j.model.IRI; /** * RDF CAS vocabulary. @@ -27,19 +28,18 @@ public class RdfCas public static final String PREFIX_RDFCAS = "rdfcas"; public static final String NS_RDFCAS = "http://uima.apache.org/rdf/cas#"; - public static final String NS_UIMA = "uima:"; + public static final String SCHEME_UIMA = "uima:"; - public static final String PROP_VIEW = NS_RDFCAS + "view"; - public static final String PROP_INDEXED_IN = NS_RDFCAS + "indexedIn"; + public static final IRI PROP_VIEW = new BasicIRI(NS_RDFCAS, "view"); + public static final IRI PROP_INDEXED_IN =new BasicIRI(NS_RDFCAS, "indexedIn"); - // public static final String TYPE_CAS = NS_RDFCAS + "CAS"; - public static final String TYPE_VIEW = NS_RDFCAS + "View"; - public static final String TYPE_FEATURE_STRUCTURE = NS_RDFCAS + "FeatureStructure"; + public static final IRI TYPE_VIEW = new BasicIRI(NS_RDFCAS, "View"); + public static final IRI TYPE_FEATURE_STRUCTURE = new BasicIRI(NS_RDFCAS, "FeatureStructure"); - public static final String PROP_SOFA_ID = NS_UIMA + CAS.TYPE_NAME_SOFA + '-' - + CAS.FEATURE_BASE_NAME_SOFAID; - public static final String PROP_SOFA_STRING = NS_UIMA + CAS.TYPE_NAME_SOFA + '-' - + CAS.FEATURE_BASE_NAME_SOFASTRING; - public static final String PROP_SOFA_MIME_TYPE = NS_UIMA + CAS.TYPE_NAME_SOFA + '-' - + CAS.FEATURE_BASE_NAME_SOFAMIME; + public static final IRI PROP_SOFA_ID = new BasicIRI(SCHEME_UIMA, CAS.TYPE_NAME_SOFA + '-' + + CAS.FEATURE_BASE_NAME_SOFAID); + public static final IRI PROP_SOFA_STRING = new BasicIRI(SCHEME_UIMA, CAS.TYPE_NAME_SOFA + '-' + + CAS.FEATURE_BASE_NAME_SOFASTRING); + public static final IRI PROP_SOFA_MIME_TYPE = new BasicIRI(SCHEME_UIMA, CAS.TYPE_NAME_SOFA + '-' + + CAS.FEATURE_BASE_NAME_SOFAMIME); } diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java index 108023f073d..f705298a763 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java @@ -17,6 +17,8 @@ */ package de.tudarmstadt.ukp.inception.io.rdf.internal; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PREFIX_RDFCAS; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.SCHEME_UIMA; import static java.lang.String.format; import java.util.HashSet; @@ -31,6 +33,7 @@ import org.apache.uima.jcas.JCas; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Namespace; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDF; import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorUtils; @@ -56,9 +59,9 @@ public void convert(JCas aJCas, Model aTarget) throws CASException { // Set up prefix mappings var ts = aJCas.getTypeSystem(); - aTarget.setNamespace("cas", RdfCas.NS_UIMA + "uima.cas."); - aTarget.setNamespace("tcas", RdfCas.NS_UIMA + "uima.tcas."); - aTarget.setNamespace(RdfCas.PREFIX_RDFCAS, RdfCas.NS_RDFCAS); + aTarget.setNamespace("cas", SCHEME_UIMA + "uima.cas."); + aTarget.setNamespace("tcas", SCHEME_UIMA + "uima.tcas."); + aTarget.setNamespace(PREFIX_RDFCAS, RdfCas.NS_RDFCAS); // Additional prefix mappings for DKPro Core typesystems for (var t : ts.getProperlySubsumedTypes(ts.getTopType())) { @@ -73,7 +76,7 @@ public void convert(JCas aJCas, Model aTarget) throws CASException if (nameMatcher.group("INMODULE") != null) { prefix = prefix + "-" + nameMatcher.group("INMODULE"); } - aTarget.setNamespace(prefix, RdfCas.NS_UIMA + nameMatcher.group("LONG")); + aTarget.setNamespace(prefix, SCHEME_UIMA + nameMatcher.group("LONG")); } } @@ -87,11 +90,6 @@ private void convertView(JCas aJCas, Model aTarget) { var vf = SimpleValueFactory.getInstance(); - // Set up names - var tView = vf.createIRI(RdfCas.TYPE_VIEW); - var tFeatureStructure = vf.createIRI(RdfCas.TYPE_FEATURE_STRUCTURE); - var pIndexedIn = vf.createIRI(RdfCas.PROP_INDEXED_IN); - // Get a URI for the document var dmd = DocumentMetaData.get(aJCas); var docuri = dmd.getDocumentUri() != null ? dmd.getDocumentUri() @@ -104,23 +102,23 @@ private void convertView(JCas aJCas, Model aTarget) reachable.add(aJCas.getSofa()); // Set up the view itself - var viewUri = format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(aJCas.getSofa())); - var rdfView = vf.createIRI(viewUri); - aTarget.add(rdfView, RDF.TYPE, tView); + var rdfView = vf.createIRI( + format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(aJCas.getSofa()))); + aTarget.add(rdfView, RDF.TYPE, RdfCas.TYPE_VIEW); for (var uimaFS : reachable) { var uri = format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(uimaFS)); var rdfFS = vf.createIRI(uri); - aTarget.add(rdfFS, RDF.TYPE, vf.createIRI(rdfType(uimaFS.getType()))); + aTarget.add(rdfFS, RDF.TYPE, rdfType(aTarget, uimaFS.getType())); // The SoFa is not a regular FS - do not mark it as such if (uimaFS != aJCas.getSofa()) { - aTarget.add(rdfFS, RDF.TYPE, tFeatureStructure); + aTarget.add(rdfFS, RDF.TYPE, RdfCas.TYPE_FEATURE_STRUCTURE); } // Internal UIMA information if (indexed.contains(uimaFS)) { - aTarget.add(rdfFS, pIndexedIn, rdfView); + aTarget.add(rdfFS, RdfCas.PROP_INDEXED_IN, rdfView); } // Convert features @@ -133,7 +131,7 @@ private void convertFeatures(Model aTarget, String docuri, FeatureStructure uima var vf = SimpleValueFactory.getInstance(); for (var uimaFeat : uimaFS.getType().getFeatures()) { - var rdfFeat = vf.createIRI(rdfFeature(uimaFeat)); + var rdfFeat = rdfFeature(aTarget, uimaFeat); if (uimaFeat.getRange().isPrimitive()) { switch (uimaFeat.getRange().getName()) { case CAS.TYPE_NAME_BOOLEAN: @@ -189,13 +187,32 @@ private static String rdfUri(String docuri, FeatureStructure uimaFS) return format("%s#%d", docuri, uimaFS.getCAS().getLowLevelCAS().ll_getFSRef(uimaFS)); } - private static String rdfFeature(Feature aUimaFeature) + private static IRI rdfFeature(Model aModel, Feature aUimaFeature) { - return rdfType(aUimaFeature.getDomain()) + "-" + aUimaFeature.getShortName(); + var typeIri = rdfType(aModel, aUimaFeature.getDomain()); + return new BasicIRI(typeIri.getNamespace(), + typeIri.getLocalName() + "-" + aUimaFeature.getShortName()); } - private static String rdfType(Type aUimaType) + private static IRI rdfType(Model aModel, Type aUimaType) { - return RdfCas.NS_UIMA + aUimaType.getName(); + Namespace bestNs = null; + for (var ns : aModel.getNamespaces()) { + var nsName = ns.getName().substring(SCHEME_UIMA.length()); + if (aUimaType.getName().startsWith(nsName) + && (bestNs == null || nsName.length() > bestNs.getName().length())) { + bestNs = ns; + } + } + + var vf = SimpleValueFactory.getInstance(); + if (bestNs != null) { + var namespace = bestNs.getName(); + var localName = aUimaType.getName() + .substring(bestNs.getName().length() - SCHEME_UIMA.length()); + return new BasicIRI(namespace, localName); + } + + return vf.createIRI(SCHEME_UIMA + aUimaType.getName()); } } diff --git a/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java index a3934d08193..084dcb5e5bf 100644 --- a/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java +++ b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java @@ -90,8 +90,8 @@ void readWriteWithIriFeatures(@TempDir File aTemp) throws Exception var targetFile = new File(aTemp, "test.ttl"); assertThat(contentOf(targetFile, UTF_8)) // - .contains("ner:NamedEntity-value \"PER\" ;") - .contains("ner:NamedEntity-identifier ;"); + .contains("\"PER\"") + .contains(""); cas.reset();