From caebfe116f0da3ff0d4dab871c5132750a494337 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Tue, 27 Feb 2024 19:16:01 +0100 Subject: [PATCH 1/3] #4567 - Add support for a generic CAS RDF export format - Ported RDF reader/writer from DKPro Core - Added PARAM_IRI_FEATURES so we can represent KB concepts as proper IRIs in the export instead of strings - Added test using PARAM_IRI_FEATURES --- inception/inception-app-webapp/pom.xml | 57 +- inception/inception-bom/pom.xml | 30 +- inception/inception-io-rdf/LICENSE.txt | 202 ++ inception/inception-io-rdf/pom.xml | 91 + .../ukp/inception/io/rdf/RdfReader.java | 142 ++ .../ukp/inception/io/rdf/RdfWriter.java | 93 + .../io/rdf/UimaRdfCasFormatSupport.java | 100 + .../config/RdfFormatAutoConfiguration.java | 39 + .../inception/io/rdf/internal/Rdf2Uima.java | 199 ++ .../ukp/inception/io/rdf/internal/RdfCas.java | 45 + .../inception/io/rdf/internal/Uima2Rdf.java | 208 ++ ...ot.autoconfigure.AutoConfiguration.imports | 1 + .../ukp/inception/io/rdf/RdfWriterTest.java | 132 ++ .../src/test/resources/conll/2006/README.txt | 7 + .../test/resources/conll/2006/fi-orig.conll | 38 + .../src/test/resources/conll/2006/fi-ref.ttl | 1796 +++++++++++++++++ .../src/test/resources/log4j2-test.xml | 17 + .../src/test/resources/ttl/fi-orig.ttl | 1760 ++++++++++++++++ .../src/test/resources/ttl/fi-ref.conll | 36 + .../ui/kb/feature/ConceptFeatureSupport.java | 25 +- inception/pom.xml | 1 + 21 files changed, 4968 insertions(+), 51 deletions(-) create mode 100644 inception/inception-io-rdf/LICENSE.txt create mode 100644 inception/inception-io-rdf/pom.xml create mode 100644 inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java create mode 100644 inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java create mode 100644 inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java create mode 100644 
inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/config/RdfFormatAutoConfiguration.java create mode 100644 inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java create mode 100644 inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java create mode 100644 inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java create mode 100644 inception/inception-io-rdf/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports create mode 100644 inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java create mode 100644 inception/inception-io-rdf/src/test/resources/conll/2006/README.txt create mode 100644 inception/inception-io-rdf/src/test/resources/conll/2006/fi-orig.conll create mode 100644 inception/inception-io-rdf/src/test/resources/conll/2006/fi-ref.ttl create mode 100644 inception/inception-io-rdf/src/test/resources/log4j2-test.xml create mode 100644 inception/inception-io-rdf/src/test/resources/ttl/fi-orig.ttl create mode 100644 inception/inception-io-rdf/src/test/resources/ttl/fi-ref.conll diff --git a/inception/inception-app-webapp/pom.xml b/inception/inception-app-webapp/pom.xml index 22ddd2740f4..b4ecaefcbfd 100644 --- a/inception/inception-app-webapp/pom.xml +++ b/inception/inception-app-webapp/pom.xml @@ -359,31 +359,35 @@ de.tudarmstadt.ukp.inception.app - inception-io-json + inception-io-bioc de.tudarmstadt.ukp.inception.app - inception-io-webanno-tsv + inception-io-brat de.tudarmstadt.ukp.inception.app - inception-io-text + inception-io-conll de.tudarmstadt.ukp.inception.app - inception-io-tcf + inception-io-imscwb de.tudarmstadt.ukp.inception.app - inception-io-nif + inception-io-html de.tudarmstadt.ukp.inception.app - inception-io-imscwb + inception-io-intertext de.tudarmstadt.ukp.inception.app - inception-io-intertext + 
inception-io-json + + + de.tudarmstadt.ukp.inception.app + inception-io-nif de.tudarmstadt.ukp.inception.app @@ -391,28 +395,27 @@ de.tudarmstadt.ukp.inception.app - inception-io-bioc + inception-io-rdf de.tudarmstadt.ukp.inception.app - inception-io-brat + inception-io-tcf de.tudarmstadt.ukp.inception.app - inception-io-conll + inception-io-tei de.tudarmstadt.ukp.inception.app - inception-io-xmi + inception-io-text de.tudarmstadt.ukp.inception.app - inception-io-tei + inception-io-webanno-tsv - - org.apache.commons - commons-lang3 + de.tudarmstadt.ukp.inception.app + inception-io-xmi @@ -421,7 +424,6 @@ uimaj-core - org.apache.wicket @@ -549,8 +551,8 @@ sentry-spring-boot-starter - io.sentry - sentry-log4j2 + io.sentry + sentry-log4j2 @@ -664,6 +666,11 @@ info.picocli picocli-spring-boot-starter + + + org.apache.commons + commons-lang3 + no.nav.security @@ -953,17 +960,19 @@ de.tudarmstadt.ukp.inception.app:inception-io-bioc de.tudarmstadt.ukp.inception.app:inception-io-brat - de.tudarmstadt.ukp.inception.app:inception-io-nif - de.tudarmstadt.ukp.inception.app:inception-io-intertext + de.tudarmstadt.ukp.inception.app:inception-io-conll de.tudarmstadt.ukp.inception.app:inception-io-imscwb - de.tudarmstadt.ukp.inception.app:inception-io-perseus + de.tudarmstadt.ukp.inception.app:inception-io-html + de.tudarmstadt.ukp.inception.app:inception-io-intertext de.tudarmstadt.ukp.inception.app:inception-io-json - de.tudarmstadt.ukp.inception.app:inception-io-webanno-tsv + de.tudarmstadt.ukp.inception.app:inception-io-nif + de.tudarmstadt.ukp.inception.app:inception-io-perseus + de.tudarmstadt.ukp.inception.app:inception-io-rdf de.tudarmstadt.ukp.inception.app:inception-io-tcf - de.tudarmstadt.ukp.inception.app:inception-io-xmi - de.tudarmstadt.ukp.inception.app:inception-io-conll de.tudarmstadt.ukp.inception.app:inception-io-tei de.tudarmstadt.ukp.inception.app:inception-io-text + de.tudarmstadt.ukp.inception.app:inception-io-webanno-tsv + 
de.tudarmstadt.ukp.inception.app:inception-io-xmi de.tudarmstadt.ukp.inception.app:inception-doc diff --git a/inception/inception-bom/pom.xml b/inception/inception-bom/pom.xml index e518d9c97f3..5cff9529a56 100644 --- a/inception/inception-bom/pom.xml +++ b/inception/inception-bom/pom.xml @@ -423,6 +423,11 @@ inception-io-bioc 32.0-SNAPSHOT + + de.tudarmstadt.ukp.inception.app + inception-io-conll + 32.0-SNAPSHOT + de.tudarmstadt.ukp.inception.app inception-io-brat @@ -430,62 +435,62 @@ de.tudarmstadt.ukp.inception.app - inception-io-json + inception-io-html 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-text + inception-io-intertext 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-html + inception-io-json 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-webanno-tsv + inception-io-imscwb 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-tei + inception-io-lif 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-conll + inception-io-nif 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-imscwb + inception-io-perseus 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-intertext + inception-io-rdf 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-perseus + inception-io-tcf 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-nif + inception-io-tei 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-lif + inception-io-text 32.0-SNAPSHOT de.tudarmstadt.ukp.inception.app - inception-io-tcf + inception-io-webanno-tsv 32.0-SNAPSHOT @@ -493,6 +498,7 @@ inception-io-xmi 32.0-SNAPSHOT + de.tudarmstadt.ukp.inception.app inception-telemetry diff --git a/inception/inception-io-rdf/LICENSE.txt b/inception/inception-io-rdf/LICENSE.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/inception/inception-io-rdf/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR 
USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/inception/inception-io-rdf/pom.xml b/inception/inception-io-rdf/pom.xml new file mode 100644 index 00000000000..2d0857c1aed --- /dev/null +++ b/inception/inception-io-rdf/pom.xml @@ -0,0 +1,91 @@ + + + 4.0.0 + + de.tudarmstadt.ukp.inception.app + inception-app + 32.0-SNAPSHOT + + + inception-io-rdf + + INCEpTION - IO - RDF + + + + org.apache.uima + uimaj-core + + + org.apache.uima + uimafit-core + + + + org.dkpro.core + dkpro-core-api-parameter-asl + + + org.dkpro.core + dkpro-core-api-io-asl + + + + org.apache.jena + jena-core + + + org.apache.jena + jena-arq + + + + org.apache.commons + commons-collections4 + + + + de.tudarmstadt.ukp.inception.app + inception-diag + + + de.tudarmstadt.ukp.inception.app + inception-ui-kb + + + + org.dkpro.core + dkpro-core-testing-asl + test + + + org.dkpro.core + dkpro-core-api-ner-asl + test + + + org.dkpro.core + dkpro-core-io-conll-asl + test + + + \ No newline at end of file diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java new file mode 100644 index 00000000000..20113eaf3b7 --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.rdf; + +import static org.dkpro.core.api.resources.CompressionUtils.getInputStream; +import java.io.IOException; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdf.model.StmtIterator; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.RDFLanguages; +import org.apache.jena.vocabulary.RDF; +import org.apache.uima.UimaContext; +import org.apache.uima.cas.CASException; +import org.apache.uima.collection.CollectionException; +import org.apache.uima.fit.descriptor.MimeTypeCapability; +import org.apache.uima.fit.descriptor.ResourceMetaData; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase; +import org.dkpro.core.api.parameter.MimeTypes; +import org.dkpro.core.api.resources.CompressionUtils; +import de.tudarmstadt.ukp.inception.io.rdf.internal.Rdf2Uima; +import de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas; +import eu.openminted.share.annotations.api.DocumentationResource; + +/** + * Reads a CAS serialized as RDF. 
+ */ +@ResourceMetaData(name = "UIMA CAS RDF Reader") +@DocumentationResource("${docbase}/format-reference.html#format-${command}") +@MimeTypeCapability({ MimeTypes.APPLICATION_X_UIMA_RDF }) +public class RdfReader + extends JCasResourceCollectionReader_ImplBase +{ + private Resource res; + private Model model; + private StmtIterator contextIterator; + + @Override + public void initialize(UimaContext aContext) throws ResourceInitializationException + { + super.initialize(aContext); + + // Seek first view + try { + step(); + } + catch (IOException e) { + throw new ResourceInitializationException(e); + } + } + + @Override + public void getNext(JCas aJCas) throws IOException, CollectionException + { + initCas(aJCas, res); + + var context = contextIterator.next(); + try { + Rdf2Uima.convert(context, aJCas); + } + catch (CASException e) { + throw new CollectionException(e); + } + + // inFileCount++; + step(); + } + + private void closeAll() + { + res = null; + contextIterator = null; + } + + @Override + public void destroy() + { + closeAll(); + super.destroy(); + } + + @Override + public boolean hasNext() throws IOException, CollectionException + { + // If there is still an iterator, then there is still data. This requires that we call + // step() already during initialization. + return contextIterator != null; + } + + /** + * Seek the next view in the file. Stop once a view statement has been found without reading it. 
+ */ + private void step() throws IOException + { + // Open next file + while (true) { + if (res == null) { + // Call to super here because we want to know about the resources, not the articles + if (getResourceIterator().hasNext()) { + // There are still resources left to read + res = nextFile(); + // inFileCount = 0; + try (var is = getInputStream(res.getLocation(), res.getInputStream())) { + model = ModelFactory.createOntologyModel(); + RDFDataMgr.read(model, is, RDFLanguages.filenameToLang( + CompressionUtils.stripCompressionExtension(res.getLocation()))); + } + contextIterator = model.listStatements(null, RDF.type, + model.createResource(RdfCas.TYPE_VIEW)); + } + else { + // No more files to read + return; + } + } + + if (contextIterator.hasNext()) { + return; + } + + // End of file reached + closeAll(); + } + } +} diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java new file mode 100644 index 00000000000..4611ae4e053 --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.rdf; + +import static org.apache.jena.riot.RDFLanguages.fileExtToLang; + +import java.util.Set; + +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.RDFLanguages; +import org.apache.uima.UimaContext; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CASException; +import org.apache.uima.fit.descriptor.ConfigurationParameter; +import org.apache.uima.fit.descriptor.MimeTypeCapability; +import org.apache.uima.fit.descriptor.ResourceMetaData; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.dkpro.core.api.io.JCasFileWriter_ImplBase; +import org.dkpro.core.api.parameter.ComponentParameters; +import org.dkpro.core.api.parameter.MimeTypes; + +import de.tudarmstadt.ukp.inception.io.rdf.internal.Uima2Rdf; + +/** + * Writes the CAS out as RDF. + */ +@ResourceMetaData(name = "UIMA CAS RDF Writer") +// @DocumentationResource("${docbase}/format-reference.html#format-${command}") +@MimeTypeCapability({ MimeTypes.APPLICATION_X_UIMA_RDF }) +public class RdfWriter + extends JCasFileWriter_ImplBase +{ + /** + * Specify the suffix of output files. Default value .ttl. The file format will be + * chosen depending on the file suffix. 
+ * + * @see RDFLanguages + */ + public static final String PARAM_FILENAME_EXTENSION = ComponentParameters.PARAM_FILENAME_EXTENSION; + @ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".ttl") + private String filenameSuffix; + + public static final String PARAM_IRI_FEATURES = "iriFeatures"; + @ConfigurationParameter(name = PARAM_IRI_FEATURES, mandatory = false) + private Set iriFeatures; + + private Uima2Rdf uima2rdf; + + @Override + public void initialize(UimaContext aContext) throws ResourceInitializationException + { + super.initialize(aContext); + + uima2rdf = new Uima2Rdf(iriFeatures); + } + + @Override + public void process(JCas aJCas) throws AnalysisEngineProcessException + { + var model = ModelFactory.createOntologyModel(); + + try { + uima2rdf.convert(aJCas, model); + } + catch (CASException e) { + throw new AnalysisEngineProcessException(e); + } + + try (var docOS = getOutputStream(aJCas, filenameSuffix)) { + RDFDataMgr.write(docOS, model.getBaseModel(), fileExtToLang(filenameSuffix)); + } + catch (Exception e) { + throw new AnalysisEngineProcessException(e); + } + } +} diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java new file mode 100644 index 00000000000..607eb066be4 --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.rdf; + +import static java.util.stream.Collectors.toUnmodifiableSet; +import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; +import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; + +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.cas.CAS; +import org.apache.uima.collection.CollectionReaderDescription; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.resource.metadata.TypeSystemDescription; + +import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; +import de.tudarmstadt.ukp.clarin.webanno.model.Project; +import de.tudarmstadt.ukp.inception.io.rdf.config.RdfFormatAutoConfiguration; +import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService; +import de.tudarmstadt.ukp.inception.ui.kb.feature.ConceptFeatureSupport; + +/** + *

+ * This class is exposed as a Spring Component via + * {@link RdfFormatAutoConfiguration#uimaRdfCasFormatSupport}. + *

+ */ +public class UimaRdfCasFormatSupport + implements FormatSupport +{ + public static final String ID = "rdfcas"; + public static final String NAME = "UIMA CAS RDF"; + + private final AnnotationSchemaService schemaService; + + public UimaRdfCasFormatSupport(AnnotationSchemaService aSchemaService) + { + schemaService = aSchemaService; + } + + @Override + public String getId() + { + return ID; + } + + @Override + public String getName() + { + return NAME; + } + + @Override + public boolean isReadable() + { + return true; + } + + @Override + public boolean isWritable() + { + return true; + } + + @Override + public CollectionReaderDescription getReaderDescription(Project aProject, + TypeSystemDescription aTSD) + throws ResourceInitializationException + { + return createReaderDescription(RdfReader.class, aTSD); + } + + @Override + public AnalysisEngineDescription getWriterDescription(Project aProject, + TypeSystemDescription aTSD, CAS aCAS) + throws ResourceInitializationException + { + var iriFeatures = schemaService.listAnnotationFeature(aProject).stream() + .filter(f -> f.getType().startsWith(ConceptFeatureSupport.PREFIX)) + .map(f -> f.getLayer().getName() + ":" + f.getName()) + .collect(toUnmodifiableSet()); + + return createEngineDescription(RdfWriter.class, aTSD, // + RdfWriter.PARAM_IRI_FEATURES, iriFeatures); + } +} diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/config/RdfFormatAutoConfiguration.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/config/RdfFormatAutoConfiguration.java new file mode 100644 index 00000000000..2e225900639 --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/config/RdfFormatAutoConfiguration.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.rdf.config; + +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import de.tudarmstadt.ukp.inception.io.rdf.UimaRdfCasFormatSupport; +import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService; +import de.tudarmstadt.ukp.inception.ui.kb.feature.ConceptFeatureSupport; + +@Configuration +public class RdfFormatAutoConfiguration +{ + @ConditionalOnProperty(prefix = "format.rdf-cas", name = "enabled", // + havingValue = "true", matchIfMissing = false) + @Bean + public UimaRdfCasFormatSupport uimaRdfCasFormatSupport(AnnotationSchemaService aSchemaService, + ConceptFeatureSupport aConceptFeatureSupport) + { + return new UimaRdfCasFormatSupport(aSchemaService); + } +} diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java new file mode 100644 index 00000000000..a3b65992b51 --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java @@ -0,0 +1,199 @@ +/* + * Licensed to the 
Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.rdf.internal; + +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.collections4.iterators.IteratorIterable; +import org.apache.commons.lang3.StringUtils; +import org.apache.jena.ontology.OntResource; +import org.apache.jena.rdf.model.Resource; +import org.apache.jena.rdf.model.Statement; +import org.apache.jena.vocabulary.RDF; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.CASException; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.fit.util.CasUtil; +import org.apache.uima.fit.util.JCasUtil; +import org.apache.uima.jcas.JCas; + +import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
+
+/**
+ * Converts an RDF CAS model (see {@link RdfCas}) into UIMA views and feature structures.
+ */
+public class Rdf2Uima
+{
+    /**
+     * Rebuilds the CAS content described by an RDF CAS model inside the given JCas.
+     * <p>
+     * The conversion runs in four passes over the model that {@code aContext} belongs to: the
+     * views are created, an empty feature structure is created for every resource typed as
+     * {@link RdfCas#TYPE_FEATURE_STRUCTURE}, the feature values are filled in, and finally each
+     * feature structure is added to the indexes of the views it is marked as indexed in.
+     *
+     * @param aContext
+     *            a statement of the model to convert; only its model is used.
+     * @param aJCas
+     *            the JCas to populate.
+     * @throws CASException
+     *             propagated from {@link #convertView(Resource, JCas)}.
+     * @throws IllegalStateException
+     *             if a feature structure is marked as indexed in a resource that was not converted
+     *             as a view.
+     */
+    public static void convert(Statement aContext, JCas aJCas) throws CASException
+    {
+        var m = aContext.getModel();
+
+        // Set up names
+        var tView = m.createResource(RdfCas.TYPE_VIEW);
+        var tFeatureStructure = m.createResource(RdfCas.TYPE_FEATURE_STRUCTURE);
+        var pIndexedIn = m.createProperty(RdfCas.PROP_INDEXED_IN);
+
+        var fsIndex = new HashMap<Resource, FeatureStructure>();
+
+        // Convert the views/SofAs
+        var viewIndex = new HashMap<Resource, JCas>();
+        var viewIter = m.listSubjectsWithProperty(RDF.type, tView);
+        for (var view : new IteratorIterable<Resource>(viewIter)) {
+            var viewJCas = convertView(view, aJCas);
+            viewIndex.put(view, viewJCas);
+            fsIndex.put(view, viewJCas.getSofa());
+        }
+
+        // Convert the FSes but without setting their feature values yet - we cannot fill
+        // the feature values just yet because some of them may point to FSes not yet created
+        var fses = m.listSubjectsWithProperty(RDF.type, tFeatureStructure).toList();
+        for (var fs : fses) {
+            var uimaFS = initFS(fs.as(OntResource.class), aJCas);
+            fsIndex.put(fs, uimaFS);
+        }
+
+        // Now fill the FSes with their feature values
+        for (var fs : fses) {
+            convertFS(fs.as(OntResource.class), aJCas, fsIndex);
+        }
+
+        // Finally add the FSes to the indexes of the respective views
+        for (var fs : fses) {
+            var indexedInIter = fs.listProperties(pIndexedIn);
+            for (var indexedIn : new IteratorIterable<Statement>(indexedInIter)) {
+                var viewJCas = viewIndex.get(indexedIn.getResource());
+                if (viewJCas == null) {
+                    throw new IllegalStateException("Feature structure [" + fs.getURI()
+                            + "] is indexed in [" + indexedIn.getResource().getURI()
+                            + "] which is not a known view");
+                }
+                viewJCas.addFsToIndexes(fsIndex.get(fs));
+            }
+        }
+    }
+
+    /**
+     * Creates (or fetches) the view named by the given RDF view resource and sets its sofa data.
+     *
+     * @param aView
+     *            the RDF resource describing the view.
+     * @param aJCas
+     *            the JCas in which the view is created.
+     * @return the JCas of the view.
+     * @throws CASException
+     *             declared for callers; presumably raised when the view cannot be obtained.
+     * @throws IllegalArgumentException
+     *             if the view resource carries no sofa ID.
+     */
+    public static JCas convertView(Resource aView, JCas aJCas) throws CASException
+    {
+        var m = aView.getModel();
+
+        // Set up names
+        var pSofaID = m.createProperty(RdfCas.PROP_SOFA_ID);
+        var pSofaString = m.createProperty(RdfCas.PROP_SOFA_STRING);
+        var pSofaMimeType = m.createProperty(RdfCas.PROP_SOFA_MIME_TYPE);
+
+        // Get the values - getProperty returns null when the property is absent
+        var sofaIdStmt = aView.getProperty(pSofaID);
+        if (sofaIdStmt == null) {
+            throw new IllegalArgumentException(
+                    "View [" + aView.getURI() + "] does not specify a sofa ID");
+        }
+        var sofaStringStmt = aView.getProperty(pSofaString);
+        var sofaMimeTypeStmt = aView.getProperty(pSofaMimeType);
+
+        // Instantiate the view/SofA
+        var view = JCasUtil.getView(aJCas, sofaIdStmt.getString(), true);
+
+        // The writer omits string features that are null, so a view without text has no sofa
+        // string statement. In that case the sofa data is simply left unset.
+        if (sofaStringStmt != null) {
+            var sofaMimeType = sofaMimeTypeStmt != null ? sofaMimeTypeStmt.getString() : null;
+            view.setSofaDataString(sofaStringStmt.getString(), sofaMimeType);
+        }
+
+        return view;
+    }
+
+    /**
+     * Creates the (still empty) UIMA feature structure for the given RDF resource.
+     *
+     * @param aFS
+     *            the RDF resource describing the feature structure.
+     * @param aJCas
+     *            the JCas in which the feature structure is created.
+     * @return the new feature structure, or the existing {@link DocumentMetaData} of the JCas if
+     *         the resource is of that type.
+     * @throws IllegalArgumentException
+     *             if the resource does not have exactly one type in the {@link RdfCas#NS_UIMA}
+     *             namespace once the RDF CAS marker types have been discarded.
+     */
+    public static FeatureStructure initFS(OntResource aFS, JCas aJCas)
+    {
+        var cas = aJCas.getCas();
+
+        // Figure out the UIMA type - there can be only one type per FS. Types without URI
+        // (blank nodes) and the RDF CAS marker types do not name a UIMA type.
+        var types = aFS.listRDFTypes(true).toSet();
+        types.removeIf(res -> res.getURI() == null || res.getURI().startsWith(RdfCas.NS_RDFCAS));
+        if (types.size() != 1) {
+            throw new IllegalArgumentException("Expected exactly one UIMA type for ["
+                    + aFS.getURI() + "] but found " + types.size() + ": " + types);
+        }
+
+        var typeUri = types.iterator().next().getURI();
+        if (!typeUri.startsWith(RdfCas.NS_UIMA)) {
+            throw new IllegalArgumentException(
+                    "Type [" + typeUri + "] of [" + aFS.getURI() + "] is not a UIMA type");
+        }
+        var type = CasUtil.getType(cas, typeUri.substring(RdfCas.NS_UIMA.length()));
+
+        FeatureStructure fs;
+        if (type.getName().equals(DocumentMetaData.class.getName())) {
+            // Special handling to avoid ending up with two document annotations in the CAS
+            fs = DocumentMetaData.get(aJCas);
+        }
+        else {
+            fs = cas.createFS(type);
+        }
+
+        return fs;
+    }
+
+    /**
+     * Copies the feature values of the given RDF resource onto its UIMA feature structure.
+     *
+     * @param aFS
+     *            the RDF resource describing the feature structure.
+     * @param aJCas
+     *            the JCas being populated; used to recognize the document annotation.
+     * @param aFsIndex
+     *            maps RDF resources to the UIMA feature structures created for them.
+     * @return the UIMA feature structure that was filled.
+     * @throws IllegalStateException
+     *             if no UIMA feature structure is registered for {@code aFS} or for the target of
+     *             a reference feature.
+     * @throws IllegalArgumentException
+     *             if a statement names a feature that the UIMA type does not have, or a primitive
+     *             feature has an unsupported range.
+     */
+    public static FeatureStructure convertFS(OntResource aFS, JCas aJCas,
+            Map<Resource, FeatureStructure> aFsIndex)
+    {
+        var fs = aFsIndex.get(aFS);
+        if (fs == null) {
+            throw new IllegalStateException("No UIMA FS found for [" + aFS.getURI() + "]");
+        }
+
+        var stmtIter = aFS.listProperties();
+        for (var stmt : new IteratorIterable<Statement>(stmtIter)) {
+            // Skip all non-features
+            if (!stmt.getPredicate().getURI().startsWith(RdfCas.NS_UIMA)) {
+                continue;
+            }
+
+            // Feature properties are named <type>-<feature base name>
+            var featureName = StringUtils.substringAfterLast(stmt.getPredicate().getURI(), "-");
+            var uimaFeat = fs.getType().getFeatureByBaseName(featureName);
+            if (uimaFeat == null) {
+                throw new IllegalArgumentException("Type [" + fs.getType().getName()
+                        + "] has no feature [" + featureName + "] (from property ["
+                        + stmt.getPredicate().getURI() + "])");
+            }
+
+            // Cannot update start/end of document annotation because that FS is already indexed, so
+            // we skip those
+            if (fs == aJCas.getDocumentAnnotationFs()
+                    && (CAS.FEATURE_BASE_NAME_BEGIN.equals(featureName)
+                            || CAS.FEATURE_BASE_NAME_END.equals(featureName))) {
+                continue;
+            }
+
+            if (uimaFeat.getRange().isPrimitive()) {
+                switch (uimaFeat.getRange().getName()) {
+                case CAS.TYPE_NAME_BOOLEAN:
+                    fs.setBooleanValue(uimaFeat, stmt.getObject().asLiteral().getBoolean());
+                    break;
+                case CAS.TYPE_NAME_BYTE:
+                    fs.setByteValue(uimaFeat, stmt.getObject().asLiteral().getByte());
+                    break;
+                case CAS.TYPE_NAME_DOUBLE:
+                    fs.setDoubleValue(uimaFeat, stmt.getObject().asLiteral().getDouble());
+                    break;
+                case CAS.TYPE_NAME_FLOAT:
+                    fs.setFloatValue(uimaFeat, stmt.getObject().asLiteral().getFloat());
+                    break;
+                case CAS.TYPE_NAME_INTEGER:
+                    fs.setIntValue(uimaFeat, stmt.getObject().asLiteral().getInt());
+                    break;
+                case CAS.TYPE_NAME_LONG:
+                    fs.setLongValue(uimaFeat, stmt.getObject().asLiteral().getLong());
+                    break;
+                case CAS.TYPE_NAME_SHORT:
+                    fs.setShortValue(uimaFeat, stmt.getObject().asLiteral().getShort());
+                    break;
+                case CAS.TYPE_NAME_STRING: {
+                    // String features exported as IRIs come back as resources, not literals
+                    if (stmt.getObject().isLiteral()) {
+                        fs.setStringValue(uimaFeat, stmt.getObject().asLiteral().getString());
+                    }
+                    else {
+                        fs.setStringValue(uimaFeat, stmt.getObject().asResource().getURI());
+                    }
+                    break;
+                }
+                default:
+                    throw new IllegalArgumentException(
+                            "Feature [" + uimaFeat.getName() + "] has unsupported primitive type ["
+                                    + uimaFeat.getRange().getName() + "]");
+                }
+            }
+            else {
+                FeatureStructure targetUimaFS = aFsIndex.get(stmt.getObject().asResource());
+                if (targetUimaFS == null) {
+                    throw new IllegalStateException("No UIMA FS found for ["
+                            + stmt.getObject().asResource().getURI() + "]");
+                }
+                fs.setFeatureValue(uimaFeat, targetUimaFS);
+            }
+        }
+
+        return fs;
+    }
+}
diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java new file mode 100644 index 00000000000..afa43cafd75 --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package de.tudarmstadt.ukp.inception.io.rdf.internal; + +import org.apache.uima.cas.CAS;
+
+/**
+ * RDF CAS vocabulary: the namespaces, class IRIs and property IRIs shared by the RDF CAS
+ * conversion code.
+ */
+public class RdfCas
+{
+    /** Namespace prefix label registered for {@link #NS_RDFCAS}. */
+    public static final String PREFIX_RDFCAS = "rdfcas";
+
+    /** Namespace of the RDF CAS vocabulary itself. */
+    public static final String NS_RDFCAS = "http://uima.apache.org/rdf/cas#";
+
+    /** IRI scheme prepended to UIMA type names. */
+    public static final String NS_UIMA = "uima:";
+
+    public static final String PROP_VIEW = NS_RDFCAS + "view";
+
+    /** Property linking a feature structure to a view. */
+    public static final String PROP_INDEXED_IN = NS_RDFCAS + "indexedIn";
+
+    // public static final String TYPE_CAS = NS_RDFCAS + "CAS";
+
+    /** Class of resources representing a view. */
+    public static final String TYPE_VIEW = NS_RDFCAS + "View";
+
+    /** Class of resources representing a feature structure. */
+    public static final String TYPE_FEATURE_STRUCTURE = NS_RDFCAS + "FeatureStructure";
+
+    // Common stem of the properties below: the Sofa type IRI followed by the separator that
+    // precedes the feature base name.
+    private static final String SOFA_PROPERTY_STEM = NS_UIMA + CAS.TYPE_NAME_SOFA + '-';
+
+    /** Property holding the sofa ID. */
+    public static final String PROP_SOFA_ID = SOFA_PROPERTY_STEM + CAS.FEATURE_BASE_NAME_SOFAID;
+
+    /** Property holding the sofa string. */
+    public static final String PROP_SOFA_STRING = SOFA_PROPERTY_STEM
+            + CAS.FEATURE_BASE_NAME_SOFASTRING;
+
+    /** Property holding the sofa MIME type. */
+    public static final String PROP_SOFA_MIME_TYPE = SOFA_PROPERTY_STEM
+            + CAS.FEATURE_BASE_NAME_SOFAMIME;
+}
diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java new file mode 100644 index 00000000000..60b318c7f0b --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.rdf.internal; + +import static java.lang.String.format; + +import java.util.HashSet; +import java.util.Set; +import java.util.regex.Pattern; + +import org.apache.jena.datatypes.xsd.XSDDatatype; +import org.apache.jena.ontology.Individual; +import org.apache.jena.ontology.OntModel; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.CASException; +import org.apache.uima.cas.Feature; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.Type; +import org.apache.uima.jcas.JCas; + +import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorUtils; +import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
+
+/**
+ * Converts the views and feature structures of a JCas into an RDF CAS model (see {@link RdfCas}).
+ */
+public class Uima2Rdf
+{
+    private static final String DOCUMENT_SCHEME = "doc:";
+
+    // Matches DKPro Core API type names. LONG is the package part including the trailing dot,
+    // MODULE the API module name and INMODULE the optional sub-package below "type".
+    private static final Pattern DKPRO_CORE_SCHEME = Pattern.compile(
+            "(?<LONG>de\\.tudarmstadt\\.ukp\\.dkpro\\.core\\.api\\.(?<MODULE>[^.]+)\\.type(\\.(?<INMODULE>.*))?\\.)[^.]+");
+
+    private final Set<String> iriFeatures = new HashSet<>();
+
+    /**
+     * @param aIriFeatures
+     *            full names of string features (as returned by {@link Feature#getName()}) whose
+     *            values are exported as resources instead of string literals; may be
+     *            {@code null}, which is treated as empty.
+     */
+    public Uima2Rdf(Set<String> aIriFeatures)
+    {
+        if (aIriFeatures != null) {
+            iriFeatures.addAll(aIriFeatures);
+        }
+    }
+
+    /**
+     * Exports all views of the given JCas into the target model.
+     * <p>
+     * Namespace prefixes are registered for the built-in UIMA packages, for the RDF CAS
+     * vocabulary and for every DKPro Core API package found in the type system.
+     *
+     * @param aJCas
+     *            the JCas to export.
+     * @param aTarget
+     *            the model receiving the statements.
+     * @throws CASException
+     *             propagated from iterating over the views.
+     */
+    public void convert(JCas aJCas, OntModel aTarget) throws CASException
+    {
+        // Set up prefix mappings
+        var ts = aJCas.getTypeSystem();
+        aTarget.setNsPrefix("cas", RdfCas.NS_UIMA + "uima.cas.");
+        aTarget.setNsPrefix("tcas", RdfCas.NS_UIMA + "uima.tcas.");
+        aTarget.setNsPrefix(RdfCas.PREFIX_RDFCAS, RdfCas.NS_RDFCAS);
+
+        // Additional prefix mappings for DKPro Core typesystems. One matcher is reused for all
+        // type names.
+        var nameMatcher = DKPRO_CORE_SCHEME.matcher("");
+        for (var t : ts.getProperlySubsumedTypes(ts.getTopType())) {
+            var typeName = t.getName();
+            if (typeName.endsWith("[]")) {
+                typeName = typeName.substring(0, typeName.length() - 2);
+            }
+            nameMatcher.reset(typeName);
+            if (nameMatcher.matches()) {
+                var prefix = nameMatcher.group("MODULE");
+                if (nameMatcher.group("INMODULE") != null) {
+                    prefix = prefix + "-" + nameMatcher.group("INMODULE");
+                }
+                aTarget.setNsPrefix(prefix, RdfCas.NS_UIMA + nameMatcher.group("LONG"));
+            }
+        }
+
+        var viewIterator = aJCas.getViewIterator();
+        while (viewIterator.hasNext()) {
+            convertView(viewIterator.next(), aTarget);
+        }
+    }
+
+    /**
+     * Exports one view: its sofa plus every feature structure reachable in that view.
+     */
+    private void convertView(JCas aJCas, OntModel aTarget)
+    {
+        // Shorten down variable name for model
+        var m = aTarget;
+
+        // Set up names
+        var tView = m.createResource(RdfCas.TYPE_VIEW);
+        var tFeatureStructure = m.createResource(RdfCas.TYPE_FEATURE_STRUCTURE);
+        var pIndexedIn = m.createProperty(RdfCas.PROP_INDEXED_IN);
+
+        // Get a URI for the document
+        var dmd = DocumentMetaData.get(aJCas);
+        var docuri = dmd.getDocumentUri() != null ? dmd.getDocumentUri()
+                : DOCUMENT_SCHEME + dmd.getDocumentId();
+
+        // These only collect a single view...
+        var reachable = CasDoctorUtils.collectReachable(aJCas.getCas());
+        var indexed = CasDoctorUtils.collectIndexed(aJCas.getCas());
+        // ... they do not collect the SOFA, so we add that explicitly
+        reachable.add(aJCas.getSofa());
+
+        // Set up the view itself. Subject URIs are built with the same helper as the URIs of
+        // feature references so that both always agree.
+        var rdfView = m.createIndividual(rdfUri(docuri, aJCas.getSofa()), tView);
+
+        for (var uimaFS : reachable) {
+            var rdfFS = m.createIndividual(rdfUri(docuri, uimaFS),
+                    m.createResource(rdfType(uimaFS.getType())));
+
+            // The SoFa is not a regular FS - do not mark it as such
+            if (uimaFS != aJCas.getSofa()) {
+                rdfFS.addOntClass(tFeatureStructure);
+            }
+
+            // Internal UIMA information
+            if (indexed.contains(uimaFS)) {
+                rdfFS.addProperty(pIndexedIn, rdfView);
+            }
+
+            // Convert features
+            convertFeatures(docuri, uimaFS, rdfFS);
+        }
+    }
+
+    /**
+     * Adds one statement per feature of the feature structure to the given individual. String
+     * and reference features whose value is {@code null} are omitted.
+     */
+    private void convertFeatures(String docuri, FeatureStructure uimaFS, Individual rdfFS)
+    {
+        var m = rdfFS.getOntModel();
+
+        for (var uimaFeat : uimaFS.getType().getFeatures()) {
+            var rdfFeat = m.createProperty(rdfFeature(uimaFeat));
+            if (uimaFeat.getRange().isPrimitive()) {
+                switch (uimaFeat.getRange().getName()) {
+                case CAS.TYPE_NAME_BOOLEAN:
+                    rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getBooleanValue(uimaFeat),
+                            XSDDatatype.XSDboolean));
+                    break;
+                case CAS.TYPE_NAME_BYTE:
+                    rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getByteValue(uimaFeat),
+                            XSDDatatype.XSDbyte));
+                    break;
+                case CAS.TYPE_NAME_DOUBLE:
+                    rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getDoubleValue(uimaFeat),
+                            XSDDatatype.XSDdouble));
+                    break;
+                case CAS.TYPE_NAME_FLOAT:
+                    rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getFloatValue(uimaFeat),
+                            XSDDatatype.XSDfloat));
+                    break;
+                case CAS.TYPE_NAME_INTEGER:
+                    rdfFS.addLiteral(rdfFeat,
+                            m.createTypedLiteral(uimaFS.getIntValue(uimaFeat), XSDDatatype.XSDint));
+                    break;
+                case CAS.TYPE_NAME_LONG:
+                    rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getLongValue(uimaFeat),
+                            XSDDatatype.XSDlong));
+                    break;
+                case CAS.TYPE_NAME_SHORT:
+                    rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getShortValue(uimaFeat),
+                            XSDDatatype.XSDshort));
+                    break;
+                case CAS.TYPE_NAME_STRING: {
+                    var s = uimaFS.getStringValue(uimaFeat);
+                    if (s != null) {
+                        // Features configured as IRI features become resources, all other
+                        // strings become typed literals
+                        if (iriFeatures.contains(uimaFeat.getName())) {
+                            rdfFS.addProperty(rdfFeat, m.createResource(s));
+                        }
+                        else {
+                            rdfFS.addLiteral(rdfFeat,
+                                    m.createTypedLiteral(s, XSDDatatype.XSDstring));
+                        }
+                    }
+                    break;
+                }
+                default:
+                    throw new IllegalArgumentException(
+                            "Feature [" + uimaFeat.getName() + "] has unsupported primitive type ["
+                                    + uimaFeat.getRange().getName() + "]");
+                }
+            }
+            else {
+                var targetUimaFS = uimaFS.getFeatureValue(uimaFeat);
+                if (targetUimaFS != null) {
+                    rdfFS.addProperty(rdfFeat, m.createResource(rdfUri(docuri, targetUimaFS)));
+                }
+            }
+        }
+    }
+
+    /**
+     * Builds the URI of a feature structure: the document URI followed by {@code #} and the
+     * low-level reference of the feature structure.
+     */
+    private static String rdfUri(String docuri, FeatureStructure uimaFS)
+    {
+        return format("%s#%d", docuri, uimaFS.getCAS().getLowLevelCAS().ll_getFSRef(uimaFS));
+    }
+
+    /**
+     * Builds the property URI of a feature: the URI of its domain type, a dash and the short
+     * name of the feature.
+     */
+    private static String rdfFeature(Feature aUimaFeature)
+    {
+        return rdfType(aUimaFeature.getDomain()) + "-" + aUimaFeature.getShortName();
+    }
+
+    /**
+     * Builds the class URI of a UIMA type: {@link RdfCas#NS_UIMA} followed by the type name.
+     */
+    private static String rdfType(Type aUimaType)
+    {
+        return RdfCas.NS_UIMA + aUimaType.getName();
+    }
+}
diff --git a/inception/inception-io-rdf/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/inception/inception-io-rdf/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports new file mode 100644 index 00000000000..5adf6e7857e --- /dev/null +++ b/inception/inception-io-rdf/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports @@ -0,0 +1 @@ +de.tudarmstadt.ukp.inception.io.rdf.config.RdfFormatAutoConfiguration diff --git a/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java new file
mode 100644 index 00000000000..efebcb2d994 --- /dev/null +++ b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.rdf; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Collections.sort; +import static org.apache.commons.lang3.StringUtils.join; +import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; +import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.contentOf; +import static org.assertj.core.api.Assertions.tuple; +import static org.dkpro.core.testing.IOTestRunner.testOneWay; +import static org.dkpro.core.testing.IOTestRunner.testRoundTrip; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; + +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.uima.fit.factory.JCasFactory; +import org.dkpro.core.io.conll.Conll2006Reader; +import org.dkpro.core.io.conll.Conll2006Writer; +import org.dkpro.core.testing.TestOptions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; +import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
+
+public class RdfWriterTest
+{
+    /**
+     * Reads a CoNLL 2006 file and compares the RDF written for it with the reference model.
+     */
+    @Test
+    void oneWay() throws Exception
+    {
+        testOneWay(Conll2006Reader.class, // the reader
+                RdfWriter.class, // the writer
+                "conll/2006/fi-ref.ttl", // the reference file for the output
+                "conll/2006/fi-orig.conll", // the input file for the test
+                new TestOptions().resultAssertor(this::assertModelEquals));
+    }
+
+    /**
+     * Reads an RDF file and compares the CoNLL 2006 output written for it with the reference.
+     */
+    @Test
+    void otherWay() throws Exception
+    {
+        testOneWay(RdfReader.class, // the reader
+                Conll2006Writer.class, // the writer
+                "ttl/fi-ref.conll", // the reference file for the output
+                "ttl/fi-orig.ttl"); // the input file for the test
+    }
+
+    /**
+     * Writes a named entity whose identifier is configured as an IRI feature, checks that the
+     * identifier is serialized as an IRI while the value stays a string literal, and then checks
+     * that reading the file back restores both values.
+     */
+    @Test
+    void readWriteWithIriFeatures(@TempDir File aTemp) throws Exception
+    {
+        var cas = JCasFactory.createJCas();
+        cas.setDocumentText("John");
+
+        var dmd = DocumentMetaData.create(cas);
+        dmd.setDocumentId("test.txt");
+
+        var ne = new NamedEntity(cas, 0, 4);
+        ne.setValue("PER");
+        ne.setIdentifier("iri:somewhere");
+        ne.addToIndexes();
+
+        var writer = createEngine( //
+                RdfWriter.class, //
+                RdfWriter.PARAM_IRI_FEATURES,
+                NamedEntity._TypeName + ":" + NamedEntity._FeatName_identifier,
+                RdfWriter.PARAM_STRIP_EXTENSION, true, //
+                RdfWriter.PARAM_TARGET_LOCATION, aTemp);
+
+        writer.process(cas);
+
+        // Match with patterns instead of exact strings so that the assertions do not depend on
+        // how the Turtle writer aligns or wraps predicate/object pairs.
+        var targetFile = new File(aTemp, "test.ttl");
+        assertThat(contentOf(targetFile, UTF_8)) //
+                .containsPattern("ner:NamedEntity-value\\s+\"PER\"\\s*;")
+                .containsPattern("ner:NamedEntity-identifier\\s+<iri:somewhere>\\s*;");
+
+        cas.reset();
+
+        var reader = createReader( //
+                RdfReader.class, RdfReader.PARAM_SOURCE_LOCATION, targetFile);
+
+        reader.getNext(cas.getCas());
+
+        assertThat(cas.select(NamedEntity.class).asList()) //
+                .extracting(NamedEntity::getIdentifier, NamedEntity::getValue) //
+                .containsExactly(tuple("iri:somewhere", "PER"));
+    }
+
+    @Disabled("Currently does not work because IDs are not stable on round-trips")
+    @Test
+    void roundTrip() throws Exception
+    {
+        testRoundTrip(RdfReader.class, // the reader
+                RdfWriter.class, // the writer
+                "ttl/fi-orig.ttl",
+                // "conll/2006/fi-ref.ttl",
+                new TestOptions().resultAssertor(this::assertModelEquals));
+    }
+
+    /**
+     * Compares two Turtle files statement by statement: both are parsed, their statements are
+     * rendered as strings and sorted, and the resulting listings must be equal.
+     */
+    private void assertModelEquals(File expected, File actual)
+    {
+        var mExpected = ModelFactory.createDefaultModel();
+        mExpected.read(expected.toURI().toString(), null, "TURTLE");
+        var sExpected = mExpected.listStatements().mapWith(s -> s.toString()).toList();
+        sort(sExpected);
+
+        var mActual = ModelFactory.createDefaultModel();
+        mActual.read(actual.toURI().toString(), null, "TURTLE");
+        var sActual = mActual.listStatements().mapWith(s -> s.toString()).toList();
+        sort(sActual);
+
+        assertEquals(join(sExpected, "\n"), join(sActual, "\n"));
+    }
+}
diff --git a/inception/inception-io-rdf/src/test/resources/conll/2006/README.txt b/inception/inception-io-rdf/src/test/resources/conll/2006/README.txt new file mode 100644 index 00000000000..930aa8f08ab --- /dev/null +++ b/inception/inception-io-rdf/src/test/resources/conll/2006/README.txt @@ -0,0 +1,7 @@ +fi-orig.conll + + First two sentences from FinnTreeBank 3.1 + http://www.ling.helsinki.fi/kieliteknologia/tutkimus/treebank/sources/ftb3.1.conllx.gz + Creative Commons Attribution 3.0 License + + \ No newline at end of file diff --git a/inception/inception-io-rdf/src/test/resources/conll/2006/fi-orig.conll b/inception/inception-io-rdf/src/test/resources/conll/2006/fi-orig.conll new file mode 100644 index 00000000000..3f324700758 --- /dev/null +++ b/inception/inception-io-rdf/src/test/resources/conll/2006/fi-orig.conll @@ -0,0 +1,38 @@ + +1 NEUVOSTO neuvosto N N N Nom Sg 2 attr _ _ +2 EURATOMIN Euratom N N N Prop Gen Sg 3 attr _ _ +3 HANKINTAKESKUKSEN hankinta#keskus N N N Gen Sg 4 attr _ _ +4 PERUSSÄÄNTÖ perus#sääntö N N N Nom Sg 7 attr _ _ +5 EUROOPAN Eurooppa N N N Prop Gen Sg 6 attr _ _ +6
ATOMIENERGIAYHTEISÖN atomi#energia#yhteisö N N N Gen Sg 7 attr _ _ +7 NEUVOSTO neuvosto N N N Nom Sg 0 main _ _ +8 , , Punct Punct Punct _ _ _ _ +9 joka joka Pron Pron Pron Rel Nom Sg 10 subj _ _ +10 ottaa ottaa V V V Prs Act Sg3 7 mod _ _ +11 huomioon huomio N N N Ill Sg 10 phrv _ _ +12 perustamissopimuksen perustamis#sopimus N N N Gen Sg 14 attr _ _ +13 54 54 Num Num Num Digit Nom Sg 14 attr _ _ +14 artiklan artikla N N N Gen Sg 10 obj _ _ +15 , , Punct Punct Punct 17 phrm _ _ +16 ja ja CC CC CC 17 phrm _ _ +17 ottaa ottaa V V V Prs Act Sg3 10 conjunct _ _ +18 huomioon huomio N N N Ill Sg 17 phrv _ _ +19 komission komissio N N N Gen Sg 20 subj _ _ +20 ehdotuksen ehdotus N N N Gen Sg 17 obj _ _ +21 , , Punct Punct Punct 23 phrm _ _ +22 ON olla V V V Prs Act Sg3 23 aux _ _ +23 PÄÄTTÄNYT päättää PrfPrc PrfPrc PrfPrc Act Pos Nom Sg 17 conjunct _ _ +24 antaa antaa V V V Inf1 Lat 23 obj _ _ +25 Euratomin Euratom N N N Prop Gen Sg 26 attr _ _ +26 hankintakeskuksen hankinta#keskus N N N Gen Sg 27 attr _ _ +27 perussäännön perus#sääntö N N N Gen Sg 24 obj _ _ +28 seuraavasti seuraava Adv Adv Adv Pos Man 24 advl _ _ +29 : : Punct Punct Punct _ _ _ _ + + +1 1 1 Num Num Num Digit Nom Sg 2 attr _ _ +2 artikla artikla N N N Nom Sg 3 attr _ _ +3 Nimi nimi N N N Nom Sg 0 main _ _ +4 ja ja CC CC CC 5 phrm _ _ +5 tarkoitus tarkoitus N N N Nom Sg 3 conjunct _ _ + diff --git a/inception/inception-io-rdf/src/test/resources/conll/2006/fi-ref.ttl b/inception/inception-io-rdf/src/test/resources/conll/2006/fi-ref.ttl new file mode 100644 index 00000000000..3903d343e41 --- /dev/null +++ b/inception/inception-io-rdf/src/test/resources/conll/2006/fi-ref.ttl @@ -0,0 +1,1796 @@ +@prefix anomaly: . +@prefix cas: . +@prefix coref: . +@prefix lexmorph-morph: . +@prefix lexmorph-pos: . +@prefix lexmorph-pos.tweet: . +@prefix metadata: . +@prefix ner: . +@prefix owl: . +@prefix rdf: . +@prefix rdfcas: . +@prefix rdfs: . +@prefix segmentation: . +@prefix semantics: . +@prefix structure: . 
+@prefix syntax: . +@prefix syntax-chunk: . +@prefix syntax-constituent: . +@prefix syntax-dependency: . +@prefix tcas: . +@prefix transform: . +@prefix xsd: . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "V" ; + lexmorph-pos:POS-coarseValue "V" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "Eurooppa" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . + + + rdf:type cas:Sofa , rdfcas:View ; + cas:Sofa-mimeType "text" ; + cas:Sofa-sofaID "_InitialView" ; + cas:Sofa-sofaNum "1"^^xsd:int ; + cas:Sofa-sofaString "NEUVOSTO EURATOMIN HANKINTAKESKUKSEN PERUSSÄÄNTÖ EUROOPAN ATOMIENERGIAYHTEISÖN NEUVOSTO , joka ottaa huomioon perustamissopimuksen 54 artiklan , ja ottaa huomioon komission ehdotuksen , ON PÄÄTTÄNYT antaa Euratomin hankintakeskuksen perussäännön seuraavasti :\n1 artikla Nimi ja tarkoitus\n" . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . 
+ + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "subj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "V Prs Act Sg3" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "258"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "neuvosto" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Prop Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "joka" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "artikla" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "CC" ; + lexmorph-pos:POS-coarseValue "CC" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "huomio" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ja" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "PrfPrc Act Pos Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "aux" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "258"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "54" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "88"^^xsd:int ; + tcas:Annotation-end "89"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Prop Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "subj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "," ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "V" ; + lexmorph-pos:POS-coarseValue "V" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "conjunct" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ottaa" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Adv" ; + lexmorph-pos:POS-coarseValue "Adv" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . 
+ + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "Euratom" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "V" ; + lexmorph-pos:POS-coarseValue "V" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "komissio" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "Euratom" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "88"^^xsd:int ; + tcas:Annotation-end "89"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . 
+ + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "conjunct" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Num Digit Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "," ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "perus#sääntö" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "V Prs Act Sg3" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "päättää" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . 
+ + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value ":" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "258"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "advl" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . 
+ + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Pron" ; + lexmorph-pos:POS-coarseValue "Pron" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "perus#sääntö" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrm" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "CC" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "atomi#energia#yhteisö" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "CC" ; + lexmorph-pos:POS-coarseValue "CC" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "conjunct" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Ill Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . 
+ + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Num" ; + lexmorph-pos:POS-coarseValue "Num" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "," ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "88"^^xsd:int ; + tcas:Annotation-end "89"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . + + + rdf:type syntax-dependency:ROOT , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "main" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "1" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "mod" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Punct" ; + lexmorph-pos:POS-coarseValue "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ottaa" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , metadata:DocumentMetaData ; + rdfcas:indexedIn ; + metadata:DocumentMetaData-documentId + "fi-orig.conll" ; + metadata:DocumentMetaData-documentTitle + "fi-orig.conll" ; + metadata:DocumentMetaData-isLastSegment + false ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "288"^^xsd:int ; + tcas:DocumentAnnotation-language + "x-unspecified" . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "nimi" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "V Prs Act Sg3" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrv" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "V" ; + lexmorph-pos:POS-coarseValue "V" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Adv Pos Man" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "perustamis#sopimus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "tarkoitus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "V Inf1 Lat" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . 
+ + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "artikla" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Sentence ; + rdfcas:indexedIn ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Punct" ; + lexmorph-pos:POS-coarseValue "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ja" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Prop Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "neuvosto" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "PrfPrc" ; + lexmorph-pos:POS-coarseValue "PrfPrc" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrm" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "huomio" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Punct" ; + lexmorph-pos:POS-coarseValue "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "258"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Sentence ; + rdfcas:indexedIn ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ehdotus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + rdf:type syntax-dependency:ROOT , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "main" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "hankinta#keskus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Pron Rel Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "olla" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrm" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "seuraava" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Ill Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "CC" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "antaa" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . 
+ + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Num Digit Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Punct" ; + lexmorph-pos:POS-coarseValue "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "88"^^xsd:int ; + tcas:Annotation-end "89"^^xsd:int . + + + rdf:type rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "hankinta#keskus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrm" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . 
+ + + rdf:type rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Num" ; + lexmorph-pos:POS-coarseValue "Num" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . + + + rdf:type segmentation:Token , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-order "0"^^xsd:int ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . + + + rdf:type syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrv" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . diff --git a/inception/inception-io-rdf/src/test/resources/log4j2-test.xml b/inception/inception-io-rdf/src/test/resources/log4j2-test.xml new file mode 100644 index 00000000000..7ea9011d8a4 --- /dev/null +++ b/inception/inception-io-rdf/src/test/resources/log4j2-test.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/inception/inception-io-rdf/src/test/resources/ttl/fi-orig.ttl b/inception/inception-io-rdf/src/test/resources/ttl/fi-orig.ttl new file mode 100644 index 00000000000..29faad42e48 --- /dev/null +++ b/inception/inception-io-rdf/src/test/resources/ttl/fi-orig.ttl @@ -0,0 +1,1760 @@ +@prefix coref: . +@prefix syntax-dependency: . +@prefix syntax-chunk: . +@prefix metadata: . +@prefix lexmorph-pos: . +@prefix anomaly: . +@prefix owl: . +@prefix lexmorph-morph: . +@prefix xsd: . +@prefix rdfs: . +@prefix semantics: . +@prefix lexmorph-pos.tweet: . +@prefix syntax-constituent: . +@prefix cas: . +@prefix rdf: . +@prefix tcas: . +@prefix rdfcas: . +@prefix syntax: . 
+@prefix segmentation: . +@prefix ner: . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "258"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrv" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ja" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "huomio" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "Euratom" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "V" ; + lexmorph-pos:POS-coarseValue "V" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "neuvosto" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value ":" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "258"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "päättää" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Prop Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Pron" ; + lexmorph-pos:POS-coarseValue "Pron" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "88"^^xsd:int ; + tcas:Annotation-end "89"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "olla" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "1" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "PrfPrc" ; + lexmorph-pos:POS-coarseValue "PrfPrc" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "," ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "88"^^xsd:int ; + tcas:Annotation-end "89"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Adv" ; + lexmorph-pos:POS-coarseValue "Adv" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "aux" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "antaa" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "Eurooppa" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrm" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ehdotus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "49"^^xsd:int ; + tcas:Annotation-end "57"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ja" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Punct" ; + lexmorph-pos:POS-coarseValue "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "88"^^xsd:int ; + tcas:Annotation-end "89"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "," ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Sentence ; + rdfcas:indexedIn ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "komissio" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Prop Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "hankinta#keskus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "258"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "V Inf1 Lat" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Num" ; + lexmorph-pos:POS-coarseValue "Num" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . 
+ + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "subj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "conjunct" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . + + + a metadata:DocumentMetaData , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + metadata:DocumentMetaData-documentId + "fi-orig.conll" ; + metadata:DocumentMetaData-documentTitle + "fi-orig.conll" ; + metadata:DocumentMetaData-isLastSegment + false ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "288"^^xsd:int ; + tcas:DocumentAnnotation-language + "x-unspecified" . 
+ + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "V Prs Act Sg3" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Ill Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "huomio" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "V" ; + lexmorph-pos:POS-coarseValue "V" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "joka" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "CC" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "hankinta#keskus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "0"^^xsd:int ; + tcas:Annotation-end "8"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "V" ; + lexmorph-pos:POS-coarseValue "V" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "88"^^xsd:int ; + tcas:Annotation-end "89"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "nimi" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Ill Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ottaa" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "perus#sääntö" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "54" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "seuraava" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "ottaa" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Punct" ; + lexmorph-pos:POS-coarseValue "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrm" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "CC" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . + + + a cas:Sofa , rdfcas:View ; + cas:Sofa-mimeType "text" ; + cas:Sofa-sofaID "_InitialView" ; + cas:Sofa-sofaNum "1"^^xsd:int ; + cas:Sofa-sofaString "NEUVOSTO EURATOMIN HANKINTAKESKUKSEN PERUSSÄÄNTÖ EUROOPAN ATOMIENERGIAYHTEISÖN NEUVOSTO , joka ottaa huomioon perustamissopimuksen 54 artiklan , ja ottaa huomioon komission ehdotuksen , ON PÄÄTTÄNYT antaa Euratomin hankintakeskuksen perussäännön seuraavasti :\n1 artikla Nimi ja tarkoitus\n" . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "perustamis#sopimus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Adv Pos Man" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrm" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "101"^^xsd:int ; + tcas:Annotation-end "109"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Punct" ; + lexmorph-pos:POS-coarseValue "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "258"^^xsd:int ; + tcas:Annotation-end "259"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Sentence ; + rdfcas:indexedIn ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Num Digit Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "PrfPrc Act Pos Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "163"^^xsd:int ; + tcas:Annotation-end "172"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "199"^^xsd:int ; + tcas:Annotation-end "204"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "artikla" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "," ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "184"^^xsd:int ; + tcas:Annotation-end "185"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "advl" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Num" ; + lexmorph-pos:POS-coarseValue "Num" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "tarkoitus" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "CC" ; + lexmorph-pos:POS-coarseValue "CC" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "275"^^xsd:int ; + tcas:Annotation-end "277"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "233"^^xsd:int ; + tcas:Annotation-end "245"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Pron Rel Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "215"^^xsd:int ; + tcas:Annotation-end "232"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrm" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "19"^^xsd:int ; + tcas:Annotation-end "36"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "conjunct" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "189"^^xsd:int ; + tcas:Annotation-end "198"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "CC" ; + lexmorph-pos:POS-coarseValue "CC" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "145"^^xsd:int ; + tcas:Annotation-end "147"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "246"^^xsd:int ; + tcas:Annotation-end "257"^^xsd:int . + + + a syntax-dependency:ROOT , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "main" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "Num Digit Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "atomi#energia#yhteisö" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "58"^^xsd:int ; + tcas:Annotation-end "78"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "270"^^xsd:int ; + tcas:Annotation-end "274"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "neuvosto" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "V Prs Act Sg3" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "186"^^xsd:int ; + tcas:Annotation-end "188"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "conjunct" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "278"^^xsd:int ; + tcas:Annotation-end "287"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "134"^^xsd:int ; + tcas:Annotation-end "142"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "260"^^xsd:int ; + tcas:Annotation-end "261"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Prop Gen Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "131"^^xsd:int ; + tcas:Annotation-end "133"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "subj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "90"^^xsd:int ; + tcas:Annotation-end "94"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "V" ; + lexmorph-pos:POS-coarseValue "V" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "mod" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "95"^^xsd:int ; + tcas:Annotation-end "100"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "attr" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "9"^^xsd:int ; + tcas:Annotation-end "18"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "perus#sääntö" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "artikla" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "262"^^xsd:int ; + tcas:Annotation-end "269"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "N Nom Sg" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "37"^^xsd:int ; + tcas:Annotation-end "48"^^xsd:int . + + + a rdfcas:FeatureStructure , syntax-dependency:ROOT ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "main" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "79"^^xsd:int ; + tcas:Annotation-end "87"^^xsd:int . + + + a rdfcas:FeatureStructure , segmentation:Lemma ; + rdfcas:indexedIn ; + segmentation:Lemma-value "Euratom" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "205"^^xsd:int ; + tcas:Annotation-end "214"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-morph:MorphologicalFeatures ; + rdfcas:indexedIn ; + lexmorph-morph:MorphologicalFeatures-value + "V Prs Act Sg3" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "148"^^xsd:int ; + tcas:Annotation-end "153"^^xsd:int . 
+ + + a rdfcas:FeatureStructure , syntax-dependency:Dependency ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "phrv" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "154"^^xsd:int ; + tcas:Annotation-end "162"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "N" ; + lexmorph-pos:POS-coarseValue "N" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "110"^^xsd:int ; + tcas:Annotation-end "130"^^xsd:int . + + + a rdfcas:FeatureStructure , lexmorph-pos:POS ; + rdfcas:indexedIn ; + lexmorph-pos:POS-PosValue "Punct" ; + lexmorph-pos:POS-coarseValue "Punct" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "143"^^xsd:int ; + tcas:Annotation-end "144"^^xsd:int . diff --git a/inception/inception-io-rdf/src/test/resources/ttl/fi-ref.conll b/inception/inception-io-rdf/src/test/resources/ttl/fi-ref.conll new file mode 100644 index 00000000000..813c864e3c7 --- /dev/null +++ b/inception/inception-io-rdf/src/test/resources/ttl/fi-ref.conll @@ -0,0 +1,36 @@ +1 NEUVOSTO neuvosto N N N Nom Sg 2 attr _ _ +2 EURATOMIN Euratom N N N Prop Gen Sg 3 attr _ _ +3 HANKINTAKESKUKSEN hankinta#keskus N N N Gen Sg 4 attr _ _ +4 PERUSSÄÄNTÖ perus#sääntö N N N Nom Sg 7 attr _ _ +5 EUROOPAN Eurooppa N N N Prop Gen Sg 6 attr _ _ +6 ATOMIENERGIAYHTEISÖN atomi#energia#yhteisö N N N Gen Sg 7 attr _ _ +7 NEUVOSTO neuvosto N N N Nom Sg 0 main _ _ +8 , , Punct Punct Punct _ _ _ _ +9 joka joka Pron Pron Pron Rel Nom Sg 10 subj _ _ +10 ottaa ottaa V V V Prs Act Sg3 7 mod _ _ +11 huomioon huomio N N N Ill Sg 10 phrv _ _ +12 perustamissopimuksen perustamis#sopimus N N N Gen Sg 14 attr _ _ +13 54 54 Num Num Num Digit Nom Sg 14 attr _ _ +14 artiklan artikla N N N Gen Sg 10 obj _ _ +15 , , Punct Punct Punct 17 phrm _ _ +16 ja ja CC CC CC 17 phrm _ _ +17 ottaa ottaa V V V Prs 
Act Sg3 10 conjunct _ _ +18 huomioon huomio N N N Ill Sg 17 phrv _ _ +19 komission komissio N N N Gen Sg 20 subj _ _ +20 ehdotuksen ehdotus N N N Gen Sg 17 obj _ _ +21 , , Punct Punct Punct 23 phrm _ _ +22 ON olla V V V Prs Act Sg3 23 aux _ _ +23 PÄÄTTÄNYT päättää PrfPrc PrfPrc PrfPrc Act Pos Nom Sg 17 conjunct _ _ +24 antaa antaa V V V Inf1 Lat 23 obj _ _ +25 Euratomin Euratom N N N Prop Gen Sg 26 attr _ _ +26 hankintakeskuksen hankinta#keskus N N N Gen Sg 27 attr _ _ +27 perussäännön perus#sääntö N N N Gen Sg 24 obj _ _ +28 seuraavasti seuraava Adv Adv Adv Pos Man 24 advl _ _ +29 : : Punct Punct Punct _ _ _ _ + +1 1 1 Num Num Num Digit Nom Sg 2 attr _ _ +2 artikla artikla N N N Nom Sg 3 attr _ _ +3 Nimi nimi N N N Nom Sg 0 main _ _ +4 ja ja CC CC CC 5 phrm _ _ +5 tarkoitus tarkoitus N N N Nom Sg 3 conjunct _ _ + diff --git a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureSupport.java b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureSupport.java index 800e842c9ab..c912ed7d573 100644 --- a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureSupport.java +++ b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureSupport.java @@ -18,10 +18,10 @@ package de.tudarmstadt.ukp.inception.ui.kb.feature; import static java.util.Arrays.asList; +import static java.util.Collections.emptyList; import static org.apache.commons.lang3.StringUtils.isNotBlank; import java.io.IOException; -import java.util.Collections; import java.util.List; import java.util.Optional; @@ -135,7 +135,7 @@ public String renderFeatureValue(AnnotationFeature aFeature, String aIdentifier) return null; } - ConceptFeatureTraits traits = readTraits(aFeature); + var traits = readTraits(aFeature); return getConceptHandle(aFeature, aIdentifier, traits).getUiLabel(); } @@ -172,9 +172,9 @@ public KBHandle 
wrapFeatureValue(AnnotationFeature aFeature, CAS aCAS, Object aV } if (aValue instanceof String) { - String identifier = (String) aValue; - ConceptFeatureTraits traits = readTraits(aFeature); - KBHandle chbk = getConceptHandle(aFeature, identifier, traits); + var identifier = (String) aValue; + var traits = readTraits(aFeature); + var chbk = getConceptHandle(aFeature, identifier, traits); // Clone the cached original so we can override the KB var clone = new KBHandle(chbk); clone.setKB(chbk.getKB()); @@ -196,25 +196,21 @@ public FeatureEditor createEditor(String aId, MarkupContainer aOwner, AnnotationActionHandler aHandler, IModel aStateModel, IModel aFeatureStateModel) { - AnnotationFeature feature = aFeatureStateModel.getObject().feature; - FeatureEditor editor; + var feature = aFeatureStateModel.getObject().feature; switch (feature.getMultiValueMode()) { case NONE: if (feature.getType().startsWith(PREFIX)) { - editor = new ConceptFeatureEditor(aId, aOwner, aFeatureStateModel, aStateModel, + return new ConceptFeatureEditor(aId, aOwner, aFeatureStateModel, aStateModel, aHandler); } else { throw unsupportedMultiValueModeException(feature); } - break; case ARRAY: // fall-through default: throw unsupportedMultiValueModeException(feature); } - - return editor; } @Override @@ -270,8 +266,7 @@ public void generateFeature(TypeSystemDescription aTSD, TypeDescription aTD, @Override public List lookupLazyDetails(AnnotationFeature aFeature, Object aValue) { - if (aValue instanceof KBHandle) { - var handle = (KBHandle) aValue; + if (aValue instanceof KBHandle handle) { var result = new VLazyDetailGroup(handle.getIdentifier()); result.addDetail(new VLazyDetail("Label", handle.getUiLabel())); @@ -282,13 +277,13 @@ public List lookupLazyDetails(AnnotationFeature aFeature, Obje return asList(result); } - return Collections.emptyList(); + return emptyList(); } @Override public boolean suppressAutoFocus(AnnotationFeature aFeature) { - ConceptFeatureTraits traits = 
readTraits(aFeature); + var traits = readTraits(aFeature); return !traits.getKeyBindings().isEmpty(); } } diff --git a/inception/pom.xml b/inception/pom.xml index bd042465b1b..429f8698408 100644 --- a/inception/pom.xml +++ b/inception/pom.xml @@ -210,6 +210,7 @@ inception-io-perseus inception-io-imscwb inception-io-intertext + inception-io-rdf inception-io-tcf inception-io-tei inception-io-text From 3ea91a11c09d134f717374ffaac07ef6338c2dd0 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Thu, 29 Feb 2024 07:51:49 +0100 Subject: [PATCH 2/3] #4567 - Add support for a generic CAS RDF export format - Towards migrating the code from Jena to RDF4J to avoid an extra dependency (ok, we still have Jena for NIF... but still) --- inception/inception-io-rdf/pom.xml | 20 +++- .../ukp/inception/io/rdf/RdfReader.java | 36 ++++--- .../ukp/inception/io/rdf/RdfWriter.java | 16 +-- .../inception/io/rdf/internal/Rdf2Uima.java | 102 +++++++++--------- .../inception/io/rdf/internal/Uima2Rdf.java | 75 ++++++------- .../ukp/inception/io/rdf/RdfWriterTest.java | 38 ++++--- .../schema/exporters/LayerExporter.java | 2 + 7 files changed, 157 insertions(+), 132 deletions(-) diff --git a/inception/inception-io-rdf/pom.xml b/inception/inception-io-rdf/pom.xml index 2d0857c1aed..b813aa68631 100644 --- a/inception/inception-io-rdf/pom.xml +++ b/inception/inception-io-rdf/pom.xml @@ -50,12 +50,24 @@
- org.apache.jena - jena-core + org.eclipse.rdf4j + rdf4j-model - org.apache.jena - jena-arq + org.eclipse.rdf4j + rdf4j-model-api + + + org.eclipse.rdf4j + rdf4j-rio-rdfxml + + + org.eclipse.rdf4j + rdf4j-rio-ntriples + + + org.eclipse.rdf4j + rdf4j-model-vocabulary diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java index 20113eaf3b7..6cbbd658944 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java @@ -18,13 +18,11 @@ package de.tudarmstadt.ukp.inception.io.rdf; import static org.dkpro.core.api.resources.CompressionUtils.getInputStream; +import static org.eclipse.rdf4j.rio.RDFFormat.RDFXML; + import java.io.IOException; -import org.apache.jena.rdf.model.Model; -import org.apache.jena.rdf.model.ModelFactory; -import org.apache.jena.rdf.model.StmtIterator; -import org.apache.jena.riot.RDFDataMgr; -import org.apache.jena.riot.RDFLanguages; -import org.apache.jena.vocabulary.RDF; +import java.util.Iterator; + import org.apache.uima.UimaContext; import org.apache.uima.cas.CASException; import org.apache.uima.collection.CollectionException; @@ -35,6 +33,13 @@ import org.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase; import org.dkpro.core.api.parameter.MimeTypes; import org.dkpro.core.api.resources.CompressionUtils; +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.rio.Rio; + import de.tudarmstadt.ukp.inception.io.rdf.internal.Rdf2Uima; import de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas; import eu.openminted.share.annotations.api.DocumentationResource; @@ 
-50,7 +55,8 @@ public class RdfReader { private Resource res; private Model model; - private StmtIterator contextIterator; + private Iterator contextIterator; + private final ValueFactory vf = SimpleValueFactory.getInstance(); @Override public void initialize(UimaContext aContext) throws ResourceInitializationException @@ -73,7 +79,7 @@ public void getNext(JCas aJCas) throws IOException, CollectionException var context = contextIterator.next(); try { - Rdf2Uima.convert(context, aJCas); + Rdf2Uima.convert(model, context, aJCas); } catch (CASException e) { throw new CollectionException(e); @@ -118,12 +124,14 @@ private void step() throws IOException res = nextFile(); // inFileCount = 0; try (var is = getInputStream(res.getLocation(), res.getInputStream())) { - model = ModelFactory.createOntologyModel(); - RDFDataMgr.read(model, is, RDFLanguages.filenameToLang( - CompressionUtils.stripCompressionExtension(res.getLocation()))); - } - contextIterator = model.listStatements(null, RDF.type, - model.createResource(RdfCas.TYPE_VIEW)); + var format = Rio + .getParserFormatForFileName(CompressionUtils + .stripCompressionExtension(res.getLocation())) + .orElse(RDFXML); + model = Rio.parse(is, res.getLocation().toString(), format); + } + + contextIterator = model.filter(null, RDF.TYPE, vf.createIRI(RdfCas.TYPE_VIEW)).iterator(); } else { // No more files to read diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java index 4611ae4e053..7e5ce9c676f 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java @@ -17,13 +17,10 @@ */ package de.tudarmstadt.ukp.inception.io.rdf; -import static org.apache.jena.riot.RDFLanguages.fileExtToLang; +import static org.eclipse.rdf4j.rio.RDFFormat.RDFXML; import 
java.util.Set; -import org.apache.jena.rdf.model.ModelFactory; -import org.apache.jena.riot.RDFDataMgr; -import org.apache.jena.riot.RDFLanguages; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.CASException; @@ -35,6 +32,8 @@ import org.dkpro.core.api.io.JCasFileWriter_ImplBase; import org.dkpro.core.api.parameter.ComponentParameters; import org.dkpro.core.api.parameter.MimeTypes; +import org.eclipse.rdf4j.model.impl.DynamicModelFactory; +import org.eclipse.rdf4j.rio.Rio; import de.tudarmstadt.ukp.inception.io.rdf.internal.Uima2Rdf; @@ -74,8 +73,8 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { - var model = ModelFactory.createOntologyModel(); - + var model = new DynamicModelFactory().createEmptyModel(); + try { uima2rdf.convert(aJCas, model); } @@ -84,7 +83,10 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException } try (var docOS = getOutputStream(aJCas, filenameSuffix)) { - RDFDataMgr.write(docOS, model.getBaseModel(), fileExtToLang(filenameSuffix)); + var format = Rio + .getParserFormatForFileName(filenameSuffix) + .orElse(RDFXML); + Rio.write(model, docOS, format); } catch (Exception e) { throw new AnalysisEngineProcessException(e); diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java index a3b65992b51..c8f908eab4c 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java @@ -19,79 +19,80 @@ import java.util.HashMap; import java.util.Map; -import org.apache.commons.collections4.iterators.IteratorIterable; + import 
org.apache.commons.lang3.StringUtils; -import org.apache.jena.ontology.OntResource; -import org.apache.jena.rdf.model.Resource; -import org.apache.jena.rdf.model.Statement; -import org.apache.jena.vocabulary.RDF; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; import org.apache.uima.cas.FeatureStructure; import org.apache.uima.fit.util.CasUtil; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.RDF; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; public class Rdf2Uima { - public static void convert(Statement aContext, JCas aJCas) throws CASException + public static void convert(Model aModel, Statement aContext, JCas aJCas) throws CASException { - var m = aContext.getModel(); + var vf = SimpleValueFactory.getInstance(); + var m = aModel; // Set up names - var tView = m.createResource(RdfCas.TYPE_VIEW); - var tFeatureStructure = m.createResource(RdfCas.TYPE_FEATURE_STRUCTURE); - var pIndexedIn = m.createProperty(RdfCas.PROP_INDEXED_IN); + var tView = vf.createIRI(RdfCas.TYPE_VIEW); + var tFeatureStructure = vf.createIRI(RdfCas.TYPE_FEATURE_STRUCTURE); + var pIndexedIn = vf.createIRI(RdfCas.PROP_INDEXED_IN); var fsIndex = new HashMap(); // Convert the views/SofAs var viewIndex = new HashMap(); - var viewIter = m.listSubjectsWithProperty(RDF.type, tView); - for (var view : new IteratorIterable(viewIter)) { - var viewJCas = convertView(view, aJCas); + for (var view : aModel.filter(null, RDF.TYPE, tView).subjects()) { + var viewJCas = convertView(aModel, view, aJCas); viewIndex.put(view, viewJCas); fsIndex.put(view, viewJCas.getSofa()); } // Convert the FSes but without setting their feature values yet - we cannot fill // the feature 
values just set because some of them may point to FSes not yet created - var fses = m.listSubjectsWithProperty(RDF.type, tFeatureStructure).toList(); + var fses = m.filter(null, RDF.TYPE, tFeatureStructure).subjects(); for (var fs : fses) { - var uimaFS = initFS(fs.as(OntResource.class), aJCas); + var uimaFS = initFS(aModel, fs, aJCas); fsIndex.put(fs, uimaFS); } // Now fill the FSes with their feature values for (var fs : fses) { - convertFS(fs.as(OntResource.class), aJCas, fsIndex); + convertFS(aModel, fs, aJCas, fsIndex); } // Finally add the FSes to the indexes of the respective views for (var fs : fses) { - var indexedInIter = fs.listProperties(pIndexedIn); - for (var indexedIn : new IteratorIterable(indexedInIter)) { - var viewJCas = viewIndex.get(indexedIn.getResource()); + for (var indexedIn : aModel.filter(fs, pIndexedIn, null).objects()) { + var viewJCas = viewIndex.get(indexedIn); viewJCas.addFsToIndexes(fsIndex.get(fs)); } } } - public static JCas convertView(Resource aView, JCas aJCas) throws CASException + public static JCas convertView(Model aModel, Resource aView, JCas aJCas) throws CASException { - var m = aView.getModel(); + var vf = SimpleValueFactory.getInstance(); // Set up names - var pSofaID = m.createProperty(RdfCas.PROP_SOFA_ID); - var pSofaString = m.createProperty(RdfCas.PROP_SOFA_STRING); - var pSofaMimeType = m.createProperty(RdfCas.PROP_SOFA_MIME_TYPE); + var pSofaID = vf.createIRI(RdfCas.PROP_SOFA_ID); + var pSofaString = vf.createIRI(RdfCas.PROP_SOFA_STRING); + var pSofaMimeType = vf.createIRI(RdfCas.PROP_SOFA_MIME_TYPE); // Get the values - var viewName = aView.getProperty(pSofaID).getString(); - var sofaString = aView.getProperty(pSofaString).getString(); - var sofaMimeType = aView.getProperty(pSofaMimeType).getString(); + var viewName = aModel.filter(aView, pSofaID, null).objects().iterator().next().stringValue(); + var sofaString = aModel.filter(aView, pSofaString, null).objects().iterator().next().stringValue(); + var 
sofaMimeType = aModel.filter(aView, pSofaMimeType, null).objects().iterator().next().stringValue(); // Instantiate the view/SofA var view = JCasUtil.getView(aJCas, viewName, true); @@ -100,16 +101,16 @@ public static JCas convertView(Resource aView, JCas aJCas) throws CASException return view; } - public static FeatureStructure initFS(OntResource aFS, JCas aJCas) + public static FeatureStructure initFS(Model aModel, Resource aFS, JCas aJCas) { var cas = aJCas.getCas(); // Figure out the UIMA type - there can be only one type per FS - var types = aFS.listRDFTypes(true).toSet(); - types.removeIf(res -> res.getURI().startsWith(RdfCas.NS_RDFCAS)); + var types = aModel.filter(aFS, RDF.TYPE, null).objects(); + types.removeIf(res -> res.stringValue().startsWith(RdfCas.NS_RDFCAS)); assert types.size() == 1; var type = CasUtil.getType(cas, - types.iterator().next().getURI().substring(RdfCas.NS_UIMA.length())); + types.iterator().next().stringValue().substring(RdfCas.NS_UIMA.length())); FeatureStructure fs; if (type.getName().equals(DocumentMetaData.class.getName())) { @@ -123,19 +124,18 @@ public static FeatureStructure initFS(OntResource aFS, JCas aJCas) return fs; } - public static FeatureStructure convertFS(OntResource aFS, JCas aJCas, + public static FeatureStructure convertFS(Model aModel, Resource aFS, JCas aJCas, Map aFsIndex) { var fs = aFsIndex.get(aFS); - var stmtIter = aFS.listProperties(); - for (var stmt : new IteratorIterable(stmtIter)) { + for (var stmt : aModel.filter(aFS, null, null)) { // Skip all non-features - if (!stmt.getPredicate().getURI().startsWith("uima:")) { + if (!stmt.getPredicate().stringValue().startsWith("uima:")) { continue; } - var featureName = StringUtils.substringAfterLast(stmt.getPredicate().getURI(), "-"); + var featureName = StringUtils.substringAfterLast(stmt.getPredicate().stringValue(), "-"); var uimaFeat = fs.getType().getFeatureByBaseName(featureName); // Cannot update start/end of document annotation because that FS is already 
indexed, so @@ -147,35 +147,35 @@ public static FeatureStructure convertFS(OntResource aFS, JCas aJCas, } if (uimaFeat.getRange().isPrimitive()) { + Literal literal = null; + if (stmt.getObject().isLiteral()) { + literal = (Literal) stmt; + } + switch (uimaFeat.getRange().getName()) { case CAS.TYPE_NAME_BOOLEAN: - fs.setBooleanValue(uimaFeat, stmt.getObject().asLiteral().getBoolean()); + fs.setBooleanValue(uimaFeat, literal.booleanValue()); break; case CAS.TYPE_NAME_BYTE: - fs.setByteValue(uimaFeat, stmt.getObject().asLiteral().getByte()); + fs.setByteValue(uimaFeat, literal.byteValue()); break; case CAS.TYPE_NAME_DOUBLE: - fs.setDoubleValue(uimaFeat, stmt.getObject().asLiteral().getDouble()); + fs.setDoubleValue(uimaFeat, literal.doubleValue()); break; case CAS.TYPE_NAME_FLOAT: - fs.setFloatValue(uimaFeat, stmt.getObject().asLiteral().getFloat()); + fs.setFloatValue(uimaFeat, literal.floatValue()); break; case CAS.TYPE_NAME_INTEGER: - fs.setIntValue(uimaFeat, stmt.getObject().asLiteral().getInt()); + fs.setIntValue(uimaFeat, literal.intValue()); break; case CAS.TYPE_NAME_LONG: - fs.setLongValue(uimaFeat, stmt.getObject().asLiteral().getLong()); + fs.setLongValue(uimaFeat, literal.longValue()); break; case CAS.TYPE_NAME_SHORT: - fs.setShortValue(uimaFeat, stmt.getObject().asLiteral().getShort()); + fs.setShortValue(uimaFeat, literal.shortValue()); break; case CAS.TYPE_NAME_STRING: { - if (stmt.getObject().isLiteral()) { - fs.setStringValue(uimaFeat, stmt.getObject().asLiteral().getString()); - } - else { - fs.setStringValue(uimaFeat, stmt.getObject().asResource().getURI()); - } + fs.setStringValue(uimaFeat, stmt.getObject().stringValue()); break; } default: @@ -185,10 +185,10 @@ public static FeatureStructure convertFS(OntResource aFS, JCas aJCas, } } else { - FeatureStructure targetUimaFS = aFsIndex.get(stmt.getObject().asResource()); + var targetUimaFS = aFsIndex.get(stmt.getObject()); if (targetUimaFS == null) { throw new IllegalStateException("No UIMA FS found 
for [" - + stmt.getObject().asResource().getURI() + "]"); + + stmt.getObject().stringValue() + "]"); } fs.setFeatureValue(uimaFeat, targetUimaFS); } diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java index 60b318c7f0b..108023f073d 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java @@ -23,16 +23,16 @@ import java.util.Set; import java.util.regex.Pattern; -import org.apache.jena.datatypes.xsd.XSDDatatype; -import org.apache.jena.ontology.Individual; -import org.apache.jena.ontology.OntModel; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; import org.apache.uima.cas.Feature; import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.Type; import org.apache.uima.jcas.JCas; - +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.RDF; import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorUtils; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; @@ -52,13 +52,13 @@ public Uima2Rdf(Set aIriFeatures) } } - public void convert(JCas aJCas, OntModel aTarget) throws CASException + public void convert(JCas aJCas, Model aTarget) throws CASException { // Set up prefix mappings var ts = aJCas.getTypeSystem(); - aTarget.setNsPrefix("cas", RdfCas.NS_UIMA + "uima.cas."); - aTarget.setNsPrefix("tcas", RdfCas.NS_UIMA + "uima.tcas."); - aTarget.setNsPrefix(RdfCas.PREFIX_RDFCAS, RdfCas.NS_RDFCAS); + aTarget.setNamespace("cas", RdfCas.NS_UIMA + "uima.cas."); + aTarget.setNamespace("tcas", RdfCas.NS_UIMA + "uima.tcas."); + aTarget.setNamespace(RdfCas.PREFIX_RDFCAS, RdfCas.NS_RDFCAS); 
// Additional prefix mappings for DKPro Core typesystems for (var t : ts.getProperlySubsumedTypes(ts.getTopType())) { @@ -73,7 +73,7 @@ public void convert(JCas aJCas, OntModel aTarget) throws CASException if (nameMatcher.group("INMODULE") != null) { prefix = prefix + "-" + nameMatcher.group("INMODULE"); } - aTarget.setNsPrefix(prefix, RdfCas.NS_UIMA + nameMatcher.group("LONG")); + aTarget.setNamespace(prefix, RdfCas.NS_UIMA + nameMatcher.group("LONG")); } } @@ -83,15 +83,14 @@ public void convert(JCas aJCas, OntModel aTarget) throws CASException } } - private void convertView(JCas aJCas, OntModel aTarget) + private void convertView(JCas aJCas, Model aTarget) { - // Shorten down variable name for model - var m = aTarget; + var vf = SimpleValueFactory.getInstance(); // Set up names - var tView = m.createResource(RdfCas.TYPE_VIEW); - var tFeatureStructure = m.createResource(RdfCas.TYPE_FEATURE_STRUCTURE); - var pIndexedIn = m.createProperty(RdfCas.PROP_INDEXED_IN); + var tView = vf.createIRI(RdfCas.TYPE_VIEW); + var tFeatureStructure = vf.createIRI(RdfCas.TYPE_FEATURE_STRUCTURE); + var pIndexedIn = vf.createIRI(RdfCas.PROP_INDEXED_IN); // Get a URI for the document var dmd = DocumentMetaData.get(aJCas); @@ -106,72 +105,66 @@ private void convertView(JCas aJCas, OntModel aTarget) // Set up the view itself var viewUri = format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(aJCas.getSofa())); - var rdfView = m.createIndividual(viewUri, tView); + var rdfView = vf.createIRI(viewUri); + aTarget.add(rdfView, RDF.TYPE, tView); for (var uimaFS : reachable) { var uri = format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(uimaFS)); - var rdfFS = m.createIndividual(uri, m.createResource(rdfType(uimaFS.getType()))); + var rdfFS = vf.createIRI(uri); + aTarget.add(rdfFS, RDF.TYPE, vf.createIRI(rdfType(uimaFS.getType()))); // The SoFa is not a regular FS - do not mark it as such if (uimaFS != aJCas.getSofa()) { - rdfFS.addOntClass(tFeatureStructure); + aTarget.add(rdfFS, 
RDF.TYPE, tFeatureStructure); } // Internal UIMA information if (indexed.contains(uimaFS)) { - rdfFS.addProperty(pIndexedIn, rdfView); + aTarget.add(rdfFS, pIndexedIn, rdfView); } // Convert features - convertFeatures(docuri, uimaFS, rdfFS); + convertFeatures(aTarget, docuri, uimaFS, rdfFS); } } - private void convertFeatures(String docuri, FeatureStructure uimaFS, Individual rdfFS) + private void convertFeatures(Model aTarget, String docuri, FeatureStructure uimaFS, IRI rdfFS) { - var m = rdfFS.getOntModel(); + var vf = SimpleValueFactory.getInstance(); for (var uimaFeat : uimaFS.getType().getFeatures()) { - var rdfFeat = m.createProperty(rdfFeature(uimaFeat)); + var rdfFeat = vf.createIRI(rdfFeature(uimaFeat)); if (uimaFeat.getRange().isPrimitive()) { switch (uimaFeat.getRange().getName()) { case CAS.TYPE_NAME_BOOLEAN: - rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getBooleanValue(uimaFeat), - XSDDatatype.XSDboolean)); + aTarget.add(rdfFS, rdfFeat, vf.createLiteral(uimaFS.getBooleanValue(uimaFeat))); break; case CAS.TYPE_NAME_BYTE: - rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getByteValue(uimaFeat), - XSDDatatype.XSDbyte)); + aTarget.add(rdfFS, rdfFeat, vf.createLiteral(uimaFS.getByteValue(uimaFeat))); break; case CAS.TYPE_NAME_DOUBLE: - rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getDoubleValue(uimaFeat), - XSDDatatype.XSDdouble)); + aTarget.add(rdfFS, rdfFeat, vf.createLiteral(uimaFS.getDoubleValue(uimaFeat))); break; case CAS.TYPE_NAME_FLOAT: - rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getFloatValue(uimaFeat), - XSDDatatype.XSDfloat)); + aTarget.add(rdfFS, rdfFeat, vf.createLiteral(uimaFS.getFloatValue(uimaFeat))); break; case CAS.TYPE_NAME_INTEGER: - rdfFS.addLiteral(rdfFeat, - m.createTypedLiteral(uimaFS.getIntValue(uimaFeat), XSDDatatype.XSDint)); + aTarget.add(rdfFS, rdfFeat, vf.createLiteral(uimaFS.getIntValue(uimaFeat))); break; case CAS.TYPE_NAME_LONG: - rdfFS.addLiteral(rdfFeat, 
m.createTypedLiteral(uimaFS.getLongValue(uimaFeat), - XSDDatatype.XSDlong)); + aTarget.add(rdfFS, rdfFeat, vf.createLiteral(uimaFS.getLongValue(uimaFeat))); break; case CAS.TYPE_NAME_SHORT: - rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getShortValue(uimaFeat), - XSDDatatype.XSDshort)); + aTarget.add(rdfFS, rdfFeat, vf.createLiteral(uimaFS.getShortValue(uimaFeat))); break; case CAS.TYPE_NAME_STRING: { var s = uimaFS.getStringValue(uimaFeat); if (s != null) { if (iriFeatures.contains(uimaFeat.getName())) { - rdfFS.addProperty(rdfFeat, m.createResource(s)); + aTarget.add(rdfFS, rdfFeat, vf.createIRI(s)); } else { - rdfFS.addLiteral(rdfFeat, - m.createTypedLiteral(s, XSDDatatype.XSDstring)); + aTarget.add(rdfFS, rdfFeat, vf.createLiteral(s)); } } break; @@ -185,7 +178,7 @@ private void convertFeatures(String docuri, FeatureStructure uimaFS, Individual else { var targetUimaFS = uimaFS.getFeatureValue(uimaFeat); if (targetUimaFS != null) { - rdfFS.addProperty(rdfFeat, m.createResource(rdfUri(docuri, targetUimaFS))); + aTarget.add(rdfFS, rdfFeat, vf.createIRI(rdfUri(docuri, targetUimaFS))); } } } diff --git a/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java index efebcb2d994..a3934d08193 100644 --- a/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java +++ b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java @@ -17,9 +17,9 @@ */ package de.tudarmstadt.ukp.inception.io.rdf; +import static java.lang.String.join; import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Collections.sort; -import static org.apache.commons.lang3.StringUtils.join; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader; import static 
org.assertj.core.api.Assertions.assertThat; @@ -27,15 +27,16 @@ import static org.assertj.core.api.Assertions.tuple; import static org.dkpro.core.testing.IOTestRunner.testOneWay; import static org.dkpro.core.testing.IOTestRunner.testRoundTrip; -import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.File; - -import org.apache.jena.rdf.model.ModelFactory; +import java.io.FileInputStream; +import java.util.ArrayList; import org.apache.uima.fit.factory.JCasFactory; import org.dkpro.core.io.conll.Conll2006Reader; import org.dkpro.core.io.conll.Conll2006Writer; import org.dkpro.core.testing.TestOptions; +import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.rio.Rio; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -117,16 +118,23 @@ void roundTrip() throws Exception private void assertModelEquals(File expected, File actual) { - var mExpected = ModelFactory.createDefaultModel(); - mExpected.read(expected.toURI().toString(), null, "TURTLE"); - var sExpected = mExpected.listStatements().mapWith(s -> s.toString()).toList(); - sort(sExpected); - - var mActual = ModelFactory.createDefaultModel(); - mActual.read(actual.toURI().toString(), null, "TURTLE"); - var sActual = mActual.listStatements().mapWith(s -> s.toString()).toList(); - sort(sActual); - - assertEquals(join(sExpected, "\n"), join(sActual, "\n")); + try { + var sExpected = new ArrayList(); + try (var is = new FileInputStream(expected)) { + Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sExpected.add(s.toString()));; + } + sort(sExpected); + + var sActual = new ArrayList(); + try (var is = new FileInputStream(actual)) { + Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sActual.add(s.toString()));; + } + sort(sActual); + + assertThat(join("\n", sActual)).isEqualTo(join("\n", sExpected)); + } + catch (Exception e) { + throw new RuntimeException(e); + } } } diff --git 
a/inception/inception-schema/src/main/java/de/tudarmstadt/ukp/inception/schema/exporters/LayerExporter.java b/inception/inception-schema/src/main/java/de/tudarmstadt/ukp/inception/schema/exporters/LayerExporter.java index 495fbf6969a..caac132ba61 100644 --- a/inception/inception-schema/src/main/java/de/tudarmstadt/ukp/inception/schema/exporters/LayerExporter.java +++ b/inception/inception-schema/src/main/java/de/tudarmstadt/ukp/inception/schema/exporters/LayerExporter.java @@ -178,6 +178,7 @@ private ExportedAnnotationFeature exportFeatureDetails(AnnotationFeature feature exFeature.setType(feature.getType()); exFeature.setUiName(feature.getUiName()); exFeature.setVisible(feature.isVisible()); + exFeature.setIncludeInHover(feature.isIncludeInHover()); exFeature.setMultiValueMode(feature.getMultiValueMode()); exFeature.setLinkMode(feature.getLinkMode()); exFeature.setLinkTypeName(feature.getLinkTypeName()); @@ -305,6 +306,7 @@ private void importFeature(AnnotationFeature aFeature, ExportedAnnotationFeature aFeature.setDescription(aExFeature.getDescription()); aFeature.setEnabled(aExFeature.isEnabled()); aFeature.setVisible(aExFeature.isVisible()); + aFeature.setIncludeInHover(aExFeature.isIncludeInHover()); aFeature.setUiName(aExFeature.getUiName()); aFeature.setProject(aProject); aFeature.setName(aExFeature.getName()); From 455f159279f2621ae62c71906ba0c380e2def988 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Thu, 29 Feb 2024 21:40:31 +0100 Subject: [PATCH 3/3] #4567 - Add support for a generic CAS RDF export format - Switch from Jena to RDF4J - Added documentation --- .../META-INF/asciidoc/user-guide.adoc | 2 + .../META-INF/asciidoc/user-guide/formats.adoc | 4 + inception/inception-io-rdf/pom.xml | 79 +++++++++++++++---- .../ukp/inception/io/rdf/RdfReader.java | 9 +-- .../ukp/inception/io/rdf/RdfWriter.java | 16 ++-- .../io/rdf/UimaRdfCasFormatSupport.java | 3 +- .../inception/io/rdf/internal/BasicIRI.java | 47 +++++++++++ 
.../inception/io/rdf/internal/Rdf2Uima.java | 55 +++++++------ .../ukp/inception/io/rdf/internal/RdfCas.java | 24 +++--- .../inception/io/rdf/internal/Uima2Rdf.java | 58 +++++++++----- .../asciidoc/user-guide/formats-rdfcas.adoc | 68 ++++++++++++++++ .../ukp/inception/io/rdf/RdfWriterTest.java | 14 ++-- 12 files changed, 282 insertions(+), 97 deletions(-) create mode 100644 inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java create mode 100644 inception/inception-io-rdf/src/main/resources/META-INF/asciidoc/user-guide/formats-rdfcas.adoc diff --git a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc index e875e943bbd..b2b3c75eb7b 100644 --- a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc +++ b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide.adoc @@ -284,6 +284,8 @@ include::{include-dir}formats-uimajson.adoc[leveloffset=+2] include::{include-dir}formats-uimajson-legacy.adoc[leveloffset=+2] +include::{include-dir}formats-rdfcas.adoc[leveloffset=+2] + include::{include-dir}formats-uimaxmi.adoc[leveloffset=+2] include::{include-dir}formats-webannotsv1.adoc[leveloffset=+2] diff --git a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats.adoc b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats.adoc index 46cbb7efcf2..7dd38dfb79c 100644 --- a/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats.adoc +++ b/inception/inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats.adoc @@ -140,6 +140,10 @@ data in a particular format. 
The **feature flag** column shows which flags you c | `json` | `format.json-cas-legacy.enabled` +| <> +| `rdfcas` +| `format.rdf-cas.enabled` + | <> | `xmi` | `format.uima-xmi.enabled` diff --git a/inception/inception-io-rdf/pom.xml b/inception/inception-io-rdf/pom.xml index b813aa68631..41d476d0291 100644 --- a/inception/inception-io-rdf/pom.xml +++ b/inception/inception-io-rdf/pom.xml @@ -31,6 +31,27 @@ INCEpTION - IO - RDF + + de.tudarmstadt.ukp.inception.app + inception-diag + + + de.tudarmstadt.ukp.inception.app + inception-ui-kb + + + de.tudarmstadt.ukp.inception.app + inception-model + + + de.tudarmstadt.ukp.inception.app + inception-schema-api + + + de.tudarmstadt.ukp.inception.app + inception-api-formats + + org.apache.uima uimaj-core @@ -39,6 +60,20 @@ org.apache.uima uimafit-core + + + org.springframework + spring-context + + + org.springframework.boot + spring-boot-autoconfigure + + + + org.apache.commons + commons-lang3 + org.dkpro.core @@ -48,7 +83,19 @@ org.dkpro.core dkpro-core-api-io-asl + + org.dkpro.core + dkpro-core-api-resources-asl + + + org.dkpro.core + dkpro-core-api-metadata-asl + + + org.eclipse.rdf4j + rdf4j-rio-api + org.eclipse.rdf4j rdf4j-model @@ -69,20 +116,6 @@ org.eclipse.rdf4j rdf4j-model-vocabulary - - - org.apache.commons - commons-collections4 - - - - de.tudarmstadt.ukp.inception.app - inception-diag - - - de.tudarmstadt.ukp.inception.app - inception-ui-kb - org.dkpro.core @@ -100,4 +133,22 @@ test + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + + org.eclipse.rdf4j:rdf4j-rio-rdfxml + org.eclipse.rdf4j:rdf4j-rio-ntriples + + + + + + \ No newline at end of file diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java index 6cbbd658944..e9aa8be78be 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java +++ 
b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfReader.java @@ -42,13 +42,12 @@ import de.tudarmstadt.ukp.inception.io.rdf.internal.Rdf2Uima; import de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas; -import eu.openminted.share.annotations.api.DocumentationResource; /** * Reads a CAS serialized as RDF. */ @ResourceMetaData(name = "UIMA CAS RDF Reader") -@DocumentationResource("${docbase}/format-reference.html#format-${command}") +// @DocumentationResource("${docbase}/format-reference.html#format-${command}") @MimeTypeCapability({ MimeTypes.APPLICATION_X_UIMA_RDF }) public class RdfReader extends JCasResourceCollectionReader_ImplBase @@ -129,9 +128,9 @@ private void step() throws IOException .stripCompressionExtension(res.getLocation())) .orElse(RDFXML); model = Rio.parse(is, res.getLocation().toString(), format); - } - - contextIterator = model.filter(null, RDF.TYPE, vf.createIRI(RdfCas.TYPE_VIEW)).iterator(); + } + + contextIterator = model.filter(null, RDF.TYPE, RdfCas.TYPE_VIEW).iterator(); } else { // No more files to read diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java index 7e5ce9c676f..e88f97c360d 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriter.java @@ -49,8 +49,6 @@ public class RdfWriter /** * Specify the suffix of output files. Default value .ttl. The file format will be * chosen depending on the file suffice. 
- * - * @see RDFLanguages */ public static final String PARAM_FILENAME_EXTENSION = ComponentParameters.PARAM_FILENAME_EXTENSION; @ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".ttl") @@ -61,20 +59,20 @@ public class RdfWriter private Set iriFeatures; private Uima2Rdf uima2rdf; - + @Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); - + uima2rdf = new Uima2Rdf(iriFeatures); } - + @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { var model = new DynamicModelFactory().createEmptyModel(); - + try { uima2rdf.convert(aJCas, model); } @@ -83,10 +81,8 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException } try (var docOS = getOutputStream(aJCas, filenameSuffix)) { - var format = Rio - .getParserFormatForFileName(filenameSuffix) - .orElse(RDFXML); - Rio.write(model, docOS, format); + var format = Rio.getParserFormatForFileName(filenameSuffix).orElse(RDFXML); + Rio.write(model, docOS, format); } catch (Exception e) { throw new AnalysisEngineProcessException(e); diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java index 607eb066be4..c84ce5dd530 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/UimaRdfCasFormatSupport.java @@ -91,8 +91,7 @@ public AnalysisEngineDescription getWriterDescription(Project aProject, { var iriFeatures = schemaService.listAnnotationFeature(aProject).stream() .filter(f -> f.getType().startsWith(ConceptFeatureSupport.PREFIX)) - .map(f -> f.getLayer().getName() + ":" + f.getName()) - .collect(toUnmodifiableSet()); + .map(f -> f.getLayer().getName() + ":" + 
f.getName()).collect(toUnmodifiableSet()); return createEngineDescription(RdfWriter.class, aTSD, // RdfWriter.PARAM_IRI_FEATURES, iriFeatures); diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java new file mode 100644 index 00000000000..e792ce362d6 --- /dev/null +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/BasicIRI.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.tudarmstadt.ukp.inception.io.rdf.internal; + +import org.eclipse.rdf4j.model.base.AbstractIRI; + +public class BasicIRI + extends AbstractIRI +{ + private static final long serialVersionUID = 4794310809421877727L; + + private final String namespace; + private final String localName; + + public BasicIRI(String aNamespace, String aLocalName) + { + namespace = aNamespace; + localName = aLocalName; + } + + @Override + public String getNamespace() + { + return namespace; + } + + @Override + public String getLocalName() + { + return localName; + } +} diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java index c8f908eab4c..8039f28bc60 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Rdf2Uima.java @@ -17,10 +17,19 @@ */ package de.tudarmstadt.ukp.inception.io.rdf.internal; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.NS_RDFCAS; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_INDEXED_IN; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_SOFA_ID; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_SOFA_MIME_TYPE; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PROP_SOFA_STRING; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.SCHEME_UIMA; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.TYPE_FEATURE_STRUCTURE; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.TYPE_VIEW; +import static org.apache.commons.lang3.StringUtils.substringAfterLast; + import java.util.HashMap; import java.util.Map; -import org.apache.commons.lang3.StringUtils; import org.apache.uima.cas.CAS; import 
org.apache.uima.cas.CASException; import org.apache.uima.cas.FeatureStructure; @@ -31,7 +40,6 @@ import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Statement; -import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDF; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; @@ -40,19 +48,13 @@ public class Rdf2Uima { public static void convert(Model aModel, Statement aContext, JCas aJCas) throws CASException { - var vf = SimpleValueFactory.getInstance(); var m = aModel; - // Set up names - var tView = vf.createIRI(RdfCas.TYPE_VIEW); - var tFeatureStructure = vf.createIRI(RdfCas.TYPE_FEATURE_STRUCTURE); - var pIndexedIn = vf.createIRI(RdfCas.PROP_INDEXED_IN); - var fsIndex = new HashMap(); // Convert the views/SofAs var viewIndex = new HashMap(); - for (var view : aModel.filter(null, RDF.TYPE, tView).subjects()) { + for (var view : aModel.filter(null, RDF.TYPE, TYPE_VIEW).subjects()) { var viewJCas = convertView(aModel, view, aJCas); viewIndex.put(view, viewJCas); fsIndex.put(view, viewJCas.getSofa()); @@ -60,7 +62,8 @@ public static void convert(Model aModel, Statement aContext, JCas aJCas) throws // Convert the FSes but without setting their feature values yet - we cannot fill // the feature values just set because some of them may point to FSes not yet created - var fses = m.filter(null, RDF.TYPE, tFeatureStructure).subjects(); + var fses = m.filter(null, RDF.TYPE, TYPE_FEATURE_STRUCTURE).subjects() + .toArray(Resource[]::new); for (var fs : fses) { var uimaFS = initFS(aModel, fs, aJCas); fsIndex.put(fs, uimaFS); @@ -73,7 +76,7 @@ public static void convert(Model aModel, Statement aContext, JCas aJCas) throws // Finally add the FSes to the indexes of the respective views for (var fs : fses) { - for (var indexedIn : aModel.filter(fs, pIndexedIn, null).objects()) { + for (var indexedIn : aModel.filter(fs, PROP_INDEXED_IN, null).objects()) { var 
viewJCas = viewIndex.get(indexedIn); viewJCas.addFsToIndexes(fsIndex.get(fs)); } @@ -82,17 +85,13 @@ public static void convert(Model aModel, Statement aContext, JCas aJCas) throws public static JCas convertView(Model aModel, Resource aView, JCas aJCas) throws CASException { - var vf = SimpleValueFactory.getInstance(); - - // Set up names - var pSofaID = vf.createIRI(RdfCas.PROP_SOFA_ID); - var pSofaString = vf.createIRI(RdfCas.PROP_SOFA_STRING); - var pSofaMimeType = vf.createIRI(RdfCas.PROP_SOFA_MIME_TYPE); - // Get the values - var viewName = aModel.filter(aView, pSofaID, null).objects().iterator().next().stringValue(); - var sofaString = aModel.filter(aView, pSofaString, null).objects().iterator().next().stringValue(); - var sofaMimeType = aModel.filter(aView, pSofaMimeType, null).objects().iterator().next().stringValue(); + var viewName = aModel.filter(aView, PROP_SOFA_ID, null).objects().iterator().next() + .stringValue(); + var sofaString = aModel.filter(aView, PROP_SOFA_STRING, null).objects().iterator().next() + .stringValue(); + var sofaMimeType = aModel.filter(aView, PROP_SOFA_MIME_TYPE, null).objects().iterator() + .next().stringValue(); // Instantiate the view/SofA var view = JCasUtil.getView(aJCas, viewName, true); @@ -107,10 +106,10 @@ public static FeatureStructure initFS(Model aModel, Resource aFS, JCas aJCas) // Figure out the UIMA type - there can be only one type per FS var types = aModel.filter(aFS, RDF.TYPE, null).objects(); - types.removeIf(res -> res.stringValue().startsWith(RdfCas.NS_RDFCAS)); + types.removeIf(res -> res.stringValue().startsWith(NS_RDFCAS)); assert types.size() == 1; var type = CasUtil.getType(cas, - types.iterator().next().stringValue().substring(RdfCas.NS_UIMA.length())); + types.iterator().next().stringValue().substring(SCHEME_UIMA.length())); FeatureStructure fs; if (type.getName().equals(DocumentMetaData.class.getName())) { @@ -135,7 +134,7 @@ public static FeatureStructure convertFS(Model aModel, Resource aFS, JCas 
aJCas, continue; } - var featureName = StringUtils.substringAfterLast(stmt.getPredicate().stringValue(), "-"); + var featureName = substringAfterLast(stmt.getPredicate().stringValue(), "-"); var uimaFeat = fs.getType().getFeatureByBaseName(featureName); // Cannot update start/end of document annotation because that FS is already indexed, so @@ -149,9 +148,9 @@ public static FeatureStructure convertFS(Model aModel, Resource aFS, JCas aJCas, if (uimaFeat.getRange().isPrimitive()) { Literal literal = null; if (stmt.getObject().isLiteral()) { - literal = (Literal) stmt; + literal = (Literal) stmt.getObject(); } - + switch (uimaFeat.getRange().getName()) { case CAS.TYPE_NAME_BOOLEAN: fs.setBooleanValue(uimaFeat, literal.booleanValue()); @@ -187,8 +186,8 @@ public static FeatureStructure convertFS(Model aModel, Resource aFS, JCas aJCas, else { var targetUimaFS = aFsIndex.get(stmt.getObject()); if (targetUimaFS == null) { - throw new IllegalStateException("No UIMA FS found for [" - + stmt.getObject().stringValue() + "]"); + throw new IllegalStateException( + "No UIMA FS found for [" + stmt.getObject().stringValue() + "]"); } fs.setFeatureValue(uimaFeat, targetUimaFS); } diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java index afa43cafd75..fe7fa77167f 100644 --- a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/RdfCas.java @@ -18,6 +18,7 @@ package de.tudarmstadt.ukp.inception.io.rdf.internal; import org.apache.uima.cas.CAS; +import org.eclipse.rdf4j.model.IRI; /** * RDF CAS vocabulary. 
@@ -27,19 +28,18 @@ public class RdfCas public static final String PREFIX_RDFCAS = "rdfcas"; public static final String NS_RDFCAS = "http://uima.apache.org/rdf/cas#"; - public static final String NS_UIMA = "uima:"; + public static final String SCHEME_UIMA = "uima:"; - public static final String PROP_VIEW = NS_RDFCAS + "view"; - public static final String PROP_INDEXED_IN = NS_RDFCAS + "indexedIn"; + public static final IRI PROP_VIEW = new BasicIRI(NS_RDFCAS, "view"); + public static final IRI PROP_INDEXED_IN = new BasicIRI(NS_RDFCAS, "indexedIn"); - // public static final String TYPE_CAS = NS_RDFCAS + "CAS"; - public static final String TYPE_VIEW = NS_RDFCAS + "View"; - public static final String TYPE_FEATURE_STRUCTURE = NS_RDFCAS + "FeatureStructure"; + public static final IRI TYPE_VIEW = new BasicIRI(NS_RDFCAS, "View"); + public static final IRI TYPE_FEATURE_STRUCTURE = new BasicIRI(NS_RDFCAS, "FeatureStructure"); - public static final String PROP_SOFA_ID = NS_UIMA + CAS.TYPE_NAME_SOFA + '-' - + CAS.FEATURE_BASE_NAME_SOFAID; - public static final String PROP_SOFA_STRING = NS_UIMA + CAS.TYPE_NAME_SOFA + '-' - + CAS.FEATURE_BASE_NAME_SOFASTRING; - public static final String PROP_SOFA_MIME_TYPE = NS_UIMA + CAS.TYPE_NAME_SOFA + '-' - + CAS.FEATURE_BASE_NAME_SOFAMIME; + public static final IRI PROP_SOFA_ID = new BasicIRI(SCHEME_UIMA, + CAS.TYPE_NAME_SOFA + '-' + CAS.FEATURE_BASE_NAME_SOFAID); + public static final IRI PROP_SOFA_STRING = new BasicIRI(SCHEME_UIMA, + CAS.TYPE_NAME_SOFA + '-' + CAS.FEATURE_BASE_NAME_SOFASTRING); + public static final IRI PROP_SOFA_MIME_TYPE = new BasicIRI(SCHEME_UIMA, + CAS.TYPE_NAME_SOFA + '-' + CAS.FEATURE_BASE_NAME_SOFAMIME); } diff --git a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java index 108023f073d..36fb9cf08be 100644 --- 
a/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java +++ b/inception/inception-io-rdf/src/main/java/de/tudarmstadt/ukp/inception/io/rdf/internal/Uima2Rdf.java @@ -17,6 +17,8 @@ */ package de.tudarmstadt.ukp.inception.io.rdf.internal; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.PREFIX_RDFCAS; +import static de.tudarmstadt.ukp.inception.io.rdf.internal.RdfCas.SCHEME_UIMA; import static java.lang.String.format; import java.util.HashSet; @@ -31,8 +33,10 @@ import org.apache.uima.jcas.JCas; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Namespace; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDF; + import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorUtils; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; @@ -56,9 +60,9 @@ public void convert(JCas aJCas, Model aTarget) throws CASException { // Set up prefix mappings var ts = aJCas.getTypeSystem(); - aTarget.setNamespace("cas", RdfCas.NS_UIMA + "uima.cas."); - aTarget.setNamespace("tcas", RdfCas.NS_UIMA + "uima.tcas."); - aTarget.setNamespace(RdfCas.PREFIX_RDFCAS, RdfCas.NS_RDFCAS); + aTarget.setNamespace("cas", SCHEME_UIMA + "uima.cas."); + aTarget.setNamespace("tcas", SCHEME_UIMA + "uima.tcas."); + aTarget.setNamespace(PREFIX_RDFCAS, RdfCas.NS_RDFCAS); // Additional prefix mappings for DKPro Core typesystems for (var t : ts.getProperlySubsumedTypes(ts.getTopType())) { @@ -73,7 +77,7 @@ public void convert(JCas aJCas, Model aTarget) throws CASException if (nameMatcher.group("INMODULE") != null) { prefix = prefix + "-" + nameMatcher.group("INMODULE"); } - aTarget.setNamespace(prefix, RdfCas.NS_UIMA + nameMatcher.group("LONG")); + aTarget.setNamespace(prefix, SCHEME_UIMA + nameMatcher.group("LONG")); } } @@ -87,11 +91,6 @@ private void convertView(JCas aJCas, Model aTarget) { var vf = 
SimpleValueFactory.getInstance(); - // Set up names - var tView = vf.createIRI(RdfCas.TYPE_VIEW); - var tFeatureStructure = vf.createIRI(RdfCas.TYPE_FEATURE_STRUCTURE); - var pIndexedIn = vf.createIRI(RdfCas.PROP_INDEXED_IN); - // Get a URI for the document var dmd = DocumentMetaData.get(aJCas); var docuri = dmd.getDocumentUri() != null ? dmd.getDocumentUri() @@ -104,23 +103,23 @@ private void convertView(JCas aJCas, Model aTarget) reachable.add(aJCas.getSofa()); // Set up the view itself - var viewUri = format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(aJCas.getSofa())); - var rdfView = vf.createIRI(viewUri); - aTarget.add(rdfView, RDF.TYPE, tView); + var rdfView = vf.createIRI( + format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(aJCas.getSofa()))); + aTarget.add(rdfView, RDF.TYPE, RdfCas.TYPE_VIEW); for (var uimaFS : reachable) { var uri = format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(uimaFS)); var rdfFS = vf.createIRI(uri); - aTarget.add(rdfFS, RDF.TYPE, vf.createIRI(rdfType(uimaFS.getType()))); + aTarget.add(rdfFS, RDF.TYPE, rdfType(aTarget, uimaFS.getType())); // The SoFa is not a regular FS - do not mark it as such if (uimaFS != aJCas.getSofa()) { - aTarget.add(rdfFS, RDF.TYPE, tFeatureStructure); + aTarget.add(rdfFS, RDF.TYPE, RdfCas.TYPE_FEATURE_STRUCTURE); } // Internal UIMA information if (indexed.contains(uimaFS)) { - aTarget.add(rdfFS, pIndexedIn, rdfView); + aTarget.add(rdfFS, RdfCas.PROP_INDEXED_IN, rdfView); } // Convert features @@ -133,7 +132,7 @@ private void convertFeatures(Model aTarget, String docuri, FeatureStructure uima var vf = SimpleValueFactory.getInstance(); for (var uimaFeat : uimaFS.getType().getFeatures()) { - var rdfFeat = vf.createIRI(rdfFeature(uimaFeat)); + var rdfFeat = rdfFeature(aTarget, uimaFeat); if (uimaFeat.getRange().isPrimitive()) { switch (uimaFeat.getRange().getName()) { case CAS.TYPE_NAME_BOOLEAN: @@ -189,13 +188,32 @@ private static String rdfUri(String docuri, FeatureStructure uimaFS) 
return format("%s#%d", docuri, uimaFS.getCAS().getLowLevelCAS().ll_getFSRef(uimaFS)); } - private static String rdfFeature(Feature aUimaFeature) + private static IRI rdfFeature(Model aModel, Feature aUimaFeature) { - return rdfType(aUimaFeature.getDomain()) + "-" + aUimaFeature.getShortName(); + var typeIri = rdfType(aModel, aUimaFeature.getDomain()); + return new BasicIRI(typeIri.getNamespace(), + typeIri.getLocalName() + "-" + aUimaFeature.getShortName()); } - private static String rdfType(Type aUimaType) + private static IRI rdfType(Model aModel, Type aUimaType) { - return RdfCas.NS_UIMA + aUimaType.getName(); + Namespace bestNs = null; + for (var ns : aModel.getNamespaces()) { + var nsName = ns.getName().substring(SCHEME_UIMA.length()); + if (aUimaType.getName().startsWith(nsName) + && (bestNs == null || nsName.length() > bestNs.getName().length())) { + bestNs = ns; + } + } + + var vf = SimpleValueFactory.getInstance(); + if (bestNs != null) { + var namespace = bestNs.getName(); + var localName = aUimaType.getName() + .substring(bestNs.getName().length() - SCHEME_UIMA.length()); + return new BasicIRI(namespace, localName); + } + + return vf.createIRI(SCHEME_UIMA + aUimaType.getName()); } } diff --git a/inception/inception-io-rdf/src/main/resources/META-INF/asciidoc/user-guide/formats-rdfcas.adoc b/inception/inception-io-rdf/src/main/resources/META-INF/asciidoc/user-guide/formats-rdfcas.adoc new file mode 100644 index 00000000000..335d0d9ddb7 --- /dev/null +++ b/inception/inception-io-rdf/src/main/resources/META-INF/asciidoc/user-guide/formats-rdfcas.adoc @@ -0,0 +1,68 @@ +// Licensed to the Technische Universität Darmstadt under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The Technische Universität Darmstadt +// licenses this file to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +[[sect_formats_rdfcas]] += UIMA CAS RDF + +This format provides a representation of the annotated document in RDF using the design model of the UIMA CAS. This format is not an official Apache UIMA file format but rather a facility provided by {product-name} for the benefit of users who want to interact with their annotated data using Semantic Web technology. + +[cols="2,1,1,1,3"] +|==== +| Format | Read | Write | Custom Layers | Description + +| UIMA CAS RDF (`rdfcas`) +| yes +| yes +| yes +| +|==== + +.Example +[source,turtle] +---- +{ + + a cas:Sofa , rdfcas:View ; + cas:Sofa-mimeType "text" ; + cas:Sofa-sofaID "_InitialView" ; + cas:Sofa-sofaNum "1"^^xsd:int ; + cas:Sofa-sofaString "... here be document text ..." . + + + a rdfcas:FeatureStructure , segmentation:Token ; + rdfcas:indexedIn ; + segmentation:Token-lemma ; + segmentation:Token-morph ; + segmentation:Token-pos ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int . + + + a syntax-dependency:Dependency , rdfcas:FeatureStructure ; + rdfcas:indexedIn ; + syntax-dependency:Dependency-DependencyType + "obj" ; + syntax-dependency:Dependency-Dependent + ; + syntax-dependency:Dependency-Governor + ; + syntax-dependency:Dependency-flavor + "basic" ; + cas:AnnotationBase-sofa ; + tcas:Annotation-begin "173"^^xsd:int ; + tcas:Annotation-end "183"^^xsd:int .
+---- diff --git a/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java index a3934d08193..528ac4c715f 100644 --- a/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java +++ b/inception/inception-io-rdf/src/test/java/de/tudarmstadt/ukp/inception/io/rdf/RdfWriterTest.java @@ -31,6 +31,7 @@ import java.io.File; import java.io.FileInputStream; import java.util.ArrayList; + import org.apache.uima.fit.factory.JCasFactory; import org.dkpro.core.io.conll.Conll2006Reader; import org.dkpro.core.io.conll.Conll2006Writer; @@ -90,8 +91,7 @@ void readWriteWithIriFeatures(@TempDir File aTemp) throws Exception var targetFile = new File(aTemp, "test.ttl"); assertThat(contentOf(targetFile, UTF_8)) // - .contains("ner:NamedEntity-value \"PER\" ;") - .contains("ner:NamedEntity-identifier ;"); + .contains("\"PER\"").contains(""); cas.reset(); @@ -121,16 +121,18 @@ private void assertModelEquals(File expected, File actual) try { var sExpected = new ArrayList(); try (var is = new FileInputStream(expected)) { - Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sExpected.add(s.toString()));; + Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sExpected.add(s.toString())); + ; } sort(sExpected); - + var sActual = new ArrayList(); try (var is = new FileInputStream(actual)) { - Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sActual.add(s.toString()));; + Rio.parse(is, RDFFormat.TURTLE).forEach(s -> sActual.add(s.toString())); + ; } sort(sActual); - + assertThat(join("\n", sActual)).isEqualTo(join("\n", sExpected)); } catch (Exception e) {