From e00f37ee69009e9d714048281156bafeae6aba3a Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 9 Jun 2023 19:13:59 +0200 Subject: [PATCH 1/2] #4058 - Cannot export texts containing certain characters as UIMA CAS XMI - Rename packages in XMI support module - Switch to modern spring factories declaration --- .../export/CuratedDocumentsExporterTest.java | 2 +- .../DocumentImportExportServiceImpl.java | 2 +- .../DocumentImportExportServiceImplTest.java | 2 +- .../ukp/clarin/webanno/xmi/package-info.java | 24 - .../io}/xmi/BinaryCasFormatSupport.java | 4 +- .../io}/xmi/UimaInlineXmlFormatSupport.java | 2 +- .../io}/xmi/XmiFormatSupport.java | 4 +- .../io}/xmi/XmiXml11FormatSupport.java | 4 +- .../config/UimaFormatsAutoConfiguration.java | 10 +- .../main/resources/META-INF/spring.factories | 2 - ...ot.autoconfigure.AutoConfiguration.imports | 1 + .../main/resources/desc/type/XmlStructure.xml | 568 +----------------- .../CuratedDocumentsProjectExporterPanel.java | 2 +- .../AnnotationDocumentsExporterTest.java | 2 +- .../versioning/VersioningServiceImplTest.java | 2 +- 15 files changed, 28 insertions(+), 603 deletions(-) delete mode 100644 inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/package-info.java rename inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/{clarin/webanno => inception/io}/xmi/BinaryCasFormatSupport.java (95%) rename inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/{clarin/webanno => inception/io}/xmi/UimaInlineXmlFormatSupport.java (97%) rename inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/{clarin/webanno => inception/io}/xmi/XmiFormatSupport.java (95%) rename inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/{clarin/webanno => inception/io}/xmi/XmiXml11FormatSupport.java (95%) rename inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/{clarin/webanno => inception/io}/xmi/config/UimaFormatsAutoConfiguration.java (86%) delete mode 100644 
inception/inception-io-xmi/src/main/resources/META-INF/spring.factories create mode 100644 inception/inception-io-xmi/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports diff --git a/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java b/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java index 089a683d9fd..1ce83dfb208 100644 --- a/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java +++ b/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java @@ -50,7 +50,6 @@ import de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedProject; import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageServiceImpl; import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageBackupProperties; import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageCachePropertiesImpl; @@ -60,6 +59,7 @@ import de.tudarmstadt.ukp.inception.export.DocumentImportExportServiceImpl; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServicePropertiesImpl; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.project.export.ProjectExportServiceImpl; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; diff --git a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java 
b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java index df59c506df5..f2fdebc700a 100644 --- a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java +++ b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java @@ -90,7 +90,6 @@ import de.tudarmstadt.ukp.clarin.webanno.model.TagSet; import de.tudarmstadt.ukp.clarin.webanno.support.logging.BaseLoggers; import de.tudarmstadt.ukp.clarin.webanno.support.logging.LogMessage; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; @@ -99,6 +98,7 @@ import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties.CasDoctorOnImportPolicy; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; import it.unimi.dsi.fastutil.ints.IntArrayList; diff --git a/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java b/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java index 9a8ae4391ff..e1e3fef567b 100644 --- a/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java +++ b/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java @@ -76,7 +76,6 @@ import de.tudarmstadt.ukp.clarin.webanno.model.Project; import 
de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; import de.tudarmstadt.ukp.clarin.webanno.support.logging.Logging; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageServiceImpl; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageSession; @@ -86,6 +85,7 @@ import de.tudarmstadt.ukp.inception.annotation.storage.driver.filesystem.FileSystemCasStorageDriver; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServicePropertiesImpl; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; import de.tudarmstadt.ukp.inception.schema.service.AnnotationSchemaServiceImpl; diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/package-info.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/package-info.java deleted file mode 100644 index ce8b812ff5c..00000000000 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/package-info.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Technische Universität Darmstadt under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The Technische Universität Darmstadt - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
- * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Support for (de)serializing the CAS to/from XMI files. - * - * @since 1.1.0 - */ -package de.tudarmstadt.ukp.clarin.webanno.xmi; diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/BinaryCasFormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/BinaryCasFormatSupport.java similarity index 95% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/BinaryCasFormatSupport.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/BinaryCasFormatSupport.java index afa9721e7d0..9b72dc08c88 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/BinaryCasFormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/BinaryCasFormatSupport.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.clarin.webanno.xmi; +package de.tudarmstadt.ukp.inception.io.xmi; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; @@ -30,7 +30,7 @@ import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; import de.tudarmstadt.ukp.clarin.webanno.model.Project; -import de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; /** *

diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/UimaInlineXmlFormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/UimaInlineXmlFormatSupport.java similarity index 97% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/UimaInlineXmlFormatSupport.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/UimaInlineXmlFormatSupport.java index d672caa704a..d48a3ef9cdc 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/UimaInlineXmlFormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/UimaInlineXmlFormatSupport.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.clarin.webanno.xmi; +package de.tudarmstadt.ukp.inception.io.xmi; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiFormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java similarity index 95% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiFormatSupport.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java index f00ad21b1d4..758eb4e0cf3 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiFormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package de.tudarmstadt.ukp.clarin.webanno.xmi; +package de.tudarmstadt.ukp.inception.io.xmi; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; @@ -30,7 +30,7 @@ import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; import de.tudarmstadt.ukp.clarin.webanno.model.Project; -import de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; /** *

diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiXml11FormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java similarity index 95% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiXml11FormatSupport.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java index 7164e1eb790..7cdfe7065e1 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiXml11FormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.clarin.webanno.xmi; +package de.tudarmstadt.ukp.inception.io.xmi; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; @@ -30,7 +30,7 @@ import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; import de.tudarmstadt.ukp.clarin.webanno.model.Project; -import de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; /** *

diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/config/UimaFormatsAutoConfiguration.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java similarity index 86% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/config/UimaFormatsAutoConfiguration.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java index e1263146668..1672feb7fc7 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/config/UimaFormatsAutoConfiguration.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java @@ -15,16 +15,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.clarin.webanno.xmi.config; +package de.tudarmstadt.ukp.inception.io.xmi.config; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import de.tudarmstadt.ukp.clarin.webanno.xmi.BinaryCasFormatSupport; -import de.tudarmstadt.ukp.clarin.webanno.xmi.UimaInlineXmlFormatSupport; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiXml11FormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.BinaryCasFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.UimaInlineXmlFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.XmiXml11FormatSupport; @Configuration public class UimaFormatsAutoConfiguration diff --git a/inception/inception-io-xmi/src/main/resources/META-INF/spring.factories 
b/inception/inception-io-xmi/src/main/resources/META-INF/spring.factories deleted file mode 100644 index 7f3e2b7c9b2..00000000000 --- a/inception/inception-io-xmi/src/main/resources/META-INF/spring.factories +++ /dev/null @@ -1,2 +0,0 @@ -org.springframework.boot.autoconfigure.EnableAutoConfiguration=\ -de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration \ No newline at end of file diff --git a/inception/inception-io-xmi/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/inception/inception-io-xmi/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports new file mode 100644 index 00000000000..e99ca2decff --- /dev/null +++ b/inception/inception-io-xmi/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports @@ -0,0 +1 @@ +de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration diff --git a/inception/inception-io-xml/src/main/resources/desc/type/XmlStructure.xml b/inception/inception-io-xml/src/main/resources/desc/type/XmlStructure.xml index 20aee100293..fcb5edaffac 100644 --- a/inception/inception-io-xml/src/main/resources/desc/type/XmlStructure.xml +++ b/inception/inception-io-xml/src/main/resources/desc/type/XmlStructure.xml @@ -1,673 +1,123 @@ - - - - - XML - - - - - - - - - - - + ${version} - - - - - Ubiquitous Knowledge Processing (UKP) Lab, Technische Universität Darmstadt - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlElement - - - - - XML element - - - - - org.dkpro.core.api.xml.type.XmlNode - - - - - - - - - - - - - - - uri - - - - - Namespace URI of the element. - - - - - uima.cas.String - - - - - - - - - - - - - - - localName - - - - - Local name of the XML element. - - - - - uima.cas.String - - - - - - - - - - - - - - - attributes - - - - - Array of attributes of the XML element. 
- - - - - uima.cas.FSArray - - - - - org.dkpro.core.api.xml.type.XmlAttribute - - - - - - - - - - - - - - - children - - - - - Children of this XML element. - - - - - uima.cas.FSArray - - - - - org.dkpro.core.api.xml.type.XmlNode - - - - - - - - - - - - - - - qName - - - - - - - - - - - + uima.cas.String - - - - - - - - - - - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlAttribute - - - - - - - - - - - + uima.cas.TOP - - - - - - - - - - - - - - - uri - - - - - Namespace URI of the attribute. - - - - - uima.cas.String - - - - - - - - - - - - - - - localName - - - - - Local name of the attribute. - - - - - uima.cas.String - - - - - - - - - - - - - - - value - - - - - Value of the XML attribute. - - - - - uima.cas.String - - - - - - - - - - - - - - - qName - - - - - - - - - - - + uima.cas.String - - - - - - - - - - - - - - - valueType - - - - - - - - - - - + uima.cas.String - - - - - - - - - - - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlNode - - - - - Supertype for XmlElements and XmlTextNodes. - - - - - uima.tcas.Annotation - - - - - - - - - - - - - - - parent - - - - - - - - - - - + org.dkpro.core.api.xml.type.XmlElement - - - - - - - - - - - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlDocument - - - - - XML document - - - - - uima.tcas.Annotation - - - - - - - - - - - - - - - root - - - - - Root element of the XML document. - - - - - org.dkpro.core.api.xml.type.XmlElement - - - - - - - - - - - captureRoots - - - + uima.cas.FSArray - org.dkpro.core.api.xml.type.XmlNode - true - - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlTextNode - - - - - XML text node. - - - - - org.dkpro.core.api.xml.type.XmlNode - - - - - - - - - - - text - - - - - - - + uima.cas.String - - - - - - - - - captured - - - Whether the text node has been added to the document text. 
- - - uima.cas.Boolean - - - - - - - - - - - - - - - - - - + \ No newline at end of file diff --git a/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/task/curated/CuratedDocumentsProjectExporterPanel.java b/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/task/curated/CuratedDocumentsProjectExporterPanel.java index e71069a43be..bb2e987a91e 100644 --- a/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/task/curated/CuratedDocumentsProjectExporterPanel.java +++ b/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/task/curated/CuratedDocumentsProjectExporterPanel.java @@ -30,7 +30,7 @@ import de.tudarmstadt.ukp.clarin.webanno.support.lambda.LambdaAjaxFormComponentUpdatingBehavior; import de.tudarmstadt.ukp.clarin.webanno.support.lambda.LambdaAjaxLink; import de.tudarmstadt.ukp.clarin.webanno.support.lambda.LambdaBehavior; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.project.export.ProjectExportService; import de.tudarmstadt.ukp.inception.project.export.settings.FormatDropdownChoice; import de.tudarmstadt.ukp.inception.project.export.settings.ProjectExporterPanelImplBase; diff --git a/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java b/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java index e5e7bad5f10..c217e9f9849 100644 --- a/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java +++ b/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java @@ -47,7 +47,6 @@ import 
de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedSourceDocument; import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageServiceImpl; import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageBackupProperties; import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageCachePropertiesImpl; @@ -56,6 +55,7 @@ import de.tudarmstadt.ukp.inception.export.DocumentImportExportServiceImpl; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServicePropertiesImpl; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; import de.tudarmstadt.ukp.inception.schema.exporters.AnnotationDocumentExporter; diff --git a/inception/inception-versioning/src/test/java/de/tudarmstadt/ukp/inception/versioning/VersioningServiceImplTest.java b/inception/inception-versioning/src/test/java/de/tudarmstadt/ukp/inception/versioning/VersioningServiceImplTest.java index f7b868d4304..6f61be8eedb 100644 --- a/inception/inception-versioning/src/test/java/de/tudarmstadt/ukp/inception/versioning/VersioningServiceImplTest.java +++ b/inception/inception-versioning/src/test/java/de/tudarmstadt/ukp/inception/versioning/VersioningServiceImplTest.java @@ -61,12 +61,12 @@ import de.tudarmstadt.ukp.clarin.webanno.security.config.SecurityAutoConfiguration; import de.tudarmstadt.ukp.clarin.webanno.security.model.User; import de.tudarmstadt.ukp.clarin.webanno.text.config.TextFormatsAutoConfiguration; -import de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageSession; import 
de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.curation.config.CurationDocumentServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.documents.config.DocumentServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; import de.tudarmstadt.ukp.inception.schema.config.AnnotationSchemaServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.versioning.config.VersioningServiceAutoConfiguration; From d90e34ca5aff090dc15df6e9d918a2a67e184c7b Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Thu, 15 Jun 2023 19:22:09 +0200 Subject: [PATCH 2/2] #4058 - Cannot export texts containing certain characters as UIMA CAS XMI - Enable sanitizing of illegal characters on export for XMI formats - Added option to control if sanitization happens or not - Updated documentation of XMI formats --- .../export/CuratedDocumentsExporterTest.java | 6 +- .../DocumentImportExportServiceImpl.java | 9 ++- ...tImportExportServiceAutoConfiguration.java | 6 +- .../DocumentImportExportServiceImplTest.java | 8 ++- inception/inception-io-xmi/pom.xml | 4 ++ .../inception/io/xmi/XmiFormatSupport.java | 18 ++++- .../io/xmi/XmiXml11FormatSupport.java | 18 ++++- .../config/UimaFormatsAutoConfiguration.java | 10 +-- .../io/xmi/config/UimaFormatsProperties.java | 27 +++++++ .../xmi/config/UimaFormatsPropertiesImpl.java | 71 +++++++++++++++++++ .../asciidoc/user-guide/formats-uimaxmi.adoc | 9 ++- .../AnnotationDocumentsExporterTest.java | 6 +- inception/inception-search-mtas/pom.xml | 5 ++ .../index/mtas/MtasDocumentIndexTest.java | 8 +++ .../search/index/mtas/MtasUpgradeTest.java | 8 +++ 15 files changed, 192 insertions(+), 21 deletions(-) create mode 100644 
inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsProperties.java create mode 100644 inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsPropertiesImpl.java diff --git a/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java b/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java index 1ce83dfb208..3e69c33c985 100644 --- a/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java +++ b/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java @@ -60,6 +60,7 @@ import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServicePropertiesImpl; import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.project.export.ProjectExportServiceImpl; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; @@ -103,9 +104,10 @@ public void setUp() throws Exception casStorageService = spy(new CasStorageServiceImpl(driver, new CasStorageCachePropertiesImpl(), null, schemaService)); + var xmiFormatSupport = new XmiFormatSupport(new XmiFormatProperties()); importExportSerivce = new DocumentImportExportServiceImpl(repositoryProperties, - asList(new XmiFormatSupport()), casStorageService, schemaService, properties, - checksRegistry, repairsRegistry); + asList(xmiFormatSupport), casStorageService, schemaService, properties, + checksRegistry, repairsRegistry, xmiFormatSupport); // Dynamically generate a SourceDocument with an incrementing ID when asked for one 
when(documentService.getSourceDocument(any(), any())).then(invocation -> { diff --git a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java index 69dbd9b4396..76c0f80f6f5 100644 --- a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java +++ b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java @@ -98,7 +98,6 @@ import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties.CasDoctorOnImportPolicy; -import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; import it.unimi.dsi.fastutil.ints.IntArrayList; @@ -128,6 +127,8 @@ public class DocumentImportExportServiceImpl private final ChecksRegistry checksRegistry; private final RepairsRegistry repairsRegistry; + private final FormatSupport fallbackFormat; + private final List formatsProxy; private Map formats; @@ -137,7 +138,8 @@ public DocumentImportExportServiceImpl(RepositoryProperties aRepositoryPropertie @Lazy @Autowired(required = false) List aFormats, CasStorageService aCasStorageService, AnnotationSchemaService aAnnotationService, DocumentImportExportServiceProperties aServiceProperties, - ChecksRegistry aChecksRegistry, RepairsRegistry aRepairsRegistry) + ChecksRegistry aChecksRegistry, RepairsRegistry aRepairsRegistry, + FormatSupport aFallbackFormat) { repositoryProperties = aRepositoryProperties; casStorageService = aCasStorageService; @@ -146,6 +148,7 @@ public DocumentImportExportServiceImpl(RepositoryProperties aRepositoryPropertie 
properties = aServiceProperties; checksRegistry = aChecksRegistry; repairsRegistry = aRepairsRegistry; + fallbackFormat = aFallbackFormat; schemaTypeSystem = createTypeSystemDescription( "de/tudarmstadt/ukp/clarin/webanno/api/type/schema-types"); @@ -203,7 +206,7 @@ public List getFormats() @Override public FormatSupport getFallbackFormat() { - return new XmiFormatSupport(); + return fallbackFormat; } @Override diff --git a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/config/DocumentImportExportServiceAutoConfiguration.java b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/config/DocumentImportExportServiceAutoConfiguration.java index 45db2bf2620..a579914fb2e 100644 --- a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/config/DocumentImportExportServiceAutoConfiguration.java +++ b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/config/DocumentImportExportServiceAutoConfiguration.java @@ -35,6 +35,7 @@ import de.tudarmstadt.ukp.inception.export.exporters.ProjectLogExporter; import de.tudarmstadt.ukp.inception.export.exporters.ProjectMetaInfExporter; import de.tudarmstadt.ukp.inception.export.exporters.ProjectSettingsExporter; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; @Configuration @@ -47,11 +48,12 @@ public DocumentImportExportService documentImportExportService( @Lazy @Autowired(required = false) List aFormats, CasStorageService aCasStorageService, AnnotationSchemaService aAnnotationService, DocumentImportExportServiceProperties aServiceProperties, - ChecksRegistry aChecksRegistry, RepairsRegistry aRepairsRegistry) + ChecksRegistry aChecksRegistry, RepairsRegistry aRepairsRegistry, + XmiFormatSupport fallbackFormat) { return new DocumentImportExportServiceImpl(aRepositoryProperties, aFormats, aCasStorageService, aAnnotationService, aServiceProperties, 
aChecksRegistry, - aRepairsRegistry); + aRepairsRegistry, fallbackFormat); } @Bean diff --git a/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java b/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java index e1e3fef567b..67ac9d09d4b 100644 --- a/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java +++ b/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java @@ -86,6 +86,7 @@ import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServicePropertiesImpl; import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; import de.tudarmstadt.ukp.inception.schema.service.AnnotationSchemaServiceImpl; @@ -124,9 +125,10 @@ public void setup() throws Exception var storageService = new CasStorageServiceImpl(driver, new CasStorageCachePropertiesImpl(), null, null); - sut = new DocumentImportExportServiceImpl(repositoryProperties, - List.of(new XmiFormatSupport()), storageService, schemaService, properties, - checksRegistry, repairsRegistry); + var xmiFormatSupport = new XmiFormatSupport(new XmiFormatProperties()); + sut = new DocumentImportExportServiceImpl(repositoryProperties, List.of(xmiFormatSupport), + storageService, schemaService, properties, checksRegistry, repairsRegistry, + xmiFormatSupport); sut.onContextRefreshedEvent(); doReturn(emptyList()).when(schemaService).listAnnotationLayer(any()); diff --git a/inception/inception-io-xmi/pom.xml b/inception/inception-io-xmi/pom.xml index d1b418bed7f..3df14342923 100644 --- 
a/inception/inception-io-xmi/pom.xml +++ b/inception/inception-io-xmi/pom.xml @@ -37,6 +37,10 @@ org.springframework spring-context + + org.springframework.boot + spring-boot + org.springframework.boot spring-boot-autoconfigure diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java index 758eb4e0cf3..1807a2d794e 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java @@ -31,6 +31,7 @@ import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; /** *

@@ -44,6 +45,13 @@ public class XmiFormatSupport public static final String ID = "xmi"; public static final String NAME = "UIMA CAS XMI (XML 1.0)"; + private final XmiFormatProperties properties; + + public XmiFormatSupport(XmiFormatProperties aProperties) + { + properties = aProperties; + } + @Override public String getId() { @@ -79,7 +87,9 @@ public CollectionReaderDescription getReaderDescription(Project aProject, TypeSystemDescription aTSD) throws ResourceInitializationException { - return createReaderDescription(XmiReader.class, XmiReader.PARAM_LENIENT, true); + return createReaderDescription( // + XmiReader.class, // + XmiReader.PARAM_LENIENT, true); } @Override @@ -87,6 +97,10 @@ public AnalysisEngineDescription getWriterDescription(Project aProject, TypeSystemDescription aTSD, CAS aCAS) throws ResourceInitializationException { - return createEngineDescription(XmiWriter.class, aTSD, XmiWriter.PARAM_VERSION, "1.0"); + return createEngineDescription( // + XmiWriter.class, aTSD, // + XmiWriter.PARAM_VERSION, "1.0", // + XmiWriter.PARAM_SANITIZE_ILLEGAL_CHARACTERS, + properties.isSanitizeIllegalCharacters()); } } diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java index 7cdfe7065e1..473c404ced7 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java @@ -31,6 +31,7 @@ import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; /** *

@@ -44,6 +45,13 @@ public class XmiXml11FormatSupport public static final String ID = "xmi-xml1.1"; public static final String NAME = "UIMA CAS XMI (XML 1.1)"; + private final XmiFormatProperties properties; + + public XmiXml11FormatSupport(XmiFormatProperties aProperties) + { + properties = aProperties; + } + @Override public String getId() { @@ -79,7 +87,9 @@ public CollectionReaderDescription getReaderDescription(Project aProject, TypeSystemDescription aTSD) throws ResourceInitializationException { - return createReaderDescription(XmiReader.class, XmiReader.PARAM_LENIENT, true); + return createReaderDescription( // + XmiReader.class, // + XmiReader.PARAM_LENIENT, true); } @Override @@ -87,6 +97,10 @@ public AnalysisEngineDescription getWriterDescription(Project aProject, TypeSystemDescription aTSD, CAS aCAS) throws ResourceInitializationException { - return createEngineDescription(XmiWriter.class, aTSD, XmiWriter.PARAM_VERSION, "1.1"); + return createEngineDescription( // + XmiWriter.class, aTSD, // + XmiWriter.PARAM_VERSION, "1.1", // + XmiWriter.PARAM_SANITIZE_ILLEGAL_CHARACTERS, + properties.isSanitizeIllegalCharacters()); } } diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java index 1672feb7fc7..79512c4117d 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java @@ -18,6 +18,7 @@ package de.tudarmstadt.ukp.inception.io.xmi.config; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.context.annotation.Bean; import 
org.springframework.context.annotation.Configuration; @@ -27,6 +28,7 @@ import de.tudarmstadt.ukp.inception.io.xmi.XmiXml11FormatSupport; @Configuration +@EnableConfigurationProperties(UimaFormatsPropertiesImpl.class) public class UimaFormatsAutoConfiguration { @ConditionalOnProperty(prefix = "format.uima-binary-cas", name = "enabled", // @@ -40,17 +42,17 @@ public BinaryCasFormatSupport binaryCasFormatSupport() @ConditionalOnProperty(prefix = "format.uima-xmi-xml1_1", name = "enabled", // havingValue = "true", matchIfMissing = true) @Bean - public XmiXml11FormatSupport xmiXml11FormatSupport() + public XmiXml11FormatSupport xmiXml11FormatSupport(UimaFormatsProperties aProperties) { - return new XmiXml11FormatSupport(); + return new XmiXml11FormatSupport(aProperties.getUimaXmiXml1_1()); } @ConditionalOnProperty(prefix = "format.uima-xmi", name = "enabled", // havingValue = "true", matchIfMissing = true) @Bean - public XmiFormatSupport xmiFormatSupport() + public XmiFormatSupport xmiFormatSupport(UimaFormatsProperties aProperties) { - return new XmiFormatSupport(); + return new XmiFormatSupport(aProperties.getUimaXmi()); } @ConditionalOnProperty(prefix = "format.uima-inline-xml", name = "enabled", // diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsProperties.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsProperties.java new file mode 100644 index 00000000000..4523527aa0f --- /dev/null +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsProperties.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.xmi.config; + +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; + +public interface UimaFormatsProperties +{ + XmiFormatProperties getUimaXmiXml1_1(); + + XmiFormatProperties getUimaXmi(); +} diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsPropertiesImpl.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsPropertiesImpl.java new file mode 100644 index 00000000000..2083e322ea7 --- /dev/null +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsPropertiesImpl.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.xmi.config; + +import org.springframework.boot.context.properties.ConfigurationProperties; + +/** + *

+ * This class is exposed as a Spring Component via {@link UimaFormatsAutoConfiguration}. + *

+ */ +@ConfigurationProperties("format") +public class UimaFormatsPropertiesImpl + implements UimaFormatsProperties +{ + private XmiFormatProperties uimaXmi = new XmiFormatProperties(); + private XmiFormatProperties uimaXmiXml1_1 = new XmiFormatProperties(); + + @Override + public XmiFormatProperties getUimaXmi() + { + return uimaXmi; + } + + public void setUimaXmi(XmiFormatProperties aUimaXmi) + { + uimaXmi = aUimaXmi; + } + + @Override + public XmiFormatProperties getUimaXmiXml1_1() + { + return uimaXmiXml1_1; + } + + public void setUimaXmiXml1_1(XmiFormatProperties aUimaXmiXml1_1) + { + uimaXmiXml1_1 = aUimaXmiXml1_1; + } + + public static class XmiFormatProperties + { + private boolean sanitizeIllegalCharacters = true; + + public void setSanitizeIllegalCharacters(boolean aSanitizeIllegalCharacters) + { + sanitizeIllegalCharacters = aSanitizeIllegalCharacters; + } + + public boolean isSanitizeIllegalCharacters() + { + return sanitizeIllegalCharacters; + } + + } +} diff --git a/inception/inception-io-xmi/src/main/resources/META-INF/asciidoc/user-guide/formats-uimaxmi.adoc b/inception/inception-io-xmi/src/main/resources/META-INF/asciidoc/user-guide/formats-uimaxmi.adoc index 8f261395805..b0a0d67dd7d 100644 --- a/inception/inception-io-xmi/src/main/resources/META-INF/asciidoc/user-guide/formats-uimaxmi.adoc +++ b/inception/inception-io-xmi/src/main/resources/META-INF/asciidoc/user-guide/formats-uimaxmi.adoc @@ -20,7 +20,14 @@ The probably most commonly used formats supported by the Apache UIMA framework is UIMA CAS XMI. It is able to capture all the information contained in the CAS. This is the de-facto standard for exchanging data in the UIMA world. Most UIMA-related tools support it. -The XMI format does not include type system information. When exporting files in the XMI format, a ZIP file is created for each document which contains the XMI file itself as well as an XML file containing the type system. +The XMI format does not include type system information. 
When exporting files in the XMI format, a ZIP file is created for each document which contains the XMI file itself as well as an XML file containing the type system. To import such files +again, extract the ZIPs and import only the XMI files they contain, not the type system XML files. + +XML 1.0 and XML 1.1 do not allow all Unicode characters. In particular, certain control characters are not permitted. +By default, {product-name} replaces illegal characters with a space character on export. This behavior can be +disabled by setting the boolean properties `format.uima-xmi.sanitize-illegal-characters` and +`format.uima-xmi-xml1_1.sanitize-illegal-characters` to `false`. When disabled, an error is produced when trying to export texts +containing illegal characters. There are two flavors of CAS XMI, namely link:http://www.w3.org/TR/2006/REC-xml-20060816/Overview.html[XML 1.0] and link:http://www.w3.org/TR/xml11/Overview.html[XML 1.1]. XML 1.0 is more widely supported in the world of XML parsers, so you may expect better interoperability with other programming languages diff --git a/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java b/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java index c217e9f9849..5d8e08c450e 100644 --- a/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java +++ b/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java @@ -56,6 +56,7 @@ import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServicePropertiesImpl; import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import
de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; import de.tudarmstadt.ukp.inception.schema.exporters.AnnotationDocumentExporter; @@ -99,9 +100,10 @@ public void setUp() throws Exception casStorageService = new CasStorageServiceImpl(driver, new CasStorageCachePropertiesImpl(), null, schemaService); + var xmiFormatSupport = new XmiFormatSupport(new XmiFormatProperties()); importExportSerivce = new DocumentImportExportServiceImpl(repositoryProperties, - asList(new XmiFormatSupport()), casStorageService, schemaService, properties, - checksRegistry, repairsRegistry); + asList(xmiFormatSupport), casStorageService, schemaService, properties, + checksRegistry, repairsRegistry, xmiFormatSupport); sut = new AnnotationDocumentExporter(documentService, null, importExportSerivce, repositoryProperties); diff --git a/inception/inception-search-mtas/pom.xml b/inception/inception-search-mtas/pom.xml index 10d62bc56b2..c2015fb1b18 100644 --- a/inception/inception-search-mtas/pom.xml +++ b/inception/inception-search-mtas/pom.xml @@ -224,6 +224,11 @@ inception-project-initializers test + + de.tudarmstadt.ukp.inception.app + inception-io-xmi + test + de.tudarmstadt.ukp.inception.app inception-kb diff --git a/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasDocumentIndexTest.java b/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasDocumentIndexTest.java index 0b64b49f5df..5e19eab2a90 100644 --- a/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasDocumentIndexTest.java +++ b/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasDocumentIndexTest.java @@ -82,6 +82,8 @@ import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageServiceAutoConfiguration; import 
de.tudarmstadt.ukp.inception.documents.config.DocumentServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.kb.config.KnowledgeBaseServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.preferences.config.PreferencesServiceAutoConfig; import de.tudarmstadt.ukp.inception.scheduling.config.SchedulingServiceAutoConfiguration; @@ -721,5 +723,11 @@ public ApplicationContextProvider contextProvider() { return new ApplicationContextProvider(); } + + @Bean + public XmiFormatSupport xmiFormatSupport() + { + return new XmiFormatSupport(new XmiFormatProperties()); + } } } diff --git a/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasUpgradeTest.java b/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasUpgradeTest.java index 567dec41e77..4f194890802 100644 --- a/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasUpgradeTest.java +++ b/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasUpgradeTest.java @@ -54,6 +54,8 @@ import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.documents.config.DocumentServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.preferences.config.PreferencesServiceAutoConfig; import de.tudarmstadt.ukp.inception.scheduling.config.SchedulingServiceAutoConfiguration; 
import de.tudarmstadt.ukp.inception.schema.config.AnnotationSchemaServiceAutoConfiguration; @@ -186,5 +188,11 @@ RepositoryProperties repositoryProperties() props.setPath(new File(WORK_DIR)); return props; } + + @Bean + public XmiFormatSupport xmiFormatSupport() + { + return new XmiFormatSupport(new XmiFormatProperties()); + } } }