diff --git a/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java b/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java index 089a683d9fd..3e69c33c985 100644 --- a/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java +++ b/inception/inception-curation/src/test/java/de/tudarmstadt/ukp/inception/curation/export/CuratedDocumentsExporterTest.java @@ -50,7 +50,6 @@ import de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedProject; import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageServiceImpl; import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageBackupProperties; import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageCachePropertiesImpl; @@ -60,6 +59,8 @@ import de.tudarmstadt.ukp.inception.export.DocumentImportExportServiceImpl; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServicePropertiesImpl; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.project.export.ProjectExportServiceImpl; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; @@ -103,9 +104,10 @@ public void setUp() throws Exception casStorageService = spy(new CasStorageServiceImpl(driver, new CasStorageCachePropertiesImpl(), null, schemaService)); + var xmiFormatSupport = new XmiFormatSupport(new XmiFormatProperties()); importExportSerivce = new DocumentImportExportServiceImpl(repositoryProperties, - 
asList(new XmiFormatSupport()), casStorageService, schemaService, properties, - checksRegistry, repairsRegistry); + asList(xmiFormatSupport), casStorageService, schemaService, properties, + checksRegistry, repairsRegistry, xmiFormatSupport); // Dynamically generate a SourceDocument with an incrementing ID when asked for one when(documentService.getSourceDocument(any(), any())).then(invocation -> { diff --git a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java index 531d4f86af7..76c0f80f6f5 100644 --- a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java +++ b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImpl.java @@ -90,7 +90,6 @@ import de.tudarmstadt.ukp.clarin.webanno.model.TagSet; import de.tudarmstadt.ukp.clarin.webanno.support.logging.BaseLoggers; import de.tudarmstadt.ukp.clarin.webanno.support.logging.LogMessage; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; @@ -128,6 +127,8 @@ public class DocumentImportExportServiceImpl private final ChecksRegistry checksRegistry; private final RepairsRegistry repairsRegistry; + private final FormatSupport fallbackFormat; + private final List formatsProxy; private Map formats; @@ -137,7 +138,8 @@ public DocumentImportExportServiceImpl(RepositoryProperties aRepositoryPropertie @Lazy @Autowired(required = false) List aFormats, CasStorageService aCasStorageService, AnnotationSchemaService aAnnotationService, DocumentImportExportServiceProperties aServiceProperties, - ChecksRegistry aChecksRegistry, 
RepairsRegistry aRepairsRegistry) + ChecksRegistry aChecksRegistry, RepairsRegistry aRepairsRegistry, + FormatSupport aFallbackFormat) { repositoryProperties = aRepositoryProperties; casStorageService = aCasStorageService; @@ -146,6 +148,7 @@ public DocumentImportExportServiceImpl(RepositoryProperties aRepositoryPropertie properties = aServiceProperties; checksRegistry = aChecksRegistry; repairsRegistry = aRepairsRegistry; + fallbackFormat = aFallbackFormat; schemaTypeSystem = createTypeSystemDescription( "de/tudarmstadt/ukp/clarin/webanno/api/type/schema-types"); @@ -203,7 +206,7 @@ public List getFormats() @Override public FormatSupport getFallbackFormat() { - return new XmiFormatSupport(); + return fallbackFormat; } @Override diff --git a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/config/DocumentImportExportServiceAutoConfiguration.java b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/config/DocumentImportExportServiceAutoConfiguration.java index 45db2bf2620..a579914fb2e 100644 --- a/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/config/DocumentImportExportServiceAutoConfiguration.java +++ b/inception/inception-export/src/main/java/de/tudarmstadt/ukp/inception/export/config/DocumentImportExportServiceAutoConfiguration.java @@ -35,6 +35,7 @@ import de.tudarmstadt.ukp.inception.export.exporters.ProjectLogExporter; import de.tudarmstadt.ukp.inception.export.exporters.ProjectMetaInfExporter; import de.tudarmstadt.ukp.inception.export.exporters.ProjectSettingsExporter; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; @Configuration @@ -47,11 +48,12 @@ public DocumentImportExportService documentImportExportService( @Lazy @Autowired(required = false) List aFormats, CasStorageService aCasStorageService, AnnotationSchemaService aAnnotationService, DocumentImportExportServiceProperties 
aServiceProperties, - ChecksRegistry aChecksRegistry, RepairsRegistry aRepairsRegistry) + ChecksRegistry aChecksRegistry, RepairsRegistry aRepairsRegistry, + XmiFormatSupport fallbackFormat) { return new DocumentImportExportServiceImpl(aRepositoryProperties, aFormats, aCasStorageService, aAnnotationService, aServiceProperties, aChecksRegistry, - aRepairsRegistry); + aRepairsRegistry, fallbackFormat); } @Bean diff --git a/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java b/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java index 9a8ae4391ff..67ac9d09d4b 100644 --- a/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java +++ b/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/DocumentImportExportServiceImplTest.java @@ -76,7 +76,6 @@ import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; import de.tudarmstadt.ukp.clarin.webanno.support.logging.Logging; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageServiceImpl; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageSession; @@ -86,6 +85,8 @@ import de.tudarmstadt.ukp.inception.annotation.storage.driver.filesystem.FileSystemCasStorageDriver; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServicePropertiesImpl; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; import 
de.tudarmstadt.ukp.inception.schema.service.AnnotationSchemaServiceImpl; @@ -124,9 +125,10 @@ public void setup() throws Exception var storageService = new CasStorageServiceImpl(driver, new CasStorageCachePropertiesImpl(), null, null); - sut = new DocumentImportExportServiceImpl(repositoryProperties, - List.of(new XmiFormatSupport()), storageService, schemaService, properties, - checksRegistry, repairsRegistry); + var xmiFormatSupport = new XmiFormatSupport(new XmiFormatProperties()); + sut = new DocumentImportExportServiceImpl(repositoryProperties, List.of(xmiFormatSupport), + storageService, schemaService, properties, checksRegistry, repairsRegistry, + xmiFormatSupport); sut.onContextRefreshedEvent(); doReturn(emptyList()).when(schemaService).listAnnotationLayer(any()); diff --git a/inception/inception-io-xmi/pom.xml b/inception/inception-io-xmi/pom.xml index d1b418bed7f..3df14342923 100644 --- a/inception/inception-io-xmi/pom.xml +++ b/inception/inception-io-xmi/pom.xml @@ -37,6 +37,10 @@ org.springframework spring-context + + org.springframework.boot + spring-boot + org.springframework.boot spring-boot-autoconfigure diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/BinaryCasFormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/BinaryCasFormatSupport.java similarity index 95% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/BinaryCasFormatSupport.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/BinaryCasFormatSupport.java index afa9721e7d0..9b72dc08c88 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/BinaryCasFormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/BinaryCasFormatSupport.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the 
License. */ -package de.tudarmstadt.ukp.clarin.webanno.xmi; +package de.tudarmstadt.ukp.inception.io.xmi; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; @@ -30,7 +30,7 @@ import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; import de.tudarmstadt.ukp.clarin.webanno.model.Project; -import de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; /** *

diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/UimaInlineXmlFormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/UimaInlineXmlFormatSupport.java similarity index 97% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/UimaInlineXmlFormatSupport.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/UimaInlineXmlFormatSupport.java index d672caa704a..d48a3ef9cdc 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/UimaInlineXmlFormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/UimaInlineXmlFormatSupport.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.clarin.webanno.xmi; +package de.tudarmstadt.ukp.inception.io.xmi; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiFormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java similarity index 77% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiFormatSupport.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java index f00ad21b1d4..1807a2d794e 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiFormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiFormatSupport.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package de.tudarmstadt.ukp.clarin.webanno.xmi; +package de.tudarmstadt.ukp.inception.io.xmi; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; @@ -30,7 +30,8 @@ import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; import de.tudarmstadt.ukp.clarin.webanno.model.Project; -import de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; /** *

@@ -44,6 +45,13 @@ public class XmiFormatSupport public static final String ID = "xmi"; public static final String NAME = "UIMA CAS XMI (XML 1.0)"; + private final XmiFormatProperties properties; + + public XmiFormatSupport(XmiFormatProperties aProperties) + { + properties = aProperties; + } + @Override public String getId() { @@ -79,7 +87,9 @@ public CollectionReaderDescription getReaderDescription(Project aProject, TypeSystemDescription aTSD) throws ResourceInitializationException { - return createReaderDescription(XmiReader.class, XmiReader.PARAM_LENIENT, true); + return createReaderDescription( // + XmiReader.class, // + XmiReader.PARAM_LENIENT, true); } @Override @@ -87,6 +97,10 @@ public AnalysisEngineDescription getWriterDescription(Project aProject, TypeSystemDescription aTSD, CAS aCAS) throws ResourceInitializationException { - return createEngineDescription(XmiWriter.class, aTSD, XmiWriter.PARAM_VERSION, "1.0"); + return createEngineDescription( // + XmiWriter.class, aTSD, // + XmiWriter.PARAM_VERSION, "1.0", // + XmiWriter.PARAM_SANITIZE_ILLEGAL_CHARACTERS, + properties.isSanitizeIllegalCharacters()); } } diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiXml11FormatSupport.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java similarity index 77% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiXml11FormatSupport.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java index 7164e1eb790..473c404ced7 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/XmiXml11FormatSupport.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/XmiXml11FormatSupport.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package de.tudarmstadt.ukp.clarin.webanno.xmi; +package de.tudarmstadt.ukp.inception.io.xmi; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; @@ -30,7 +30,8 @@ import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; import de.tudarmstadt.ukp.clarin.webanno.model.Project; -import de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; /** *

@@ -44,6 +45,13 @@ public class XmiXml11FormatSupport public static final String ID = "xmi-xml1.1"; public static final String NAME = "UIMA CAS XMI (XML 1.1)"; + private final XmiFormatProperties properties; + + public XmiXml11FormatSupport(XmiFormatProperties aProperties) + { + properties = aProperties; + } + @Override public String getId() { @@ -79,7 +87,9 @@ public CollectionReaderDescription getReaderDescription(Project aProject, TypeSystemDescription aTSD) throws ResourceInitializationException { - return createReaderDescription(XmiReader.class, XmiReader.PARAM_LENIENT, true); + return createReaderDescription( // + XmiReader.class, // + XmiReader.PARAM_LENIENT, true); } @Override @@ -87,6 +97,10 @@ public AnalysisEngineDescription getWriterDescription(Project aProject, TypeSystemDescription aTSD, CAS aCAS) throws ResourceInitializationException { - return createEngineDescription(XmiWriter.class, aTSD, XmiWriter.PARAM_VERSION, "1.1"); + return createEngineDescription( // + XmiWriter.class, aTSD, // + XmiWriter.PARAM_VERSION, "1.1", // + XmiWriter.PARAM_SANITIZE_ILLEGAL_CHARACTERS, + properties.isSanitizeIllegalCharacters()); } } diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/config/UimaFormatsAutoConfiguration.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java similarity index 71% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/config/UimaFormatsAutoConfiguration.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java index e1263146668..79512c4117d 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/config/UimaFormatsAutoConfiguration.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsAutoConfiguration.java @@ -15,18 +15,20 @@ * See 
the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.clarin.webanno.xmi.config; +package de.tudarmstadt.ukp.inception.io.xmi.config; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import de.tudarmstadt.ukp.clarin.webanno.xmi.BinaryCasFormatSupport; -import de.tudarmstadt.ukp.clarin.webanno.xmi.UimaInlineXmlFormatSupport; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiXml11FormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.BinaryCasFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.UimaInlineXmlFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.XmiXml11FormatSupport; @Configuration +@EnableConfigurationProperties(UimaFormatsPropertiesImpl.class) public class UimaFormatsAutoConfiguration { @ConditionalOnProperty(prefix = "format.uima-binary-cas", name = "enabled", // @@ -40,17 +42,17 @@ public BinaryCasFormatSupport binaryCasFormatSupport() @ConditionalOnProperty(prefix = "format.uima-xmi-xml1_1", name = "enabled", // havingValue = "true", matchIfMissing = true) @Bean - public XmiXml11FormatSupport xmiXml11FormatSupport() + public XmiXml11FormatSupport xmiXml11FormatSupport(UimaFormatsProperties aProperties) { - return new XmiXml11FormatSupport(); + return new XmiXml11FormatSupport(aProperties.getUimaXmiXml1_1()); } @ConditionalOnProperty(prefix = "format.uima-xmi", name = "enabled", // havingValue = "true", matchIfMissing = true) @Bean - public XmiFormatSupport xmiFormatSupport() + public XmiFormatSupport xmiFormatSupport(UimaFormatsProperties aProperties) { - return new XmiFormatSupport(); + return new 
XmiFormatSupport(aProperties.getUimaXmi()); } @ConditionalOnProperty(prefix = "format.uima-inline-xml", name = "enabled", // diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/package-info.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsProperties.java similarity index 74% rename from inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/package-info.java rename to inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsProperties.java index ce8b812ff5c..4523527aa0f 100644 --- a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/clarin/webanno/xmi/package-info.java +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsProperties.java @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package de.tudarmstadt.ukp.inception.io.xmi.config; -/** - * Support for (de)serializing the CAS to/from XMI files. - * - * @since 1.1.0 - */ -package de.tudarmstadt.ukp.clarin.webanno.xmi; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; + +public interface UimaFormatsProperties +{ + XmiFormatProperties getUimaXmiXml1_1(); + + XmiFormatProperties getUimaXmi(); +} diff --git a/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsPropertiesImpl.java b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsPropertiesImpl.java new file mode 100644 index 00000000000..2083e322ea7 --- /dev/null +++ b/inception/inception-io-xmi/src/main/java/de/tudarmstadt/ukp/inception/io/xmi/config/UimaFormatsPropertiesImpl.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.xmi.config; + +import org.springframework.boot.context.properties.ConfigurationProperties; + +/** + *

+ * This class is exposed as a Spring bean via {@link UimaFormatsAutoConfiguration}.

+ */ +@ConfigurationProperties("format") +public class UimaFormatsPropertiesImpl + implements UimaFormatsProperties +{ + private XmiFormatProperties uimaXmi = new XmiFormatProperties(); + private XmiFormatProperties uimaXmiXml1_1 = new XmiFormatProperties(); + + @Override + public XmiFormatProperties getUimaXmi() + { + return uimaXmi; + } + + public void setUimaXmi(XmiFormatProperties aUimaXmi) + { + uimaXmi = aUimaXmi; + } + + @Override + public XmiFormatProperties getUimaXmiXml1_1() + { + return uimaXmiXml1_1; + } + + public void setUimaXmiXml1_1(XmiFormatProperties aUimaXmiXml1_1) + { + uimaXmiXml1_1 = aUimaXmiXml1_1; + } + + public static class XmiFormatProperties + { + private boolean sanitizeIllegalCharacters = true; + + public void setSanitizeIllegalCharacters(boolean aSanitizeIllegalCharacters) + { + sanitizeIllegalCharacters = aSanitizeIllegalCharacters; + } + + public boolean isSanitizeIllegalCharacters() + { + return sanitizeIllegalCharacters; + } + + } +} diff --git a/inception/inception-io-xmi/src/main/resources/META-INF/asciidoc/user-guide/formats-uimaxmi.adoc b/inception/inception-io-xmi/src/main/resources/META-INF/asciidoc/user-guide/formats-uimaxmi.adoc index 8f261395805..b0a0d67dd7d 100644 --- a/inception/inception-io-xmi/src/main/resources/META-INF/asciidoc/user-guide/formats-uimaxmi.adoc +++ b/inception/inception-io-xmi/src/main/resources/META-INF/asciidoc/user-guide/formats-uimaxmi.adoc @@ -20,7 +20,14 @@ The probably most commonly used formats supported by the Apache UIMA framework is UIMA CAS XMI. It is able to capture all the information contained in the CAS. This is the de-facto standard for exchanging data in the UIMA world. Most UIMA-related tools support it. -The XMI format does not include type system information. When exporting files in the XMI format, a ZIP file is created for each document which contains the XMI file itself as well as an XML file containing the type system. +The XMI format does not include type system information. 
When exporting files in the XMI format, a ZIP file is created for each document which contains the XMI file itself as well as an XML file containing the type system. In order to import such files +again, the ZIPs would need to be extracted and only the XMI files contained within should be imported. + +XML 1.0 and XML 1.1 do not allow all Unicode characters. In particular, certain control characters are not permitted. +{product-name} by default will replace illegal characters with a space character on export. This behavior can be +disabled using the boolean properties `format.uima-xmi.sanitize-illegal-characters` and +`format.uima-xmi-xml1_1.sanitize-illegal-characters`. When disabled, an error is produced when trying to export texts +containing illegal characters. There are two flavors of CAS XMI, namely link:http://www.w3.org/TR/2006/REC-xml-20060816/Overview.html[XML 1.0] and link:http://www.w3.org/TR/xml11/Overview.html[XML 1.1]. XML 1.0 is more widely supported in the world of XML parsers, so you may expect better interoperability with other programming languages diff --git a/inception/inception-io-xmi/src/main/resources/META-INF/spring.factories b/inception/inception-io-xmi/src/main/resources/META-INF/spring.factories deleted file mode 100644 index 7f3e2b7c9b2..00000000000 --- a/inception/inception-io-xmi/src/main/resources/META-INF/spring.factories +++ /dev/null @@ -1,2 +0,0 @@ -org.springframework.boot.autoconfigure.EnableAutoConfiguration=\ -de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration \ No newline at end of file diff --git a/inception/inception-io-xmi/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/inception/inception-io-xmi/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports new file mode 100644 index 00000000000..e99ca2decff --- /dev/null +++ 
b/inception/inception-io-xmi/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports @@ -0,0 +1 @@ +de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration diff --git a/inception/inception-io-xml/src/main/resources/desc/type/XmlStructure.xml b/inception/inception-io-xml/src/main/resources/desc/type/XmlStructure.xml index 20aee100293..fcb5edaffac 100644 --- a/inception/inception-io-xml/src/main/resources/desc/type/XmlStructure.xml +++ b/inception/inception-io-xml/src/main/resources/desc/type/XmlStructure.xml @@ -1,673 +1,123 @@ - - - - - XML - - - - - - - - - - - + ${version} - - - - - Ubiquitous Knowledge Processing (UKP) Lab, Technische Universität Darmstadt - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlElement - - - - - XML element - - - - - org.dkpro.core.api.xml.type.XmlNode - - - - - - - - - - - - - - - uri - - - - - Namespace URI of the element. - - - - - uima.cas.String - - - - - - - - - - - - - - - localName - - - - - Local name of the XML element. - - - - - uima.cas.String - - - - - - - - - - - - - - - attributes - - - - - Array of attributes of the XML element. - - - - - uima.cas.FSArray - - - - - org.dkpro.core.api.xml.type.XmlAttribute - - - - - - - - - - - - - - - children - - - - - Children of this XML element. - - - - - uima.cas.FSArray - - - - - org.dkpro.core.api.xml.type.XmlNode - - - - - - - - - - - - - - - qName - - - - - - - - - - - + uima.cas.String - - - - - - - - - - - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlAttribute - - - - - - - - - - - + uima.cas.TOP - - - - - - - - - - - - - - - uri - - - - - Namespace URI of the attribute. - - - - - uima.cas.String - - - - - - - - - - - - - - - localName - - - - - Local name of the attribute. - - - - - uima.cas.String - - - - - - - - - - - - - - - value - - - - - Value of the XML attribute. 
- - - - - uima.cas.String - - - - - - - - - - - - - - - qName - - - - - - - - - - - + uima.cas.String - - - - - - - - - - - - - - - valueType - - - - - - - - - - - + uima.cas.String - - - - - - - - - - - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlNode - - - - - Supertype for XmlElements and XmlTextNodes. - - - - - uima.tcas.Annotation - - - - - - - - - - - - - - - parent - - - - - - - - - - - + org.dkpro.core.api.xml.type.XmlElement - - - - - - - - - - - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlDocument - - - - - XML document - - - - - uima.tcas.Annotation - - - - - - - - - - - - - - - root - - - - - Root element of the XML document. - - - - - org.dkpro.core.api.xml.type.XmlElement - - - - - - - - - - - captureRoots - - - + uima.cas.FSArray - org.dkpro.core.api.xml.type.XmlNode - true - - - - - - - - - - - - - - - - org.dkpro.core.api.xml.type.XmlTextNode - - - - - XML text node. - - - - - org.dkpro.core.api.xml.type.XmlNode - - - - - - - - - - - text - - - - - - - + uima.cas.String - - - - - - - - - captured - - - Whether the text node has been added to the document text. 
- - - uima.cas.Boolean - - - - - - - - - - - - - - - - - - + \ No newline at end of file diff --git a/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/task/curated/CuratedDocumentsProjectExporterPanel.java b/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/task/curated/CuratedDocumentsProjectExporterPanel.java index e71069a43be..bb2e987a91e 100644 --- a/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/task/curated/CuratedDocumentsProjectExporterPanel.java +++ b/inception/inception-project-export/src/main/java/de/tudarmstadt/ukp/inception/project/export/task/curated/CuratedDocumentsProjectExporterPanel.java @@ -30,7 +30,7 @@ import de.tudarmstadt.ukp.clarin.webanno.support.lambda.LambdaAjaxFormComponentUpdatingBehavior; import de.tudarmstadt.ukp.clarin.webanno.support.lambda.LambdaAjaxLink; import de.tudarmstadt.ukp.clarin.webanno.support.lambda.LambdaBehavior; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.project.export.ProjectExportService; import de.tudarmstadt.ukp.inception.project.export.settings.FormatDropdownChoice; import de.tudarmstadt.ukp.inception.project.export.settings.ProjectExporterPanelImplBase; diff --git a/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java b/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java index e5e7bad5f10..5d8e08c450e 100644 --- a/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java +++ b/inception/inception-project-export/src/test/java/de/tudarmstadt/ukp/inception/project/export/AnnotationDocumentsExporterTest.java @@ -47,7 +47,6 @@ import 
de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedSourceDocument; import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; -import de.tudarmstadt.ukp.clarin.webanno.xmi.XmiFormatSupport; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageServiceImpl; import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageBackupProperties; import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageCachePropertiesImpl; @@ -56,6 +55,8 @@ import de.tudarmstadt.ukp.inception.export.DocumentImportExportServiceImpl; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceProperties; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServicePropertiesImpl; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.schema.AnnotationSchemaService; import de.tudarmstadt.ukp.inception.schema.exporters.AnnotationDocumentExporter; @@ -99,9 +100,10 @@ public void setUp() throws Exception casStorageService = new CasStorageServiceImpl(driver, new CasStorageCachePropertiesImpl(), null, schemaService); + var xmiFormatSupport = new XmiFormatSupport(new XmiFormatProperties()); importExportSerivce = new DocumentImportExportServiceImpl(repositoryProperties, - asList(new XmiFormatSupport()), casStorageService, schemaService, properties, - checksRegistry, repairsRegistry); + asList(xmiFormatSupport), casStorageService, schemaService, properties, + checksRegistry, repairsRegistry, xmiFormatSupport); sut = new AnnotationDocumentExporter(documentService, null, importExportSerivce, repositoryProperties); diff --git a/inception/inception-search-mtas/pom.xml b/inception/inception-search-mtas/pom.xml index 10d62bc56b2..c2015fb1b18 100644 --- a/inception/inception-search-mtas/pom.xml +++ b/inception/inception-search-mtas/pom.xml @@ 
-224,6 +224,11 @@ inception-project-initializers test
+ + de.tudarmstadt.ukp.inception.app + inception-io-xmi + test + de.tudarmstadt.ukp.inception.app inception-kb diff --git a/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasDocumentIndexTest.java b/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasDocumentIndexTest.java index 0b64b49f5df..5e19eab2a90 100644 --- a/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasDocumentIndexTest.java +++ b/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasDocumentIndexTest.java @@ -82,6 +82,8 @@ import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.documents.config.DocumentServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.kb.config.KnowledgeBaseServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.preferences.config.PreferencesServiceAutoConfig; import de.tudarmstadt.ukp.inception.scheduling.config.SchedulingServiceAutoConfiguration; @@ -721,5 +723,11 @@ public ApplicationContextProvider contextProvider() { return new ApplicationContextProvider(); } + + @Bean + public XmiFormatSupport xmiFormatSupport() + { + return new XmiFormatSupport(new XmiFormatProperties()); + } } } diff --git a/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasUpgradeTest.java b/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasUpgradeTest.java index 567dec41e77..4f194890802 100644 --- 
a/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasUpgradeTest.java +++ b/inception/inception-search-mtas/src/test/java/de/tudarmstadt/ukp/inception/search/index/mtas/MtasUpgradeTest.java @@ -54,6 +54,8 @@ import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.documents.config.DocumentServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.XmiFormatSupport; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsPropertiesImpl.XmiFormatProperties; import de.tudarmstadt.ukp.inception.preferences.config.PreferencesServiceAutoConfig; import de.tudarmstadt.ukp.inception.scheduling.config.SchedulingServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.schema.config.AnnotationSchemaServiceAutoConfiguration; @@ -186,5 +188,11 @@ RepositoryProperties repositoryProperties() props.setPath(new File(WORK_DIR)); return props; } + + @Bean + public XmiFormatSupport xmiFormatSupport() + { + return new XmiFormatSupport(new XmiFormatProperties()); + } } } diff --git a/inception/inception-versioning/src/test/java/de/tudarmstadt/ukp/inception/versioning/VersioningServiceImplTest.java b/inception/inception-versioning/src/test/java/de/tudarmstadt/ukp/inception/versioning/VersioningServiceImplTest.java index f7b868d4304..6f61be8eedb 100644 --- a/inception/inception-versioning/src/test/java/de/tudarmstadt/ukp/inception/versioning/VersioningServiceImplTest.java +++ b/inception/inception-versioning/src/test/java/de/tudarmstadt/ukp/inception/versioning/VersioningServiceImplTest.java @@ -61,12 +61,12 @@ import de.tudarmstadt.ukp.clarin.webanno.security.config.SecurityAutoConfiguration; import de.tudarmstadt.ukp.clarin.webanno.security.model.User; import de.tudarmstadt.ukp.clarin.webanno.text.config.TextFormatsAutoConfiguration; -import 
de.tudarmstadt.ukp.clarin.webanno.xmi.config.UimaFormatsAutoConfiguration; import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageSession; import de.tudarmstadt.ukp.inception.annotation.storage.config.CasStorageServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.curation.config.CurationDocumentServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.documents.config.DocumentServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.export.config.DocumentImportExportServiceAutoConfiguration; +import de.tudarmstadt.ukp.inception.io.xmi.config.UimaFormatsAutoConfiguration; import de.tudarmstadt.ukp.inception.schema.config.AnnotationSchemaServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.versioning.config.VersioningServiceAutoConfiguration;