Skip to content

Commit

Permalink
#1571 - Upgrade dependencies
Browse files: browse the repository at this point in the history
- Update webanno-tsv code and test data to version from INCEpTION 29.1
  • Loading branch information
reckart committed Sep 13, 2023
1 parent d5fb2c4 commit 9e68f5e
Show file tree
Hide file tree
Showing 150 changed files with 2,437 additions and 1,946 deletions.
13 changes: 8 additions & 5 deletions dkpro-core-io-webanno-asl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down Expand Up @@ -71,11 +79,6 @@
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-syntax-asl</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-io-xmi-asl</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/*
* Copyright 2017
* Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -23,19 +23,14 @@

import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.jcas.JCas;
import org.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase;
import org.dkpro.core.api.parameter.ComponentParameters;
import org.dkpro.core.api.parameter.MimeTypes;
import org.dkpro.core.io.webanno.tsv.internal.tsv3x.Tsv3XDeserializer;

/**
* Reads the WebAnno TSV v3.x format.
*/
@ResourceMetaData(name = "WebAnno TSV v3.x Reader")
@MimeTypeCapability({MimeTypes.TEXT_X_WEBANNO_TSV3})
public class WebannoTsv3XReader
extends JCasResourceCollectionReader_ImplBase
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/*
* Copyright 2017
* Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -17,19 +17,17 @@
*/
package org.dkpro.core.io.webanno.tsv;

import static org.apache.commons.io.IOUtils.buffer;

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;

import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.dkpro.core.api.io.JCasFileWriter_ImplBase;
import org.dkpro.core.api.parameter.ComponentParameters;
import org.dkpro.core.api.parameter.MimeTypes;
import org.dkpro.core.io.webanno.tsv.internal.tsv3x.Tsv3XCasDocumentBuilder;
import org.dkpro.core.io.webanno.tsv.internal.tsv3x.Tsv3XCasSchemaAnalyzer;
import org.dkpro.core.io.webanno.tsv.internal.tsv3x.Tsv3XSerializer;
Expand All @@ -39,13 +37,6 @@
/**
* Writes the WebAnno TSV v3.x format.
*/
@ResourceMetaData(name = "WebAnno TSV v3.x Writer")
@MimeTypeCapability({MimeTypes.TEXT_X_WEBANNO_TSV3})
@TypeCapability(
inputs = {
"de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData",
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"})
public class WebannoTsv3XWriter
extends JCasFileWriter_ImplBase
{
Expand All @@ -59,20 +50,19 @@ public class WebannoTsv3XWriter
/**
* Use this filename extension.
*/
public static final String PARAM_FILENAME_EXTENSION =
ComponentParameters.PARAM_FILENAME_EXTENSION;
public static final String PARAM_FILENAME_EXTENSION = ComponentParameters.PARAM_FILENAME_EXTENSION;
@ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".tsv")
private String filenameSuffix;

@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
TsvSchema schema = Tsv3XCasSchemaAnalyzer.analyze(aJCas.getTypeSystem());

TsvDocument doc = Tsv3XCasDocumentBuilder.of(schema, aJCas);
try (PrintWriter docOS = new PrintWriter(new OutputStreamWriter(
getOutputStream(aJCas, filenameSuffix), encoding))) {

try (PrintWriter docOS = new PrintWriter(
new OutputStreamWriter(buffer(getOutputStream(aJCas, filenameSuffix)), encoding))) {
new Tsv3XSerializer().write(docOS, doc);
}
catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
/*
* Copyright 2017
* Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/package org.dkpro.core.io.webanno.tsv.internal.tsv3x;
*/
package org.dkpro.core.io.webanno.tsv.internal.tsv3x;

import static org.apache.commons.lang3.StringEscapeUtils.unescapeJava;
import static org.apache.commons.text.StringEscapeUtils.unescapeJava;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -77,11 +78,12 @@ public static String escapeText(String aText)
esc.toArray(new String[esc.size()]));
}

public static String unescapeText(TsvFormatHeader aHeader, String aText) {
if ("3.1".equals(aHeader.getVersion())) {
public static String unescapeText(TsvFormatHeader aHeader, String aText)
{
if (aHeader.getMajorVersion() == 3 && aHeader.getMinorVersion() <= 1) {
return unescapeJava(aText);
}
else if ("3.2".equals(aHeader.getVersion())) {
else if (aHeader.getMajorVersion() == 3 && aHeader.getMinorVersion() >= 2) {
List<String> pat = new ArrayList<>();
List<String> esc = new ArrayList<>();
for (int i = 0; i < 32; i++) {
Expand Down Expand Up @@ -110,8 +112,8 @@ else if ("3.2".equals(aHeader.getVersion())) {
// backslash
pat.add("\\");
esc.add("\\\\");
return StringUtils.replaceEach(aText,
esc.toArray(new String[esc.size()]), pat.toArray(new String[pat.size()]));
return StringUtils.replaceEach(aText, esc.toArray(new String[esc.size()]),
pat.toArray(new String[pat.size()]));
}
else {
throw new IllegalStateException("Unknown version: [" + aHeader.getVersion() + "]");
Expand Down
Loading

0 comments on commit 9e68f5e

Please sign in to comment.