From 6f98fcd9f202bc07abdc570f4b4c3c0b99fe4577 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Sat, 31 Dec 2022 11:34:35 +0100 Subject: [PATCH] #1553 - Remove SemanticFieldAnnotator - Removed code and related dependencies --- dkpro-core-dictionaryannotator-asl/pom.xml | 18 --- .../SemanticFieldAnnotator.java | 129 ------------------ .../semantictagging/SemanticTagProvider.java | 59 -------- .../semantictagging/SemanticTagResource.java | 129 ------------------ .../semantictagging/package-info.java | 22 --- .../SemanticFieldAnnotatorTest.java | 114 ---------------- 6 files changed, 471 deletions(-) delete mode 100644 dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticFieldAnnotator.java delete mode 100644 dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticTagProvider.java delete mode 100644 dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticTagResource.java delete mode 100644 dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/package-info.java delete mode 100644 dkpro-core-dictionaryannotator-asl/src/test/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticFieldAnnotatorTest.java diff --git a/dkpro-core-dictionaryannotator-asl/pom.xml b/dkpro-core-dictionaryannotator-asl/pom.xml index e99aad6a28..032a0d5cb2 100644 --- a/dkpro-core-dictionaryannotator-asl/pom.xml +++ b/dkpro-core-dictionaryannotator-asl/pom.xml @@ -36,11 +36,6 @@ org.apache.uima uimafit-core - - commons-jxpath - commons-jxpath - 1.3 - org.apache.commons commons-lang3 @@ -57,18 +52,10 @@ org.dkpro.core dkpro-core-api-segmentation-asl - - org.dkpro.core - dkpro-core-api-semantics-asl - org.dkpro.core dkpro-core-api-parameter-asl - - org.dkpro.core - dkpro-core-api-lexmorph-asl - eu.openminted.share.annotations omtd-share-annotations-api @@ -78,11 +65,6 @@ junit test - - org.dkpro.core - dkpro-core-testing-asl - test - org.dkpro.core dkpro-core-api-ner-asl diff --git a/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticFieldAnnotator.java b/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticFieldAnnotator.java deleted file mode 100644 index 29f2f8a5f1..0000000000 --- a/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticFieldAnnotator.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.dictionaryannotator.semantictagging; - -import java.util.Collection; -import java.util.Collections; - -import org.apache.commons.jxpath.JXPathContext; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.cas.CAS; -import org.apache.uima.cas.text.AnnotationFS; -import org.apache.uima.fit.component.JCasAnnotator_ImplBase; -import org.apache.uima.fit.descriptor.ConfigurationParameter; -import org.apache.uima.fit.descriptor.ExternalResource; -import org.apache.uima.fit.descriptor.ResourceMetaData; -import org.apache.uima.fit.descriptor.TypeCapability; -import org.apache.uima.fit.util.CasUtil; -import org.apache.uima.fit.util.JCasUtil; -import org.apache.uima.jcas.JCas; -import org.apache.uima.resource.ResourceAccessException; - -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticField; -import eu.openminted.share.annotations.api.Component; -import eu.openminted.share.annotations.api.DocumentationResource; -import eu.openminted.share.annotations.api.constants.OperationType; - -/** - * This Analysis Engine annotates - * English single words with semantic field information retrieved from an ExternalResource. - * This could be a lexical resource such as WordNet or a simple key-value map. - * The annotation is stored in the SemanticField annotation type. - */ -@Component(OperationType.MATCHER) -@ResourceMetaData(name = "Semantic Field Annotator") -@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}") -@TypeCapability( - inputs = { - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", - "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma", - "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" }, - outputs = { - "de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity" }) -public class SemanticFieldAnnotator - extends JCasAnnotator_ImplBase -{ - /** - * The semantic resource to use. - */ - public static final String RES_SEMANTIC_FIELD_RESOURCE = "semanticFieldResource"; - @ExternalResource(key = RES_SEMANTIC_FIELD_RESOURCE) - private SemanticTagResource semanticFieldResource; - - // TODO a parameter for the language would be good - - /** - * Annotation types which should be annotated with semantic fields - */ - public static final String PARAM_ANNOTATION_TYPE = "annotationType"; - @ConfigurationParameter(name = PARAM_ANNOTATION_TYPE, mandatory = true) - private String annotationType; - - /** - * A constraint on the annotations that should be considered in form of a JXPath statement. - * Example: set {@link #PARAM_ANNOTATION_TYPE} to a {@code NamedEntity} type and set the - * {@link #PARAM_CONSTRAINT} to {@code ".[value = 'LOCATION']"} to annotate only tokens with - * semantic fields that are part of a location named entity. - */ - public static final String PARAM_CONSTRAINT = "constraint"; - @ConfigurationParameter(name = PARAM_CONSTRAINT, mandatory = false) - private String constraint; - - @Override - public void process(JCas aJCas) - throws AnalysisEngineProcessException - { - CAS cas = aJCas.getCas(); - - for (AnnotationFS cover : CasUtil.select(cas, - CasUtil.getAnnotationType(cas, annotationType))) { - - // If there is a constraint, check if it matches - if (constraint != null) { - JXPathContext ctx = JXPathContext.newContext(cover); - boolean match = ctx.iterate(constraint).hasNext(); - if (!match) { - continue; - } - } - - // If the target type is a token, use it directly, otherwise select the covered tokens - Collection tokens; - if (cover instanceof Token) { - tokens = Collections.singleton((Token) cover); - } - else { - tokens = JCasUtil.selectCovered(aJCas, Token.class, cover); - } - - for (Token token : tokens) { - try { - String semanticField = semanticFieldResource.getSemanticTag(token); - SemanticField semanticFieldAnnotation = new SemanticField(aJCas, - token.getBegin(), token.getEnd()); - semanticFieldAnnotation.setValue(semanticField); - semanticFieldAnnotation.addToIndexes(); - } - catch (ResourceAccessException e) { - throw new AnalysisEngineProcessException(e); - } - } - } - } -} diff --git a/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticTagProvider.java b/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticTagProvider.java deleted file mode 100644 index 0e7f1085a6..0000000000 --- a/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticTagProvider.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.dictionaryannotator.semantictagging; - -import java.util.List; - -import org.apache.uima.resource.ResourceAccessException; - -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; - -/** - * - * This interface can be used to create various UIMA resources that provide semantic tags. - * - */ -public interface SemanticTagProvider -{ - /** - * Get a semantic tag for a token. - * - * @param token - * token to tag - * @return semantic tag of the token. Returns "UNKNOWN" if the (lemma of the) token does not - * exist in the resource. - * @throws ResourceAccessException - * if the semantic resource cannot be accessed. - */ - public String getSemanticTag(Token token) - throws ResourceAccessException; - - /** - * Get a semantic tag for a list of tokens (e.g. a multiword). - * - * @param tokens - * list of tokens to tag - * @return semantic tag of the multiword. Returns "UNKNOWN" if the (lemma of the) multiword does - * not exist in the resource. - * @throws ResourceAccessException - * if the semantic resource cannot be accessed. - */ - public String getSemanticTag(List tokens) - throws ResourceAccessException; - -} diff --git a/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticTagResource.java b/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticTagResource.java deleted file mode 100644 index 5c1a44697d..0000000000 --- a/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticTagResource.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.dictionaryannotator.semantictagging; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URL; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.lang3.StringUtils; -import org.apache.uima.fit.component.Resource_ImplBase; -import org.apache.uima.fit.descriptor.ConfigurationParameter; -import org.apache.uima.resource.ResourceAccessException; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceSpecifier; -import org.dkpro.core.api.resources.ResourceUtils; - -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; - - -/** - * - * This shared resource can be added as ExternalResource in Analysis Engines - * that annotate tokens with semantic tags looked up in a key-value map - * e.g., to annotate common nouns with semantic field information from WordNet. - */ -public class SemanticTagResource - extends Resource_ImplBase - implements SemanticTagProvider -{ - - public final static String PARAM_RESOURCE_PATH = "resourcePath"; - @ConfigurationParameter(name = PARAM_RESOURCE_PATH, mandatory = true) - // TODO add default like: defaultValue = "classpath:de/tudarmstadt/ukp/dkpro/core/decompounding/lib/spelling/de/igerman98/de_DE_igerman98.dic" - private String resourcePath; - - private Map keySemanticTagMap = new HashMap(); - - @Override - public boolean initialize(ResourceSpecifier aSpecifier, Map aAdditionalParams) - throws ResourceInitializationException - { - if (!super.initialize(aSpecifier, aAdditionalParams)) { - return false; - } - - try { - final URL uri = ResourceUtils.resolveLocation(resourcePath, this, null); - readFileToMap(new BufferedReader(new InputStreamReader(uri.openStream()))); - - } - catch (IOException e) { - throw new ResourceInitializationException(e); - } - - return true; - - } - - - @Override - public String getSemanticTag(Token token) throws ResourceAccessException { - - try { - if (keySemanticTagMap.containsKey(token.getLemma().getValue())) { - return keySemanticTagMap.get(token.getLemma().getValue()); - } else { - return "UNKNOWN"; - } - } catch (Exception e) { - throw new ResourceAccessException(e); - } - } - - @Override - public String getSemanticTag(List tokens) throws ResourceAccessException { - - List lemmas = new ArrayList(); - for (Token token : tokens) { - lemmas.add(token.getLemma().getValue()); - } - String lemmaString = StringUtils.join(lemmas, " "); - - try { - if (keySemanticTagMap.containsKey(lemmaString)) { - return keySemanticTagMap.get(lemmaString); - } else { - return "UNKNOWN"; - } - } catch (Exception e) { - throw new ResourceAccessException(e); - } - } - - - - private void readFileToMap(BufferedReader bufferedReader) throws IOException { - String line; - - while ((line = bufferedReader.readLine()) != null) { - String[] temp = line.split("\t"); - String key = temp[0]; - String semField = temp[1]; - System.out.println(line); - keySemanticTagMap.put(key, semField); - } - } - - -} diff --git a/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/package-info.java b/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/package-info.java deleted file mode 100644 index 2092d515e9..0000000000 --- a/dkpro-core-dictionaryannotator-asl/src/main/java/org/dkpro/core/dictionaryannotator/semantictagging/package-info.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Interface that provides access to semantic tags from various resources (UBY, key-value maps ...) - */ -package org.dkpro.core.dictionaryannotator.semantictagging; diff --git a/dkpro-core-dictionaryannotator-asl/src/test/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticFieldAnnotatorTest.java b/dkpro-core-dictionaryannotator-asl/src/test/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticFieldAnnotatorTest.java deleted file mode 100644 index 66a232797c..0000000000 --- a/dkpro-core-dictionaryannotator-asl/src/test/java/org/dkpro/core/dictionaryannotator/semantictagging/SemanticFieldAnnotatorTest.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2017 - * Ubiquitous Knowledge Processing (UKP) Lab - * Technische Universität Darmstadt - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dkpro.core.dictionaryannotator.semantictagging; - -import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; -import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; -import static org.apache.uima.fit.factory.ExternalResourceFactory.createResourceDescription; -import static org.apache.uima.fit.util.JCasUtil.select; - -import org.apache.uima.UIMAException; -import org.apache.uima.analysis_engine.AnalysisEngine; -import org.apache.uima.analysis_engine.AnalysisEngineDescription; -import org.apache.uima.fit.testing.factory.TokenBuilder; -import org.apache.uima.fit.util.JCasUtil; -import org.apache.uima.jcas.JCas; -import org.dkpro.core.testing.AssertAnnotations; -import org.junit.Test; - -import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; -import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_NOUN; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticField; - -public class SemanticFieldAnnotatorTest -{ - - @Test - public void test() - throws Exception - { - runTest("en", "Vanilla in the sky prefers braveness over jumpiness .", - new String[] { "vanilla", "in", "the", "sky", "prefer", "braveness", "over", - "jumpiness", "." }, - new String[] { "NN", "NOT_RELEVANT", "NOT_RELEVANT", "NN", "NOT_RELEVANT", "NN", - "NOT_RELEVANT", "NN", "$." }, - new String[] { "plant", "object", "attribute", "feeling" }); - - runTest("en", "Vanilla in the distantGalaxyBehindJupiter prefers braveness over jumpiness .", - new String[] { "vanilla", "in", "the", "distantGalaxyBehindJupiter", "prefer", - "braveness", "over", "jumpiness", "." }, - new String[] { "NN", "NOT_RELEVANT", "NOT_RELEVANT", "NN", "NOT_RELEVANT", "NN", - "NOT_RELEVANT", "NN", "$." }, - new String[] { "plant", "UNKNOWN", "attribute", "feeling" }); - } - - private void runTest(String language, String testDocument, String[] documentLemmas, - String[] documentPosTags, String[] documentNounSemanticFields) - throws UIMAException - { - - AnalysisEngineDescription processor = createEngineDescription( - createEngineDescription( - SemanticFieldAnnotator.class, - SemanticFieldAnnotator.PARAM_ANNOTATION_TYPE, Token.class, - SemanticFieldAnnotator.PARAM_CONSTRAINT, ".[pos/posValue = 'NN']", - SemanticFieldAnnotator.RES_SEMANTIC_FIELD_RESOURCE, - createResourceDescription(SemanticTagResource.class, - SemanticTagResource.PARAM_RESOURCE_PATH, - "src/test/resources/nounSemanticFieldMapTest.txt"))); - - AnalysisEngine engine = createEngine(processor); - JCas aJCas = engine.newJCas(); - aJCas.setDocumentLanguage(language); - - TokenBuilder tb = new TokenBuilder(Token.class, - Sentence.class); - tb.buildTokens(aJCas, testDocument); - - int offset = 0; - for (Token token : JCasUtil.select(aJCas, Token.class)) { - - if (documentPosTags[offset].matches("NN")) { - POS_NOUN nn = new POS_NOUN(aJCas, token.getBegin(), token.getEnd()); - nn.setPosValue(documentPosTags[offset]); - nn.addToIndexes(); - token.setPos(nn); - } - else { - POS pos = new POS(aJCas, token.getBegin(), token.getEnd()); - pos.setPosValue(documentPosTags[offset]); - pos.addToIndexes(); - token.setPos(pos); - } - - Lemma lemma = new Lemma(aJCas, token.getBegin(), token.getEnd()); - lemma.setValue(documentLemmas[offset]); - lemma.addToIndexes(); - token.setLemma(lemma); - - offset++; - } - engine.process(aJCas); - - AssertAnnotations.assertSemanticField(documentNounSemanticFields, - select(aJCas, SemanticField.class)); - } -}