From 51cdb853467de9c2a066e54497828d06274088af Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Thu, 6 Jun 2024 22:31:46 +0200 Subject: [PATCH] #4064 - Support for agreement calculation on document-level annotations - Added a diff adapter for document-level annotations - Allow document-level layers in coding agreements --- inception/inception-agreement/pom.xml | 4 + ...AbstractCodingAgreementMeasureSupport.java | 4 +- inception/inception-curation-legacy/pom.xml | 5 ++ .../webanno/curation/casdiff/CasDiff.java | 9 +- .../curation/casdiff/api/DiffAdapter.java | 8 +- .../casdiff/api/DiffAdapter_ImplBase.java | 3 +- .../docmeta/DocumentMetadataDiffAdapter.java | 85 +++++++++++++++++++ .../casdiff/docmeta/DocumentPosition.java | 54 ++++++++++++ 8 files changed, 165 insertions(+), 7 deletions(-) create mode 100644 inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/docmeta/DocumentMetadataDiffAdapter.java create mode 100644 inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/docmeta/DocumentPosition.java diff --git a/inception/inception-agreement/pom.xml b/inception/inception-agreement/pom.xml index d95e433da0b..760a5c54835 100644 --- a/inception/inception-agreement/pom.xml +++ b/inception/inception-agreement/pom.xml @@ -97,6 +97,10 @@ de.tudarmstadt.ukp.inception.app inception-annotation-storage-api + + de.tudarmstadt.ukp.inception.app + inception-layer-docmetadata + org.dkpro.statistics diff --git a/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/results/coding/AbstractCodingAgreementMeasureSupport.java b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/results/coding/AbstractCodingAgreementMeasureSupport.java index 56c5aee7b47..66d5771a397 100644 --- 
a/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/results/coding/AbstractCodingAgreementMeasureSupport.java +++ b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/results/coding/AbstractCodingAgreementMeasureSupport.java @@ -39,6 +39,7 @@ import de.tudarmstadt.ukp.clarin.webanno.model.MultiValueMode; import de.tudarmstadt.ukp.inception.annotation.layer.relation.RelationLayerSupport; import de.tudarmstadt.ukp.inception.annotation.layer.span.SpanLayerSupport; +import de.tudarmstadt.ukp.inception.ui.core.docanno.layer.DocumentMetadataLayerSupport; public abstract class AbstractCodingAgreementMeasureSupport extends AgreementMeasureSupport_ImplBase @@ -48,7 +49,8 @@ public boolean accepts(AnnotationFeature aFeature) { AnnotationLayer layer = aFeature.getLayer(); - return asList(SpanLayerSupport.TYPE, RelationLayerSupport.TYPE).contains(layer.getType()) + return asList(SpanLayerSupport.TYPE, RelationLayerSupport.TYPE, + DocumentMetadataLayerSupport.TYPE).contains(layer.getType()) && asList(SINGLE_TOKEN, TOKENS, SENTENCES).contains(layer.getAnchoringMode()) // Link features are supported (because the links generate sub-positions in the diff // but multi-value primitives (e.g. 
multi-value strings) are not supported diff --git a/inception/inception-curation-legacy/pom.xml b/inception/inception-curation-legacy/pom.xml index d6d72ecdcda..d695e7cf070 100644 --- a/inception/inception-curation-legacy/pom.xml +++ b/inception/inception-curation-legacy/pom.xml @@ -64,6 +64,10 @@ de.tudarmstadt.ukp.inception.app inception-model-vdoc + + de.tudarmstadt.ukp.inception.app + inception-layer-docmetadata + org.dkpro.core @@ -115,6 +119,7 @@ de.tudarmstadt.ukp.inception.app:inception-support + de.tudarmstadt.ukp.inception.app:inception-layer-docmetadata diff --git a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/CasDiff.java b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/CasDiff.java index dcdb0391e6b..ef53ff5b9aa 100644 --- a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/CasDiff.java +++ b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/CasDiff.java @@ -45,6 +45,7 @@ import org.apache.uima.cas.TypeSystem; import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.fit.util.FSUtil; +import org.apache.uima.jcas.cas.AnnotationBase; import org.apache.uima.jcas.tcas.Annotation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,6 +53,7 @@ import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter; import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter_ImplBase; import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position; +import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.docmeta.DocumentMetadataDiffAdapter; import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.internal.AID; import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.relation.RelationDiffAdapter; import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.span.SpanDiffAdapter; @@ -65,6 +67,7 @@ import 
de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService; import de.tudarmstadt.ukp.inception.support.uima.ICasUtil; import de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil; +import de.tudarmstadt.ukp.inception.ui.core.docanno.layer.DocumentMetadataLayerSupport; public class CasDiff { @@ -229,7 +232,7 @@ private void addCas(String aCasGroupId, CAS aCas, String aType) var adapter = getAdapter(aType); - Collection annotations; + Collection annotations; if (begin == -1 && end == -1) { annotations = aCas. select(type).asList(); } @@ -682,6 +685,10 @@ public static List getDiffAdapters(AnnotationSchemaService schemaSe typeAdpt.getTargetFeatureName(), labelFeatures); break; } + case DocumentMetadataLayerSupport.TYPE: { + adapter = new DocumentMetadataDiffAdapter(layer.getName(), labelFeatures); + break; + } default: LOG.debug("Curation for layer type [{}] not supported - ignoring", layer.getType()); continue nextLayer; diff --git a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/api/DiffAdapter.java b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/api/DiffAdapter.java index d36ff6f0b32..3b52aacc961 100644 --- a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/api/DiffAdapter.java +++ b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/api/DiffAdapter.java @@ -23,8 +23,7 @@ import org.apache.uima.cas.CAS; import org.apache.uima.cas.FeatureStructure; -import org.apache.uima.cas.text.AnnotationFS; -import org.apache.uima.jcas.tcas.Annotation; +import org.apache.uima.jcas.cas.AnnotationBase; import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.LinkCompareBehavior; import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.LinkFeatureDecl; @@ -36,7 +35,7 @@ public interface DiffAdapter */ String getType(); - Collection 
generateSubPositions(AnnotationFS aFs, + Collection generateSubPositions(AnnotationBase aFs, LinkCompareBehavior aLinkCompareBehavior); LinkFeatureDecl getLinkFeature(String aFeature); @@ -48,5 +47,6 @@ Collection generateSubPositions(AnnotationFS aFs, Position getPosition(FeatureStructure aFS, String aFeature, String aRole, int aLinkTargetBegin, int aLinkTargetEnd, LinkCompareBehavior aLinkCompareBehavior); - List selectAnnotationsInWindow(CAS aCas, int aWindowBegin, int aWindowEnd); + List selectAnnotationsInWindow(CAS aCas, int aWindowBegin, + int aWindowEnd); } diff --git a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/api/DiffAdapter_ImplBase.java b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/api/DiffAdapter_ImplBase.java index 6493566b256..e1658f786a9 100644 --- a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/api/DiffAdapter_ImplBase.java +++ b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/api/DiffAdapter_ImplBase.java @@ -27,6 +27,7 @@ import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.fit.util.FSUtil; +import org.apache.uima.jcas.cas.AnnotationBase; import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.LinkCompareBehavior; import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.LinkFeatureDecl; @@ -81,7 +82,7 @@ public Position getPosition(FeatureStructure aFS) } @Override - public List generateSubPositions(AnnotationFS aFs, + public List generateSubPositions(AnnotationBase aFs, LinkCompareBehavior aLinkCompareBehavior) { var subPositions = new ArrayList(); diff --git a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/docmeta/DocumentMetadataDiffAdapter.java 
b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/docmeta/DocumentMetadataDiffAdapter.java
new file mode 100644
index 00000000000..791afb0ed45
--- /dev/null
+++ b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/docmeta/DocumentMetadataDiffAdapter.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.docmeta;
+
+import static java.util.Arrays.asList;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.fit.util.FSUtil;
+import org.apache.uima.jcas.cas.AnnotationBase;
+
+import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.LinkCompareBehavior;
+import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.DiffAdapter_ImplBase;
+import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position;
+import de.tudarmstadt.ukp.inception.annotation.layer.span.SpanRenderer;
+import de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil;
+
+/**
+ * Diff adapter for document-level annotations. Positions produced by this adapter are
+ * {@link DocumentPosition}s.
+ */
+public class DocumentMetadataDiffAdapter
+    extends DiffAdapter_ImplBase
+{
+    /**
+     * Convenience constructor taking the label features as varargs.
+     *
+     * @param aType
+     *            name of the annotation type handled by this adapter.
+     * @param aLabelFeatures
+     *            names of the label features; duplicates are collapsed.
+     */
+    public DocumentMetadataDiffAdapter(String aType, String... aLabelFeatures)
+    {
+        this(aType, new HashSet<>(asList(aLabelFeatures)));
+    }
+
+    /**
+     * @param aType
+     *            name of the annotation type handled by this adapter.
+     * @param aLabelFeatures
+     *            names of the label features, handed on to {@link DiffAdapter_ImplBase}.
+     */
+    public DocumentMetadataDiffAdapter(String aType, Set<String> aLabelFeatures)
+    {
+        super(aType, aLabelFeatures);
+    }
+
+    /**
+     * Returns all annotations of the adapter's type. The window offsets are not used to restrict
+     * the selection.
+     *
+     * @see SpanRenderer#selectAnnotationsInWindow
+     */
+    @Override
+    public List<AnnotationBase> selectAnnotationsInWindow(CAS aCas, int aWindowBegin,
+            int aWindowEnd)
+    {
+        return aCas.<AnnotationBase> select(getType()).asList();
+    }
+
+    /**
+     * Creates the {@link DocumentPosition} for the given feature structure.
+     *
+     * @return the position. Collection ID and document ID may be {@code null}; the link target
+     *         text is {@code null} if {@code aLinkTargetBegin} is {@code -1} or the CAS has no
+     *         document text.
+     */
+    @Override
+    public Position getPosition(FeatureStructure aFS, String aFeature, String aRole,
+            int aLinkTargetBegin, int aLinkTargetEnd, LinkCompareBehavior aLinkCompareBehavior)
+    {
+        String collectionId = null;
+        String documentId = null;
+        try {
+            var dmd = WebAnnoCasUtil.getDocumentMetadata(aFS.getCAS());
+            collectionId = FSUtil.getFeature(dmd, "collectionId", String.class);
+            documentId = FSUtil.getFeature(dmd, "documentId", String.class);
+        }
+        catch (IllegalArgumentException e) {
+            // We use this information only for debugging - so we can ignore if the information
+            // is missing.
+        }
+
+        // Read the document text once; it is only needed to capture the link target text.
+        String linkTargetText = null;
+        var documentText = aFS.getCAS().getDocumentText();
+        if (aLinkTargetBegin != -1 && documentText != null) {
+            linkTargetText = documentText.substring(aLinkTargetBegin, aLinkTargetEnd);
+        }
+
+        return new DocumentPosition(collectionId, documentId, getType(), aFeature, aRole,
+                aLinkTargetBegin, aLinkTargetEnd, linkTargetText, aLinkCompareBehavior);
+    }
+}
diff --git a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/docmeta/DocumentPosition.java b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/docmeta/DocumentPosition.java
new file mode 100644
index 00000000000..a80cc886283
--- /dev/null
+++ b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/docmeta/DocumentPosition.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.docmeta;
+
+import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.LinkCompareBehavior;
+import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.api.Position_ImplBase;
+
+/**
+ * Represents a document position, i.e. the position created by the
+ * {@link DocumentMetadataDiffAdapter}.
+ */
+public class DocumentPosition
+    extends Position_ImplBase
+{
+    private static final long serialVersionUID = -1020728944030217843L;
+
+    /**
+     * Creates a document position; all arguments are handed on unchanged to
+     * {@link Position_ImplBase}.
+     */
+    public DocumentPosition(String aCollectionId, String aDocumentId, String aType, String aFeature,
+            String aRole, int aLinkTargetBegin, int aLinkTargetEnd, String aLinkTargetText,
+            LinkCompareBehavior aLinkCompareBehavior)
+    {
+        super(aCollectionId, aDocumentId, aType, aFeature, aRole, aLinkTargetBegin, aLinkTargetEnd,
+                aLinkTargetText, aLinkCompareBehavior);
+    }
+
+    /**
+     * @return {@code "Document ["}, followed by whatever {@code toStringFragment} appends and a
+     *         closing {@code ']'}.
+     */
+    @Override
+    public String toString()
+    {
+        var builder = new StringBuilder("Document [");
+        toStringFragment(builder);
+        return builder.append(']').toString();
+    }
+
+    /**
+     * @return the constant label {@code "Document"}.
+     */
+    @Override
+    public String toMinimalString()
+    {
+        return "Document";
+    }
+}