From d3fb2220a544705615d40fe361954eecc9367413 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Sat, 23 Sep 2023 09:27:22 +0200 Subject: [PATCH 1/4] #4158 - Exception when annotating something after a longer pause - Replace calls to SelectFS with calls to CASUtil which does not create temporary annotations --- .../annotation/layer/span/SpanRenderer.java | 38 +++++++++++++++-- .../casdiff/span/SpanDiffAdapter.java | 41 +++++++++++++++++-- ...ationsStartAndEndWithinSentencesCheck.java | 18 ++++++-- .../inception/io/bioc/model/CasToBioC.java | 21 +++++++--- .../io/bioc/xml/Cas2BioCSaxEvents.java | 8 +++- .../pdfeditor2/format/VisualPdfReader.java | 7 +++- .../RecommendationEditorExtension.java | 4 +- .../service/RecommendationServiceImpl.java | 36 ++++++++++------ .../detail/AnnotationDetailEditorPanel.java | 13 ++++-- 9 files changed, 150 insertions(+), 36 deletions(-) diff --git a/inception/inception-api-annotation/src/main/java/de/tudarmstadt/ukp/inception/annotation/layer/span/SpanRenderer.java b/inception/inception-api-annotation/src/main/java/de/tudarmstadt/ukp/inception/annotation/layer/span/SpanRenderer.java index 050b2d41c57..35254744d36 100644 --- a/inception/inception-api-annotation/src/main/java/de/tudarmstadt/ukp/inception/annotation/layer/span/SpanRenderer.java +++ b/inception/inception-api-annotation/src/main/java/de/tudarmstadt/ukp/inception/annotation/layer/span/SpanRenderer.java @@ -30,6 +30,7 @@ import java.util.Optional; import org.apache.uima.cas.CAS; +import org.apache.uima.cas.FSIterator; import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.Type; import org.apache.uima.cas.TypeSystem; @@ -92,9 +93,40 @@ protected boolean typeSystemInit(TypeSystem aTypeSystem) @Override public List selectAnnotationsInWindow(CAS aCas, int aWindowBegin, int aWindowEnd) { - return aCas.select(type).coveredBy(0, aWindowEnd).includeAnnotationsWithEndBeyondBounds() - .map(fs -> (AnnotationFS) fs) - .filter(ann -> AnnotationPredicates.overlapping(ann, aWindowBegin, aWindowEnd)) + // https://github.com/apache/uima-uimaj/issues/345 + // return aCas.select(type).coveredBy(0, aWindowEnd).includeAnnotationsWithEndBeyondBounds() + // .map(fs -> (AnnotationFS) fs) + // .filter(ann -> AnnotationPredicates.overlapping(ann, aWindowBegin, aWindowEnd)) + // .collect(toList()); + + List list = new ArrayList(); + + // withSnapshotIterators() not needed here since we copy the FSes to a list anyway + FSIterator it = aCas.getAnnotationIndex(type).iterator(); + + // Skip annotations whose start is before the start parameter. + while (it.isValid() && (it.get()).getBegin() < aWindowBegin) { + it.moveToNext(); + } + + boolean strict = false; + while (it.isValid()) { + AnnotationFS a = it.get(); + // If the start of the current annotation is past the end parameter, we're done. + if (a.getBegin() > aWindowEnd) { + break; + } + it.moveToNext(); + if (strict && a.getEnd() > aWindowEnd) { + continue; + } + + list.add(a); + } + + return list.stream() // + .map(fs -> (AnnotationFS) fs) // + .filter(ann -> AnnotationPredicates.overlapping(ann, aWindowBegin, aWindowEnd)) // .collect(toList()); } diff --git a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/span/SpanDiffAdapter.java b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/span/SpanDiffAdapter.java index 4ad640c22bc..e17a522228e 100644 --- a/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/span/SpanDiffAdapter.java +++ b/inception/inception-curation-legacy/src/main/java/de/tudarmstadt/ukp/clarin/webanno/curation/casdiff/span/SpanDiffAdapter.java @@ -20,14 +20,17 @@ import static java.util.Arrays.asList; import static java.util.stream.Collectors.toList; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.uima.cas.CAS; +import org.apache.uima.cas.FSIterator; import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.cas.text.AnnotationPredicates; +import org.apache.uima.fit.util.CasUtil; import org.apache.uima.fit.util.FSUtil; import de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil; @@ -71,9 +74,41 @@ public SpanDiffAdapter(String aType, Set aLabelFeatures) @Override public List selectAnnotationsInWindow(CAS aCas, int aWindowBegin, int aWindowEnd) { - return aCas.select(getType()).coveredBy(0, aWindowEnd) - .includeAnnotationsWithEndBeyondBounds().map(fs -> (AnnotationFS) fs) - .filter(ann -> AnnotationPredicates.overlapping(ann, aWindowBegin, aWindowEnd)) + // https://github.com/apache/uima-uimaj/issues/345 + // return aCas.select(type).coveredBy(0, aWindowEnd).includeAnnotationsWithEndBeyondBounds() + // .map(fs -> (AnnotationFS) fs) + // .filter(ann -> AnnotationPredicates.overlapping(ann, aWindowBegin, aWindowEnd)) + // .collect(toList()); + + List list = new ArrayList(); + + // withSnapshotIterators() not needed here since we copy the FSes to a list anyway + FSIterator it = aCas.getAnnotationIndex(CasUtil.getType(aCas, getType())) + .iterator(); + + // Skip annotations whose start is before the start parameter. + while (it.isValid() && (it.get()).getBegin() < aWindowBegin) { + it.moveToNext(); + } + + boolean strict = false; + while (it.isValid()) { + AnnotationFS a = it.get(); + // If the start of the current annotation is past the end parameter, we're done. + if (a.getBegin() > aWindowEnd) { + break; + } + it.moveToNext(); + if (strict && a.getEnd() > aWindowEnd) { + continue; + } + + list.add(a); + } + + return list.stream() // + .map(fs -> (AnnotationFS) fs) // + .filter(ann -> AnnotationPredicates.overlapping(ann, aWindowBegin, aWindowEnd)) // .collect(toList()); } diff --git a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/AllAnnotationsStartAndEndWithinSentencesCheck.java b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/AllAnnotationsStartAndEndWithinSentencesCheck.java index f17ba3e1434..d68d344e0cf 100644 --- a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/AllAnnotationsStartAndEndWithinSentencesCheck.java +++ b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/AllAnnotationsStartAndEndWithinSentencesCheck.java @@ -27,6 +27,7 @@ import org.apache.uima.cas.CAS; import org.apache.uima.cas.Type; import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.fit.util.CasUtil; import org.springframework.util.CollectionUtils; import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer; @@ -76,10 +77,19 @@ public boolean check(Project aProject, CAS aCas, List aMessages) } for (AnnotationFS ann : select(aCas, type)) { - var startsOutside = aCas.select(Sentence._TypeName) - .covering(ann.getBegin(), ann.getBegin()).isEmpty(); - var endsOutside = aCas.select(Sentence._TypeName) - .covering(ann.getEnd(), ann.getEnd()).isEmpty(); + // https://github.com/apache/uima-uimaj/issues/345 + // var startsOutside = aCas.select(Sentence._TypeName) + // .covering(ann.getBegin(), ann.getBegin()).isEmpty(); + var startsOutside = CasUtil + .selectCovering(ann.getCAS(), CasUtil.getType(ann.getCAS(), Sentence.class), + ann.getBegin(), ann.getBegin()) + .isEmpty(); + // https://github.com/apache/uima-uimaj/issues/345 + // var endsOutside = aCas.select(Sentence._TypeName) + // .covering(ann.getEnd(), ann.getEnd()).isEmpty(); + var endsOutside = CasUtil.selectCovering(ann.getCAS(), + CasUtil.getType(ann.getCAS(), Sentence.class), ann.getEnd(), ann.getEnd()) + .isEmpty(); if (!startsOutside && !endsOutside) { continue; diff --git a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/model/CasToBioC.java b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/model/CasToBioC.java index 4aa35a4a7c7..8e389b28220 100644 --- a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/model/CasToBioC.java +++ b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/model/CasToBioC.java @@ -40,6 +40,7 @@ import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.fit.util.FSUtil; +import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import org.dkpro.core.api.xml.type.XmlElement; @@ -81,18 +82,24 @@ public void convert(JCas aJCas, BioCCollection aCollection) bioCPassage.addInfon(I_TYPE, div.getDivType()); } - var sentences = aJCas.select(Sentence.class).coveredBy(div).asList(); + // https://github.com/apache/uima-uimaj/issues/345 + // var sentences = aJCas.select(Sentence.class).coveredBy(div).asList(); + var sentences = JCasUtil.selectCovered(Sentence.class, div); + // https://github.com/apache/uima-uimaj/issues/345 + // var annotations = aJCas.select(Annotation.class).coveredBy(div); + var annotations = JCasUtil.selectCovered(Annotation.class, div); if (sentences.isEmpty()) { bioCPassage.setText(div.getCoveredText()); - processAnnotations(bioCPassage, bioCPassage.getOffset(), - aJCas.select(Annotation.class).coveredBy(div)); + processAnnotations(bioCPassage, bioCPassage.getOffset(), annotations); } else { var bioCSentences = processSentences(div.getBegin(), sentences); bioCPassage.setSentences(bioCSentences); processAnnotations(bioCPassage, bioCPassage.getOffset(), - aJCas.select(Annotation.class).coveredBy(div) - .filter(a -> aJCas.select(Sentence.class).covering(a).isEmpty()) + annotations.stream().filter(a -> + // https://github.com/apache/uima-uimaj/issues/345 + // aJCas.select(Sentence.class).covering(a).isEmpty() + JCasUtil.selectCovering(Sentence.class, a).isEmpty()) .collect(Collectors.toList())); } } @@ -129,7 +136,9 @@ private List processSentences(int aPassageOffset, List s bioCSentence.setText(sentence.getCoveredText()); processAnnotations(bioCSentence, sentence.getBegin(), - sentence.getCAS().select(Annotation.class).coveredBy(sentence)); + // https://github.com/apache/uima-uimaj/issues/345 + // sentence.getCAS().select(Annotation.class).coveredBy(sentence) + JCasUtil.selectCovered(Annotation.class, sentence)); bioCSentences.add(bioCSentence); } diff --git a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/xml/Cas2BioCSaxEvents.java b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/xml/Cas2BioCSaxEvents.java index a97c4d815c0..d2a7351539c 100644 --- a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/xml/Cas2BioCSaxEvents.java +++ b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/xml/Cas2BioCSaxEvents.java @@ -58,6 +58,7 @@ import org.apache.uima.cas.CAS; import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.fit.util.FSUtil; +import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import org.dkpro.core.api.xml.type.XmlElement; @@ -139,8 +140,11 @@ private void processSentenceElement(XmlElement aSentenceElement) throws SAXExcep return; } - for (var annotation : aSentenceElement.getCAS().select(Annotation.class) - .coveredBy(aSentenceElement)) { + // https://github.com/apache/uima-uimaj/issues/345 + // var annotations = aSentenceElement.getCAS().select(Annotation.class) + // .coveredBy(aSentenceElement); + var annotations = JCasUtil.selectCovered(Annotation.class, aSentenceElement); + for (var annotation : annotations) { serializeAnnotation(sentenceTextElement.get().getBegin(), annotation); } } diff --git a/inception/inception-pdf-editor2/src/main/java/de/tudarmstadt/ukp/inception/pdfeditor2/format/VisualPdfReader.java b/inception/inception-pdf-editor2/src/main/java/de/tudarmstadt/ukp/inception/pdfeditor2/format/VisualPdfReader.java index 43fcf64c132..318683c6fa1 100644 --- a/inception/inception-pdf-editor2/src/main/java/de/tudarmstadt/ukp/inception/pdfeditor2/format/VisualPdfReader.java +++ b/inception/inception-pdf-editor2/src/main/java/de/tudarmstadt/ukp/inception/pdfeditor2/format/VisualPdfReader.java @@ -27,6 +27,7 @@ import org.apache.uima.cas.CAS; import org.apache.uima.collection.CollectionException; import org.apache.uima.fit.descriptor.ConfigurationParameter; +import org.apache.uima.fit.util.CasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.cas.FloatArray; import org.apache.uima.jcas.cas.IntegerArray; @@ -117,7 +118,11 @@ public static VModel visualModelFromCas(CAS cas, List pdfPages) List vPages = new ArrayList<>(); for (PdfPage pdfPage : pdfPages) { List vChunks = new ArrayList<>(); - for (var pdfChunk : cas.select(PdfChunk.class).coveredBy(pdfPage)) { + // https://github.com/apache/uima-uimaj/issues/345 + // SelectFSs coveredBy = cas.select(PdfChunk.class).coveredBy(pdfPage); + List coveredBy = (List) CasUtil + .selectCovered(CasUtil.getType(cas, PdfChunk.class), pdfPage); + for (var pdfChunk : coveredBy) { float d = pdfChunk.getD(); List vGlyphs = new ArrayList<>(); IntegerArray charWidths = pdfChunk.getC(); diff --git a/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/RecommendationEditorExtension.java b/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/RecommendationEditorExtension.java index de66ed47c9a..a6f4003e684 100644 --- a/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/RecommendationEditorExtension.java +++ b/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/RecommendationEditorExtension.java @@ -226,7 +226,7 @@ private void actionAcceptSpanRecommendation(AjaxRequestTarget aTarget, page.writeEditorCas(aCas); // Set selection to the accepted annotation and select it and load it into the detail editor - adapter.select(VID.of(span), span); + aState.getSelection().set(adapter.select(VID.of(span), span)); // Send a UI event that the suggestion has been accepted page.send(page, BREADTH, @@ -251,7 +251,7 @@ private void actionAcceptRelationRecommendation(AjaxRequestTarget aTarget, page.writeEditorCas(aCas); // Set selection to the accepted annotation and select it and load it into the detail editor - adapter.select(aVID, relation); + aState.getSelection().set(adapter.select(aVID, relation)); // Send a UI event that the suggestion has been accepted page.send(page, BREADTH, new AjaxRecommendationAcceptedEvent(aTarget, aState, aVID)); diff --git a/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/service/RecommendationServiceImpl.java b/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/service/RecommendationServiceImpl.java index fa7e72568d6..c13152f343a 100644 --- a/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/service/RecommendationServiceImpl.java +++ b/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/service/RecommendationServiceImpl.java @@ -1081,9 +1081,12 @@ private AnnotationFS acceptOrCorrectSuggestion(String aSessionOwner, SourceDocum var aEnd = aSuggestion.getEnd(); var aValue = aSuggestion.getLabel(); - var candidates = aCas. select(aAdapter.getAnnotationTypeName()) // - .at(aBegin, aEnd) // - .asList(); + // https://github.com/apache/uima-uimaj/issues/345 + // var candidates = aCas. select(aAdapter.getAnnotationTypeName()) // + // .at(aBegin, aEnd) // + // .asList(); + var candidates = CasUtil.selectAt(aCas, + CasUtil.getType(aCas, aAdapter.getAnnotationTypeName()), aBegin, aEnd); var candidateWithEmptyLabel = candidates.stream() // .filter(c -> aAdapter.getFeatureValue(aFeature, c) == null) // @@ -2084,9 +2087,12 @@ private static Optional getOffsetsAnchoredOnSingleTokens(CAS aOriginalCa Annotation aPredictedAnnotation) { Type tokenType = getType(aOriginalCas, Token.class); - var tokens = aOriginalCas. select(tokenType) // - .coveredBy(aPredictedAnnotation) // - .limit(2).asList(); + // https://github.com/apache/uima-uimaj/issues/345 + // var tokens = aOriginalCas. select(tokenType) // + // .coveredBy(aPredictedAnnotation) // + // .limit(2).asList(); + var tokens = CasUtil.selectCovered(tokenType, aPredictedAnnotation).stream() // + .limit(2).collect(toList()); if (tokens.isEmpty()) { // This can happen if a recommender uses different token boundaries (e.g. if a @@ -2113,9 +2119,12 @@ private static Optional getOffsetsAnchoredOnSingleTokens(CAS aOriginalCa private static Optional getOffsetsAnchoredOnSentences(CAS aOriginalCas, Annotation aPredictedAnnotation) { - var sentences = aOriginalCas.select(Sentence.class) // - .coveredBy(aPredictedAnnotation) // - .asList(); + // https://github.com/apache/uima-uimaj/issues/345 + // var sentences = aOriginalCas.select(Sentence.class) // + // .coveredBy(aPredictedAnnotation) // + // .asList(); + var sentences = CasUtil.selectCovered(CasUtil.getType(aOriginalCas, Sentence.class), + aPredictedAnnotation); if (sentences.isEmpty()) { // This can happen if a recommender uses different token boundaries (e.g. if a @@ -2134,9 +2143,12 @@ private static Optional getOffsetsAnchoredOnSentences(CAS aOriginalCas, static Optional getOffsetsAnchoredOnTokens(CAS aOriginalCas, Annotation aPredictedAnnotation) { - var tokens = aOriginalCas.select(Token.class) // - .coveredBy(aPredictedAnnotation) // - .asList(); + // https://github.com/apache/uima-uimaj/issues/345 + // var tokens = aOriginalCas.select(Token.class) // + // .coveredBy(aPredictedAnnotation) // + // .asList(); + var tokens = CasUtil.selectCovered(CasUtil.getType(aOriginalCas, Token.class), + aPredictedAnnotation); if (tokens.isEmpty()) { if (aPredictedAnnotation.getBegin() == aPredictedAnnotation.getEnd()) { diff --git a/inception/inception-ui-annotation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/annotation/detail/AnnotationDetailEditorPanel.java b/inception/inception-ui-annotation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/annotation/detail/AnnotationDetailEditorPanel.java index bb97e38facc..5c6344ed73b 100644 --- a/inception/inception-ui-annotation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/annotation/detail/AnnotationDetailEditorPanel.java +++ b/inception/inception-ui-annotation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/annotation/detail/AnnotationDetailEditorPanel.java @@ -1235,13 +1235,20 @@ public void onBulkAnnotationEvent(BulkAnnotationEvent aEvent) try { var selection = getModelObject().getSelection(); int id = selection.getAnnotation().getId(); - boolean annotationStillExists = getEditorCas().select(Annotation.class) // - .at(selection.getBegin(), selection.getEnd()) // + // https://github.com/apache/uima-uimaj/issues/345 + // boolean annotationStillExists = getEditorCas().select(Annotation.class) // + // .at(selection.getBegin(), selection.getEnd()) // + // .anyMatch(ann -> ann._id() == id); + var cas = getEditorCas(); + boolean annotationStillExists = CasUtil + .selectAt(cas, CasUtil.getType(cas, Annotation.class), selection.getBegin(), + selection.getEnd()) + .stream() // .anyMatch(ann -> ann._id() == id); + if (!annotationStillExists) { selection.clear(); refresh(aEvent.getRequestTarget()); - } } catch (Exception e) { From 03719c5ff5550325c3130e49ca9df7d4a9b67f25 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Sat, 23 Sep 2023 13:44:23 +0200 Subject: [PATCH 2/4] #4158 - Exception when annotating something after a longer pause - Added new CAS Doctor check for unreachable feature structures - Updated UpgradeCasRepair to report garbarge-collected feature structures - Updated description of UpgradeCasRepair --- .../checks/UnreachableAnnotationsCheck.java | 100 ++++++++++++++++++ .../config/CasDoctorAutoConfiguration.java | 7 ++ .../diag/repairs/UpgradeCasRepair.java | 40 ++++++- .../asciidoc/user-guide/casdoctor.adoc | 18 +++- 4 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/UnreachableAnnotationsCheck.java diff --git a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/UnreachableAnnotationsCheck.java b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/UnreachableAnnotationsCheck.java new file mode 100644 index 00000000000..3f006d4bac8 --- /dev/null +++ b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/UnreachableAnnotationsCheck.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.clarin.webanno.diag.checks; + +import static de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getRealCas; +import static java.util.function.Function.identity; +import static java.util.stream.Collectors.counting; +import static java.util.stream.Collectors.groupingBy; +import static org.apache.uima.cas.impl.Serialization.deserializeCASComplete; +import static org.apache.uima.cas.impl.Serialization.serializeCASComplete; + +import java.util.List; +import java.util.Map; + +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.resource.ResourceInitializationException; + +import de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil; +import de.tudarmstadt.ukp.clarin.webanno.model.Project; +import de.tudarmstadt.ukp.clarin.webanno.support.logging.LogMessage; + +public class UnreachableAnnotationsCheck + implements Check +{ + @Override + public boolean check(Project aProject, CAS aCas, List aMessages) + { + var casImpl = (CASImpl) getRealCas(aCas); + + var annotationCountsBefore = countFeatureStructures(casImpl); + + // Disable forced retaining of all assigned annotations so that during serialization, + // any temporary annotations that got potentially stuck in the CAS can be released. + var dummy = makeDummyCas(); + try (var ctx = casImpl.ll_enableV2IdRefs(false); + var ctx1 = dummy.ll_enableV2IdRefs(false)) { + var data = serializeCASComplete(casImpl); + deserializeCASComplete(data, dummy); + } + + var annotationCountsAfter = countFeatureStructures(dummy); + + var diffTypes = 0; + var totalDiff = 0; + for (var typeName : annotationCountsBefore.keySet().stream().sorted() + .toArray(String[]::new)) { + var before = annotationCountsBefore.getOrDefault(typeName, 0l); + var after = annotationCountsAfter.getOrDefault(typeName, 0l); + var diff = before - after; + totalDiff += diff; + if (diff > 0) { + diffTypes++; + aMessages.add(LogMessage.info(this, "Type [%s] has [%d] unreachable instances", + typeName, diff)); + } + } + + if (totalDiff > 0) { + if (diffTypes > 1) { + aMessages.add(LogMessage.info(this, + "A total of [%d] unreachable instances that were found", totalDiff)); + } + } + + return true; + } + + public static CASImpl makeDummyCas() + { + try { + return (CASImpl) WebAnnoCasUtil.getRealCas(WebAnnoCasUtil.createCas()); + } + catch (ResourceInitializationException e) { + throw new IllegalStateException(e); + } + } + + public static Map countFeatureStructures(CASImpl casImpl) + { + return WebAnnoCasUtil.findAllFeatureStructures(casImpl).stream() // + .map(fs -> fs.getType().getName()) // + .collect(groupingBy(identity(), counting())); + } +} diff --git a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/config/CasDoctorAutoConfiguration.java b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/config/CasDoctorAutoConfiguration.java index 861bdae1b09..81955c278fd 100644 --- a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/config/CasDoctorAutoConfiguration.java +++ b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/config/CasDoctorAutoConfiguration.java @@ -43,6 +43,7 @@ import de.tudarmstadt.ukp.clarin.webanno.diag.checks.RelationOffsetsCheck; import de.tudarmstadt.ukp.clarin.webanno.diag.checks.TokensAndSententencedDoNotOverlapCheck; import de.tudarmstadt.ukp.clarin.webanno.diag.checks.UniqueDocumentAnnotationCheck; +import de.tudarmstadt.ukp.clarin.webanno.diag.checks.UnreachableAnnotationsCheck; import de.tudarmstadt.ukp.clarin.webanno.diag.repairs.CoverAllTextInSentencesRepair; import de.tudarmstadt.ukp.clarin.webanno.diag.repairs.ReattachFeatureAttachedSpanAnnotationsAndDeleteExtrasRepair; import de.tudarmstadt.ukp.clarin.webanno.diag.repairs.ReattachFeatureAttachedSpanAnnotationsRepair; @@ -229,4 +230,10 @@ public TokensAndSententencedDoNotOverlapCheck tokensAndSententencedDoNotOverlapC { return new TokensAndSententencedDoNotOverlapCheck(); } + + @Bean + public UnreachableAnnotationsCheck unreachableAnnotationsCheck() + { + return new UnreachableAnnotationsCheck(); + } } diff --git a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/repairs/UpgradeCasRepair.java b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/repairs/UpgradeCasRepair.java index cf7d4299dc0..b5e3216f57e 100644 --- a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/repairs/UpgradeCasRepair.java +++ b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/repairs/UpgradeCasRepair.java @@ -17,11 +17,15 @@ */ package de.tudarmstadt.ukp.clarin.webanno.diag.repairs; +import static de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getRealCas; +import static de.tudarmstadt.ukp.clarin.webanno.diag.checks.UnreachableAnnotationsCheck.countFeatureStructures; + import java.io.IOException; import java.util.List; import org.apache.uima.UIMAException; import org.apache.uima.cas.CAS; +import org.apache.uima.cas.impl.CASImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,12 +55,46 @@ public UpgradeCasRepair(AnnotationSchemaService aAnnotationService) public void repair(Project aProject, CAS aCas, List aMessages) { try { + var casImpl = (CASImpl) getRealCas(aCas); + + var annotationCountsBefore = countFeatureStructures(casImpl); + annotationService.upgradeCas(aCas, aProject); aMessages.add(LogMessage.info(this, "CAS upgraded.")); + + var annotationCountsAfter = countFeatureStructures(casImpl); + + var diffTypes = 0; + var totalDiff = 0; + var totalBefore = 0; + var totalAfter = 0; + for (var typeName : annotationCountsBefore.keySet().stream().sorted() + .toArray(String[]::new)) { + var before = annotationCountsBefore.getOrDefault(typeName, 0l); + var after = annotationCountsAfter.getOrDefault(typeName, 0l); + var diff = before - after; + totalDiff += diff; + totalBefore += before; + totalAfter += after; + if (diff > 0) { + diffTypes++; + aMessages.add(LogMessage.info(this, + "Type [%s] had [%d] unreachable instances that were removed (before: [%d], after: [%d])", + typeName, diff, before, after)); + } + } + + if (totalDiff > 0) { + if (diffTypes > 1) { + aMessages.add(LogMessage.info(this, + "A total of [%d] unreachable instances that were removed (before: [%d], after: [%d])", + totalDiff, totalBefore, totalAfter)); + } + } } catch (UIMAException | IOException e) { log.error("Unabled to access CAS", e); - aMessages.add(LogMessage.error(this, "Unabled to access CAS", e.getMessage())); + aMessages.add(LogMessage.error(this, "Unabled to access CAS: %s", e.getMessage())); } } } diff --git a/inception/inception-diag/src/main/resources/META-INF/asciidoc/user-guide/casdoctor.adoc b/inception/inception-diag/src/main/resources/META-INF/asciidoc/user-guide/casdoctor.adoc index bebcb37b867..e67d37c257f 100644 --- a/inception/inception-diag/src/main/resources/META-INF/asciidoc/user-guide/casdoctor.adoc +++ b/inception/inception-diag/src/main/resources/META-INF/asciidoc/user-guide/casdoctor.adoc @@ -173,9 +173,10 @@ dependent. ID:: `CASMetadataTypeIsPresentCheck` Related repairs:: <> -Checks if the ìnternal type `CASMetadata is defined in the type system of this CAS. If this is +Checks if the internal type `CASMetadata` is defined in the type system of this CAS. If this is not the case, then the application may not be able to detect concurrent modifications. + [[check_DanglingRelationsCheck]] === Dangling relations [horizontal] @@ -210,6 +211,16 @@ TSV or CoNLL formats will not include any text and annotations of parts of the d not covered by sentences or may produce errors during export. +[[check_UnreachableAnnotationsCheck]] +=== Unreachable annotations check +[horizontal] +ID:: `UnreachableAnnotationsCheck` +Related repairs:: <> + +Checks if there are any unreachable feature structures. Such feature structures take up memory, but +they are not regularly accessible. Such feature structures may be created as a result of bugs. +Removing them is harmless and reduces memory and disk space usage. + [[sect_repairs]] == Repairs @@ -332,6 +343,9 @@ ID:: `UpgradeCasRepair` Ensures that the CAS is up-to-date with the project type system. It performs the same operation which is regularly performed when a user opens a document for annotation/curation. +This repair also removes any unreachable feature structures. Such feature structures may be created as a result of bugs. +Removing them is harmless and reduces memory and disk space usage. + This is considered to be safe repair action as it only garbage-collects data from the CAS that is no longer reachable anyway. @@ -354,4 +368,4 @@ ID:: `CoverAllTextInSentencesRepair` This repair checks if there is any text not covered by sentences. If there is, it creates a new sentence annotation on this text starting at the end of the last sentence before it (or the start -of the document text) and the begin of the next sentence (or the end of the document text). \ No newline at end of file +of the document text) and the begin of the next sentence (or the end of the document text). From 3f55f2d70a8638ad318ea5c2e435592c3457682c Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Sat, 23 Sep 2023 13:55:26 +0200 Subject: [PATCH 3/4] No issue: Enable parallel builds --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index ec7a44ba322..d4564e138e3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,7 +2,7 @@ config = [ agentLabel: '', maven: 'Maven 3', jdk: 'Zulu 11', - extraMavenArguments: '', + extraMavenArguments: '-Ddkpro.core.testCachePath="${WORKSPACE}/cache/dkpro-core-datasets" -T 4', wipeWorkspaceBeforeBuild: true, wipeWorkspaceAfterBuild: true ] From 70e7091e7be69a8a3d5d9adb3b2d7c9eee951683 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Sat, 23 Sep 2023 14:57:46 +0200 Subject: [PATCH 4/4] #4158 - Exception when annotating something after a longer pause - Fix bad calls to CASUtil --- .../service/RecommendationServiceImpl.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/service/RecommendationServiceImpl.java b/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/service/RecommendationServiceImpl.java index c13152f343a..7d6ba5421c2 100644 --- a/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/service/RecommendationServiceImpl.java +++ b/inception/inception-recommendation/src/main/java/de/tudarmstadt/ukp/inception/recommendation/service/RecommendationServiceImpl.java @@ -2091,7 +2091,10 @@ private static Optional getOffsetsAnchoredOnSingleTokens(CAS aOriginalCa // var tokens = aOriginalCas. select(tokenType) // // .coveredBy(aPredictedAnnotation) // // .limit(2).asList(); - var tokens = CasUtil.selectCovered(tokenType, aPredictedAnnotation).stream() // + var tokens = CasUtil + .selectCovered(aOriginalCas, tokenType, aPredictedAnnotation.getBegin(), + aPredictedAnnotation.getEnd()) + .stream() // .limit(2).collect(toList()); if (tokens.isEmpty()) { @@ -2123,8 +2126,9 @@ private static Optional getOffsetsAnchoredOnSentences(CAS aOriginalCas, // var sentences = aOriginalCas.select(Sentence.class) // // .coveredBy(aPredictedAnnotation) // // .asList(); - var sentences = CasUtil.selectCovered(CasUtil.getType(aOriginalCas, Sentence.class), - aPredictedAnnotation); + var sentences = CasUtil.selectCovered(aOriginalCas, + CasUtil.getType(aOriginalCas, Sentence.class), aPredictedAnnotation.getBegin(), + aPredictedAnnotation.getEnd()); if (sentences.isEmpty()) { // This can happen if a recommender uses different token boundaries (e.g. if a @@ -2147,8 +2151,8 @@ static Optional getOffsetsAnchoredOnTokens(CAS aOriginalCas, // var tokens = aOriginalCas.select(Token.class) // // .coveredBy(aPredictedAnnotation) // // .asList(); - var tokens = CasUtil.selectCovered(CasUtil.getType(aOriginalCas, Token.class), - aPredictedAnnotation); + var tokens = CasUtil.selectCovered(aOriginalCas, CasUtil.getType(aOriginalCas, Token.class), + aPredictedAnnotation.getBegin(), aPredictedAnnotation.getEnd()); if (tokens.isEmpty()) { if (aPredictedAnnotation.getBegin() == aPredictedAnnotation.getEnd()) {