Skip to content

Commit

Permalink
Merge pull request #4788 from inception-project/bugfix/4787-When-re-m…
Browse files Browse the repository at this point in the history
…erging-a-document-CASMetadata-is-not-retained

#4787 - When re-merging a document CASMetadata is not retained
  • Loading branch information
reckart authored May 1, 2024
2 parents 213d81e + b2264b1 commit 546ed74
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import static org.apache.uima.fit.util.CasUtil.getType;
import static org.apache.uima.fit.util.FSUtil.setFeature;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Optional;

Expand Down Expand Up @@ -64,7 +63,6 @@ public static void clearCasMetadata(CAS aCas) throws IllegalStateException

public static void addOrUpdateCasMetadata(CAS aCas, long aTimeStamp, SourceDocument aDocument,
String aUsername)
throws IOException
{
// If the type system of the CAS does not yet support CASMetadata, then we do not add it
// and wait for the next regular CAS upgrade before we include this data.
Expand All @@ -81,7 +79,7 @@ public static void addOrUpdateCasMetadata(CAS aCas, long aTimeStamp, SourceDocum
FeatureStructure cmd;
var cmds = aCas.select(CASMetadata.class).toList();
if (cmds.size() > 1) {
throw new IOException("CAS contains more than one CASMetadata instance!");
throw new IllegalStateException("CAS contains more than one CASMetadata instance!");
}

if (cmds.size() == 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
import static java.lang.System.currentTimeMillis;
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.stream.Collectors.toList;
import static org.apache.commons.lang3.StringUtils.isEmpty;

import java.util.List;
import java.util.Objects;

import org.apache.commons.lang3.Validate;
import org.apache.uima.cas.CAS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
Expand All @@ -37,9 +37,7 @@

import de.tudarmstadt.ukp.clarin.webanno.api.annotation.config.AnnotationAutoConfiguration;
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature;
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer;
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.inception.rendering.Renderer;
import de.tudarmstadt.ukp.inception.rendering.pipeline.RenderStep;
import de.tudarmstadt.ukp.inception.rendering.request.RenderRequest;
import de.tudarmstadt.ukp.inception.rendering.vmodel.VDocument;
Expand Down Expand Up @@ -95,41 +93,42 @@ public void render(VDocument aResponse, RenderRequest aRequest)
{
log.trace("Prerenderer.render()");

CAS cas = aRequest.getCas();
var cas = aRequest.getCas();
var documentText = cas.getDocumentText();

Validate.notNull(cas, "CAS cannot be null");

String documentText = cas.getDocumentText();
int renderBegin = Math.max(0, aRequest.getWindowBeginOffset());
int renderEnd = Math.min(documentText.length(), aRequest.getWindowEndOffset());
if (aRequest.getVisibleLayers().isEmpty() || isEmpty(documentText)) {
return;
}

var renderBegin = Math.max(0, aRequest.getWindowBeginOffset());
var renderEnd = Math.min(documentText.length(), aRequest.getWindowEndOffset());
aResponse.setText(documentText.substring(renderBegin, renderEnd));
aResponse.setWindowBegin(renderBegin);
aResponse.setWindowEnd(renderEnd);

if (aRequest.getVisibleLayers().isEmpty()) {
return;
}

long start = System.currentTimeMillis();
Project project = aRequest.getProject();
var start = System.currentTimeMillis();
var project = aRequest.getProject();

// Listing the features once is faster than repeatedly hitting the DB to list features for
// every layer.
List<AnnotationFeature> supportedFeatures = supportedFeaturesCache.get(project);
List<AnnotationFeature> allFeatures = allFeaturesCache.get(project);
var supportedFeatures = supportedFeaturesCache.get(project);
var allFeatures = allFeaturesCache.get(project);

// Render (custom) layers
for (AnnotationLayer layer : aRequest.getVisibleLayers()) {
List<AnnotationFeature> layerSupportedFeatures = supportedFeatures.stream() //
for (var layer : aRequest.getVisibleLayers()) {
var layerSupportedFeatures = supportedFeatures.stream() //
.filter(feature -> feature.getLayer().equals(layer)) //
.collect(toList());
List<AnnotationFeature> layerAllFeatures = allFeatures.stream() //
var layerAllFeatures = allFeatures.stream() //
.filter(feature -> feature.getLayer().equals(layer)) //
.collect(toList());
// We need to pass in *all* the annotation features here because we also to that in
// other places where we create renderers - and the set of features must always be
// the same because otherwise the IDs of armed slots would be inconsistent
LayerSupport<?, ?> layerSupport = layerSupportRegistry.getLayerSupport(layer);
Renderer renderer = layerSupport.createRenderer(layer, () -> layerAllFeatures);
var renderer = layerSupport.createRenderer(layer, () -> layerAllFeatures);
renderer.render(cas, layerSupportedFeatures, aResponse, renderBegin, renderEnd);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,18 @@ public void setPageBegin(CAS aCas, int aOffset)
@Override
public void setVisibleUnits(List<Unit> aUnits, int aTotalUnitCount)
{
if (aUnits.isEmpty()) {
unitCount = 0;
visibleUnits = aUnits;
firstVisibleUnitIndex = 0;
lastVisibleUnitIndex = 0;
focusUnitIndex = 0;
windowBeginOffset = 0;
windowEndOffset = 0;
fireViewStateChanged();
return;
}

unitCount = aTotalUnitCount;
visibleUnits = aUnits;
firstVisibleUnitIndex = aUnits.get(0).getIndex();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ default Unit unitAtIndex(CAS aCas, int aIndex)
index = 1;
}

List<Unit> units = units(aCas);
var units = units(aCas);

if (units.isEmpty()) {
return null;
}

if (index > units.size()) {
index = units.size();
Expand Down Expand Up @@ -128,8 +132,10 @@ default List<Unit> unitsStartingAtOffset(CAS aCas, int aOffset, int aCount)
*/
default void moveToUnit(AnnotatorViewState aState, CAS aCas, int aIndex, FocusPosition aPos)
{
Unit unit = unitAtIndex(aCas, aIndex);
moveToOffset(aState, aCas, unit.getBegin(), aPos);
var unit = unitAtIndex(aCas, aIndex);
if (unit != null) {
moveToOffset(aState, aCas, unit.getBegin(), aPos);
}
}

default void moveToPreviousPage(AnnotatorViewState aState, CAS aCas, FocusPosition aPos)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;

import de.tudarmstadt.ukp.clarin.webanno.api.type.CASMetadata;
import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.Configuration;
import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.ConfigurationSet;
import de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.DiffResult;
Expand All @@ -80,6 +81,7 @@
import de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureTraits;
import de.tudarmstadt.ukp.inception.annotation.layer.relation.RelationAdapter;
import de.tudarmstadt.ukp.inception.annotation.layer.span.SpanAdapter;
import de.tudarmstadt.ukp.inception.annotation.storage.CasMetadataUtils;
import de.tudarmstadt.ukp.inception.curation.merge.strategy.DefaultMergeStrategy;
import de.tudarmstadt.ukp.inception.curation.merge.strategy.MergeStrategy;
import de.tudarmstadt.ukp.inception.rendering.vmodel.VID;
Expand Down Expand Up @@ -206,7 +208,7 @@ public Set<LogMessage> clearAndMergeCas(DiffResult aDiff, SourceDocument aTarget
throws UIMAException
{
// Remove any annotations from the target CAS - keep type system, sentences and tokens
clearAnnotations(aTargetDocument.getProject(), aTargetCas);
clearAnnotations(aTargetDocument, aTargetCas);

return mergeCas(aDiff, aTargetDocument, aTargetUsername, aTargetCas, aCasMap);
}
Expand Down Expand Up @@ -444,15 +446,14 @@ public Set<LogMessage> mergeCas(DiffResult aDiff, SourceDocument aTargetDocument
* Removes all annotations except {@link Token} and {@link Sentence} annotations - but from
* these also only the offsets are kept and all other features are cleared.
*
* @param aProject
* the project to which the CAS belongs.
*
* @param aDocument
* the document to which the CAS belongs.
* @param aCas
* the CAS to clear.
* @throws UIMAException
* if there was a problem clearing the CAS.
*/
private void clearAnnotations(Project aProject, CAS aCas) throws UIMAException
private void clearAnnotations(SourceDocument aDocument, CAS aCas) throws UIMAException
{
// Copy the CAS - basically we do this just to keep the full type system information
var backup = WebAnnoCasUtil.createCasCopy(aCas);
Expand All @@ -468,10 +469,18 @@ private void clearAnnotations(Project aProject, CAS aCas) throws UIMAException
createDocumentMetadata(aCas);
}

var casMetadataType = aCas.getTypeSystem().getType(CASMetadata._TypeName);
if (casMetadataType != null && exists(backup, casMetadataType)) {
var casMetadata = backup.select(CASMetadata.class).single();
var timestamp = casMetadata.getLastChangedOnDisk();
var username = casMetadata.getUsername();
CasMetadataUtils.addOrUpdateCasMetadata(aCas, timestamp, aDocument, username);
}

aCas.setDocumentLanguage(backup.getDocumentLanguage()); // DKPro Core Issue 435
aCas.setDocumentText(backup.getDocumentText());

transferSegmentation(aProject, aCas, backup);
transferSegmentation(aDocument.getProject(), aCas, backup);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,15 @@ public class CASMetadataTypeIsPresentCheck
@Override
public boolean check(Project aProject, CAS aCas, List<LogMessage> aMessages)
{
if (aCas.getTypeSystem().getType(CASMetadata.class.getName()) == null) {
aMessages.add(LogMessage.warn(this, "CAS needs upgrade to support CASMetadata which is "
if (aCas.getTypeSystem().getType(CASMetadata._TypeName) == null) {
aMessages.add(LogMessage.info(this, "CAS needs upgrade to support CASMetadata which is "
+ "required to detect concurrent modifications to CAS files."));
return true;
}

if (aCas.select(CASMetadata.class).isEmpty()) {
aMessages.add(LogMessage.warn(this,
"CAS contains no CASMetadata. Cannot check concurrent access."));
}

// This is an informative check - not critical, so we always pass it.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public boolean check(Project aProject, CAS aCas, List<LogMessage> aMessages)
{
boolean ok = true;

for (Annotation ann : aCas.select(Annotation.class)) {
for (var ann : aCas.select(Annotation.class)) {
if (ann.getBegin() > ann.getEnd()) {
aMessages.add(error(this, "[%s] at [%d-%d] has negative size (starts after ending)",
ann.getType().getName(), ann.getBegin(), ann.getEnd()));
Expand Down

0 comments on commit 546ed74

Please sign in to comment.