Skip to content

Commit

Permalink
Merge pull request #4394 from inception-project/feature/2696-Document…
Browse files Browse the repository at this point in the history
…-level-recommendations

#2696 - Document-level recommendations
  • Loading branch information
reckart authored Dec 27, 2023
2 parents 5ec948d + 9e04269 commit 30ea848
Show file tree
Hide file tree
Showing 19 changed files with 300 additions and 348 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import static java.util.Arrays.asList;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader;
import static org.apache.uima.fit.util.CasUtil.getType;
import static org.apache.uima.fit.util.CasUtil.select;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.contentOf;
import static org.mockito.ArgumentMatchers.any;
Expand All @@ -32,6 +34,7 @@

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.factory.JCasFactory;
import org.dkpro.core.io.tcf.TcfReader;
Expand Down Expand Up @@ -59,6 +62,7 @@
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.inception.annotation.feature.bool.BooleanFeatureSupport;
import de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureSupport;
Expand All @@ -76,7 +80,6 @@
import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;
import de.tudarmstadt.ukp.inception.schema.service.FeatureSupportRegistryImpl;
import de.tudarmstadt.ukp.inception.support.json.JSONUtil;
import de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil;

@ExtendWith(MockitoExtension.class)
public class BratSerializerImplTest
Expand Down Expand Up @@ -180,7 +183,7 @@ public void thatSentenceOrientedStrategyRenderCorrectly() throws Exception
state.setAllAnnotationLayers(schemaService.listAnnotationLayer(project));
state.setPagingStrategy(new SentenceOrientedPagingStrategy());
state.getPreferences().setWindowSize(10);
state.setFirstVisibleUnit(WebAnnoCasUtil.getFirstSentence(cas));
state.setFirstVisibleUnit(getFirstSentence(cas));
state.setProject(project);
state.setDocument(sourceDocument, asList(sourceDocument));

Expand Down Expand Up @@ -219,7 +222,7 @@ public void thatLineOrientedStrategyRenderCorrectly() throws Exception
AnnotatorState state = new AnnotatorStateImpl(Mode.ANNOTATION);
state.setPagingStrategy(new LineOrientedPagingStrategy());
state.getPreferences().setWindowSize(10);
state.setFirstVisibleUnit(WebAnnoCasUtil.getFirstSentence(cas));
state.setFirstVisibleUnit(getFirstSentence(cas));
state.setProject(project);
state.setDocument(sourceDocument, asList(sourceDocument));

Expand Down Expand Up @@ -258,7 +261,7 @@ public void thatTokenWrappingStrategyRenderCorrectly() throws Exception
AnnotatorState state = new AnnotatorStateImpl(Mode.ANNOTATION);
state.setPagingStrategy(new TokenWrappingPagingStrategy(80));
state.getPreferences().setWindowSize(10);
state.setFirstVisibleUnit(WebAnnoCasUtil.getFirstSentence(cas));
state.setFirstVisibleUnit(getFirstSentence(cas));
state.setProject(project);
state.setDocument(sourceDocument, asList(sourceDocument));

Expand All @@ -281,4 +284,22 @@ public void thatTokenWrappingStrategyRenderCorrectly() throws Exception
assertThat(contentOf(new File("src/test/resources/longlines.json"), UTF_8))
.isEqualToNormalizingNewlines(contentOf(new File(jsonFilePath), UTF_8));
}

/**
 * Returns the first sentence annotation found in the given CAS.
 *
 * @param aCas
 *            the CAS expected to contain sentence annotations.
 * @return the first {@link Sentence} annotation in the order in which {@code CasUtil.select}
 *         delivers them, or {@code null} if the CAS does not contain any sentence annotation.
 */
private static AnnotationFS getFirstSentence(CAS aCas)
{
    return select(aCas, getType(aCas, Sentence.class)).stream() //
            .findFirst() //
            .orElse(null);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BEST_MATCH_TERM_NC;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_NC;
import static de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil.selectSentenceCovering;
import static de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil.selectTokensCovered;
import static java.lang.System.currentTimeMillis;
import static java.util.Arrays.asList;
import static java.util.Collections.unmodifiableList;
import static java.util.Comparator.comparingInt;
import static java.util.stream.Collectors.toCollection;
import static org.apache.uima.fit.util.CasUtil.getType;
import static org.apache.uima.fit.util.CasUtil.select;

import java.io.File;
import java.net.URISyntaxException;
Expand All @@ -47,6 +47,7 @@
import org.apache.commons.lang3.Validate;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.CasUtil;
import org.eclipse.rdf4j.common.net.ParsedIRI;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -58,6 +59,8 @@
import org.springframework.core.annotation.AnnotationAwareOrderComparator;

import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingProperties;
import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingPropertiesImpl;
import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingServiceAutoConfiguration;
Expand Down Expand Up @@ -513,4 +516,31 @@ public List<KBHandle> searchItems(KnowledgeBase aKB, String aQuery)
{
return disambiguate(aKB, null, ConceptFeatureValueType.ANY_OBJECT, aQuery, null, 0, null);
}

/**
 * Finds the sentence that contains the given offset.
 * <p>
 * A sentence matches when it begins at or before the offset and ends strictly after it. If
 * several sentences match, the first one delivered by {@code CasUtil.select} wins.
 *
 * @param aCas
 *            the CAS to search.
 * @param aBegin
 *            the offset (typically the begin offset of an annotation) to locate.
 * @return the matching sentence, or {@code null} if no sentence contains the offset.
 */
private static AnnotationFS selectSentenceCovering(CAS aCas, int aBegin)
{
    for (AnnotationFS candidate : select(aCas, getType(aCas, Sentence.class))) {
        // Skip sentences that start after the offset or end at/before it
        if (candidate.getBegin() > aBegin || candidate.getEnd() <= aBegin) {
            continue;
        }
        return candidate;
    }
    return null;
}

/**
 * Collects the token annotations covered by the given annotation.
 *
 * @param aCover
 *            the covering annotation; the lookup is performed in the CAS it belongs to.
 * @return the {@link Token} annotations that {@code CasUtil.selectCovered} reports for
 *         {@code aCover}.
 */
private static Collection<AnnotationFS> selectTokensCovered(AnnotationFS aCover)
{
    CAS cas = aCover.getCAS();
    return CasUtil.selectCovered(cas, getType(cas, Token.class), aCover);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@
import static org.apache.uima.cas.impl.Serialization.deserializeCASComplete;
import static org.apache.uima.cas.impl.Serialization.serializeCASComplete;

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.resource.ResourceInitializationException;

Expand Down Expand Up @@ -93,8 +96,15 @@ public static CASImpl makeDummyCas()

/**
 * Counts the feature structures returned by {@code findAllFeatureStructures} for the given CAS,
 * grouped by the name of their type.
 *
 * @param casImpl
 *            the CAS whose feature structures are counted.
 * @return a map from type name to the number of feature structures of that type.
 */
public static Map<String, Long> countFeatureStructures(CASImpl casImpl)
{
return WebAnnoCasUtil.findAllFeatureStructures(casImpl).stream() //
return findAllFeatureStructures(casImpl).stream() //
.map(fs -> fs.getType().getName()) //
.collect(groupingBy(identity(), counting()));
}

/**
 * Gathers the feature structures that {@code CASImpl.walkReachablePlusFSsSorted} visits for the
 * given CAS.
 *
 * @param aCas
 *            the CAS to walk. It is cast to {@link CASImpl}, so passing any other CAS
 *            implementation fails with a {@link ClassCastException}.
 * @return the visited feature structures without duplicates, in the order in which they were
 *         visited.
 */
public static Set<FeatureStructure> findAllFeatureStructures(CAS aCas)
{
    CASImpl casImpl = (CASImpl) aCas;
    // LinkedHashSet keeps the visiting order while dropping repeated visits
    Set<FeatureStructure> collected = new LinkedHashSet<>();
    casImpl.walkReachablePlusFSsSorted(collected::add, null, null, null);
    return collected;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ public void train(RecommenderContext aContext, List<CAS> aCasses) throws Recomme
// OpenNLP
int beamSize = Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE);

TrainingParameters params = traits.getParameters();
var params = traits.getParameters();
params.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));

TokenNameFinderModel model = train(nameSamples, params);
var model = train(nameSamples, params);

aContext.put(KEY_MODEL, model);
}
Expand Down Expand Up @@ -304,7 +304,7 @@ private String determineLabel(Span aName, int aTokenIdx)
return label;
}

private List<NameSample> extractNameSamples(List<CAS> aCasses)
private List<NameSample> extractNameSamples(Iterable<CAS> aCasses)
{
var nameSamples = new ArrayList<NameSample>();

Expand Down Expand Up @@ -402,8 +402,8 @@ private TokenNameFinderModel train(List<NameSample> aNameSamples,
TrainingParameters aParameters)
throws RecommendationException
{
try (NameSampleStream stream = new NameSampleStream(aNameSamples)) {
TokenNameFinderFactory finderFactory = new TokenNameFinderFactory();
try (var stream = new NameSampleStream(aNameSamples)) {
var finderFactory = new TokenNameFinderFactory();
return NameFinderME.train("unknown", null, stream, aParameters, finderFactory);
}
catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

import static de.tudarmstadt.ukp.inception.recommendation.api.model.AnnotationSuggestion.FLAG_ALL;
import static de.tudarmstadt.ukp.inception.recommendation.api.model.AnnotationSuggestion.FLAG_OVERLAP;
import static de.tudarmstadt.ukp.inception.recommendation.api.model.AnnotationSuggestion.FLAG_TRANSIENT_REJECTED;
import static de.tudarmstadt.ukp.inception.recommendation.api.model.LearningRecordUserAction.REJECTED;

import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
Expand All @@ -29,7 +27,6 @@
import java.util.Objects;
import java.util.Optional;

import org.apache.commons.lang3.NotImplementedException;
import org.apache.uima.cas.AnnotationBaseFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
Expand All @@ -45,7 +42,6 @@
import de.tudarmstadt.ukp.inception.recommendation.api.RecommendationService;
import de.tudarmstadt.ukp.inception.recommendation.api.SuggestionRenderer;
import de.tudarmstadt.ukp.inception.recommendation.api.SuggestionSupport_ImplBase;
import de.tudarmstadt.ukp.inception.recommendation.api.event.RecommendationRejectedEvent;
import de.tudarmstadt.ukp.inception.recommendation.api.model.AnnotationSuggestion;
import de.tudarmstadt.ukp.inception.recommendation.api.model.LearningRecord;
import de.tudarmstadt.ukp.inception.recommendation.api.model.LearningRecordChangeLocation;
Expand Down Expand Up @@ -135,52 +131,22 @@ else if (candidates.isEmpty() || !aAdapter.getTraits(DocumentMetadataLayerTraits
return annotation;
}

/**
 * Rejects the given suggestion: hides it, records the rejection in the learning records and
 * publishes a {@link RecommendationRejectedEvent}.
 *
 * @param aSessionOwner
 *            the session owner passed on to the learning record service.
 * @param aDocument
 *            the document the suggestion belongs to.
 * @param aDataOwner
 *            the data owner passed on to the learning record service and the event.
 * @param aSuggestion
 *            the suggestion to reject; it is cast to {@link MetadataSuggestion}.
 * @param aAction
 *            the location of the change, stored with the learning record.
 * @throws AnnotationException
 *             declared by the overridden method signature.
 */
@Override
public void rejectSuggestion(String aSessionOwner, SourceDocument aDocument, String aDataOwner,
        AnnotationSuggestion aSuggestion, LearningRecordChangeLocation aAction)
    throws AnnotationException
{
    var metadataSuggestion = (MetadataSuggestion) aSuggestion;

    // Flag the suggestion as hidden right away. This is cheaper than recalculating the
    // visibility for the whole document or even only for the part shown on screen.
    metadataSuggestion.hide(FLAG_TRANSIENT_REJECTED);

    var feature = recommendationService.getRecommender(metadataSuggestion).getFeature();

    // Record the rejection in the learning records
    learningRecordService.logRecord(aSessionOwner, aDocument, aDataOwner, metadataSuggestion,
            feature, REJECTED, aAction);

    // Notify listeners that the suggestion has been rejected
    var rejectedEvent = new RecommendationRejectedEvent(this, aDocument, aDataOwner, feature,
            metadataSuggestion.getLabel());
    applicationEventPublisher.publishEvent(rejectedEvent);
}

/**
 * Skipping suggestions is not supported by this implementation; the method always throws.
 *
 * @throws NotImplementedException
 *             always.
 * @throws AnnotationException
 *             declared by the overridden method signature; never thrown by this body.
 */
@Override
public void skipSuggestion(String aSessionOwner, SourceDocument aDocument, String aDataOwner,
AnnotationSuggestion aSuggestion, LearningRecordChangeLocation aAction)
throws AnnotationException
{
throw new NotImplementedException("Not yet implemented");
}

@Override
public <T extends AnnotationSuggestion> void calculateSuggestionVisibility(String aSessionOwner,
SourceDocument aDocument, CAS aCas, String aDataOwner, AnnotationLayer aLayer,
Collection<SuggestionGroup<T>> aRecommendations, int aWindowBegin, int aWindowEnd)
{
LOG.trace("calculateSuggestionVisibility() for layer {} on document {}", aLayer, aDocument);

var type = aCas.getTypeSystem().getType(aLayer.getName());
if (type == null) {
var predictedType = aCas.getTypeSystem().getType(aLayer.getName());
if (predictedType == null) {
// The type does not exist in the type system of the CAS. Probably it has not
// been upgraded to the latest version of the type system yet. If this is the case,
// we'll just skip.
return;
}

var annotations = aCas.<AnnotationBase> select(type).asList();
var annotations = aCas.<AnnotationBase> select(predictedType).asList();

var suggestionsForLayer = aRecommendations.stream()
// Only suggestions for the given layer
Expand All @@ -194,7 +160,7 @@ public <T extends AnnotationSuggestion> void calculateSuggestionVisibility(Strin
var adapter = schemaService.getAdapter(aLayer);
var traits = adapter.getTraits(DocumentMetadataLayerTraits.class).get();
for (var feature : schemaService.listSupportedFeatures(aLayer)) {
var feat = type.getFeatureByBaseName(feature.getName());
var feat = predictedType.getFeatureByBaseName(feature.getName());

if (feat == null) {
// The feature does not exist in the type system of the CAS. Probably it has not
Expand Down Expand Up @@ -267,14 +233,16 @@ static void hideSuggestionsRejectedOrSkipped(MetadataSuggestion aSuggestion,
}

@Override
public LearningRecord toLearningRecord(SourceDocument aDocument, String aUsername,
public LearningRecord toLearningRecord(SourceDocument aDocument, String aDataOwner,
AnnotationSuggestion aSuggestion, AnnotationFeature aFeature,
LearningRecordUserAction aUserAction, LearningRecordChangeLocation aLocation)
{
var record = new LearningRecord();
record.setUser(aUsername);
record.setUser(aDataOwner);
record.setSourceDocument(aDocument);
record.setUserAction(aUserAction);
record.setOffsetBegin(-1);
record.setOffsetEnd(-1);
record.setOffsetBegin2(-1);
record.setOffsetEnd2(-1);
record.setAnnotation(aSuggestion.getLabel());
Expand Down
9 changes: 4 additions & 5 deletions inception/inception-pdf-editor/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-io-pdf-asl</artifactId>
</dependency>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-segmentation-asl</artifactId>
</dependency>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-resources-asl</artifactId>
Expand Down Expand Up @@ -168,11 +172,6 @@
<artifactId>dkpro-core-api-lexmorph-asl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-segmentation-asl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.inception.app</groupId>
<artifactId>inception-schema</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
import static de.tudarmstadt.ukp.inception.pdfeditor.pdfanno.render.PdfAnnoSerializer.convertToDocumentOffset;
import static de.tudarmstadt.ukp.inception.pdfeditor.pdfanno.render.PdfAnnoSerializer.convertToDocumentOffsets;
import static de.tudarmstadt.ukp.inception.rendering.vmodel.VID.NONE_ID;
import static de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil.selectSentenceAt;
import static de.tudarmstadt.ukp.inception.support.wicket.ServletContextUtils.referenceToUrl;
import static java.lang.String.join;
import static java.util.Arrays.asList;
import static org.apache.uima.fit.util.CasUtil.getType;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -37,6 +37,7 @@
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.CasUtil;
import org.apache.wicket.Component;
import org.apache.wicket.ajax.AjaxRequestTarget;
import org.apache.wicket.feedback.IFeedback;
Expand All @@ -48,6 +49,7 @@
import org.slf4j.LoggerFactory;

import de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasProvider;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.inception.documents.api.DocumentService;
import de.tudarmstadt.ukp.inception.editor.AnnotationEditorExtensionRegistry;
import de.tudarmstadt.ukp.inception.editor.action.AnnotationActionHandler;
Expand Down Expand Up @@ -314,4 +316,20 @@ private String getAnnotationsJS(PdfAnnoModel aPdfAnnoModel)
"'colorMap': {},", //
"'annotations':[annoFile]}, true);");
}

/**
 * Finds the sentence that starts exactly at the given offset. Matching on the offset rather than
 * on the annotation address allows looking up the corresponding sentence in a different CAS.
 *
 * @param aCas
 *            the CAS to search.
 * @param aBegin
 *            the begin offset the sentence must start at.
 * @return the first sentence whose begin offset equals {@code aBegin}, or {@code null} if no
 *         sentence starts at that offset.
 */
private static AnnotationFS selectSentenceAt(CAS aCas, int aBegin)
{
    return CasUtil.select(aCas, getType(aCas, Sentence.class)).stream() //
            .filter(sentence -> sentence.getBegin() == aBegin) //
            .findFirst() //
            .orElse(null);
}
}
Loading

0 comments on commit 30ea848

Please sign in to comment.