Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#4251 - Improve speed of yield calculation #4253

Merged
merged 1 commit into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@

import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;
import static java.util.Comparator.comparingInt;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.toList;
import static org.apache.commons.lang3.StringUtils.abbreviate;
import static org.apache.uima.fit.util.CasUtil.selectCovered;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
Expand Down Expand Up @@ -76,9 +80,9 @@ public class RelationRenderer

private Type type;
private Type spanType;
private Feature dependentFeature;
private Feature governorFeature;
private Feature arcSpanFeature;
private Feature targetFeature;
private Feature sourceFeature;
private Feature attachFeature;

public RelationRenderer(RelationAdapter aTypeAdapter,
LayerSupportRegistry aLayerSupportRegistry,
Expand Down Expand Up @@ -113,9 +117,9 @@ protected boolean typeSystemInit(TypeSystem aTypeSystem)
return false;
}

dependentFeature = type.getFeatureByBaseName(typeAdapter.getTargetFeatureName());
governorFeature = type.getFeatureByBaseName(typeAdapter.getSourceFeatureName());
arcSpanFeature = spanType.getFeatureByBaseName(typeAdapter.getAttachFeatureName());
targetFeature = type.getFeatureByBaseName(typeAdapter.getTargetFeatureName());
sourceFeature = type.getFeatureByBaseName(typeAdapter.getSourceFeatureName());
attachFeature = spanType.getFeatureByBaseName(typeAdapter.getAttachFeatureName());

return true;
}
Expand Down Expand Up @@ -159,24 +163,29 @@ public void render(final CAS aCas, List<AnnotationFeature> aFeatures, VDocument
}
}

/**
 * Renders the yield of a relation, i.e. the covered text of the annotations that can be reached
 * from the relation's target by following relations of this layer from their source to their
 * target.
 * <p>
 * The traversal starts at the target of {@code fs}, uses the {@code yield} set to visit every
 * annotation at most once (so cyclic relation chains terminate), sorts the collected
 * annotations by begin offset and hands them to {@code getYieldMessage}.
 *
 * @param fs
 *            the relation annotation whose yield is rendered
 * @return the yield message; the traversal-based code path always returns a present Optional
 */
// NOTE(review): this span comes from a diff view without +/- markers, so two signatures and two
// implementations are interleaved. The lines using getDependentFs / getRelationLinks / ICasUtil
// appear to be the replaced implementation - confirm against the repository before editing.
private Optional<String> renderYield(AnnotationFS fs)
Optional<String> renderYield(AnnotationFS fs)
{
FeatureStructure dependentFs = getDependentFs(fs);

var relationLinks = getRelationLinks(fs.getCAS());

if (!relationLinks.keySet().contains(ICasUtil.getAddr(dependentFs))) {
return Optional.empty();
// Annotations collected so far; this set doubles as the "visited" marker of the traversal.
var yield = new HashSet<Annotation>();
var queue = new ArrayDeque<Annotation>();
// NOTE(review): ArrayDeque does not accept null elements. render() explicitly checks for a
// null target, so presumably incomplete relations can exist - confirm that they never reach
// this method.
queue.add((Annotation) getTargetFs(fs));
// Index all relations of this layer by their source so that the outgoing relations of an
// annotation can be looked up directly while traversing.
// NOTE(review): presumably groupingBy rejects a null key, i.e. a relation without a source
// would fail here - verify.
var relationsBySource = fs.getCAS().<Annotation> select(type)
.collect(groupingBy(this::getSourceFs));
while (!queue.isEmpty()) {
// pop() takes from the head while add() appends to the tail, so annotations are processed in
// the order in which they were discovered.
var source = queue.pop();
if (!yield.contains(source)) {
yield.add(source);
var relations = relationsBySource.getOrDefault(source, emptyList());
for (var rel : relations) {
queue.add((Annotation) getTargetFs(rel));
}
}
}

// sort the annotations (begin, end)
var sortedDepFs = new ArrayList<>(relationLinks.get(ICasUtil.getAddr(dependentFs)));
sortedDepFs.sort(comparingInt(
arg0 -> ICasUtil.selectAnnotationByAddr(fs.getCAS(), arg0).getBegin()));

var cm = getYieldMessage(fs.getCAS(), sortedDepFs);

return Optional.of(cm);
// Only the begin offset is compared; the relative order of annotations that start at the same
// offset is not defined by this comparator.
var sortedYield = yield.stream() //
.sorted(Comparator.comparingInt(Annotation::getBegin)) //
.collect(toList());
var message = getYieldMessage(sortedYield);
return Optional.of(message);
}

@Override
Expand All @@ -188,8 +197,8 @@ public List<VObject> render(VDocument aVDocument, AnnotationFS aFS,
}

var typeAdapter = getTypeAdapter();
var dependentFs = getDependentFs(aFS);
var governorFs = getGovernorFs(aFS);
var dependentFs = getTargetFs(aFS);
var governorFs = getSourceFs(aFS);

if (dependentFs == null || governorFs == null) {
StringBuilder message = new StringBuilder();
Expand Down Expand Up @@ -262,13 +271,13 @@ public List<VLazyDetailGroup> lookupLazyDetails(CAS aCas, VID aVid, int aWindowB

var group = new VLazyDetailGroup();

var dependentFs = getDependentFs(fs);
var dependentFs = getTargetFs(fs);
if (dependentFs instanceof AnnotationFS) {
group.addDetail(new VLazyDetail("Target",
abbreviate(((AnnotationFS) dependentFs).getCoveredText(), 300)));
}

var governorFs = getGovernorFs(fs);
var governorFs = getSourceFs(fs);
if (governorFs instanceof AnnotationFS) {
group.addDetail(new VLazyDetail("Origin",
abbreviate(((AnnotationFS) governorFs).getCoveredText(), 300)));
Expand All @@ -287,29 +296,27 @@ public List<VLazyDetailGroup> lookupLazyDetails(CAS aCas, VID aVid, int aWindowB
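/**
* Builds the yield message by appending the covered text of the given annotations in iteration
* order (the annotations are expected to be sorted by begin offset). An annotation that begins
* exactly where the previous one ended is appended without a separator, an annotation that
* begins one character after the previous end is separated by a single space, and any other
* offset is marked with " ... ".
*/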
private String getYieldMessage(CAS aCas, List<Integer> sortedDepFs)
private String getYieldMessage(Iterable<Annotation> sortedDepFs)
{
StringBuilder cm = new StringBuilder();
int end = -1;
for (Integer depFs : sortedDepFs) {
for (Annotation depFs : sortedDepFs) {
if (end == -1) {
cm.append(ICasUtil.selectAnnotationByAddr(aCas, depFs).getCoveredText());
end = ICasUtil.selectAnnotationByAddr(aCas, depFs).getEnd();
cm.append(depFs.getCoveredText());
end = depFs.getEnd();
}
// if no space between token and punct
else if (end == ICasUtil.selectAnnotationByAddr(aCas, depFs).getBegin()) {
cm.append(ICasUtil.selectAnnotationByAddr(aCas, depFs).getCoveredText());
end = ICasUtil.selectAnnotationByAddr(aCas, depFs).getEnd();
else if (end == depFs.getBegin()) {
cm.append(depFs.getCoveredText());
end = depFs.getEnd();
}
else if (end + 1 != ICasUtil.selectAnnotationByAddr(aCas, depFs).getBegin()) {
cm.append(" ... ")
.append(ICasUtil.selectAnnotationByAddr(aCas, depFs).getCoveredText());
end = ICasUtil.selectAnnotationByAddr(aCas, depFs).getEnd();
else if (end + 1 != depFs.getBegin()) {
cm.append(" ... ").append(depFs.getCoveredText());
end = depFs.getEnd();
}
else {
cm.append(" ")
.append(ICasUtil.selectAnnotationByAddr(aCas, depFs).getCoveredText());
end = ICasUtil.selectAnnotationByAddr(aCas, depFs).getEnd();
cm.append(" ").append(depFs.getCoveredText());
end = depFs.getEnd();
}

}
Expand All @@ -325,8 +332,8 @@ private Map<Integer, Set<Integer>> getRelationLinks(CAS aCas)
var relations = new ConcurrentHashMap<Integer, Set<Integer>>();

for (var fs : aCas.<Annotation> select(type)) {
var govFs = getGovernorFs(fs);
var depFs = getDependentFs(fs);
var govFs = getSourceFs(fs);
var depFs = getTargetFs(fs);

if (govFs == null || depFs == null) {
log.warn("Relation [" + typeAdapter.getLayer().getName() + "] with id ["
Expand Down Expand Up @@ -369,29 +376,21 @@ private void updateLinks(Map<Integer, Set<Integer>> aRelLinks, Integer aGov)
}
}

/**
 * Returns the feature structure at the source end of the given relation.
 * <p>
 * If an attach feature is configured, the value of the relation's source feature is read and
 * the attach feature is followed from there; otherwise the value of the source feature itself
 * is returned.
 *
 * @param fs
 *            the relation feature structure
 * @return the feature structure found at the source end of the relation
 */
// NOTE(review): this span comes from a diff view without +/- markers; the getGovernorFs
// signature and the lines using governorFs / governorFeature / arcSpanFeature appear to be the
// replaced implementation - confirm against the repository before editing.
private FeatureStructure getGovernorFs(FeatureStructure fs)
private FeatureStructure getSourceFs(FeatureStructure fs)
{
RelationAdapter typeAdapter = getTypeAdapter();
FeatureStructure governorFs;
if (typeAdapter.getAttachFeatureName() != null) {
governorFs = fs.getFeatureValue(governorFeature).getFeatureValue(arcSpanFeature);
if (attachFeature != null) {
// NOTE(review): if the source feature of the relation is unset, the chained call presumably
// throws a NullPointerException - confirm whether such relations can reach this method.
return fs.getFeatureValue(sourceFeature).getFeatureValue(attachFeature);
}
else {
governorFs = fs.getFeatureValue(governorFeature);
}
return governorFs;

return fs.getFeatureValue(sourceFeature);
}

/**
 * Returns the feature structure at the target end of the given relation.
 * <p>
 * If an attach feature is configured, the value of the relation's target feature is read and
 * the attach feature is followed from there; otherwise the value of the target feature itself
 * is returned.
 *
 * @param fs
 *            the relation feature structure
 * @return the feature structure found at the target end of the relation
 */
// NOTE(review): this span comes from a diff view without +/- markers; the getDependentFs
// signature and the lines using dependentFs / dependentFeature / arcSpanFeature appear to be the
// replaced implementation - confirm against the repository before editing.
private FeatureStructure getDependentFs(FeatureStructure fs)
private FeatureStructure getTargetFs(FeatureStructure fs)
{
RelationAdapter typeAdapter = getTypeAdapter();
FeatureStructure dependentFs;
if (typeAdapter.getAttachFeatureName() != null) {
dependentFs = fs.getFeatureValue(dependentFeature).getFeatureValue(arcSpanFeature);
if (attachFeature != null) {
// NOTE(review): if the target feature of the relation is unset, the chained call presumably
// throws a NullPointerException - confirm whether such relations can reach this method.
return fs.getFeatureValue(targetFeature).getFeatureValue(attachFeature);
}
else {
dependentFs = fs.getFeatureValue(dependentFeature);
}
return dependentFs;

return fs.getFeatureValue(targetFeature);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -130,34 +130,82 @@ public void setup() throws Exception
}

// Checks the yield text rendered for relations while a small relation graph is built up step by
// step over the tokens "1" .. "9"; every token carries one POS annotation and the relations
// connect these POS annotations.
// NOTE(review): this span comes from a diff view without +/- markers; the
// thatRelationCrossSentenceBehaviorOnRenderGeneratesErrors signature and the explicitly typed
// TokenBuilder declaration appear to be replaced lines - confirm against the repository.
@Test
public void thatRelationCrossSentenceBehaviorOnRenderGeneratesErrors() throws Exception
void thatYieldWorks() throws Exception
{
TokenBuilder<Token, Sentence> builder = new TokenBuilder<>(Token.class, Sentence.class);
var builder = new TokenBuilder<>(Token.class, Sentence.class);
builder.buildTokens(jcas, "1 2 3 4 5 6 7 8 9");

// Attach one POS annotation with the same offsets to every token.
for (var t : select(jcas, Token.class)) {
var pos = new POS(jcas, t.getBegin(), t.getEnd());
t.setPos(pos);
pos.addToIndexes();
}

var pos = jcas.select(POS.class).asList();

var adapter = new RelationAdapter(layerSupportRegistry, featureSupportRegistry, null,
depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE,
() -> asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

var sut = new RelationRenderer(adapter, layerSupportRegistry, featureSupportRegistry,
asList(new RelationCrossSentenceBehavior()));
sut.typeSystemInit(jcas.getTypeSystem());

// A single relation from "1" to "5": the asserted yield is the text of the fourth argument of
// add(), i.e. that argument acts as the relation target.
var rel1 = adapter.add(document, username, pos.get(0), pos.get(4), jcas.getCas());
assertThat(sut.renderYield(rel1)).hasValue("5");

// Adding "5" -> "6" extends the yield of rel1 by the adjacent token.
var rel2 = adapter.add(document, username, pos.get(4), pos.get(5), jcas.getCas());
assertThat(sut.renderYield(rel1)).hasValue("5 6");
assertThat(sut.renderYield(rel2)).hasValue("6");

// Adding "5" -> "3" introduces a non-adjacent token, which is rendered with " ... ".
var rel3 = adapter.add(document, username, pos.get(4), pos.get(2), jcas.getCas());
assertThat(sut.renderYield(rel1)).hasValue("3 ... 5 6");
assertThat(sut.renderYield(rel2)).hasValue("6");
assertThat(sut.renderYield(rel3)).hasValue("3");

// Adding "3" -> "2" is picked up transitively by rel1 and rel3.
var rel4 = adapter.add(document, username, pos.get(2), pos.get(1), jcas.getCas());
assertThat(sut.renderYield(rel1)).hasValue("2 3 ... 5 6");
assertThat(sut.renderYield(rel2)).hasValue("6");
assertThat(sut.renderYield(rel3)).hasValue("2 3");
assertThat(sut.renderYield(rel4)).hasValue("2");

// Adding "6" -> "5" closes a cycle between "5" and "6": rendering must still terminate, and
// rel2 and rel5 now reach everything that is reachable from "5".
var rel5 = adapter.add(document, username, pos.get(5), pos.get(4), jcas.getCas());
assertThat(sut.renderYield(rel1)).hasValue("2 3 ... 5 6");
assertThat(sut.renderYield(rel2)).hasValue("2 3 ... 5 6");
assertThat(sut.renderYield(rel3)).hasValue("2 3");
assertThat(sut.renderYield(rel4)).hasValue("2");
assertThat(sut.renderYield(rel5)).hasValue("2 3 ... 5 6");
}

@Test
void thatRelationCrossSentenceBehaviorOnRenderGeneratesErrors() throws Exception
{
var builder = new TokenBuilder<>(Token.class, Sentence.class);
builder.buildTokens(jcas, "This is a test .\nThis is sentence two .");

for (Token t : select(jcas, Token.class)) {
POS pos = new POS(jcas, t.getBegin(), t.getEnd());
for (var t : select(jcas, Token.class)) {
var pos = new POS(jcas, t.getBegin(), t.getEnd());
t.setPos(pos);
pos.addToIndexes();
}

RelationAdapter adapter = new RelationAdapter(layerSupportRegistry, featureSupportRegistry,
null, depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE,
var adapter = new RelationAdapter(layerSupportRegistry, featureSupportRegistry, null,
depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE,
() -> asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

List<POS> posAnnotations = new ArrayList<>(select(jcas, POS.class));
var posAnnotations = new ArrayList<>(select(jcas, POS.class));

POS source = posAnnotations.get(0);
POS target = posAnnotations.get(posAnnotations.size() - 1);
var source = posAnnotations.get(0);
var target = posAnnotations.get(posAnnotations.size() - 1);

depLayer.setCrossSentence(true);
AnnotationFS dep = adapter.add(document, username, source, target, jcas.getCas());

depLayer.setCrossSentence(false);
RelationRenderer sut = new RelationRenderer(adapter, layerSupportRegistry,
featureSupportRegistry, asList(new RelationCrossSentenceBehavior()));
var sut = new RelationRenderer(adapter, layerSupportRegistry, featureSupportRegistry,
asList(new RelationCrossSentenceBehavior()));

VDocument vdoc = new VDocument();
var vdoc = new VDocument();
sut.render(jcas.getCas(), asList(), vdoc, 0, jcas.getDocumentText().length());

assertThat(vdoc.comments()) //
Expand All @@ -167,28 +215,28 @@ public void thatRelationCrossSentenceBehaviorOnRenderGeneratesErrors() throws Ex
}

@Test
public void thatRelationOverlapBehaviorOnRenderGeneratesErrors() throws Exception
void thatRelationOverlapBehaviorOnRenderGeneratesErrors() throws Exception
{
TokenBuilder<Token, Sentence> builder = new TokenBuilder<>(Token.class, Sentence.class);
var builder = new TokenBuilder<>(Token.class, Sentence.class);
builder.buildTokens(jcas, "This is a test .\nThis is sentence two .");

for (Token t : select(jcas, Token.class)) {
POS pos = new POS(jcas, t.getBegin(), t.getEnd());
for (var t : select(jcas, Token.class)) {
var pos = new POS(jcas, t.getBegin(), t.getEnd());
t.setPos(pos);
pos.addToIndexes();
}

RelationAdapter adapter = new RelationAdapter(layerSupportRegistry, featureSupportRegistry,
null, depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE,
var adapter = new RelationAdapter(layerSupportRegistry, featureSupportRegistry, null,
depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE,
() -> asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

List<POS> posAnnotations = new ArrayList<>(select(jcas, POS.class));
var posAnnotations = new ArrayList<>(select(jcas, POS.class));

POS source = posAnnotations.get(0);
POS target = posAnnotations.get(1);
var source = posAnnotations.get(0);
var target = posAnnotations.get(1);

RelationRenderer sut = new RelationRenderer(adapter, layerSupportRegistry,
featureSupportRegistry, asList(new RelationOverlapBehavior()));
var sut = new RelationRenderer(adapter, layerSupportRegistry, featureSupportRegistry,
asList(new RelationOverlapBehavior()));

// Create two stacked annotations
depLayer.setOverlapMode(ANY_OVERLAP);
Expand Down