diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/FtsScoreFeatureGenerator.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/FtsScoreFeatureGenerator.java
index 85ce7d92825..a212acf8b78 100644
--- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/FtsScoreFeatureGenerator.java
+++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/FtsScoreFeatureGenerator.java
@@ -32,6 +32,6 @@ public class FtsScoreFeatureGenerator
@Override
public void apply(CandidateEntity aCandidate)
{
- aCandidate.put(CandidateEntity.KEY_FTS_SCORE, aCandidate.getHandle().getScore());
+ aCandidate.put(CandidateEntity.SCORE_FTS, aCandidate.getHandle().getScore());
}
}
diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/LevenshteinFeatureGenerator.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/LevenshteinFeatureGenerator.java
index c541d4b0c99..fa2a46e4dd4 100644
--- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/LevenshteinFeatureGenerator.java
+++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/LevenshteinFeatureGenerator.java
@@ -17,20 +17,21 @@
*/
package de.tudarmstadt.ukp.inception.conceptlinking.feature;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION_CONTEXT;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION_NC;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_QUERY;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_QUERY_NC;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_MENTION;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_MENTION_CONTEXT;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_MENTION_NC;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BEST_MATCH_TERM_NC;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_NC;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION_CONTEXT;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION_NC;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_QUERY;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_QUERY_NC;
import static org.apache.commons.lang3.StringUtils.join;
import org.apache.commons.text.similarity.LevenshteinDistance;
+import org.springframework.core.annotation.Order;
import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingServiceAutoConfiguration;
import de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity;
@@ -41,6 +42,7 @@
* {@link EntityLinkingServiceAutoConfiguration#levenshteinFeatureGenerator()}.
*
*/
+@Order(100)
public class LevenshteinFeatureGenerator
implements EntityRankingFeatureGenerator
{
@@ -60,26 +62,26 @@ private void update(CandidateEntity aCandidate, String aTerm)
aCandidate.get(KEY_MENTION_NC) //
.map(mention -> MEASURE.apply(termNC, mention)) //
- .ifPresent(score -> aCandidate.mergeMin(KEY_LEVENSHTEIN_MENTION_NC, score));
+ .ifPresent(score -> aCandidate.mergeMin(SCORE_LEVENSHTEIN_MENTION_NC, score));
aCandidate.get(KEY_QUERY_NC) //
.map(query -> MEASURE.apply(termNC, query)) //
.ifPresent(score -> {
- if (aCandidate.mergeMin(KEY_LEVENSHTEIN_QUERY_NC, score)) {
+ if (aCandidate.mergeMin(SCORE_LEVENSHTEIN_QUERY_NC, score)) {
aCandidate.put(KEY_QUERY_BEST_MATCH_TERM_NC, aTerm);
}
});
aCandidate.get(KEY_MENTION) //
.map(mention -> MEASURE.apply(aTerm, mention)) //
- .ifPresent(score -> aCandidate.mergeMin(KEY_LEVENSHTEIN_MENTION, score));
+ .ifPresent(score -> aCandidate.mergeMin(SCORE_LEVENSHTEIN_MENTION, score));
aCandidate.get(KEY_QUERY) //
.map(query -> MEASURE.apply(aTerm, query)) //
- .ifPresent(score -> aCandidate.mergeMin(KEY_LEVENSHTEIN_QUERY, score));
+ .ifPresent(score -> aCandidate.mergeMin(SCORE_LEVENSHTEIN_QUERY, score));
aCandidate.get(KEY_MENTION_CONTEXT) //
.map(context -> MEASURE.apply(aTerm, join(context, ' '))) //
- .ifPresent(score -> aCandidate.mergeMin(KEY_LEVENSHTEIN_MENTION_CONTEXT, score));
+ .ifPresent(score -> aCandidate.mergeMin(SCORE_LEVENSHTEIN_MENTION_CONTEXT, score));
}
}
diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGenerator.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGenerator.java
index c3884a6fa8c..18a15124fb8 100644
--- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGenerator.java
+++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGenerator.java
@@ -23,14 +23,16 @@
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BEST_MATCH_TERM_NC;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BOW;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BOW_NC;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_MENTION;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_MENTION_CONTEXT;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_MENTION_NC;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_QUERY;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_QUERY_NC;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_MENTION;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_MENTION_CONTEXT;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_MENTION_NC;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_QUERY;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_QUERY_NC;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.sortedBagOfWords;
import static java.util.Arrays.copyOf;
+import org.springframework.core.annotation.Order;
+
import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingServiceAutoConfiguration;
import de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity;
@@ -40,6 +42,7 @@
* {@link EntityLinkingServiceAutoConfiguration#matchingTokenOverlapFeatureGenerator}.
*
*/
+@Order(200) // After Levenshtein (@Order(100)) so its KEY_QUERY_BEST_MATCH_TERM_NC is overwritten
public class MatchingTokenOverlapFeatureGenerator
implements EntityRankingFeatureGenerator
{
@@ -47,8 +50,7 @@ public class MatchingTokenOverlapFeatureGenerator
@Override
public void apply(CandidateEntity aCandidate)
{
- var label = aCandidate.getLabel();
- update(aCandidate, label);
+ update(aCandidate, aCandidate.getLabel());
aCandidate.getHandle().getMatchTerms().forEach(p -> update(aCandidate, p.getKey()));
}
@@ -60,13 +62,13 @@ private void update(CandidateEntity aCandidate, String aTerm)
aCandidate.get(KEY_MENTION_BOW_NC) //
.map(mention -> distance(tokensNC, mention)) //
.filter(score -> score >= 0) //
- .ifPresent(score -> aCandidate.mergeMin(KEY_TOKEN_OVERLAP_MENTION_NC, score));
+ .ifPresent(score -> aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_MENTION_NC, score));
aCandidate.get(KEY_QUERY_BOW_NC) //
.map(query -> distance(tokensNC, query)) //
.filter(score -> score >= 0) //
.ifPresent(score -> {
- if (aCandidate.mergeMin(KEY_TOKEN_OVERLAP_QUERY_NC, score)) {
+ if (aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_QUERY_NC, score)) {
aCandidate.put(KEY_QUERY_BEST_MATCH_TERM_NC, aTerm);
}
});
@@ -74,17 +76,18 @@ private void update(CandidateEntity aCandidate, String aTerm)
aCandidate.get(KEY_MENTION_BOW) //
.map(mention -> distance(tokens, mention)) //
.filter(score -> score >= 0) //
- .ifPresent(score -> aCandidate.mergeMin(KEY_TOKEN_OVERLAP_MENTION, score));
+ .ifPresent(score -> aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_MENTION, score));
aCandidate.get(KEY_QUERY_BOW) //
.map(query -> distance(tokens, query)) //
.filter(score -> score >= 0) //
- .ifPresent(score -> aCandidate.mergeMin(KEY_TOKEN_OVERLAP_QUERY, score));
+ .ifPresent(score -> aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_QUERY, score));
aCandidate.get(KEY_MENTION_CONTEXT) //
.map(context -> distance(tokens, context.toArray(String[]::new))) //
.filter(score -> score >= 0) //
- .ifPresent(score -> aCandidate.mergeMin(KEY_TOKEN_OVERLAP_MENTION_CONTEXT, score));
+ .ifPresent(
+ score -> aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_MENTION_CONTEXT, score));
}
private int distance(String[] aSortedBowCandidate, String[] aSortedBowUser)
diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/SemanticSignatureFeatureGenerator.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/SemanticSignatureFeatureGenerator.java
index b67fa7e5802..9649af1a825 100644
--- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/SemanticSignatureFeatureGenerator.java
+++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/SemanticSignatureFeatureGenerator.java
@@ -20,7 +20,7 @@
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_MENTION_CONTEXT;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_NUM_RELATIONS;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_SIGNATURE_OVERLAP;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_SIGNATURE_OVERLAP_SCORE;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_SIGNATURE_OVERLAP;
import java.io.File;
import java.util.Arrays;
@@ -138,7 +138,7 @@ public void apply(CandidateEntity aCandidate)
}
aCandidate.put(KEY_SIGNATURE_OVERLAP, signatureOverlap);
- aCandidate.put(KEY_SIGNATURE_OVERLAP_SCORE, signatureOverlap.size());
+ aCandidate.put(SCORE_SIGNATURE_OVERLAP, signatureOverlap.size());
aCandidate.put(KEY_NUM_RELATIONS,
(sig.getRelatedRelations() != null) ? sig.getRelatedRelations().size() : 0);
}
diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntity.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntity.java
index 4d7b493a497..f3511d21929 100644
--- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntity.java
+++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntity.java
@@ -42,7 +42,7 @@
*/
public class CandidateEntity
{
- public static final Pattern TOKENKIZER_PATTERN = Pattern.compile("\\s+");
+ public static final Pattern TOKENKIZER_PATTERN = Pattern.compile("[\\s()\\-]+");
public static String[] sortedBagOfWords(String aString)
{
@@ -94,15 +94,15 @@ public static String[] sortedBagOfWords(String aString)
* the default value to ensure that candidates are ranked last on this feature if it could not
* be calculated.
*/
- public static final Key KEY_LEVENSHTEIN_MENTION = new Key<>("levMention", MAX_VALUE);
+ public static final Key SCORE_LEVENSHTEIN_MENTION = new Key<>("levMention", MAX_VALUE);
- public static final Key KEY_LEVENSHTEIN_MENTION_NC = new Key<>("levMentionNc",
+ public static final Key SCORE_LEVENSHTEIN_MENTION_NC = new Key<>("levMentionNc",
MAX_VALUE);
- public static final Key KEY_TOKEN_OVERLAP_MENTION = new Key<>("tokenOverlapMention",
+ public static final Key SCORE_TOKEN_OVERLAP_MENTION = new Key<>("tokenOverlapMention",
MAX_VALUE);
- public static final Key KEY_TOKEN_OVERLAP_MENTION_NC = new Key<>(
+ public static final Key SCORE_TOKEN_OVERLAP_MENTION_NC = new Key<>(
"tokenOverlapMentionNc", MAX_VALUE);
/**
@@ -112,10 +112,10 @@ public static String[] sortedBagOfWords(String aString)
* the default value to ensure that candidates are ranked last on this feature if it could not
* be calculated.
*/
- public static final Key KEY_LEVENSHTEIN_MENTION_CONTEXT = new Key<>("levContext",
+ public static final Key SCORE_LEVENSHTEIN_MENTION_CONTEXT = new Key<>("levContext",
MAX_VALUE);
- public static final Key KEY_TOKEN_OVERLAP_MENTION_CONTEXT = new Key<>(
+ public static final Key SCORE_TOKEN_OVERLAP_MENTION_CONTEXT = new Key<>(
"tokenOverlapContext", MAX_VALUE);
/**
@@ -125,14 +125,15 @@ public static String[] sortedBagOfWords(String aString)
* the default value to ensure that candidates are ranked last on this feature if it could not
* be calculated.
*/
- public static final Key KEY_LEVENSHTEIN_QUERY = new Key<>("levQuery", MAX_VALUE);
+ public static final Key SCORE_LEVENSHTEIN_QUERY = new Key<>("levQuery", MAX_VALUE);
- public static final Key KEY_LEVENSHTEIN_QUERY_NC = new Key<>("levQueryNc", MAX_VALUE);
+ public static final Key SCORE_LEVENSHTEIN_QUERY_NC = new Key<>("levQueryNc",
+ MAX_VALUE);
- public static final Key KEY_TOKEN_OVERLAP_QUERY = new Key<>("tokenOverlapQuery",
+ public static final Key SCORE_TOKEN_OVERLAP_QUERY = new Key<>("tokenOverlapQuery",
MAX_VALUE);
- public static final Key KEY_TOKEN_OVERLAP_QUERY_NC = new Key<>("tokenOverlapQueryNc",
+ public static final Key SCORE_TOKEN_OVERLAP_QUERY_NC = new Key<>("tokenOverlapQueryNc",
MAX_VALUE);
/**
@@ -150,7 +151,7 @@ public static String[] sortedBagOfWords(String aString)
* number of related entities whose entity label occurs in content tokens Content
* tokens consist of tokens in mention sentence annotated as nouns, verbs or adjectives
*/
- public static final Key KEY_SIGNATURE_OVERLAP_SCORE = new Key<>(
+ public static final Key SCORE_SIGNATURE_OVERLAP = new Key<>(
"signatureOverlapScore", 0);
/**
@@ -161,7 +162,7 @@ public static String[] sortedBagOfWords(String aString)
/**
* FTS score - score assigned by the KB FTS (if any)
*/
- public static final Key KEY_FTS_SCORE = new Key<>("ftsScore", 0.0d);
+ public static final Key SCORE_FTS = new Key<>("ftsScore", 0.0d);
/**
* in-link count of wikipedia article of IRI
diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/ranking/BaselineRankingStrategy.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/ranking/BaselineRankingStrategy.java
index 63ccbc4fa16..27e6f1c0b72 100644
--- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/ranking/BaselineRankingStrategy.java
+++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/ranking/BaselineRankingStrategy.java
@@ -19,17 +19,15 @@
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_FREQUENCY;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_ID_RANK;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LABEL_NC;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION_CONTEXT;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_QUERY;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_QUERY_NC;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_NUM_RELATIONS;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_IS_LOWER_CASE;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_NC;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_SIGNATURE_OVERLAP_SCORE;
-import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_QUERY_NC;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION_CONTEXT;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_QUERY;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_QUERY_NC;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_SIGNATURE_OVERLAP;
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_QUERY_NC;
import java.util.Comparator;
@@ -45,13 +43,9 @@ public class BaselineRankingStrategy
// a 0 while a mismatch is represented using 1. The typical case is that neither
// candidate matches the query which causes the next ranking criteria to be evaluated
.append(queryMatchesIri(e1), queryMatchesIri(e2))
- // Use FTS score if available
- // .append(e2.get(KEY_FTS_SCORE).get(), e1.get(KEY_FTS_SCORE).get())
// Require token overlap
- .append(e1.get(KEY_TOKEN_OVERLAP_QUERY_NC).get(),
- e2.get(KEY_TOKEN_OVERLAP_QUERY_NC).get())
- // Prefer matches where the query appears in the label
- .append(labelMatchesQueryNC(e1), labelMatchesQueryNC(e2))
+ .append(e1.get(SCORE_TOKEN_OVERLAP_QUERY_NC).get(),
+ e2.get(SCORE_TOKEN_OVERLAP_QUERY_NC).get())
// Compare geometric mean of the Levenshtein distance to query and mention
// since both are important and a very close similarity in say the mention outweighs
// a not so close similarity in the query
@@ -61,11 +55,10 @@ public class BaselineRankingStrategy
// Cased over caseless
.append(casedOverCaseless(e1), casedOverCaseless(e2))
// A high signature overlap score is preferred.
- .append(e2.get(KEY_SIGNATURE_OVERLAP_SCORE).get(),
- e1.get(KEY_SIGNATURE_OVERLAP_SCORE).get())
+ .append(e2.get(SCORE_SIGNATURE_OVERLAP).get(), e1.get(SCORE_SIGNATURE_OVERLAP).get())
// A low edit distance is preferred.
- .append(e1.get(KEY_LEVENSHTEIN_MENTION_CONTEXT).get(),
- e2.get(KEY_LEVENSHTEIN_MENTION_CONTEXT).get())
+ .append(e1.get(SCORE_LEVENSHTEIN_MENTION_CONTEXT).get(),
+ e2.get(SCORE_LEVENSHTEIN_MENTION_CONTEXT).get())
// A high entity frequency is preferred.
.append(e2.get(KEY_FREQUENCY).get(), e1.get(KEY_FREQUENCY).get())
// A high number of related relations is preferred.
@@ -82,17 +75,10 @@ private static double queryMatchesIri(CandidateEntity aCandidate)
return aCandidate.get(KEY_QUERY).map(q -> q.equals(aCandidate.getIRI()) ? 0 : 1).orElse(1);
}
- private static double labelMatchesQueryNC(CandidateEntity aCandidate)
- {
- return aCandidate.get(KEY_QUERY_NC)
- .map(q -> aCandidate.get(KEY_LABEL_NC).map(l -> l.contains(q) ? 0 : 1).orElse(1))
- .orElse(1);
- }
-
private static double casedOverCaseless(CandidateEntity aCandidate)
{
- int queryNC = aCandidate.get(KEY_LEVENSHTEIN_QUERY_NC).get();
- int query = aCandidate.get(KEY_LEVENSHTEIN_QUERY).get();
+ int queryNC = aCandidate.get(SCORE_LEVENSHTEIN_QUERY_NC).get();
+ int query = aCandidate.get(SCORE_LEVENSHTEIN_QUERY).get();
return queryNC <= query ? 0 : 1;
}
@@ -101,8 +87,8 @@ private static double queryOverMention(CandidateEntity aCandidate)
{
boolean caseInsensitive = aCandidate.get(KEY_QUERY_IS_LOWER_CASE).orElse(true);
int query = aCandidate
- .get(caseInsensitive ? KEY_LEVENSHTEIN_QUERY_NC : KEY_LEVENSHTEIN_QUERY).get();
- int mention = aCandidate.get(KEY_LEVENSHTEIN_MENTION).get();
+ .get(caseInsensitive ? SCORE_LEVENSHTEIN_QUERY_NC : SCORE_LEVENSHTEIN_QUERY).get();
+ int mention = aCandidate.get(SCORE_LEVENSHTEIN_MENTION).get();
return query <= mention ? 0 : 1;
}
@@ -112,8 +98,8 @@ private static double weightedLevenshteinDistance(CandidateEntity aCandidate)
boolean caseInsensitive = aCandidate.get(KEY_QUERY_IS_LOWER_CASE).orElse(true);
int query = aCandidate
- .get(caseInsensitive ? KEY_LEVENSHTEIN_QUERY_NC : KEY_LEVENSHTEIN_QUERY).get();
- int mention = aCandidate.get(KEY_LEVENSHTEIN_MENTION).get();
+ .get(caseInsensitive ? SCORE_LEVENSHTEIN_QUERY_NC : SCORE_LEVENSHTEIN_QUERY).get();
+ int mention = aCandidate.get(SCORE_LEVENSHTEIN_MENTION).get();
if (query == Integer.MAX_VALUE && mention == Integer.MAX_VALUE) {
return Double.MAX_VALUE;
diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java
index d89192b61e4..c95b34c0478 100644
--- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java
+++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java
@@ -304,8 +304,8 @@ private List findContainingMatches(KnowledgeBase aKB, String aConceptS
}
var duration = currentTimeMillis() - startTime;
- LOG.debug("Found [{}] candidates using matching {} in {}ms", result.size(),
- asList(longLabels), duration);
+ LOG.debug("Found [{}] candidates containing {} in {}ms", result.size(), asList(longLabels),
+ duration);
WicketUtil.serverTiming("findContainingMatches", duration);
return result;
diff --git a/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGeneratorTest.java b/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGeneratorTest.java
index f0313f42e95..16dffe6a946 100644
--- a/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGeneratorTest.java
+++ b/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGeneratorTest.java
@@ -75,7 +75,7 @@ private void assertDistance(String query, String label, int distance)
sut.apply(candidateEntity);
- assertThat(candidateEntity.get(CandidateEntity.KEY_TOKEN_OVERLAP_QUERY)) //
+ assertThat(candidateEntity.get(CandidateEntity.SCORE_TOKEN_OVERLAP_QUERY)) //
.get().isEqualTo(distance);
}
}
diff --git a/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntityTest.java b/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntityTest.java
new file mode 100644
index 00000000000..37416203df0
--- /dev/null
+++ b/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntityTest.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.inception.conceptlinking.model;
+
+import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.sortedBagOfWords;
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.junit.jupiter.api.Test;
+
+class CandidateEntityTest
+{
+ // sortedBagOfWords must strip the parentheses and yield the tokens in exactly this order
+ @Test
+ void testSortedBagOfWords()
+ {
+ assertThat(sortedBagOfWords("this is (a test)")) //
+ .containsExactly("test", "this", "is", "a");
+ }
+
+}
diff --git a/inception/inception-ui-curation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/curation/actionbar/CuratorWorkflowActionBarItemGroup.java b/inception/inception-ui-curation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/curation/actionbar/CuratorWorkflowActionBarItemGroup.java
index d72ef3a367b..8c75b23cc0f 100644
--- a/inception/inception-ui-curation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/curation/actionbar/CuratorWorkflowActionBarItemGroup.java
+++ b/inception/inception-ui-curation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/curation/actionbar/CuratorWorkflowActionBarItemGroup.java
@@ -133,21 +133,21 @@ protected boolean isEditable()
protected void actionToggleCurationState(AjaxRequestTarget aTarget)
throws IOException, AnnotationException
{
- try {
- page.actionValidateDocument(aTarget, page.getEditorCas());
- }
- catch (ValidationException e) {
- page.error("Document cannot be marked as finished: " + e.getMessage());
- aTarget.addChildren(page, IFeedback.class);
- return;
- }
-
- AnnotatorState state = page.getModelObject();
- SourceDocument sourceDocument = state.getDocument();
+ var state = page.getModelObject();
+ var sourceDocument = state.getDocument();
var docState = sourceDocument.getState();
switch (docState) {
case CURATION_IN_PROGRESS:
+ try {
+ page.actionValidateDocument(aTarget, page.getEditorCas());
+ }
+ catch (ValidationException e) {
+ page.error("Document cannot be marked as finished: " + e.getMessage());
+ aTarget.addChildren(page, IFeedback.class);
+ return;
+ }
+
documentService.setSourceDocumentState(sourceDocument, CURATION_FINISHED);
aTarget.add(page);
break;
diff --git a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html
index cbbdaaca471..7e446ba1501 100644
--- a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html
+++ b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html
@@ -42,6 +42,9 @@