diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/FtsScoreFeatureGenerator.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/FtsScoreFeatureGenerator.java index 85ce7d92825..a212acf8b78 100644 --- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/FtsScoreFeatureGenerator.java +++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/FtsScoreFeatureGenerator.java @@ -32,6 +32,6 @@ public class FtsScoreFeatureGenerator @Override public void apply(CandidateEntity aCandidate) { - aCandidate.put(CandidateEntity.KEY_FTS_SCORE, aCandidate.getHandle().getScore()); + aCandidate.put(CandidateEntity.SCORE_FTS, aCandidate.getHandle().getScore()); } } diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/LevenshteinFeatureGenerator.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/LevenshteinFeatureGenerator.java index c541d4b0c99..fa2a46e4dd4 100644 --- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/LevenshteinFeatureGenerator.java +++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/LevenshteinFeatureGenerator.java @@ -17,20 +17,21 @@ */ package de.tudarmstadt.ukp.inception.conceptlinking.feature; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION_CONTEXT; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION_NC; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_QUERY; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_QUERY_NC; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_MENTION; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_MENTION_CONTEXT; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_MENTION_NC; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BEST_MATCH_TERM_NC; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_NC; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION_CONTEXT; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION_NC; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_QUERY; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_QUERY_NC; import static org.apache.commons.lang3.StringUtils.join; import org.apache.commons.text.similarity.LevenshteinDistance; +import org.springframework.core.annotation.Order; import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity; @@ -41,6 +42,7 @@ * {@link EntityLinkingServiceAutoConfiguration#levenshteinFeatureGenerator()}. *

*/ +@Order(100) public class LevenshteinFeatureGenerator implements EntityRankingFeatureGenerator { @@ -60,26 +62,26 @@ private void update(CandidateEntity aCandidate, String aTerm) aCandidate.get(KEY_MENTION_NC) // .map(mention -> MEASURE.apply(termNC, mention)) // - .ifPresent(score -> aCandidate.mergeMin(KEY_LEVENSHTEIN_MENTION_NC, score)); + .ifPresent(score -> aCandidate.mergeMin(SCORE_LEVENSHTEIN_MENTION_NC, score)); aCandidate.get(KEY_QUERY_NC) // .map(query -> MEASURE.apply(termNC, query)) // .ifPresent(score -> { - if (aCandidate.mergeMin(KEY_LEVENSHTEIN_QUERY_NC, score)) { + if (aCandidate.mergeMin(SCORE_LEVENSHTEIN_QUERY_NC, score)) { aCandidate.put(KEY_QUERY_BEST_MATCH_TERM_NC, aTerm); } }); aCandidate.get(KEY_MENTION) // .map(mention -> MEASURE.apply(aTerm, mention)) // - .ifPresent(score -> aCandidate.mergeMin(KEY_LEVENSHTEIN_MENTION, score)); + .ifPresent(score -> aCandidate.mergeMin(SCORE_LEVENSHTEIN_MENTION, score)); aCandidate.get(KEY_QUERY) // .map(query -> MEASURE.apply(aTerm, query)) // - .ifPresent(score -> aCandidate.mergeMin(KEY_LEVENSHTEIN_QUERY, score)); + .ifPresent(score -> aCandidate.mergeMin(SCORE_LEVENSHTEIN_QUERY, score)); aCandidate.get(KEY_MENTION_CONTEXT) // .map(context -> MEASURE.apply(aTerm, join(context, ' '))) // - .ifPresent(score -> aCandidate.mergeMin(KEY_LEVENSHTEIN_MENTION_CONTEXT, score)); + .ifPresent(score -> aCandidate.mergeMin(SCORE_LEVENSHTEIN_MENTION_CONTEXT, score)); } } diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGenerator.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGenerator.java index c3884a6fa8c..18a15124fb8 100644 --- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGenerator.java +++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGenerator.java @@ -23,14 +23,16 @@ import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BEST_MATCH_TERM_NC; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BOW; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BOW_NC; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_MENTION; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_MENTION_CONTEXT; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_MENTION_NC; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_QUERY; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_QUERY_NC; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_MENTION; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_MENTION_CONTEXT; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_MENTION_NC; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_QUERY; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_QUERY_NC; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.sortedBagOfWords; import static java.util.Arrays.copyOf; +import org.springframework.core.annotation.Order; + import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingServiceAutoConfiguration; import de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity; @@ -40,6 +42,7 @@ * {@link EntityLinkingServiceAutoConfiguration#matchingTokenOverlapFeatureGenerator}. *

*/ +@Order(200) // Make sure QUERY_BEST_MATCH_TERM from Levenshtein is overwritten public class MatchingTokenOverlapFeatureGenerator implements EntityRankingFeatureGenerator { @@ -47,8 +50,7 @@ public class MatchingTokenOverlapFeatureGenerator @Override public void apply(CandidateEntity aCandidate) { - var label = aCandidate.getLabel(); - update(aCandidate, label); + update(aCandidate, aCandidate.getLabel()); aCandidate.getHandle().getMatchTerms().forEach(p -> update(aCandidate, p.getKey())); } @@ -60,13 +62,13 @@ private void update(CandidateEntity aCandidate, String aTerm) aCandidate.get(KEY_MENTION_BOW_NC) // .map(mention -> distance(tokensNC, mention)) // .filter(score -> score >= 0) // - .ifPresent(score -> aCandidate.mergeMin(KEY_TOKEN_OVERLAP_MENTION_NC, score)); + .ifPresent(score -> aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_MENTION_NC, score)); aCandidate.get(KEY_QUERY_BOW_NC) // .map(query -> distance(tokensNC, query)) // .filter(score -> score >= 0) // .ifPresent(score -> { - if (aCandidate.mergeMin(KEY_TOKEN_OVERLAP_QUERY_NC, score)) { + if (aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_QUERY_NC, score)) { aCandidate.put(KEY_QUERY_BEST_MATCH_TERM_NC, aTerm); } }); @@ -74,17 +76,18 @@ private void update(CandidateEntity aCandidate, String aTerm) aCandidate.get(KEY_MENTION_BOW) // .map(mention -> distance(tokens, mention)) // .filter(score -> score >= 0) // - .ifPresent(score -> aCandidate.mergeMin(KEY_TOKEN_OVERLAP_MENTION, score)); + .ifPresent(score -> aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_MENTION, score)); aCandidate.get(KEY_QUERY_BOW) // .map(query -> distance(tokens, query)) // .filter(score -> score >= 0) // - .ifPresent(score -> aCandidate.mergeMin(KEY_TOKEN_OVERLAP_QUERY, score)); + .ifPresent(score -> aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_QUERY, score)); aCandidate.get(KEY_MENTION_CONTEXT) // .map(context -> distance(tokens, context.toArray(String[]::new))) // .filter(score -> score >= 0) // - .ifPresent(score -> aCandidate.mergeMin(KEY_TOKEN_OVERLAP_MENTION_CONTEXT, score)); + .ifPresent( + score -> aCandidate.mergeMin(SCORE_TOKEN_OVERLAP_MENTION_CONTEXT, score)); } private int distance(String[] aSortedBowCandidate, String[] aSortedBowUser) diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/SemanticSignatureFeatureGenerator.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/SemanticSignatureFeatureGenerator.java index b67fa7e5802..9649af1a825 100644 --- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/SemanticSignatureFeatureGenerator.java +++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/SemanticSignatureFeatureGenerator.java @@ -20,7 +20,7 @@ import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_MENTION_CONTEXT; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_NUM_RELATIONS; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_SIGNATURE_OVERLAP; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_SIGNATURE_OVERLAP_SCORE; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_SIGNATURE_OVERLAP; import java.io.File; import java.util.Arrays; @@ -138,7 +138,7 @@ public void apply(CandidateEntity aCandidate) } aCandidate.put(KEY_SIGNATURE_OVERLAP, signatureOverlap); - aCandidate.put(KEY_SIGNATURE_OVERLAP_SCORE, signatureOverlap.size()); + aCandidate.put(SCORE_SIGNATURE_OVERLAP, signatureOverlap.size()); aCandidate.put(KEY_NUM_RELATIONS, (sig.getRelatedRelations() != null) ? sig.getRelatedRelations().size() : 0); } diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntity.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntity.java index 4d7b493a497..f3511d21929 100644 --- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntity.java +++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntity.java @@ -42,7 +42,7 @@ */ public class CandidateEntity { - public static final Pattern TOKENKIZER_PATTERN = Pattern.compile("\\s+"); + public static final Pattern TOKENKIZER_PATTERN = Pattern.compile("[\\s()\\-]+"); public static String[] sortedBagOfWords(String aString) { @@ -94,15 +94,15 @@ public static String[] sortedBagOfWords(String aString) * the default value to ensure that candidates are ranked last on this feature if it could not * be calculated. */ - public static final Key KEY_LEVENSHTEIN_MENTION = new Key<>("levMention", MAX_VALUE); + public static final Key SCORE_LEVENSHTEIN_MENTION = new Key<>("levMention", MAX_VALUE); - public static final Key KEY_LEVENSHTEIN_MENTION_NC = new Key<>("levMentionNc", + public static final Key SCORE_LEVENSHTEIN_MENTION_NC = new Key<>("levMentionNc", MAX_VALUE); - public static final Key KEY_TOKEN_OVERLAP_MENTION = new Key<>("tokenOverlapMention", + public static final Key SCORE_TOKEN_OVERLAP_MENTION = new Key<>("tokenOverlapMention", MAX_VALUE); - public static final Key KEY_TOKEN_OVERLAP_MENTION_NC = new Key<>( + public static final Key SCORE_TOKEN_OVERLAP_MENTION_NC = new Key<>( "tokenOverlapMentionNc", MAX_VALUE); /** @@ -112,10 +112,10 @@ public static String[] sortedBagOfWords(String aString) * the default value to ensure that candidates are ranked last on this feature if it could not * be calculated. */ - public static final Key KEY_LEVENSHTEIN_MENTION_CONTEXT = new Key<>("levContext", + public static final Key SCORE_LEVENSHTEIN_MENTION_CONTEXT = new Key<>("levContext", MAX_VALUE); - public static final Key KEY_TOKEN_OVERLAP_MENTION_CONTEXT = new Key<>( + public static final Key SCORE_TOKEN_OVERLAP_MENTION_CONTEXT = new Key<>( "tokenOverlapContext", MAX_VALUE); /** @@ -125,14 +125,15 @@ public static String[] sortedBagOfWords(String aString) * the default value to ensure that candidates are ranked last on this feature if it could not * be calculated. */ - public static final Key KEY_LEVENSHTEIN_QUERY = new Key<>("levQuery", MAX_VALUE); + public static final Key SCORE_LEVENSHTEIN_QUERY = new Key<>("levQuery", MAX_VALUE); - public static final Key KEY_LEVENSHTEIN_QUERY_NC = new Key<>("levQueryNc", MAX_VALUE); + public static final Key SCORE_LEVENSHTEIN_QUERY_NC = new Key<>("levQueryNc", + MAX_VALUE); - public static final Key KEY_TOKEN_OVERLAP_QUERY = new Key<>("tokenOverlapQuery", + public static final Key SCORE_TOKEN_OVERLAP_QUERY = new Key<>("tokenOverlapQuery", MAX_VALUE); - public static final Key KEY_TOKEN_OVERLAP_QUERY_NC = new Key<>("tokenOverlapQueryNc", + public static final Key SCORE_TOKEN_OVERLAP_QUERY_NC = new Key<>("tokenOverlapQueryNc", MAX_VALUE); /** @@ -150,7 +151,7 @@ public static String[] sortedBagOfWords(String aString) * number of related entities whose entity label occurs in content tokens Content * tokens consist of tokens in mention sentence annotated as nouns, verbs or adjectives */ - public static final Key KEY_SIGNATURE_OVERLAP_SCORE = new Key<>( + public static final Key SCORE_SIGNATURE_OVERLAP = new Key<>( "signatureOverlapScore", 0); /** @@ -161,7 +162,7 @@ public static String[] sortedBagOfWords(String aString) /** * FTS score - score assigned by the KB FTS (if any) */ - public static final Key KEY_FTS_SCORE = new Key<>("ftsScore", 0.0d); + public static final Key SCORE_FTS = new Key<>("ftsScore", 0.0d); /** * in-link count of wikipedia article of IRI diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/ranking/BaselineRankingStrategy.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/ranking/BaselineRankingStrategy.java index 63ccbc4fa16..27e6f1c0b72 100644 --- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/ranking/BaselineRankingStrategy.java +++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/ranking/BaselineRankingStrategy.java @@ -19,17 +19,15 @@ import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_FREQUENCY; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_ID_RANK; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LABEL_NC; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_MENTION_CONTEXT; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_QUERY; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_LEVENSHTEIN_QUERY_NC; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_NUM_RELATIONS; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY; import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_IS_LOWER_CASE; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_NC; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_SIGNATURE_OVERLAP_SCORE; -import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_TOKEN_OVERLAP_QUERY_NC; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_MENTION_CONTEXT; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_QUERY; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_LEVENSHTEIN_QUERY_NC; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_SIGNATURE_OVERLAP; +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.SCORE_TOKEN_OVERLAP_QUERY_NC; import java.util.Comparator; @@ -45,13 +43,9 @@ public class BaselineRankingStrategy // a 0 while a mismatch is represented using 1. The typical case is that neither // candidate matches the query which causes the next ranking criteria to be evaluated .append(queryMatchesIri(e1), queryMatchesIri(e2)) - // Use FTS score if available - // .append(e2.get(KEY_FTS_SCORE).get(), e1.get(KEY_FTS_SCORE).get()) // Require token overlap - .append(e1.get(KEY_TOKEN_OVERLAP_QUERY_NC).get(), - e2.get(KEY_TOKEN_OVERLAP_QUERY_NC).get()) - // Prefer matches where the query appears in the label - .append(labelMatchesQueryNC(e1), labelMatchesQueryNC(e2)) + .append(e1.get(SCORE_TOKEN_OVERLAP_QUERY_NC).get(), + e2.get(SCORE_TOKEN_OVERLAP_QUERY_NC).get()) // Compare geometric mean of the Levenshtein distance to query and mention // since both are important and a very close similarity in say the mention outweighs // a not so close similarity in the query @@ -61,11 +55,10 @@ public class BaselineRankingStrategy // Cased over caseless .append(casedOverCaseless(e1), casedOverCaseless(e2)) // A high signature overlap score is preferred. - .append(e2.get(KEY_SIGNATURE_OVERLAP_SCORE).get(), - e1.get(KEY_SIGNATURE_OVERLAP_SCORE).get()) + .append(e2.get(SCORE_SIGNATURE_OVERLAP).get(), e1.get(SCORE_SIGNATURE_OVERLAP).get()) // A low edit distance is preferred. - .append(e1.get(KEY_LEVENSHTEIN_MENTION_CONTEXT).get(), - e2.get(KEY_LEVENSHTEIN_MENTION_CONTEXT).get()) + .append(e1.get(SCORE_LEVENSHTEIN_MENTION_CONTEXT).get(), + e2.get(SCORE_LEVENSHTEIN_MENTION_CONTEXT).get()) // A high entity frequency is preferred. .append(e2.get(KEY_FREQUENCY).get(), e1.get(KEY_FREQUENCY).get()) // A high number of related relations is preferred. @@ -82,17 +75,10 @@ private static double queryMatchesIri(CandidateEntity aCandidate) return aCandidate.get(KEY_QUERY).map(q -> q.equals(aCandidate.getIRI()) ? 0 : 1).orElse(1); } - private static double labelMatchesQueryNC(CandidateEntity aCandidate) - { - return aCandidate.get(KEY_QUERY_NC) - .map(q -> aCandidate.get(KEY_LABEL_NC).map(l -> l.contains(q) ? 0 : 1).orElse(1)) - .orElse(1); - } - private static double casedOverCaseless(CandidateEntity aCandidate) { - int queryNC = aCandidate.get(KEY_LEVENSHTEIN_QUERY_NC).get(); - int query = aCandidate.get(KEY_LEVENSHTEIN_QUERY).get(); + int queryNC = aCandidate.get(SCORE_LEVENSHTEIN_QUERY_NC).get(); + int query = aCandidate.get(SCORE_LEVENSHTEIN_QUERY).get(); return queryNC <= query ? 0 : 1; } @@ -101,8 +87,8 @@ private static double queryOverMention(CandidateEntity aCandidate) { boolean caseInsensitive = aCandidate.get(KEY_QUERY_IS_LOWER_CASE).orElse(true); int query = aCandidate - .get(caseInsensitive ? KEY_LEVENSHTEIN_QUERY_NC : KEY_LEVENSHTEIN_QUERY).get(); - int mention = aCandidate.get(KEY_LEVENSHTEIN_MENTION).get(); + .get(caseInsensitive ? SCORE_LEVENSHTEIN_QUERY_NC : SCORE_LEVENSHTEIN_QUERY).get(); + int mention = aCandidate.get(SCORE_LEVENSHTEIN_MENTION).get(); return query <= mention ? 0 : 1; } @@ -112,8 +98,8 @@ private static double weightedLevenshteinDistance(CandidateEntity aCandidate) boolean caseInsensitive = aCandidate.get(KEY_QUERY_IS_LOWER_CASE).orElse(true); int query = aCandidate - .get(caseInsensitive ? KEY_LEVENSHTEIN_QUERY_NC : KEY_LEVENSHTEIN_QUERY).get(); - int mention = aCandidate.get(KEY_LEVENSHTEIN_MENTION).get(); + .get(caseInsensitive ? SCORE_LEVENSHTEIN_QUERY_NC : SCORE_LEVENSHTEIN_QUERY).get(); + int mention = aCandidate.get(SCORE_LEVENSHTEIN_MENTION).get(); if (query == Integer.MAX_VALUE && mention == Integer.MAX_VALUE) { return Double.MAX_VALUE; diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java index d89192b61e4..c95b34c0478 100644 --- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java +++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java @@ -304,8 +304,8 @@ private List findContainingMatches(KnowledgeBase aKB, String aConceptS } var duration = currentTimeMillis() - startTime; - LOG.debug("Found [{}] candidates using matching {} in {}ms", result.size(), - asList(longLabels), duration); + LOG.debug("Found [{}] candidates containing {} in {}ms", result.size(), asList(longLabels), + duration); WicketUtil.serverTiming("findContainingMatches", duration); return result; diff --git a/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGeneratorTest.java b/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGeneratorTest.java index f0313f42e95..16dffe6a946 100644 --- a/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGeneratorTest.java +++ b/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/feature/MatchingTokenOverlapFeatureGeneratorTest.java @@ -75,7 +75,7 @@ private void assertDistance(String query, String label, int distance) sut.apply(candidateEntity); - assertThat(candidateEntity.get(CandidateEntity.KEY_TOKEN_OVERLAP_QUERY)) // + assertThat(candidateEntity.get(CandidateEntity.SCORE_TOKEN_OVERLAP_QUERY)) // .get().isEqualTo(distance); } } diff --git a/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntityTest.java b/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntityTest.java new file mode 100644 index 00000000000..37416203df0 --- /dev/null +++ b/inception/inception-concept-linking/src/test/java/de/tudarmstadt/ukp/inception/conceptlinking/model/CandidateEntityTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.conceptlinking.model; + +import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.sortedBagOfWords; +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.Test; + +class CandidateEntityTest +{ + + @Test + void testSortedBagOfWords() + { + assertThat(sortedBagOfWords("this is (a test)")) // + .containsExactly("test", "this", "is", "a"); + } + +} diff --git a/inception/inception-ui-curation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/curation/actionbar/CuratorWorkflowActionBarItemGroup.java b/inception/inception-ui-curation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/curation/actionbar/CuratorWorkflowActionBarItemGroup.java index d72ef3a367b..8c75b23cc0f 100644 --- a/inception/inception-ui-curation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/curation/actionbar/CuratorWorkflowActionBarItemGroup.java +++ b/inception/inception-ui-curation/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/curation/actionbar/CuratorWorkflowActionBarItemGroup.java @@ -133,21 +133,21 @@ protected boolean isEditable() protected void actionToggleCurationState(AjaxRequestTarget aTarget) throws IOException, AnnotationException { - try { - page.actionValidateDocument(aTarget, page.getEditorCas()); - } - catch (ValidationException e) { - page.error("Document cannot be marked as finished: " + e.getMessage()); - aTarget.addChildren(page, IFeedback.class); - return; - } - - AnnotatorState state = page.getModelObject(); - SourceDocument sourceDocument = state.getDocument(); + var state = page.getModelObject(); + var sourceDocument = state.getDocument(); var docState = sourceDocument.getState(); switch (docState) { case CURATION_IN_PROGRESS: + try { + page.actionValidateDocument(aTarget, page.getEditorCas()); + } + catch (ValidationException e) { + page.error("Document cannot be marked as finished: " + e.getMessage()); + aTarget.addChildren(page, IFeedback.class); + return; + } + documentService.setSourceDocumentState(sourceDocument, CURATION_FINISHED); aTarget.add(page); break; diff --git a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html index cbbdaaca471..7e446ba1501 100644 --- a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html +++ b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html @@ -42,6 +42,9 @@
+
+ Setting a scope can significantly slow down auto-complete. +