From c3c35a0fb17f4a0ac1a5d58c2d820cdceef4f158 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 5 Jan 2024 12:24:43 +0100 Subject: [PATCH 1/4] #4428 - Slow knowledge-base lookups on relation layers - Improve translation of query terms into suitable regular expression - Log slow queries at debug level - Better generation of query terms from mention in the concept feature editor --- .../service/ConceptLinkingServiceImpl.java | 3 +- .../kb/querybuilder/SPARQLQueryBuilder.java | 33 +++++++++++++++---- .../ConceptFeatureEditor_ImplBase.java | 26 ++++++++++----- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java index ac2cc14ba78..7d6fba33c65 100644 --- a/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java +++ b/inception/inception-concept-linking/src/main/java/de/tudarmstadt/ukp/inception/conceptlinking/service/ConceptLinkingServiceImpl.java @@ -381,8 +381,7 @@ public List disambiguate(KnowledgeBase aKB, String aConceptScope, ConceptFeatureValueType aValueType, String aQuery, String aMention, int aMentionBeginOffset, CAS aCas) { - Set candidates = generateCandidates(aKB, aConceptScope, aValueType, aQuery, - aMention); + var candidates = generateCandidates(aKB, aConceptScope, aValueType, aQuery, aMention); return rankCandidates(aQuery, aMention, candidates, aCas, aMentionBeginOffset); } diff --git a/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilder.java b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilder.java index 287d220472d..4fc4be6197f 100644 --- 
a/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilder.java +++ b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilder.java @@ -1622,7 +1622,10 @@ private String asRegexp(String aValue) String value = aValue; // Escape metacharacters // value = value.replaceAll("[{}()\\[\\].+*?^$\\\\|]", "\\\\\\\\$0"); - value = value.replaceAll("[{}()\\[\\].+*?^$\\\\|]+", ".+"); + // Replace metacharacters with a match for any single char (.+ would be too slow) + value = value.replaceAll("[{}()\\[\\].+*?^$\\\\|]+", "."); + // Drop metacharacters + // value = value.replaceAll("[{}()\\[\\].+*?^$\\\\|]+", " "); // Replace consecutive whitespace or control chars with a whitespace matcher value = value.replaceAll("[\\p{Space}\\p{Cntrl}]+", "\\\\s+"); return value; @@ -1980,8 +1983,13 @@ public List asHandles(RepositoryConnection aConnection, boolean aAll) results = evaluateListQuery(tupleQuery, aAll); results.sort(comparing(KBObject::getUiLabel, CASE_INSENSITIVE_ORDER)); - LOG.debug("[{}] Query returned {} results in {}ms", queryId, results.size(), - currentTimeMillis() - startTime); + long duration = currentTimeMillis() - startTime; + LOG.debug("[{}] Query returned {} results in {}ms {}", queryId, results.size(), + duration, duration > 1000 ? "-- SLOW QUERY!" : ""); + + if (duration > 1000 && !LOG.isTraceEnabled()) { + LOG.debug("[{}] Slow query: {}", queryId, queryString); + } return results; } @@ -2026,8 +2034,13 @@ public boolean exists(RepositoryConnection aConnection, boolean aAll) TupleQuery tupleQuery = aConnection.prepareTupleQuery(queryString); boolean result = !evaluateListQuery(tupleQuery, aAll).isEmpty(); - LOG.debug("[{}] Query returned {} in {}ms", queryId, result, - currentTimeMillis() - startTime); + long duration = currentTimeMillis() - startTime; + LOG.debug("[{}] Query returned {} in {}ms {}", queryId, result, duration, + duration > 1000 ? "-- SLOW QUERY!" 
: ""); + + if (duration > 1000 && !LOG.isTraceEnabled()) { + LOG.debug("[{}] Slow query: {}", queryId, queryString); + } return result; } @@ -2060,8 +2073,14 @@ public Optional asHandle(RepositoryConnection aConnection, boolean aAl tupleQuery.setIncludeInferred(includeInferred); result = evaluateListQuery(tupleQuery, aAll).stream().findFirst(); - LOG.debug("[{}] Query returned a result in {}ms", queryId, - currentTimeMillis() - startTime); + long duration = currentTimeMillis() - startTime; + LOG.debug("[{}] Query returned a result in {}ms {}", queryId, duration, + duration > 1000 ? "-- SLOW QUERY!" : ""); + + if (duration > 1000 && !LOG.isTraceEnabled()) { + LOG.debug("[{}] Slow query: {}", queryId, queryString); + } + return result; } catch (QueryEvaluationException e) { diff --git a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureEditor_ImplBase.java b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureEditor_ImplBase.java index 3f451ab6e1b..c515ba66c59 100644 --- a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureEditor_ImplBase.java +++ b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureEditor_ImplBase.java @@ -32,7 +32,6 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; -import org.apache.uima.cas.CAS; import org.apache.wicket.MarkupContainer; import org.apache.wicket.core.request.handler.IPartialPageRequestHandler; import org.apache.wicket.feedback.IFeedback; @@ -131,7 +130,7 @@ protected List getCandidates(IModel aStateModel, AnnotationFeature feat = getModelObject().feature; var traits = readFeatureTraits(feat); - String repoId = traits.getRepositoryId(); + var repoId = traits.getRepositoryId(); // Check if kb is actually enabled if (!(repoId == null || kbService.isKnowledgeBaseEnabled(feat.getProject(), repoId))) { return 
Collections.emptyList(); @@ -140,12 +139,23 @@ protected List getCandidates(IModel aStateModel, // If there is a selection, we try obtaining its text from the CAS and use it as an // additional item in the query. Note that there is not always a mention, e.g. when the // feature is used in a document-level annotations. - CAS cas = aHandler != null ? aHandler.getEditorCas() : null; - String mention = aStateModel != null ? aStateModel.getObject().getSelection().getText() - : null; - int mentionBegin = aStateModel != null - ? aStateModel.getObject().getSelection().getBegin() - : -1; + var cas = aHandler != null ? aHandler.getEditorCas() : null; + + String mention = null; + int mentionBegin = -1; + + if (aStateModel != null) { + var selection = aStateModel.getObject().getSelection(); + if (selection.isSpan()) { + mention = selection.getText(); + mentionBegin = selection.getBegin(); + } + + if (selection.isArc()) { + mention = selection.getOriginText() + " " + selection.getTargetText(); + mentionBegin = selection.getBegin(); + } + } choices = clService.getLinkingInstancesInKBScope(traits.getRepositoryId(), traits.getScope(), traits.getAllowedValueType(), finalInput, mention, From e3481a0330f1cbefb740de7eb2b1627aab930f69 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 5 Jan 2024 13:20:50 +0100 Subject: [PATCH 2/4] #4428 - Slow knowledge-base lookups on relation layers - Small layout fix in the concept feature traits editor - Improve documentation for the meaning of root concepts --- .../ui/kb/feature/ConceptFeatureTraitsEditor.html | 2 +- .../asciidoc/user-guide/projects_knowledge-base.adoc | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html index 80c1d3fbb89..cbbdaaca471 100644 --- 
a/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html +++ b/inception/inception-ui-kb/src/main/java/de/tudarmstadt/ukp/inception/ui/kb/feature/ConceptFeatureTraitsEditor.html @@ -41,7 +41,7 @@
- +
diff --git a/inception/inception-ui-kb/src/main/resources/META-INF/asciidoc/user-guide/projects_knowledge-base.adoc b/inception/inception-ui-kb/src/main/resources/META-INF/asciidoc/user-guide/projects_knowledge-base.adoc index 923df966308..dcbdd2aa6bc 100644 --- a/inception/inception-ui-kb/src/main/resources/META-INF/asciidoc/user-guide/projects_knowledge-base.adoc +++ b/inception/inception-ui-kb/src/main/resources/META-INF/asciidoc/user-guide/projects_knowledge-base.adoc @@ -225,7 +225,14 @@ user can choose one of the pre-configured mapping or provide a custom mapping. === Root Concepts -In the advanced settings, the user can leverage this feature of KB settings when one doesn't want the entire knowledge base to be used and rather choose to identify some specific root concepts. This feature specially helps in case of large knowledge bases such as Wikidata. +The knowledge base browser displays a class tree. By default, it tries to automatically determine the root classes of +this tree. However, for very large KBs this can be slow. Also, you might not be interested in browsing the entire KB +but would rather focus on specific subtrees. In such cases, you can define the root concepts explicitly here. + +NOTE: This setting currently affects **only the class tree in the knowledge base browser**. You can still search for concepts + that are outside of the subtrees induced by the root concepts using the search field on the knowledge-base page and you + can also still link concept features to concepts outside the subtrees. In order to limit a concept feature to a particular + subtree, use the **Scope** setting in the <<sect_concept_features>>. 
=== Additional Matching Properties From b131f6c0fc0eaeb492a59db1d9ba13a405efbecb Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 5 Jan 2024 15:24:05 +0100 Subject: [PATCH 3/4] #4428 - Slow knowledge-base lookups on relation layers - Small layout fix in the concept feature traits editor - Improve documentation for the meaning of root concepts --- .../resources/META-INF/asciidoc/user-guide/knowledge_base.adoc | 1 + 1 file changed, 1 insertion(+) diff --git a/inception/inception-ui-kb/src/main/resources/META-INF/asciidoc/user-guide/knowledge_base.adoc b/inception/inception-ui-kb/src/main/resources/META-INF/asciidoc/user-guide/knowledge_base.adoc index 01c2b47377d..5448f647195 100644 --- a/inception/inception-ui-kb/src/main/resources/META-INF/asciidoc/user-guide/knowledge_base.adoc +++ b/inception/inception-ui-kb/src/main/resources/META-INF/asciidoc/user-guide/knowledge_base.adoc @@ -41,6 +41,7 @@ The same is true for the object of a statement: After choosing the property for 4. *KB Resource*: This is provided as an option when the property has a range as a particular concept from the knowledge base. In this option, the user is provided with an auto-complete field with a list of knowledge base entities. This includes the subclass and instances of the range specified for the property. +[[sect_concept_features]] === Concept features Concept features are features that allow referencing concepts in the knowledge base during annotation. 
From 7aeed3ac8d04a79d72bc0eb6ddc1aa854d9adaea Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 5 Jan 2024 16:20:38 +0100 Subject: [PATCH 4/4] Update maven.yml --- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 7b780df9fc3..fbac654804f 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -28,7 +28,7 @@ jobs: distribution: 'temurin' cache: maven - name: Build with Maven - run: mvn --no-transfer-progress -B clean package -T2 --file pom.xml + run: mvn --no-transfer-progress -B clean package --file pom.xml # Fails with error message - no idea why... # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive