Skip to content

Commit

Permalink
Merge pull request #4524 from inception-project/refactoring/4523-Fact…
Browse files Browse the repository at this point in the history
…or-endpoint-specific-FTS-code-out-into-adapter-classes

#4523 - Factor endpoint-specific FTS code out into adapter classes
  • Loading branch information
reckart authored Feb 18, 2024
2 parents cfe0c94 + 80fc775 commit 7916bd4
Show file tree
Hide file tree
Showing 19 changed files with 3,508 additions and 3,002 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.kb.querybuilder;

/**
 * Adapter for the full-text-search (FTS) specific label restrictions of a query.
 * <p>
 * All methods return nothing, so implementations are expected to record the restriction as a
 * side effect (presumably on a query builder handed to them at construction time; verify
 * against the concrete adapter).
 */
public interface FtsAdapter
extends SPARQLVariables
{
/**
 * Restricts the query to items with a label matching exactly any of the given values.
 *
 * @param values
 *            the label values to match.
 */
void withLabelMatchingExactlyAnyOf(String... values);

/**
 * Restricts the query to items with a label containing any of the given values.
 *
 * @param values
 *            the values to look for inside labels.
 */
void withLabelContainingAnyOf(String... values);

/**
 * Restricts the query to items with a label matching any of the given values. In contrast to
 * {@link #withLabelMatchingExactlyAnyOf(String...)} the name implies a looser match; the exact
 * matching semantics are presumably up to the FTS of the implementation (TODO confirm).
 *
 * @param values
 *            the values to match labels against.
 */
void withLabelMatchingAnyOf(String... values);

/**
 * Restricts the query to items with a label starting with the given prefix.
 *
 * @param prefix
 *            the prefix a label has to start with.
 */
void withLabelStartingWith(String prefix);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.kb.querybuilder;

import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.Priority.PRIMARY;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.and;
import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.union;

import java.util.ArrayList;

import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPattern;

/**
 * {@link FtsAdapter} that contributes Blazegraph full-text-search (FTS) patterns to a
 * {@link SPARQLQueryBuilder}.
 * <p>
 * The Blazegraph search prefix is registered with the builder once, when the adapter is created.
 * Each {@code withLabel...} method then adds one {@code PRIMARY} pattern to the builder which
 * combines the match-term property binding with the {@link BlazegraphFtsQuery} pattern(s).
 */
public class FtsAdapterBlazegraph
    implements FtsAdapter
{
    private final SPARQLQueryBuilder builder;

    /**
     * Creates the adapter and registers the Blazegraph search prefix with the given builder.
     *
     * @param aBuilder
     *            the query builder receiving the prefix, projections and patterns.
     */
    public FtsAdapterBlazegraph(SPARQLQueryBuilder aBuilder)
    {
        builder = aBuilder;
        builder.addPrefix(SPARQLQueryBuilder.PREFIX_BLAZEGRAPH_SEARCH);
    }

    /**
     * Adds a pattern which is the union of one FTS query per given value, each one filtered by the
     * builder's equality pattern on the matched term.
     * <p>
     * Values whose sanitized form is blank are skipped.
     *
     * @param aValues
     *            the label values to look up.
     */
    @Override
    public void withLabelMatchingExactlyAnyOf(String... aValues)
    {
        var kb = builder.getKnowledgeBase();

        var valuePatterns = new ArrayList<GraphPattern>();
        for (var value : aValues) {
            var sanitizedValue = SPARQLQueryBuilder.sanitizeQueryString_FTS(value);

            // We assume that the FTS is case insensitive and found that some FTSes (e.g.
            // Fuseki) can have trouble matching if they get an upper-case query when they
            // internally lower-case
            if (builder.isCaseInsensitive()) {
                sanitizedValue = SPARQLQueryBuilder.toLowerCase(kb, sanitizedValue);
            }

            if (isBlank(sanitizedValue)) {
                continue;
            }

            builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);

            // The FTS is queried with the sanitized value while the filter is built from the
            // original, unsanitized value
            valuePatterns.add(new BlazegraphFtsQuery(SPARQLQueryBuilder.VAR_SUBJECT,
                    SPARQLQueryBuilder.VAR_SCORE, SPARQLQueryBuilder.VAR_MATCH_TERM,
                    SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY, sanitizedValue) //
                            .withLimit(builder.getLimit()) //
                            .filter(builder.equalsPattern(SPARQLQueryBuilder.VAR_MATCH_TERM, value,
                                    kb)));
        }

        // NOTE(review): if every value was skipped, union() receives an empty array - confirm
        // that the resulting pattern is acceptable to the builder
        builder.addPattern(PRIMARY, and( //
                builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY), //
                union(valuePatterns.toArray(GraphPattern[]::new))));
    }

    /**
     * Adds a pattern which is the union of one FTS query per given value, each one filtered by the
     * builder's contains pattern on the matched term.
     * <p>
     * Values whose sanitized form is blank are skipped.
     *
     * @param aValues
     *            the values to look for inside labels.
     */
    @Override
    public void withLabelContainingAnyOf(String... aValues)
    {
        var kb = builder.getKnowledgeBase();

        var valuePatterns = new ArrayList<GraphPattern>();
        for (var value : aValues) {
            var sanitizedValue = SPARQLQueryBuilder.sanitizeQueryString_FTS(value);

            // We assume that the FTS is case insensitive and found that some FTSes (e.g.
            // Fuseki) can have trouble matching if they get an upper-case query when they
            // internally lower-case
            if (builder.isCaseInsensitive()) {
                sanitizedValue = SPARQLQueryBuilder.toLowerCase(kb, sanitizedValue);
            }

            if (isBlank(sanitizedValue)) {
                continue;
            }

            builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);

            // The FTS is queried with the sanitized value while the filter is built from the
            // original, unsanitized value
            valuePatterns.add(new BlazegraphFtsQuery(SPARQLQueryBuilder.VAR_SUBJECT,
                    SPARQLQueryBuilder.VAR_SCORE, SPARQLQueryBuilder.VAR_MATCH_TERM,
                    SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY, sanitizedValue) //
                            .withLimit(builder.getLimit()) //
                            .filter(builder.containsPattern(SPARQLQueryBuilder.VAR_MATCH_TERM,
                                    value)));
        }

        builder.addPattern(PRIMARY,
                and(builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY),
                        union(valuePatterns.toArray(GraphPattern[]::new))));
    }

    /**
     * Adds a pattern consisting of a single FTS query for the given prefix, filtered by the
     * builder's starts-with pattern on the matched term.
     * <p>
     * The FTS query is the trimmed (and possibly lower-cased) prefix. A {@code *} wildcard is
     * appended unless the prefix as given ends with a space. If the trimmed prefix is empty, the
     * builder is told to return an empty result.
     *
     * @param aPrefixQuery
     *            the prefix a label has to start with.
     */
    @Override
    public void withLabelStartingWith(String aPrefixQuery)
    {
        var kb = builder.getKnowledgeBase();

        var queryString = aPrefixQuery.trim();

        // We assume that the FTS is case insensitive and found that some FTSes (e.g.
        // Fuseki) can have trouble matching if they get an upper-case query when they
        // internally lower-case
        if (builder.isCaseInsensitive()) {
            queryString = SPARQLQueryBuilder.toLowerCase(kb, queryString);
        }

        // NOTE(review): the projection and pattern below are still added in this case -
        // presumably harmless because the builder is flagged to return nothing; confirm
        if (queryString.isEmpty()) {
            builder.setReturnEmptyResult(true);
        }

        // If the query string entered by the user does not end with a space character, then
        // we assume that the user may not yet have finished writing the word and add a
        // wildcard
        if (!aPrefixQuery.endsWith(" ")) {
            queryString += "*";
        }

        builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);

        // Locate all entries where the label contains the prefix (using the FTS) and then
        // filter them by those which actually start with the prefix.
        builder.addPattern(PRIMARY, and( //
                builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY), //
                new BlazegraphFtsQuery(SPARQLQueryBuilder.VAR_SUBJECT, SPARQLQueryBuilder.VAR_SCORE,
                        SPARQLQueryBuilder.VAR_MATCH_TERM,
                        SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY, queryString) //
                                .withLimit(builder.getLimit()) //
                                .filter(builder.startsWithPattern(SPARQLQueryBuilder.VAR_MATCH_TERM,
                                        aPrefixQuery))));
    }

    /**
     * Adds a pattern which is the union of one FTS query per given value. Each query string is
     * obtained by passing the sanitized value together with {@code "*"} to
     * {@link SPARQLQueryBuilder#convertToFuzzyMatchingQuery}. No additional filter is applied to
     * the matched term.
     * <p>
     * Values for which the sanitized form or the converted query is blank are skipped.
     *
     * @param aValues
     *            the values to match labels against.
     */
    @Override
    public void withLabelMatchingAnyOf(String... aValues)
    {
        var kb = builder.getKnowledgeBase();

        var valuePatterns = new ArrayList<GraphPattern>();
        for (var value : aValues) {
            var sanitizedValue = SPARQLQueryBuilder.sanitizeQueryString_FTS(value);

            if (isBlank(sanitizedValue)) {
                continue;
            }

            // We assume that the FTS is case insensitive and found that some FTSes (e.g.
            // Fuseki) can have trouble matching if they get an upper-case query when they
            // internally lower-case
            if (builder.isCaseInsensitive()) {
                sanitizedValue = SPARQLQueryBuilder.toLowerCase(kb, sanitizedValue);
            }

            var fuzzyQuery = SPARQLQueryBuilder.convertToFuzzyMatchingQuery(sanitizedValue, "*");

            if (isBlank(fuzzyQuery)) {
                continue;
            }

            builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);

            valuePatterns.add(new BlazegraphFtsQuery(SPARQLQueryBuilder.VAR_SUBJECT,
                    SPARQLQueryBuilder.VAR_SCORE, SPARQLQueryBuilder.VAR_MATCH_TERM,
                    SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY, fuzzyQuery)
                            .withLimit(builder.getLimit()));
        }

        builder.addPattern(PRIMARY,
                and(builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY),
                        union(valuePatterns.toArray(GraphPattern[]::new))));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.kb.querybuilder;

import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.Priority.PRIMARY;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.and;
import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.union;

import java.util.ArrayList;

import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPattern;

/**
 * {@link FtsAdapter} that contributes Fuseki full-text-search (FTS) patterns to a
 * {@link SPARQLQueryBuilder}.
 * <p>
 * Every {@code withLabel...} method first registers the Fuseki search prefix with the builder and
 * then adds one {@code PRIMARY} pattern combining the match-term property binding with the
 * {@link FusekiFtsQuery} pattern(s).
 */
public class FtsAdapterFuseki
    implements FtsAdapter
{
    private final SPARQLQueryBuilder builder;

    /**
     * @param aBuilder
     *            the query builder receiving the prefix, projections and patterns.
     */
    public FtsAdapterFuseki(SPARQLQueryBuilder aBuilder)
    {
        builder = aBuilder;
    }

    /**
     * Adds the union of one FTS query per usable value, each filtered by the builder's equality
     * pattern on the matched term. Values whose sanitized form is blank are ignored.
     *
     * @param aValues
     *            the label values to look up.
     */
    @Override
    public void withLabelMatchingExactlyAnyOf(String... aValues)
    {
        builder.addPrefix(PREFIX_FUSEKI_SEARCH);

        var alternatives = new ArrayList<GraphPattern>();
        for (var label : aValues) {
            var ftsQuery = lowerCaseIfCaseInsensitive(
                    SPARQLQueryBuilder.sanitizeQueryString_FTS(label));

            if (!isBlank(ftsQuery)) {
                builder.addProjection(VAR_SCORE);

                // Search with the sanitized query, but filter on the label as given
                var exactMatches = new FusekiFtsQuery(VAR_SUBJECT, VAR_SCORE, VAR_MATCH_TERM,
                        VAR_MATCH_TERM_PROPERTY, ftsQuery) //
                                .withLimit(builder.getLimit()) //
                                .filter(builder.equalsPattern(VAR_MATCH_TERM, label,
                                        builder.getKnowledgeBase()));
                alternatives.add(exactMatches);
            }
        }

        addUnionOf(alternatives);
    }

    /**
     * Adds the union of one FTS query per usable value, each filtered by the builder's contains
     * pattern on the matched term. Values whose sanitized form is blank are ignored.
     *
     * @param aValues
     *            the values to look for inside labels.
     */
    @Override
    public void withLabelContainingAnyOf(String... aValues)
    {
        builder.addPrefix(PREFIX_FUSEKI_SEARCH);

        var alternatives = new ArrayList<GraphPattern>();
        for (var fragment : aValues) {
            var ftsQuery = lowerCaseIfCaseInsensitive(
                    SPARQLQueryBuilder.sanitizeQueryString_FTS(fragment));

            if (!isBlank(ftsQuery)) {
                builder.addProjection(VAR_SCORE);

                // Search with the sanitized query, but filter on the fragment as given
                var containingMatches = new FusekiFtsQuery(VAR_SUBJECT, VAR_SCORE,
                        VAR_MATCH_TERM, VAR_MATCH_TERM_PROPERTY, ftsQuery) //
                                .withLimit(builder.getLimit()) //
                                .filter(builder.containsPattern(VAR_MATCH_TERM, fragment));
                alternatives.add(containingMatches);
            }
        }

        addUnionOf(alternatives);
    }

    /**
     * Adds a single FTS query for the given prefix, filtered by the builder's starts-with pattern
     * on the matched term.
     * <p>
     * The FTS query is the trimmed (and possibly lower-cased) prefix, followed by a {@code *}
     * wildcard unless the prefix as given ends with a space. An empty trimmed prefix additionally
     * flags the builder to return an empty result.
     *
     * @param aPrefixQuery
     *            the prefix a label has to start with.
     */
    @Override
    public void withLabelStartingWith(String aPrefixQuery)
    {
        builder.addPrefix(PREFIX_FUSEKI_SEARCH);

        var ftsQuery = lowerCaseIfCaseInsensitive(aPrefixQuery.trim());

        if (ftsQuery.isEmpty()) {
            builder.setReturnEmptyResult(true);
        }

        // A trailing space signals that the user has finished typing the word; without it the
        // word may still be incomplete, so a wildcard is appended
        var wordIsComplete = aPrefixQuery.endsWith(" ");
        if (!wordIsComplete) {
            ftsQuery = ftsQuery + "*";
        }

        builder.addProjection(VAR_SCORE);

        // The FTS locates entries whose label contains the prefix; the filter then keeps only
        // those which actually start with it
        var propertyBinding = builder.bindMatchTermProperties(VAR_MATCH_TERM_PROPERTY);
        var prefixMatches = new FusekiFtsQuery(VAR_SUBJECT, VAR_SCORE, VAR_MATCH_TERM,
                VAR_MATCH_TERM_PROPERTY, ftsQuery) //
                        .withLimit(builder.getLimit()) //
                        .filter(builder.startsWithPattern(VAR_MATCH_TERM, aPrefixQuery));
        builder.addPattern(PRIMARY, and(propertyBinding, prefixMatches));
    }

    /**
     * Adds the union of one FTS query per usable value, without any additional filter on the
     * matched term. Each query string is produced by handing the sanitized value and {@code "~"}
     * to {@link SPARQLQueryBuilder#convertToFuzzyMatchingQuery}. Values for which the sanitized
     * form or the converted query is blank are ignored.
     *
     * @param aValues
     *            the values to match labels against.
     */
    @Override
    public void withLabelMatchingAnyOf(String... aValues)
    {
        builder.addPrefix(PREFIX_FUSEKI_SEARCH);

        var alternatives = new ArrayList<GraphPattern>();
        for (var label : aValues) {
            var sanitized = SPARQLQueryBuilder.sanitizeQueryString_FTS(label);

            if (!isBlank(sanitized)) {
                var fuzzyQuery = SPARQLQueryBuilder
                        .convertToFuzzyMatchingQuery(lowerCaseIfCaseInsensitive(sanitized), "~");

                if (!isBlank(fuzzyQuery)) {
                    builder.addProjection(VAR_SCORE);

                    alternatives.add(new FusekiFtsQuery(VAR_SUBJECT, VAR_SCORE, VAR_MATCH_TERM,
                            VAR_MATCH_TERM_PROPERTY, fuzzyQuery).withLimit(builder.getLimit()));
                }
            }
        }

        addUnionOf(alternatives);
    }

    /**
     * Lower-cases the query if the builder is case insensitive, otherwise returns it unchanged.
     * <p>
     * We assume that the FTS is case insensitive and found that some FTSes (e.g. Fuseki) can have
     * trouble matching if they get an upper-case query when they internally lower-case.
     */
    private String lowerCaseIfCaseInsensitive(String aQuery)
    {
        return builder.isCaseInsensitive()
                ? SPARQLQueryBuilder.toLowerCase(builder.getKnowledgeBase(), aQuery)
                : aQuery;
    }

    /**
     * Adds the primary pattern: the match-term property binding joined with the union of the
     * given alternatives.
     */
    private void addUnionOf(ArrayList<GraphPattern> aAlternatives)
    {
        var propertyBinding = builder.bindMatchTermProperties(VAR_MATCH_TERM_PROPERTY);
        var anyAlternative = union(aAlternatives.toArray(GraphPattern[]::new));
        builder.addPattern(PRIMARY, and(propertyBinding, anyAlternative));
    }
}
Loading

0 comments on commit 7916bd4

Please sign in to comment.