Skip to content

Commit

Permalink
#4931 - Support for GraphDB knowledge bases
Browse files Browse the repository at this point in the history
- Tentative support - fails many tests
  • Loading branch information
reckart committed Jul 6, 2024
1 parent beca769 commit 574a4db
Show file tree
Hide file tree
Showing 7 changed files with 495 additions and 1 deletion.
6 changes: 5 additions & 1 deletion inception/inception-kb/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -263,13 +263,17 @@
<artifactId>jackson-annotations</artifactId>
</dependency>

<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpmime</artifactId>
</dependency>

<!-- DEPENDENCIES FOR TESTING -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class IriConstants
public static final String PREFIX_MWAPI = "https://www.mediawiki.org/ontology#API/";
public static final String PREFIX_STARDOG = "tag:stardog:api:search:";
public static final String PREFIX_BLAZEGRAPH = "http://www.bigdata.com/rdf/search#";
public static final String PREFIX_GRAPHDB = "http://www.ontotext.com/";

public static final String UKP_WIKIDATA_SPARQL_ENDPOINT = "http://knowledgebase.ukp.informatik.tu-darmstadt.de:8890/sparql";
public static final Set<String> IMPLICIT_NAMESPACES = Set.of(RDF.NAMESPACE, RDFS.NAMESPACE,
Expand Down Expand Up @@ -90,6 +91,7 @@ public class IriConstants
public static final IRI FTS_WIKIDATA;
public static final IRI FTS_STARDOG;
public static final IRI FTS_BLAZEGRAPH;
public static final IRI FTS_GRAPHDB;
public static final IRI FTS_NONE;

public static final List<IRI> CLASS_IRIS;
Expand Down Expand Up @@ -121,6 +123,7 @@ public class IriConstants
FTS_WIKIDATA = vf.createIRI(PREFIX_MWAPI, "search");
FTS_STARDOG = vf.createIRI(PREFIX_STARDOG, "textMatch");
FTS_BLAZEGRAPH = vf.createIRI(PREFIX_BLAZEGRAPH, "search");
FTS_GRAPHDB = vf.createIRI(PREFIX_GRAPHDB, "fts");
FTS_NONE = vf.createIRI("FTS:NONE");

CLASS_IRIS = asList(RDFS.CLASS, OWL.CLASS, WIKIDATA_CLASS, SKOS.CONCEPT);
Expand All @@ -147,6 +150,10 @@ public static String getFtsBackendName(String aFTS)
return "Blazegraph DB";
}

if (FTS_GRAPHDB.stringValue().equals(aFTS)) {
return "GraphDB";
}

if (FTS_VIRTUOSO.stringValue().equals(aFTS)) {
return "Virtuoso";
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.kb.querybuilder;

import static de.tudarmstadt.ukp.inception.kb.IriConstants.PREFIX_GRAPHDB;
import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.convertToRequiredTokenPrefixMatchingQuery;
import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.Priority.PRIMARY;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.eclipse.rdf4j.sparqlbuilder.constraint.Expressions.and;
import static org.eclipse.rdf4j.sparqlbuilder.core.SparqlBuilder.prefix;
import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.and;
import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.union;
import static org.eclipse.rdf4j.sparqlbuilder.rdf.Rdf.iri;

import java.util.ArrayList;

import org.eclipse.rdf4j.sparqlbuilder.constraint.Expression;
import org.eclipse.rdf4j.sparqlbuilder.core.Prefix;
import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPattern;

/**
 * {@link FtsAdapter} that adds label-matching patterns based on the GraphDB full text search
 * predicate (see {@link GraphDbFtsQuery}) to a {@link SPARQLQueryBuilder}.
 * <p>
 * Every method follows the same scheme: the user-provided value is sanitized via
 * {@link SPARQLQueryBuilder#sanitizeQueryString_FTS(String)}, handed to the full text search to
 * obtain candidate labels, and the candidates are then narrowed down by an additional SPARQL
 * filter. The resulting pattern is registered with the builder as a {@code PRIMARY} pattern.
 */
public class FtsAdapterGraphDb
    implements FtsAdapter
{
    private static final String MULTI_CHAR_WILDCARD = "*";

    private static final Prefix PREFIX_GRAPHDB_SEARCH = prefix("onto", iri(PREFIX_GRAPHDB));

    private final SPARQLQueryBuilder builder;

    /**
     * Creates the adapter and registers the {@code onto} prefix with the given builder.
     *
     * @param aBuilder
     *            the query builder that receives the patterns and projections produced by this
     *            adapter.
     */
    public FtsAdapterGraphDb(SPARQLQueryBuilder aBuilder)
    {
        builder = aBuilder;
        builder.addPrefix(PREFIX_GRAPHDB_SEARCH);
    }

    /**
     * Adds a pattern matching labels for any of the given values, filtering the full text search
     * candidates through {@code builder.equalsPattern(...)}.
     *
     * @param aValues
     *            the values to look for; values that are blank after sanitization are skipped.
     */
    @Override
    public void withLabelMatchingExactlyAnyOf(String... aValues)
    {
        var kb = builder.getKnowledgeBase();

        var valuePatterns = new ArrayList<GraphPattern>();
        for (var value : aValues) {
            var sanitizedValue = builder.sanitizeQueryString_FTS(value);

            if (isBlank(sanitizedValue)) {
                continue;
            }

            builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);

            // The FTS is queried with the sanitized value, but the filter uses the original value
            valuePatterns.add(ftsQuery(sanitizedValue) //
                    .filter(builder.equalsPattern(SPARQLQueryBuilder.VAR_MATCH_TERM, value, kb)));
        }

        addUnionAsPrimaryPattern(valuePatterns);
    }

    /**
     * Adds a pattern matching labels for any of the given values, filtering the full text search
     * candidates through {@code builder.containsPattern(...)}.
     *
     * @param aValues
     *            the values to look for; values that are blank after sanitization are skipped.
     */
    @Override
    public void withLabelContainingAnyOf(String... aValues)
    {
        var valuePatterns = new ArrayList<GraphPattern>();
        for (var value : aValues) {
            var sanitizedValue = builder.sanitizeQueryString_FTS(value);

            if (isBlank(sanitizedValue)) {
                continue;
            }

            builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);

            // The FTS is queried with the sanitized value, but the filter uses the original value
            valuePatterns.add(ftsQuery(sanitizedValue) //
                    .filter(builder.containsPattern(SPARQLQueryBuilder.VAR_MATCH_TERM, value)));
        }

        addUnionAsPrimaryPattern(valuePatterns);
    }

    /**
     * Adds a pattern matching labels that start with the given prefix.
     *
     * @param aPrefixQuery
     *            the prefix as entered by the user.
     */
    @Override
    public void withLabelStartingWith(String aPrefixQuery)
    {
        // Strip single quotes and asterisks because they have special semantics
        var queryString = builder.sanitizeQueryString_FTS(aPrefixQuery);

        // NOTE(review): processing deliberately continues after noResult() exactly as before;
        // presumably the builder does not execute the query once flagged - confirm.
        if (isBlank(queryString)) {
            builder.noResult();
        }

        // If the query string entered by the user does not end with a space character, then
        // we assume that the user may not yet have finished writing the word and add a
        // wildcard
        if (!aPrefixQuery.endsWith(" ")) {
            queryString += MULTI_CHAR_WILDCARD;
        }

        builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);

        // Locate all entries where the label contains the prefix (using the FTS) and then
        // filter them by those which actually start with the prefix.
        builder.addPattern(PRIMARY, and( //
                builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY), //
                ftsQuery(queryString) //
                        .filter(builder.startsWithPattern(SPARQLQueryBuilder.VAR_MATCH_TERM,
                                aPrefixQuery))));
    }

    /**
     * Adds a pattern matching labels for any of the given values. Each sanitized value is turned
     * into a query via {@code convertToRequiredTokenPrefixMatchingQuery} using
     * {@value #MULTI_CHAR_WILDCARD} as the suffix, and the full text search candidates are
     * filtered through {@code builder.matchKbLanguage(...)}.
     *
     * @param aValues
     *            the values to look for; values that are blank after sanitization or conversion
     *            are skipped.
     */
    @Override
    public void withLabelMatchingAnyOf(String... aValues)
    {
        var valuePatterns = new ArrayList<GraphPattern>();
        for (var value : aValues) {
            var sanitizedValue = builder.sanitizeQueryString_FTS(value);

            if (isBlank(sanitizedValue)) {
                continue;
            }

            var fuzzyQuery = convertToRequiredTokenPrefixMatchingQuery(sanitizedValue, "",
                    MULTI_CHAR_WILDCARD);

            if (isBlank(fuzzyQuery)) {
                continue;
            }

            builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);

            var labelFilterExpressions = new ArrayList<Expression<?>>();
            // Qualified like every other constant in this class - the bare name is not imported
            labelFilterExpressions
                    .add(builder.matchKbLanguage(SPARQLQueryBuilder.VAR_MATCH_TERM));

            valuePatterns.add(ftsQuery(fuzzyQuery) //
                    .filter(and(labelFilterExpressions.toArray(Expression[]::new))));
        }

        addUnionAsPrimaryPattern(valuePatterns);
    }

    /**
     * Creates the full text search pattern for the given query string, bound to the standard
     * subject/score/match-term variables and carrying the builder's current limit.
     */
    private GraphDbFtsQuery ftsQuery(String aFtsQuery)
    {
        return new GraphDbFtsQuery(SPARQLQueryBuilder.VAR_SUBJECT, SPARQLQueryBuilder.VAR_SCORE,
                SPARQLQueryBuilder.VAR_MATCH_TERM, SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY,
                aFtsQuery) //
                        .withLimit(builder.getLimit());
    }

    /**
     * Flags the builder as yielding no result if there are no value patterns and then adds the
     * union of the value patterns, together with the match term property bindings, as a primary
     * pattern.
     */
    private void addUnionAsPrimaryPattern(ArrayList<GraphPattern> aValuePatterns)
    {
        // NOTE(review): the pattern is still added after noResult() exactly as before;
        // presumably the builder does not execute the query once flagged - confirm.
        if (aValuePatterns.isEmpty()) {
            builder.noResult();
        }

        builder.addPattern(PRIMARY, and( //
                builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY), //
                union(aValuePatterns.toArray(GraphPattern[]::new))));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.kb.querybuilder;

import static de.tudarmstadt.ukp.inception.kb.querybuilder.RdfCollection.collectionOf;
import static org.eclipse.rdf4j.sparqlbuilder.core.SparqlBuilder.prefix;
import static org.eclipse.rdf4j.sparqlbuilder.rdf.Rdf.iri;
import static org.eclipse.rdf4j.sparqlbuilder.rdf.Rdf.literalOf;

import java.util.ArrayList;

import org.eclipse.rdf4j.sparqlbuilder.core.Prefix;
import org.eclipse.rdf4j.sparqlbuilder.core.QueryElement;
import org.eclipse.rdf4j.sparqlbuilder.core.Variable;
import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPattern;
import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns;
import org.eclipse.rdf4j.sparqlbuilder.rdf.Iri;

import de.tudarmstadt.ukp.inception.kb.IriConstants;

/**
 * Graph pattern rendering a full text search through the {@code fts} predicate in the
 * {@link IriConstants#PREFIX_GRAPHDB} namespace.
 * <p>
 * The rendered pattern consists of two triple patterns: one attaching the search (query string
 * and optional limit, wrapped in an RDF collection) to the match term variable, and one linking
 * the subject to the match term via the match term property.
 */
public class GraphDbFtsQuery
    implements GraphPattern
{
    public static final Prefix PREFIX_GRAPHDB_FTS = prefix("onto",
            iri(IriConstants.PREFIX_GRAPHDB));
    public static final Iri GRAPHDB_FTS = PREFIX_GRAPHDB_FTS.iri("fts");

    private final Variable subject;
    // Accepted by the constructor, but not used when rendering the query string
    private final Variable score;
    private final Variable matchTerm;
    private final Variable matchTermProperty;
    private final String query;
    // Values of zero or less mean that no limit is rendered
    private int limit;

    /**
     * @param aSubject
     *            variable for the resource carrying the matched label.
     * @param aScore
     *            variable for the score; stored only.
     * @param aMatchTerm
     *            variable for the matched label the search is attached to.
     * @param aMatchTermProperty
     *            variable for the property connecting subject and matched label.
     * @param aQuery
     *            the full text search query string.
     */
    public GraphDbFtsQuery(Variable aSubject, Variable aScore, Variable aMatchTerm,
            Variable aMatchTermProperty, String aQuery)
    {
        this.subject = aSubject;
        this.score = aScore;
        this.matchTerm = aMatchTerm;
        this.matchTermProperty = aMatchTermProperty;
        this.query = aQuery;
    }

    /**
     * Sets the limit. If it is greater than zero, twice its value is passed to the search as the
     * second collection element.
     *
     * @param aLimit
     *            the limit.
     * @return this object for chaining.
     */
    public GraphDbFtsQuery withLimit(int aLimit)
    {
        this.limit = aLimit;
        return this;
    }

    @Override
    public String getQueryString()
    {
        // Arguments of the search: the query string, optionally followed by the doubled limit
        var ftsArguments = new ArrayList<QueryElement>();
        ftsArguments.add(literalOf(query));
        if (limit > 0) {
            ftsArguments.add(literalOf(2 * limit));
        }

        var searchPattern = matchTerm.has(GRAPHDB_FTS, collectionOf(ftsArguments));
        var labelPattern = subject.has(matchTermProperty, matchTerm);

        return GraphPatterns.and(searchPattern, labelPattern).getQueryString();
    }

    /**
     * @return always {@code false}.
     */
    @Override
    public boolean isEmpty()
    {
        return false;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_ALLEGRO_GRAPH;
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_BLAZEGRAPH;
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_FUSEKI;
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_GRAPHDB;
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_NONE;
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_RDF4J_LUCENE;
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_STARDOG;
Expand Down Expand Up @@ -799,6 +800,10 @@ private FtsAdapter getAdapter()
return new FtsAdapterBlazegraph(this);
}

if (FTS_GRAPHDB.equals(ftsMode)) {
return new FtsAdapterGraphDb(this);
}

if (FTS_FUSEKI.equals(ftsMode)) {
return new FtsAdapterFuseki(this);
}
Expand Down Expand Up @@ -995,6 +1000,9 @@ private Expression<?> matchString(SparqlFunction aFunction, Variable aVariable,
value = Stream.of(TOKENKIZER_PATTERN.split(aValue)) //
.map(t -> "(?=.*" + asRegexp(t) + ")") //
.collect(joining());
// value = Stream.of(TOKENKIZER_PATTERN.split(aValue)) //
// .map(t -> asRegexp(t)) //
// .collect(joining("|"));
break;
default:
throw new IllegalArgumentException(
Expand Down
Loading

0 comments on commit 574a4db

Please sign in to comment.