Skip to content

Commit

Permalink
Fix string casting (see #416)
Browse files Browse the repository at this point in the history
Include QGram distance (see #394)
  • Loading branch information
luigi-asprino committed Nov 3, 2023
1 parent 30bf10a commit 2c5d286
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 118 deletions.
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,13 @@
</dependency>


<dependency>
<groupId>info.debatty</groupId>
<artifactId>java-string-similarity</artifactId>
<version>2.0.0</version>
</dependency>


</dependencies>
</dependencyManagement>

Expand Down
6 changes: 6 additions & 0 deletions sparql-anything-engine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@
<scope>test</scope>
</dependency>


<dependency>
<groupId>info.debatty</groupId>
<artifactId>java-string-similarity</artifactId>
</dependency>

</dependencies>

</project>

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (c) 2023 SPARQL Anything Contributors @ http://github.com/sparql-anything
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.github.sparqlanything.engine.functions;

import org.apache.jena.sparql.expr.ExprEvalException;
import org.apache.jena.sparql.expr.NodeValue;

/**
 * Static helpers shared by the SPARQL function implementations in this package.
 */
public abstract class FunctionsUtils {

    /**
     * Converts a function argument into the plain string it denotes.
     *
     * <p>A literal yields its lexical form (no surrounding quotes, datatype or language tag);
     * an IRI yields the IRI string.
     *
     * @param nodeValue the evaluated argument
     * @return the lexical form of the literal, or the URI of the IRI
     * @throws ExprEvalException if the argument is neither a literal nor an IRI
     */
    public static String nodeValueAsString(NodeValue nodeValue) {
        if (nodeValue.isLiteral()) {
            // Take the lexical form from the underlying node instead of getString():
            // getString() is only meaningful for string-valued NodeValues, whereas the
            // contract stated in the error message below is "any literal or IRI".
            return nodeValue.asNode().getLiteralLexicalForm();
        }
        if (nodeValue.isIRI()) {
            return nodeValue.asNode().getURI();
        }

        throw new ExprEvalException("Argument must be literal or IRI");
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2023 SPARQL Anything Contributors @ http://github.com/sparql-anything
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.github.sparqlanything.engine.functions;

import org.apache.commons.text.similarity.SimilarityScore;
import org.apache.jena.sparql.expr.NodeValue;
import org.apache.jena.sparql.function.Function;
import org.apache.jena.sparql.function.FunctionBase2;
import org.apache.jena.sparql.function.FunctionFactory;

/**
 * {@link FunctionFactory} that exposes a {@link SimilarityScore} as a two-argument function.
 *
 * <p>Both arguments are converted to strings with
 * {@link FunctionsUtils#nodeValueAsString(NodeValue)} before the score is applied.
 *
 * @param <T> the result type produced by the wrapped score
 */
public class SimilarityScoreFunctionFactory<T> implements FunctionFactory {

    private final SimilarityScore<T> similarityScore;

    /**
     * @param similarityScore the score applied to the two string arguments
     */
    public SimilarityScoreFunctionFactory(SimilarityScore<T> similarityScore) {
        this.similarityScore = similarityScore;
    }

    /**
     * Builds a new binary function backed by the wrapped score.
     *
     * @param s not used by this factory
     * @return a function returning an integer or double node value depending on the score's
     *         result type, and NaN for any other result type
     */
    @Override
    public Function create(String s) {
        return new FunctionBase2() {
            @Override
            public NodeValue exec(NodeValue left, NodeValue right) {
                String first = FunctionsUtils.nodeValueAsString(left);
                String second = FunctionsUtils.nodeValueAsString(right);
                T score = similarityScore.apply(first, second);

                if (score instanceof Double) {
                    return NodeValue.makeDouble((Double) score);
                }
                if (score instanceof Integer) {
                    return NodeValue.makeInteger((Integer) score);
                }
                // Neither Integer nor Double: report NaN.
                return NodeValue.nvNaN;
            }
        };
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,19 @@

package io.github.sparqlanything.engine.functions;

import org.apache.commons.text.similarity.SimilarityScore;
import org.apache.jena.sparql.expr.ExprEvalException;
import info.debatty.java.stringsimilarity.interfaces.StringDistance;
import org.apache.jena.sparql.expr.NodeValue;
import org.apache.jena.sparql.function.Function;
import org.apache.jena.sparql.function.FunctionBase2;
import org.apache.jena.sparql.function.FunctionFactory;

public class StringDistanceFunctionFactory<T> implements FunctionFactory {
public class StringDistanceFunctionFactory implements FunctionFactory {

private final SimilarityScore<T> similarityScore;
private final StringDistance similarityScore;

private static String nodeValueAsString(NodeValue nodeValue) {
if (nodeValue.isLiteral()) {
return nodeValue.toString();
} else if (nodeValue.isIRI()) {
return nodeValue.asNode().getURI();
}

throw new ExprEvalException("Argument must be literal or IRI");
}

public StringDistanceFunctionFactory(SimilarityScore<T> similarityScore){
public StringDistanceFunctionFactory(StringDistance similarityScore){
super();
this.similarityScore = similarityScore;
}

Expand All @@ -47,13 +38,7 @@ public Function create(String s) {
return new FunctionBase2() {
@Override
public NodeValue exec(NodeValue nodeValue, NodeValue nodeValue1) {
T result = similarityScore.apply(nodeValueAsString(nodeValue),nodeValueAsString(nodeValue1));
if(result instanceof Integer){
return NodeValue.makeInteger((Integer)result);
} else if(result instanceof Double){
return NodeValue.makeDouble((Double)result);
}
return NodeValue.nvNaN;
return NodeValue.makeDouble(similarityScore.distance(FunctionsUtils.nodeValueAsString(nodeValue),FunctionsUtils.nodeValueAsString(nodeValue1)));
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ public void levenshteinDistance() {
Assert.assertEquals(2, dist);
}

/**
 * Checks that fx:QGramDistance binds exactly 2.0 for the inputs "ABCD" and "ABCE".
 */
@Test
public void qgramDistance() {
    ResultSet rows = execute("PREFIX fx: <http://sparql.xyz/facade-x/ns/> SELECT ?result WHERE { BIND (fx:QGramDistance(\"ABCD\", \"ABCE\") AS ?result) } ");
    Assert.assertTrue(rows.hasNext());
    double distance = rows.next().get("result").asLiteral().getDouble();
    // Zero tolerance: the expected value must match exactly.
    Assert.assertEquals(2.0, distance, 0.0);
}

@Test
public void levenshteinDistanceURI() {
String q = "PREFIX fx: <http://sparql.xyz/facade-x/ns/> SELECT ?result WHERE { BIND (fx:LevenshteinDistance(<abc>, <cbe>) AS ?result) } ";
Expand All @@ -89,7 +98,7 @@ public void jaccardDistance() {
ResultSet result = execute(q);
Assert.assertTrue(result.hasNext());
double dist = result.next().get("result").asLiteral().getDouble();
Assert.assertEquals(0.4, dist, 0.01);
Assert.assertEquals(0.5, dist, 0.0);
}

@Test
Expand All @@ -98,7 +107,7 @@ public void jaroWinklerDistance() {
ResultSet result = execute(q);
Assert.assertTrue(result.hasNext());
double dist = result.next().get("result").asLiteral().getDouble();
Assert.assertEquals(0.24, dist, 0.01);
Assert.assertEquals(0.44, dist, 0.01);
}
@Test
public void longestCommonSubsequenceDistance() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package io.github.sparqlanything.it;

import info.debatty.java.stringsimilarity.QGram;
import org.apache.jena.graph.Graph;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
Expand All @@ -35,6 +36,13 @@

public class SandboxTest {

/**
 * Scratch check (ignored): prints the QGram distance between "ABCD" and "ABCE".
 */
@Ignore
@Test
public void m() {
    System.out.println(new QGram().distance("ABCD", "ABCE"));
}

@Ignore
@Test
public void model(){
Expand Down

0 comments on commit 2c5d286

Please sign in to comment.