diff --git a/pom.xml b/pom.xml
index 377642b3..9f8910d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -406,6 +406,13 @@
+			<dependency>
+				<groupId>info.debatty</groupId>
+				<artifactId>java-string-similarity</artifactId>
+				<version>2.0.0</version>
+			</dependency>
+
+
diff --git a/sparql-anything-engine/pom.xml b/sparql-anything-engine/pom.xml
index cc0f94ac..28ee3e99 100644
--- a/sparql-anything-engine/pom.xml
+++ b/sparql-anything-engine/pom.xml
@@ -149,6 +149,12 @@
test
+
+		<dependency>
+			<groupId>info.debatty</groupId>
+			<artifactId>java-string-similarity</artifactId>
+		</dependency>
+
diff --git a/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/FacadeX.java b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/FacadeX.java
index ec9f2679..49d63856 100644
--- a/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/FacadeX.java
+++ b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/FacadeX.java
@@ -16,6 +16,7 @@
package io.github.sparqlanything.engine;
+import info.debatty.java.stringsimilarity.QGram;
import io.github.sparqlanything.engine.functions.*;
import io.github.sparqlanything.engine.functions.reflection.ReflectionFunctionFactory;
import io.github.sparqlanything.model.Triplifier;
@@ -39,53 +40,32 @@ public final class FacadeX {
public final static OpExecutorFactory ExecutorFactory = FacadeXOpExecutor::new;
public final static TriplifierRegister Registry = TriplifierRegister.getInstance();
+ public static final String ANY_SLOT_URI = Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "anySlot";
private static final Logger log = LoggerFactory.getLogger(FacadeX.class);
- public static final String ANY_SLOT_URI = Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "anySlot";
static {
try {
log.trace("Registering isFacadeXExtension function");
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "isFacadeXExtension",
- IsFacadeXExtension.class);
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "isFacadeXExtension", IsFacadeXExtension.class);
enablingMagicProperties();
enablingFunctions();
log.trace("Registering standard triplifiers");
- Registry.registerTriplifier("io.github.sparqlanything.bib.BibtexTriplifier",
- new String[]{"bib", "bibtex"}, new String[]{"application/x-bibtex"});
- Registry.registerTriplifier("io.github.sparqlanything.xml.XMLTriplifier", new String[]{"xml"},
- new String[]{"application/xml", "text/xml"});
- Registry.registerTriplifier("io.github.sparqlanything.csv.CSVTriplifier", new String[]{"csv", "tsv", "tab"},
- new String[]{"text/csv", "text/tab-separated-values"});
- Registry.registerTriplifier("io.github.sparqlanything.html.HTMLTriplifier", new String[]{"html"},
- new String[]{"text/html"});
- Registry.registerTriplifier("io.github.sparqlanything.text.TextTriplifier", new String[]{"txt"},
- new String[]{"text/plain"});
- Registry.registerTriplifier("io.github.sparqlanything.markdown.MARKDOWNTriplifier", new String[]{"md"},
- new String[]{"text/markdown", "text/x-markdown"});
- Registry.registerTriplifier("io.github.sparqlanything.docs.DocxTriplifier", new String[]{"docx"},
- new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"});
- Registry.registerTriplifier("io.github.sparqlanything.zip.TarTriplifier", new String[]{"tar"},
- new String[]{"application/x-tar"});
- Registry.registerTriplifier("io.github.sparqlanything.zip.ZipTriplifier", new String[]{"zip"},
- new String[]{"application/zip"});
- Registry.registerTriplifier("io.github.sparqlanything.binary.BinaryTriplifier",
- new String[]{"bin", "dat"}, new String[]{"application/octet-stream"});
- Registry.registerTriplifier("io.github.sparqlanything.json.JSONTriplifier", new String[]{"json"},
- new String[]{"application/json", "application/problem+json"});
- Registry.registerTriplifier("io.github.sparqlanything.yaml.YAMLTriplifier", new String[]{"yaml"},
- new String[]{"application/yaml", "text/yaml", "x-text/yaml"});
- Registry.registerTriplifier("io.github.sparqlanything.spreadsheet.SpreadsheetTriplifier",
- new String[]{"xls", "xlsx"}, new String[]{"application/vnd.ms-excel",
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"});
- Registry.registerTriplifier(RDFTriplifier.class.getCanonicalName(),
- new String[]{"rdf", "ttl", "nt", "jsonld", "owl", "trig", "nq", "trix", "trdf"},
- new String[]{"application/rdf+thrift", "application/trix+xml", "application/n-quads", "text/trig",
- "application/owl+xml", "text/turtle", "application/rdf+xml", "application/n-triples",
- "application/ld+json"});
- Registry.registerTriplifier("io.github.sparqlanything.binary.BinaryTriplifier",
- new String[]{"png", "jpeg", "jpg", "bmp", "tiff", "tif", "ico"},
- new String[]{"image/png", "image/jpeg", "image/bmp", "image/tiff", "image/vnd.microsoft.icon"});
+ Registry.registerTriplifier("io.github.sparqlanything.bib.BibtexTriplifier", new String[]{"bib", "bibtex"}, new String[]{"application/x-bibtex"});
+ Registry.registerTriplifier("io.github.sparqlanything.xml.XMLTriplifier", new String[]{"xml"}, new String[]{"application/xml", "text/xml"});
+ Registry.registerTriplifier("io.github.sparqlanything.csv.CSVTriplifier", new String[]{"csv", "tsv", "tab"}, new String[]{"text/csv", "text/tab-separated-values"});
+ Registry.registerTriplifier("io.github.sparqlanything.html.HTMLTriplifier", new String[]{"html"}, new String[]{"text/html"});
+ Registry.registerTriplifier("io.github.sparqlanything.text.TextTriplifier", new String[]{"txt"}, new String[]{"text/plain"});
+ Registry.registerTriplifier("io.github.sparqlanything.markdown.MARKDOWNTriplifier", new String[]{"md"}, new String[]{"text/markdown", "text/x-markdown"});
+ Registry.registerTriplifier("io.github.sparqlanything.docs.DocxTriplifier", new String[]{"docx"}, new String[]{"application/vnd.openxmlformats-officedocument.wordprocessingml.document"});
+ Registry.registerTriplifier("io.github.sparqlanything.zip.TarTriplifier", new String[]{"tar"}, new String[]{"application/x-tar"});
+ Registry.registerTriplifier("io.github.sparqlanything.zip.ZipTriplifier", new String[]{"zip"}, new String[]{"application/zip"});
+ Registry.registerTriplifier("io.github.sparqlanything.binary.BinaryTriplifier", new String[]{"bin", "dat"}, new String[]{"application/octet-stream"});
+ Registry.registerTriplifier("io.github.sparqlanything.json.JSONTriplifier", new String[]{"json"}, new String[]{"application/json", "application/problem+json"});
+ Registry.registerTriplifier("io.github.sparqlanything.yaml.YAMLTriplifier", new String[]{"yaml"}, new String[]{"application/yaml", "text/yaml", "x-text/yaml"});
+ Registry.registerTriplifier("io.github.sparqlanything.spreadsheet.SpreadsheetTriplifier", new String[]{"xls", "xlsx"}, new String[]{"application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"});
+ Registry.registerTriplifier(RDFTriplifier.class.getCanonicalName(), new String[]{"rdf", "ttl", "nt", "jsonld", "owl", "trig", "nq", "trix", "trdf"}, new String[]{"application/rdf+thrift", "application/trix+xml", "application/n-quads", "text/trig", "application/owl+xml", "text/turtle", "application/rdf+xml", "application/n-triples", "application/ld+json"});
+ Registry.registerTriplifier("io.github.sparqlanything.binary.BinaryTriplifier", new String[]{"png", "jpeg", "jpg", "bmp", "tiff", "tif", "ico"}, new String[]{"image/png", "image/jpeg", "image/bmp", "image/tiff", "image/vnd.microsoft.icon"});
} catch (TriplifierRegisterException e) {
throw new RuntimeException(e);
@@ -104,9 +84,9 @@ public static void enablingMagicProperties() {
final PropertyFunctionRegistry reg = PropertyFunctionRegistry.chooseRegistry(ARQ.getContext());
//log.trace("Registering {} magic property", ANY_SLOT_URI);
reg.put(ANY_SLOT_URI, p);
- if(log.isTraceEnabled()){
+ if (log.isTraceEnabled()) {
Iterator i = reg.keys();
- while(i.hasNext()){
+ while (i.hasNext()) {
log.trace("Registering magic property: {}", i.next());
}
}
@@ -129,58 +109,35 @@ public static void enablingFunctions() {
FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "isContainerMembershipProperty", IsContainerMembershipProperty.class);
log.trace("Enabling String functions");
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.trim",
- ReflectionFunctionFactory.get().makeFunction(String.class, "trim"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.substring",
- ReflectionFunctionFactory.get().makeFunction(String.class, "substring"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.indexOf",
- ReflectionFunctionFactory.get().makeFunction(String.class, "indexOf"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.startsWith",
- ReflectionFunctionFactory.get().makeFunction(String.class, "startsWith"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.endsWith",
- ReflectionFunctionFactory.get().makeFunction(String.class, "endsWith"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.replace",
- ReflectionFunctionFactory.get().makeFunction(String.class, "replace"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.strip",
- ReflectionFunctionFactory.get().makeFunction(String.class, "strip"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.stripLeading",
- ReflectionFunctionFactory.get().makeFunction(String.class, "stripLeading"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.stripTrailing",
- ReflectionFunctionFactory.get().makeFunction(String.class, "stripTrailing"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.trim", ReflectionFunctionFactory.get().makeFunction(String.class, "trim"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.substring", ReflectionFunctionFactory.get().makeFunction(String.class, "substring"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.indexOf", ReflectionFunctionFactory.get().makeFunction(String.class, "indexOf"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.startsWith", ReflectionFunctionFactory.get().makeFunction(String.class, "startsWith"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.endsWith", ReflectionFunctionFactory.get().makeFunction(String.class, "endsWith"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.replace", ReflectionFunctionFactory.get().makeFunction(String.class, "replace"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.strip", ReflectionFunctionFactory.get().makeFunction(String.class, "strip"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.stripLeading", ReflectionFunctionFactory.get().makeFunction(String.class, "stripLeading"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.stripTrailing", ReflectionFunctionFactory.get().makeFunction(String.class, "stripTrailing"));
FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.removeTags", RemoveTags.class);
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.lastIndexOf",
- ReflectionFunctionFactory.get().makeFunction(String.class, "lastIndexOf"));
-
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.md2Hex",
- ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "md2Hex"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.md5Hex",
- ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "md5Hex"));
-
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.sha1Hex",
- ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "sha1Hex"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.sha256Hex",
- ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "sha256Hex"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.sha384Hex",
- ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "sha384Hex"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.sha512Hex",
- ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "sha512Hex"));
-
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.capitalize",
- ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "capitalize"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.capitalizeFully",
- ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "capitalizeFully"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.initials",
- ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "initials"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.swapCase",
- ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "swapCase"));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.uncapitalize",
- ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "uncapitalize"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.lastIndexOf", ReflectionFunctionFactory.get().makeFunction(String.class, "lastIndexOf"));
+
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.md2Hex", ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "md2Hex"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.md5Hex", ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "md5Hex"));
+
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.sha1Hex", ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "sha1Hex"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.sha256Hex", ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "sha256Hex"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.sha384Hex", ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "sha384Hex"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "DigestUtils.sha512Hex", ReflectionFunctionFactory.get().makeFunction(DigestUtils.class, "sha512Hex"));
+
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.capitalize", ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "capitalize"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.capitalizeFully", ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "capitalizeFully"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.initials", ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "initials"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.swapCase", ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "swapCase"));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "WordUtils.uncapitalize", ReflectionFunctionFactory.get().makeFunction(WordUtils.class, "uncapitalize"));
try {
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.toLowerCase",
- ReflectionFunctionFactory.get().makeFunction(String.class.getMethod("toLowerCase")));
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.toUpperCase",
- ReflectionFunctionFactory.get().makeFunction(String.class.getMethod("toUpperCase")));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.toLowerCase", ReflectionFunctionFactory.get().makeFunction(String.class.getMethod("toLowerCase")));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "String.toUpperCase", ReflectionFunctionFactory.get().makeFunction(String.class.getMethod("toUpperCase")));
} catch (NoSuchMethodException e) {
throw new RuntimeException(e);
}
@@ -192,12 +149,13 @@ public static void enablingFunctions() {
log.error("", e);
}
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "LevenshteinDistance",new StringDistanceFunctionFactory<>(new LevenshteinDistance()) );
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "CosineDistance",new StringDistanceFunctionFactory<>(new CosineDistance()) );
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "JaccardDistance",new StringDistanceFunctionFactory<>(new JaccardDistance()) );
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "JaroWinklerDistance",new StringDistanceFunctionFactory<>(new JaroWinklerDistance()) );
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "LongestCommonSubsequenceDistance",new StringDistanceFunctionFactory<>(new LongestCommonSubsequenceDistance()) );
- FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "HammingDistance",new StringDistanceFunctionFactory<>(new HammingDistance()) );
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "LevenshteinDistance", new SimilarityScoreFunctionFactory<>(new LevenshteinDistance()));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "CosineDistance", new SimilarityScoreFunctionFactory<>(new CosineDistance()));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "JaccardDistance", new SimilarityScoreFunctionFactory<>(new JaccardDistance()));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "JaroWinklerDistance", new SimilarityScoreFunctionFactory<>(new JaroWinklerDistance()));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "LongestCommonSubsequenceDistance", new SimilarityScoreFunctionFactory<>(new LongestCommonSubsequenceDistance()));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "HammingDistance", new SimilarityScoreFunctionFactory<>(new HammingDistance()));
+ FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "QGramDistance", new StringDistanceFunctionFactory(new QGram()));
log.trace("Enabling function `serial`");
FunctionRegistry.get().put(Triplifier.FACADE_X_CONST_NAMESPACE_IRI + "serial", Serial.class);
diff --git a/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/FunctionsUtils.java b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/FunctionsUtils.java
new file mode 100644
index 00000000..c4233f6f
--- /dev/null
+++ b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/FunctionsUtils.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 SPARQL Anything Contributors @ http://github.com/sparql-anything
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.sparqlanything.engine.functions;
+
+import org.apache.jena.sparql.expr.ExprEvalException;
+import org.apache.jena.sparql.expr.NodeValue;
+
+/**
+ * Static helpers shared by the SPARQL function factories of this package.
+ */
+public abstract class FunctionsUtils {
+
+	/**
+	 * Converts a function argument into the plain string handed to string measures.
+	 *
+	 * @param nodeValue the evaluated function argument
+	 * @return the lexical form of a literal, or the URI of an IRI
+	 * @throws ExprEvalException if the argument is neither a literal nor an IRI
+	 */
+	public static String nodeValueAsString(NodeValue nodeValue) {
+		if (nodeValue.isLiteral()) {
+			// The lexical form is available for every literal; getString() is only
+			// defined for string-valued nodes and raises on e.g. numeric literals.
+			return nodeValue.asNode().getLiteralLexicalForm();
+		}
+		if (nodeValue.isIRI()) {
+			return nodeValue.asNode().getURI();
+		}
+		throw new ExprEvalException("Argument must be literal or IRI");
+	}
+
+}
diff --git a/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/SimilarityScoreFunctionFactory.java b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/SimilarityScoreFunctionFactory.java
new file mode 100644
index 00000000..eb18e071
--- /dev/null
+++ b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/SimilarityScoreFunctionFactory.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2023 SPARQL Anything Contributors @ http://github.com/sparql-anything
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.sparqlanything.engine.functions;
+
+import org.apache.commons.text.similarity.SimilarityScore;
+import org.apache.jena.sparql.expr.NodeValue;
+import org.apache.jena.sparql.function.Function;
+import org.apache.jena.sparql.function.FunctionBase2;
+import org.apache.jena.sparql.function.FunctionFactory;
+
+/**
+ * Exposes an Apache Commons Text {@link SimilarityScore} as a two-argument SPARQL function.
+ *
+ * @param <T> result type of the wrapped score; {@link Integer} and {@link Double} results are
+ *            returned as numeric literals, any other result as NaN
+ */
+public class SimilarityScoreFunctionFactory<T> implements FunctionFactory {
+
+	private final SimilarityScore<T> similarityScore;
+
+	/**
+	 * @param similarityScore the measure applied to the two function arguments
+	 */
+	public SimilarityScoreFunctionFactory(SimilarityScore<T> similarityScore) {
+		super();
+		this.similarityScore = similarityScore;
+	}
+
+	/**
+	 * Creates the function instance; the argument is not used by this factory.
+	 *
+	 * @param s the function URI passed in by the registry
+	 * @return a function that applies the wrapped score to the string form of its two arguments
+	 */
+	@Override
+	public Function create(String s) {
+		return new FunctionBase2() {
+			@Override
+			public NodeValue exec(NodeValue nodeValue, NodeValue nodeValue1) {
+				T result = similarityScore.apply(FunctionsUtils.nodeValueAsString(nodeValue), FunctionsUtils.nodeValueAsString(nodeValue1));
+				if (result instanceof Integer) {
+					return NodeValue.makeInteger((Integer) result);
+				} else if (result instanceof Double) {
+					return NodeValue.makeDouble((Double) result);
+				}
+				// Result types other than Integer/Double have no numeric mapping here.
+				return NodeValue.nvNaN;
+			}
+		};
+	}
+}
diff --git a/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/StringDistanceFunctionFactory.java b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/StringDistanceFunctionFactory.java
index 67fac61e..851ab1d5 100644
--- a/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/StringDistanceFunctionFactory.java
+++ b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/StringDistanceFunctionFactory.java
@@ -16,28 +16,19 @@
package io.github.sparqlanything.engine.functions;
-import org.apache.commons.text.similarity.SimilarityScore;
-import org.apache.jena.sparql.expr.ExprEvalException;
+import info.debatty.java.stringsimilarity.interfaces.StringDistance;
import org.apache.jena.sparql.expr.NodeValue;
import org.apache.jena.sparql.function.Function;
import org.apache.jena.sparql.function.FunctionBase2;
import org.apache.jena.sparql.function.FunctionFactory;
-public class StringDistanceFunctionFactory implements FunctionFactory {
+public class StringDistanceFunctionFactory implements FunctionFactory {
- private final SimilarityScore similarityScore;
+ private final StringDistance similarityScore;
- private static String nodeValueAsString(NodeValue nodeValue) {
- if (nodeValue.isLiteral()) {
- return nodeValue.toString();
- } else if (nodeValue.isIRI()) {
- return nodeValue.asNode().getURI();
- }
- throw new ExprEvalException("Argument must be literal or IRI");
- }
-
- public StringDistanceFunctionFactory(SimilarityScore similarityScore){
+ public StringDistanceFunctionFactory(StringDistance similarityScore){
+ super();
this.similarityScore = similarityScore;
}
@@ -47,13 +38,7 @@ public Function create(String s) {
return new FunctionBase2() {
@Override
public NodeValue exec(NodeValue nodeValue, NodeValue nodeValue1) {
- T result = similarityScore.apply(nodeValueAsString(nodeValue),nodeValueAsString(nodeValue1));
- if(result instanceof Integer){
- return NodeValue.makeInteger((Integer)result);
- } else if(result instanceof Double){
- return NodeValue.makeDouble((Double)result);
- }
- return NodeValue.nvNaN;
+ return NodeValue.makeDouble(similarityScore.distance(FunctionsUtils.nodeValueAsString(nodeValue),FunctionsUtils.nodeValueAsString(nodeValue1)));
}
};
}
diff --git a/sparql-anything-engine/src/test/java/io/github/sparqlanything/engine/test/FunctionsTest.java b/sparql-anything-engine/src/test/java/io/github/sparqlanything/engine/test/FunctionsTest.java
index 0e3b2d8b..25f3d507 100644
--- a/sparql-anything-engine/src/test/java/io/github/sparqlanything/engine/test/FunctionsTest.java
+++ b/sparql-anything-engine/src/test/java/io/github/sparqlanything/engine/test/FunctionsTest.java
@@ -65,6 +65,15 @@ public void levenshteinDistance() {
Assert.assertEquals(2, dist);
}
+	@Test
+	public void qgramDistance() {
+		String q = "PREFIX fx: <http://sparql.xyz/facade-x/ns/> SELECT ?result WHERE { BIND (fx:QGramDistance(\"ABCD\", \"ABCE\") AS ?result) } ";
+		ResultSet result = execute(q);
+		Assert.assertTrue(result.hasNext());
+		double dist = result.next().get("result").asLiteral().getDouble();
+		Assert.assertEquals(2.0, dist, 0.0); // with 3-grams, {ABC,BCD} vs {ABC,BCE} differ in two q-grams
+	}
+
@Test
public void levenshteinDistanceURI() {
String q = "PREFIX fx: SELECT ?result WHERE { BIND (fx:LevenshteinDistance(, ) AS ?result) } ";
@@ -89,7 +98,7 @@ public void jaccardDistance() {
ResultSet result = execute(q);
Assert.assertTrue(result.hasNext());
double dist = result.next().get("result").asLiteral().getDouble();
- Assert.assertEquals(0.4, dist, 0.01);
+ Assert.assertEquals(0.5, dist, 0.0);
}
@Test
@@ -98,7 +107,7 @@ public void jaroWinklerDistance() {
ResultSet result = execute(q);
Assert.assertTrue(result.hasNext());
double dist = result.next().get("result").asLiteral().getDouble();
- Assert.assertEquals(0.24, dist, 0.01);
+ Assert.assertEquals(0.44, dist, 0.01);
}
@Test
public void longestCommonSubsequenceDistance() {
diff --git a/sparql-anything-it/src/test/java/io/github/sparqlanything/it/SandboxTest.java b/sparql-anything-it/src/test/java/io/github/sparqlanything/it/SandboxTest.java
index 01de1b8c..b97fddea 100644
--- a/sparql-anything-it/src/test/java/io/github/sparqlanything/it/SandboxTest.java
+++ b/sparql-anything-it/src/test/java/io/github/sparqlanything/it/SandboxTest.java
@@ -16,6 +16,7 @@
package io.github.sparqlanything.it;
+import info.debatty.java.stringsimilarity.QGram;
import org.apache.jena.graph.Graph;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
@@ -35,6 +36,13 @@
public class SandboxTest {
+	@Ignore
+	@Test
+	public void m(){
+		// Scratch check: print the q-gram distance of two sample strings.
+		System.out.println(new QGram().distance("ABCD", "ABCE"));
+	}
+
@Ignore
@Test
public void model(){