diff --git a/.gitignore b/.gitignore
index 3b10967..47e7904 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,5 @@ project/plugins/project/
.worksheet
*.iml
.idea
+
+scalastyle-output.xml
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..9374038
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,10 @@
+language: scala
+sudo: false
+cache:
+ directories:
+ - $HOME/.m2
+scala:
+ - 2.11.11
+script:
+ - mvn scalastyle:check
+ - mvn test
\ No newline at end of file
diff --git a/README.md b/README.md
index cf2804e..c2b847f 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
# SANSA Inference Layer
[![Maven Central](https://maven-badges.herokuapp.com/maven-central/net.sansa-stack/sansa-inference-parent_2.11/badge.svg)](https://maven-badges.herokuapp.com/maven-central/net.sansa-stack/sansa-inference-parent_2.11)
-[![Build Status](https://ci.aksw.org/jenkins/job/SANSA%20Inference%20Layer/job/develop/badge/icon)](https://ci.aksw.org/jenkins/job/SANSA%20Inference%20Layer/job/develop/)
+[![Build Status](https://travis-ci.com/SANSA-Stack/SANSA-Inference.svg?branch=develop)](https://travis-ci.com/SANSA-Stack/SANSA-Inference)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Twitter](https://img.shields.io/twitter/follow/SANSA_Stack.svg?style=social)](https://twitter.com/SANSA_Stack)
@@ -17,14 +17,15 @@
- [Setup](#setup)
- [Prerequisites](#prerequisites)
- [From source](#from-source)
- - [Using Maven pre-build artifacts](#)
+ - [Using Maven pre-build artifacts](#using-maven-pre-build-artifacts)
- [Using SBT](#using-SBT)
- [Usage](#usage)
- [Example](#example)
- - [Supported Reasoning Profiles](#)
+ - [Supported Reasoning Profiles](#supported-reasoning-profiles)
- [RDFS](#rdfs)
- [RDFS Simple](#rdfs-simple)
- [OWL Horst](#owl-horst)
+ - [How to Contribute](#how-to-contribute)
## Structure
@@ -216,3 +217,7 @@ OWL Horst is a fragment of OWL and was proposed by Herman ter Horst [1] defining
[1] Herman J. ter Horst:
*Completeness, decidability and complexity of entailment for RDF Schema and a semantic extension involving the OWL vocabulary.* J. Web Sem. 3(2-3): 79-115 (2005)
+
+## How to Contribute
+We always welcome new contributors to the project! Please see [our contribution guide](http://sansa-stack.net/contributing-to-sansa/) for more details on how to get started contributing to SANSA.
+
diff --git a/pom.xml b/pom.xml
index 99a389d..7cd5a03 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@
4.0.0
net.sansa-stack
sansa-inference-parent_2.11
- 0.3.0
+ 0.4.0
pom
Inference API - Parent
@@ -66,12 +66,12 @@
UTF-8
UTF-8
- 2.11.11
+ 2.11.12
2.11
- 2.2.1
- 1.3.2
- 3.5.0
- 0.3.0
+ 2.3.1
+ 1.5.0
+ 3.7.0
+ 0.4.0
${sansa.stack.version}
${sansa.stack.version}
${sansa.stack.version}
@@ -81,7 +81,8 @@
512m
512m
AKSW
- 5.1.3
+ 5.1.5
+ ${project.basedir}/scalastyle-config.xml
@@ -93,8 +94,17 @@
${project.groupId}
- sansa-rdf-spark-core
+ sansa-rdf-spark_${scala.binary.version}
${sansa.rdf.version}
+
+
+
+ net.jpountz.lz4
+ lz4
+
+
${project.groupId}
@@ -148,6 +158,16 @@
spark-sql_${scala.binary.version}
${spark.version}
+
+ org.apache.spark
+ spark-streaming_${scala.binary.version}
+ ${spark.version}
+
+
+ org.apache.spark
+ spark-streaming-kafka-0-10_${scala.binary.version}
+ ${spark.version}
+
@@ -177,6 +197,16 @@
jena-arq
${jena.version}
+
+ org.apache.jena
+ jena-tdb
+ ${jena.version}
+
+
+ org.apache.jena
+ jena-cmds
+ ${jena.version}
+
@@ -202,24 +232,29 @@
- com.assembla.scala-incubator
+ org.scala-graph
graph-core_${scala.binary.version}
- 1.10.0
+ 1.12.5
- com.assembla.scala-incubator
+ org.scala-graph
graph-dot_${scala.binary.version}
- 1.9.0
+ 1.11.5
org.jgrapht
jgrapht-core
- 1.1.0
+ 1.2.0
+
+
+ org.jgrapht
+ jgrapht-io
+ 1.2.0
org.jgrapht
jgrapht-ext
- 1.1.0
+ 1.2.0
org.gephi
@@ -231,7 +266,7 @@
org.apache.calcite
calcite-core
- 1.13.0
+ 1.16.0
@@ -250,13 +285,13 @@
org.specs2
specs2-core_${scala.binary.version}
- 4.0.2
+ 4.2.0
test
org.specs2
specs2-junit_${scala.binary.version}
- 4.0.2
+ 4.2.0
test
@@ -264,7 +299,7 @@
com.typesafe.scala-logging
scala-logging_${scala.binary.version}
- 3.7.2
+ 3.9.0
@@ -278,7 +313,7 @@
com.chuusai
shapeless_${scala.binary.version}
- 2.3.2
+ 2.3.3
@@ -292,7 +327,7 @@
com.typesafe
config
- 1.3.2
+ 1.3.3
@@ -520,30 +555,33 @@
com.versioneye
versioneye-maven-plugin
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+ org.scalastyle
+ scalastyle-maven-plugin
+ 1.0.0
+
+ false
+ true
+ true
+ false
+ ${project.basedir}/src/main/scala
+ ${project.basedir}/src/test/scala
+
+ ${scalastyle.config.path}
+ ${project.basedir}/scalastyle-output.xml
+ UTF-8
+
+
+
+
+ check
+
+
+
+
+
@@ -668,7 +706,8 @@
- ossrh
+
+ release
ossrh
@@ -786,5 +825,18 @@
+
+
+
+ root-dir
+
+
+ ${project.basedir}/../../scalastyle-config.xml
+
+
+
+ ${project.basedir}/../scalastyle-config.xml
+
+
diff --git a/sansa-inference-common/pom.xml b/sansa-inference-common/pom.xml
index cc789a9..60a4610 100644
--- a/sansa-inference-common/pom.xml
+++ b/sansa-inference-common/pom.xml
@@ -4,12 +4,12 @@
sansa-inference-parent_2.11
net.sansa-stack
- 0.3.0
+ 0.4.0
../pom.xml
net.sansa-stack
sansa-inference-common_${scala.binary.version}
- 0.3.0
+ 0.4.0
Inference API - Common
A set of common objects used in the Inference API
@@ -31,21 +31,19 @@
org.apache.jena
jena-tdb
- 3.5.0
org.apache.jena
jena-cmds
- 3.5.0
- com.assembla.scala-incubator
+ org.scala-graph
graph-core_${scala.binary.version}
- com.assembla.scala-incubator
+ org.scala-graph
graph-dot_${scala.binary.version}
@@ -56,6 +54,10 @@
org.jgrapht
jgrapht-ext
+
+ org.jgrapht
+ jgrapht-io
+
org.gephi
gephi-toolkit
@@ -65,6 +67,7 @@
google-collections
+ compile
@@ -84,6 +87,11 @@
3.5.0
+
+
+ com.github.scopt
+ scopt_${scala.binary.version}
+
diff --git a/sansa-inference-common/src/main/resources/log4j.properties b/sansa-inference-common/src/main/resources/log4j.properties
index 0caae7a..dae125a 100644
--- a/sansa-inference-common/src/main/resources/log4j.properties
+++ b/sansa-inference-common/src/main/resources/log4j.properties
@@ -1,5 +1,5 @@
# Root logger option
-log4j.rootLogger=INFO, stdout
+log4j.rootLogger=DEBUG, stdout
# Direct log messages to a log file
log4j.appender.file=org.apache.log4j.RollingFileAppender
@@ -20,3 +20,5 @@ log4j.logger.akka.remote.Remoting=ERROR
log4j.logger.org.apache.hadoop=ERROR
log4j.logger.org.apache.calcite=ERROR
+
+log4j.logger.scalax.collection.connectivity.GraphComponents=OFF
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/JenaOps.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/JenaOps.scala
index f7b5b47..ca33a96 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/JenaOps.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/JenaOps.scala
@@ -1,10 +1,10 @@
package net.sansa_stack.inference.data
+import scala.collection.JavaConverters._
+
import org.apache.jena.datatypes.{BaseDatatype, RDFDatatype, TypeMapper}
import org.apache.jena.graph.{Graph => JenaGraph, Node => JenaNode, Triple => JenaTriple, _}
-import org.apache.jena.rdf.model.{Literal => JenaLiteral, Seq => _}
-
-import scala.collection.JavaConverters._
+import org.apache.jena.rdf.model.{Seq => _}
class JenaOps extends RDFOps[Jena] {
@@ -33,10 +33,12 @@ class JenaOps extends RDFOps[Jena] {
val s = t.getSubject
val p = t.getPredicate
val o = t.getObject
- if (p.isInstanceOf[Jena#URI])
- (s, p.asInstanceOf[Jena#URI], o)
- else
- throw new RuntimeException("fromTriple: predicate " + p.toString + " must be a URI")
+ p match {
+ case uri: Node_URI =>
+ (s, uri, o)
+ case _ =>
+ throw new RuntimeException("fromTriple: predicate " + p.toString + " must be a URI")
+ }
}
// node
@@ -52,10 +54,11 @@ class JenaOps extends RDFOps[Jena] {
def makeUri(iriStr: String): Jena#URI = { NodeFactory.createURI(iriStr).asInstanceOf[Node_URI] }
def fromUri(node: Jena#URI): String =
- if (node.isURI)
+ if (node.isURI) {
node.getURI
- else
+ } else {
throw new RuntimeException("fromUri: " + node.toString() + " must be a URI")
+ }
// bnode
@@ -67,17 +70,18 @@ class JenaOps extends RDFOps[Jena] {
}
def fromBNode(bn: Jena#BNode): String =
- if (bn.isBlank)
+ if (bn.isBlank) {
bn.getBlankNodeId.getLabelString
- else
+ } else {
throw new RuntimeException("fromBNode: " + bn.toString + " must be a BNode")
+ }
// literal
// TODO the javadoc doesn't say if this is thread safe
lazy val mapper = TypeMapper.getInstance
- def jenaDatatype(datatype: Jena#URI) = {
+ private def jenaDatatype(datatype: Jena#URI) = {
val iriString = fromUri(datatype)
val typ = mapper.getTypeByName(iriString)
if (typ == null) {
@@ -94,10 +98,11 @@ class JenaOps extends RDFOps[Jena] {
val __rdfLangStringURI: Jena#URI = makeUri("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
def makeLiteral(lexicalForm: String, datatype: Jena#URI): Jena#Literal =
- if (datatype == __xsdStringURI)
+ if (datatype == __xsdStringURI) {
NodeFactory.createLiteral(lexicalForm, null, null).asInstanceOf[Node_Literal]
- else
+ } else {
NodeFactory.createLiteral(lexicalForm, null, jenaDatatype(datatype)).asInstanceOf[Node_Literal]
+ }
def makeLangTaggedLiteral(lexicalForm: String, lang: Jena#Lang): Jena#Literal =
NodeFactory.createLiteral(lexicalForm, fromLang(lang), null).asInstanceOf[Node_Literal]
@@ -105,9 +110,9 @@ class JenaOps extends RDFOps[Jena] {
// lang
- def makeLang(langString: String) = langString
+ def makeLang(langString: String): String = langString
- def fromLang(lang: Jena#Lang) = lang
+ def fromLang(lang: Jena#Lang): String = lang
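For orientation, a minimal sketch of how the factory and accessor methods touched above could be exercised; the IRIs are placeholders and `Triple.create` is the standard Jena API, not part of this diff:

```scala
import org.apache.jena.graph.Triple
import net.sansa_stack.inference.data.JenaOps

val ops = new JenaOps()
val s = ops.makeUri("http://example.org/s")
val p = ops.makeUri("http://example.org/p")
val o = ops.makeLiteral("foo", ops.makeUri("http://www.w3.org/2001/XMLSchema#string"))

// fromTriple now pattern-matches on Node_URI and fails fast on non-URI predicates
val (subj, pred, obj) = ops.fromTriple(Triple.create(s, p, o))
```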
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/RDF.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/RDF.scala
index c520929..244fdb9 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/RDF.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/RDF.scala
@@ -39,4 +39,4 @@ trait RDF {
// types for the graph traversal API
type NodeMatch
type NodeAny <: NodeMatch
-}
\ No newline at end of file
+}
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/RDFTuple.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/RDFTuple.scala
index 4be2f51..e5db778 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/RDFTuple.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/RDFTuple.scala
@@ -8,6 +8,7 @@ package net.sansa_stack.inference.data
* @author Lorenz Buehmann
*/
case class RDFTuple(s: String, o: String) extends Product2[String, String] {
- override def _1: String = s
- override def _2: String = o
- }
\ No newline at end of file
+ override def _1: String = s
+
+ override def _2: String = o
+}
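A tiny usage sketch of the reformatted case class (the values are placeholders):

```scala
import net.sansa_stack.inference.data.RDFTuple

val t = RDFTuple("http://example.org/s", "http://example.org/o")
// the Product2 members mirror the subject/object fields
assert(t._1 == t.s && t._2 == t.o)
```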
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/SimpleRDFOps.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/SimpleRDFOps.scala
index 0db5882..2d78419 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/SimpleRDFOps.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/data/SimpleRDFOps.scala
@@ -17,10 +17,12 @@ class SimpleRDFOps extends RDFOps[SimpleRDF] {
val s = t.s
val p = t.p
val o = t.o
- if (p.isInstanceOf[SimpleRDF#URI])
- (s, p.asInstanceOf[SimpleRDF#URI], o)
- else
- throw new RuntimeException("fromTriple: predicate " + p.toString + " must be a URI")
+ p match {
+ case uri: String =>
+ (s, uri, o)
+ case _ =>
+ throw new RuntimeException("fromTriple: predicate " + p.toString + " must be a URI")
+ }
}
// node
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/RuleDependencyGraphAnalyzer.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/RuleDependencyGraphAnalyzer.scala
index 1a43d47..e1f6552 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/RuleDependencyGraphAnalyzer.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/RuleDependencyGraphAnalyzer.scala
@@ -137,8 +137,8 @@ object RuleDependencyGraphAnalyzer extends Logging{
def main(args: Array[String]) {
// we re-use the JENA API for parsing rules
val filenames = List(
-// "rules/rdfs-simple.rules"
- "rules/owl_horst.rules"
+ "rules/rdfs-simple.rules"
+// "rules/owl_horst.rules"
// "rules/owl_rl.rules"
)
@@ -154,7 +154,7 @@ object RuleDependencyGraphAnalyzer extends Logging{
// print each rule as graph
rules.foreach { r =>
- val g = RuleUtils.asGraph(r).export(new File(graphDir, r.getName + ".graphml").toString)
+ RuleUtils.asGraph(r).export(new File(graphDir, r.getName + ".graphml").toString)
}
// generate graph
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/RuleDependencyGraphGenerator.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/RuleDependencyGraphGenerator.scala
index 8f16335..ee4de84 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/RuleDependencyGraphGenerator.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/RuleDependencyGraphGenerator.scala
@@ -5,6 +5,7 @@ import java.util.stream.Collectors
import scala.collection
import scala.collection.JavaConverters._
import scala.language.{existentials, implicitConversions}
+
import scalax.collection.GraphPredef._
import scalax.collection.GraphTraversal.Parameters
import scalax.collection._
@@ -13,7 +14,6 @@ import scalax.collection.edge._
import scalax.collection.mutable.DefaultGraphImpl
import scalax.collection.GraphPredef._
import scalax.collection.GraphEdge._
-
import org.apache.jena.graph.{Node, NodeFactory}
import org.apache.jena.reasoner.TriplePattern
import org.apache.jena.reasoner.rulesys.Rule
@@ -259,12 +259,12 @@ object RuleDependencyGraphGenerator extends Logging {
pairsOfRules :+= (cycle.last, cycle(0))
// map to list of edges
- val edges: Buffer[graph.EdgeT] = pairsOfRules.map(e => {
+ val edges: Buffer[graph.EdgeT] = pairsOfRules.flatMap(e => {
val node1 = graph get e._1
val node2 = graph get e._2
node1.outgoing.filter(_.target == node2)
- }).flatten
+ })
debug("Edges: " + edges.mkString(", "))
// map to edge labels, i.e. the predicates
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/minimizer/MinimizationRule.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/minimizer/MinimizationRule.scala
index 8b15776..362a8df 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/minimizer/MinimizationRule.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/minimizer/MinimizationRule.scala
@@ -18,4 +18,4 @@ abstract class MinimizationRule extends Logging {
def apply(graph: RuleDependencyGraph): RuleDependencyGraph
-}
\ No newline at end of file
+}
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/minimizer/RuleDependencyGraphMinimizer.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/minimizer/RuleDependencyGraphMinimizer.scala
index c93103a..4d50e6d 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/minimizer/RuleDependencyGraphMinimizer.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/minimizer/RuleDependencyGraphMinimizer.scala
@@ -1,10 +1,12 @@
package net.sansa_stack.inference.rules.minimizer
import scala.collection.mutable.{ArrayBuffer, Buffer}
+
import scalax.collection.Graph
import scalax.collection.edge.LDiEdge
import scala.collection.JavaConverters._
-import scalax.collection.GraphTraversal.Parameters
+import scala.collection.mutable
+import scalax.collection.GraphTraversal.Parameters
import org.apache.jena.graph.{Node, NodeFactory}
import org.apache.jena.reasoner.TriplePattern
import org.apache.jena.reasoner.rulesys.Rule
@@ -16,7 +18,6 @@ import net.sansa_stack.inference.rules.RuleDependencyGraphGenerator.{asString, d
import net.sansa_stack.inference.utils.{GraphUtils, RuleUtils}
import net.sansa_stack.inference.utils.graph.LabeledEdge
import net.sansa_stack.inference.utils.RuleUtils._
-
import scalax.collection.GraphTraversal.Parameters
import scalax.collection._
import scalax.collection.edge.Implicits._
@@ -294,11 +295,13 @@ abstract class RuleDependencyGraphMinimizer extends MinimizationRuleExecutor {
// debug(cycles.asScala.mkString(","))
// cycles that contain the current node
- val cyclesWithNode: Buffer[Buffer[Rule]] = allCycles.asScala.filter(cycle => cycle.contains(node.value)).map(cycle => cycle.asScala)
+ val cyclesWithNode: mutable.Buffer[mutable.Buffer[Rule]] = allCycles.asScala
+ .filter(cycle => cycle.contains(node.value))
+ .map(cycle => cycle.asScala)
debug("Cycles: " + cyclesWithNode.map(c => c.map(r => r.getName)).mkString(","))
// cycles that use the same property
- val cyclesWithNodeSameProp: Map[Node, scala.List[Buffer[graph.EdgeT]]] = cyclesWithNode.map(cycle => {
+ val cyclesWithNodeSameProp: Map[Node, scala.List[mutable.Buffer[graph.EdgeT]]] = cyclesWithNode.map(cycle => {
debug("Cycle: " + cycle.map(r => r.getName).mkString(", "))
@@ -307,12 +310,12 @@ abstract class RuleDependencyGraphMinimizer extends MinimizationRuleExecutor {
pairsOfRules :+= (cycle.last, cycle(0))
// map to list of edges
- val edges: Buffer[graph.EdgeT] = pairsOfRules.map(e => {
+ val edges: mutable.Buffer[graph.EdgeT] = pairsOfRules.flatMap(e => {
val node1 = graph get e._1
val node2 = graph get e._2
node1.outgoing.filter(_.target == node2)
- }).flatten
+ })
debug("Edges: " + edges.mkString(", "))
// map to edge labels, i.e. the predicates
@@ -325,9 +328,14 @@ abstract class RuleDependencyGraphMinimizer extends MinimizationRuleExecutor {
if (samePred) Some(predicates(0), edges) else None
}).filter(_.isDefined).map(_.get).groupBy(e => e._1).mapValues(e => e.map(x => x._2).toList)
- var removedCycles: collection.mutable.Set[Buffer[graph.EdgeT]] = collection.mutable.Set()
+ var removedCycles: collection.mutable.Set[mutable.Buffer[graph.EdgeT]] = collection.mutable.Set()
- val tmp: Map[Node, Map[Int, List[Buffer[graph.EdgeT]]]] = cyclesWithNodeSameProp.mapValues(value => value.map(cycle => (cycle.size, cycle)).groupBy(_._1).mapValues(e => e.map(x => x._2).toList))
+ val tmp: Map[Node, Map[Int, List[mutable.Buffer[graph.EdgeT]]]] =
+ cyclesWithNodeSameProp
+ .mapValues(value =>
+ value.map(cycle => (cycle.size, cycle))
+ .groupBy(_._1)
+ .mapValues(e => e.map(x => x._2)))
tmp.foreach(predicate2Cycles => {
debug("predicate: " + predicate2Cycles._1)
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimpleCalciteConnection.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimpleCalciteConnection.scala
index 3e246e0..acae963 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimpleCalciteConnection.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimpleCalciteConnection.scala
@@ -8,7 +8,7 @@ import java.util.concurrent.Executor
import org.apache.calcite.adapter.java.JavaTypeFactory
import org.apache.calcite.config.CalciteConnectionConfig
-import org.apache.calcite.jdbc.CalciteConnection
+import org.apache.calcite.jdbc.{CalciteConnection, CalcitePrepare}
import org.apache.calcite.linq4j.tree.Expression
import org.apache.calcite.linq4j.{Enumerator, Queryable}
import org.apache.calcite.schema.SchemaPlus
@@ -150,4 +150,6 @@ class SimpleCalciteConnection extends CalciteConnection{
override def execute[T](expression: Expression, `type`: Type): T = null.asInstanceOf[T]
override def executeQuery[T](queryable: Queryable[T]): Enumerator[T] = null
+
+ override def createPrepareContext(): CalcitePrepare.Context = null
}
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimplePlanGenerator.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimplePlanGenerator.scala
index b35fbe8..75e1a26 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimplePlanGenerator.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimplePlanGenerator.scala
@@ -1,30 +1,20 @@
package net.sansa_stack.inference.rules.plan
-import java.io.PrintWriter
-import java.util.Collections
+import scala.collection.JavaConverters._
+import scala.util.Try
-import com.google.common.collect.ImmutableList
import org.apache.calcite.config.Lex
+import org.apache.calcite.interpreter.{BindableConvention, Bindables}
import org.apache.calcite.plan.{RelOptUtil, _}
+import org.apache.calcite.rel.`type`.RelDataTypeSystem
+import org.apache.calcite.rel.rules._
import org.apache.calcite.rel.{RelCollationTraitDef, RelNode}
import org.apache.calcite.schema.SchemaPlus
import org.apache.calcite.sql.parser.SqlParser
import org.apache.calcite.tools._
-import collection.JavaConverters._
-import scala.util.Try
-
-import org.apache.calcite.rel.`type`.RelDataTypeSystem
-import org.apache.calcite.rel.externalize.RelWriterImpl
-import org.apache.calcite.rel.rules._
import org.apache.jena.reasoner.rulesys.Rule
import net.sansa_stack.inference.utils.{Logging, RuleUtils}
-import org.apache.calcite.adapter.enumerable.{EnumerableConvention, EnumerableRules}
-import org.apache.calcite.interpreter.{BindableConvention, Bindables}
-import org.apache.calcite.plan.RelOptPlanner.CannotPlanException
-import org.apache.calcite.plan.hep.{HepMatchOrder, HepPlanner, HepProgramBuilder}
-import org.apache.calcite.plan.volcano.VolcanoPlanner
-import org.apache.calcite.sql2rel.{RelDecorrelator, SqlToRelConverter}
/**
* @author Lorenz Buehmann
@@ -37,7 +27,7 @@ class SimplePlanGenerator(schema: SchemaPlus) extends Logging {
BindableConvention.INSTANCE.getTraitDef
)
- val optRuleSet = RuleSets.ofList(
+ val optRuleSet: RuleSet = RuleSets.ofList(
FilterJoinRule.FILTER_ON_JOIN,// push a filter into a join
FilterJoinRule.JOIN,// push filter into the children of a join
ProjectJoinTransposeRule.INSTANCE// push a projection to the children of a join
@@ -68,13 +58,13 @@ class SimplePlanGenerator(schema: SchemaPlus) extends Logging {
// // Context provides a way to store data within the planner session that can be accessed in planner rules.
// .context(Contexts.EMPTY_CONTEXT)
// // Rule sets to use in transformation phases. Each transformation phase can use a different set of rules.
-//// .ruleSets(optRuleSet)
+// // .ruleSets(optRuleSet)
// .ruleSets(RuleSets.ofList(Bindables.BINDABLE_TABLE_SCAN_RULE, Bindables.BINDABLE_PROJECT_RULE, Bindables.BINDABLE_JOIN_RULE, Bindables.BINDABLE_FILTER_RULE, FilterJoinRule.FILTER_ON_JOIN))
// .programs(Programs.ofRules(Bindables.BINDABLE_TABLE_SCAN_RULE, Bindables.BINDABLE_PROJECT_RULE, Bindables.BINDABLE_JOIN_RULE, Bindables.BINDABLE_FILTER_RULE, FilterJoinRule.FILTER_ON_JOIN))
//
// // Custom cost factory to use during optimization
// .costFactory(null)
-//// .programs(program)
+// // .programs(program)
// .typeSystem(RelDataTypeSystem.DEFAULT)
// .build()
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimpleRelBuilder.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimpleRelBuilder.scala
index fc3f415..4b24037 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimpleRelBuilder.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/SimpleRelBuilder.scala
@@ -1,10 +1,14 @@
package net.sansa_stack.inference.rules.plan
+import java.util.Properties
+
+import org.apache.calcite.config.{CalciteConnectionConfig, CalciteConnectionConfigImpl, CalciteConnectionProperty}
import org.apache.calcite.jdbc.CalciteSchema
import org.apache.calcite.plan.{Context, RelOptCluster, RelOptPlanner, RelOptSchema}
import org.apache.calcite.prepare.CalciteCatalogReader
import org.apache.calcite.rex.RexBuilder
import org.apache.calcite.schema.SchemaPlus
+import org.apache.calcite.sql.parser.SqlParser
import org.apache.calcite.tools.Frameworks.PlannerAction
import org.apache.calcite.tools.{FrameworkConfig, Frameworks, RelBuilder}
@@ -56,11 +60,18 @@ object SimpleRelBuilder {
val calciteSchema = CalciteSchema.from(config.getDefaultSchema)
val relOptSchema = new CalciteCatalogReader(
calciteSchema,
- config.getParserConfig.caseSensitive(),
defaultRelOptSchema.getSchemaPaths.get(0),
- typeFactory)
+ typeFactory,
+ connectionConfig(config.getParserConfig))
new SimpleRelBuilder(config.getContext, cluster, relOptSchema)
}
+ def connectionConfig(parserConfig : SqlParser.Config): CalciteConnectionConfig = {
+ val prop = new Properties()
+ prop.setProperty(CalciteConnectionProperty.CASE_SENSITIVE.camelName,
+ String.valueOf(parserConfig.caseSensitive))
+ new CalciteConnectionConfigImpl(prop)
+ }
+
}
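A sketch of the new `connectionConfig` helper in isolation; `SqlParser.configBuilder` is standard Calcite API and the flag value is an example:

```scala
import org.apache.calcite.sql.parser.SqlParser

// build a parser config with an explicit case-sensitivity setting
val parserConfig = SqlParser.configBuilder().setCaseSensitive(false).build()

// the helper copies that flag into a CalciteConnectionConfig,
// which the updated CalciteCatalogReader constructor expects
val connConfig = SimpleRelBuilder.connectionConfig(parserConfig)
```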
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/TriplesTableFactory.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/TriplesTableFactory.scala
index 330e0bd..663f504 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/TriplesTableFactory.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/rules/plan/TriplesTableFactory.scala
@@ -5,11 +5,13 @@ import java.util
import scala.collection.JavaConverters._
import org.apache.calcite.DataContext
+import org.apache.calcite.config.CalciteConnectionConfig
import org.apache.calcite.linq4j.{Enumerable, Linq4j}
import org.apache.calcite.rel.`type`.{RelDataType, RelDataTypeFactory, RelProtoDataType}
import org.apache.calcite.rex.RexNode
import org.apache.calcite.schema.Schema.TableType
import org.apache.calcite.schema._
+import org.apache.calcite.sql.{SqlCall, SqlNode}
import org.apache.calcite.sql.`type`.SqlTypeName
/**
@@ -53,6 +55,10 @@ class TriplesTableFactory extends TableFactory[Table] {
override def getRowType(typeFactory: RelDataTypeFactory): RelDataType = protoRowType.apply(typeFactory)
+ override def isRolledUp(s: String): Boolean = false
+ override def rolledUpColumnValidInsideAgg(s: String, sqlCall: SqlCall, sqlNode: SqlNode,
+ calciteConnectionConfig: CalciteConnectionConfig): Boolean = false
}
+
}
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/GraphUtils.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/GraphUtils.scala
index 0d398db..fbea2fb 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/GraphUtils.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/GraphUtils.scala
@@ -1,14 +1,14 @@
package net.sansa_stack.inference.utils
import java.io.{ByteArrayOutputStream, File, FileOutputStream, FileWriter}
-
-import scalax.collection.edge.LDiEdge
+import java.util
import com.itextpdf.text.PageSize
import org.apache.jena.graph.Node
import org.apache.jena.reasoner.TriplePattern
import org.apache.jena.reasoner.rulesys.Rule
import org.apache.jena.shared.PrefixMapping
+import org.apache.jena.sparql.util.FmtUtils
import org.gephi.graph.api.GraphController
import org.gephi.io.exporter.api.ExportController
import org.gephi.io.exporter.preview.PDFExporter
@@ -19,13 +19,15 @@ import org.gephi.layout.plugin.force.yifanHu.YifanHuLayout
import org.gephi.preview.api.{Item, PreviewController, PreviewProperty}
import org.gephi.preview.types.EdgeColor
import org.gephi.project.api.ProjectController
-import org.jgrapht.DirectedGraph
+import org.jgrapht.Graph
import org.jgrapht.alg.isomorphism.VF2GraphIsomorphismInspector
-import org.jgrapht.ext._
-import org.jgrapht.graph._
+import org.jgrapht.graph.{DefaultDirectedGraph, DirectedPseudograph}
+import org.jgrapht.io.GraphMLExporter.AttributeCategory
+import org.jgrapht.io._
import org.openide.util.Lookup
+import scalax.collection.edge.LDiEdge
-import net.sansa_stack.inference.utils.graph.{EdgeEquivalenceComparator, LabeledEdge, NodeEquivalenceComparator};
+import net.sansa_stack.inference.utils.graph.{EdgeEquivalenceComparator, LabeledEdge, NodeEquivalenceComparator}
/**
* @author Lorenz Buehmann
@@ -52,8 +54,8 @@ object GraphUtils {
* @param graph the 'Graph for Scala' graph
* @return the JGraphT graph
*/
- def asJGraphtRuleGraph(graph: scalax.collection.mutable.Graph[Node, LDiEdge]): DirectedGraph[Node, LabeledEdge[Node, String]] = {
- val g: DirectedGraph[Node, LabeledEdge[Node, String]] = new DefaultDirectedGraph[Node, LabeledEdge[Node, String]](classOf[LabeledEdge[Node, String]])
+ def asJGraphtRuleGraph(graph: scalax.collection.mutable.Graph[Node, LDiEdge]): Graph[Node, LabeledEdge[Node, String]] = {
+ val g: Graph[Node, LabeledEdge[Node, String]] = new DefaultDirectedGraph[Node, LabeledEdge[Node, String]](classOf[LabeledEdge[Node, String]])
val edges = graph.edges.toList
@@ -89,7 +91,8 @@ object GraphUtils {
* @param graph the 'Graph for Scala' graph
* @return the JGraphT graph
*/
- def asJGraphtRuleSetGraph(graph: scalax.collection.mutable.Graph[Rule, LDiEdge]): DirectedGraph[Rule, LabeledEdge[Rule, TriplePattern]] = {
+ def asJGraphtRuleSetGraph(graph: scalax.collection.mutable.Graph[Rule, LDiEdge],
+ showInFlowDirection: Boolean = false): Graph[Rule, LabeledEdge[Rule, TriplePattern]] = {
val g = new DefaultDirectedGraph[Rule, LabeledEdge[Rule, TriplePattern]](classOf[LabeledEdge[Rule, TriplePattern]])
val edges = graph.edges.toList
@@ -102,7 +105,11 @@ object GraphUtils {
val label = e.label.asInstanceOf[TriplePattern]
- g.addEdge(s, t, LabeledEdge[Rule, TriplePattern](s, t, label))
+ if (showInFlowDirection) {
+ g.addEdge(t, s, LabeledEdge[Rule, TriplePattern](t, s, label))
+ } else {
+ g.addEdge(s, t, LabeledEdge[Rule, TriplePattern](s, t, label))
+ }
}
@@ -117,9 +124,10 @@ object GraphUtils {
*
* @param filename the target file
*/
- def export(filename: String, showInFlowDirection: Boolean = false): Unit = {
+ def export(filename: String, showInFlowDirection: Boolean = false,
+ prefixMapping: PrefixMapping = PrefixMapping.Standard): Unit = {
- val g: DirectedGraph[Rule, LabeledEdge[Rule, TriplePattern]] = asJGraphtRuleSetGraph(graph)
+ val g: Graph[Rule, LabeledEdge[Rule, TriplePattern]] = asJGraphtRuleSetGraph(graph, showInFlowDirection)
// In order to be able to export edge and node labels and IDs,
// we must implement providers for them
@@ -138,17 +146,38 @@ object GraphUtils {
}
val edgeLabelProvider = new ComponentNameProvider[LabeledEdge[Rule, TriplePattern]]() {
- override def getName(e: LabeledEdge[Rule, TriplePattern]): String = e.label.toString
+ override def getName(e: LabeledEdge[Rule, TriplePattern]): String = {
+ val p = e.label.getPredicate
+ // omit if predicate is a variable
+ if(p.isVariable) {
+ ""
+ } else {
+ FmtUtils.stringForNode(e.label.getPredicate, prefixMapping)
+ }
+ }
}
-// val exporter = new GraphMLExporter[String,LabeledEdge](
+ import org.jgrapht.io.DefaultAttribute
+ val ruleDescriptionProvider = new ComponentAttributeProvider[Rule]() {
+ override def getComponentAttributes(r: Rule): util.Map[String, Attribute] = {
+ val map = new util.HashMap[String, Attribute]()
+ map.put("rule", DefaultAttribute.createAttribute(r.toString))
+ map
+ }
+ }
+
+ // val exporter = new GraphMLExporter[String,LabeledEdge](
// vertexIDProvider, vertexNameProvider, edgeIDProvider,edgeLabelProvider)
val exporter = new GraphMLExporter[Rule, LabeledEdge[Rule, TriplePattern]](
new IntegerComponentNameProvider[Rule],
vertexNameProvider,
+ ruleDescriptionProvider,
new IntegerComponentNameProvider[LabeledEdge[Rule, TriplePattern]],
- edgeLabelProvider)
+ edgeLabelProvider,
+ null)
+
+ exporter.registerAttribute("rule", AttributeCategory.NODE, AttributeType.STRING)
val fw = new FileWriter(filename)
@@ -159,12 +188,12 @@ object GraphUtils {
// Gephi
// Init a project - and therefore a workspace
- val pc = Lookup.getDefault().lookup(classOf[ProjectController])
+ val pc = Lookup.getDefault.lookup(classOf[ProjectController])
pc.newProject()
- val workspace = pc.getCurrentWorkspace()
+ val workspace = pc.getCurrentWorkspace
// Get controllers and models
- val importController = Lookup.getDefault().lookup(classOf[ImportController])
+ val importController = Lookup.getDefault.lookup(classOf[ImportController])
// export as GraphML
val tmpFilename = "/tmp/temp-graph.graphml"
@@ -173,8 +202,8 @@ object GraphUtils {
// Import file
val file = new File(tmpFilename)
val container = importController.importFile(file)
- container.getLoader().setEdgeDefault(EdgeDirectionDefault.DIRECTED) // Force DIRECTED
- container.getLoader().setAllowAutoNode(false) // Don't create missing nodes
+ container.getLoader.setEdgeDefault(EdgeDirectionDefault.DIRECTED) // Force DIRECTED
+ container.getLoader.setAllowAutoNode(false) // Don't create missing nodes
// Append imported data to GraphAPI
importController.process(container, new DefaultProcessor(), workspace)
@@ -184,7 +213,7 @@ object GraphUtils {
// See if graph is well imported
- val graphModel = Lookup.getDefault().lookup(classOf[GraphController]).getGraphModel
+ val graphModel = Lookup.getDefault.lookup(classOf[GraphController]).getGraphModel
val g = graphModel.getDirectedGraph()
// Run YifanHuLayout for 100 passes - The layout always takes the current visible view
@@ -194,23 +223,23 @@ object GraphUtils {
layout.setOptimalDistance(200f)
layout.initAlgo()
- for (i <- 0 to 100 if layout.canAlgo()) {
+ for (i <- 0 to 100 if layout.canAlgo) {
layout.goAlgo()
}
layout.endAlgo()
- val model = Lookup.getDefault().lookup(classOf[PreviewController]).getModel()
- model.getProperties().putValue(PreviewProperty.SHOW_NODE_LABELS, true)
- model.getProperties().putValue(PreviewProperty.SHOW_EDGE_LABELS, true)
- model.getProperties().putValue(PreviewProperty.EDGE_CURVED, false)
- model.getProperties().putValue(PreviewProperty.EDGE_COLOR, new EdgeColor(java.awt.Color.GRAY))
- model.getProperties().putValue(PreviewProperty.EDGE_THICKNESS, 0.1f)
- model.getProperties().putValue(PreviewProperty.NODE_LABEL_FONT, model.getProperties().getFontValue(PreviewProperty.NODE_LABEL_FONT).deriveFont(8))
+ val model = Lookup.getDefault.lookup(classOf[PreviewController]).getModel()
+ model.getProperties.putValue(PreviewProperty.SHOW_NODE_LABELS, true)
+ model.getProperties.putValue(PreviewProperty.SHOW_EDGE_LABELS, true)
+ model.getProperties.putValue(PreviewProperty.EDGE_CURVED, false)
+ model.getProperties.putValue(PreviewProperty.EDGE_COLOR, new EdgeColor(java.awt.Color.GRAY))
+ model.getProperties.putValue(PreviewProperty.EDGE_THICKNESS, 0.1f)
+ model.getProperties.putValue(PreviewProperty.NODE_LABEL_FONT, model.getProperties.getFontValue(PreviewProperty.NODE_LABEL_FONT).deriveFont(8))
model.getProperties.putValue(Item.NODE_LABEL, "Vertex Label")
// Export full graph
- val ec = Lookup.getDefault().lookup(classOf[ExportController])
+ val ec = Lookup.getDefault.lookup(classOf[ExportController])
// ec.exportFile(new File("io_gexf.gexf"));
// PDF Exporter config and export to Byte array
@@ -219,7 +248,7 @@ object GraphUtils {
pdfExporter.setWorkspace(workspace)
val baos = new ByteArrayOutputStream()
ec.exportStream(baos, pdfExporter)
- new FileOutputStream(filename + ".pdf").write(baos.toByteArray())
+ new FileOutputStream(filename + ".pdf").write(baos.toByteArray)
}
}
@@ -302,7 +331,7 @@ object GraphUtils {
*/
def export(filename: String): Unit = {
- val g: DirectedGraph[Node, LabeledEdge[Node, Node]] = new DirectedPseudograph[Node, LabeledEdge[Node, Node]](classOf[LabeledEdge[Node, Node]])
+ val g: Graph[Node, LabeledEdge[Node, Node]] = new DirectedPseudograph[Node, LabeledEdge[Node, Node]](classOf[LabeledEdge[Node, Node]])
val edges = graph.edges.toList
@@ -312,7 +341,7 @@ object GraphUtils {
val label = e.label.asInstanceOf[Node]
g.addVertex(s)
g.addVertex(t)
- g.addEdge(s, t, new LabeledEdge(s, t, label))
+ g.addEdge(s, t, LabeledEdge(s, t, label))
}
// In order to be able to export edge and node labels and IDs,
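A sketch of exporting a rule dependency graph with the extended `export` signature shown above (file path and rule set are examples, mirroring their use in `DependencyGraphTest`):

```scala
import net.sansa_stack.inference.rules.RuleDependencyGraphGenerator
import net.sansa_stack.inference.utils.RuleUtils
import net.sansa_stack.inference.utils.GraphUtils._

val rules = RuleUtils.load("rules/rdfs-simple.rules").toSet
val rdg = RuleDependencyGraphGenerator.generate(rules)
// GraphML export; edges can be flipped into "flow" direction and predicates are printed with prefixes
rdg.export("/tmp/rdg.graphml", showInFlowDirection = true)
```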
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/JenaTripleToNTripleString.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/JenaTripleToNTripleString.scala
index 46d8df6..b5dee64 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/JenaTripleToNTripleString.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/JenaTripleToNTripleString.scala
@@ -9,7 +9,7 @@ import org.apache.jena.graph.Triple
* @author Lorenz Buehmann
*/
class JenaTripleToNTripleString
- extends ((Triple) => String)
+ extends Function[Triple, String]
with java.io.Serializable {
override def apply(t: Triple): String = {
val subStr =
@@ -27,7 +27,7 @@ class JenaTripleToNTripleString
} else {
s"<${t.getObject}>"
}
- s"${subStr} <${t.getPredicate}> ${objStr} ."
+ s"$subStr <${t.getPredicate}> $objStr ."
}
}
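A minimal sketch of the converter above; `NodeFactory` and `Triple.create` are standard Jena API and the IRIs are placeholders:

```scala
import org.apache.jena.graph.{NodeFactory, Triple}
import net.sansa_stack.inference.utils.JenaTripleToNTripleString

val toNTriples = new JenaTripleToNTripleString()
val t = Triple.create(
  NodeFactory.createURI("http://example.org/s"),
  NodeFactory.createURI("http://example.org/p"),
  NodeFactory.createURI("http://example.org/o"))
// yields an N-Triples line: <http://example.org/s> <http://example.org/p> <http://example.org/o> .
println(toNTriples(t))
```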
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/Logging.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/Logging.scala
index 4b75d32..e3f4ecf 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/Logging.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/Logging.scala
@@ -11,7 +11,7 @@ import scala.language.implicitConversions
*/
trait Logging {
- @transient private var log_ : Logger = null
+ @transient private var log_ : Logger = _
// Method to get or create the logger for this object
protected def log: Logger = {
@@ -22,7 +22,7 @@ trait Logging {
}
// Method to get the logger name for this object
- protected def logName = {
+ protected def logName: String = {
// Ignore trailing $'s in the class names for Scala objects
this.getClass.getName.stripSuffix("$")
}
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/NTriplesStringToJenaTriple.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/NTriplesStringToJenaTriple.scala
index f8a75d2..f46481d 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/NTriplesStringToJenaTriple.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/NTriplesStringToJenaTriple.scala
@@ -11,7 +11,7 @@ import org.apache.jena.riot.{Lang, RDFDataMgr}
* @author Lorenz Buehmann
*/
class NTriplesStringToJenaTriple
- extends Function1[String, Triple]
+ extends Function[String, Triple]
with java.io.Serializable {
override def apply(s: String): Triple = {
RDFDataMgr.createIteratorTriples(new ByteArrayInputStream(s.getBytes), Lang.NTRIPLES, null).next()
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/RDFTripleToNTripleString.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/RDFTripleToNTripleString.scala
index 634b1a8..33e1d1b 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/RDFTripleToNTripleString.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/RDFTripleToNTripleString.scala
@@ -8,7 +8,7 @@ import net.sansa_stack.inference.data.RDFTriple
* @author Lorenz Buehmann
*/
class RDFTripleToNTripleString
- extends Function1[RDFTriple, String]
+ extends Function[RDFTriple, String]
with java.io.Serializable {
override def apply(t: RDFTriple): String = {
val objStr =
@@ -17,6 +17,6 @@ class RDFTripleToNTripleString
} else {
t.o
}
- s"<${t.s}> <${t.p}> ${objStr} ."
+ s"<${t.s}> <${t.p}> $objStr ."
}
}
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/RuleUtils.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/RuleUtils.scala
index 5c0127b..fea7ba2 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/RuleUtils.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/RuleUtils.scala
@@ -226,7 +226,7 @@ object RuleUtils {
// get the path in body graph
val s = (bodyGraph get source).withSubgraph(edges = e => property == null || e.label.equals(property))
- val t = (bodyGraph get target)
+ val t = bodyGraph get target
val path = s pathTo t
@@ -299,7 +299,8 @@ object RuleUtils {
// predicates that are contained in body and head
val intersection = bodyPredicates.intersect(headPredicates)
- ruleType match {
+ // 1. check whether there is an overlap between body and head predicates (might not work)
+ val cyclic = ruleType match {
case TERMINOLOGICAL =>
// check if there is at least one predicate that occurs in body and head
val bodyPredicates = rule.getBody
@@ -331,6 +332,8 @@ object RuleUtils {
}
+ // 2. use JGraphT instead and compute cycles
+
// we generate a graph for the rule (we use a JGraphT graph which provides better cycle detection)
val g = GraphUtils.asJGraphtRuleGraph(asGraph(rule))
@@ -356,9 +359,7 @@ object RuleUtils {
* @param filename the file
* @return a set of rules
*/
- def load(filename: String): Seq[Rule] = {
- Rule.parseRules(org.apache.jena.reasoner.rulesys.Util.loadRuleParserFromResourceFile(filename)).asScala.toSeq
- }
+ def load(filename: String): Seq[Rule] = Rule.parseRules(org.apache.jena.reasoner.rulesys.Util.loadRuleParserFromResourceFile(filename)).asScala
/**
* Returns a rule by the given name from a set of rules.
@@ -423,14 +424,14 @@ object RuleUtils {
}
/**
- * Returns `true` if `rule1 has the same body as `rule2`, otherwise `false` .
+ * Returns `true` if `rule1` has the same body as `rule2`, otherwise `false`.
*/
def sameBody(rule1: Rule, rule2: Rule): Boolean = {
GraphUtils.areIsomorphic(graphOfBody(rule1), graphOfBody(rule2))
}
/**
- * Returns `true` if `rule1 has the same head as `rule2`, otherwise `false`.
+ * Returns `true` if `rule1` has the same head as `rule2`, otherwise `false`.
*/
def sameHead(rule1: Rule, rule2: Rule): Boolean = {
GraphUtils.areIsomorphic(graphOfHead(rule1), graphOfHead(rule2))
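A short sketch of the documented helpers; the rules file name matches the one used elsewhere in this repository:

```scala
import net.sansa_stack.inference.utils.RuleUtils

val rules = RuleUtils.load("rules/rdfs-simple.rules")
val r = rules.head
// body/head comparison is done via graph isomorphism on the rule graphs
println(RuleUtils.sameBody(r, r)) // true, a rule trivially shares its own body
println(RuleUtils.sameHead(r, r)) // true
```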
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/TriplePatternOrdering.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/TriplePatternOrdering.scala
index a3f023f..39b91bb 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/TriplePatternOrdering.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/TriplePatternOrdering.scala
@@ -9,7 +9,7 @@ import org.apache.jena.sparql.util.NodeComparator
* @author Lorenz Buehmann
*/
class TriplePatternOrdering extends Ordering[TriplePattern]{
- implicit val comp = new NodeComparator
+ implicit val comp: NodeComparator = new NodeComparator
override def compare(x: TriplePattern, y: TriplePattern): Int = {
Ordering.by{t: TriplePattern => (t.getSubject, t.getPredicate, t.getObject)}.compare(x, y)
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/TripleUtils.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/TripleUtils.scala
index cf74b36..a131789 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/TripleUtils.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/TripleUtils.scala
@@ -15,14 +15,14 @@ import org.apache.jena.vocabulary.RDFS._
object TripleUtils {
// set of properties that indicate terminological triples
- val properties = List(
+ val properties: List[Node] = List(
subClassOf, equivalentClass, disjointWith,
intersectionOf, unionOf, complementOf, someValuesFrom, allValuesFrom, hasValue,
maxCardinality, minCardinality, cardinality,
subPropertyOf, equivalentProperty, propertyDisjointWith, domain, range, inverseOf).map(t => t.asNode())
// set of types that indicate terminological triples
- val types = Set(
+ val types: Set[Node] = Set(
ObjectProperty, DatatypeProperty,
FunctionalProperty, InverseFunctionalProperty,
SymmetricProperty, AsymmetricProperty,
@@ -115,7 +115,7 @@ object TripleUtils {
* @return all var nodes of the triple pattern
*/
def vars(): Seq[Node] = {
- nodes.filter(_.isVariable)
+ nodes().filter(_.isVariable)
}
}
diff --git a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/graph/LabeledEdge.scala b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/graph/LabeledEdge.scala
index 7cc0603..adf6925 100644
--- a/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/graph/LabeledEdge.scala
+++ b/sansa-inference-common/src/main/scala/net/sansa_stack/inference/utils/graph/LabeledEdge.scala
@@ -3,6 +3,11 @@ package net.sansa_stack.inference.utils.graph
import org.jgrapht.graph.DefaultEdge
/**
+ * A labeled edge that also keeps track of the source and target node.
+ *
+ * @param s source node
+ * @param t target node
+ * @param label the label
* @author Lorenz Buehmann
*/
case class LabeledEdge[V, L](s: V, t: V, label: L) extends DefaultEdge {}
diff --git a/sansa-inference-common/src/test/scala/net/sansa_stack/inference/common/DependencyGraphTest.scala b/sansa-inference-common/src/test/scala/net/sansa_stack/inference/common/DependencyGraphTest.scala
index 48ddcc0..38964f7 100644
--- a/sansa-inference-common/src/test/scala/net/sansa_stack/inference/common/DependencyGraphTest.scala
+++ b/sansa-inference-common/src/test/scala/net/sansa_stack/inference/common/DependencyGraphTest.scala
@@ -1,41 +1,95 @@
package net.sansa_stack.inference.common
+import java.nio.file.{Path, Paths}
+
import net.sansa_stack.inference.rules._
import net.sansa_stack.inference.rules.minimizer.DefaultRuleDependencyGraphMinimizer
import net.sansa_stack.inference.utils.GraphUtils._
import net.sansa_stack.inference.utils.RuleUtils
/**
+ * Loads a given set of rules and exports the rule dependency graph before and after minimization.
+ *
+ *
* @author Lorenz Buehmann
*/
object DependencyGraphTest {
+ // the config object
+ case class Config(in: Path = null,
+ out: Path = null,
+ profile: String = "",
+ ruleNames: Seq[String] = Seq()
+ )
+
+ implicit val pathRead: scopt.Read[Path] =
+ scopt.Read.reads(Paths.get(_))
+
+ // the CLI parser
+ val parser = new scopt.OptionParser[Config]("DependencyGraphTest") {
+
+ head("DependencyGraphTest", "0.1.0")
+
+ opt[Path]('i', "input").required().valueName("<file>").
+ action((x, c) => c.copy(in = x)).
+ text("path to file containing the rules")
+
+ opt[Path]('o', "out").required().valueName("<directory>").
+ action((x, c) => c.copy(out = x)).
+ text("the output directory")
+
+ opt[String]('p', "profile").required().valueName("<profile>").
+ action((x, c) => c.copy(profile = x)).
+ text("the name of the set of rules to process - will be used for output files")
+
+ opt[Seq[String]]("rules").optional().valueName("<rule1>,<rule2>,...").
+ action((x, c) => {
+ c.copy(ruleNames = x)
+ }).
+ text("list of rule names to process just a subset of the rules contained in the given input file")
+ }
+
def main(args: Array[String]): Unit = {
- val path = "/tmp"
+ parser.parse(args, Config()) match {
+ case Some(config) =>
+ run(config)
+ case None =>
+ // scalastyle:off println
+ println(parser.usage)
+ // scalastyle:on println
+ }
+ }
+
+ def run(config: Config): Unit = {
+
+ // make output dirs
+ config.out.toFile.mkdirs()
+
+ // load the rules
+ var rules = RuleUtils.load(config.in.toAbsolutePath.toString)
+
+ // filter if necessary
+ if(config.ruleNames.nonEmpty) {
+ rules = rules.filter(r => config.ruleNames.contains(r.getName))
+ }
// val names = Seq("rdfp13a", "rdfp13b", "rdfp13c", "rdfs5", "rdfs7") // property rules
- val names = Seq("rdfp13a", "rdfp13b", "rdfp13c", "rdfs5", "rdfs7", "rdfp3", "rdfp4") // property rules + some instance rules
+ val names = Seq("rdfp13a", "rdfp13b", "rdfp13c")// , "rdfs5", "rdfs7", "rdfp3", "rdfp4") // property rules + some instance rules
// val names = Seq("rdfs5", "rdfs7", "rdfp3", "rdfp4") // property TC rule + some instance rules
- // define the rules
- val rules = RuleSets.OWL_HORST//.filter(r => names.contains(r.getName))
- val profile = ReasoningProfile.OWL_HORST
-// val rules = RuleSets.RDFS_SIMPLE
-// val profile = ReasoningProfile.RDFS_SIMPLE
-
val minimizer = new DefaultRuleDependencyGraphMinimizer()
- // export graphs
- rules.foreach(rule => RuleUtils.asGraph(rule).export(s"${path}/rule-${rule.getName}.graphml"))
+ // export graphs for each rule
+ rules.foreach(rule => RuleUtils.asGraph(rule).export(config.out.resolve(s"rule_${rule.getName}.graphml").toAbsolutePath.toString))
// generate the rule dependency graph
- var dependencyGraph = RuleDependencyGraphGenerator.generate(rules)
- dependencyGraph.export(s"${path}/rdg-${profile}.graphml")
+ var dependencyGraph = RuleDependencyGraphGenerator.generate(rules.toSet)
+ dependencyGraph.export(config.out.resolve(s"rdg_${config.profile}.graphml").toAbsolutePath.toString, showInFlowDirection = true)
+ // generate the minimized graph
dependencyGraph = minimizer.execute(dependencyGraph) // RuleDependencyGraphGenerator.generate(rules, pruned = true)
- dependencyGraph.export(s"${path}/rdg-${profile}-pruned.graphml")
-// dependencyGraph.exportAsPDF(s"${path}/rdg-${profile}-pruned.pdf")
+ dependencyGraph.export(config.out.resolve(s"rdg_${config.profile}_minimized.graphml").toAbsolutePath.toString, showInFlowDirection = true)
// generate the high-level dependency graph
val highLevelDependencyGraph = HighLevelRuleDependencyGraphGenerator.generate(dependencyGraph)
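A hypothetical invocation of the new scopt-based CLI; all argument values below are examples only:

```scala
DependencyGraphTest.main(Array(
  "-i", "/path/to/rdfs-simple.rules", // rules file
  "-o", "/tmp/rdg-out",               // output directory (created if missing)
  "-p", "rdfs-simple",                // profile name used in the output file names
  "--rules", "rdfp13a,rdfp13b"        // optional subset of rule names
))
```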
diff --git a/sansa-inference-flink/pom.xml b/sansa-inference-flink/pom.xml
index 4632164..013c9ab 100644
--- a/sansa-inference-flink/pom.xml
+++ b/sansa-inference-flink/pom.xml
@@ -23,12 +23,12 @@ under the License.
net.sansa-stack
sansa-inference-parent_2.11
- 0.3.0
+ 0.4.0
../pom.xml
net.sansa-stack
sansa-inference-flink_${scala.binary.version}
- 0.3.0
+ 0.4.0
Inference API - Flink
Apache Flink based inference layer for RDF and OWL
diff --git a/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/data/RDFGraph.scala b/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/data/RDFGraph.scala
index 7960d41..66c64a8 100644
--- a/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/data/RDFGraph.scala
+++ b/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/data/RDFGraph.scala
@@ -68,7 +68,7 @@ case class RDFGraph(triples: DataSet[RDFTriple]) {
*
* @return the number of triples
*/
- def size() = {
+ def size(): Long = {
triples.count()
}
}
diff --git a/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerOWLHorst.scala b/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerOWLHorst.scala
index e9eafdb..68db1f1 100644
--- a/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerOWLHorst.scala
+++ b/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerOWLHorst.scala
@@ -50,9 +50,9 @@ class ForwardRuleReasonerOWLHorst(env: ExecutionEnvironment) extends ForwardRule
.distinct()
// 2. we compute the transitive closure of rdfs:subPropertyOf and rdfs:subClassOf
- // rdfs11: (xxx rdfs:subClassOf yyy), (yyy rdfs:subClassOf zzz) -> (xxx rdfs:subClassOf zzz)
+ // rdfs11: (xxx rdfs:subClassOf yyy), (yyy rdfs:subClassOf zzz) -> (xxx rdfs:subClassOf zzz)
val subClassOfTriplesTrans = computeTransitiveClosure(subClassOfTriples)
- // rdfs5: (xxx rdfs:subPropertyOf yyy), (yyy rdfs:subPropertyOf zzz) -> (xxx rdfs:subPropertyOf zzz)
+ // rdfs5: (xxx rdfs:subPropertyOf yyy), (yyy rdfs:subPropertyOf zzz) -> (xxx rdfs:subPropertyOf zzz)
val subPropertyOfTriplesTrans = computeTransitiveClosure(subPropertyOfTriples)
@@ -159,8 +159,8 @@ class ForwardRuleReasonerOWLHorst(env: ExecutionEnvironment) extends ForwardRule
// 2. SubPropertyOf inheritance according to rdfs7 is computed
/*
- rdfs7 aaa rdfs:subPropertyOf bbb .
- xxx aaa yyy . xxx bbb yyy .
+ rdfs7 aaa rdfs:subPropertyOf bbb .
+ xxx aaa yyy . => xxx bbb yyy .
*/
val triplesRDFS7 =
triplesFiltered
@@ -173,8 +173,8 @@ class ForwardRuleReasonerOWLHorst(env: ExecutionEnvironment) extends ForwardRule
// 3. Domain and Range inheritance according to rdfs2 and rdfs3 is computed
/*
- rdfs2 aaa rdfs:domain xxx .
- yyy aaa zzz . yyy rdf:type xxx .
+ rdfs2 aaa rdfs:domain xxx .
+ yyy aaa zzz . => yyy rdf:type xxx .
*/
val triplesRDFS2 =
rdfs7Res
@@ -182,8 +182,8 @@ class ForwardRuleReasonerOWLHorst(env: ExecutionEnvironment) extends ForwardRule
.map(t => RDFTriple(t.s, RDF.`type`.getURI, domainMap(t.p)))
/*
- rdfs3 aaa rdfs:range xxx .
- yyy aaa zzz . zzz rdf:type xxx .
+ rdfs3 aaa rdfs:range xxx .
+ yyy aaa zzz . => zzz rdf:type xxx .
*/
val triplesRDFS3 =
rdfs7Res
@@ -195,8 +195,8 @@ class ForwardRuleReasonerOWLHorst(env: ExecutionEnvironment) extends ForwardRule
// input are the rdf:type triples from RDFS2/RDFS3 and the ones contained in the original graph
/*
- rdfs9 xxx rdfs:subClassOf yyy .
- zzz rdf:type xxx . zzz rdf:type yyy .
+ rdfs9 xxx rdfs:subClassOf yyy .
+ zzz rdf:type xxx . => zzz rdf:type yyy .
*/
val triplesRDFS9 =
triplesRDFS2
diff --git a/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerRDFS.scala b/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerRDFS.scala
index 4d0a418..88d2c95 100644
--- a/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerRDFS.scala
+++ b/sansa-inference-flink/src/main/scala/net/sansa_stack/inference/flink/forwardchaining/ForwardRuleReasonerRDFS.scala
@@ -60,8 +60,8 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
computeTransitiveClosureOptSemiNaive(subClassOfTriples).name("rdfs11")
/*
- rdfs5 xxx rdfs:subPropertyOf yyy .
- yyy rdfs:subPropertyOf zzz . xxx rdfs:subPropertyOf zzz .
+ rdfs5 xxx rdfs:subPropertyOf yyy .
+ yyy rdfs:subPropertyOf zzz . => xxx rdfs:subPropertyOf zzz .
*/
val subPropertyOfTriples =
extractTriples(schemaTriples, RDFS.subPropertyOf.getURI)
@@ -77,8 +77,8 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
// 2. SubPropertyOf inheritance according to rdfs7 is computed
/*
- rdfs7 aaa rdfs:subPropertyOf bbb .
- xxx aaa yyy . xxx bbb yyy .
+ rdfs7 aaa rdfs:subPropertyOf bbb .
+ xxx aaa yyy . => xxx bbb yyy .
*/
val triplesRDFS7 = if (useSchemaBroadCasting) {
otherTriples
@@ -124,8 +124,8 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
// 3. Domain and Range inheritance according to rdfs2 and rdfs3 is computed
/*
- rdfs2 aaa rdfs:domain xxx .
- yyy aaa zzz . yyy rdf:type xxx .
+ rdfs2 aaa rdfs:domain xxx .
+ yyy aaa zzz . => yyy rdf:type xxx .
*/
val domainTriples =
extractTriples(schemaTriples, RDFS.domain.getURI).name("rdfs:domain")
@@ -168,8 +168,8 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
}.name("rdfs2")
/*
- rdfs3 aaa rdfs:range xxx .
- yyy aaa zzz . zzz rdf:type xxx .
+ rdfs3 aaa rdfs:range xxx .
+ yyy aaa zzz . => zzz rdf:type xxx .
*/
val rangeTriples =
extractTriples(schemaTriples, RDFS.range.getURI).name("rdfs:range")
@@ -220,8 +220,8 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
// 4. SubClass inheritance according to rdfs9
/*
- rdfs9 xxx rdfs:subClassOf yyy .
- zzz rdf:type xxx . zzz rdf:type yyy .
+ rdfs9 xxx rdfs:subClassOf yyy .
+ zzz rdf:type xxx . => zzz rdf:type yyy .
*/
val triplesRDFS9 = if (useSchemaBroadCasting) {
typeTriples // all rdf:type triples (s a A)
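To make the rule comments above concrete, a plain-Scala illustration of rdfs7 on two example triples (just the inference pattern, not the DataSet-based implementation):

```scala
import net.sansa_stack.inference.data.RDFTriple

// schema: ex:hasAuthor rdfs:subPropertyOf ex:contributor
val subPropertyOf = Map("ex:hasAuthor" -> "ex:contributor")

// data: ex:book1 ex:hasAuthor ex:alice
val data = Seq(RDFTriple("ex:book1", "ex:hasAuthor", "ex:alice"))

// rdfs7: (aaa subPropertyOf bbb), (xxx aaa yyy) => (xxx bbb yyy)
val inferred = data.filter(t => subPropertyOf.contains(t.p))
                   .map(t => RDFTriple(t.s, subPropertyOf(t.p), t.o))
// inferred == Seq(RDFTriple("ex:book1", "ex:contributor", "ex:alice"))
```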
diff --git a/sansa-inference-flink/src/test/scala/net/sansa_stack/inference/flink/TCTest.scala b/sansa-inference-flink/src/test/scala/net/sansa_stack/inference/flink/TCTest.scala
index 9f0dc01..33c66aa 100644
--- a/sansa-inference-flink/src/test/scala/net/sansa_stack/inference/flink/TCTest.scala
+++ b/sansa-inference-flink/src/test/scala/net/sansa_stack/inference/flink/TCTest.scala
@@ -1,26 +1,25 @@
package net.sansa_stack.inference.flink
+import scala.collection.mutable
+
import org.apache.flink.api.common.functions.RichJoinFunction
import org.apache.flink.api.common.operators.Order
-import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment}
+import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment, _}
import org.apache.flink.core.fs.FileSystem.WriteMode
-import org.apache.flink.test.util.{MultipleProgramsTestBase, TestBaseUtils}
import org.apache.flink.test.util.MultipleProgramsTestBase.TestExecutionMode
+import org.apache.flink.test.util.{MultipleProgramsTestBase, TestBaseUtils}
import org.apache.flink.util.Collector
import org.apache.jena.vocabulary.RDFS
-import org.junit.{After, Before, Rule, Test}
import org.junit.rules.TemporaryFolder
-import scala.collection.mutable
-
-import org.apache.flink.api.scala._
import org.junit.runner.RunWith
import org.junit.runners.Parameterized
+import org.junit.{After, Before, Rule, Test}
import net.sansa_stack.inference.data.RDFTriple
-import net.sansa_stack.inference.flink.forwardchaining.TransitiveReasoner
/**
* A test case for the computation of the transitive closure (TC).
+ *
* @author Lorenz Buehmann
*/
@RunWith(classOf[Parameterized])
@@ -35,7 +34,7 @@ class TCTest(mode: TestExecutionMode) extends MultipleProgramsTestBase(mode) {
private var expectedResult: String = ""
@Rule
- def tempFolder = _tempFolder
+ def tempFolder: TemporaryFolder = _tempFolder
@Before
def before(): Unit = {
@@ -123,12 +122,17 @@ class TCTest(mode: TestExecutionMode) extends MultipleProgramsTestBase(mode) {
def performOptimized(triples: DataSet[RDFTriple]): DataSet[(String, String)] = {
def iterate(s: DataSet[RDFTriple], ws: DataSet[RDFTriple]): (DataSet[RDFTriple], DataSet[RDFTriple]) = {
val resolvedRedirects = triples.join(ws)
- .where { _.s }
- .equalTo { _.o }
- .map { joinResult => joinResult match {
- case (redirect, link) =>
- RDFTriple(link.s, redirect.p, redirect.o)
+ .where {
+ _.s
}
+ .equalTo {
+ _.o
+ }
+ .map { joinResult =>
+ joinResult match {
+ case (redirect, link) =>
+ RDFTriple(link.s, redirect.p, redirect.o)
+ }
}.name("TC-From-Iteration")
(resolvedRedirects, resolvedRedirects)
}
@@ -136,8 +140,8 @@ class TCTest(mode: TestExecutionMode) extends MultipleProgramsTestBase(mode) {
val tc = triples
.iterateDelta(triples, 10, Array("s", "o"))(iterate)
.name("Final-TC")
-// .map { cl => cl}
-// .name("Final-Redirect-Result")
+ // .map { cl => cl}
+ // .name("Final-Redirect-Result")
tc.map(t => (t.s, t.o))
}
@@ -158,19 +162,19 @@ class TCTest(mode: TestExecutionMode) extends MultipleProgramsTestBase(mode) {
.join(tuples).where(1).equalTo(0)(
new RichJoinFunction[(String, String), (String, String), (String, String)] {
override def join(left: (String, String), right: (String, String)): (String, String) = {
-// val context = getIterationRuntimeContext
-// println("Iteration #" + context.getSuperstepNumber)
-// println(context.getIndexOfThisSubtask + "/" + context.getNumberOfParallelSubtasks)
+ // val context = getIterationRuntimeContext
+ // println("Iteration #" + context.getSuperstepNumber)
+ // println(context.getIndexOfThisSubtask + "/" + context.getNumberOfParallelSubtasks)
(left._1, right._2)
}
}
)
-// {
-// (left, right) => (left._1, right._2)
-// }
+ // {
+ // (left, right) => (left._1, right._2)
+ // }
.union(prevPaths)
.groupBy(0, 1)
- .reduce((l ,r) => l)
+ .reduce((l, r) => l)
val terminate = prevPaths
.coGroup(nextPaths)
@@ -202,32 +206,32 @@ class TCTest(mode: TestExecutionMode) extends MultipleProgramsTestBase(mode) {
val initialSolutionSet = tuples
val initialWorkset = tuples
-// val res = initialSolutionSet.iterateDelta(initialWorkset, maxIterations, Array(keyPosition)) {
-// (solution, workset) =>
-// val deltas = workset.join(solution).where(1).equalTo(0){
-// (prev, next, out: Collector[(String, String)]) => {
-// val prevPaths = prev.toSet
-// for (n <- next)
-// if (!prevPaths.contains(n)) out.collect(n)
-// }
-// }
-//
-// val nextWorkset = deltas.filter(new FilterByThreshold())
-//
-// (deltas, nextWorkset)
-// }
-// res
+ // val res = initialSolutionSet.iterateDelta(initialWorkset, maxIterations, Array(keyPosition)) {
+ // (solution, workset) =>
+ // val deltas = workset.join(solution).where(1).equalTo(0) {
+ // (prev, next, out: Collector[(String, String)]) => {
+ // val prevPaths = prev.toSet
+ // for (n <- next)
+ // if (!prevPaths.contains(n)) out.collect(n)
+ // }
+ // }
+ //
+ // val nextWorkset = deltas.filter(new FilterByThreshold())
+ //
+ // (deltas, nextWorkset)
+ // }
+ // res
tuples
}
- def getDataSimple(env: ExecutionEnvironment, scale: Int = 1) : DataSet[RDFTriple] = {
+ def getDataSimple(env: ExecutionEnvironment, scale: Int = 1): DataSet[RDFTriple] = {
val triples = new mutable.HashSet[RDFTriple]()
val begin = 1
val end = 10 * scale
- for(i <- begin to end) {
+ for (i <- begin to end) {
triples += RDFTriple(ns + "x" + i, p1, ns + "y" + i)
triples += RDFTriple(ns + "y" + i, p1, ns + "z" + i)
}
@@ -235,10 +239,10 @@ class TCTest(mode: TestExecutionMode) extends MultipleProgramsTestBase(mode) {
env.fromCollection(triples)
}
- def getExpectedResultSimple(scale: Int = 1) : String = {
+ def getExpectedResultSimple(scale: Int = 1): String = {
var res = ""
- for(i <- 1 to scale * 10) {
+ for (i <- 1 to scale * 10) {
res += s"${ns}x$i,${ns}y$i\n"
res += s"${ns}y$i,${ns}z$i\n"
res += s"${ns}x$i,${ns}z$i\n"
@@ -247,24 +251,24 @@ class TCTest(mode: TestExecutionMode) extends MultipleProgramsTestBase(mode) {
res
}
- def getDataSinglePath(env: ExecutionEnvironment, length: Int = 10) : DataSet[RDFTriple] = {
+ def getDataSinglePath(env: ExecutionEnvironment, length: Int = 10): DataSet[RDFTriple] = {
val triples = new mutable.HashSet[RDFTriple]()
// graph is a path of length n
// (x1, p, x2), (x2, p, x3), ..., (x(n-1), p, xn)
val n = 10
- for(i <- 1 until length) {
- triples += RDFTriple(ns + "x" + i, p1, ns + "x" + (i+1))
+ for (i <- 1 until length) {
+ triples += RDFTriple(ns + "x" + i, p1, ns + "x" + (i + 1))
}
env.fromCollection(triples)
}
- def getExpectedResultSinglePath(length: Int = 10) : String = {
+ def getExpectedResultSinglePath(length: Int = 10): String = {
var res = ""
- for(i <- 1 to length) {
- for(j <- i+1 to length) {
+ for (i <- 1 to length) {
+ for (j <- i + 1 to length) {
res += s"${ns}x$i,${ns}x${j}\n"
}
}
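
As a plain-Scala reference for what these tests check: the expected result is the transitive closure over p1 of the generated chains. A tiny in-memory sketch (no Flink involved, names are ad hoc) that can be used to verify small cases by hand:

    // naive in-memory transitive closure over (s, o) pairs
    def closure(edges: Set[(String, String)]): Set[(String, String)] = {
      var tc = edges
      var changed = true
      while (changed) {
        val next = tc ++ (for ((a, b) <- tc; (c, d) <- tc if b == c) yield (a, d))
        changed = next.size != tc.size
        tc = next
      }
      tc
    }
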
diff --git a/sansa-inference-flink/src/test/scala/net/sansa_stack/inference/flink/rules/TransitivityRuleTest.scala b/sansa-inference-flink/src/test/scala/net/sansa_stack/inference/flink/rules/TransitivityRuleTest.scala
index 2c2010f..6e1940c 100644
--- a/sansa-inference-flink/src/test/scala/net/sansa_stack/inference/flink/rules/TransitivityRuleTest.scala
+++ b/sansa-inference-flink/src/test/scala/net/sansa_stack/inference/flink/rules/TransitivityRuleTest.scala
@@ -39,8 +39,8 @@ object TransitivityRuleTest {
// graph is a path of length n
// (x1, p, x2), (x2, p, x3), ..., (x(n-1), p, xn)
val n = 10
- for(i <- 1 to end) {
- triples += RDFTriple(ns + "x" + i, p1, ns + "x" + (i+1))
+ for (i <- 1 to end) {
+ triples += RDFTriple(ns + "x" + i, p1, ns + "x" + (i + 1))
}
val triplesDataset = env.fromCollection(triples)
diff --git a/sansa-inference-spark/pom.xml b/sansa-inference-spark/pom.xml
index 87106fa..040a309 100644
--- a/sansa-inference-spark/pom.xml
+++ b/sansa-inference-spark/pom.xml
@@ -4,12 +4,12 @@
net.sansa-stack
sansa-inference-parent_2.11
- 0.3.0
+ 0.4.0
../pom.xml
net.sansa-stack
sansa-inference-spark_${scala.binary.version}
- 0.3.0
+ 0.4.0
Inference API - Spark
Apache Spark based inference layer for RDF and OWL
@@ -33,10 +33,24 @@
-
-
-
-
+
+ ${project.groupId}
+ sansa-rdf-spark_${scala.binary.version}
+
+
+ org.apache.hadoop
+ hadoop-common
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+
+
+ org.aksw.sparqlify
+ sparqlify-core
+
+
+
@@ -90,6 +104,7 @@
org.apache.jena
jena-tdb
${jena.version}
+ provided
@@ -112,11 +127,11 @@
- com.assembla.scala-incubator
+ org.scala-graph
graph-core_${scala.binary.version}
- com.assembla.scala-incubator
+ org.scala-graph
graph-dot_${scala.binary.version}
@@ -127,9 +142,11 @@
org.jgrapht
jgrapht-ext
+
org.gephi
gephi-toolkit
+ provided
@@ -137,16 +154,11 @@
junit
junit
-
- org.scalatest
- scalatest_${scala.binary.version}
- test
-
com.holdenkarau
spark-testing-base_${scala.binary.version}
- 2.2.0_0.8.0
+ 2.3.0_0.9.0
test
@@ -180,7 +192,7 @@
scala-logging_${scala.binary.version}
-
+
com.github.scopt
scopt_${scala.binary.version}
@@ -192,6 +204,23 @@
config
+
+
+ org.apache.calcite
+ calcite-core
+
+
+
+ org.codehaus.janino
+ janino
+
+
+ org.codehaus.janino
+ commons-compiler
+
+
+
+
@@ -287,8 +316,8 @@
- unpack
- package
+ resource-dependencies
+ install
unpack-dependencies
@@ -297,7 +326,7 @@
sansa-inference-tests_${scala.binary.version}
true
true
- ${project.build.directory}/core-resources
+ ${project.build.directory}/test-classes
org/**,META-INF/**,rebel.xml
true
true
@@ -431,7 +460,7 @@
org.codehaus.janino:*
org.codehaus.jettison:jettison
org.fusesource.leveldbjni:leveldbjni-all
- org.glassfish.hk2*
+ org.glassfish.hk2*
org.glassfish.jersey*
org.javassist:javassist
org.json4s:json4s*
@@ -448,6 +477,8 @@
org.gephi:*
org.jfree:*
com.itextpdf:*
+ org.apache.poi:*
+ org.apache.batik:*
@@ -468,6 +499,18 @@
**
+
+ xerces:xercesImpl
+
+ **
+
+
+
+ org.aksw.jena-sparql-api:*
+
+ **
+
+
dist-${project.artifactId}-${project.version}
@@ -586,7 +629,7 @@
commons-logging:commons-logging
commons-net:commons-net
io.dropwizard.metrics:metrics*
- io.netty:netty*
+ io.netty:netty*
javax.activation:activation
javax.annotation:javax.annotation-api
javax.servlet:javax.servlet-api
@@ -621,7 +664,7 @@
org.codehaus.janino:*
org.codehaus.jettison:jettison
org.fusesource.leveldbjni:leveldbjni-all
- org.glassfish.hk2*
+ org.glassfish.hk2*
org.glassfish.jersey*
org.javassist:javassist
org.json4s:json4s*
@@ -638,6 +681,9 @@
org.gephi:*
org.jfree:*
com.itextpdf:*
+ org.apache.poi:*
+ org.apache.batik:*
+ org.xerial:sqlite-jdbc
diff --git a/sansa-inference-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sansa-inference-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
deleted file mode 100644
index 50b23f2..0000000
--- a/sansa-inference-spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+++ /dev/null
@@ -1,2 +0,0 @@
-net.sansa_stack.inference.spark.data.loader.sql.NTriplesDataSource
-net.sansa_stack.inference.spark.data.loader.sql.TurtleDataSource
\ No newline at end of file
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/RDFGraphLoader.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/RDFGraphLoader.scala
index e525556..6e85e96 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/RDFGraphLoader.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/RDFGraphLoader.scala
@@ -7,13 +7,15 @@ import net.sansa_stack.inference.spark.data.model.{RDFGraph, RDFGraphDataFrame,
import net.sansa_stack.inference.utils.NTriplesStringToJenaTriple
import org.apache.jena.graph.Triple
import org.apache.jena.riot.Lang
-import org.apache.spark.sql.{Dataset, SaveMode, SparkSession}
+import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession}
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.LoggerFactory
import scala.language.implicitConversions
import org.apache.jena.vocabulary.RDF
+import net.sansa_stack.rdf.spark.io.NTripleReader
+
/**
* A class that provides methods to load an RDF graph from disk.
*
@@ -37,16 +39,7 @@ object RDFGraphLoader {
* @return an RDF graph
*/
def loadFromDisk(session: SparkSession, path: String, minPartitions: Int = 2): RDFGraph = {
- logger.info("loading triples from disk...")
- val startTime = System.currentTimeMillis()
-
- val triples = session.sparkContext
- .textFile(path, minPartitions) // read the text file
- .map(new NTriplesStringToJenaTriple()) // convert to triple object
-// .repartition(minPartitions)
-
-// logger.info("finished loading " + triples.count() + " triples in " + (System.currentTimeMillis()-startTime) + "ms.")
- RDFGraph(triples)
+ RDFGraph(NTripleReader.load(session, path))
}
/**
@@ -84,18 +77,7 @@ object RDFGraphLoader {
* @return an RDF graph
*/
def loadFromDiskAsRDD(session: SparkSession, path: String, minPartitions: Int): RDFGraphNative = {
- logger.info("loading triples from disk...")
- val startTime = System.currentTimeMillis()
-
- val converter = new NTriplesStringToJenaTriple()
-
- val triples = session.sparkContext
- .textFile(path, minPartitions) // read the text file
- .map(line => converter.apply(line)) // convert to triple object
-
- // logger.info("finished loading " + triples.count() + " triples in " +
- // (System.currentTimeMillis()-startTime) + "ms.")
- new RDFGraphNative(triples)
+ new RDFGraphNative(NTripleReader.load(session, path))
}
private case class RDFTriple2(s: String, p: String, o: String) extends Product3[String, String, String] {
@@ -127,15 +109,12 @@ object RDFGraphLoader {
Array(splitted(0), splitted(1), splitted(2))
})
- implicit val rdfTripleEncoder = org.apache.spark.sql.Encoders.kryo[Triple]
+ implicit val rdfTripleEncoder: Encoder[Triple] = org.apache.spark.sql.Encoders.kryo[Triple]
val spark = session.sqlContext
-
- val triples = session.read
- .textFile(path) // read the text file
- .map(new NTriplesStringToJenaTriple())
- .as[Triple](rdfTripleEncoder)
+ val triples = session
+ .createDataset(NTripleReader.load(session, path))(rdfTripleEncoder)
.as("triples")
// (rdfTripleEncoder)
// val rowRDD = session.sparkContext
@@ -195,7 +174,7 @@ object RDFGraphLoader {
* @param minPartitions min number of partitions for Hadoop RDDs ([[SparkContext.defaultMinPartitions]])
* @return an RDF graph based on a [[org.apache.spark.sql.DataFrame]]
*/
- def loadFromDiskAsDataFrame(session: SparkSession, path: String, minPartitions: Int, sqlSchema: SQLSchema = SQLSchemaDefault): RDFGraphDataFrame = {
+ def loadFromDiskAsDataFrame(session: SparkSession, path: String, minPartitions: Int = 4, sqlSchema: SQLSchema = SQLSchemaDefault): RDFGraphDataFrame = {
val df = session
.read
.format("net.sansa_stack.inference.spark.data.loader.sql")
@@ -208,7 +187,7 @@ object RDFGraphLoader {
}
def main(args: Array[String]): Unit = {
- import net.sansa_stack.inference.spark.data.loader.sql.rdf._
+ import net.sansa_stack.rdf.spark.io._
val path = args(0)
val lang = args(1) match {
@@ -247,9 +226,7 @@ object RDFGraphLoader {
- import net.sansa_stack.inference.spark.data.loader.rdd.rdf._
-
- val triplesRDD = session.sparkContext.rdf(lang)(path)
+ val triplesRDD = session.rdf(lang)(path)
triples.show(10)
println(triples.count())
triplesRDD
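
A minimal usage sketch of the simplified loader, assuming a local Spark session and an N-Triples file (the path is a placeholder):

    import org.apache.spark.sql.SparkSession
    import net.sansa_stack.inference.spark.data.loader.RDFGraphLoader

    val session = SparkSession.builder()
      .master("local[*]")
      .appName("rdf-graph-loader-example")
      .getOrCreate()

    // delegates to NTripleReader.load(session, path), as shown in the hunks above
    val graph = RDFGraphLoader.loadFromDisk(session, "/data/example.nt")
    println(s"loaded ${graph.triples.count()} triples")
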
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/package.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/package.scala
deleted file mode 100644
index a960d5c..0000000
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/package.scala
+++ /dev/null
@@ -1,127 +0,0 @@
-package net.sansa_stack.inference.spark.data
-
-import com.typesafe.config.{Config, ConfigFactory}
-import org.apache.hadoop.fs.Path
-import org.apache.jena.graph.Triple
-import org.apache.jena.riot.Lang
-import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter, SaveMode}
-
-import net.sansa_stack.inference.utils.{JenaTripleToNTripleString, Logging, NTriplesStringToJenaTriple}
-
-package object rdf {
-
- /**
- * The mode for parsing N-Triples.
- */
- object ParseMode extends Enumeration {
- type ParseMode = Value
- val REGEX, SPLIT, JENA = Value
- }
-
- // the DataFrame methods
-
- /**
- * Adds methods, `ntriples` and `turtle`, to DataFrameWriter that allows to write N-Triples and Turtle files from a
- * [[DataFrame]] using the `DataFrameWriter`
- */
- implicit class RDFDataFrameWriter[T](writer: DataFrameWriter[T]) {
- def rdf: String => Unit = writer.format("ntriples").save
- def ntriples: String => Unit = writer.format("ntriples").save
- }
-
- /**
- * Adds methods, `rdf`, `ntriples` and `turtle`, to DataFrameReader that allows to read N-Triples and Turtle files using
- * the `DataFrameReader`
- */
- implicit class RDFDataFrameReader(reader: DataFrameReader) extends Logging {
- @transient lazy val conf: Config = ConfigFactory.load("rdf_loader")
- /**
- * Load RDF data into a `DataFrame`. Currently, only N-Triples and Turtle syntax are supported
- * @param lang the RDF language (Turtle or N-Triples)
- * @return a `DataFrame[(String, String, String)]`
- */
- def rdf(lang: Lang): String => DataFrame = lang match {
- case i if lang == Lang.NTRIPLES => ntriples
- case j if lang == Lang.TURTLE => turtle
- case _ => throw new IllegalArgumentException(s"${lang.getLabel} syntax not supported yet!")
- }
- /**
- * Load RDF data in N-Triples syntax into a `DataFrame` with columns `s`, `p`, and `o`.
- * @return a `DataFrame[(String, String, String)]`
- */
- def ntriples: String => DataFrame = {
- log.debug(s"Parsing N-Triples with ${conf.getString("rdf.ntriples.parser")} ...")
- reader.format("ntriples").load
- }
- /**
- * Load RDF data in Turtle syntax into a `DataFrame` with columns `s`, `p`, and `o`.
- * @return a `DataFrame[(String, String, String)]`
- */
- def turtle: String => DataFrame = reader.format("turtle").load
- }
-
-
- // the RDD methods
-
- /**
- * Adds methods, `ntriples` and `turtle`, to SparkContext that allows to write N-Triples and Turtle files
- */
- implicit class RDFWriter[T](triples: RDD[Triple]) {
-
- val converter = new JenaTripleToNTripleString()
-
- def saveAsNTriplesFile(path: String, mode: SaveMode = SaveMode.ErrorIfExists): Unit = {
-
- val fsPath = new Path(path)
- val fs = fsPath.getFileSystem(triples.sparkContext.hadoopConfiguration)
-
- mode match {
- case SaveMode.Append => sys.error("Append mode is not supported by " + this.getClass.getCanonicalName); sys.exit(1)
- case SaveMode.Overwrite => fs.delete(fsPath, true)
- case SaveMode.ErrorIfExists => sys.error("Given path: " + path + " already exists!!"); sys.exit(1)
- case SaveMode.Ignore => sys.exit()
- }
-
- triples
- .map(converter) // map to N-Triples string
- .saveAsTextFile(path)
- }
-
- }
-
- /**
- * Adds methods, `rdf`, `ntriples` and `turtle`, to SparkContext that allows to read N-Triples and Turtle files
- */
- implicit class RDFReader(sc: SparkContext) {
- /**
- * Load RDF data into an `RDD[Triple]`. Currently, only N-Triples and Turtle syntax are supported
- * @param lang the RDF language (Turtle or N-Triples)
- * @return the RDD
- */
- def rdf(lang: Lang): String => RDD[Triple] = lang match {
- case i if lang == Lang.NTRIPLES => ntriples
- case j if lang == Lang.TURTLE => turtle
- case _ => throw new IllegalArgumentException(s"${lang.getLabel} syntax not supported yet!")
- }
-
- /**
- * Load RDF data in N-Triples syntax into an `RDD[Triple]`
- * @return the RDD
- */
- def ntriples: String => RDD[Triple] = path =>
- sc
- .textFile(path, 4) // read the text file
- .map(new NTriplesStringToJenaTriple())
-
- /**
- * Load RDF data in Turtle syntax into an `RDD[Triple]`
- * @return the RDD
- */
- def turtle: String => RDD[Triple] = path =>
- sc
- .textFile(path, 4) // read the text file
- .map(new NTriplesStringToJenaTriple())
- }
-}
\ No newline at end of file
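
The implicits deleted here are superseded by the ones shipped in the sansa-rdf-spark module, which this patch imports elsewhere (see RDFGraphLoader.main). A hedged usage sketch, assuming the session.rdf(lang)(path) entry point shown in that method:

    import org.apache.jena.riot.Lang
    import org.apache.spark.sql.SparkSession
    import net.sansa_stack.rdf.spark.io._

    val session = SparkSession.builder()
      .master("local[*]")
      .appName("rdf-io-example")
      .getOrCreate()

    // yields an RDD[org.apache.jena.graph.Triple], analogous to the deleted sc.rdf(...) implicit
    val triplesRDD = session.rdf(Lang.NTRIPLES)("/data/example.nt")
    println(triplesRDD.count())
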
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/rdd/package.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/rdd/package.scala
deleted file mode 100644
index 055b499..0000000
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/rdd/package.scala
+++ /dev/null
@@ -1,73 +0,0 @@
-package net.sansa_stack.inference.spark.data.loader.rdd
-
-import org.apache.hadoop.fs.Path
-
-import net.sansa_stack.inference.utils.{JenaTripleToNTripleString, NTriplesStringToJenaTriple}
-import org.apache.jena.graph.Triple
-import org.apache.jena.riot.Lang
-import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrameWriter, SaveMode}
-
-package object rdf {
-
- /**
- * Adds methods, `ntriples` and `turtle`, to [[RDD]] that allows to write N-Triples and Turtle files
- */
- implicit class RDFWriter[T](triples: RDD[Triple]) {
-
- val converter = new JenaTripleToNTripleString()
-
- def saveAsNTriplesFile(path: String, mode: SaveMode = SaveMode.ErrorIfExists): Unit = {
-
- val fsPath = new Path(path)
- val fs = fsPath.getFileSystem(triples.sparkContext.hadoopConfiguration)
-
- mode match {
- case SaveMode.Append => sys.error("Append mode is not supported by " + this.getClass.getCanonicalName); sys.exit(1)
- case SaveMode.Overwrite => fs.delete(fsPath, true)
- case SaveMode.ErrorIfExists => sys.error("Given path: " + path + " already exists!!"); sys.exit(1)
- case SaveMode.Ignore => sys.exit()
- }
-
- triples
- .map(converter) // map to N-Triples string
- .saveAsTextFile(path)
- }
-
- }
-
- /**
- * Adds methods, `rdf`, `ntriples` and `turtle`, to [[SparkContext]] that allows to read N-Triples and Turtle files
- */
- implicit class RDFReader(sc: SparkContext) {
- /**
- * Load RDF data into an `RDD[Triple]`. Currently, only N-Triples and Turtle syntax are supported
- * @param lang the RDF language (Turtle or N-Triples)
- * @return the RDD
- */
- def rdf(lang: Lang): String => RDD[Triple] = lang match {
- case i if lang == Lang.NTRIPLES => ntriples
- case j if lang == Lang.TURTLE => turtle
- case _ => throw new IllegalArgumentException(s"${lang.getLabel} syntax not supported yet!")
- }
-
- /**
- * Load RDF data in N-Triples syntax into an `RDD[Triple]`
- * @return the RDD
- */
- def ntriples: String => RDD[Triple] = path =>
- sc
- .textFile(path, 4) // read the text file
- .map(new NTriplesStringToJenaTriple())
-
- /**
- * Load RDF data in Turtle syntax into an `RDD[Triple]`
- * @return the RDD
- */
- def turtle: String => RDD[Triple] = path =>
- sc
- .textFile(path, 4) // read the text file
- .map(new NTriplesStringToJenaTriple())
- }
-}
\ No newline at end of file
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/DefaultSource.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/DefaultSource.scala
deleted file mode 100644
index ea697b2..0000000
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/DefaultSource.scala
+++ /dev/null
@@ -1,19 +0,0 @@
-package net.sansa_stack.inference.spark.data.loader.sql
-
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, SchemaRelationProvider}
-import org.apache.spark.sql.types.StructType
-
-
-class DefaultSource extends RelationProvider with SchemaRelationProvider {
- override def createRelation(sqlContext: SQLContext, parameters: Map[String, String])
- : BaseRelation = {
- createRelation(sqlContext, parameters, null)
- }
- override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]
- , schema: StructType)
- : BaseRelation = {
- parameters.getOrElse("path", sys.error("'path' must be specified for our data."))
- return new NTriplesRelation(parameters.get("path").get, schema)(sqlContext)
- }
- }
\ No newline at end of file
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/NTriplesDataSource.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/NTriplesDataSource.scala
deleted file mode 100644
index 3492540..0000000
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/NTriplesDataSource.scala
+++ /dev/null
@@ -1,60 +0,0 @@
-package net.sansa_stack.inference.spark.data.loader.sql
-
-import com.typesafe.config.{Config, ConfigFactory}
-import net.sansa_stack.inference.spark.data.rdf.ParseMode
-import org.apache.hadoop.fs.Path
-import org.apache.spark.sql.sources._
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
-
-/**
- * The data source for handling N-Triples, i.e. reading from and writing to disk.
- *
- * @author Lorenz Buehmann
- */
-class NTriplesDataSource
- extends DataSourceRegister
- with RelationProvider
- with SchemaRelationProvider
- with CreatableRelationProvider {
-
- lazy val conf: Config = ConfigFactory.load("rdf_loader")
-
- override def shortName(): String = "ntriples"
-
- // Used for reading from file without a given schema
- override def createRelation(sqlContext: SQLContext,
- parameters: Map[String, String]): BaseRelation =
- new NTriplesRelation(parameters("path"), null, ParseMode.withName(conf.getString("rdf.ntriples.parser").toUpperCase))(sqlContext)
-
- // Used for reading from file with a given schema
- override def createRelation(sqlContext: SQLContext,
- parameters: Map[String, String],
- schema: StructType): BaseRelation =
- new NTriplesRelation(parameters("path"), schema, ParseMode.withName(conf.getString("rdf.ntriples.parser").toUpperCase))(sqlContext)
-
- // Used for writing to disk
- override def createRelation(sqlContext: SQLContext,
- mode: SaveMode,
- parameters: Map[String, String],
- data: DataFrame): BaseRelation = {
- val path = parameters.getOrElse("path", "./output/") // can throw an exception/error, it's just for this tutorial
- val fsPath = new Path(path)
- val fs = fsPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
-
- mode match {
- case SaveMode.Append => sys.error("Append mode is not supported by " + this.getClass.getCanonicalName); sys.exit(1)
- case SaveMode.Overwrite => fs.delete(fsPath, true)
- case SaveMode.ErrorIfExists => sys.error("Given path: " + path + " already exists!!"); sys.exit(1)
- case SaveMode.Ignore => sys.exit()
- }
-
- val ntriplesRDD = data.rdd.map(row => {
- row.toSeq.map(value => value.toString).mkString(" ") + " ."
- })
-
- ntriplesRDD.saveAsTextFile(path)
-
- createRelation(sqlContext, parameters, data.schema)
- }
-}
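
For context on what is removed: the relation was addressed through the generic DataFrame reader via the short name registered above. A sketch of the old call site with a placeholder path (after this patch, the equivalent reader lives in sansa-rdf-spark):

    import org.apache.spark.sql.SparkSession

    val session = SparkSession.builder()
      .master("local[*]")
      .appName("ntriples-source-example")
      .getOrCreate()

    // resolved the now-deleted NTriplesDataSource via shortName() = "ntriples"; columns s, p, o
    val df = session.read.format("ntriples").load("/data/example.nt")
    df.show(10)
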
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/NTriplesRelation.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/NTriplesRelation.scala
deleted file mode 100644
index db8c03d..0000000
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/NTriplesRelation.scala
+++ /dev/null
@@ -1,223 +0,0 @@
-package net.sansa_stack.inference.spark.data.loader.sql
-
-import java.io.ByteArrayInputStream
-import java.util.regex.Pattern
-
-import net.sansa_stack.inference.spark.data.rdf.ParseMode.{ParseMode, _}
-import net.sansa_stack.inference.utils.Logging
-import org.apache.jena.graph.Node
-import org.apache.jena.riot.lang.LangNTriples
-import org.apache.jena.riot.system.RiotLib
-import org.apache.jena.riot.tokens.{Tokenizer, TokenizerFactory}
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.sources.{BaseRelation, PrunedScan, TableScan}
-import org.apache.spark.sql.types.{StringType, StructField, StructType}
-import org.apache.spark.sql.{Row, SQLContext}
-
-import scala.util.{Failure, Success, Try}
-
-/**
- * A custom relation that represents N-Triples.
- *
- * @param location
- * @param userSchema
- * @param sqlContext
- * @param mode how to parse each line in the N-Triples file (DEFAULT: [[ParseMode]].`REGEX`)
- */
-class NTriplesRelation(location: String, userSchema: StructType, val mode: ParseMode = REGEX)
- (@transient val sqlContext: SQLContext)
- extends BaseRelation
- with TableScan
- with PrunedScan
- with Serializable
- with Logging {
-
- /**
- * Whether to skip blank lines or throw an exception.
- */
- val skipBlankLines = true
-
- override def schema: StructType = {
- if (this.userSchema != null) {
- this.userSchema
- }
- else {
- StructType(
- Seq(
- StructField("s", StringType, nullable = true),
- StructField("p", StringType, nullable = true),
- StructField("o", StringType, nullable = true)
- ))
- }
- }
-
- override def buildScan(): RDD[Row] = {
- val rdd = sqlContext
- .sparkContext
- .textFile(location)
-
- val rows = mode match {
- case REGEX => rdd.map(line => Row.fromTuple(parseRegexPattern(line)))
- case SPLIT => rdd.map(line => Row.fromSeq(line.split(" ").toList))
- case JENA => rdd.map(parseJena(_).get).map(t => Row.fromSeq(Seq(t.getSubject.toString, t.getPredicate.toString, t.getObject.toString)))
- }
- rows
- }
-
- // scan with column pruning
- override def buildScan(requiredColumns: Array[String]): RDD[Row] = {
- // load the RDD of lines first
- val rdd = sqlContext
- .sparkContext
- .textFile(location)
-
- // map column names to positions in triple
- implicit val positions = requiredColumns.map(
- {
- case "s" => 1
- case "p" => 2
- case "o" => 3
- }
- )
-
- // apply different line processing based on the configured parsing mode
- val tuples = mode match {
- case REGEX => rdd.map(line => {
- val tripleOpt = parseRegexPattern(line)
- if(tripleOpt.isDefined) {
- Some(extractFromTriple(tripleOpt.get))
- } else {
- None
- }
- })
- case SPLIT => rdd.map(line => Some(extractFromTriple(parseRegexSplit(line))))
- case JENA => rdd.map(line => Some(extractFromJenaTriple(parseJena(line).get).map(_.toString)))
- }
-
- val rows = tuples.flatMap(t => {
- if (t.isDefined) {
- Some(Row.fromSeq(t.get))
- } else {
- // TODO error handling
- None
- }
- })
-
- rows
- }
-
- private def extractFromTriple(triple: (String, String, String))(implicit positions: Array[Int]): Seq[String] = {
- positions.map({
- case 1 => triple._1
- case 2 => triple._2
- case 3 => triple._3
- }).toSeq
- }
-
- private def extractFromJenaTriple(triple: org.apache.jena.graph.Triple)(implicit positions: Array[Int]): Seq[Node] = {
- positions.map({
- case 1 => triple.getSubject
- case 2 => triple.getPredicate
- case 3 => triple.getObject
- }).toSeq
- }
-
- /**
- * Parse with Jena API
- * @param s
- * @return
- */
- private def parseJena(s: String): Try[org.apache.jena.graph.Triple] = {
- // always close the streams
- cleanly(new ByteArrayInputStream(s.getBytes))(_.close()) { is =>
- val profile = RiotLib.dftProfile
- val tokenizer: Tokenizer = TokenizerFactory.makeTokenizerUTF8(is)
- val parser = new LangNTriples(tokenizer, profile, null)
- parser.next()
- }
- }
-
- // the REGEX pattern for N-Triples
- val pattern: Pattern = Pattern.compile(
- """|^
- |(<([^>]*)>|(?]+)(?))
- |\s*
- |<([^>]+)>
- |\s*
- |(<([^>]+)>|(.*))
- |\s*[.]\s*(#.*)?$
- """.stripMargin.replaceAll("\n", "").trim)
-
- /**
- * Parse with REGEX pattern
- * @param s
- * @return
- */
- private def parseRegexPattern(s: String): Option[(String, String, String)] = {
- // skip blank lines
- if (s.trim.isEmpty) {
- None
- } else {
-
- val matcher = pattern.matcher(s)
-
-// println(matcher.matches() + "---" + s)
-
- if (matcher.matches) {
- // for(i <- 0 to matcher.groupCount())
- // println(i + ":" + matcher.group(i))
-
- // parse the subject
- val subject = if (matcher.group(2) == null) { // this means it's a blank node captured in group 1 (or 3)
- matcher.group(1)
- } else { // it is a URI
- matcher.group(2)
- }
-
- // parse the predicate
- val predicate = matcher.group(4)
-
- // parse the object
- val obj = if (matcher.group(6) == null) { // this means it is a literal
- matcher.group(7).trim
- } else { // it is a URI
- matcher.group(6)
- }
-
- Some((subject, predicate, obj))
- } else {
- throw new Exception(s"WARN: Illegal N-Triples syntax. Ignoring triple $s")
- }
- }
- }
-
- /**
- * Parse with simple split on whitespace characters and replace <, >, and . chars
- * @param s
- * @return
- */
- private def parseRegexSplit(s: String): (String, String, String) = {
- val s1 = s.trim
- val split = s1.substring(0, s1.lastIndexOf('.')).split("\\s", 3)
- var obj = split(2).trim
- obj = obj.substring(0, obj.lastIndexOf('.'))
- (split(0), split(1), obj)
- }
-
- private def cleanly[A, B](resource: A)(cleanup: A => Unit)(doWork: A => B): Try[B] = {
- try {
- Success(doWork(resource))
- } catch {
- case e: Exception => Failure(e)
- }
- finally {
- try {
- if (resource != null) {
- cleanup(resource)
- }
- } catch {
- case e: Exception => log.error(e.getMessage) // should be logged
- }
- }
- }
-}
\ No newline at end of file
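
The deleted relation parsed each line with a regex, a split, or Jena. A minimal sketch of the Jena-based path using the NTriplesStringToJenaTriple converter from the inference utils, assuming it is a String => Triple function as its use in RDFGraphLoader suggests (the triple literal is an example):

    import net.sansa_stack.inference.utils.NTriplesStringToJenaTriple

    val parse = new NTriplesStringToJenaTriple()
    val t = parse("<http://example.org/s> <http://example.org/p> <http://example.org/o> .")
    println(s"${t.getSubject} ${t.getPredicate} ${t.getObject}")
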
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/TurtleDataSource.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/TurtleDataSource.scala
deleted file mode 100644
index 74178cc..0000000
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/TurtleDataSource.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-package net.sansa_stack.inference.spark.data.loader.sql
-
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider, SchemaRelationProvider}
-import org.apache.spark.sql.types.StructType
-
-/**
- * @author Lorenz Buehmann
- */
-class TurtleDataSource
- extends DataSourceRegister
- with RelationProvider
- with SchemaRelationProvider {
-
- override def shortName(): String = "turtle"
-
- override def createRelation(sqlContext: SQLContext,
- parameters: Map[String, String]): BaseRelation =
- new TurtleRelation(parameters("path"), null)(sqlContext)
-
- override def createRelation(sqlContext: SQLContext,
- parameters: Map[String, String],
- schema: StructType): BaseRelation =
- new TurtleRelation(parameters("path"), schema)(sqlContext)
-}
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/TurtleRelation.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/TurtleRelation.scala
deleted file mode 100644
index c080b09..0000000
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/TurtleRelation.scala
+++ /dev/null
@@ -1,133 +0,0 @@
-package net.sansa_stack.inference.spark.data.loader.sql
-
-import java.io.ByteArrayInputStream
-
-import org.apache.hadoop.io.{LongWritable, Text}
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
-import org.apache.jena.riot.{Lang, RDFDataMgr}
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.sources.{BaseRelation, PrunedScan, TableScan}
-import org.apache.spark.sql.types.{StringType, StructField, StructType}
-import org.apache.spark.sql.{Row, SQLContext}
-
-import scala.util.{Failure, Success, Try}
-
-/**
- * A custom relation that represents RDF triples loaded from files in Turtle syntax.
- *
- * @param location
- * @param userSchema
- * @param sqlContext
- */
-class TurtleRelation(location: String, userSchema: StructType)
- (@transient val sqlContext: SQLContext)
- extends BaseRelation
- with TableScan
- with PrunedScan
- with Serializable {
-
- override def schema: StructType = {
- if (this.userSchema != null) {
- this.userSchema
- }
- else {
- StructType(
- Seq(
- StructField("s", StringType, true),
- StructField("p", StringType, true),
- StructField("o", StringType, true)
- ))
- }
- }
-
-
- import scala.collection.JavaConverters._
-
- override def buildScan(): RDD[Row] = {
-
- val confHadoop = new org.apache.hadoop.mapreduce.Job().getConfiguration
- confHadoop.set("textinputformat.record.delimiter", ".\n")
-
- // 1. parse the Turtle file into an RDD[String] with each entry containing a full Turtle snippet
- val turtleRDD = sqlContext.sparkContext.newAPIHadoopFile(
- location, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], confHadoop)
- .filter(!_._2.toString.trim.isEmpty)
- .map{ case (_, v) => v.toString }
-
-// turtleRDD.collect().foreach(chunk => println("Chunk" + chunk))
-
- // 2. we need the prefixes - two options:
- // a) assume that all prefixes occur in the beginning of the document
- // b) filter all lines that contain the prefixes
- val prefixes = turtleRDD.filter(_.startsWith("@prefix"))
-
- // we broadcast the prefixes
- val prefixesBC = sqlContext.sparkContext.broadcast(prefixes.collect())
-
- // use the Jena Turtle parser to get the triples
- val rows = turtleRDD.flatMap(ttl => {
- cleanly(new ByteArrayInputStream((prefixesBC.value.mkString("\n") + ttl).getBytes))(_.close()) { is =>
- // parse the text snippet with Jena
- val iter = RDFDataMgr.createIteratorTriples(is, Lang.TURTLE, null).asScala
-
- iter.map(t => Row.fromTuple((t.getSubject.toString, t.getPredicate.toString, t.getObject.toString))).toSeq
- }.get
-
- })
-
- rows
- }
-
- override def buildScan(requiredColumns: Array[String]): RDD[Row] = {
- val confHadoop = new org.apache.hadoop.mapreduce.Job().getConfiguration
- confHadoop.set("textinputformat.record.delimiter", ".\n")
-
- // 1. parse the Turtle file into an RDD[String] with each entry containing a full Turtle snippet
- val turtleRDD = sqlContext.sparkContext.newAPIHadoopFile(
- location, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], confHadoop)
- .filter(!_._2.toString.trim.isEmpty)
- .map{ case (_, v) => v.toString.trim }
-
-// turtleRDD.collect().foreach(chunk => println("Chunk:" + chunk))
-
- // 2. we need the prefixes - two options:
- // a) assume that all prefixes occur in the beginning of the document
- // b) filter all lines that contain the prefixes
- val prefixes = turtleRDD.filter(_.startsWith("@prefix"))
-
- // we broadcast the prefixes
- val prefixesBC = sqlContext.sparkContext.broadcast(prefixes.collect())
-
- // use the Jena Turtle parser to get the triples
- val rows = turtleRDD.flatMap(ttl => {
-// println("snippet:" + prefixesBC.value.mkString("\n") + ttl)
- cleanly(new ByteArrayInputStream((prefixesBC.value.mkString("\n") + ttl).getBytes))(_.close()) { is =>
- // parse the text snippet with Jena
- val iter = RDFDataMgr.createIteratorTriples(is, Lang.TURTLE, null).asScala
-
- iter.map(t => Row.fromTuple((t.getSubject.toString, t.getPredicate.toString, t.getObject.toString))).toSeq
- }.get
-
- })
-
- rows
- }
-
-
- def cleanly[A, B](resource: A)(cleanup: A => Unit)(doWork: A => B): Try[B] = {
- try {
- Success(doWork(resource))
- } catch {
- case e: Exception => Failure(e)
- }
- finally {
- try {
- if (resource != null) {
- cleanup(resource)
- }
- } catch {
- case e: Exception => println(e) // should be logged
- }
- }
- }
-}
\ No newline at end of file
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/package.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/package.scala
deleted file mode 100644
index 3cbe8da..0000000
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/loader/sql/package.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-package net.sansa_stack.inference.spark.data.loader.sql
-
-import com.typesafe.config.{Config, ConfigFactory}
-import net.sansa_stack.inference.utils.Logging
-import org.apache.jena.riot.Lang
-import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter}
-
-/**
- * Wrap implicits to load/write RDF data into/from a [[DataFrame]].
- */
-package object rdf {
-
- /**
- * Adds methods, `ntriples` and `turtle`, to [[DataFrameWriter]] that allows to write N-Triples and Turtle files.
- */
- implicit class RDFDataFrameWriter[T](writer: DataFrameWriter[T]) {
- def rdf: String => Unit = writer.format("ntriples").save
- def ntriples: String => Unit = writer.format("ntriples").save
- }
-
- /**
- * Adds methods, `rdf`, `ntriples` and `turtle`, to [[DataFrameReader]] that allows to read N-Triples and Turtle files.
- */
- implicit class RDFDataFrameReader(reader: DataFrameReader) extends Logging {
-
- @transient lazy val conf: Config = ConfigFactory.load("rdf_loader")
-
- /**
- * Load RDF data into a [[DataFrame]].
- * Currently, only N-Triples and Turtle syntax are supported!
- * @param lang the RDF language (Turtle or N-Triples)
- * @return a `DataFrame[(String, String, String)]`
- */
- def rdf(lang: Lang): String => DataFrame = lang match {
- case i if lang == Lang.NTRIPLES => ntriples
- case j if lang == Lang.TURTLE => turtle
- case _ => throw new IllegalArgumentException(s"${lang.getLabel} syntax not supported yet!")
- }
- /**
- * Load RDF data in N-Triples syntax into a [[DataFrame]] with columns `s`, `p`, and `o`.
- * @return a [[DataFrame]][(String, String, String)]
- */
- def ntriples: String => DataFrame = {
- log.debug(s"Parsing N-Triples with ${conf.getString("rdf.ntriples.parser")} ...")
- reader.format("ntriples").load
- }
- /**
- * Load RDF data in Turtle syntax into a [[DataFrame]] with columns `s`, `p`, and `o`.
- * @return a [[DataFrame]][(String, String, String)]
- */
- def turtle: String => DataFrame = reader.format("turtle").load
- }
-}
\ No newline at end of file
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/EmptyRDFGraphDataFrame.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/EmptyRDFGraphDataFrame.scala
index 94f419d..5a4f73e 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/EmptyRDFGraphDataFrame.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/EmptyRDFGraphDataFrame.scala
@@ -4,6 +4,8 @@ import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SQLContext}
/**
+ * Represents an empty RDF graph as a DataFrame.
+ *
* @author Lorenz Buehmann
*/
object EmptyRDFGraphDataFrame {
@@ -13,7 +15,7 @@ object EmptyRDFGraphDataFrame {
val schemaString = "subject predicate object"
// generate the schema based on the string of schema
- val schema = StructType(schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, true)))
+ val schema = StructType(schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = true)))
// convert triples RDD to rows
val rowRDD = sqlContext.sparkContext.emptyRDD[Row]
@@ -26,4 +28,4 @@ object EmptyRDFGraphDataFrame {
triplesDataFrame
}
-}
\ No newline at end of file
+}
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/RDFGraphDataset.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/RDFGraphDataset.scala
index 089638a..bc635ce 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/RDFGraphDataset.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/RDFGraphDataset.scala
@@ -35,7 +35,7 @@ class RDFGraphDataset(override val triples: Dataset[Triple])
def unionAll(graphs: Seq[RDFGraphDataset]): RDFGraphDataset = {
// the Dataframe based solution
- return graphs.reduce(_ union _)
+ graphs.reduce(_ union _)
// // to limit the lineage, we convert to RDDs first, and use the SparkContext Union method for a sequence of RDDs
// val df: Option[DataFrame] = graphs match {
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/RDFTuple.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/RDFTuple.scala
index fae47f7..2e0efea 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/RDFTuple.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/data/model/RDFTuple.scala
@@ -5,10 +5,10 @@ package net.sansa_stack.inference.spark.data.model
*
* @param s the subject
* @param o the object
- *
* @author Lorenz Buehmann
*/
case class RDFTuple(s: String, o: String) extends Product2[String, String] {
- override def _1: String = s
- override def _2: String = o
- }
\ No newline at end of file
+ override def _1: String = s
+
+ override def _2: String = o
+}
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/AbstractForwardRuleReasonerRDFS.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/AbstractForwardRuleReasonerRDFS.scala
index f77e035..e9e6f67 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/AbstractForwardRuleReasonerRDFS.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/AbstractForwardRuleReasonerRDFS.scala
@@ -51,14 +51,14 @@ abstract class AbstractForwardRuleReasonerRDFS[Rdf <: RDF, D, G <: AbstractRDFGr
// println("others:" + others.size())
/*
- rdfs5 xxx rdfs:subPropertyOf yyy .
- yyy rdfs:subPropertyOf zzz . xxx rdfs:subPropertyOf zzz .
+ rdfs5 xxx rdfs:subPropertyOf yyy .
+ yyy rdfs:subPropertyOf zzz . => xxx rdfs:subPropertyOf zzz .
*/
val r5 = rule5(graph)
/*
- rdfs7 aaa rdfs:subPropertyOf bbb .
- xxx aaa yyy . xxx bbb yyy .
+ rdfs7 aaa rdfs:subPropertyOf bbb .
+ xxx aaa yyy . => xxx bbb yyy .
*/
val r7 = rule7(others)
others = others.union(r7)
@@ -73,8 +73,8 @@ abstract class AbstractForwardRuleReasonerRDFS[Rdf <: RDF, D, G <: AbstractRDFGr
val r11 = rule11(graph)
/*
- rdfs9 xxx rdfs:subClassOf yyy .
- zzz rdf:type xxx . zzz rdf:type yyy .
+ rdfs9 xxx rdfs:subClassOf yyy .
+ zzz rdf:type xxx . => zzz rdf:type yyy .
*/
val r9 = rule9(types)
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/FixpointIteration.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/FixpointIteration.scala
index 9785745..c852c72 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/FixpointIteration.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/FixpointIteration.scala
@@ -27,7 +27,7 @@ object FixpointIteration extends Logging {
* the termination criterion. The iterations terminate when either the termination criterion
* [[RDD]] contains no elements or when `maxIterations` iterations have been performed.
*
- **/
+ */
def apply[T: ClassTag](maxIterations: Int = 10)(rdd: RDD[T], f: RDD[T] => RDD[T]): RDD[T] = {
var newRDD = rdd
newRDD.cache()
@@ -56,7 +56,7 @@ object FixpointIteration extends Logging {
* the termination criterion. The iterations terminate when either the termination criterion
* RDD contains no elements or when `maxIterations` iterations have been performed.
*
- **/
+ */
def apply2[T: ClassTag](maxIterations: Int = 10)(dataset: Dataset[T], f: Dataset[T] => Dataset[T]): Dataset[T] = {
var newDS = dataset
newDS.cache()
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasoner.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasoner.scala
index c0e1d53..a76720c 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasoner.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasoner.scala
@@ -15,6 +15,16 @@ import scala.collection.mutable
*/
trait ForwardRuleReasoner extends Profiler {
+ /**
+ * Applies forward chaining to the given RDD of RDF triples and returns a new
+ * RDD of RDF triples that contains all additional triples based on the underlying
+ * set of rules.
+ *
+ * @param triples the RDF triples
+ * @return the materialized set of RDF triples
+ */
+ def apply(triples: RDD[Triple]): RDD[Triple] = apply(RDFGraph(triples)).triples
+
/**
* Applies forward chaining to the given RDF graph and returns a new RDF graph that contains all additional
* triples based on the underlying set of rules.
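
A usage sketch for the new RDD-level convenience overload added above, assuming ForwardRuleReasonerRDFS mixes in this trait (the helper name and data are placeholders):

    import org.apache.jena.graph.Triple
    import org.apache.spark.SparkContext
    import org.apache.spark.rdd.RDD
    import net.sansa_stack.inference.spark.forwardchaining.triples.ForwardRuleReasonerRDFS

    def materialize(sc: SparkContext, triples: RDD[Triple]): RDD[Triple] = {
      val reasoner = new ForwardRuleReasonerRDFS(sc)
      // wraps the RDD in an RDFGraph, applies the RDFS rules and returns the inferred triples
      reasoner(triples)
    }
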
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerOWLHorst.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerOWLHorst.scala
index 03ee540..d6690ca 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerOWLHorst.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerOWLHorst.scala
@@ -179,8 +179,8 @@ class ForwardRuleReasonerOWLHorst(sc: SparkContext, parallelism: Int = 2) extend
// 2. SubPropertyOf inheritance according to rdfs7 is computed
/*
- rdfs7 aaa rdfs:subPropertyOf bbb .
- xxx aaa yyy . xxx bbb yyy .
+ rdfs7 aaa rdfs:subPropertyOf bbb .
+ xxx aaa yyy . => xxx bbb yyy .
*/
val triplesRDFS7 =
triplesFiltered
@@ -193,8 +193,8 @@ class ForwardRuleReasonerOWLHorst(sc: SparkContext, parallelism: Int = 2) extend
// 3. Domain and Range inheritance according to rdfs2 and rdfs3 is computed
/*
- rdfs2 aaa rdfs:domain xxx .
- yyy aaa zzz . yyy rdf:type xxx .
+ rdfs2 aaa rdfs:domain xxx .
+ yyy aaa zzz . => yyy rdf:type xxx .
*/
val triplesRDFS2 =
rdfs7Res
@@ -202,8 +202,8 @@ class ForwardRuleReasonerOWLHorst(sc: SparkContext, parallelism: Int = 2) extend
.map(t => Triple.create(t.s, RDF.`type`.asNode, domainMapBC.value(t.p)))
/*
- rdfs3 aaa rdfs:range xxx .
- yyy aaa zzz . zzz rdf:type xxx .
+ rdfs3 aaa rdfs:range xxx .
+ yyy aaa zzz . => zzz rdf:type xxx .
*/
val triplesRDFS3 =
rdfs7Res
@@ -215,8 +215,8 @@ class ForwardRuleReasonerOWLHorst(sc: SparkContext, parallelism: Int = 2) extend
// input are the rdf:type triples from RDFS2/RDFS3 and the ones contained in the original graph
/*
- rdfs9 xxx rdfs:subClassOf yyy .
- zzz rdf:type xxx . zzz rdf:type yyy .
+ rdfs9 xxx rdfs:subClassOf yyy .
+ zzz rdf:type xxx . => zzz rdf:type yyy .
*/
val triplesRDFS9 =
triplesRDFS2
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerRDFS.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerRDFS.scala
index 5aa4984..65f98fb 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerRDFS.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerRDFS.scala
@@ -10,9 +10,10 @@ import org.apache.jena.graph.Triple
import org.apache.jena.vocabulary.{RDF, RDFS}
import org.apache.spark.SparkContext
import org.slf4j.LoggerFactory
-
import scala.collection.mutable
+import org.apache.spark.rdd.RDD
+
/**
* A forward chaining implementation of the RDFS entailment regime.
*
@@ -39,8 +40,10 @@ class ForwardRuleReasonerRDFS(sc: SparkContext, parallelism: Int = 2) extends Tr
// as an optimization, we can extract all schema triples first which avoids to run on the whole dataset
// for each schema triple later
- val schemaTriples = if (extractSchemaTriplesInAdvance) new RDFSSchemaExtractor().extract(triplesRDD)
+ val schemaTriples = if (extractSchemaTriplesInAdvance) new RDFSSchemaExtractor().extract(triplesRDD).cache()
else triplesRDD
+ schemaTriples.setName("schema triples")
+// println(s"#schema: ${schemaTriples.count()}")
// 1. we first compute the transitive closure of rdfs:subPropertyOf and rdfs:subClassOf
@@ -49,14 +52,14 @@ class ForwardRuleReasonerRDFS(sc: SparkContext, parallelism: Int = 2) extends Tr
* rdfs11 xxx rdfs:subClassOf yyy .
* yyy rdfs:subClassOf zzz . xxx rdfs:subClassOf zzz .
*/
- val subClassOfTriples = extractTriples(schemaTriples, RDFS.subClassOf.asNode()) // extract rdfs:subClassOf triples
+ val subClassOfTriples = extractTriples(schemaTriples, RDFS.subClassOf.asNode()).cache() // extract rdfs:subClassOf triples
val subClassOfTriplesTrans = computeTransitiveClosure(subClassOfTriples, RDFS.subClassOf.asNode()).setName("rdfs11")// mutable.Set()++subClassOfTriples.collect())
/*
- rdfs5 xxx rdfs:subPropertyOf yyy .
- yyy rdfs:subPropertyOf zzz . xxx rdfs:subPropertyOf zzz .
+ rdfs5 xxx rdfs:subPropertyOf yyy .
+ yyy rdfs:subPropertyOf zzz . => xxx rdfs:subPropertyOf zzz .
*/
- val subPropertyOfTriples = extractTriples(schemaTriples, RDFS.subPropertyOf.asNode()) // extract rdfs:subPropertyOf triples
+ val subPropertyOfTriples = extractTriples(schemaTriples, RDFS.subPropertyOf.asNode()).cache() // extract rdfs:subPropertyOf triples
val subPropertyOfTriplesTrans = computeTransitiveClosure(subPropertyOfTriples, RDFS.subPropertyOf.asNode()).setName("rdfs5")// extractTriples(mutable.Set()++subPropertyOfTriples.collect(), RDFS.subPropertyOf.getURI))
// a map structure should be more efficient
@@ -71,7 +74,9 @@ class ForwardRuleReasonerRDFS(sc: SparkContext, parallelism: Int = 2) extends Tr
// split by rdf:type
val split = triplesRDD.partitionBy(t => t.p == RDF.`type`.asNode)
var typeTriples = split._1
+ typeTriples.setName("rdf:type triples")
var otherTriples = split._2
+ otherTriples.setName("other triples")
// val formatter = java.text.NumberFormat.getIntegerInstance
// println("triples" + formatter.format(triplesRDD.count()))
@@ -81,8 +86,8 @@ class ForwardRuleReasonerRDFS(sc: SparkContext, parallelism: Int = 2) extends Tr
// 2. SubPropertyOf inheritance according to rdfs7 is computed
/*
- rdfs7 aaa rdfs:subPropertyOf bbb .
- xxx aaa yyy . xxx bbb yyy .
+ rdfs7 aaa rdfs:subPropertyOf bbb .
+ xxx aaa yyy . => xxx bbb yyy .
*/
val triplesRDFS7 =
otherTriples // all triples (s p1 o)
@@ -92,13 +97,13 @@ class ForwardRuleReasonerRDFS(sc: SparkContext, parallelism: Int = 2) extends Tr
.setName("rdfs7")
// add triples
- otherTriples = otherTriples.union(triplesRDFS7)
+ otherTriples = otherTriples.union(triplesRDFS7).setName("other triples with rdfs7")
// 3. Domain and Range inheritance according to rdfs2 and rdfs3 is computed
/*
- rdfs2 aaa rdfs:domain xxx .
- yyy aaa zzz . yyy rdf:type xxx .
+ rdfs2 aaa rdfs:domain xxx .
+ yyy aaa zzz . => yyy rdf:type xxx .
*/
val domainTriples = extractTriples(schemaTriples, RDFS.domain.asNode())
val domainMap = domainTriples.map(t => (t.s, t.o)).collect.toMap
@@ -111,8 +116,8 @@ class ForwardRuleReasonerRDFS(sc: SparkContext, parallelism: Int = 2) extends Tr
.setName("rdfs2")
/*
- rdfs3 aaa rdfs:range xxx .
- yyy aaa zzz . zzz rdf:type xxx .
+ rdfs3 aaa rdfs:range xxx .
+ yyy aaa zzz . => zzz rdf:type xxx .
*/
val rangeTriples = extractTriples(schemaTriples, RDFS.range.asNode())
val rangeMap = rangeTriples.map(t => (t.s, t.o)).collect().toMap
@@ -125,16 +130,16 @@ class ForwardRuleReasonerRDFS(sc: SparkContext, parallelism: Int = 2) extends Tr
.setName("rdfs3")
// rdfs2 and rdfs3 generated rdf:type triples which we'll add to the existing ones
- val triples23 = triplesRDFS2.union(triplesRDFS3)
+ val triples23 = triplesRDFS2.union(triplesRDFS3).setName("rdfs2 + rdfs3")
// all rdf:type triples here as intermediate result
- typeTriples = typeTriples.union(triples23)
+ typeTriples = typeTriples.union(triples23).setName("rdf:type + rdfs2 + rdfs3")
// 4. SubClass inheritance according to rdfs9
/*
- rdfs9 xxx rdfs:subClassOf yyy .
- zzz rdf:type xxx . zzz rdf:type yyy .
+ rdfs9 xxx rdfs:subClassOf yyy .
+ zzz rdf:type xxx . => zzz rdf:type yyy .
*/
val triplesRDFS9 =
typeTriples // all rdf:type triples (s a A)
@@ -168,8 +173,9 @@ class ForwardRuleReasonerRDFS(sc: SparkContext, parallelism: Int = 2) extends Tr
subClassOfTriplesTrans,
subPropertyOfTriplesTrans,
typeTriples,
- triplesRDFS7,
+// triplesRDFS7,
triplesRDFS9))
+ .setName("rdf:type + other + rdfs2 + rdfs3 + rdfs5 + rdfs7 + rdfs9 + rdfs11")
.distinct(parallelism)
// we perform also additional rules if enabled
@@ -180,7 +186,7 @@ class ForwardRuleReasonerRDFS(sc: SparkContext, parallelism: Int = 2) extends Tr
// rdfs4a: (s p o) => (s rdf:type rdfs:Resource)
// rdfs4b: (s p o) => (o rdf:type rdfs:Resource) // filter by literals
- // TODO not sure which version is more effcient, using a FILTER + UNION, or doing it via faltMap but creating Set objects
+ // TODO not sure which version is more efficient, using a FILTER + UNION, or doing it via flatMap but creating Set objects
// val rdfs4 = allTriples.map(t => Triple.create(t.s, RDF.`type`.asNode(), RDFS.Resource.asNode()))
// .union(
// allTriples.filter(!_.getObject.isLiteral).map(t => Triple.create(t.o, RDF.`type`.asNode(), RDFS.Resource.asNode())))
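
A compact sketch of the broadcast pattern behind the rdfs2 step above, written against plain Jena accessors (the helper name and signature are assumptions; the real code uses the enriched t.s/t.p/t.o accessors and a precomputed domain map):

    import org.apache.jena.graph.{Node, Triple}
    import org.apache.jena.vocabulary.RDF
    import org.apache.spark.SparkContext
    import org.apache.spark.rdd.RDD

    // rdfs2: aaa rdfs:domain xxx .  yyy aaa zzz .  =>  yyy rdf:type xxx .
    def rdfs2(sc: SparkContext, triples: RDD[Triple], domainMap: Map[Node, Node]): RDD[Triple] = {
      val domainMapBC = sc.broadcast(domainMap)
      triples
        .filter(t => domainMapBC.value.contains(t.getPredicate))
        .map(t => Triple.create(t.getSubject, RDF.`type`.asNode, domainMapBC.value(t.getPredicate)))
    }
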
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerRDFSDataframe.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerRDFSDataframe.scala
index 4a23263..d6f41f6 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerRDFSDataframe.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/ForwardRuleReasonerRDFSDataframe.scala
@@ -65,8 +65,8 @@ class ForwardRuleReasonerRDFSDataframe(session: SparkSession, parallelism: Int =
// val checkSubclass = udf((cls: String) => subClassOfMapBC.value.contains(cls))
// val makeSuperTypeTriple = udf((ind: String, cls: String) => (ind, subClassOfMapBC.value(cls)))
/*
- rdfs5 xxx rdfs:subPropertyOf yyy .
- yyy rdfs:subPropertyOf zzz . xxx rdfs:subPropertyOf zzz .
+ rdfs5 xxx rdfs:subPropertyOf yyy .
+ yyy rdfs:subPropertyOf zzz . => xxx rdfs:subPropertyOf zzz .
*/
val subPropertyOfTriples = index(RDFS.subPropertyOf.asNode()) // extract rdfs:subPropertyOf triples
val subPropertyOfTriplesTrans = broadcast(computeTransitiveClosureDF(subPropertyOfTriples.as[RDFTriple]).toDF().alias("SP"))
@@ -95,8 +95,8 @@ class ForwardRuleReasonerRDFSDataframe(session: SparkSession, parallelism: Int =
// 2. SubPropertyOf inheritance according to rdfs7 is computed
/*
- rdfs7 aaa rdfs:subPropertyOf bbb .
- xxx aaa yyy . xxx bbb yyy .
+ rdfs7 aaa rdfs:subPropertyOf bbb .
+ xxx aaa yyy . => xxx bbb yyy .
*/
val triplesRDFS7 =
triples // all triples (s p1 o)
@@ -117,8 +117,8 @@ class ForwardRuleReasonerRDFSDataframe(session: SparkSession, parallelism: Int =
// 3. Domain and Range inheritance according to rdfs2 and rdfs3 is computed
/*
- rdfs2 aaa rdfs:domain xxx .
- yyy aaa zzz . yyy rdf:type xxx .
+ rdfs2 aaa rdfs:domain xxx .
+ yyy aaa zzz . yyy rdf:type xxx .
*/
val domainTriples = broadcast(index(RDFS.domain.asNode()).alias("DOM"))
@@ -132,8 +132,8 @@ class ForwardRuleReasonerRDFSDataframe(session: SparkSession, parallelism: Int =
// triplesRDFS2.explain(true)
/*
- rdfs3 aaa rdfs:range xxx .
- yyy aaa zzz . zzz rdf:type xxx .
+ rdfs3 aaa rdfs:range xxx .
+ yyy aaa zzz . zzz rdf:type xxx .
*/
val rangeTriples = broadcast(index(RDFS.range.asNode()).alias("RAN"))
@@ -154,8 +154,8 @@ class ForwardRuleReasonerRDFSDataframe(session: SparkSession, parallelism: Int =
// 4. SubClass inheritance according to rdfs9
/*
- rdfs9 xxx rdfs:subClassOf yyy .
- zzz rdf:type xxx . zzz rdf:type yyy .
+ rdfs9 xxx rdfs:subClassOf yyy .
+ zzz rdf:type xxx . zzz rdf:type yyy .
*/
val tuplesRDFS9 = typeTuples
.join(subClassOfTriplesTrans, $"TYPES.${sqlSchema.objectCol}" === $"SC.${sqlSchema.subjectCol}", "inner")
@@ -289,7 +289,7 @@ object ForwardRuleReasonerRDFSDataframe {
def apply(session: SparkSession, parallelism: Int = 2): ForwardRuleReasonerRDFSDataframe = new ForwardRuleReasonerRDFSDataframe(session, parallelism)
def main(args: Array[String]): Unit = {
- import net.sansa_stack.inference.spark.data.loader.sql.rdf._
+ import net.sansa_stack.rdf.spark.io._
val parallelism = 2
@@ -320,4 +320,4 @@ object ForwardRuleReasonerRDFSDataframe {
val infGraph = ForwardRuleReasonerRDFSDataframe(session).apply(graph)
println(infGraph.size())
}
-}
\ No newline at end of file
+}
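In the DataFrame-based reasoner the same rules become SQL-style joins against a broadcast copy of the (transitively closed) schema, mirroring the aliased joins edited above. A minimal sketch of the rdfs7 pattern, assuming a plain s/p/o string schema and a hypothetical local session rather than the project's SQL schema abstraction:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.broadcast

object Rdfs7DataframeSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("rdfs7-df-sketch").getOrCreate()
    import spark.implicits._

    val triples = Seq(
      ("ex:alice", "ex:hasBoss", "ex:bob"),
      ("ex:hasBoss", "rdfs:subPropertyOf", "ex:worksWith")
    ).toDF("s", "p", "o")

    // schema part, assumed already transitively closed; broadcast because it is small
    val subPropertyOf = broadcast(
      triples.filter($"p" === "rdfs:subPropertyOf").select($"s".as("sub"), $"o".as("sup")))

    // rdfs7: (xxx aaa yyy), (aaa rdfs:subPropertyOf bbb) => (xxx bbb yyy)
    val triplesRDFS7 = triples.as("T")
      .join(subPropertyOf.as("SP"), $"T.p" === $"SP.sub")
      .select($"T.s".as("s"), $"SP.sup".as("p"), $"T.o".as("o"))

    triplesRDFS7.show(false) // [ex:alice, ex:worksWith, ex:bob]
    spark.stop()
  }
}
```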
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/TransitiveReasoner.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/TransitiveReasoner.scala
index 5f0219e..03a4c79 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/TransitiveReasoner.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/forwardchaining/triples/TransitiveReasoner.scala
@@ -78,7 +78,7 @@ class TransitiveReasoner(sc: SparkContext, val properties: Seq[Node], val parall
private def addTransitive(triples: Set[Triple]): Set[Triple] = {
triples ++ (
for (t1 <- triples; t2 <- triples if t1.o == t2.s)
- yield Triple.create(t1.s, t1.p, t2.o))
+ yield Triple.create(t1.s, t1.p, t2.o))
}
/**
@@ -101,7 +101,7 @@ class TransitiveReasoner(sc: SparkContext, val properties: Seq[Node], val parall
/**
* Computes the transitive closure for the given predicate on an RDD of triples.
*
- * @param triples the RDD of triples
+ * @param triples the RDD of triples
* @param predicate the predicate
* @return an RDD containing the transitive closure of the triples
*/
@@ -139,7 +139,7 @@ class TransitiveReasoner(sc: SparkContext, val properties: Seq[Node], val parall
rdd.join(edgesReversed).map(x => (x._2._2, x._2._1))
}
-// tc = FixpointIteration(10)(tc, f)
+ // tc = FixpointIteration(10)(tc, f)
// the join is iterated until a fixed point is reached
var i = 1
@@ -190,14 +190,14 @@ class TransitiveReasoner(sc: SparkContext, val properties: Seq[Node], val parall
// the join is iterated until a fixed point is reached
var i = 1
- while(!deltaTC.isEmpty()) {
+ while (!deltaTC.isEmpty()) {
log.info(s"iteration $i...")
// perform the join (x, y) x (y, x), obtaining an RDD of (x=y, (y, x)) pairs,
// then project the result to obtain the new (x, y) paths.
deltaTC = deltaTC.join(edgesReversed)
- .map(x => (x._2._2, x._2._1))
- .subtract(tc).distinct().cache()
+ .map(x => (x._2._2, x._2._1))
+ .subtract(tc).distinct().cache()
// add to TC
tc = tc.union(deltaTC).cache()
@@ -217,7 +217,7 @@ class TransitiveReasoner(sc: SparkContext, val properties: Seq[Node], val parall
*/
def computeTransitiveClosure(edges: Dataset[Triple]): Dataset[Triple] = {
log.info("computing TC...")
-// implicit val myObjEncoder = org.apache.spark.sql.Encoders.kryo[RDFTriple]
+ // implicit val myObjEncoder = org.apache.spark.sql.Encoders.kryo[RDFTriple]
val spark = edges.sparkSession.sqlContext
import spark.implicits._
implicit val myObjEncoder = org.apache.spark.sql.Encoders.kryo[Triple]
@@ -242,12 +242,12 @@ class TransitiveReasoner(sc: SparkContext, val properties: Seq[Node], val parall
tc.createOrReplaceTempView("SC")
var joined = tc.as("A").join(tc.as("B"), $"A.o" === $"B.s").select("A.s", "A.p", "B.o").as[Triple]
-// var joined = tc
-// .join(edges, tc("o") === edges("s"))
-// .select(tc("s"), tc("p"), edges("o"))
-// .as[RDFTriple]
-// tc.sqlContext.
-// sql("SELECT A.subject, A.predicate, B.object FROM SC A INNER JOIN SC B ON A.object = B.subject")
+ // var joined = tc
+ // .join(edges, tc("o") === edges("s"))
+ // .select(tc("s"), tc("p"), edges("o"))
+ // .as[RDFTriple]
+ // tc.sqlContext.
+ // sql("SELECT A.subject, A.predicate, B.object FROM SC A INNER JOIN SC B ON A.object = B.subject")
// joined.explain()
// var joined = df1.join(df2, df1("object") === df2("subject"), "inner")
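The loops touched above are the core of the transitive closure computation: the current closure is joined with the original edges until no new pairs appear (the delta-based variant only re-joins the newly derived pairs). A minimal sketch of the naive fixpoint on (subject, object) pairs, assuming a hypothetical local session and toy edge data:

```scala
import org.apache.spark.sql.SparkSession

object TransitiveClosureSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("tc-sketch").getOrCreate()
    val sc = spark.sparkContext

    // edges of a single transitive property (e.g. rdfs:subClassOf) as (subject, object) pairs
    val edges = sc.parallelize(Seq(("A", "B"), ("B", "C"), ("C", "D")))

    var tc = edges.cache()
    // edges keyed by their object, so existing paths can be extended on the left
    val edgesByObject = edges.map { case (s, o) => (o, s) }.cache()

    // naive fixpoint: stop when an iteration adds no new pairs
    var oldCount = 0L
    var nextCount = tc.count()
    while (nextCount != oldCount) {
      oldCount = nextCount
      // (a, x) and (x, b) => (a, b): join paths keyed by subject with edges keyed by object
      val newPaths = tc.join(edgesByObject).map { case (_, (b, a)) => (a, b) }
      tc = tc.union(newPaths).distinct().cache()
      nextCount = tc.count()
    }

    tc.collect().sorted.foreach(println) // includes (A,C), (A,D), (B,D)
    spark.stop()
  }
}
```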
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/utils/PrettyDuration.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/utils/PrettyDuration.scala
index ea4c1cd..66cde94 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/utils/PrettyDuration.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/utils/PrettyDuration.scala
@@ -8,7 +8,7 @@ object PrettyDuration {
def pretty: String = pretty(includeNanos = false)
- /** Selects most apropriate TimeUnit for given duration and formats it accordingly */
+ /** Selects most appropriate TimeUnit for given duration and formats it accordingly */
def pretty(includeNanos: Boolean, precision: Int = 4): String = {
require(precision > 0, "precision must be > 0")
@@ -48,4 +48,4 @@ object PrettyDuration {
}
}
-}
\ No newline at end of file
+}
diff --git a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/utils/RDFSSchemaExtractor.scala b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/utils/RDFSSchemaExtractor.scala
index 8fd3335..91767cb 100644
--- a/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/utils/RDFSSchemaExtractor.scala
+++ b/sansa-inference-spark/src/main/scala/net/sansa_stack/inference/spark/utils/RDFSSchemaExtractor.scala
@@ -35,13 +35,10 @@ class RDFSSchemaExtractor() extends Logging with Serializable {
* @return the RDF graph containing only the schema triples
*/
def extract(graph: RDFGraph): RDFGraph = {
- log.info("Started schema extraction...")
val filteredTriples = graph.triples.filter(t => properties.contains(t.p))
- log.info("Finished schema extraction.")
-
- new RDFGraph(filteredTriples)
+ RDFGraph(filteredTriples)
}
/**
@@ -51,11 +48,11 @@ class RDFSSchemaExtractor() extends Logging with Serializable {
* @return the schema triples
*/
def extract(triples: RDD[Triple]): RDD[Triple] = {
- log.info("Started schema extraction...")
+// log.info("Started schema extraction...")
val filteredTriples = triples.filter(t => properties.contains(t.p))
- log.info("Finished schema extraction.")
+// log.info("Finished schema extraction.")
filteredTriples
}
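The schema extractor boils down to a predicate filter over the triple RDD: only triples whose predicate belongs to the RDFS schema vocabulary are kept, so the (small) schema can later be collected or broadcast. A minimal sketch, assuming the property set is given explicitly as URIs and a made-up object name; the real class derives the set from the supported rules:

```scala
import org.apache.jena.graph.Triple
import org.apache.jena.vocabulary.RDFS
import org.apache.spark.rdd.RDD

object SchemaExtractionSketch {
  // URIs of the schema-level properties to keep; kept as strings so the closure stays serializable
  private val schemaProperties: Set[String] =
    Set(RDFS.subClassOf, RDFS.subPropertyOf, RDFS.domain, RDFS.range).map(_.getURI)

  // same idea as the extract methods above: a plain predicate filter over the triple RDD
  def extractSchema(triples: RDD[Triple]): RDD[Triple] =
    triples.filter(t => t.getPredicate.isURI && schemaProperties.contains(t.getPredicate.getURI))
}
```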
diff --git a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/DatastructureSerializationPerformanceTests.scala b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/DatastructureSerializationPerformanceTests.scala
index cc22c96..5d969f9 100644
--- a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/DatastructureSerializationPerformanceTests.scala
+++ b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/DatastructureSerializationPerformanceTests.scala
@@ -1,11 +1,13 @@
package net.sansa_stack.inference.spark
-import net.sansa_stack.inference.utils.{NTriplesStringToJenaTriple, NTriplesStringToRDFTriple}
import org.apache.jena.graph.{Node, Triple}
import org.apache.spark.SparkConf
-import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerStageCompleted}
+import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}
import org.apache.spark.sql.{Encoder, Encoders, Row, SparkSession}
-//import org.apache.spark.groupon.metrics.{SparkMeter, SparkTimer, UserMetricsSystem}
+
+import net.sansa_stack.inference.utils.{NTriplesStringToJenaTriple, NTriplesStringToRDFTriple}
+import net.sansa_stack.rdf.spark.io.NTripleReader
+// import org.apache.spark.groupon.metrics.{SparkMeter, SparkTimer, UserMetricsSystem}
import scala.reflect.ClassTag
@@ -30,7 +32,7 @@ object DatastructureSerializationPerformanceTests {
conf.registerKryoClasses(Array(classOf[org.apache.jena.graph.Triple], classOf[org.apache.jena.graph.Node]))
conf.set("spark.extraListeners", "net.sansa_stack.inference.spark.utils.CustomSparkListener")
- val parallelism = 4
+ val parallelism = 20
class JobListener extends SparkListener {
override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
@@ -40,7 +42,7 @@ object DatastructureSerializationPerformanceTests {
// the SPARK config
val session = SparkSession.builder
- .appName(s"SPARK RDFS Reasoning")
+ .appName(s"RDF Triple Encoder Performance")
.master("local[4]")
.config("spark.eventLog.enabled", "true")
.config("spark.hadoop.validateOutputSpecs", "false") // override output files
@@ -74,10 +76,8 @@ object DatastructureSerializationPerformanceTests {
.getOrCreate()
- def loadAndDictinctJena(path: String): Unit = {
- val triples = session.sparkContext
- .textFile(path, 4) // read the text file
- .map(new NTriplesStringToJenaTriple())
+ def loadAndDistinctJena(path: String): Unit = {
+ val triples = NTripleReader.load(session, path)
triples.cache()
@@ -88,11 +88,12 @@ object DatastructureSerializationPerformanceTests {
val pair = triples.map(t => (t.getSubject, (t.getPredicate, t.getObject))) // map to PairRDD
val joinCount = pair.join(pair).count()
- logger.info(distinctCount)
- logger.info(joinCount)
+ logger.info("Jena RDD[Triple]")
+ logger.info(s"#triples:$distinctCount")
+ logger.info(s"#joined triples(s-s):$joinCount")
}
- def loadAndDictinctPlain(path: String): Unit = {
+ def loadAndDistinctPlain(path: String): Unit = {
val triples = session.sparkContext
.textFile(path, 4) // read the text file
.flatMap(line => new NTriplesStringToRDFTriple().apply(line))
@@ -124,10 +125,9 @@ object DatastructureSerializationPerformanceTests {
implicit def tuple3[A1, A2, A3](implicit e1: Encoder[A1], e2: Encoder[A2], e3: Encoder[A3]): Encoder[(A1, A2, A3)] =
Encoders.tuple[A1, A2, A3](e1, e2, e3)
- val triples = session.sparkContext
- .textFile(path, 4) // read the text file
- .map(new NTriplesStringToJenaTriple())
- .map(t => (t.getSubject, t.getPredicate, t.getObject))
+ val triplesRDD = NTripleReader.load(session, path)
+
+ val tripleNodesRDD = triplesRDD.map(t => (t.getSubject, t.getPredicate, t.getObject))
val conv = new NTriplesStringToJenaTriple()
var tripleDS =
@@ -136,29 +136,38 @@ object DatastructureSerializationPerformanceTests {
// val t = conv.apply(row.getString(0))
// (t.getSubject, t.getPredicate, t.getObject)
// })
- session.createDataset(triples)
+ session.createDataset(tripleNodesRDD)
.toDF("s", "p", "o")
.as[JenaTripleEncoded]
+ tripleDS.printSchema()
tripleDS.cache()
+ // show the first triples (show() prints up to 20 rows by default)
+ tripleDS.show()
+
// DISTINCT and COUNT
val distinctCount = tripleDS.distinct().count()
- // self JOIN on subject and COUNT
- val joinCount = tripleDS.alias("A").join(tripleDS.alias("B"), $"A.s" === $"B.s", "inner").count()
- logger.info(distinctCount)
- logger.info(joinCount)
+ // self JOIN on subject and COUNT
+ val triplesA = tripleDS.alias("A")
+ val triplesB = tripleDS.alias("B")
+ val triplesJoined = triplesA.joinWith(triplesB, $"A.s" === $"B.s")
+ val joinCount = triplesJoined.count()
+
+ logger.info("DataFrame[(Node, Node, Node)]")
+ logger.info(s"#triples:$distinctCount")
+ logger.info(s"#joined triples(s-s):$joinCount")
}
def main(args: Array[String]): Unit = {
val path = args(0)
-
- loadAndDictinctJena(path)
-
- loadAndDictinctPlain(path)
+//
+// loadAndDistinctJena(path)
+//
+// loadAndDistinctPlain(path)
loadAndDistinctDatasetJena(path)
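The Dataset-based measurement above needs explicit encoders because Spark cannot derive one for Jena's Node type; the test therefore combines per-field Kryo encoders into a tuple encoder. A minimal sketch of that setup, assuming a hypothetical local session, example URIs and a made-up object name:

```scala
import org.apache.jena.graph.{Node, NodeFactory}
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}

object JenaNodeEncoderSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("encoder-sketch").getOrCreate()

    // Spark has no built-in encoder for Jena's Node, so fall back to Kryo per field
    implicit val nodeEncoder: Encoder[Node] = Encoders.kryo[Node]
    implicit val tripleEncoder: Encoder[(Node, Node, Node)] =
      Encoders.tuple(nodeEncoder, nodeEncoder, nodeEncoder)

    val triples = Seq(
      (NodeFactory.createURI("http://example.org/alice"),
        NodeFactory.createURI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
        NodeFactory.createURI("http://example.org/Person")))

    val tripleDS = spark.createDataset(triples)(tripleEncoder)
    tripleDS.printSchema()               // three binary (Kryo-serialized) columns
    println(tripleDS.distinct().count()) // 1
    spark.stop()
  }
}
```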
diff --git a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/GenericDataframeVsGenericNativeExperiments.scala b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/GenericDataframeVsGenericNativeExperiments.scala
index 21a4501..11a3894 100644
--- a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/GenericDataframeVsGenericNativeExperiments.scala
+++ b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/GenericDataframeVsGenericNativeExperiments.scala
@@ -25,7 +25,7 @@ object GenericDataframeVsGenericNativeExperiments {
.appName("GenericDataframe-Vs-GenericNative-Experiments")
.master("local[4]")
.config("spark.eventLog.enabled", "true")
- .config("spark.hadoop.validateOutputSpecs", "false") //override output files
+ .config("spark.hadoop.validateOutputSpecs", "false") // override output files
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
.config("spark.default.parallelism", "4")
.config("spark.sql.shuffle.partitions", "8")
@@ -47,7 +47,7 @@ object GenericDataframeVsGenericNativeExperiments {
session = sessionBuilder.appName("generic-rdd").getOrCreate()
// load triples from disk
- var graph = RDFGraphLoader.loadFromDiskAsRDD(session, args(0), 4)//generateData(1)
+ var graph = RDFGraphLoader.loadFromDiskAsRDD(session, args(0), 4)// generateData(1)
val infGraphNative = native(graph)
diff --git a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/QueryLayerIntegration.scala b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/QueryLayerIntegration.scala
deleted file mode 100644
index b073980..0000000
--- a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/QueryLayerIntegration.scala
+++ /dev/null
@@ -1,51 +0,0 @@
-package net.sansa_stack.inference.spark
-
-/**
- * @author Lorenz Buehmann
- */
-
-object QueryLayerIntegration {
-/* def main(args: Array[String]): Unit = {
- val tempDirStr = System.getProperty("java.io.tmpdir")
- if(tempDirStr == null) {
- throw new RuntimeException("Could not obtain temporary directory")
- }
- val sparkEventsDir = new File(tempDirStr + "/spark-events")
- if(!sparkEventsDir.exists()) {
- sparkEventsDir.mkdirs()
- }
-
- val sparkSession = SparkSession.builder
- .master("local")
- .appName("spark session example")
- .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
- //.config("spark.kryo.registrationRequired", "true")
- .config("spark.eventLog.enabled", "true")
- .config("spark.kryo.registrator", String.join(", ",
- "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator",
- "net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify"
- ))
- .config("spark.default.parallelism", "4")
- .config("spark.sql.shuffle.partitions", "4")
- .getOrCreate()
-
- val triplesString =
- """ "Guy De" .
- | .
- | .
- | .
- | "Charles"@en .
- | .""".stripMargin
-
- val it = RDFDataMgr.createIteratorTriples(IOUtils.toInputStream(triplesString, "UTF-8"), Lang.NTRIPLES, "http://example.org/").asScala.toSeq
- //it.foreach { x => println("GOT: " + (if(x.getObject.isLiteral) x.getObject.getLiteralLanguage else "-")) }
- val graphRdd = sparkSession.sparkContext.parallelize(it)
-
- //val map = graphRdd.partitionGraphByPredicates
- val partitions = RdfPartitionUtilsSpark.partitionGraph(graphRdd, RdfPartitionerDefault)
-
- partitions.foreach(p => println(p._1))
- }
-
-*/
-}
diff --git a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/loader/RDFLoadingTests.scala b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/loader/RDFLoadingTests.scala
index 7c18f8b..9a13a2a 100644
--- a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/loader/RDFLoadingTests.scala
+++ b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/loader/RDFLoadingTests.scala
@@ -5,13 +5,13 @@ import org.apache.jena.riot.Lang
import org.scalatest.FunSuite
/**
- * Tests for loading triples from either N-Triples are Turtle files into a DataFrame.
+ * Tests for loading triples from either N-Triples or Turtle files into a DataFrame.
*
* @author Lorenz Buehmann
*/
class RDFLoadingTests extends FunSuite with DataFrameSuiteBase {
- import net.sansa_stack.inference.spark.data.loader.sql.rdf._
+ import net.sansa_stack.rdf.spark.io._
test("loading N-Triples file into DataFrame with REGEX parsing mode should result in 9 triples") {
val sqlCtx = sqlContext
diff --git a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/BroadcastVsRddRuleProcessingExperiments.scala b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/BroadcastVsRddRuleProcessingExperiments.scala
index e3e13f4..c59011b 100644
--- a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/BroadcastVsRddRuleProcessingExperiments.scala
+++ b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/BroadcastVsRddRuleProcessingExperiments.scala
@@ -85,8 +85,8 @@ object BroadcastVsRddRuleProcessingExperiments extends Profiler{
}
/*
- rdfs7 aaa rdfs:subPropertyOf bbb .
- xxx aaa yyy . xxx bbb yyy .
+ rdfs7 aaa rdfs:subPropertyOf bbb .
+ xxx aaa yyy . xxx bbb yyy .
*/
def rddOnly(triples: RDD[Triple]): RDD[Triple] = {
diff --git a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/RDFGraphMaterializerTest.scala b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/RDFGraphMaterializerTest.scala
index 223df1e..01bb5b1 100644
--- a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/RDFGraphMaterializerTest.scala
+++ b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/RDFGraphMaterializerTest.scala
@@ -1,13 +1,14 @@
package net.sansa_stack.inference.spark.rules
+import scala.collection.mutable
+
+import org.apache.jena.graph.Triple
import org.apache.jena.rdf.model.ModelFactory
import org.apache.spark.{SparkConf, SparkContext}
-import scala.collection.mutable
import net.sansa_stack.inference.spark.data.model.RDFGraph
import net.sansa_stack.inference.spark.data.writer.RDFGraphWriter
import net.sansa_stack.inference.spark.forwardchaining.triples.ForwardRuleReasonerRDFS
-import org.apache.jena.graph.{Node, NodeFactory, Triple}
/**
* The class to compute the materialization of a given RDF graph.
diff --git a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/SetOfRulesTest.scala b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/SetOfRulesTest.scala
index b25d3cc..26c7bc0 100644
--- a/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/SetOfRulesTest.scala
+++ b/sansa-inference-spark/src/test/scala/net/sansa_stack/inference/spark/rules/SetOfRulesTest.scala
@@ -1,17 +1,18 @@
package net.sansa_stack.inference.spark.rules
+import scala.collection.mutable
+
+import org.apache.jena.graph.Triple
+import org.apache.jena.reasoner.rulesys.Rule
+import org.apache.jena.vocabulary.{OWL2, RDF, RDFS}
+import org.apache.spark.sql.SparkSession
+
import net.sansa_stack.inference.data.JenaOps
import net.sansa_stack.inference.spark.data.loader.RDFGraphLoader
import net.sansa_stack.inference.spark.data.model.RDFGraphNative
import net.sansa_stack.inference.spark.data.writer.RDFGraphWriter
import net.sansa_stack.inference.spark.forwardchaining.triples.{ForwardRuleReasonerNaive, ForwardRuleReasonerOptimizedNative}
import net.sansa_stack.inference.utils.RuleUtils
-import org.apache.jena.graph.Triple
-import org.apache.jena.reasoner.rulesys.Rule
-import org.apache.jena.vocabulary.{OWL2, RDF, RDFS}
-import org.apache.spark.sql.SparkSession
-
-import scala.collection.mutable
/**
* A forward chaining implementation of the RDFS entailment regime.
@@ -25,7 +26,7 @@ object SetOfRulesTest {
// .master("spark://me-ThinkPad-W510:7077")
.master("local[4]")
.config("spark.eventLog.enabled", "true")
- .config("spark.hadoop.validateOutputSpecs", "false") //override output files
+ .config("spark.hadoop.validateOutputSpecs", "false") // override output files
.config("spark.default.parallelism", "4")
.config("spark.sql.shuffle.partitions", "8")
// .config("spark.jars", "/home/me/work/projects/scala/Spark-Sem-I/target/inference-spark-0.1-SNAPSHOT.jar")
@@ -48,7 +49,7 @@ object SetOfRulesTest {
val numberOfTriples = graph.size()
println("#Triples:" + numberOfTriples)
- val rules = RuleUtils.load("rdfs-simple.rules")//.filter(r => ruleNames.contains(r.getName))
+ val rules = RuleUtils.load("rdfs-simple.rules")// .filter(r => ruleNames.contains(r.getName))
// runNaive(graph, rules)
// runNative(graph, rules)
diff --git a/sansa-inference-tests/pom.xml b/sansa-inference-tests/pom.xml
index 9e746da..795ada6 100644
--- a/sansa-inference-tests/pom.xml
+++ b/sansa-inference-tests/pom.xml
@@ -4,12 +4,12 @@
sansa-inference-parent_2.11
net.sansa-stack
- 0.3.0
+ 0.4.0
../pom.xml
net.sansa-stack
sansa-inference-tests_${scala.binary.version}
- 0.3.0
+ 0.4.0
Inference API - Tests
Contains common data and utils for inference API testing
@@ -54,6 +54,11 @@
org.scalatest
scalatest_${scala.binary.version}
+
+
+ com.typesafe.scala-logging
+ scala-logging_${scala.binary.version}
+
diff --git a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/ConformanceTestBase.scala b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/ConformanceTestBase.scala
index c770b73..7c37c7a 100644
--- a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/ConformanceTestBase.scala
+++ b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/ConformanceTestBase.scala
@@ -1,17 +1,18 @@
package net.sansa_stack.test.conformance
-import java.io.File
+import java.io.{File, StringWriter}
+import java.nio.file.{Path, Paths}
+import net.sansa_stack.inference.data.{RDF, RDFOps}
import org.apache.jena.rdf.model.Model
-import org.junit.runner.RunWith
-import net.sansa_stack.inference.data.{RDF, RDFOps, RDFTriple}
import org.apache.jena.shared.PrefixMapping
-import org.apache.jena.sparql.util.{FmtUtils, PrefixMapping2}
+import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{BeforeAndAfterAll, FlatSpec}
-
import scala.collection.mutable
+import net.sansa_stack.test.conformance.TestCases.getClass
+
/**
* The class is to test the conformance of each materialization rule of RDFS(simple) entailment.
*
@@ -21,39 +22,46 @@ import scala.collection.mutable
@RunWith(classOf[JUnitRunner])
abstract class ConformanceTestBase[Rdf <: RDF](val rdfOps: RDFOps[Rdf]) extends FlatSpec with BeforeAndAfterAll {
+ val logger = com.typesafe.scalalogging.Logger("ConformanceTestBase")
+
behavior of ""
// the test case IDs
def testCaseIds: Set[String]
- // the base directory of the test cases
- def testsCasesFolder: File
+ def testsCasesFolder: String = testCasesPath // this.getClass.getClassLoader.getResource(testCasesPath).getPath
+// def testsCasesFolder: File = null // new File(this.getClass.getClassLoader.getResource(testCasesPath).getPath)
+
+ def testCasesPath: String
- val pm = PrefixMapping.Factory.create()
+ private val pm = PrefixMapping.Factory.create()
.setNsPrefix("ex", "http://www.example.org#")
.setNsPrefix("", "http://www.example.org#")
.withDefaultMappings(PrefixMapping.Standard)
// load the test cases
- val testCases = TestCases.loadTestCases(testsCasesFolder).filter(t => testCaseIds.contains(t.id))
-
- testCases.foreach{testCase =>
- println(testCase.id)
+ lazy val testCases = TestCases.loadTestCasesJar(testsCasesFolder, testCaseIds)
+ // scalastyle:off println
+ testCases.foreach { testCase =>
testCase.id should "produce the same graph" in {
val triples = new mutable.HashSet[Rdf#Triple]()
// convert to internal triples
val iterator = testCase.inputGraph.listStatements()
- while(iterator.hasNext) {
+ while (iterator.hasNext) {
val st = iterator.next()
triples.add(
rdfOps.makeTriple(
rdfOps.makeUri(st.getSubject.toString),
rdfOps.makeUri(st.getPredicate.toString),
- if(st.getObject.isLiteral)
- rdfOps.makeLiteral(st.getObject.asLiteral().getLexicalForm, rdfOps.makeUri(st.getObject.asLiteral().getDatatypeURI))
- else rdfOps.makeUri(st.getObject.toString)))
+ if (st.getObject.isLiteral) {
+ rdfOps.makeLiteral(st.getObject.asLiteral().getLexicalForm, rdfOps.makeUri(st.getObject.asLiteral().getDatatypeURI))
+ } else {
+ rdfOps.makeUri(st.getObject.toString)
+ }
+ )
+ )
}
// compute inferred graph
@@ -63,14 +71,20 @@ abstract class ConformanceTestBase[Rdf <: RDF](val rdfOps: RDFOps[Rdf]) extends
// remove the input triples such that we can compare only the conclusion graph
inferredModel.remove(testCase.inputGraph)
- println("#" * 80 + "\ninput:")
- testCase.inputGraph.write(System.out, "TURTLE")
+ logger.whenDebugEnabled {
+ println("#" * 80 + "\ninput:")
+ testCase.inputGraph.write(System.out, "TURTLE")
+ }
- println("#" * 80 + "\nexpected output:")
- testCase.outputGraph.write(System.out, "TURTLE")
+ logger.whenDebugEnabled {
+ println("#" * 80 + "\nexpected output:")
+ testCase.outputGraph.write(System.out, "TURTLE")
+ }
- println("#" * 80 + "\ngot output:")
- inferredModel.write(System.out, "TURTLE")
+ logger.whenDebugEnabled {
+ println("#" * 80 + "\ngot output:")
+ inferredModel.write(System.out, "TURTLE")
+ }
// compare models, i.e. the inferred model should contain exactly the triples of the conclusion graph
val correctOutput = inferredModel.containsAll(testCase.outputGraph)
@@ -82,5 +96,4 @@ abstract class ConformanceTestBase[Rdf <: RDF](val rdfOps: RDFOps[Rdf]) extends
}
def computeInferredModel(triples: mutable.HashSet[Rdf#Triple]): Model
-
}
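The replacement of the unconditional println dumps uses scala-logging's whenDebugEnabled, so the TURTLE output of the input, expected and inferred graphs only appears when the underlying SLF4J logger runs at DEBUG level. A minimal sketch of the pattern, with a hypothetical logger name and payload:

```scala
import com.typesafe.scalalogging.Logger

object DebugOutputSketch {
  private val logger = Logger("ConformanceTestBase")

  def main(args: Array[String]): Unit = {
    // the block is only evaluated when DEBUG is enabled for this logger,
    // so verbose dumps disappear from normal test runs
    logger.whenDebugEnabled {
      println("#" * 80 + "\ninput:")
      // testCase.inputGraph.write(System.out, "TURTLE") would go here
    }
    logger.info("test finished")
  }
}
```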
diff --git a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/OWLHorstConformanceTestBase.scala b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/OWLHorstConformanceTestBase.scala
index dd05908..6e7cc48 100644
--- a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/OWLHorstConformanceTestBase.scala
+++ b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/OWLHorstConformanceTestBase.scala
@@ -1,8 +1,6 @@
package net.sansa_stack.test.conformance
-import java.io.File
-
-import net.sansa_stack.inference.data.{JenaOps, RDF, RDFOps}
+import net.sansa_stack.inference.data.{RDF, RDFOps}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -18,6 +16,8 @@ abstract class OWLHorstConformanceTestBase[Rdf <: RDF](override val rdfOps: RDFO
behavior of "conformance of OWL Horst entailment rules"
+ override def testCasesPath: String = "data/conformance/owl2rl"
+
override def testCaseIds: Set[String] = Set(
"rdfbased-sem-rdfs-domain-cond",
"rdfbased-sem-rdfs-range-cond",
@@ -40,6 +40,4 @@ abstract class OWLHorstConformanceTestBase[Rdf <: RDF](override val rdfOps: RDFO
"rdfbased-sem-restrict-somevalues-inst-subj",
"rdfbased-sem-restrict-allvalues-inst-obj"
)
-
- override def testsCasesFolder: File = new File(this.getClass.getClassLoader.getResource("data/conformance/owl2rl").getPath)
}
diff --git a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/RDFSConformanceTestBase.scala b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/RDFSConformanceTestBase.scala
index 0c79d6b..df2a03e 100644
--- a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/RDFSConformanceTestBase.scala
+++ b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/RDFSConformanceTestBase.scala
@@ -1,8 +1,6 @@
package net.sansa_stack.test.conformance
-import java.io.File
-
-import net.sansa_stack.inference.data.{Jena, RDF, RDFOps}
+import net.sansa_stack.inference.data.{RDF, RDFOps}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@@ -18,6 +16,8 @@ abstract class RDFSConformanceTestBase[Rdf <: RDF](override val rdfOps: RDFOps[R
behavior of "conformance of RDFS(simple) entailment rules"
+ override def testCasesPath: String = "data/conformance/rdfs"
+
override def testCaseIds: Set[String] = Set(
"rdfbased-sem-rdfs-domain-cond",
"rdfbased-sem-rdfs-range-cond",
@@ -25,6 +25,4 @@ abstract class RDFSConformanceTestBase[Rdf <: RDF](override val rdfOps: RDFOps[R
"rdfbased-sem-rdfs-subclass-trans",
"rdfbased-sem-rdfs-subprop-cond",
"rdfbased-sem-rdfs-subprop-trans")
-
- override def testsCasesFolder: File = new File(this.getClass.getClassLoader.getResource("data/conformance/rdfs").getPath)
}
diff --git a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/TestCase.scala b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/TestCase.scala
index 449787d..3993976 100644
--- a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/TestCase.scala
+++ b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/TestCase.scala
@@ -7,6 +7,4 @@ import org.apache.jena.rdf.model.Model
*
* @author Lorenz Buehmann
*/
-case class TestCase (id: String, description: String, testCaseType: String, inputGraph: Model, outputGraph: Model){
-
-}
+case class TestCase(id: String, description: String, testCaseType: String, inputGraph: Model, outputGraph: Model)
diff --git a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/TestCases.scala b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/TestCases.scala
index a0705f8..a1b9df5 100644
--- a/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/TestCases.scala
+++ b/sansa-inference-tests/src/test/scala/net/sansa_stack/test/conformance/TestCases.scala
@@ -1,12 +1,16 @@
package net.sansa_stack.test.conformance
import java.io.File
+import java.nio.file.{FileSystem, Path}
-import org.apache.jena.riot.RDFDataMgr
-
+import org.apache.jena.riot.{Lang, RDFDataMgr}
import scala.collection.mutable.ListBuffer
import scala.xml.XML
+import org.apache.commons.io.IOUtils
+import org.apache.jena.rdf.model.ModelFactory
+import org.scalatest.path
+
/**
* Test cases loader.
*
@@ -14,6 +18,10 @@ import scala.xml.XML
*/
object TestCases {
+ val logger = com.typesafe.scalalogging.Logger("TestCases")
+
+ var fs: FileSystem = null
+
/**
* Loads test cases from the given root folder.
*
@@ -21,9 +29,12 @@ object TestCases {
* @return test cases
*/
def loadTestCases(directory: File, ids: Set[String] = Set.empty): Seq[TestCase] = {
+ println(s"loading test cases from ${directory.getAbsolutePath}...")
val testCases = new ListBuffer[TestCase]()
+ println(directory)
+
directory.listFiles().filter(f => f.isDirectory && (ids.isEmpty || ids.contains(f.getName))).foreach { subDirectory =>
// the files in the directory
@@ -45,7 +56,7 @@ object TestCases {
val entailmentType = (metadata \\ "entry").filter(n => n.attribute("key").get.text == "testcase.type").text
// currently we support only entailment test cases
- if(entailmentType == "POSITIVE_ENTAILMENT") {
+ if (entailmentType == "POSITIVE_ENTAILMENT") {
// load input data
val inputGraph = RDFDataMgr.loadModel(files.filter(_.getName.endsWith(".premisegraph.ttl")).head.getPath)
@@ -54,8 +65,94 @@ object TestCases {
testCases += TestCase(id, description, entailmentType, inputGraph, outputGraph)
}
+
}
+ println(s"loaded ${testCases.size} test cases")
testCases
}
+
+ /**
+ * Loads test cases from the given root folder.
+ *
+ * @param directory the root folder containing sub-folders for each test case
+ * @return test cases
+ */
+ def loadTestCasesJar(directory: String, ids: Set[String] = Set.empty): Seq[TestCase] = {
+ logger.info(s"loading test cases from ${directory}...")
+
+ val testCases = new ListBuffer[TestCase]()
+
+ listFiles(directory).filter(f => ids.isEmpty || ids.contains(f.getFileName.toString.replace("/", ""))).map { p =>
+
+ // the files in the directory
+ val files = listFiles(
+ if (p.toUri.getScheme == "jar") p.toString.substring(1) else p.toString, true)
+
+ // get the metadata file
+ val metadataFile = files.filter(_.toString.endsWith(".metadata.properties")).head
+
+ // load metadata XML
+ val metadata = XML.load(metadataFile.toUri.toURL.openStream())
+
+ // id
+ val id = (metadata \\ "entry").filter(n => n.attribute("key").get.text == "testcase.id").text
+
+ // description
+ val description = (metadata \\ "entry").filter(n => n.attribute("key").get.text == "testcase.description").text
+
+ // test case type
+ val entailmentType = (metadata \\ "entry").filter(n => n.attribute("key").get.text == "testcase.type").text
+
+ // currently we support only entailment test cases
+ if (entailmentType == "POSITIVE_ENTAILMENT") {
+ // load input data
+
+ val inputGraph = ModelFactory.createDefaultModel()
+ inputGraph.read(files.filter(_.toString.endsWith(".premisegraph.ttl")).head.toUri.toURL.openStream(), null, "Turtle")
+
+ // load output data
+ val outputGraph = ModelFactory.createDefaultModel()
+ outputGraph.read(files.filter(_.toString.endsWith(".conclusiongraph.ttl")).head.toUri.toURL.openStream(), null, "Turtle")
+
+ testCases += TestCase(id, description, entailmentType, inputGraph, outputGraph)
+ }
+ }
+// directory.listFiles().filter(f => f.isDirectory && (ids.isEmpty || ids.contains(f.getName))).foreach { subDirectory =>
+
+
+ println(s"loaded ${testCases.size} test cases")
+
+ if (fs != null) fs.close()
+
+ testCases
+ }
+
+ private def listFiles(path: String, subDir: Boolean = false): Seq[Path] = {
+ import java.nio.file.FileSystems
+ import java.nio.file.Files
+ import java.nio.file.Paths
+ import java.util.Collections
+
+// println(s"path: $path")
+ val uri = if (path.startsWith("/")) new File(path).toURI else classOf[TestCase].getClassLoader.getResource(path).toURI
+// println(s"uri: $uri")
+ var myPath: Path = null
+ if (uri.getScheme == "jar" && !subDir) {
+ fs = FileSystems.newFileSystem(uri, Collections.emptyMap[String, Any])
+ myPath = fs.getPath(path)
+ }
+ else myPath = Paths.get(uri)
+ val walk = Files.walk(myPath, 1)
+ val it = walk.iterator
+ var files = Seq[Path]()
+ while (it.hasNext) {
+ val subPath = it.next()
+ if (!subPath.equals(myPath)) {
+ files :+= subPath
+ }
+ }
+
+ files
+ }
}
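loadTestCasesJar and listFiles exist because the conformance data may now be read from inside a JAR on the classpath, where java.io.File cannot reach, while the java.nio FileSystem API can. A minimal sketch of that lookup, assuming the resource folder exists on the classpath and leaving out error handling and the caching/closing of the JAR FileSystem:

```scala
import java.nio.file.{FileSystems, Files, Path, Paths}
import java.util.Collections

import scala.collection.JavaConverters._

object ClasspathListingSketch {
  // lists the direct children of a classpath folder, whether it lives on disk or inside a JAR
  def listResourceDir(resourcePath: String): Seq[Path] = {
    val uri = getClass.getClassLoader.getResource(resourcePath).toURI
    val dir =
      if (uri.getScheme == "jar") {
        // a JAR entry has no File representation; mount the JAR as a java.nio FileSystem first
        val fs = FileSystems.newFileSystem(uri, Collections.emptyMap[String, String]())
        fs.getPath(resourcePath)
      } else {
        Paths.get(uri)
      }
    Files.walk(dir, 1).iterator().asScala.filterNot(_ == dir).toList
  }

  def main(args: Array[String]): Unit =
    // example folder; the tests use e.g. "data/conformance/rdfs"
    listResourceDir("data/conformance/rdfs").foreach(println)
}
```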
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 67ad459..f9218e2 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -116,7 +116,7 @@ This file is divided into 3 sections:
-
+
@@ -142,13 +142,13 @@ This file is divided into 3 sections:
-
- ^println$
-
-
+
+
+
+
+
+
+
@VisibleForTesting
@@ -222,15 +222,15 @@ This file is divided into 3 sections:
is slower.
-
-
- java,scala,3rdParty,sansa
- javax?\..*
- scalax?\..*
- (?!net\.sansa_stack\.inference\.).*
- net\.sansa_stack\.inference\..*
-
-
+
+
+
+
+
+
+
+
+