From 1bb42e9c02245d0dc5c2c6a66919b8b117aebc3b Mon Sep 17 00:00:00 2001 From: Enrico Daga Date: Fri, 4 Feb 2022 09:01:47 +0000 Subject: [PATCH] Copy test format in docs/. See #201 --- docs/README.md | 72 +++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/docs/README.md b/docs/README.md index dfc4ad43..c87e8e59 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,10 +9,10 @@ SPARQL Anything is a system for Semantic Web re-engineering that allows users to ## Facade-X SPARQL Anything uses a single generic abstraction for all data source formats called Facade-X. -Facade-X is a simplistic meta-model used by sparql.anything transformers to generate RDF data from diverse data sources. +Facade-X is a simplistic meta-model used by SPARQL Anything transformers to generate RDF data from diverse data sources. Intuitively, Facade-X uses a subset of RDF as a general approach to represent the source content *as-it-is* but in RDF. -The model combines two type of elements: containers and literals. -Facade-X has always a single root container. +The model combines two types of elements: containers and literals. +Facade-X always has a single root container. Container members are a combination of key-value pairs, where keys are either RDF properties or container membership properties. Instead, values can be either RDF literals or other containers. This is a generic example of a Facade-X data object (more examples below): @@ -21,7 +21,7 @@ This is a generic example of a Facade-X data object (more examples below): @prefix fx: . @prefix xyz: . @prefix rdf: . -[] a fx:Root ; rdf:_1 [ +[] a fx:root ; rdf:_1 [ xyz:someKey "some value" ; rdf:_1 "another value with unspecified key" ; rdf:_2 [ @@ -83,6 +83,7 @@ With SPARQL Anything you can select the TV series starring "Courteney Cox" with ```sparql PREFIX xyz: PREFIX rdf: +PREFIX fx: SELECT ?seriesName WHERE { @@ -90,7 +91,7 @@ WHERE { SERVICE { ?tvSeries xyz:name ?seriesName . 
?tvSeries xyz:stars ?star . - ?star ?li "Courteney Cox" . + ?star fx:anySlot "Courteney Cox" . } } @@ -106,19 +107,14 @@ and get this result without caring of transforming JSON to RDF. ## Supported Formats -Currently, the system supports the following formats: "json", "html", "xml", "csv", "yaml", "md", "bib", "bin", "png","jpeg","jpg","bmp","tiff","tif", "ico", "txt" ... but the possibilities are limitless! - +Currently, SPARQL Anything supports the following list of formats but the possibilities are limitless! By default, these formats are triplified as follows.
JSON - - -|Input|Triplification| -|---|---| -|
{
"stringArg":"stringValue",
"intArg":1,
"booleanArg":true,
"nullArg": null,
"arr":[0,1]
}
|
@prefix xyz:    <http://sparql.xyz/facade-x/data/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
[
xyz:arr [
rdf:_1 "0"^^xsd:int ;
rdf:_2 "1"^^xsd:int
] ;
xyz:booleanArg true ;
xyz:intArg "1"^^xsd:int ;
xyz:stringArg "stringValue"
] .
| - - - +Data: +
{
"stringArg":"stringValue",
"intArg":1,
"booleanArg":true,
"nullArg": null,
"arr":[0,1]
}
+Facade-X RDF: +
@prefix xyz:    <http://sparql.xyz/facade-x/data/> .
@prefix fx: <http://sparql.xyz/facade-x/ns/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
[ a fx:root ;
xyz:arr [
rdf:_1 "0"^^xsd:int ;
rdf:_2 "1"^^xsd:int
] ;
xyz:booleanArg true ;
xyz:intArg "1"^^xsd:int ;
xyz:stringArg "stringValue"
] .
@@ -126,7 +122,7 @@ By default, these formats are triplified as follows. |Input|Triplification| |---|---| -|
<html>
<head>
<title>Hello world!</title>
</head>
<body>
<p class="paragraph">Hello world</p>
</body>
</html>
|
@prefix rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xhtml: <http://www.w3.org/1999/xhtml#> .

[ a xhtml:html ;
rdf:_1 [ a xhtml:head ;
rdf:_1 [ a xhtml:title ;
rdf:_1 "Hello world!"
]
] ;
rdf:_2 [ a xhtml:body ;
rdf:_1 [ a xhtml:p ;
rdf:_1 "Hello world" ;
xhtml:class "paragraph"
]
]
] .
| +|
<html>
<head>
<title>Hello world!</title>
</head>
<body>
<p class="paragraph">Hello world</p>
</body>
</html>
|
@prefix fx:   <http://sparql.xyz/facade-x/ns/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xhtml: <http://www.w3.org/1999/xhtml#> .

[ a xhtml:html, fx:root ;
rdf:_1 [ a xhtml:head ;
rdf:_1 [ a xhtml:title ;
rdf:_1 "Hello world!"
]
] ;
rdf:_2 [ a xhtml:body ;
rdf:_1 [ a xhtml:p ;
rdf:_1 "Hello world" ;
xhtml:class "paragraph"
]
]
] .
| @@ -134,7 +130,7 @@ By default, these formats are triplified as follows. |Input|Triplification| |---|---| -|
<breakfast_menu>
<food>
<name>Belgian Waffles</name>
<price>$5.95</price>
<description>Two of our famous Belgian Waffles with plenty of real maple syrup</description>
<calories>650</calories>
</food>
<food>
<name>Strawberry Belgian Waffles</name>
<price>$7.95</price>
<description>Light Belgian waffles covered with strawberries and whipped cream</description>
<calories>900</calories>
</food>
<food>
<name>Berry-Berry Belgian Waffles</name>
<price>$8.95</price>
<description>Light Belgian waffles covered with an assortment of fresh berries and whipped cream</description>
<calories>900</calories>
</food>
</breakfast_menu>
|
@prefix xyz:    <http://sparql.xyz/facade-x/data/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

[ a xyz:breakfast_menu ;
rdf:_1 [ a xyz:food ;
rdf:_1 [ a xyz:name ;
rdf:_1 "Belgian Waffles"
] ;
rdf:_2 [ a xyz:price ;
rdf:_1 "$5.95"
] ;
rdf:_3 [ a xyz:description ;
rdf:_1 "Two of our famous Belgian Waffles with plenty of real maple syrup"
] ;
rdf:_4 [ a xyz:calories ;
rdf:_1 "650"
]
] ;
rdf:_2 [ a xyz:food ;
rdf:_1 [ a xyz:name ;
rdf:_1 "Strawberry Belgian Waffles"
] ;
rdf:_2 [ a xyz:price ;
rdf:_1 "$7.95"
] ;
rdf:_3 [ a xyz:description ;
rdf:_1 "Light Belgian waffles covered with strawberries and whipped cream"
] ;
rdf:_4 [ a xyz:calories ;
rdf:_1 "900"
]
] ;
rdf:_3 [ a xyz:food ;
rdf:_1 [ a xyz:name ;
rdf:_1 "Berry-Berry Belgian Waffles"
] ;
rdf:_2 [ a xyz:price ;
rdf:_1 "$8.95"
] ;
rdf:_3 [ a xyz:description ;
rdf:_1 "Light Belgian waffles covered with an assortment of fresh berries and whipped cream"
] ;
rdf:_4 [ a xyz:calories ;
rdf:_1 "900"
]
]
] .
| +|
<breakfast_menu>
<food>
<name>Belgian Waffles</name>
<price>$5.95</price>
<description>Two of our famous Belgian Waffles with plenty of real maple syrup</description>
<calories>650</calories>
</food>
<food>
<name>Strawberry Belgian Waffles</name>
<price>$7.95</price>
<description>Light Belgian waffles covered with strawberries and whipped cream</description>
<calories>900</calories>
</food>
<food>
<name>Berry-Berry Belgian Waffles</name>
<price>$8.95</price>
<description>Light Belgian waffles covered with an assortment of fresh berries and whipped cream</description>
<calories>900</calories>
</food>
</breakfast_menu>
|
@prefix xyz:    <http://sparql.xyz/facade-x/data/> .
@prefix fx: <http://sparql.xyz/facade-x/ns/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

[ a xyz:breakfast_menu, fx:root ;
rdf:_1 [ a xyz:food ;
rdf:_1 [ a xyz:name ;
rdf:_1 "Belgian Waffles"
] ;
rdf:_2 [ a xyz:price ;
rdf:_1 "$5.95"
] ;
rdf:_3 [ a xyz:description ;
rdf:_1 "Two of our famous Belgian Waffles with plenty of real maple syrup"
] ;
rdf:_4 [ a xyz:calories ;
rdf:_1 "650"
]
] ;
rdf:_2 [ a xyz:food ;
rdf:_1 [ a xyz:name ;
rdf:_1 "Strawberry Belgian Waffles"
] ;
rdf:_2 [ a xyz:price ;
rdf:_1 "$7.95"
] ;
rdf:_3 [ a xyz:description ;
rdf:_1 "Light Belgian waffles covered with strawberries and whipped cream"
] ;
rdf:_4 [ a xyz:calories ;
rdf:_1 "900"
]
] ;
rdf:_3 [ a xyz:food ;
rdf:_1 [ a xyz:name ;
rdf:_1 "Berry-Berry Belgian Waffles"
] ;
rdf:_2 [ a xyz:price ;
rdf:_1 "$8.95"
] ;
rdf:_3 [ a xyz:description ;
rdf:_1 "Light Belgian waffles covered with an assortment of fresh berries and whipped cream"
] ;
rdf:_4 [ a xyz:calories ;
rdf:_1 "900"
]
]
] .
| @@ -142,7 +138,7 @@ By default, these formats are triplified as follows. |Input|Triplification| |---|---| -|
laura@example.com,2070,Laura,Grey
craig@example.com,4081,Craig,Johnson
mary@example.com,9346,Mary,Jenkins
jamie@example.com,5079,Jamie,Smith
|
@prefix rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

[ rdf:_1 [ rdf:_1 "laura@example.com" ;
rdf:_2 "2070" ;
rdf:_3 "Laura" ;
rdf:_4 "Grey"
] ;
rdf:_2 [ rdf:_1 "craig@example.com" ;
rdf:_2 "4081" ;
rdf:_3 "Craig" ;
rdf:_4 "Johnson"
] ;
rdf:_3 [ rdf:_1 "mary@example.com" ;
rdf:_2 "9346" ;
rdf:_3 "Mary" ;
rdf:_4 "Jenkins"
] ;
rdf:_4 [ rdf:_1 "jamie@example.com" ;
rdf:_2 "5079" ;
rdf:_3 "Jamie" ;
rdf:_4 "Smith"
]
] .
| +|
laura@example.com,2070,Laura,Grey
craig@example.com,4081,Craig,Johnson
mary@example.com,9346,Mary,Jenkins
jamie@example.com,5079,Jamie,Smith
|
@prefix fx:   <http://sparql.xyz/facade-x/ns/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

[ a fx:root ;
rdf:_1 [ rdf:_1 "laura@example.com" ;
rdf:_2 "2070" ;
rdf:_3 "Laura" ;
rdf:_4 "Grey"
] ;
rdf:_2 [ rdf:_1 "craig@example.com" ;
rdf:_2 "4081" ;
rdf:_3 "Craig" ;
rdf:_4 "Johnson"
] ;
rdf:_3 [ rdf:_1 "mary@example.com" ;
rdf:_2 "9346" ;
rdf:_3 "Mary" ;
rdf:_4 "Jenkins"
] ;
rdf:_4 [ rdf:_1 "jamie@example.com" ;
rdf:_2 "5079" ;
rdf:_3 "Jamie" ;
rdf:_4 "Smith"
]
] .
| @@ -150,7 +146,7 @@ By default, these formats are triplified as follows. |Input|Triplification| |---|---| -|![Image example](https://raw.githubusercontent.com/ianare/exif-samples/master/jpg/Canon_40D.jpg)|
[ <http://www.w3.org/1999/02/22-rdf-syntax-ns#_1>  ""^^<http://www.w3.org/2001/XMLSchema#base64Binary> ] .
+|![Image example](https://raw.githubusercontent.com/ianare/exif-samples/master/jpg/Canon_40D.jpg)| 
[ <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sparql.xyz/facade-x/ns/root> ; <http://www.w3.org/1999/02/22-rdf-syntax-ns#_1>  ""^^<http://www.w3.org/2001/XMLSchema#base64Binary> ] .
 
| @@ -159,7 +155,7 @@ By default, these formats are triplified as follows. |Input|Triplification| |---|---| -|
Hello World!
|
[ <http://www.w3.org/1999/02/22-rdf-syntax-ns#_1> "Hello World!" ] .
| +|
Hello World!
|
[  <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sparql.xyz/facade-x/ns/root> ; <http://www.w3.org/1999/02/22-rdf-syntax-ns#_1> "Hello World!" ] .
| @@ -167,7 +163,7 @@ By default, these formats are triplified as follows. |Input|Triplification| |---|---| -|
archive.tar
\|__ file.csv
\|__ file.json
\|__ file.xml
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@prefix fx: <http://sparql.xyz/facade-x/ns/>
[ rdf:type fx:Root
rdf:_1 "file.csv" ;
rdf:_2 "file.json" ;
rdf:_3 "file.xml" ] .
| +|
archive.tar
\|__ file.csv
\|__ file.json
\|__ file.xml
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix fx: <http://sparql.xyz/facade-x/ns/> .
[ rdf:type fx:root ;
rdf:_1 "file.csv" ;
rdf:_2 "file.json" ;
rdf:_3 "file.xml" ] .
| @@ -208,7 +204,7 @@ By default, these formats are triplified as follows. |Input|Triplification| |---|---| -|
# Title
The following list of issues:

- first issue
- second issue

---
Footer paragraph.
|
@prefix fx:  .
@prefix xyz: .
@prefix rdf: .
@prefix xsd: .

[] a fx:root, xyz:Document ;
rdf:_1 [
a xyz:Heading ;
rdf:_1 "Title"^^xsd:string ;
xyz:level "1"^^xsd:int
] ;
rdf:_2 [
a xyz:Paragraph ;
rdf:_1 "The following list of issues:"^^xsd:string
] ;
rdf:_3 [
a xyz:BulletList ;
rdf:_1 [
a xyz:ListItem ;
rdf:_1 [
a xyz:Paragraph ;
rdf:_1 "first issue"^^xsd:string
]
] ;
rdf:_2 [
a xyz:ListItem ;
rdf:_1 [
a xyz:Paragraph ;
rdf:_1 "second issue"^^xsd:string
]
]
] ;
rdf:_4 [
a xyz:ThematicBreak
] ;
rdf:_5 [
a xyz:Paragraph ;
rdf:_1 "Footer paragraph."^^xsd:string
] .
| +|
# Title
The following list of issues:

- first issue
- second issue

---
Footer paragraph.
|
@prefix fx: <http://sparql.xyz/facade-x/ns/> .
@prefix xyz: <http://sparql.xyz/facade-x/data/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.

[] a fx:root, xyz:Document ;
rdf:_1 [
a xyz:Heading ;
rdf:_1 "Title"^^xsd:string ;
xyz:level "1"^^xsd:int
] ;
rdf:_2 [
a xyz:Paragraph ;
rdf:_1 "The following list of issues:"^^xsd:string
] ;
rdf:_3 [
a xyz:BulletList ;
rdf:_1 [
a xyz:ListItem ;
rdf:_1 [
a xyz:Paragraph ;
rdf:_1 "first issue"^^xsd:string
]
] ;
rdf:_2 [
a xyz:ListItem ;
rdf:_1 [
a xyz:Paragraph ;
rdf:_1 "second issue"^^xsd:string
]
]
] ;
rdf:_4 [
a xyz:ThematicBreak
] ;
rdf:_5 [
a xyz:Paragraph ;
rdf:_1 "Footer paragraph."^^xsd:string
] .
| @@ -216,17 +212,17 @@ By default, these formats are triplified as follows. |Input|Triplification| |---|---| -|
foo: bar
pleh: help
stuff:
foo: bar
bar: foo
|
@prefix fx:  .
@prefix xyz: .
@prefix rdf: .
@prefix xsd: .

[ a fx:root ;
xyz:foo "bar"^^xsd:string ;
xyz:pleh "help"^^xsd:string ;
xyz:stuff [
xyz:foo "bar"^^xsd:string ;
xyz:bar "foo"^^xsd:string
]
]
| +|
foo: bar
pleh: help
stuff:
foo: bar
bar: foo
|
@prefix fx: <http://sparql.xyz/facade-x/ns/> .
@prefix xyz: <http://sparql.xyz/facade-x/data/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.

[ a fx:root ;
xyz:foo "bar"^^xsd:string ;
xyz:pleh "help"^^xsd:string ;
xyz:stuff [
xyz:foo "bar"^^xsd:string ;
xyz:bar "foo"^^xsd:string
]
] .
| ## IRI schema and triplification options -sparql.anything will act as a virtual endpoint that can be queried exactly as a remote SPARQL endpoint. +SPARQL Anything will act as a virtual endpoint that can be queried exactly as a remote SPARQL endpoint. -### Passing triplification options via SERVICE URI +### Passing triplification options via SERVICE IRI -In order to instruct the query processor to delegate the execution to facade-x, you can use the following URI-schema within SERVICE clauses. +In order to instruct the query processor to delegate the execution to SPARQL Anything, you can use the following IRI-schema within SERVICE clauses. ``` x-sparql-anything ':' ([option] ('=' [value])? ','?)+ @@ -238,7 +234,7 @@ A minimal URI that uses only the resource locator is also possible. x-sparql-anything ':' URL ``` -In this case sparql.anything guesses the data source type from the file extension. +In this case SPARQL Anything guesses the data source type from the file extension. ### Passing triplification options via Basic Graph Pattern @@ -257,7 +253,7 @@ WHERE { fx:properties fx:location "https://sparql-anything.cc/example1.json" . ?tvSeries xyz:name ?seriesName . ?tvSeries xyz:stars ?star . - ?star ?li "Courteney Cox" . + ?star fx:anySlot "Courteney Cox" . } } @@ -265,7 +261,7 @@ WHERE { Note that -1. The SERVICE URI scheme must be ``x-sparql-anything:``. +1. The SERVICE IRI scheme must be ``x-sparql-anything:``. 2. Each triplification option to pass to the engine corresponds to a triple of the Basic Graph Pattern inside the SERVICE clause. 3. Such triples must have ``fx:properties`` as subject, ``fx:[OPTION-NAME]`` as predicate, and a literal or a variable as object. @@ -283,7 +279,7 @@ WHERE { fx:properties fx:location "https://sparql-anything.cc/example1.json" . ?tvSeries xyz:name ?seriesName . ?tvSeries xyz:stars ?star . - ?star ?li "Courteney Cox" . + ?star fx:anySlot "Courteney Cox" . 
} } @@ -304,6 +300,10 @@ WHERE { |triplifier|It forces sparql.anything to use a specific triplifier for transforming the data source|A canonical name of a Java class|No value| |charset|The charset of the data source.|Any charset.|UTF-8| |metadata|It tells sparql.anything to extract metadata from the data source and to store it in the named graph with URI <http://sparql.xyz/facade-x/data/metadata> |true/false|false| +|ondisk|It tells sparql.anything to use an on disk graph (instead of the default in memory graph). The string should be a path to a directory where the on disk graph will be stored. Using an on disk graph is almost always slower (than using the default in memory graph) but with it you can triplify large files without running out of memory.|a path to a directory|not set| +|ondisk.reuse|When using an on disk graph, it tells sparql.anything to reuse the previous on disk graph.|true|not set| +|strategy|The execution strategy. 0 = in memory, all triples; 1 = in memory, only triples matching any of the triple patterns in the where clause|0,1|1| +|slice|The resource is sliced and the SPARQL query is executed on each one of the parts. Supported by: CSV|true/false|false| \* It is mandatory to provide either the local or the content. @@ -318,6 +318,7 @@ WHERE { |html.browser.timeout|When using a browser to nagivate, it tells the browser if it spends longer than this amount of time (in milliseconds) until a load event is emitted then the operation will timeout. |any integer|30000| |html.browser.wait|When using a browser to nagivate, it tells the triplifier to wait for the specified number of seconds (after telling the browser to navigate to the page) before attempting to obtain HTML.|any integer|No Value| |html.browser.screenshot|When using a browser to nagivate, take a screenshot of the webpage (perhaps for troubleshooting) and save it here.|a file URI e.g. 
"file:///tmp/screenshot.png" |No Value| +|html.metadata|It tells the triplifier to extract inline RDF from HTML pages. The triples extracted will be included in the default graph. (cf. #164)|true/false|false| @@ -455,9 +456,20 @@ The system supports the following functions operating on strings that contain wo - `WordUtils.swapCase` wraps [`org.apache.commons.text.WordUtils.swapCase`](https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/WordUtils.html#swapCase(java.lang.String)) - `WordUtils.uncapitalize` wraps [`org.apache.commons.text.WordUtils.uncapitalize`](https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/WordUtils.html#uncapitalize(java.lang.String)) +The system supports the following functions operating on strings that are URLs (See #176): +- `fx:URLEncoder.encode` wraps [`java.net.URLEncoder.encode`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html#encode(java.lang.String,java.lang.String)) +- `fx:URLDecoder.decode` wraps [`java.net.URLDecoder.decode`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLDecoder.html#decode(java.lang.String,java.lang.String)) + Additional functions: - `fx:serial (?a ... ?n)` generates an incremental number using the arguments as reference counters. For example, calling `fx:serial("x")` two times will generate `1` and then `2`. Instead, calling `fx:serial(?x)` multiple times will generate sequential numbers for each value of `?x`. +- `fx:entity (?a ... ?n)` accepts a list of arguments and performs concatenation and automatic casting to string. Container membership properties (`rdf:_1`,`rdf:_2`,...) are cast to numbers and then to strings (`"1","2"`). 
+``` +BIND ( fx:entity ( myns:, "dummy-entity", 1) AS ?myentity) +# is equivalent to +BIND ( IRI( CONCAT ( STR (myns:), "dummy-entity", STR(1) ) ) AS ?myentity ) +``` +See also https://github.com/SPARQL-Anything/sparql.anything/issues/106 ## Download and Usage