Skip to content

Commit

Permalink
Fix XML root minting (see #386)
Browse files Browse the repository at this point in the history
Delegate root minting to FacadeXNodeBuilder (see #380)
Some code cleanup #373
  • Loading branch information
luigi-asprino committed Jun 21, 2023
1 parent f486f82 commit d382c0c
Show file tree
Hide file tree
Showing 27 changed files with 233 additions and 274 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,20 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
logger.warn("No location provided");
return;
}
String root = Triplifier.getRootArgument(properties);
String dataSourceId = "";
String matches = properties.getProperty(ZipTriplifier.MATCHES, ".*");

logger.trace("Matches {}", matches);

builder.addRoot(dataSourceId, root);
builder.addRoot(dataSourceId);

try {
Path path = Paths.get(url.toURI());
AtomicInteger i = new AtomicInteger(1);
Files.walk(path).forEach(p -> {
logger.trace("{} matches? {}", p.toString(), path.toString().matches(matches));
if (p.toString().matches(matches)) {
builder.addValue(dataSourceId, root, i.getAndIncrement(), p.toUri().toString());
builder.addValue(dataSourceId, builder.getRoot(dataSourceId), i.getAndIncrement(), p.toUri().toString());
}
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,13 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
return;
}
Charset charset = Triplifier.getCharsetArgument(properties);
String root = Triplifier.getRootArgument(properties);
String dataSourceId = "";
String matches = properties.getProperty(ZipTriplifier.MATCHES, ".*");

logger.trace("Matches {}", matches);

// Graph g = GraphFactory.createDefaultGraph();
builder.addRoot(dataSourceId, root);
builder.addRoot(dataSourceId);

try {
TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
Expand All @@ -61,7 +60,7 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {

if (entry.getName().matches(matches)) {
builder.addValue(dataSourceId, root, i, entry.getName());
builder.addValue(dataSourceId, builder.getRoot(dataSourceId), i, entry.getName());
i++;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,18 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
logger.warn("No location provided");
return;
}
String root = Triplifier.getRootArgument(properties);
String dataSourceId = "";
Charset charset = Triplifier.getCharsetArgument(properties);
String matches = properties.getProperty(MATCHES, ".*");

builder.addRoot(dataSourceId, root);
builder.addRoot(dataSourceId);

ZipInputStream zis = new ZipInputStream(url.openStream(), charset);
ZipEntry ze;
int i = 1;
while ((ze = zis.getNextEntry()) != null) {
if (ze.getName().matches(matches)) {
builder.addValue(dataSourceId, root, i, NodeFactory.createLiteral(ze.getName()));
builder.addValue(dataSourceId, builder.getRoot(dataSourceId), i, NodeFactory.createLiteral(ze.getName()));
i++;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,10 @@ public BibtexTriplifier() {
@Override
public void triplify(Properties properties, FacadeXGraphBuilder builder) throws IOException {

String root = Triplifier.getRootArgument(properties);
String dataSourceId = "";
String namespace = PropertyUtils.getStringProperty(properties, IRIArgument.NAMESPACE);

builder.addRoot(dataSourceId, root);
builder.addRoot(dataSourceId);

try (InputStream is = Triplifier.getInputStream(properties)){

Expand All @@ -57,13 +56,13 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
BibTeXDatabase bibDB = bibtexParser.parse(reader);
AtomicInteger count = new AtomicInteger();
bibDB.getEntries().forEach((key, entry) -> {
String containerIdChild = root + key;
String containerIdChild = builder.getRoot(dataSourceId).concat(key.toString());
try {
builder.addType(dataSourceId, containerIdChild, new URI(namespace + entry.getType().toString()));
} catch (URISyntaxException e) {
logger.error("",e);
}
builder.addContainer(dataSourceId, root, count.incrementAndGet(), containerIdChild);
builder.addContainer(dataSourceId, builder.getRoot(dataSourceId), count.incrementAndGet(), containerIdChild);
entry.getFields().forEach((keyField, valueField) -> {
builder.addValue(dataSourceId, containerIdChild, keyField.toString(), valueField.toUserString());
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
logger.warn("Using default encoding (Base64)");
}

String root = Triplifier.getRootArgument(properties);
String dataSourceId = "";
// Charset charset = getCharsetArgument(properties);
// boolean blank_nodes = Triplifier.getBlankNodeArgument(properties);
Expand All @@ -81,9 +80,9 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
break;
}
// Add root
builder.addRoot(dataSourceId, root);
builder.addRoot(dataSourceId);
// Add content
builder.addValue(dataSourceId, root, 1, NodeFactory.createLiteralByValue(value, XSDDatatype.XSDbase64Binary));
builder.addValue(dataSourceId, builder.getRoot(dataSourceId), 1, NodeFactory.createLiteralByValue(value, XSDDatatype.XSDbase64Binary));
}

private byte[] downloadUrl(URL toDownload) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,11 @@ public class CSVSlice implements Slice<CSVRecord> {
private CSVRecord record;
private int iteration;
private String dataSourceId;
private String rootId;
LinkedHashMap<Integer, String> headers;

public static CSVSlice makeSlice(CSVRecord record, int iteration, String dataSourceId, String rootId, LinkedHashMap<Integer, String> headers){
public static CSVSlice makeSlice(CSVRecord record, int iteration, String dataSourceId, LinkedHashMap<Integer, String> headers){
CSVSlice r = new CSVSlice();
r.dataSourceId = dataSourceId;
r.rootId = rootId;
r.iteration = iteration;
r.record = record;
r.headers = headers;
Expand All @@ -53,10 +51,6 @@ public String getDatasourceId() {
return dataSourceId;
}

@Override
public String getRootId() {
return rootId;
}

public LinkedHashMap<Integer, String> getHeaders(){
return this.headers;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,12 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
// return;

CSVFormat format = buildFormat(properties);
String root = Triplifier.getRootArgument(properties);
Charset charset = Triplifier.getCharsetArgument(properties);

String dataSourceId = ""; // there is always 1 data source id

// Add type Root
builder.addRoot(dataSourceId, root);
builder.addRoot(dataSourceId);

try (InputStream is = Triplifier.getInputStream(properties);){
Reader in = new InputStreamReader(new BOMInputStream(is), charset);
Expand All @@ -145,7 +144,7 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
log.debug("current row num: {}", rown);
}
CSVRecord record = recordIterator.next();
processRow(rown, dataSourceId, root, record, headers_map, builder);
processRow(rown, dataSourceId, builder.getRoot(dataSourceId), record, headers_map, builder);
}
log.debug("{} records", rown);
} catch (IllegalArgumentException e) {
Expand Down Expand Up @@ -230,7 +229,7 @@ public boolean hasNext() {
public Slice next() {
rown++;
log.trace("next slice: {}", rown);
return CSVSlice.makeSlice(recordIterator.next(), rown, dataSourceId, root, headers_map);
return CSVSlice.makeSlice(recordIterator.next(), rown, dataSourceId, headers_map);
}
};
}
Expand All @@ -240,7 +239,7 @@ public Slice next() {
@Override
public void triplify(Slice slice, Properties p, FacadeXGraphBuilder builder) {
CSVSlice csvo = (CSVSlice) slice;
builder.addRoot(csvo.getDatasourceId(), csvo.getRootId());
processRow(csvo.iteration(), csvo.getDatasourceId(), csvo.getRootId(), csvo.get(), csvo.getHeaders(), builder);
builder.addRoot(csvo.getDatasourceId());
processRow(csvo.iteration(), csvo.getDatasourceId(), builder.getRoot(csvo.getDatasourceId()), csvo.get(), csvo.getHeaders(), builder);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,18 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
if (url == null)
return;

String root = Triplifier.getRootArgument(properties);
String dataSourceId = "";
String namespace = PropertyUtils.getStringProperty(properties, IRIArgument.NAMESPACE);
boolean mergeParagraphs = Boolean.parseBoolean(properties.getProperty(MERGE_PARAGRAPHS, "false"));
boolean headers = Boolean.parseBoolean(properties.getProperty(TABLE_HEADERS, "false"));

builder.addRoot(dataSourceId, root);
builder.addRoot(dataSourceId);

InputStream is = url.openStream();
try (XWPFDocument document = new XWPFDocument(is)) {
List<XWPFParagraph> paragraphs = document.getParagraphs();

builder.addType(dataSourceId, root, namespace + "Document");
builder.addType(dataSourceId, builder.getRoot(dataSourceId), namespace + "Document");

int count = 1;
if (!mergeParagraphs) {
Expand All @@ -83,7 +82,7 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
}


builder.addContainer(dataSourceId, root, count, paragraphURI);
builder.addContainer(dataSourceId, builder.getRoot(dataSourceId), count, paragraphURI);
builder.addValue(dataSourceId, paragraphURI, 1, para.getText());

count++;
Expand All @@ -95,7 +94,7 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
sb.append(para.getText());
sb.append("\n");
}
builder.addValue(dataSourceId, root, count,
builder.addValue(dataSourceId, builder.getRoot(dataSourceId), count,
NodeFactory.createLiteral(sb.toString(), XSDDatatype.XSDstring));
count++;
}
Expand All @@ -105,7 +104,7 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
XWPFTable xwpfTable = (XWPFTable) it.next();

String tableId = namespace + "Table_" + count;
builder.addContainer(dataSourceId, root, count, tableId);
builder.addContainer(dataSourceId, builder.getRoot(dataSourceId), count, tableId);

LinkedHashMap<Integer, String> headers_map = new LinkedHashMap<Integer, String>();
int rown = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ public class HTMLTriplifier implements Triplifier {
private static final String HTML_NS = "http://www.w3.org/1999/xhtml#";
private static final String DOM_NS = "https://html.spec.whatwg.org/#";

private String root;

private static String localName(Element element) {
String tagName = element.tagName().replace(':', '|');
Expand All @@ -79,11 +78,12 @@ private static String localName(Element element) {
@Override
public void triplify(Properties properties, FacadeXGraphBuilder builder) throws IOException, TriplifierHTTPException {

this.root = Triplifier.getRootArgument(properties);
// this.root = Triplifier.getRootArgument(properties);
Charset charset = Triplifier.getCharsetArgument(properties);
boolean blank_nodes = PropertyUtils.getBooleanProperty(properties, IRIArgument.BLANK_NODES);
String namespace = PropertyUtils.getStringProperty(properties, IRIArgument.NAMESPACE);
String selector = properties.getProperty(PROPERTY_SELECTOR, ":root");
String dataSourceId = "";

log.trace(properties.toString());
if (properties.containsKey(PROPERTY_METADATA) && Boolean.parseBoolean(properties.getProperty(PROPERTY_METADATA))) {
Expand All @@ -96,7 +96,7 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws
}
}

log.trace("namespace {}\n root {}\ncharset {}\nselector {}", namespace, root, charset, selector);
log.trace("namespace {}\n root {}\ncharset {}\nselector {}", namespace, builder.getRoot(dataSourceId), charset, selector);

Document doc;
// If location is a http or https, raise exception if status is not 200
Expand All @@ -114,25 +114,25 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws

Elements elements = doc.select(selector);
String rootResourceId = null;
String dataSourceId = "";

if (elements.size() > 1) {
// Create a root container
rootResourceId = root;
builder.addRoot(dataSourceId, rootResourceId);
rootResourceId = builder.getRoot(dataSourceId);
builder.addRoot(dataSourceId);
}

int counter = 0;
for (Element element : elements) {
counter++;
String resourceId = toResourceId(element, blank_nodes);
String resourceId = toResourceId(element, blank_nodes, builder, dataSourceId);
if (elements.size() > 1) {
// link to root container
builder.addContainer(dataSourceId, rootResourceId, counter, toResourceId(element, blank_nodes));
builder.addContainer(dataSourceId, rootResourceId, counter, toResourceId(element, blank_nodes, builder, dataSourceId));
} else {
// Is root container
rootResourceId = root;
resourceId = root;
builder.addRoot(dataSourceId, rootResourceId);
rootResourceId = builder.getRoot(dataSourceId);
resourceId = builder.getRoot(dataSourceId);
builder.addRoot(dataSourceId);
}
try {
populate(builder, dataSourceId, element, blank_nodes, resourceId);
Expand Down Expand Up @@ -188,22 +188,22 @@ private void populate(FacadeXGraphBuilder builder, String dataSourceId, Element
if (child.outerHtml().trim().equals("")) continue;
counter++;
if (child instanceof Element) {
builder.addContainer(dataSourceId, resourceId, counter, toResourceId((Element) child, blank_nodes));
populate(builder, dataSourceId, (Element) child, blank_nodes, toResourceId((Element) child, blank_nodes));
builder.addContainer(dataSourceId, resourceId, counter, toResourceId((Element) child, blank_nodes, builder, dataSourceId));
populate(builder, dataSourceId, (Element) child, blank_nodes, toResourceId((Element) child, blank_nodes, builder, dataSourceId));
} else {
builder.addValue(dataSourceId, resourceId, counter, child.outerHtml());
}
}

}

private String toResourceId(Element element, boolean blankNodes) {
private String toResourceId(Element element, boolean blankNodes, FacadeXGraphBuilder builder, String dataSourceId) {
if (blankNodes) {
return Integer.toHexString(element.hashCode());
} else {
String ln = localName(element);
log.debug(ln);
return this.root.concat("/").concat(ln);
return builder.getRoot(dataSourceId).concat("/").concat(ln);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,23 @@ public void testIssue351() throws URISyntaxException, IOException {

}

/**
 * Regression test for issue 386: the root of an XML document must be minted from the
 * user-supplied {@code fx:root} option.
 * <p>
 * Runs the query stored in {@code issues/issue386.sparql} and expects exactly one solution,
 * whose {@code ?root} variable is bound to {@code http://example.org/document}.
 * <p>
 * See <a href="https://github.com/SPARQL-Anything/sparql.anything/issues/386">...</a>
 *
 * @throws URISyntaxException if the URL of the query resource cannot be converted to a URI
 * @throws IOException        if the query resource cannot be read
 */
@Test
public void testIssue386() throws URISyntaxException, IOException {
    Dataset ds = DatasetFactory.createGeneral();
    QC.setFactory(ARQ.getContext(), FacadeX.ExecutorFactory);
    String queryStr = IOUtils.toString(Objects.requireNonNull(getClass().getClassLoader().getResource("issues/issue386.sparql"), "issues/issue386.sparql not found on the classpath").toURI(), StandardCharsets.UTF_8);
    Query query = QueryFactory.create(queryStr);
    // Close the execution once the result set has been fully checked, so that the
    // resources it holds are released even when an assertion fails.
    try (QueryExecution qExec = QueryExecutionFactory.create(query, ds)) {
        ResultSet rs = qExec.execSelect();
        assertTrue(rs.hasNext());
        assertEquals("http://example.org/document", rs.next().get("root").asResource().getURI());
        assertFalse(rs.hasNext());
    }
}

/**
* See <a href="https://github.com/SPARQL-Anything/sparql.anything/issues/371">...</a>
*/
Expand Down
13 changes: 13 additions & 0 deletions sparql-anything-it/src/test/resources/issues/issue386.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
PREFIX fx: <http://sparql.xyz/facade-x/ns/>
SELECT ?root {

SERVICE<x-sparql-anything:> {
fx:properties fx:content "<root><child>child1</child><child>child2</child></root>" ;
fx:media-type "application/xml" ;
fx:blank-nodes false ;
fx:root "http://example.org/document" .

?root a fx:root .

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ public class JSONPathSlice implements Slice<Object> {
private Object object;
private int iteration;
private String dataSourceId;
private String rootId;

private JSONPathSlice(){}

Expand All @@ -44,17 +43,12 @@ public String getDatasourceId() {
return dataSourceId;
}

@Override
public String getRootId() {
return rootId;
}

public static JSONPathSlice makeSlice(Object object, int iteration, String rootId, String dataSourceId){
public static JSONPathSlice makeSlice(Object object, int iteration, String dataSourceId){
JSONPathSlice r = new JSONPathSlice();
r.object = object;
r.iteration = iteration;
r.dataSourceId = dataSourceId;
r.rootId = rootId;
return r;
}
}
Loading

0 comments on commit d382c0c

Please sign in to comment.