From 14b7ebeff576bf3f3cbf7b115900a6e825f76c15 Mon Sep 17 00:00:00 2001 From: Carl Boettiger Date: Thu, 15 Feb 2018 21:13:05 +0000 Subject: [PATCH] Use built-in base uri mechanism from jsonld Much much faster (and simpler) than manual expansion with `@base`. Follow-up on #5 --- R/rdf.R | 12 +++++++ R/rdf_parse.R | 25 +++----------- inst/examples/profile_performance.R | 39 ++++++++++++++++++++++ tests/testthat/test-parse-serialize.R | 47 +++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 20 deletions(-) create mode 100644 inst/examples/profile_performance.R diff --git a/R/rdf.R b/R/rdf.R index d67df76..309bb04 100644 --- a/R/rdf.R +++ b/R/rdf.R @@ -25,6 +25,18 @@ #' Typical use will be simply to initialize a container to which #' the user would manually add triples using \code{\link{rdf_add}}. #' +#' Overview of configuration options +#' rdflib_storage: +#' - NULL or "memory" for in memory storage. (default) +#' - "BDB" for disk-based storage in Berkeley Database +#' rdflib_print_format: +#' - NULL or "nquads" (default) +#' - any valid serializer name: e.g. "rdfxml", "jsonld", "turtle", "ntriples" +#' rdflib_base_uri: +#' - Default base URI to use (when serializing JSON-LD only at this time) +#' default is "localhost://" +#' +#' #' #' @importClassesFrom redland World Model Storage #' @importMethodsFrom redland freeWorld freeModel freeStorage diff --git a/R/rdf_parse.R b/R/rdf_parse.R index 30d6904..7ab2131 100644 --- a/R/rdf_parse.R +++ b/R/rdf_parse.R @@ -37,8 +37,11 @@ rdf_parse <- function(doc, ## be a read-only task! 
if(format == "jsonld"){ tmp <- tempfile() - tmp <- add_base_uri(doc, tmp) - x <- jsonld::jsonld_to_rdf(tmp) + #tmp <- add_base_uri(doc, tmp) + x <- jsonld::jsonld_to_rdf(doc, + options = + list(base = getOption("rdflib_base_uri", "localhost://"), + format = "application/nquads")) writeLines(x, tmp) format <- "nquads" doc <- tmp @@ -56,24 +59,6 @@ rdf_parse <- function(doc, rdf } -# Whenever we convert JSON-LD to RDF we should set a @base if not set. -# https://json-ld.org/playground does this (with it's own url) -# but jsonld R package does not. -# For details, see https://github.com/cboettig/rdflib/issues/5 -# -#' @importFrom jsonld jsonld_expand jsonld_compact -add_base_uri <- function(doc, tmp = tempfile()){ - - ## Cannot assume it has context, may already be expanded - ## (e.g. from rdf_serialize). Expanding will also make - ## any preset @base context take precedence - expanded <- jsonld::jsonld_expand(doc) - base <- getOption("rdflib_base_uri", "localhost://") - context <- paste0('{"@base": "', base, '"}') - compact <- jsonld::jsonld_compact(expanded, context) - writeLines(compact, tmp) - tmp -} # rdf functions like working with local files diff --git a/inst/examples/profile_performance.R b/inst/examples/profile_performance.R new file mode 100644 index 0000000..ed9207d --- /dev/null +++ b/inst/examples/profile_performance.R @@ -0,0 +1,39 @@ + +library(nycflights13) +library(tidyverse) +library(rdflib) +source(system.file("examples/as_rdf.R", package="rdflib")) + + +## Tidyverse Style +df <- flights %>% + left_join(airlines) %>% + left_join(planes, by="tailnum") %>% + select(carrier, manufacturer, model) + +## JSON-LD approach -- 5 sec, +## ~ all rdf_parse +## ~ all of which is jsonld_expand() +profvis::profvis( + x2 <- as_rdf.list(airports) +) + + +## rdf_add approach 12 sec +## ~ all of which is rdf_add +## ~ all of which is calls to new / initialize S4 method +## ~ none of which is calls to the low level librdf_* C calls +profvis::profvis( +x2 <- 
as_rdf(airports, "faa", "x:") +) + + + +x1 <- as_rdf(airlines, "carrier", "x:") +x3 <- as_rdf(planes, "tailnum", "x:") + +system.time( + x4 <- as_rdf(flights, NULL, "x:") +) + +rdf <- c(x1,x2,x3) diff --git a/tests/testthat/test-parse-serialize.R b/tests/testthat/test-parse-serialize.R index 178e2a5..aa668b3 100644 --- a/tests/testthat/test-parse-serialize.R +++ b/tests/testthat/test-parse-serialize.R @@ -82,9 +82,56 @@ testthat::test_that("we can serialize turtle with a baseUri", { rdf_free(rdf) }) + +## JSON-LD tests with default base uri + + testthat::test_that("@id is not a URI, we should get localhost", { + ex <- '{ + "@context": "http://schema.org/", + "@id": "person_id", + "name": "Jane Doe" + }' + + rdf <- rdf_parse(ex, "jsonld") + testthat::expect_output(cat(format(rdf, "nquads")), "localhost") + rdf_free(rdf) + }) + + testthat::test_that("@id is a URI, we should not get localhost", { + ex <- '{ + "@context": "http://schema.org/", + "@id": "uri:person_id", + "name": "Jane Doe" + }' + rdf <- rdf_parse(ex, "jsonld") + testthat::expect_false(grepl("localhost", format(rdf, "nquads"))) + rdf_free(rdf) + }) + + testthat::test_that("we can alter the base URI", { + ex <- '{ + "@id": "person_id", + "schema:name": "Jane Doe" + }' + options(rdflib_base_uri = "http://example.com/") + rdf <- rdf_parse(ex, "jsonld") + testthat::expect_output(cat(format(rdf, "nquads")), "http://example.com") + rdf_free(rdf) + + + options(rdflib_base_uri = "") + rdf <- rdf_parse(ex, "jsonld") + testthat::expect_silent(cat(format(rdf, "nquads"))) + rdf_free(rdf) + + options(rdflib_base_uri = NULL) + }) + + + testthat::test_that("we can parse into an existing rdf model", { rdf1 <- rdf_parse(system.file("extdata/ex.xml", package = "rdflib"))