Skip to content

Commit

Permalink
Use built-in base uri mechanism from jsonld
Browse files Browse the repository at this point in the history
Much much faster (and simpler) than manual expansion with `@base`.  Follow-up on #5
  • Loading branch information
cboettig committed Feb 15, 2018
1 parent 55a7c1e commit 14b7ebe
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 20 deletions.
12 changes: 12 additions & 0 deletions R/rdf.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@
#' Typical use will be simply to initialize a container to which
#' the user would manually add triples using \code{\link{rdf_add}}.
#'
#' Overview of configuration options
#' rdflib_storage:
#' - NULL or "memory" for in memory storage. (default)
#' - "BDB" for disk-based storage in Berkeley Database
#' rdflib_print_format:
#' - NULL or "nquads" (default)
#' - any valid serializer name: e.g. "rdfxml", "jsonld", "turtle", "ntriples"
#' rdflib_base_uri:
#' - Default base URI to use (when serializing JSON-LD only at this time);
#' the default is "localhost://"
#'
#'
#'
#' @importClassesFrom redland World Model Storage
#' @importMethodsFrom redland freeWorld freeModel freeStorage
Expand Down
25 changes: 5 additions & 20 deletions R/rdf_parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ rdf_parse <- function(doc,
## be a read-only task!
if(format == "jsonld"){
tmp <- tempfile()
tmp <- add_base_uri(doc, tmp)
x <- jsonld::jsonld_to_rdf(tmp)
#tmp <- add_base_uri(doc, tmp)
x <- jsonld::jsonld_to_rdf(doc,
options =
list(base = getOption("rdflib_base_uri", "localhost://"),
format = "application/nquads"))
writeLines(x, tmp)
format <- "nquads"
doc <- tmp
Expand All @@ -56,24 +59,6 @@ rdf_parse <- function(doc,
rdf
}

# Whenever we convert JSON-LD to RDF we should set a @base if not set.
# https://json-ld.org/playground does this (with its own URL)
# but jsonld R package does not.
# For details, see https://github.com/cboettig/rdflib/issues/5
#
#' @importFrom jsonld jsonld_expand jsonld_compact
add_base_uri <- function(doc, tmp = tempfile()){
  ## Write `doc` to `tmp` as JSON-LD compacted against a context that only
  ## sets @base, and return the path `tmp`.
  ##
  ## The document is expanded first because it may have no context at all
  ## (it may already be in expanded form, e.g. output of rdf_serialize).
  ## Expanding also lets any @base already present in the document take
  ## precedence over the one supplied here.
  base_uri <- getOption("rdflib_base_uri", "localhost://")
  base_context <- paste0('{"@base": "', base_uri, '"}')

  writeLines(
    jsonld::jsonld_compact(jsonld::jsonld_expand(doc), base_context),
    tmp
  )
  tmp
}


# rdf functions like working with local files
Expand Down
39 changes: 39 additions & 0 deletions inst/examples/profile_performance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

library(nycflights13)
library(tidyverse)
library(rdflib)
source(system.file("examples/as_rdf.R", package="rdflib"))


## Profiling script: compares two ways of turning nycflights13 tables into
## rdf objects. `as_rdf()` and `as_rdf.list()` are not defined in this script;
## presumably they come from the sourced examples/as_rdf.R -- confirm.

## Tidyverse Style
## Join keys are spelled out so the joins do not depend on (or print a
## message about) whichever column names the tables happen to share.
## NOTE: `df` is not used again below; it is kept as the tidyverse reference.
df <- flights %>%
  left_join(airlines, by = "carrier") %>%
  left_join(planes, by = "tailnum") %>%
  select(carrier, manufacturer, model)

## JSON-LD approach -- 5 sec,
##  ~ all rdf_parse
##  ~ all of which is jsonld_expand()
profvis::profvis(
  x2 <- as_rdf.list(airports)
)


## rdf_add approach 12 sec
##  ~ all of which is rdf_add
##  ~ all of which is calls to new / initialize S4 method
##  ~ none of which is calls to the low level librdf_* C calls
## This overwrites the `x2` produced by the JSON-LD run above.
profvis::profvis(
  x2 <- as_rdf(airports, "faa", "x:")
)



x1 <- as_rdf(airlines, "carrier", "x:")
x3 <- as_rdf(planes, "tailnum", "x:")

## Timed separately; `x4` is not part of the combined object below.
system.time(
  x4 <- as_rdf(flights, NULL, "x:")
)

rdf <- c(x1, x2, x3)
47 changes: 47 additions & 0 deletions tests/testthat/test-parse-serialize.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,56 @@ testthat::test_that("we can serialize turtle with a baseUri", {
rdf_free(rdf)
})


## JSON-LD tests with default base uri

testthat::test_that("@id is not a URI, we should get localhost", {
  # "person_id" has no scheme, so the serialized output is expected to
  # mention the default base URI ("localhost").
  doc <- '{
"@context": "http://schema.org/",
"@id": "person_id",
"name": "Jane Doe"
}'

  graph <- rdf_parse(doc, "jsonld")
  testthat::expect_output(
    cat(format(graph, "nquads")),
    "localhost"
  )
  rdf_free(graph)
})

testthat::test_that("@id is a URI, we should not get localhost", {
  # "uri:person_id" already carries a scheme, so the default base URI
  # should not show up anywhere in the serialized output.
  doc <- '{
"@context": "http://schema.org/",
"@id": "uri:person_id",
"name": "Jane Doe"
}'
  graph <- rdf_parse(doc, "jsonld")
  nquads <- format(graph, "nquads")
  mentions_localhost <- grepl("localhost", nquads)
  testthat::expect_false(mentions_localhost)
  rdf_free(graph)
})

testthat::test_that("we can alter the base URI", {
  ex <- '{
"@id": "person_id",
"schema:name": "Jane Doe"
}'
  # options() returns the previous value(s). Keeping them lets the `finally`
  # clause put back whatever was set before this test (including "unset"),
  # even when rdf_parse() errors part-way through, so the option cannot leak
  # into later tests.
  old <- options(rdflib_base_uri = "http://example.com/")
  tryCatch({
    # A custom base URI should appear in the serialized output.
    rdf <- rdf_parse(ex, "jsonld")
    testthat::expect_output(cat(format(rdf, "nquads")), "http://example.com")
    rdf_free(rdf)

    # With an empty base URI, parsing and formatting must still run without
    # producing any output, warnings or messages.
    options(rdflib_base_uri = "")
    rdf <- rdf_parse(ex, "jsonld")
    testthat::expect_silent(cat(format(rdf, "nquads")))
    rdf_free(rdf)
  }, finally = options(old))
})







testthat::test_that("we can parse into an existing rdf model", {
rdf1 <- rdf_parse(system.file("extdata/ex.xml", package = "rdflib"))
Expand Down

0 comments on commit 14b7ebe

Please sign in to comment.