Skip to content

Commit

Permalink
support for BDB backend for disk-based storage, closes #6
Browse files Browse the repository at this point in the history
Also extends tests, exports rdf_free, minor tweaks to documentation.  Attempt to install bdb on travis for testing as well.
  • Loading branch information
cboettig committed Feb 3, 2018
1 parent 7cdaebd commit 7c21c6b
Show file tree
Hide file tree
Showing 10 changed files with 343 additions and 72 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ addons:
- librdf0-dev
- libv8-dev
- libjq-dev
- libdb-dev
after_success:
- Rscript -e 'covr::codecov()'
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ S3method(format,rdf)
S3method(print,rdf)
export(rdf)
export(rdf_add)
export(rdf_free)
export(rdf_parse)
export(rdf_query)
export(rdf_serialize)
Expand All @@ -20,11 +21,18 @@ importFrom(jsonld,jsonld_expand)
importFrom(jsonld,jsonld_to_rdf)
importFrom(methods,as)
importFrom(methods,new)
importFrom(utils,capture.output)
importFrom(utils,download.file)
importMethodsFrom(redland,addStatement)
importMethodsFrom(redland,executeQuery)
importMethodsFrom(redland,freeModel)
importMethodsFrom(redland,freeParser)
importMethodsFrom(redland,freeQuery)
importMethodsFrom(redland,freeQueryResults)
importMethodsFrom(redland,freeSerializer)
importMethodsFrom(redland,freeStatement)
importMethodsFrom(redland,freeStorage)
importMethodsFrom(redland,freeWorld)
importMethodsFrom(redland,getNextResult)
importMethodsFrom(redland,parseFileIntoModel)
importMethodsFrom(redland,serializeToFile)
Expand Down
16 changes: 14 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# rdflib 0.0.4

# rdflib 0.1.0

* add `c()` method to concatenate `rdf` objects
* `rdf_query` now coerces data into appropriate type
if it recognizes the data URI and can match that
to an R type (a few XMLSchema types are recognized,
otherwise still defaults to character string)
* All methods free memory from any temporary objects they initialize
(e.g. parsers, serializers, query, statement)
* rdf includes explicit pointer to storage object
* rdf constructor supports BDB backend for disk-based triplestore [#6](https://github.com/cboettig/rdflib/issues/6)
* tests free rdf objects
* extend unit tests for some of new functionality
* Add `rdf_free` to free rdf (ideally would be done by GC in redland...)

# rdflib 0.0.3 (2018-01-02)

Expand Down
105 changes: 63 additions & 42 deletions R/rdf.R
Original file line number Diff line number Diff line change
@@ -1,31 +1,78 @@

#' Initialize an `rdf` Object
#'
#' @param path path where the local database used to store RDF triples should be created.
#' The default, NULL, stores triples in memory and should be best for most use cases.
#' Large databases should give a path on disk. This requires the redland package to be
#' built with support for Berkeley DB (libdb-dev on Ubuntu, berkeley-db on Homebrew).
#'
#' @return an rdf object
#' @details an rdf Object is a list of class 'rdf', consisting of
#' two pointers to external C objects managed by the redland library.
#' These are the `World` object, basically a top-level pointer for
#' all RDF models, and a `Model` object, essentially a storage structure
#' for all RDF triples. `rdflib` defaults to an in-memory hash-based
#' three pointers to external C objects managed by the redland library.
#' These are the `world` object: basically a top-level pointer for
#' all RDF models, and a `model` object: a collection of RDF statements,
#' and a `storage` object, indicating how these statements are stored.
#' `rdflib` defaults to an in-memory hash-based
#' storage structure at this time. The primary purpose of the `rdf`
#' object is to abstract these low-level details away from the user.
#' Typical use will be simply to initialize a container to which
#' the user would manually add triples using \code{\link{rdf_add}}.
#'
#'
#' @importClassesFrom redland World Model Storage
#' @importMethodsFrom redland freeWorld freeModel freeStorage
#' @importFrom utils capture.output
#' @export
#'
#' @examples
#' x <- rdf()
#'
rdf <- function(){
rdf <- function(path = NULL){
world <- new("World")
storage <- new("Storage", world, "hashes", name = "",
options = "hash-type='memory'")

## Handle storage type
if(is.character(path)){
if(has_bdb()){
## Store in Berkeley DB
options <- paste0("new='yes',hash-type='bdb',dir='", path, "'")
} else {
warning("BDB driver not found. Falling back on in-memory storage")
options <- "hash-type='memory'"
}
} else { ## Store in memory
options <- "hash-type='memory'"
}
storage <- new("Storage", world, "hashes", name = "rdflib",
options = options)


model <- new("Model", world = world, storage, options = "")
structure(list(world = world, model = model),
structure(list(world = world, model = model, storage = storage),
class = "rdf")
}

#' Free Memory Associated with RDF object
#'
#' @param rdf an rdf object
#' @details Free all pointers associated with an rdf object.
#' Frees memory associated with the model, storage, and world
#' objects, in that order. After this a user should remove the rdf object
#' from the environment as well with `rm`, since attempting
#' to reference an object after it has been freed can crash
#' R!
#' @export
#' @examples
#' rdf <- rdf()
#' rdf_free(rdf)
#' rm(rdf)
rdf_free <- function(rdf){
## NOTE(review): presumably the model has to be released before the
## storage and world it was created from -- confirm against redland docs.
redland::freeModel(rdf$model)
redland::freeStorage(rdf$storage)
redland::freeWorld(rdf$world)
}



#' @export
format.rdf <- function(x,
format = getOption("rdf_print_format", "nquads"),
Expand Down Expand Up @@ -56,7 +103,7 @@ print.rdf <- function(x, ...){
#' @return an rdf object, containing the redland world
#' and model objects
#' @importClassesFrom redland World Storage Model Parser
#' @importMethodsFrom redland parseFileIntoModel
#' @importMethodsFrom redland parseFileIntoModel freeParser
#' @importFrom jsonld jsonld_to_rdf
#' @export
#'
Expand Down Expand Up @@ -92,7 +139,8 @@ rdf_parse <- function(doc,
mimetype <- unname(rdf_mimetypes[format])
parser <- new("Parser", rdf$world, name = format, mimeType = mimetype)
redland::parseFileIntoModel(parser, rdf$world, doc, rdf$model)

redland::freeParser(parser)

rdf
}

Expand Down Expand Up @@ -129,7 +177,7 @@ add_base_uri <- function(doc, tmp = tempfile()){
#' \code{\link{rdf_parse}}.
#' @importFrom methods new
#' @importClassesFrom redland Serializer
#' @importMethodsFrom redland setNameSpace serializeToFile
#' @importMethodsFrom redland setNameSpace serializeToFile freeSerializer
#'
#' @export
#' @examples
Expand Down Expand Up @@ -189,6 +237,7 @@ rdf_serialize <- function(rdf,
}
}

redland::freeSerializer(serializer)
invisible(doc)
}

Expand Down Expand Up @@ -250,7 +299,7 @@ rdf_query <- function(rdf, query, ...){
#' to the model object in C code, note that the input object is modified
#' directly.
#' @importClassesFrom redland Statement
#' @importMethodsFrom redland addStatement
#' @importMethodsFrom redland addStatement freeStatement
#' @export
#'
#' @examples
Expand All @@ -271,8 +320,9 @@ rdf_add <- function(rdf, subject, predicate, object,
stmt <- new("Statement", world = rdf$world,
subject, predicate, as.character(object),
subjectType, objectType, datatype_uri)
addStatement(rdf$model, stmt)
redland::addStatement(rdf$model, stmt)

redland::freeStatement(stmt)
## rdf object is a list of pointers, modified in pass-by-reference
invisible(rdf)
}
Expand All @@ -288,36 +338,7 @@ c.rdf <- function(...){
rdf_parse(txt, "nquads")
}

# Must match parser name & q 1.0 mimetype listed at:
# http://librdf.org/raptor/api/raptor-formats-types-by-parser.html
# 3 turtle options listed but only text/turtle works.
rdf_mimetypes <- c("nquads" = "text/x-nquads",
"ntriples" = "application/n-triples",
"rdfxml" = "application/rdf+xml",
"trig" = "application/x-trig",
"turtle" = "text/turtle")

# trig not working right now, not clear why
# Consider adding/testing:
# - n3 (text/n3)
# - rdfa (application/xhtml+xml, or text/html)
# - rss (application/rss+xml or text/rss)


# rdf functions like working with local files
# this helper function allows us to also use URLs or strings
#' @importFrom utils download.file
text_or_url_to_doc <- function(x, tmp = tempfile()){
if(file.exists(x)){
return(x)
} else if(grepl("^https?://", x)) {
utils::download.file(x, tmp)
return(tmp)
} else {
writeLines(x, tmp)
return(tmp)
}
}

#' rdflib: Tools to Manipulate and Query Semantic Data
#'
Expand Down
54 changes: 54 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,57 @@ rectangularize_query_results <- function(out){
names(X) <- vars
as.data.frame(X, stringsAsFactors=FALSE)
}





has_bdb <- function(){
  ## Check whether redland has Berkeley DB (BDB) support.
  ##
  ## Returns TRUE when a BDB-backed "hashes" storage can be created,
  ## FALSE otherwise. Requesting an unavailable backend does not raise an
  ## error; the storage object simply wraps a null pointer, so that is what
  ## we have to test for.
  world <- new("World")
  path <- tempdir()
  options <- paste0("new='yes',hash-type='bdb',dir='", path, "'")
  storage <- new("Storage", world, "hashes", name = "rdflib",
                 options = options)

  ## The printed form of a null pointer is platform-dependent (e.g.
  ## "<pointer: 0x0>" on some systems, "<pointer: (nil)>" on others), so
  ## compare against the printed form of a freshly created null external
  ## pointer rather than against a hard-coded string.
  show_pointer <- function(ptr) {
    utils::capture.output(base::print.default(ptr))[1L]
  }
  null_pointer <- show_pointer(new("externalptr"))
  out <- !identical(show_pointer(storage@librdf_storage@ref), null_pointer)

  ## Release the probe objects before returning
  redland::freeStorage(storage)
  redland::freeWorld(world)

  out
}

# Named character vector mapping each supported format name to its MIME type.
# Each name must match the parser name, and each value the mimetype given
# with q = 1.0, as listed at:
# http://librdf.org/raptor/api/raptor-formats-types-by-parser.html
# Three turtle mimetypes are listed there, but only text/turtle works.
rdf_mimetypes <- c("nquads" = "text/x-nquads",
"ntriples" = "application/n-triples",
"rdfxml" = "application/rdf+xml",
"trig" = "application/x-trig",
"turtle" = "text/turtle")

# trig not working right now, not clear why
# Consider adding/testing:
# - n3 (text/n3)
# - rdfa (application/xhtml+xml, or text/html)
# - rss (application/rss+xml or text/rss)


# rdf functions like working with local files
# this helper function allows us to also use URLs or strings
#' @importFrom utils download.file
text_or_url_to_doc <- function(x, tmp = tempfile()){
if(file.exists(x)){
return(x)
} else if(grepl("^https?://", x)) {
utils::download.file(x, tmp, quiet = TRUE)
return(tmp)
} else {
writeLines(x, tmp)
return(tmp)
}
}
38 changes: 38 additions & 0 deletions inst/examples/storage_types.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

## Exploration of the storage backends offered by redland, with notes on
## which ones were found to work.
library(redland)
world <- new("World")

## Berkeley DB-backed hashes: no error is raised, but a null pointer is
## returned
bdb_storage <- new("Storage", world, "hashes", name = "db1",
                   options = "new='yes',hash-type='bdb',dir='.'")
model <- new("Model", world = world, storage = bdb_storage, options = "")

## error: sqlite not found
sqlite_storage <- new("Storage", world, "sqlite", name = "sqlite1",
                      options = "new='yes'")
## not found
postgres_storage <- new("Storage", world, "postgresql", name = "postgres1",
                        options = "new='yes',host='localhost',database='red',user='foo',password='bar'")

## Works, in memory, serializes to an rdf/xml file called thing.rdf when freed.
## Not indexed, so will be slow. Suitable for small models.
file_storage <- new("Storage", world, "file", "thing.rdf", "")
storage <- file_storage
model <- new("Model", world = world, storage = storage, options = "")

## Works, fast write, not indexed, good for only small models,
## no reason to use this instead of hash-based memory (which is indexed)
memory_storage <- new("Storage", world, "memory", "", "")
storage <- memory_storage
model <- new("Model", world = world, storage = storage, options = "")


library(rdflib)

## Wrap the last world/model/storage created above (the "memory" storage)
## in an rdf object by hand so the rdflib helpers can be used on it.
rdf <- structure(list(world = world, model = model, storage = storage),
                 class = "rdf")

rdf_add(rdf,
        subject = "http://www.dajobe.org/",
        predicate = "http://purl.org/dc/elements/1.1/language",
        object = "en")
rdf
18 changes: 18 additions & 0 deletions inst/extdata/ex2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:nodeID="b0">
<rdf:type rdf:resource="http://schema.org/Person"/>
</rdf:Description>
<rdf:Description rdf:nodeID="b0">
<ns0:name xmlns:ns0="http://schema.org/">Jane Doe</ns0:name>
</rdf:Description>
<rdf:Description rdf:nodeID="b0">
<ns0:url xmlns:ns0="http://schema.org/" rdf:resource="http://www.janedoe.com"/>
</rdf:Description>
<rdf:Description rdf:nodeID="b0">
<ns0:jobTitle xmlns:ns0="http://schema.org/">Professor</ns0:jobTitle>
</rdf:Description>
<rdf:Description rdf:nodeID="b0">
<ns0:telephone xmlns:ns0="http://schema.org/">(425) 123-4567</ns0:telephone>
</rdf:Description>
</rdf:RDF>
17 changes: 12 additions & 5 deletions man/rdf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7c21c6b

Please sign in to comment.