diff --git a/inst/examples/as_rdf.R b/inst/examples/as_rdf.R index 53bc459..1849a7d 100644 --- a/inst/examples/as_rdf.R +++ b/inst/examples/as_rdf.R @@ -1,19 +1,33 @@ -as_rdf <- function(df, base_uri = NULL) UseMethod("as_rdf") +as_rdf <- function(df, key = NULL, base_uri = NULL) UseMethod("as_rdf") + ## tidy data to rdf -as_rdf.data.frame <- function(df, base_uri = NULL){ +as_rdf.data.frame <- function(df, key = NULL, base_uri = NULL){ - x <- tibble::rowid_to_column(df, "subject") + x <- df + if(is.null(key)){ + x <- tibble::rowid_to_column(x, "subject") + } else { + names(x)[names(x) == key] <- "subject" + } suppressWarnings( x <- tidyr::gather(x, key = predicate, value = object, -subject) ) - ## gather looses col-classes + ## gather loses col-classes, so pre-compute them (with base R) col_classes <- data.frame(datatype = - vapply(df, rdflib:::xs_class, character(1))) - col_classes <- tibble::rownames_to_column(col_classes, "predicate") - x <- dplyr::inner_join(x, col_classes, "predicate") + vapply(df, rdflib:::xs_class, character(1)), + stringsAsFactors = FALSE) + col_classes$predicate <- rownames(col_classes) + rownames(col_classes) <- NULL + + + x <- merge(x, col_classes, by = "predicate") + + ## NA to blank string + x$object[is.na(x$object)] <- "" + x$subject[is.na(x$subject)] <- "" rdf <- rdf() for(i in seq_along(x$subject)){ diff --git a/vignettes/rdf_intro.Rmd b/vignettes/rdf_intro.Rmd index b1c0300..d247500 100644 --- a/vignettes/rdf_intro.Rmd +++ b/vignettes/rdf_intro.Rmd @@ -350,14 +350,45 @@ _Still working on writing this section_ ```{r} source(system.file("examples/as_rdf.R", package="rdflib")) -## Testing: Digest some data.frames into RDF and extract back - library(tidyverse) - cars <- mtcars %>% rownames_to_column("Model") - x1 <- as_rdf(iris, "iris:") - x2 <- as_rdf(cars, "mtcars:") - rdf <- c(x1,x2) ``` +Note: looping over `rdf_add` can be a slow way to add hundreds of thousands of triples. Coercing into RDF via JSON-LD might be much faster. 
+ +```{r} +library(nycflights13) +library(tidyverse) + +df <- flights %>% + left_join(airlines) %>% + left_join(planes, by="tailnum") %>% + select(carrier, manufacturer, model) + +``` + + +```{r} +x1 <- as_rdf(airlines, "carrier", "x:") +x2 <- as_rdf(airports, "faa", "x:") +x3 <- as_rdf(planes, "tailnum", "x:") +system.time( +x4 <- as_rdf(flights, NULL, "x:") +) + +rdf <- c(x1,x2,x3) +``` + + +```{r} +sparql <- + 'SELECT ?carrier ?manufacturer ?model +WHERE { + ?s <x:carrier> ?carrier . + ?s <x:manufacturer> ?manufacturer . + ?s <x:model> ?model . +}' + +iris2 <- rdf_query(rdf, sparql) +``` ## SPARQL: Getting back to Tidy Tables!