From 9830e8b1e3871a6a2867cebe918960683180b81f Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sat, 22 Feb 2020 14:51:47 +0000 Subject: [PATCH 1/8] Fix bug in neuprint_ROI_connectivity(cached=T) * need to check that no connections are missing * progress on #87 --- R/roi.R | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/R/roi.R b/R/roi.R index ec8be6e6..b89f7f19 100644 --- a/R/roi.R +++ b/R/roi.R @@ -75,26 +75,46 @@ neuprint_bodies_in_ROI <- function(roi, dataset = NULL, all_segments = FALSE, co #' @title Get the connectivity between ROIs in a neuPrint dataset #' #' @param rois regions of interest for a dataset -#' @param cached pull cached results (TRUE) or recalculate the connectivity (FALSE)? -#' @param full return all neurons involved (TRUE, the default) or give a ROI summary (FALSE, default behavior if `cached` is TRUE) -#' @param statistic either "weight" or count" (default "weight"). Which number to return (see neuprint explorer for details) for summary results (either `full` is FALSE or `cached` is TRUE) +#' @param cached pull cached results (TRUE) or recalculate the connectivity +#' (FALSE)? +#' @param full return all neurons involved (TRUE, the default) or give a ROI +#' summary (FALSE, default behavior if `cached` is TRUE) +#' @param statistic either "weight" or count" (default "weight"). Which number +#' to return (see neuprint explorer for details) for summary results (either +#' `full` is FALSE or `cached` is TRUE) #' @param ... methods passed to \code{neuprint_login} #' @inheritParams neuprint_fetch_custom -#' @seealso \code{\link{neuprint_simple_connectivity}}, \code{\link{neuprint_common_connectivity}} +#' @seealso \code{\link{neuprint_simple_connectivity}}, +#' \code{\link{neuprint_common_connectivity}} #' @export -#' @rdname neuprint_ROI_connectivity -neuprint_ROI_connectivity <- function(rois, cached = FALSE, full=TRUE, statistic = c("weight","count"),dataset = NULL, conn = NULL, ...){ +#' @examples +#' \donttest{ +#' aba <- neuprint_ROI_connectivity(neuprint_ROIs(superLevel = TRUE), +#' cached=TRUE) +#' heatmap(aba) +#' } +neuprint_ROI_connectivity <- function(rois, cached = FALSE, full=TRUE, + statistic = c("weight","count"), + dataset = NULL, conn = NULL, ...) { statistic <- match.arg(statistic) roicheck <- neuprint_check_roi(rois=rois, dataset = dataset, conn = conn, ...) - if (cached){ - results <-matrix(nrow=length(rois),ncol=length(rois),dimnames = list(inputs=rois,outputs=rois)) + if (cached) { + results <-matrix(ifelse(statistic == 'count', 0L, 0), + nrow=length(rois), ncol=length(rois), + dimnames = list(inputs=rois,outputs=rois)) roi.conn = neuprint_fetch(path = 'api/cached/roiconnectivity', conn = conn, ...) - for (inp in rois){ - for (out in rois){ - results[inp,out] <- roi.conn$weights[[paste(inp,out,sep="=>")]][[statistic]] + missing=setdiff(rois, unlist(roi.conn$roi_names)) + if(length(missing)) + warning("Dropping missing rois:", paste(missing, collapse = " ")) + allpairs = names(roi.conn$weights) + for (inp in rois) { + for (out in rois) { + edgename=paste(inp, out, sep="=>") + if(edgename %in% allpairs) + results[inp,out] <- roi.conn$weights[[edgename]][[statistic]] } } - }else{ + } else { Payload = noquote(sprintf('{"dataset":"%s","rois":%s}', dataset, ifelse(is.null(rois),jsonlite::toJSON(list()),jsonlite::toJSON(rois)))) From f0957b757c0c099d07509a75c0162197f914ac40 Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sat, 22 Feb 2020 14:55:50 +0000 Subject: [PATCH 2/8] Speed up extract_connectivity_df * switch to tibble (a bit quicker than standard data.frame * don't keep on cbinding data.frames but just make one at the beginning --- DESCRIPTION | 3 ++- R/connectivity.R | 14 +++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d37ba829..858f1f55 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,8 @@ Imports: memoise, dplyr, bit64, - stringr + stringr, + tibble Remotes: natverse/drvid, natverse/nat diff --git a/R/connectivity.R b/R/connectivity.R index 20101b5d..939b48ec 100644 --- a/R/connectivity.R +++ b/R/connectivity.R @@ -477,24 +477,20 @@ extract_connectivity_df <- function(rois, json){ if(is.null(json)){ return(NULL) } - a <- unlist(jsonlite::fromJSON(json)) - values <- data.frame(row.names = 1) rois <- unique(rois) #this takes care if both the input and output ROIs are same.. + a <- unlist(jsonlite::fromJSON(json)) + roicols <- c(t(outer(rois, c("pre", "post"), paste, sep="."))) + values <- tibble::as_tibble(as.list(structure(rep(0, length(roicols)), .Names=roicols))) for(roi in rois){ - d <- data.frame(0,0) - colnames(d) <- paste0(roi,c(".pre",".post")) + thisroicols <- paste0(roi,c(".pre",".post")) if (!is.null(a)){ b <- a[startsWith(names(a),paste0(roi,"."))] - d[names(b)] <- b + values[names(b)] <- b } - values <- cbind(values,d) } values } - - - ##' @title Get a matrix for connectivity between neuron/neuronlist objects #' #' @description Get an adjacency matrix for the synaptic connectivity between \code{nat::neuron}/\code{nat::neuronlist} objects. This function does not query a neuPrint server. From 2025b2e00f3568404b8868a64b93ba2f98826408 Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sat, 22 Feb 2020 16:52:52 +0000 Subject: [PATCH 3/8] Fix cached=F * first of all actually make it work * some speed ups by switching from grep to stringr::str_detect --- R/roi.R | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/R/roi.R b/R/roi.R index b89f7f19..f440cf1b 100644 --- a/R/roi.R +++ b/R/roi.R @@ -120,27 +120,42 @@ neuprint_ROI_connectivity <- function(rois, cached = FALSE, full=TRUE, ifelse(is.null(rois),jsonlite::toJSON(list()),jsonlite::toJSON(rois)))) class(Payload) = "json" roi.conn <- neuprint_fetch(path = 'api/npexplorer/roiconnectivity', body = Payload, conn = conn, ...) - connData <- roi.conn$data[sapply(roi.conn$data,function(d) any(sapply(rois,function(r) grepl(paste0("\"",r,"\""), d[[2]]))))] - connections <-lapply(connData, function(rc) extract_connectivity_df(rois=rois,json=rc[[2]])) - resultsD <- dplyr::bind_rows(connections) - resultsD$bodyid <- as.character(sapply(connData, function(d) d[[1]])) - if (!full){ - results <- matrix(nrow=length(rois),ncol=length(rois),dimnames = list(inputs=rois,outputs=rois)) - if (statistic == "count"){ - for (inp in rois){ - for (out in rois){ - results[inp,out] <- length(which(resultsD[[paste0(inp,".post")]]>0 & resultsD[[paste0(out,".pre")]]>0)) + ll <- neuprint_list2df(roi.conn) + # running fromJSON on many separate strings is slow, so start by + # selecting strings that actually contain the selected ROIs + hasroi=sapply(rois, function(roi) + stringr::str_detect(ll$roiInfo, stringr::fixed(paste0('"',roi,'"')))) + if(is.matrix(hasroi)) hasroi=rowSums(hasroi)>0 + + connections <-lapply(ll$roiInfo[hasroi], + function(x) extract_connectivity_df(rois=rois,json=x)) + resultsD <- cbind(ll[hasroi, 1, drop=FALSE], dplyr::bind_rows(connections)) + if (!full) { + results <- + matrix( + nrow = length(rois), + ncol = length(rois), + dimnames = list(inputs = rois, outputs = rois) + ) + if (statistic == "count") { + for (inp in rois) { + for (out in rois) { + results[inp, out] <- + length(which(resultsD[[paste0(inp, ".post")]] > 0 & + resultsD[[paste0(out, ".pre")]] > 0)) } } - }else{ + } else{ totalInputs <- neuprint_get_meta(resultsD$bodyid)$post - for (inp in rois){ - for (out in rois){ - results[inp,out] <- sum((resultsD[[paste0(out,".pre")]]*resultsD[[paste0(inp,".post")]]/totalInputs)[totalInputs>0]) + for (inp in rois) { + for (out in rois) { + results[inp, out] <- + sum((resultsD[[paste0(out, ".pre")]] * resultsD[[paste0(inp, ".post")]] / + totalInputs)[totalInputs > 0]) } } } - }else{ + } else { results <- resultsD } } From 43ab010cd8cda7f8d2ed5b294a3fe8c06c5eeb48 Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sat, 22 Feb 2020 17:04:58 +0000 Subject: [PATCH 4/8] test extract_connectivity_df * use some data from neuprint_find_neurons --- tests/testthat/test-roi.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-roi.R b/tests/testthat/test-roi.R index 02ad4920..0bcce435 100644 --- a/tests/testthat/test-roi.R +++ b/tests/testthat/test-roi.R @@ -1,3 +1,11 @@ +test_that("", { + json='{"SNP(R)": {"pre": 71, "post": 155}, "SLP(R)": {"pre": 67, "post": 153}, "SIP(R)": {"pre": 4, "post": 2}, "LH(R)": {"pre": 20, "post": 25}, "VLNP(R)": {"pre": 1}, "PLP(R)": {"pre": 1}, "AL(R)": {"pre": 1, "post": 162}}' + rois=c("AL(R)", "LH(R)") + + expect_is(xdf <- extract_connectivity_df(rois = rois, json=json), 'data.frame') + expect_known_hash(xdf, "85ea57c5ec") +}) + skip_if(as.logical(Sys.getenv("SKIP_NP_SERVER_TESTS"))) test_that("neuprint_bodies_in_ROI works", { From 84c2772ff689f60c2bf0d3d19e772c2341206bd6 Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sat, 22 Feb 2020 17:22:44 +0000 Subject: [PATCH 5/8] use data.frame as baseline for test not hash * this actually tests equal with tibble or data.frame --- tests/testthat/test-roi.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-roi.R b/tests/testthat/test-roi.R index 0bcce435..7ab2d6bc 100644 --- a/tests/testthat/test-roi.R +++ b/tests/testthat/test-roi.R @@ -2,8 +2,10 @@ test_that("", { json='{"SNP(R)": {"pre": 71, "post": 155}, "SLP(R)": {"pre": 67, "post": 153}, "SIP(R)": {"pre": 4, "post": 2}, "LH(R)": {"pre": 20, "post": 25}, "VLNP(R)": {"pre": 1}, "PLP(R)": {"pre": 1}, "AL(R)": {"pre": 1, "post": 162}}' rois=c("AL(R)", "LH(R)") - expect_is(xdf <- extract_connectivity_df(rois = rois, json=json), 'data.frame') - expect_known_hash(xdf, "85ea57c5ec") + baseline <- structure(list(`AL(R).pre` = 1L, `AL(R).post` = 162L, + `LH(R).pre` = 20L, `LH(R).post` = 25L), + class = "data.frame", row.names = "1") + expect_equal(xdf <- extract_connectivity_df(rois = rois, json=json), baseline) }) skip_if(as.logical(Sys.getenv("SKIP_NP_SERVER_TESTS"))) From a0c40e5ae3ccd3c8e75f56662bb736556179ba3d Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sat, 29 Feb 2020 11:32:29 +0000 Subject: [PATCH 6/8] Make full (rather than cached the focus) * and complain if you do full=T, cached=T since it's not possible --- R/roi.R | 23 +++++++++++++++------- man/neuprint_ROI_connectivity.Rd | 33 +++++++++++++++++++++++++------- tests/testthat/test-roi.R | 8 ++++++++ 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/R/roi.R b/R/roi.R index f440cf1b..18d0f65e 100644 --- a/R/roi.R +++ b/R/roi.R @@ -72,16 +72,21 @@ neuprint_bodies_in_ROI <- function(roi, dataset = NULL, all_segments = FALSE, co df } -#' @title Get the connectivity between ROIs in a neuPrint dataset +#' Get connectivity between ROIs (summary or data frame of connecting neurons) #' +#' @details When requesting summary connectivity data between ROIs, we recommend +#' setting \code{cached=FALSE}. We have noticed small differences in the +#' connections weights, but computation times can get very long for more than +#' a handful of ROIs. #' @param rois regions of interest for a dataset -#' @param cached pull cached results (TRUE) or recalculate the connectivity -#' (FALSE)? -#' @param full return all neurons involved (TRUE, the default) or give a ROI -#' summary (FALSE, default behavior if `cached` is TRUE) +#' @param full return all neurons involved (TRUE, the default) or give a numeric +#' ROI summary (FALSE) #' @param statistic either "weight" or count" (default "weight"). Which number #' to return (see neuprint explorer for details) for summary results (either #' `full` is FALSE or `cached` is TRUE) +#' @param cached pull cached results (TRUE) or ask server to recalculate the +#' connectivity (FALSE). Only applicable to summary results when +#' \code(full=FALSE) and . #' @param ... methods passed to \code{neuprint_login} #' @inheritParams neuprint_fetch_custom #' @seealso \code{\link{neuprint_simple_connectivity}}, @@ -90,13 +95,17 @@ neuprint_bodies_in_ROI <- function(roi, dataset = NULL, all_segments = FALSE, co #' @examples #' \donttest{ #' aba <- neuprint_ROI_connectivity(neuprint_ROIs(superLevel = TRUE), -#' cached=TRUE) +#' full=FALSE) #' heatmap(aba) #' } -neuprint_ROI_connectivity <- function(rois, cached = FALSE, full=TRUE, +neuprint_ROI_connectivity <- function(rois, full=TRUE, statistic = c("weight","count"), + cached = !full, dataset = NULL, conn = NULL, ...) { statistic <- match.arg(statistic) + if(isTRUE(full) && isTRUE(cached)) + stop("It is not possible to return a full list of connecting neurons when ", + "`cached=TRUE`!\nPlease leave `cached` with its default value (FALSE).") roicheck <- neuprint_check_roi(rois=rois, dataset = dataset, conn = conn, ...) if (cached) { results <-matrix(ifelse(statistic == 'count', 0L, 0), diff --git a/man/neuprint_ROI_connectivity.Rd b/man/neuprint_ROI_connectivity.Rd index ad2f776e..0bd9e9d4 100644 --- a/man/neuprint_ROI_connectivity.Rd +++ b/man/neuprint_ROI_connectivity.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/roi.R \name{neuprint_ROI_connectivity} \alias{neuprint_ROI_connectivity} -\title{Get the connectivity between ROIs in a neuPrint dataset} +\title{Get connectivity between ROIs (summary or data frame of connecting neurons)} \usage{ neuprint_ROI_connectivity( rois, - cached = FALSE, full = TRUE, statistic = c("weight", "count"), + cached = !full, dataset = NULL, conn = NULL, ... @@ -17,11 +17,16 @@ neuprint_ROI_connectivity( \arguments{ \item{rois}{regions of interest for a dataset} -\item{cached}{pull cached results (TRUE) or recalculate the connectivity (FALSE)?} +\item{full}{return all neurons involved (TRUE, the default) or give a numeric +ROI summary (FALSE)} -\item{full}{return all neurons involved (TRUE, the default) or give a ROI summary (FALSE, default behavior if `cached` is TRUE)} +\item{statistic}{either "weight" or count" (default "weight"). Which number +to return (see neuprint explorer for details) for summary results (either +`full` is FALSE or `cached` is TRUE)} -\item{statistic}{either "weight" or count" (default "weight"). Which number to return (see neuprint explorer for details) for summary results (either `full` is FALSE or `cached` is TRUE)} +\item{cached}{pull cached results (TRUE) or ask server to recalculate the +connectivity (FALSE). Only applicable to summary results when +\code(full=FALSE) and .} \item{dataset}{optional, a dataset you want to query. If \code{NULL}, the default specified by your R environ file is used or, failing that the @@ -35,8 +40,22 @@ used. See \code{\link{neuprint_login}} for details.} \item{...}{methods passed to \code{neuprint_login}} } \description{ -Get the connectivity between ROIs in a neuPrint dataset +Get connectivity between ROIs (summary or data frame of connecting neurons) +} +\details{ +When requesting summary connectivity data between ROIs, we recommend + setting \code{cached=FALSE}. We have noticed small differences in the + connections weights, but computation times can get very long for more than + a handful of ROIs. +} +\examples{ +\donttest{ +aba <- neuprint_ROI_connectivity(neuprint_ROIs(superLevel = TRUE), + full=FALSE) +heatmap(aba) +} } \seealso{ -\code{\link{neuprint_simple_connectivity}}, \code{\link{neuprint_common_connectivity}} +\code{\link{neuprint_simple_connectivity}}, + \code{\link{neuprint_common_connectivity}} } diff --git a/tests/testthat/test-roi.R b/tests/testthat/test-roi.R index 7ab2d6bc..c6b38373 100644 --- a/tests/testthat/test-roi.R +++ b/tests/testthat/test-roi.R @@ -18,3 +18,11 @@ test_that("neuprint_bodies_in_ROI works", { input_ROIs = "AL(R)", output_ROIs = "LH(R)"), 'data.frame') }) + +test_that("neuprint_ROI_connectivity works", { + rois <- neuprint_ROIs(superLevel = TRUE)[1:2] + expect_is(m <- neuprint_ROI_connectivity(rois, full=F), 'matrix') + expect_equal(dimnames(m), + list(inputs = rois, outputs = rois)) + expect_error(neuprint_ROI_connectivity(rois, full = T, cached = T)) +}) From aceb1575bf3c971c83a166e6d0091474f5b071e0 Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sat, 29 Feb 2020 11:45:17 +0000 Subject: [PATCH 7/8] documentation bug --- R/roi.R | 8 ++++---- man/neuprint_ROI_connectivity.Rd | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/roi.R b/R/roi.R index 18d0f65e..dedb7273 100644 --- a/R/roi.R +++ b/R/roi.R @@ -83,10 +83,10 @@ neuprint_bodies_in_ROI <- function(roi, dataset = NULL, all_segments = FALSE, co #' ROI summary (FALSE) #' @param statistic either "weight" or count" (default "weight"). Which number #' to return (see neuprint explorer for details) for summary results (either -#' `full` is FALSE or `cached` is TRUE) -#' @param cached pull cached results (TRUE) or ask server to recalculate the -#' connectivity (FALSE). Only applicable to summary results when -#' \code(full=FALSE) and . +#' (when \code{full=FALSE}) +#' @param cached pull precomputed results (TRUE) or ask server to recalculate +#' the connectivity (FALSE). Only applicable to summary results when +#' \code{full=FALSE}. #' @param ... methods passed to \code{neuprint_login} #' @inheritParams neuprint_fetch_custom #' @seealso \code{\link{neuprint_simple_connectivity}}, diff --git a/man/neuprint_ROI_connectivity.Rd b/man/neuprint_ROI_connectivity.Rd index 0bd9e9d4..50622635 100644 --- a/man/neuprint_ROI_connectivity.Rd +++ b/man/neuprint_ROI_connectivity.Rd @@ -22,11 +22,11 @@ ROI summary (FALSE)} \item{statistic}{either "weight" or count" (default "weight"). Which number to return (see neuprint explorer for details) for summary results (either -`full` is FALSE or `cached` is TRUE)} +(when \code{full=FALSE})} -\item{cached}{pull cached results (TRUE) or ask server to recalculate the -connectivity (FALSE). Only applicable to summary results when -\code(full=FALSE) and .} +\item{cached}{pull precomputed results (TRUE) or ask server to recalculate +the connectivity (FALSE). Only applicable to summary results when +\code{full=FALSE}.} \item{dataset}{optional, a dataset you want to query. If \code{NULL}, the default specified by your R environ file is used or, failing that the From 50c09450119504348b4096f1bf7ad14dbcf0aa70 Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Mon, 2 Mar 2020 11:20:19 +0000 Subject: [PATCH 8/8] Add additional tests for neuprint_ROI_connectivity * they're a little slow but still probably <10s and a good idea to cover the whole function --- tests/testthat/test-roi.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/testthat/test-roi.R b/tests/testthat/test-roi.R index c6b38373..b8a961ba 100644 --- a/tests/testthat/test-roi.R +++ b/tests/testthat/test-roi.R @@ -25,4 +25,11 @@ test_that("neuprint_ROI_connectivity works", { expect_equal(dimnames(m), list(inputs = rois, outputs = rois)) expect_error(neuprint_ROI_connectivity(rois, full = T, cached = T)) + + expect_is(m2 <- neuprint_ROI_connectivity(rois, full=F, cached=F), 'matrix') + # note low tolerance as the cached and recomputed results are not identical + expect_equal(m2, m, tolerance = 1e-2) + + expect_is(df <- neuprint_ROI_connectivity(rois[1], full=T), 'data.frame') + expect_true(ncol(df)==3) })