diff --git a/DESCRIPTION b/DESCRIPTION index a76a28c..7e89ae4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,7 @@ Imports: magrittr, bit64 Suggests: - malevnc (>= 0.3.1), + malevnc (> 0.3.1), fancr, testthat (>= 3.0.0), ComplexHeatmap, diff --git a/R/datasets.R b/R/datasets.R index 012c3b2..c00b864 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -1,6 +1,6 @@ cf_datasets <- function(rval=c("all", 'available')) { rval=match.arg(rval) - datasets=c("flywire", "malecns", 'manc', 'fanc', 'hemibrain') + datasets=c("flywire", "malecns", 'manc', 'fanc', 'hemibrain', 'opticlobe') if(rval=='all') datasets else @@ -24,12 +24,14 @@ match_datasets <- function(ds) { #' abbreviate_datasets(c("flywire", "flywire", "hemibrain")) abbreviate_datasets <- function(ds) { ds=match_datasets(ds) - abbrevlist=c(hemibrain='hb', flywire='fw', manc='mv', fanc='fv', malecns='mc') + abbrevlist=c(hemibrain='hb', flywire='fw', manc='mv', fanc='fv', malecns='mc', + opticlobe='ol') unname(abbrevlist[ds]) } lengthen_datasets <- function(ds) { - longlist=c(hb="hemibrain", fw="flywire", mv="manc", fv="fanc", mc="malecns") + longlist=c(hb="hemibrain", fw="flywire", mv="manc", fv="fanc", mc="malecns", + ol='opticlobe') ds=match.arg(ds, names(longlist), several.ok = T) unname(longlist[ds]) } diff --git a/R/ids.R b/R/ids.R index 2455c70..85f6123 100644 --- a/R/ids.R +++ b/R/ids.R @@ -55,15 +55,20 @@ is_key <- function(x) { #' @param expand Whether to expand any queries into the matching ids (this will #' involve one or more calls to corresponding servers). Default \code{FALSE}. #' @param keys Whether to turn the ids into keys \code{hb:12345} right away. -#' Default \code{FALSE} but you may find this useful e.g. for combining -#' lists of neurons (see examples). +#' Default \code{FALSE} but you may find this useful e.g. for combining lists +#' of neurons (see examples). #' @param hemibrain Pass hemibrain specific query or ids to this argument #' @param flywire Pass flywire specific query or ids to this argument #' @param malecns Pass malecns specific query or ids to this argument #' @param manc Pass manc specific query or ids to this argument +#' @param opticlobe Pass opticlobe specific query or ids to this argument #' @param fanc Pass fanc ids to this argument (at present we do not support #' metadata queries for fanc) #' +#' @details all neuprint datasets (hemibrain, malevnc, opticlobe, malecns) use +#' the same query syntax although some fields may be dataset specific (see +#' examples). +#' #' @return A list of ids with additional class \code{cidlist} #' @export #' @family ids @@ -87,16 +92,20 @@ is_key <- function(x) { #' } cf_ids <- function( query=NULL, - datasets=c("brain", "vnc", "hemibrain", "flywire", "malecns", "manc", "fanc"), + datasets=c("brain", "vnc", "hemibrain", "flywire", "malecns", "manc", "fanc", + "opticlobe"), expand=FALSE, keys=FALSE, - hemibrain=NULL, flywire=NULL, malecns=NULL, manc=NULL, fanc=NULL) { + hemibrain=NULL, flywire=NULL, malecns=NULL, manc=NULL, fanc=NULL, + opticlobe=NULL) { + nds=sum( !is.null(hemibrain), !is.null(flywire), !is.null(malecns), !is.null(manc), - !is.null(fanc) + !is.null(fanc), + !is.null(opticlobe) ) res <- if(!is.null(query)) { if(nds>0) @@ -114,7 +123,7 @@ cf_ids <- function( } else { if(nds==0) stop("You must supply either the `query` argument or one of hemibrain:fanc!") - l=list(hemibrain=hemibrain, flywire=flywire, malecns=malecns, manc=manc, fanc=fanc) + l=list(hemibrain=hemibrain, flywire=flywire, malecns=malecns, manc=manc, fanc=fanc, opticlobe=opticlobe) # drop any empty datasets l[lengths(l)>0] } @@ -183,9 +192,9 @@ expand_ids <- function(ids, dataset) { FUN <- switch(dataset, manc=malevnc::manc_ids, fanc=I, - hemibrain=function(ids) neuprintr::neuprint_ids(ids, conn=npconn(dataset)), malecns=malecns::mcns_ids, - flywire=function(ids) fafbseg::flywire_ids(ids, version=fafbseg::flywire_connectome_data_version())) + flywire=function(ids) fafbseg::flywire_ids(ids, version=fafbseg::flywire_connectome_data_version()), + function(ids) neuprintr::neuprint_ids(ids, conn=npconn(dataset))) tf=try(FUN(ids), silent = T) if(inherits(tf, 'try-error')) { warning("No valid ids in dataset:", dataset) diff --git a/R/meta.R b/R/meta.R index 5d93d6e..b765cd6 100644 --- a/R/meta.R +++ b/R/meta.R @@ -4,6 +4,10 @@ npconn <- function(dataset) { return(neuprintr::neuprint_login( server="https://neuprint.janelia.org", dataset='hemibrain:v1.2.1')) + else if(dataset=='opticlobe') + return(neuprintr::neuprint_login( + server="https://neuprint.janelia.org", + dataset='optic-lobe:v1.0')) else if(dataset=='malecns') return(malecns::mcns_neuprint()) else if(dataset=='manc') @@ -114,6 +118,14 @@ hemibrain_meta <- function(ids, ...) { tres } +opticlobe_meta <- function(ids, ...) { + tres=malevnc::manc_neuprint_meta(ids, conn = npconn('opticlobe'), ...) + tres <- tres %>% + rename(id=bodyid) %>% + mutate(side=stringr::str_match(tres$name, "_([LR])$")[,2]) + tres +} + malecns_meta <- function(ids, ...) { tres=malecns::mcns_neuprint_meta(ids) tres <- tres %>% diff --git a/R/partners.R b/R/partners.R index 4a3db6c..633512a 100644 --- a/R/partners.R +++ b/R/partners.R @@ -28,7 +28,7 @@ cf_partners <- function(ids, threshold=1L, partners=c("inputs", "outputs"), threshold <- checkmate::assert_integerish( threshold, lower=0L,len = 1, null.ok = F, all.missing = F) - neuprint.chunksize=100 + neuprint.chunksize=10000 if(is.character(ids)) ids=keys2df(ids) @@ -51,13 +51,13 @@ cf_partners <- function(ids, threshold=1L, partners=c("inputs", "outputs"), if(n=='flywire') { tres=flywire_partner_summary2(ids[[n]], partners = partners, threshold = threshold) tres$side=toupper(substr(tres$side,1,1)) - } else if(n=='hemibrain') { + } else if(n=='hemibrain' || n=='opticlobe') { # a bit inelegant but not sure how else to insist - tres=neuprintr::neuprint_connection_table(ids[[n]], partners = partners, threshold=threshold, details = TRUE, conn = npconn('hemibrain'), chunk = neuprint.chunksize) + tres=neuprintr::neuprint_connection_table(ids[[n]], partners = partners, threshold=threshold, details = TRUE, conn = npconn(n), chunk = neuprint.chunksize) tres <- tres %>% dplyr::mutate( type=dplyr::case_when( - is.na(type) ~ paste0('hb', partner), + is.na(type) ~ paste0(abbreviate_datasets(n), partner), T ~ type), side=stringr::str_match(name, '_([LR])$')[,2], side=dplyr::case_when( diff --git a/R/utils.R b/R/utils.R index 3320623..b766b95 100644 --- a/R/utils.R +++ b/R/utils.R @@ -15,13 +15,15 @@ bind_rows2 <- function(l) { cf_connections <- function() { dslist=list() - npds=c("hemibrain", "manc", "malecns") + npds=c("hemibrain", "manc", "malecns", 'opticlobe') for(ds in npds) { res=list(installed=T) if(ds=='manc') res$installed=requireNamespace('malevnc', quietly = T) else if(ds=='malecns') res$installed=requireNamespace('malecns', quietly = T) + else if(ds=='opticlobe') + res$installed=requireNamespace('malevnc', quietly = T) if(!res$installed) { res=c(res, server=NA_character_, dataset=NA_character_) next @@ -86,6 +88,15 @@ dr_coconatfly <- function() { "neuprint token!\n", "See {.url https://natverse.org/coconatfly/articles/getting-started.html}")) + if(!isTRUE(filter(cfc, .data$dataset=='opticlobe')$installed)) + cli::cli_alert_danger( + "To use the opticlobe dataset do:\n{.code natmanager::install(pkgs = 'malevnc')}") + else if(is.na(filter(cfc, .data$dataset=='opticlobe')$server)) + cli::cli_alert_danger(paste0( + "Cannot connect to opticlobe dataset. You probably need to set up a ", + "neuprint token!\n", + "See {.url https://natverse.org/coconatfly/articles/getting-started.html}")) + if(!isTRUE(filter(cfc, .data$dataset=='manc')$installed)) cli::cli_alert_danger( "To use the manc dataset do:\n{.code natmanager::install(pkgs = 'malevnc')}") diff --git a/README.Rmd b/README.Rmd index ca3d8d0..78ac197 100644 --- a/README.Rmd +++ b/README.Rmd @@ -49,14 +49,15 @@ At present the following datasets are supported (dataset names used in the packa 3. [Janelia male Ventral Nerve Cord](https://www.janelia.org/project-team/flyem/manc-connectome) (**manc**) 4. Wei Lee, John Tuthill and colleagues [Female Adult Nerve Cord](https://github.com/htem/FANC_auto_recon) (**fanc**) 5. Janelia Male CNS (**malecns**) +6. Janelia Male Optic Lobe (part of the malecns) (**opticlobe**) -The first four datasets are either public (hemibrain, manc) or access can be -requested subject to agreeing to certain terms of use (flywire, fanc). -The Male CNS dataset is currently in the early stages of +Datasets 1-4 and 6 are either public (hemibrain, manc, flywire, opticlobe) or +access can be requested subject to agreeing to certain terms of use (fanc). +The Male CNS dataset is currently undergoing proofreading and annotation in a collaboration between the [FlyEM](https://www.janelia.org/project-team/flyem) and [Cambridge Drosophila Connectomics Group](https://flyconnecto.me). Release is -anticipated early 2024. +anticipated late 2024. ## Installation diff --git a/README.md b/README.md index 82f6d97..20b1933 100644 --- a/README.md +++ b/README.md @@ -45,14 +45,15 @@ the package in brackets): 4. Wei Lee, John Tuthill and colleagues [Female Adult Nerve Cord](https://github.com/htem/FANC_auto_recon) (**fanc**) 5. Janelia Male CNS (**malecns**) +6. Janelia Male Optic Lobe (part of the malecns) (**opticlobe**) -The first four datasets are either public (hemibrain, manc) or access -can be requested subject to agreeing to certain terms of use (flywire, -fanc). The Male CNS dataset is currently in the early stages of +Datasets 1-4 and 6 are either public (hemibrain, manc, flywire, +opticlobe) or access can be requested subject to agreeing to certain +terms of use (fanc). The Male CNS dataset is currently undergoing proofreading and annotation in a collaboration between the [FlyEM](https://www.janelia.org/project-team/flyem) and [Cambridge Drosophila Connectomics Group](https://flyconnecto.me). Release is -anticipated early 2024. +anticipated late 2024. ## Installation @@ -63,8 +64,8 @@ install.packages('natmanager') natmanager::install(pkgs = 'coconatfly') ``` -Some of the datasets exposed by **coconatfly** require authentication for -access or are still being annotated in private pre-release. Please +Some of the datasets exposed by **coconatfly** require authentication +for access or are still being annotated in private pre-release. Please consult individual package dependencies for authentication details and do not be surprised if you do not have access to all datasets at this time. @@ -128,10 +129,10 @@ We can also do that for multiple brain datasets ``` r da1meta <- cf_meta(cf_ids('DA1_lPN', datasets = c('hemibrain', 'flywire'))) -#> Updating 2918 ids -#> flywire_rootid_cached: Looking up 2918 missing keys -#> Updating 12286 ids -#> flywire_rootid_cached: Looking up 12285 missing keys +#> Updating 5266 ids +#> flywire_rootid_cached: Looking up 5266 missing keys +#> Updating 4089 ids +#> flywire_rootid_cached: Looking up 4089 missing keys head(da1meta) #> id side class type group instance dataset #> 1 720575940604407468 R central DA1_lPN DA1_lPN_R flywire @@ -168,7 +169,7 @@ head(da1ds) #> pre_id post_id weight side type dataset pre_key post_key #> #> 1 7e17 7e17 64 L DA1_vPN flywire fw:720575940605102694 fw:7205759… -#> 2 7e17 7e17 50 L flywire fw:720575940603231916 fw:7205759… +#> 2 7e17 7e17 50 L CB3356 flywire fw:720575940603231916 fw:7205759… #> 3 7e17 7e17 49 R LHAV4a4 flywire fw:720575940604407468 fw:7205759… #> 4 7e17 7e17 48 R DA1_vPN flywire fw:720575940623303108 fw:7205759… #> 5 7e17 7e17 46 L v2LN30 flywire fw:720575940603231916 fw:7205759… @@ -181,8 +182,8 @@ da1ds %>% summarise(weight=sum(weight), npre=n_distinct(pre_id), npost=n_distinct(post_id)) #> `summarise()` has grouped output by 'type', 'dataset'. You can override using #> the `.groups` argument. -#> # A tibble: 256 × 6 -#> # Groups: type, dataset [209] +#> # A tibble: 343 × 6 +#> # Groups: type, dataset [269] #> type dataset side weight npre npost #> #> 1 AL-AST1 flywire L 16 2 1 @@ -192,10 +193,10 @@ da1ds %>% #> 5 APL flywire R 70 6 1 #> 6 APL hemibrain R 113 6 1 #> 7 AVLP010 flywire L 6 1 1 -#> 8 AVLP010 flywire R 83 6 1 -#> 9 AVLP011,AVLP012 flywire L 6 1 1 -#> 10 AVLP011,AVLP012 flywire R 22 2 1 -#> # ℹ 246 more rows +#> 8 AVLP011,AVLP012 flywire L 6 1 1 +#> 9 AVLP011,AVLP012 flywire R 22 2 1 +#> 10 AVLP013 flywire L 14 2 1 +#> # ℹ 333 more rows ``` Let’s restrict that to types that are observed in both datasets. We do @@ -216,8 +217,8 @@ da1ds.shared_types.wide <- da1ds %>% #> `summarise()` has grouped output by 'type', 'dataset'. You can override using #> the `.groups` argument. da1ds.shared_types.wide -#> # A tibble: 38 × 4 -#> # Groups: type [38] +#> # A tibble: 39 × 4 +#> # Groups: type [39] #> type fw_L fw_R hb_R #> #> 1 AL-AST1 16 13 25 @@ -226,11 +227,11 @@ da1ds.shared_types.wide #> 4 DA1_vPN 250 254 333 #> 5 DNb05 6 0 5 #> 6 KCg-m 3275 2545 3030 -#> 7 LHAD1g1 62 60 48 -#> 8 LHAV2a3 70 128 154 +#> 7 LHAD1d2 72 33 15 +#> 8 LHAD1g1 62 60 48 #> 9 LHAV2b11 44 77 29 #> 10 LHAV3k6 19 16 5 -#> # ℹ 28 more rows +#> # ℹ 29 more rows ``` With the data organised like this, we can easily compare the connection @@ -271,10 +272,10 @@ seems to work very well for this purpose. ``` r cf_cosine_plot(cf_ids('/type:LAL0(08|09|10|42)', datasets = c("flywire", "hemibrain"))) -#> Updating 2918 ids -#> Updating 12286 ids -#> Matching types across datasets. Dropping 461/976 output partner types with total weight 8050/23730 -#> Matching types across datasets. Dropping 737/1356 input partner types with total weight 10875/26780 +#> Updating 5266 ids +#> Updating 4089 ids +#> Matching types across datasets. Dropping 478/977 output partner types with total weight 8849/23759 +#> Matching types across datasets. Dropping 735/1361 input partner types with total weight 10892/26928 ``` diff --git a/man/cf_ids.Rd b/man/cf_ids.Rd index 60dcbbe..0a9f9b8 100644 --- a/man/cf_ids.Rd +++ b/man/cf_ids.Rd @@ -7,14 +7,16 @@ \usage{ cf_ids( query = NULL, - datasets = c("brain", "vnc", "hemibrain", "flywire", "malecns", "manc", "fanc"), + datasets = c("brain", "vnc", "hemibrain", "flywire", "malecns", "manc", "fanc", + "opticlobe"), expand = FALSE, keys = FALSE, hemibrain = NULL, flywire = NULL, malecns = NULL, manc = NULL, - fanc = NULL + fanc = NULL, + opticlobe = NULL ) \method{c}{cidlist}(..., unique = TRUE) @@ -29,8 +31,8 @@ should be applied.} involve one or more calls to corresponding servers). Default \code{FALSE}.} \item{keys}{Whether to turn the ids into keys \code{hb:12345} right away. -Default \code{FALSE} but you may find this useful e.g. for combining -lists of neurons (see examples).} +Default \code{FALSE} but you may find this useful e.g. for combining lists +of neurons (see examples).} \item{hemibrain}{Pass hemibrain specific query or ids to this argument} @@ -43,6 +45,8 @@ lists of neurons (see examples).} \item{fanc}{Pass fanc ids to this argument (at present we do not support metadata queries for fanc)} +\item{opticlobe}{Pass opticlobe specific query or ids to this argument} + \item{...}{One or more lists generated by \code{cf_ids} that should be joined together} @@ -55,6 +59,11 @@ A list of ids with additional class \code{cidlist} \description{ Specify ids for fly connectome datasets } +\details{ +all neuprint datasets (hemibrain, malevnc, opticlobe, malecns) use + the same query syntax although some fields may be dataset specific (see + examples). +} \examples{ \donttest{ cf_ids("DA2_lPN", datasets='brain') diff --git a/man/figures/README-flywire-left-vs-right-1.png b/man/figures/README-flywire-left-vs-right-1.png index 1d1a82e..59291a3 100644 Binary files a/man/figures/README-flywire-left-vs-right-1.png and b/man/figures/README-flywire-left-vs-right-1.png differ diff --git a/man/figures/README-flywire-vs-hemibrain-1.png b/man/figures/README-flywire-vs-hemibrain-1.png index d374ec1..8879d57 100644 Binary files a/man/figures/README-flywire-vs-hemibrain-1.png and b/man/figures/README-flywire-vs-hemibrain-1.png differ diff --git a/man/figures/README-lal-cosine-cluster-1.png b/man/figures/README-lal-cosine-cluster-1.png index 0be1981..e34b8ff 100644 Binary files a/man/figures/README-lal-cosine-cluster-1.png and b/man/figures/README-lal-cosine-cluster-1.png differ