From dc11729b795a68783e978ed1bd320caeae5ff0e2 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 14:08:03 -0800 Subject: [PATCH 01/15] Rework deprecated setAnnotations --- R/annotations.R | 75 +++++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/R/annotations.R b/R/annotations.R index 17d14b9d..483715c3 100644 --- a/R/annotations.R +++ b/R/annotations.R @@ -1,16 +1,31 @@ +#' Wrapper around the Python `set_annotations` that pulls current annotations +#' and adds new annotations with given annotations data or replaces +#' data for annotations with the same keys existing on the entity. +#' @param id Synapse entity id. +#' @param annotations A flat list representing annotation key-value pairs, +#' e.g. `list(foo = "bar", rank = 1, authors = c("jack", "jane"))` +set_annotations <- function(id, annotations) { + e_annotations <- .syn$get_annotations(e) + for (k in names(annotations)) { + e_annotations[k] <- annotations[[k]] + } + .syn$set_annotations(e_annotations) +} + #' Set annotations from a manifest -#' -#' The [Synapse docs](https://help.synapse.org/docs/Managing-Custom-Metadata-at-Scale.2004254976.html) -#' suggest doing batch annotations through a fileview. However, it is often simpler to -#' modify or set new annotations directly given a table of just the entities (rows) and props (cols) we want. +#' +#' The [Synapse docs](https://help.synapse.org/docs/Managing-Custom-Metadata-at-Scale.2004254976.html) +#' suggest doing batch annotations through a fileview. However, it is often simpler to +#' modify or set new annotations directly given a table of just the entities (rows) and props (cols) we want. #' This is like how schematic works, except without any validation (so works best for power-users who know the data model well). -#' Some desired defaults are taken into account, such as not submitting key-values with `NA` and empty strings. -#' -#' @param manifest A table manifest. 
Needs to contain `entityId`. +#' Some desired defaults are taken into account, such as not submitting key-values with `NA` and empty strings. +#' +#' @param manifest A `data.frame` representing a manifest. +#' Needs to contain `entityId` (if parsed from a standard manifest.csv, the df should already contain `entityId`). #' @param ignore_na Whether to ignore annotations that are `NA`; default TRUE. #' @param ignore_blank Whether to ignore annotations that are that empty strings; default TRUE. #' @param verbose Be chatty, default FALSE. -#' @export +#' @export annotate_with_manifest <- function(manifest, ignore_na = TRUE, ignore_blank = TRUE, verbose = FALSE) { # Split by `entityId` annotations <- as.data.table(manifest) @@ -20,47 +35,45 @@ annotate_with_manifest <- function(manifest, ignore_na = TRUE, ignore_blank = TR filterNA <- if(ignore_na) function(x) !any(is.na(x)) else TRUE # will ignore entirely if list with NA, e.g. c(NA, 1, 2) -- should warn if list filterBlank <- if(ignore_blank) function(x) !any(x == "") else TRUE # same as above annotations <- lapply(annotations, function(x) Filter(function(x) filterNA(x) & filterBlank(x) & length(x), unlist(x, recursive = F))) - for(entity in names(annotations)) { - .syn$setAnnotations(entity = entity, annotations = as.list(annotations[[entity]])) + for(entity_id in names(annotations)) { + set_annotations(entity_id, annotations[[entity_id]]) } if (verbose) message("Annotations submitted") } #' Copy annotations -#' +#' #' Copy annotations (all or selectively) from a source entity to one or more target entities. -#' If annotations already exist on target entities, the copy will replace the current values. -#' -#' @param entity_from Syn id from which to copy. -#' @param entity_to One or more syn ids to copy annotations to. -#' @param select Vector of properties to selectively copy if present on the entity. +#' If same annotation keys already exist on target entities, the copy will replace the current values. 
+#' +#' @param entity_from Syn id from which to copy. +#' @param entity_to One or more syn ids to copy annotations to. +#' @param select Vector of properties to selectively copy if present on the entity. #' If not specified, will copy over everything, which may not be desirable. -#' @param update Whether to immediately update or return annotation objects only. +#' @param update Whether to immediately update or return annotation objects only. #' @export copy_annotations <- function(entity_from, entity_to, select = NULL, update = FALSE) { - + .check_login() - - annotations <- .syn$get_annotations(entity_from) + + from_annotations <- .syn$get_annotations(entity_from) + # Check `select` if(is.null(select)) { - cp <- annotations + select <- names(from_annotations) } else { - cp <- reticulate::dict() - for(k in names(annotations)) { - if(k %in% select) cp[k] <- annotations[k] - } + select <- select[select %in% names(from_annotations)] } - - if(update) { - for(e in entity_to) { - .syn$setAnnotations(e, annotations = cp) + + for(e in entity_to) { + to_annotations <- .syn$get_annotations(e) + for(k in select) { + to_annotations[k] <- from_annotations[k] } - } else { - return(cp) + if(update) .syn$set_annotations(to_annotations) else to_annotations } } From fdc19aa58e4307eccbb49ce345ef01d1ae267190 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 14:09:38 -0800 Subject: [PATCH 02/15] Update downstream usage --- R/register_study.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/register_study.R b/R/register_study.R index 84dabcdb..2ef95127 100644 --- a/R/register_study.R +++ b/R/register_study.R @@ -68,7 +68,7 @@ add_new_study_meta <- function(id, study_meta) { if(is.null(study_meta$studyStatus) || is.na(study_meta$studyStatus)) study_meta$studyStatus <- "Active" if(is.null(study_meta$dataStatus) || is.na(study_meta$dataStatus)) study_meta$dataStatus <- "Data Pending" - study <- .syn$setAnnotations(id, study_meta) + study <- 
set_annotations(id, study_meta) invisible(study) } From 0cc7f9e9262696729536dd8d69c06637f4a16172 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 14:12:50 -0800 Subject: [PATCH 03/15] Update more downstream usage --- R/calculate_related_studies.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/calculate_related_studies.R b/R/calculate_related_studies.R index 2f02ceaa..72c506cc 100644 --- a/R/calculate_related_studies.R +++ b/R/calculate_related_studies.R @@ -111,9 +111,9 @@ calculate_related_studies <- function(study_table_id, for(i in 1:nrow(studies_updated)) { id <- studies_updated[i, "studyId"] relatedStudies <- studies_updated[i, "relatedStudies"] - annotations <- .syn$getAnnotations(id) + annotations <- .syn$get_annotations(id) annotations$relatedStudies <- relatedStudies - invisible(.syn$setAnnotations(id, annotations)) + invisible(set_annotations(id, annotations)) } } else { studies_updated From 8a91a3518579f339cf0fa88eacf2c71afff2d485 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 14:14:56 -0800 Subject: [PATCH 04/15] Update one more downstream usage --- R/annotation_qc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/annotation_qc.R b/R/annotation_qc.R index 6d6589d0..1e2e757f 100644 --- a/R/annotation_qc.R +++ b/R/annotation_qc.R @@ -163,7 +163,7 @@ infer_data_type <- function(dataset_id) { children <- first(children, 3) data_type <- c() for (entity in children) { - e <- .syn$getAnnotations(entity) + e <- .syn$get_annotations(entity) data_type <- append(data_type, e$Component) } data_type <- unique(data_type) From 66f1554242d0bf2b54be91ae39baef0879db204e Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 14:29:03 -0800 Subject: [PATCH 05/15] Fix typo, regen R docs --- DESCRIPTION | 2 +- R/annotations.R | 2 +- man/annotate_with_manifest.Rd | 3 ++- man/copy_annotations.Rd | 2 +- man/set_annotations.Rd | 21 +++++++++++++++++++++ 5 files changed, 26 
insertions(+), 4 deletions(-) create mode 100644 man/set_annotations.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6c8a23d9..560cd5c9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,7 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Imports: dplyr, data.table, diff --git a/R/annotations.R b/R/annotations.R index 483715c3..30469d49 100644 --- a/R/annotations.R +++ b/R/annotations.R @@ -5,7 +5,7 @@ #' @param annotations A flat list representing annotation key-value pairs, #' e.g. `list(foo = "bar", rank = 1, authors = c("jack", "jane"))` set_annotations <- function(id, annotations) { - e_annotations <- .syn$get_annotations(e) + e_annotations <- .syn$get_annotations(id) for (k in names(annotations)) { e_annotations[k] <- annotations[[k]] } diff --git a/man/annotate_with_manifest.Rd b/man/annotate_with_manifest.Rd index e499bb96..fb699ac6 100644 --- a/man/annotate_with_manifest.Rd +++ b/man/annotate_with_manifest.Rd @@ -12,7 +12,8 @@ annotate_with_manifest( ) } \arguments{ -\item{manifest}{A table manifest. Needs to contain \code{entityId}.} +\item{manifest}{A \code{data.frame} representing a manifest. +Needs to contain \code{entityId} (if parsed from a standard manifest.csv, the df should already contain \code{entityId}).} \item{ignore_na}{Whether to ignore annotations that are \code{NA}; default TRUE.} diff --git a/man/copy_annotations.Rd b/man/copy_annotations.Rd index 5c343464..9e6230d2 100644 --- a/man/copy_annotations.Rd +++ b/man/copy_annotations.Rd @@ -18,5 +18,5 @@ If not specified, will copy over everything, which may not be desirable.} } \description{ Copy annotations (all or selectively) from a source entity to one or more target entities. -If annotations already exist on target entities, the copy will replace the current values. +If same annotation keys already exist on target entities, the copy will replace the current values. 
} diff --git a/man/set_annotations.Rd b/man/set_annotations.Rd new file mode 100644 index 00000000..ff0db764 --- /dev/null +++ b/man/set_annotations.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/annotations.R +\name{set_annotations} +\alias{set_annotations} +\title{Wrapper around the Python \code{set_annotations} that pulls current annotations +and adds new annotations with given annotations data or replaces +data for annotations with the same keys existing on the entity.} +\usage{ +set_annotations(id, annotations) +} +\arguments{ +\item{id}{Synapse entity id.} + +\item{annotations}{A flat list representing annotation key-value pairs, +e.g. \code{list(foo = "bar", rank = 1, authors = c("jack", "jane"))}} +} +\description{ +Wrapper around the Python \code{set_annotations} that pulls current annotations +and adds new annotations with given annotations data or replaces +data for annotations with the same keys existing on the entity. +} From d6cb5bc793fc581be749a15d63f1b62157a67060 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 14:30:01 -0800 Subject: [PATCH 06/15] Export, regen docs --- NAMESPACE | 1 + R/annotations.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 85ab7af7..9bb56a2c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,6 +86,7 @@ export(register_study_files) export(remanifest) export(remove_button) export(remove_wiki_subpage) +export(set_annotations) export(summarize_attribute) export(summarize_file_access) export(swap_col) diff --git a/R/annotations.R b/R/annotations.R index 30469d49..06075b29 100644 --- a/R/annotations.R +++ b/R/annotations.R @@ -4,6 +4,7 @@ #' @param id Synapse entity id. #' @param annotations A flat list representing annotation key-value pairs, #' e.g. 
`list(foo = "bar", rank = 1, authors = c("jack", "jane"))` +#' @export set_annotations <- function(id, annotations) { e_annotations <- .syn$get_annotations(id) for (k in names(annotations)) { From 72c57dcb898c0af99f5d31a578495f211e27f0b3 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 14:56:14 -0800 Subject: [PATCH 07/15] Update README --- README.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 88690d7c..108f3911 100644 --- a/README.md +++ b/README.md @@ -8,20 +8,21 @@ The goal of `nfportalutils` is to provide convenience functions for project and (meta)data management in the NF-OSI data portal scope. Currently, `develop` branch is default so package install and docs refer to code in this branch. +The package interops with the [Python synapse client](https://github.com/Sage-Bionetworks/synapsePythonClient) via reticulate. +You will have to set up both (see #Installation). Outside of the tested versions, there may be some issues. The tested versions are: +- Python Synapse Client == 4.3.1 +- reticulate == 1.39.0 + ## Docs :point_right: [Package documentation!](https://nf-osi.github.io/nfportalutils/) ## Installation -You can install `nfportalutils` from here: - -``` r -remotes::install_github("nf-osi/nfportalutils") -``` - -The package interops with the [Python synapse client](https://github.com/Sage-Bionetworks/synapsePythonClient) via reticulate. -You will have to download the Python synapse client first. +1. Install `reticulate` following guide at https://rstudio.github.io/reticulate/index.html#installation. +2. Install `synapseclient==4.3.1` following https://rstudio.github.io/reticulate/articles/python_packages.html, which will use a default environment "r-reticulate". +3. Lastly, install `nfportalutils`: `remotes::install_github("nf-osi/nfportalutils")`. +At startup, `nfportalutils` imports `synapseclient` from the default "r-reticulate". 
## For Users From 48fdf597234800a298e07a111aaeac75e1379317 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 15:43:06 -0800 Subject: [PATCH 08/15] Add test --- R/annotations.R | 6 +++--- tests/testthat/test_copy_annotations.R | 28 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/test_copy_annotations.R diff --git a/R/annotations.R b/R/annotations.R index 06075b29..e331b281 100644 --- a/R/annotations.R +++ b/R/annotations.R @@ -69,12 +69,12 @@ copy_annotations <- function(entity_from, select <- select[select %in% names(from_annotations)] } - for(e in entity_to) { - to_annotations <- .syn$get_annotations(e) + for(id in entity_to) { + to_annotations <- .syn$get_annotations(id) for(k in select) { to_annotations[k] <- from_annotations[k] } - if(update) .syn$set_annotations(to_annotations) else to_annotations + if(update) .syn$set_annotations(to_annotations) else return(to_annotations) } } diff --git a/tests/testthat/test_copy_annotations.R b/tests/testthat/test_copy_annotations.R new file mode 100644 index 00000000..dc4d8875 --- /dev/null +++ b/tests/testthat/test_copy_annotations.R @@ -0,0 +1,28 @@ +test_that("Copy annotations works", { + skip_if_no_synapseclient() + skip_if_no_login() + + PARENT_TEST_PROJECT <- "syn26462036" + # Create some folder objects with some annotations + entity_a <- synapseclient$Folder("Entity A", + parent = PARENT_TEST_PROJECT, + annotations = list(foo = "bar", favorites = c("raindrops", "whiskers"))) + entity_a <- .syn$store(entity_a) + + entity_b <- synapseclient$Folder("Entity B", + parent = PARENT_TEST_PROJECT, + annotations = list(favorites = c("kettles", "mittens"), after_a = TRUE)) + entity_b <- .syn$store(entity_b) + + copy_annotations(entity_from = entity_a$properties$id, + entity_to = entity_b$properties$id, + select = NULL, + update = TRUE) + result <- .syn$get_annotations(entity_b) + .syn$delete(entity_a) + .syn$delete(entity_b) + 
testthat::expect_equal(result$foo, "bar") + testthat::expect_equal(result$favorites, c("raindrops", "whiskers")) + testthat::expect_equal(result$after_a, TRUE) + +}) From be71eedb519977a02ff48d5e64e7ba5c4c036414 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 16:48:54 -0800 Subject: [PATCH 09/15] More test updates --- tests/testthat/helpers.R | 10 ++++---- tests/testthat/test_copy_annotations.R | 27 ++++++++++++++++++---- tests/testthat/test_manifest_annotations.R | 8 +++++++ 3 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 tests/testthat/test_manifest_annotations.R diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index c3fce801..73f37743 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -1,12 +1,12 @@ # Implementing skips according to suggested handling when using reticulate # See https://rstudio.github.io/reticulate/articles/package.html # Skips tests on CRAN machines or other incompatible testing environments -# where Python can't be configured so package checks don't fail +# where Python can't be configured so package checks don't fail # Skip if Python synapseclient module not installed/accessible # This is normally imported upon package load, see `zzz.R` skip_if_no_synapseclient <- function() { - have_synapseclient <- py_module_available("synapseclient") + have_synapseclient <- py_module_available("synapseclient") if(!have_synapseclient) skip("synapseclient not available for testing") } @@ -14,14 +14,14 @@ skip_if_no_synapseclient <- function() { # Skip if Python synapseutils module not installed/accessible # This is normally imported upon package load, see `zzz.R` skip_if_no_synapseutils <- function() { - have_synapseutils <- py_module_available("synapseclient") + have_synapseutils <- py_module_available("synapseclient") if(!have_synapseutils) skip("synapseutils not available for testing") } # Skip if no pandas; pandas is needed for smaller subset of functions in the package 
skip_if_no_pandas <- function() { - have_pandas <- py_module_available("pandas") + have_pandas <- py_module_available("pandas") if(!have_pandas) skip("pandas not available for testing") } @@ -37,6 +37,6 @@ skip_if_no_token <- function() { # (e.g. someone pasted in wrong token), this creates a skip cascade for tests that presume # successful login. skip_if_no_login <- function() { - if(!exists(".syn")) + if(!exists(".syn") || is.null(.syn$username)) skip("not logged in for tests") } diff --git a/tests/testthat/test_copy_annotations.R b/tests/testthat/test_copy_annotations.R index dc4d8875..9a2ed138 100644 --- a/tests/testthat/test_copy_annotations.R +++ b/tests/testthat/test_copy_annotations.R @@ -14,15 +14,34 @@ test_that("Copy annotations works", { annotations = list(favorites = c("kettles", "mittens"), after_a = TRUE)) entity_b <- .syn$store(entity_b) + entity_c <- synapseclient$Folder("Entity C", + parent = PARENT_TEST_PROJECT) + entity_c <- .syn$store(entity_c) + + # when copying all annotations from A->B (default) copy_annotations(entity_from = entity_a$properties$id, entity_to = entity_b$properties$id, select = NULL, update = TRUE) - result <- .syn$get_annotations(entity_b) + + # when copying selective annotations from A->C + copy_annotations(entity_from = entity_a$properties$id, + entity_to = entity_c$properties$id, + select = c("favorites", "key_not_on_a"), + update = TRUE) + + result_b <- .syn$get_annotations(entity_b) + result_c <- .syn$get_annotations(entity_c) .syn$delete(entity_a) .syn$delete(entity_b) - testthat::expect_equal(result$foo, "bar") - testthat::expect_equal(result$favorites, c("raindrops", "whiskers")) - testthat::expect_equal(result$after_a, TRUE) + .syn$delete(entity_c) + testthat::expect_equal(result_b$foo, "bar") + testthat::expect_equal(result_b$favorites, c("raindrops", "whiskers")) + testthat::expect_equal(result_b$after_a, TRUE) + testthat::expect_error(result_c$foo) # Expect KeyError since key should not be present + 
testthat::expect_equal(result_c$favorites, c("raindrops", "whiskers")) + testthat::expect_error(result_c$key_not_on_a) }) + + diff --git a/tests/testthat/test_manifest_annotations.R b/tests/testthat/test_manifest_annotations.R new file mode 100644 index 00000000..b0eabe1e --- /dev/null +++ b/tests/testthat/test_manifest_annotations.R @@ -0,0 +1,8 @@ +test_that("Annotate with manifest works", { + skip_if_no_synapseclient() + skip_if_no_login() + + PARENT_TEST_PROJECT <- "syn26462036" + testthat::expect_equal(1,1) + +}) From 0ed731be7fbd366bec4403bfa2a4e0dbc94080ec Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 14 Nov 2024 16:49:21 -0800 Subject: [PATCH 10/15] Remove non-functional test file --- tests/testthat/test-add_pubmed_publications.R | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 tests/testthat/test-add_pubmed_publications.R diff --git a/tests/testthat/test-add_pubmed_publications.R b/tests/testthat/test-add_pubmed_publications.R deleted file mode 100644 index 8849056e..00000000 --- a/tests/testthat/test-add_pubmed_publications.R +++ /dev/null @@ -1,3 +0,0 @@ -test_that("multiplication works", { - expect_equal(2 * 2, 4) -}) From 0466174533ed8a8ec3f1a234428b43938d96c838 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Fri, 15 Nov 2024 08:08:35 -0800 Subject: [PATCH 11/15] Update tests --- tests/testthat/test_annotate_with_manifest.R | 9 +++++++++ tests/testthat/test_manifest_annotations.R | 19 +++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/test_annotate_with_manifest.R diff --git a/tests/testthat/test_annotate_with_manifest.R b/tests/testthat/test_annotate_with_manifest.R new file mode 100644 index 00000000..c241bb37 --- /dev/null +++ b/tests/testthat/test_annotate_with_manifest.R @@ -0,0 +1,9 @@ +test_that("annotate_with_manifest works by annotating files with `data.table` manifest", { + + modify_annotation + + testthat::expect_identical(update_items(current, update), + expected) + 
+}) + diff --git a/tests/testthat/test_manifest_annotations.R b/tests/testthat/test_manifest_annotations.R index b0eabe1e..0d6088c6 100644 --- a/tests/testthat/test_manifest_annotations.R +++ b/tests/testthat/test_manifest_annotations.R @@ -3,6 +3,21 @@ test_that("Annotate with manifest works", { skip_if_no_login() PARENT_TEST_PROJECT <- "syn26462036" - testthat::expect_equal(1,1) - + # Use some folders to represent objects to annotate + objs <- make_folder(parent = PARENT_TEST_PROJECT, folders = c("mock_file_1", "mock_file_2", "mock_file_3")) + ids <- sapply(objs, function(x) x$properties$id) + # Partial manifest as a data.table with list columns + manifest <- data.table( + entityId = ids, + assay = "drugScreen", + experimentalTimepoint = c(1L, 3L, 7L), + experimentalTimepointUnit = "days", + cellType = list(c("schwann", "macrophage"), c("schwann", "macrophage"), c("schwann", "macrophage")) + ) + annotate_with_manifest(manifest) + remanifested <- list() + for(i in ids) { + remanifested[[i]] <- .syn$get_annotations(i) + } + for(i in ids) .syn$delete(i) }) From 01cc8c144b663384d308370d38cfae3125b9365c Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Fri, 15 Nov 2024 08:10:02 -0800 Subject: [PATCH 12/15] Add more introductory annotation vignette --- vignettes/annotate-data-intro.Rmd | 90 +++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 vignettes/annotate-data-intro.Rmd diff --git a/vignettes/annotate-data-intro.Rmd b/vignettes/annotate-data-intro.Rmd new file mode 100644 index 00000000..588f7526 --- /dev/null +++ b/vignettes/annotate-data-intro.Rmd @@ -0,0 +1,90 @@ +--- +title: "Introduction to utils for annotating data" +output: rmarkdown::html_vignette +date: 2022-10-17 +vignette: > + %\VignetteIndexEntry{annotating-nextflow-processed-data} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +## Intro + +This 
introduces the annotation utilities with typical examples. +This is expected to be the more useful starting point for using nfportalutils for annotation tasks, to be followed by the more specialized vignette for annotating NF processed data if needed. + +### Set up +```{r, eval=FALSE} +library(nfportalutils) + +syn_login() + +# Change this to a dev project you have access to +PROJECT <- "syn26462036" +``` + +### Set annotations on a single file + +Create a demo entity. +```{r, eval=FALSE} + +synapseclient <- reticulate::import("synapseclient") +# Create an entity with some initial annotations +entity <- synapseclient$Folder("Demo Entity", + parent = PROJECT, + annotations = list(foo = "bar", favorites = c("raindrops", "whiskers"))) + +entity <- .syn$store(entity) +``` + +`set_annotations` can be used to add new annotations or correct an existing annotation on an entity. +This wraps the Python client to make it more intuitive to pass in an R list as the annotations as above. +Here, add another annotation *and* correct the `favorites` to "chocolate". +The returned data shows the unchanged `foo`, the updated `favorites`, and a new `n`. +```{r, eval=FALSE} +set_annotations(id = entity$properties$id, annotations = list(favorites = "chocolate", n = 7L)) +``` + +Cleanup. +```{r, eval=FALSE} +.syn$delete(entity) +``` + +### Annotate in batch using a manifest + +A better way to use `set_annotations` for a set of entities, usually files. + +First create multiple entities that need to be annotated or corrected in batch. +```{r, eval=FALSE} +objs <- make_folder(parent = PARENT_TEST_PROJECT, folders = c("mock_file_1", "mock_file_2", "mock_file_3")) +ids <- sapply(objs, function(x) x$properties$id) +``` + +Create example manifest. Note: Another way includes reading in a schematic csv manifest with entityIds and Filenames.
+```{r, eval=FALSE} +manifest <- data.table( + entityId = ids, + assay = "drugScreen", + experimentalTimepoint = c(1L, 3L, 7L), + experimentalTimepointUnit = "days", + cellType = list(c("schwann", "macrophage"), c("schwann", "macrophage"), c("schwann", "macrophage")) + ) +manifest +``` + +Apply: +```{r, eval=FALSE} +annotate_with_manifest(manifest) +``` + +Cleanup. +```{r, eval=FALSE} +for (id in ids) .syn$delete(id) +``` From d3349faf3547be091a230e7d0cb0092c56b536d9 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Fri, 15 Nov 2024 08:20:08 -0800 Subject: [PATCH 13/15] Update pkgdown index --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 8fe504be..b8825da4 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -32,6 +32,7 @@ reference: - subtitle: General annotations desc: Add and manage annotations on Synapse entities - contents: + - set_annotations - update_study_annotations - annotate_with_manifest - copy_annotations From c8ef9b85091257aa7114413edf5d70ca8001f787 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Fri, 15 Nov 2024 08:35:20 -0800 Subject: [PATCH 14/15] Final clean up of tests --- tests/testthat/test_annotate_with_manifest.R | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 tests/testthat/test_annotate_with_manifest.R diff --git a/tests/testthat/test_annotate_with_manifest.R b/tests/testthat/test_annotate_with_manifest.R deleted file mode 100644 index c241bb37..00000000 --- a/tests/testthat/test_annotate_with_manifest.R +++ /dev/null @@ -1,9 +0,0 @@ -test_that("annotate_with_manifest works by annotating files with `data.table` manifest", { - - modify_annotation - - testthat::expect_identical(update_items(current, update), - expected) - -}) - From f5e7987558f984360641b3a9add6ffd20579dc4c Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Fri, 15 Nov 2024 13:46:23 -0800 Subject: [PATCH 15/15] One more README update --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) 
diff --git a/README.md b/README.md index 108f3911..cdc17761 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,16 @@ You will have to set up both (see #Installation). Outside of the tested versions ## Installation +This presumes you have already set up R with RStudio. + 1. Install `reticulate` following guide at https://rstudio.github.io/reticulate/index.html#installation. 2. Install `synapseclient==4.3.1` following https://rstudio.github.io/reticulate/articles/python_packages.html, which will use a default environment "r-reticulate". -3. Lastly, install `nfportalutils`: `remotes::install_github("nf-osi/nfportalutils")`. -At startup, `nfportalutils` imports `synapseclient` from the default "r-reticulate". +3. Lastly, install `nfportalutils`. At startup, `nfportalutils` imports `synapseclient` from the default "r-reticulate". + - As regular users: `remotes::install_github("nf-osi/nfportalutils", build_vignettes = TRUE)` or `remotes::install_github("nf-osi/nfportalutils@some-branch", build_vignettes = TRUE)` + - For developers, presumably working with `devtools`: + - Clone the repo, checkout your desired development branch. + - Make sure the package repo root is working directory, then in R run `devtools::install()`. +4. Browse some vignettes: `browseVignettes("nfportalutils")`. ## For Users