Skip to content

Commit

Permalink
Update set annotations usage (#202)
Browse files Browse the repository at this point in the history
* Rework deprecated setAnnotations

* Update downstream usage

* Update more downstream usage

* Update one more downstream usage

* Fix typo, regen R docs

* Export, regen docs

* Update README

* Add test

* More test updates

* Remove non-functional test file

* Update tests

* Add more introductory annotation vignette

* Update pkgdown index

* Final clean up of tests

* One more README update
  • Loading branch information
anngvu authored Dec 4, 2024
1 parent 8b07066 commit f99e509
Show file tree
Hide file tree
Showing 15 changed files with 254 additions and 52 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ export(register_study_files)
export(remanifest)
export(remove_button)
export(remove_wiki_subpage)
export(set_annotations)
export(summarize_attribute)
export(summarize_file_access)
export(swap_col)
Expand Down
2 changes: 1 addition & 1 deletion R/annotation_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ infer_data_type <- function(dataset_id) {
children <- first(children, 3)
data_type <- c()
for (entity in children) {
e <- .syn$getAnnotations(entity)
e <- .syn$get_annotations(entity)
data_type <- append(data_type, e$Component)
}
data_type <- unique(data_type)
Expand Down
76 changes: 45 additions & 31 deletions R/annotations.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
#' Set (merge) annotations on a Synapse entity
#'
#' Thin wrapper around the Python client's `set_annotations`: fetches the
#' annotations currently on the entity, assigns each supplied key onto them
#' (adding new keys and replacing the values of keys that already exist),
#' then submits the combined annotations.
#' @param id Synapse entity id.
#' @param annotations A flat list representing annotation key-value pairs,
#' e.g. `list(foo = "bar", rank = 1, authors = c("jack", "jane"))`
#' @return The result of the underlying `.syn$set_annotations()` call.
#' @export
set_annotations <- function(id, annotations) {
  # Start from what is already stored so untouched keys are carried along.
  merged <- .syn$get_annotations(id)
  keys <- names(annotations)
  for (i in seq_along(keys)) {
    key <- keys[i]
    merged[key] <- annotations[[key]]
  }
  .syn$set_annotations(merged)
}

#' Set annotations from a manifest
#'
#' The [Synapse docs](https://help.synapse.org/docs/Managing-Custom-Metadata-at-Scale.2004254976.html)
#' suggest doing batch annotations through a fileview. However, it is often simpler to
#' modify or set new annotations directly given a table of just the entities (rows) and props (cols) we want.
#'
#' The [Synapse docs](https://help.synapse.org/docs/Managing-Custom-Metadata-at-Scale.2004254976.html)
#' suggest doing batch annotations through a fileview. However, it is often simpler to
#' modify or set new annotations directly given a table of just the entities (rows) and props (cols) we want.
#' This is like how schematic works, except without any validation (so works best for power-users who know the data model well).
#' Some desired defaults are taken into account, such as not submitting key-values with `NA` and empty strings.
#'
#' @param manifest A table manifest. Needs to contain `entityId`.
#' Some desired defaults are taken into account, such as not submitting key-values with `NA` and empty strings.
#'
#' @param manifest A `data.frame` representing a manifest.
#' Needs to contain `entityId` (if parsed from a standard manifest.csv, the df should already contain `entityId`).
#' @param ignore_na Whether to ignore annotations that are `NA`; default TRUE.
#' @param ignore_blank Whether to ignore annotations that are empty strings; default TRUE.
#' @param verbose Be chatty, default FALSE.
#' @export
#' @export
annotate_with_manifest <- function(manifest, ignore_na = TRUE, ignore_blank = TRUE, verbose = FALSE) {
# Split by `entityId`
annotations <- as.data.table(manifest)
Expand All @@ -20,47 +36,45 @@ annotate_with_manifest <- function(manifest, ignore_na = TRUE, ignore_blank = TR
filterNA <- if(ignore_na) function(x) !any(is.na(x)) else TRUE # will ignore entirely if list with NA, e.g. c(NA, 1, 2) -- should warn if list
filterBlank <- if(ignore_blank) function(x) !any(x == "") else TRUE # same as above
annotations <- lapply(annotations, function(x) Filter(function(x) filterNA(x) & filterBlank(x) & length(x), unlist(x, recursive = F)))
for(entity in names(annotations)) {
.syn$setAnnotations(entity = entity, annotations = as.list(annotations[[entity]]))
for(entity_id in names(annotations)) {
set_annotations(entity_id, annotations[[entity_id]])
}
if (verbose) message("Annotations submitted")
}


#' Copy annotations
#'
#'
#' Copy annotations (all or selectively) from a source entity to one or more target entities.
#' If annotations already exist on target entities, the copy will replace the current values.
#'
#' @param entity_from Syn id from which to copy.
#' @param entity_to One or more syn ids to copy annotations to.
#' @param select Vector of properties to selectively copy if present on the entity.
#' If the same annotation keys already exist on target entities, the copy will replace the current values.
#'
#' @param entity_from Syn id from which to copy.
#' @param entity_to One or more syn ids to copy annotations to.
#' @param select Vector of properties to selectively copy if present on the entity.
#' If not specified, will copy over everything, which may not be desirable.
#' @param update Whether to immediately update or return annotation objects only.
#' @param update Whether to immediately update or return annotation objects only.
#' @export
# Copies annotation keys from `entity_from` onto each id in `entity_to`,
# optionally restricted to the keys named in `select`.
# NOTE(review): this span is a rendered diff in which removed and added lines are
# interleaved (e.g. both `annotations <- ...` and `from_annotations <- ...`,
# both `.syn$setAnnotations(...)` and `.syn$set_annotations(...)`), so it is not
# runnable exactly as shown -- confirm against the actual R/annotations.R.
copy_annotations <- function(entity_from,
entity_to,
select = NULL,
update = FALSE) {

.check_login()

annotations <- .syn$get_annotations(entity_from)

# Annotations currently on the source entity.
from_annotations <- .syn$get_annotations(entity_from)
# Check `select`
# With no `select`, every key on the source is used; otherwise `select` is
# narrowed to the keys that are actually present on the source.
if(is.null(select)) {
cp <- annotations
select <- names(from_annotations)
} else {
cp <- reticulate::dict()
for(k in names(annotations)) {
if(k %in% select) cp[k] <- annotations[k]
}
select <- select[select %in% names(from_annotations)]
}

if(update) {
for(e in entity_to) {
.syn$setAnnotations(e, annotations = cp)

# For each target: fetch its current annotations and overwrite the selected keys
# with the source values, leaving the target's other keys in place.
for(id in entity_to) {
to_annotations <- .syn$get_annotations(id)
for(k in select) {
to_annotations[k] <- from_annotations[k]
}
} else {
return(cp)
# NOTE(review): in the added version this `return()` sits inside the loop over
# `entity_to`, so with `update = FALSE` only the first target's annotations are
# returned and later targets are never visited -- confirm this is intended.
if(update) .syn$set_annotations(to_annotations) else return(to_annotations)
}
}

4 changes: 2 additions & 2 deletions R/calculate_related_studies.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ calculate_related_studies <- function(study_table_id,
for(i in 1:nrow(studies_updated)) {
id <- studies_updated[i, "studyId"]
relatedStudies <- studies_updated[i, "relatedStudies"]
annotations <- .syn$getAnnotations(id)
annotations <- .syn$get_annotations(id)
annotations$relatedStudies <- relatedStudies
invisible(.syn$setAnnotations(id, annotations))
invisible(set_annotations(id, annotations))
}
} else {
studies_updated
Expand Down
2 changes: 1 addition & 1 deletion R/register_study.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ add_new_study_meta <- function(id, study_meta) {
if(is.null(study_meta$studyStatus) || is.na(study_meta$studyStatus)) study_meta$studyStatus <- "Active"
if(is.null(study_meta$dataStatus) || is.na(study_meta$dataStatus)) study_meta$dataStatus <- "Data Pending"

study <- .syn$setAnnotations(id, study_meta)
study <- set_annotations(id, study_meta)
invisible(study)
}

Expand Down
21 changes: 14 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,27 @@
The goal of `nfportalutils` is to provide convenience functions for project and (meta)data management in the NF-OSI data portal scope.
Currently, `develop` branch is default so package install and docs refer to code in this branch.

The package interops with the [Python synapse client](https://github.com/Sage-Bionetworks/synapsePythonClient) via reticulate.
You will have to set up both (see #Installation). Outside of the tested versions, there may be some issues. The tested versions are:
- Python Synapse Client == 4.3.1
- reticulate == 1.39.0

## Docs

:point_right: [Package documentation!](https://nf-osi.github.io/nfportalutils/)

## Installation

You can install `nfportalutils` from here:
This presumes you have already set up R with RStudio.

``` r
remotes::install_github("nf-osi/nfportalutils")
```

The package interops with the [Python synapse client](https://github.com/Sage-Bionetworks/synapsePythonClient) via reticulate.
You will have to download the Python synapse client first.
1. Install `reticulate` following guide at https://rstudio.github.io/reticulate/index.html#installation.
2. Install `synapseclient==4.3.1` following https://rstudio.github.io/reticulate/articles/python_packages.html, which will use a default environment "r-reticulate".
3. Lastly, install `nfportalutils`. At startup, `nfportalutils` imports `synapseclient` from the default "r-reticulate".
- As regular users: `remotes::install_github("nf-osi/nfportalutils", build_vignettes = TRUE)` or `remotes::install_github("nf-osi/nfportalutils@some-branch", build_vignettes = TRUE)`
- For developers, presumably working with `devtools`:
- Clone the repo, checkout your desired development branch.
- Make sure the package repo root is working directory, then in R run `devtools::install()`.
4. Browse some vignettes: `browseVignettes("nfportalutils")`.

## For Users

Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ reference:
- subtitle: General annotations
desc: Add and manage annotations on Synapse entities
- contents:
- set_annotations
- update_study_annotations
- annotate_with_manifest
- copy_annotations
Expand Down
3 changes: 2 additions & 1 deletion man/annotate_with_manifest.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/copy_annotations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/set_annotations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions tests/testthat/helpers.R
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
# Implementing skips according to suggested handling when using reticulate
# See https://rstudio.github.io/reticulate/articles/package.html
# Skips tests on CRAN machines or other incompatible testing environments
# where Python can't be configured so package checks don't fail
# where Python can't be configured so package checks don't fail

# Skip the calling test when the Python `synapseclient` module cannot be found.
# The module is normally imported when the package loads, see `zzz.R`.
skip_if_no_synapseclient <- function() {
  module_found <- py_module_available("synapseclient")
  if (module_found) {
    return(invisible(NULL))
  }
  skip("synapseclient not available for testing")
}

# Skip the calling test when the Python `synapseutils` module cannot be found.
# This is normally imported upon package load, see `zzz.R`.
skip_if_no_synapseutils <- function() {
  # Probe `synapseutils` itself: checking `synapseclient` here would let tests
  # that need `synapseutils` run (and fail) when only `synapseclient` is present.
  have_synapseutils <- py_module_available("synapseutils")
  if (!have_synapseutils) {
    skip("synapseutils not available for testing")
  }
}

# Skip the calling test when the Python `pandas` module cannot be found;
# pandas is needed for a smaller subset of functions in the package.
skip_if_no_pandas <- function() {
  module_found <- py_module_available("pandas")
  if (module_found) {
    return(invisible(NULL))
  }
  skip("pandas not available for testing")
}
Expand All @@ -37,6 +37,6 @@ skip_if_no_token <- function() {
# (e.g. someone pasted in wrong token), this creates a skip cascade for tests that presume
# successful login.
skip_if_no_login <- function() {
  # Treat the session as "not logged in" when the `.syn` client object does not
  # exist, or when it exists but its `username` is NULL.
  # `||` short-circuits, so `.syn$username` is only read once `.syn` is known to
  # exist. A single condition is required: nesting the two checks would make the
  # username check unreachable whenever `.syn` exists.
  if (!exists(".syn") || is.null(.syn$username)) {
    skip("not logged in for tests")
  }
}
3 changes: 0 additions & 3 deletions tests/testthat/test-add_pubmed_publications.R

This file was deleted.

47 changes: 47 additions & 0 deletions tests/testthat/test_copy_annotations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Integration test for `copy_annotations()`: stores three folders under a test
# project, copies annotations A->B (all keys) and A->C (selected keys), then
# checks the annotations read back from B and C.
test_that("Copy annotations works", {
skip_if_no_synapseclient()
skip_if_no_login()

PARENT_TEST_PROJECT <- "syn26462036"
# Create some folder objects with some annotations
entity_a <- synapseclient$Folder("Entity A",
parent = PARENT_TEST_PROJECT,
annotations = list(foo = "bar", favorites = c("raindrops", "whiskers")))
entity_a <- .syn$store(entity_a)

# B starts with its own `favorites` (expected to be replaced) and `after_a` (expected to be kept)
entity_b <- synapseclient$Folder("Entity B",
parent = PARENT_TEST_PROJECT,
annotations = list(favorites = c("kettles", "mittens"), after_a = TRUE))
entity_b <- .syn$store(entity_b)

# C starts with no annotations
entity_c <- synapseclient$Folder("Entity C",
parent = PARENT_TEST_PROJECT)
entity_c <- .syn$store(entity_c)

# when copying all annotations from A->B (default)
copy_annotations(entity_from = entity_a$properties$id,
entity_to = entity_b$properties$id,
select = NULL,
update = TRUE)

# when copying selective annotations from A->C
# (`key_not_on_a` is deliberately a key that A does not have)
copy_annotations(entity_from = entity_a$properties$id,
entity_to = entity_c$properties$id,
select = c("favorites", "key_not_on_a"),
update = TRUE)

# Read results back first, then delete the test folders before asserting.
# NOTE(review): if any call above errors, the deletes below are never reached
# and the folders stay in the test project.
result_b <- .syn$get_annotations(entity_b)
result_c <- .syn$get_annotations(entity_c)
.syn$delete(entity_a)
.syn$delete(entity_b)
.syn$delete(entity_c)
# B: gains `foo`, has `favorites` replaced by A's values, keeps `after_a`
testthat::expect_equal(result_b$foo, "bar")
testthat::expect_equal(result_b$favorites, c("raindrops", "whiskers"))
testthat::expect_equal(result_b$after_a, TRUE)
# C: only the selected key that exists on A is copied
testthat::expect_error(result_c$foo) # Expect KeyError since key should not be present
testthat::expect_equal(result_c$favorites, c("raindrops", "whiskers"))
testthat::expect_error(result_c$key_not_on_a)

})


23 changes: 23 additions & 0 deletions tests/testthat/test_manifest_annotations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Integration test for `annotate_with_manifest()`: creates three folders under a
# test project, annotates them from a small manifest, reads the annotations
# back, and deletes the folders.
# NOTE(review): this test contains no `expect_*()` call -- `remanifested` is
# collected but never compared against the manifest, so the test only fails if
# one of the calls errors.
test_that("Annotate with manifest works", {
skip_if_no_synapseclient()
skip_if_no_login()

PARENT_TEST_PROJECT <- "syn26462036"
# Use some folders to represent objects to annotate
objs <- make_folder(parent = PARENT_TEST_PROJECT, folders = c("mock_file_1", "mock_file_2", "mock_file_3"))
ids <- sapply(objs, function(x) x$properties$id)
# Partial manifest as a data.table with list columns
manifest <- data.table(
entityId = ids,
assay = "drugScreen",
experimentalTimepoint = c(1L, 3L, 7L),
experimentalTimepointUnit = "days",
cellType = list(c("schwann", "macrophage"), c("schwann", "macrophage"), c("schwann", "macrophage"))
)
annotate_with_manifest(manifest)
# Fetch the annotations now stored on each folder, keyed by entity id
remanifested <- list()
for(i in ids) {
remanifested[[i]] <- .syn$get_annotations(i)
}
# Clean up the test folders
for(i in ids) .syn$delete(i)
})
Loading

0 comments on commit f99e509

Please sign in to comment.