Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update set annotations usage #202

Merged
merged 15 commits into from
Dec 4, 2024
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Imports:
dplyr,
data.table,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ export(register_study_files)
export(remanifest)
export(remove_button)
export(remove_wiki_subpage)
export(set_annotations)
export(summarize_attribute)
export(summarize_file_access)
export(swap_col)
Expand Down
2 changes: 1 addition & 1 deletion R/annotation_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ infer_data_type <- function(dataset_id) {
children <- first(children, 3)
data_type <- c()
for (entity in children) {
e <- .syn$getAnnotations(entity)
e <- .syn$get_annotations(entity)
data_type <- append(data_type, e$Component)
}
data_type <- unique(data_type)
Expand Down
76 changes: 45 additions & 31 deletions R/annotations.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
#' Merge new annotations into an entity's existing annotations
#'
#' Wrapper around the Python client's `set_annotations`. The entity's current
#' annotations are pulled first; every key in `annotations` is then added, or
#' overwritten when the same key already exists on the entity, and the merged
#' result is stored back. Keys not named in `annotations` are not modified.
#'
#' @param id Synapse entity id.
#' @param annotations A flat list representing annotation key-value pairs,
#'   e.g. `list(foo = "bar", rank = 1, authors = c("jack", "jane"))`
#' @return The value returned by the Python client's `set_annotations` call.
#' @export
set_annotations <- function(id, annotations) {
  merged <- .syn$get_annotations(id)
  for (key in names(annotations)) {
    new_value <- annotations[[key]]
    merged[key] <- new_value
  }
  .syn$set_annotations(merged)
}

#' Set annotations from a manifest
#'
#' The [Synapse docs](https://help.synapse.org/docs/Managing-Custom-Metadata-at-Scale.2004254976.html)
#' suggest doing batch annotations through a fileview. However, it is often simpler to
#' modify or set new annotations directly given a table of just the entities (rows) and props (cols) we want.
#'
#' The [Synapse docs](https://help.synapse.org/docs/Managing-Custom-Metadata-at-Scale.2004254976.html)
#' suggest doing batch annotations through a fileview. However, it is often simpler to
#' modify or set new annotations directly given a table of just the entities (rows) and props (cols) we want.
#' This is like how schematic works, except without any validation (so works best for power-users who know the data model well).
#' Some desired defaults are taken into account, such as not submitting key-values with `NA` and empty strings.
#'
#' @param manifest A table manifest. Needs to contain `entityId`.
#' Some desired defaults are taken into account, such as not submitting key-values with `NA` and empty strings.
#'
#' @param manifest A `data.frame` representing a manifest.
#' Needs to contain `entityId` (if parsed from a standard manifest.csv, the df should already contain `entityId`).
#' @param ignore_na Whether to ignore annotations that are `NA`; default TRUE.
#' @param ignore_blank Whether to ignore annotations that are empty strings; default TRUE.
#' @param verbose Be chatty, default FALSE.
#' @export
#' @export
annotate_with_manifest <- function(manifest, ignore_na = TRUE, ignore_blank = TRUE, verbose = FALSE) {
# Split by `entityId`
annotations <- as.data.table(manifest)
Expand All @@ -20,47 +36,45 @@ annotate_with_manifest <- function(manifest, ignore_na = TRUE, ignore_blank = TR
filterNA <- if(ignore_na) function(x) !any(is.na(x)) else TRUE # will ignore entirely if list with NA, e.g. c(NA, 1, 2) -- should warn if list
filterBlank <- if(ignore_blank) function(x) !any(x == "") else TRUE # same as above
annotations <- lapply(annotations, function(x) Filter(function(x) filterNA(x) & filterBlank(x) & length(x), unlist(x, recursive = F)))
for(entity in names(annotations)) {
.syn$setAnnotations(entity = entity, annotations = as.list(annotations[[entity]]))
for(entity_id in names(annotations)) {
set_annotations(entity_id, annotations[[entity_id]])
}
if (verbose) message("Annotations submitted")
}


#' Copy annotations
#'
#' Copy annotations (all or selectively) from a source entity to one or more target entities.
#' If same annotation keys already exist on target entities, the copy will replace the current values.
#' Keys on a target that are not being copied are left as they are.
#'
#' @param entity_from Syn id from which to copy.
#' @param entity_to One or more syn ids to copy annotations to.
#' @param select Vector of properties to selectively copy if present on the entity.
#' If not specified, will copy over everything, which may not be desirable.
#' Names in `select` that are not present on the source entity are ignored.
#' @param update Whether to immediately update or return annotation objects only.
#' @return When `update = TRUE`, `NULL` invisibly (annotations are stored on each target).
#' When `update = FALSE`, the merged annotations object for a single target,
#' or a list of such objects named by target id when several targets are given.
#' @export
copy_annotations <- function(entity_from,
                             entity_to,
                             select = NULL,
                             update = FALSE) {

  .check_login()

  from_annotations <- .syn$get_annotations(entity_from)
  # Check `select`: default to every key on the source, otherwise keep only
  # the requested keys that actually exist on the source.
  if (is.null(select)) {
    select <- names(from_annotations)
  } else {
    select <- select[select %in% names(from_annotations)]
  }

  # Collect one merged annotations object per target so that a dry run
  # (`update = FALSE`) reports every target rather than only the first.
  merged <- vector("list", length(entity_to))
  names(merged) <- as.character(entity_to)

  for (i in seq_along(entity_to)) {
    to_annotations <- .syn$get_annotations(entity_to[[i]])
    for (k in select) {
      to_annotations[k] <- from_annotations[k]
    }
    if (update) {
      .syn$set_annotations(to_annotations)
    } else {
      merged[[i]] <- to_annotations
    }
  }

  if (update) {
    return(invisible(NULL))
  }
  # Single target: hand back the object itself, as before.
  if (length(merged) == 1L) merged[[1L]] else merged
}

4 changes: 2 additions & 2 deletions R/calculate_related_studies.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ calculate_related_studies <- function(study_table_id,
for(i in 1:nrow(studies_updated)) {
id <- studies_updated[i, "studyId"]
relatedStudies <- studies_updated[i, "relatedStudies"]
annotations <- .syn$getAnnotations(id)
annotations <- .syn$get_annotations(id)
annotations$relatedStudies <- relatedStudies
invisible(.syn$setAnnotations(id, annotations))
invisible(set_annotations(id, annotations))
}
} else {
studies_updated
Expand Down
2 changes: 1 addition & 1 deletion R/register_study.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ add_new_study_meta <- function(id, study_meta) {
if(is.null(study_meta$studyStatus) || is.na(study_meta$studyStatus)) study_meta$studyStatus <- "Active"
if(is.null(study_meta$dataStatus) || is.na(study_meta$dataStatus)) study_meta$dataStatus <- "Data Pending"

study <- .syn$setAnnotations(id, study_meta)
study <- set_annotations(id, study_meta)
invisible(study)
}

Expand Down
21 changes: 14 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,27 @@
The goal of `nfportalutils` is to provide convenience functions for project and (meta)data management in the NF-OSI data portal scope.
Currently, `develop` branch is default so package install and docs refer to code in this branch.

The package interops with the [Python synapse client](https://github.com/Sage-Bionetworks/synapsePythonClient) via reticulate.
You will have to set up both (see #Installation). Outside of the tested versions, there may be some issues. The tested versions are:
- Python Synapse Client == 4.3.1
- reticulate == 1.39.0

## Docs

:point_right: [Package documentation!](https://nf-osi.github.io/nfportalutils/)

## Installation

You can install `nfportalutils` from here:
This presumes you have already set up R with RStudio.

``` r
remotes::install_github("nf-osi/nfportalutils")
```

The package interops with the [Python synapse client](https://github.com/Sage-Bionetworks/synapsePythonClient) via reticulate.
You will have to download the Python synapse client first.
1. Install `reticulate` following guide at https://rstudio.github.io/reticulate/index.html#installation.
2. Install `synapseclient==4.3.1` following https://rstudio.github.io/reticulate/articles/python_packages.html, which will use a default environment "r-reticulate".
3. Lastly, install `nfportalutils`. At startup, `nfportalutils` imports `synapseclient` from the default "r-reticulate".
- As regular users: `remotes::install_github("nf-osi/nfportalutils", build_vignettes = TRUE)` or `remotes::install_github("nf-osi/nfportalutils@some-branch", build_vignettes = TRUE)`
- For developers, presumably working with `devtools`:
- Clone the repo, checkout your desired development branch.
- Make sure the package repo root is working directory, then in R run `devtools::install()`.
4. Browse some vignettes: `browseVignettes("nfportalutils")`.

## For Users

Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ reference:
- subtitle: General annotations
desc: Add and manage annotations on Synapse entities
- contents:
- set_annotations
- update_study_annotations
- annotate_with_manifest
- copy_annotations
Expand Down
3 changes: 2 additions & 1 deletion man/annotate_with_manifest.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/copy_annotations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/set_annotations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions tests/testthat/helpers.R
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
# Implementing skips according to suggested handling when using reticulate
# See https://rstudio.github.io/reticulate/articles/package.html
# Skips tests on CRAN machines or other incompatible testing environments
# where Python can't be configured so package checks don't fail
# where Python can't be configured so package checks don't fail

# Skip the current test when the Python `synapseclient` module cannot be
# found by reticulate.
# This is normally imported upon package load, see `zzz.R`
skip_if_no_synapseclient <- function() {
  # Probe once; the result is only needed for this guard.
  if (!py_module_available("synapseclient")) {
    skip("synapseclient not available for testing")
  }
}

# Skip the current test when the Python `synapseutils` module cannot be
# found by reticulate.
# This is normally imported upon package load, see `zzz.R`
skip_if_no_synapseutils <- function() {
  # Probe the module this guard is named for (previously probed
  # "synapseclient", so a missing synapseutils was never detected).
  if (!py_module_available("synapseutils")) {
    skip("synapseutils not available for testing")
  }
}

# Skip the current test when the Python `pandas` module cannot be found by
# reticulate; pandas is needed for smaller subset of functions in the package
skip_if_no_pandas <- function() {
  # Probe once; the result is only needed for this guard.
  if (!py_module_available("pandas")) {
    skip("pandas not available for testing")
  }
}
Expand All @@ -37,6 +37,6 @@ skip_if_no_token <- function() {
# (e.g. someone pasted in wrong token), this creates a skip cascade for tests that presume
# successful login.
skip_if_no_login <- function() {
  # Skip when there is no `.syn` client object, or when the client exists but
  # carries no username. `&&` short-circuits, so `.syn$username` is only
  # evaluated once `.syn` is known to exist.
  logged_in <- exists(".syn") && !is.null(.syn$username)
  if (!logged_in) {
    skip("not logged in for tests")
  }
}
3 changes: 0 additions & 3 deletions tests/testthat/test-add_pubmed_publications.R

This file was deleted.

47 changes: 47 additions & 0 deletions tests/testthat/test_copy_annotations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Integration test for `copy_annotations()`: creates three folders in a test
# project, copies annotations A->B (all keys) and A->C (selected keys), then
# checks the annotations read back from B and C.
test_that("Copy annotations works", {
# Both guards skip the test instead of failing when prerequisites are missing.
skip_if_no_synapseclient()
skip_if_no_login()

PARENT_TEST_PROJECT <- "syn26462036"
# Create some folder objects with some annotations
# A is the source: one scalar annotation and one multi-value annotation.
entity_a <- synapseclient$Folder("Entity A",
parent = PARENT_TEST_PROJECT,
annotations = list(foo = "bar", favorites = c("raindrops", "whiskers")))
entity_a <- .syn$store(entity_a)

# B shares the `favorites` key with A (expected to be overwritten) and has
# its own `after_a` key (expected to be kept).
entity_b <- synapseclient$Folder("Entity B",
parent = PARENT_TEST_PROJECT,
annotations = list(favorites = c("kettles", "mittens"), after_a = TRUE))
entity_b <- .syn$store(entity_b)

# C starts with no annotations.
entity_c <- synapseclient$Folder("Entity C",
parent = PARENT_TEST_PROJECT)
entity_c <- .syn$store(entity_c)

# when copying all annotations from A->B (default)
copy_annotations(entity_from = entity_a$properties$id,
entity_to = entity_b$properties$id,
select = NULL,
update = TRUE)

# when copying selective annotations from A->C
# `key_not_on_a` is deliberately absent from A.
copy_annotations(entity_from = entity_a$properties$id,
entity_to = entity_c$properties$id,
select = c("favorites", "key_not_on_a"),
update = TRUE)

# Read results back, then delete the fixtures before asserting.
# NOTE(review): the deletes are only reached if every call above succeeds;
# an error earlier in the test leaves the folders in the test project.
result_b <- .syn$get_annotations(entity_b)
result_c <- .syn$get_annotations(entity_c)
.syn$delete(entity_a)
.syn$delete(entity_b)
.syn$delete(entity_c)
# B: gained `foo`, had `favorites` replaced by A's values, kept `after_a`.
testthat::expect_equal(result_b$foo, "bar")
testthat::expect_equal(result_b$favorites, c("raindrops", "whiskers"))
testthat::expect_equal(result_b$after_a, TRUE)
# C: only the selected key that exists on A was copied.
testthat::expect_error(result_c$foo) # Expect KeyError since key should not be present
testthat::expect_equal(result_c$favorites, c("raindrops", "whiskers"))
testthat::expect_error(result_c$key_not_on_a)

})


23 changes: 23 additions & 0 deletions tests/testthat/test_manifest_annotations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Integration test for `annotate_with_manifest()`: annotates three mock
# folders from a manifest table and checks the annotations read back.
test_that("Annotate with manifest works", {
  # Both guards skip the test instead of failing when prerequisites are missing.
  skip_if_no_synapseclient()
  skip_if_no_login()

  PARENT_TEST_PROJECT <- "syn26462036"
  # Use some folders to represent objects to annotate
  objs <- make_folder(parent = PARENT_TEST_PROJECT, folders = c("mock_file_1", "mock_file_2", "mock_file_3"))
  ids <- vapply(objs, function(x) x$properties$id, character(1))
  # Partial manifest as a data.table with list columns
  manifest <- data.table(
    entityId = ids,
    assay = "drugScreen",
    experimentalTimepoint = c(1L, 3L, 7L),
    experimentalTimepointUnit = "days",
    cellType = list(c("schwann", "macrophage"), c("schwann", "macrophage"), c("schwann", "macrophage"))
  )
  annotate_with_manifest(manifest)

  # Read annotations back per entity, then delete the fixtures before asserting.
  remanifested <- list()
  for (i in ids) {
    remanifested[[i]] <- .syn$get_annotations(i)
  }
  for (i in ids) .syn$delete(i)

  # Previously nothing was asserted, so the test could not fail on wrong
  # annotations. Check values that are identical across all three rows.
  testthat::expect_length(remanifested, length(ids))
  for (i in ids) {
    testthat::expect_equal(remanifested[[i]]$assay, "drugScreen")
    testthat::expect_equal(remanifested[[i]]$experimentalTimepointUnit, "days")
    testthat::expect_equal(remanifested[[i]]$cellType, c("schwann", "macrophage"))
  }
})
Loading
Loading