Skip to content

Commit

Permalink
Pass sample id into lambda (#13)
Browse files Browse the repository at this point in the history
* dynamically generate plotUuid
  • Loading branch information
Anugerah Erlaut authored Mar 24, 2021
1 parent 32d8295 commit 267f6d1
Show file tree
Hide file tree
Showing 15 changed files with 106 additions and 63 deletions.
1 change: 1 addition & 0 deletions local-runner/cf-local-container-launcher.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Resources:
"__DOCKER_GATEWAY_HOST__",
f"--name {event['name']}-{random_string(10)}",
f"{'-d -p 6969:6969' if event['detached'] else ''}",
f"--env SAMPLE_ID={event.get('sampleUuid', '')}",
event['image'],
task
])
Expand Down
3 changes: 2 additions & 1 deletion local-runner/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"dev": "nodemon src/app.js",
"build": "docker build -t biomage-remoter-client ../remoter-client && docker build -t biomage-remoter-server ../remoter-server",
"start": "node src/app.js",
"restart": "npm run build && npm start",
"lint": "eslint ./src",
"detect-secrets": "pip3 install detect-secrets && git diff --staged --name-only | xargs detect-secrets-hook --baseline .secrets.baseline"
},
Expand All @@ -28,4 +29,4 @@
"nodemon": "^2.0.7",
"prepend-transform": "0.0.1019"
}
}
}
Binary file added remoter-client/.DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions remoter-client/remoter
Submodule remoter added at e73b87
15 changes: 10 additions & 5 deletions remoter-client/src/init.r
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ require("RJSONIO")
require("remoter")
require("ids")

PORT=6969

# get request from the arguments of the docker run command, and escape all quotes.
request <- commandArgs(trailingOnly = TRUE)[1]
run_id <- ids::random_id()
Expand All @@ -16,16 +18,19 @@ if (parsed$server == "host.docker.internal") {
}
}

# Get sample ids
sample_id = Sys.getenv("SAMPLE_ID", "")

# load wrapper in case it changed from last run
message("Loading wrapper for server ", parsed$server, "...")
remoter::batch(addr = parsed$server, port = 6969, file = "./wrapper.r")
remoter::batch(addr = parsed$server, port = PORT, file = "./wrapper.r")

message('')
message('Copying request...')
message(sprintf("c2s(request, 'request_%s')", run_id))
remoter::batch(addr = parsed$server, port = 6969, script = sprintf("c2s(request, 'request_%s')", run_id))
remoter::batch(addr = parsed$server, port = PORT, script = sprintf("c2s(request, 'request_%s')", run_id))

message('Launching work...')
message(sprintf("wrapper(request_%s)", run_id))
remoter::batch(addr = parsed$server, port = 6969, script = sprintf("wrapper(request_%s)", run_id))
message(sprintf('Launching work for sample %s...', sample_id))
message(sprintf("wrapper(request_%s, '%s')", run_id, sample_id))
remoter::batch(addr = parsed$server, port = PORT, script = sprintf("wrapper(request_%s, '%s')", run_id, sample_id))
message('Exiting...')
16 changes: 11 additions & 5 deletions remoter-client/src/wrapper.r
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ reload_from_s3 <- function(pipeline_config, experiment_id) {
}


run_step <- function(task_name, scdata, config) {
run_step <- function(scdata, config, task_name, sample_id) {

switch(task_name,
cellSizeDistribution = {
import::here("/src/cellSizeDistribution.r", task)
Expand All @@ -82,7 +83,8 @@ run_step <- function(task_name, scdata, config) {
},
stop(paste("Invalid task name given:", task_name))
)
out <- task(scdata, config)

out <- task(scdata, config, task_name, sample_id)
return(out)
}

Expand Down Expand Up @@ -168,15 +170,18 @@ send_plot_data_to_s3 <- function(pipeline_config, experiment_id, output) {
}


wrapper <- function(input_json) {
wrapper <- function(input_json, sample_id) {

# Get data from state machine input.
input <- RJSONIO::fromJSON(input_json)

c(
experiment_id = experimentId,
task_name = taskName,
config = config,
server = server
) %<-% input

input <- input[names(input) != "server"]

pipeline_config <- load_config(server)
Expand All @@ -190,10 +195,11 @@ wrapper <- function(input_json) {

message("Single-cell data loaded.")
}

# call function to run and update global variable
c(
data, ...rest_of_results
) %<-% run_step(task_name, scdata, config)
) %<-% run_step(scdata, config, task_name, sample_id)

assign("scdata", data, pos = ".GlobalEnv")

Expand All @@ -206,4 +212,4 @@ wrapper <- function(input_json) {
return(message_id)
}

message("Wrapper loaded.")
message("New wrapper loaded.")
Binary file added remoter-server/.DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions remoter-server/remoter
Submodule remoter added at e73b87
23 changes: 11 additions & 12 deletions remoter-server/src/cellSizeDistribution.r
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#' - binStep: Integer. Bin size for the histogram
#' @export return a list with the filtered seurat object by cell size distribution, the config and the plot values

source('utils.r')

# CalculateBarcodeInflections calculates an adaptive inflection point ("knee")
# of the barcode distribution for each sample group. This is
Expand Down Expand Up @@ -49,7 +50,7 @@ generate_default_values_cellSizeDistribution <- function(seurat_obj, config) {
return(tmp$nCount_RNA)
}

task <- function(seurat_obj, config) {
task <- function(seurat_obj, config, task_name, sample_id) {
import::here(map2, .from = purrr)
minCellSize <- as.numeric(config$filterSettings["minCellSize"])
# extract plotting data of original data to return to plot slot later
Expand Down Expand Up @@ -78,21 +79,19 @@ task <- function(seurat_obj, config) {
}
# update config
config$filterSettings$minCellSize <- minCellSize
# the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}

plots <-list()
plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)

# plot2 = list(u = seurat_obj$nCount_RNA, rank = order(seurat_obj$nCount_RNA))
plots[generate_plotuuid(sample_id, task_name, 1)] <- list(plot2_data)

# the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}
result <- list(
data = seurat_obj,
config = config,
plotData = list(
cellSizeDistributionHistogram = plot1_data,
# Q: are both plots updated for this filter?
# Q: what is the format of plot2?
# knee-plot: this is on a log-log scale, are logs calucated here or on the UI?
# cells are ordered on the x-axis according to the number of distinct UMIs observed.
# The y-axis displays the number of distinct UMIs for each barcode (here barcodes are proxies for cells).
# cellRank_sorted.json: [{"u": 0, "rank": 17852}, {"u": 1, "rank": 17412},...] -> this should be {"u": 11852, "rank": 0}, {"u": 15123, "rank": 1}
# plot2 = list(u = seurat_obj$nCount_RNA, rank = order(seurat_obj$nCount_RNA))
cellSizeDistributionKneePlot = plot2_data
)
plotData = plots
)

return(result)
Expand Down
11 changes: 7 additions & 4 deletions remoter-server/src/classifier.r
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#' - filterThreshold:
#' @export return a list with the filtered seurat object by probabilities classifier, the config and the plot values

source('utils.r')

generate_default_values_classifier <- function(seurat_obj, config) {

# HARDCODE
Expand All @@ -36,7 +38,7 @@ generate_default_values_classifier <- function(seurat_obj, config) {
#' @export return a list with the filtered seurat object by mitochondrial content, the config and the plot values


task <- function(seurat_obj, config){
task <- function(seurat_obj, config, task_name, sample_id){
# config$filterSettings = list(minProbability=0.82, bandwidth=-1, filterThreshold=-1)
# Check whether the filter is set to true or false
# For some reason the last children of named lists are computed as vectors, so we can't access them as recursive objects.
Expand Down Expand Up @@ -67,13 +69,14 @@ task <- function(seurat_obj, config){
# update config
config$filterSettings$minProbability <- minProbability

plots <-list()
plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)

# the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}
result <- list(
data = seurat_obj.filtered,
config = config,
plotData = list(
classifierEmptyDropsPlot = plot1_data
)
plotData = plots
)

return(result)
Expand Down
16 changes: 10 additions & 6 deletions remoter-server/src/doubletScores.r
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# To separate cells with a low doublet score from the ones that have a high doublet score, which makes us think that they are mistakenly considered a single cell when they are actually two or more.
# This can be a useful first guess. The settings for such a filter can also contain a simple "probabilityThreshold" setting.

source('utils.r')

# The most commonly used values for doublet scores reported in the scrublet paper [1] are around 0.25. There is not much literature about how to compute
# a threshold. For now, we will offer two methods:
Expand All @@ -28,7 +29,7 @@ generate_default_values_doubletScores <- function(scdata, config) {
#' - binStep: Float. Bin size for the histogram
#' @export return a list with the filtered seurat object by doublet score, the config and the plot values

task <- function(scdata, config){
task <- function(scdata, config, task_name, sample_id){
# Check if the experiment has doubletScores
if (!"doublet_scores"%in%colnames(scdata@meta.data)){
message("Warning! No doubletScores scores has been computed for this experiment!")
Expand All @@ -51,15 +52,18 @@ task <- function(scdata, config){
# update config
config$filterSettings$probabilityThreshold <- probabilityThreshold
plot1_data <- lapply(unname(scdata$doublet_scores),function(x) {c("doubletP"=x)})

plots <-list()

# plot 1: histogram of doublet scores
# [0.161, 0.198, 0.284, ...]
plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)

# the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}
result <- list(
data = scdata.filtered,
config = config,
plotData = list(
# plot 1: histgram of doublet scores
# [0.161, 0.198, 0.284, ...]
doubletFilterHistogram = plot1_data
)
plotData = plots
)
return(result)
}
Expand Down
39 changes: 22 additions & 17 deletions remoter-server/src/mitochondrialContent.r
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
# To separate cells with low MT-content from the ones that have a high MT-content, which makes us think that they are dead.
# This can be a useful first guess. The settings for such a filter can also contain a simple "probabilityThreshold" setting.

source('utils.r')

# The most commonly used values for MT-content are between [0.1, 0.2]. There is not much literature about how to compute
# a threshold. For now, we will offer two methods:
# --> Absolute threshold: In order not to be too restrictive, the threshold is set to 0.1
Expand All @@ -31,7 +33,7 @@ generate_default_values_mitochondrialContent <- function(scdata, config) {
#' * we are supposed to add more methods ....
#' @export return a list with the filtered seurat object by mitochondrial content, the config and the plot values

task <- function(scdata, config){
task <- function(scdata, config, task_name, sample_id){
print(config)
# Check if the experiment has MT-content
if (!"percent.mt"%in%colnames(scdata@meta.data)){
Expand Down Expand Up @@ -63,26 +65,29 @@ task <- function(scdata, config){
# update config
config$filterSettings$methodSettings[[config$filterSettings$method]][["maxFraction"]] <- maxFraction

plots <- list()

# plot 1: histogram of MT-content
# AAACCCAAGCGCCCAT-1 AAACCCAAGGTTCCGC-1 AAACCCACAGAGTTGG-1
# 0.161 0.198 0.284 ...
plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)

# plot 2: There are two alternatives:
# - Scatter plot with UMIs in the x-axis and MT-content in the y-axis
# --> code: plot2 = list(u=scdata$nCount_RNA.mt, "MT-content" = scdata$percent.mt)
# - Barplot representing in the x-axis the log10(UMIs) and in the y-axis the MT-content. This option is the one
# that is shown in the mockup.
# --> code: plot2 = list(log_10_UMIs=log10(scdata$nCount_RNA), MT_content =mscdata$percent.mt)
# We have decided to use the scatter plot, but I temporarily leave the other option in the comments.
# Q: Should we return from the R side the cells that are going to be removed? For this plot it is interesting to color the
# cells that are going to be excluded.
plots[generate_plotuuid(sample_id, task_name, 1)] <- list(plot2_data)

# the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}
result <- list(
data = scdata.filtered, # scdata filter
config = config,
plotData = list(
# plot 1: histgram of MT-content
# AAACCCAAGCGCCCAT-1 AAACCCAAGGTTCCGC-1 AAACCCACAGAGTTGG-1
# 0.161 0.198 0.284 ...
mitochondrialFractionHistogram = plot1_data,
# plot 2: There are two alternavitive:
# - Scatter plot with UMIs in the x-axis and MT-content in the y-axis
# --> code: plot2 = list(u=scdata$nCount_RNA.mt, "MT-content" = scdata$percent.mt)
# - Barplot representing in the x-axis the log10(UMIs) and in the y-axis the MT-content. This option is the one
# that is shown in the mockup.
# --> code: plot2 = list(log_10_UMIs=log10(scdata$nCount_RNA), MT_content =mscdata$percent.mt)
# We have decided to use the scatter plot, but I temporaly leave the other option in the comments.
# Q: Should we return from the R side the cells that are going to be removed? For this plot it is interesting to color the
# cells that are going to be excluded.
mitochondrialFractionLogHistogram = plot2_data
)
plotData = plots
)
return(result)
}
24 changes: 14 additions & 10 deletions remoter-server/src/numGenesVsNumUmis.r
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
# }


source('utils.r')

#' @description Filters seurat object based on classifier filter
#' @param config list containing the following information
#' - enable: true/false. Whether or not to apply the filter.
Expand All @@ -48,7 +50,8 @@
#' - gam: for the gam option there is only one element:
#' - p.level: which refers to confidence level for deviation from the main trend
#' @export return a list with the filtered seurat object by numGenesVsNumUmis, the config and the plot values
task <- function(scdata, config){

task <- function(scdata, config, task_name, sample_id){
# Check whether the filter is set to true or false
if (!as.logical(toupper(config$enabled)))
return(scdata)
Expand Down Expand Up @@ -92,19 +95,20 @@ task <- function(scdata, config){
plot1_data <- purrr::map2(plot1_data,unname(pb$lwr),function(x,y){append(x,c("lower_cutoff"=y))})
plot1_data <- purrr::map2(plot1_data,unname(pb$upr),function(x,y){append(x,c("upper_cutoff"=y))})

# Scatter plot which is composed of:
# x-axis: log_10_UMIs
# y-axis: log_10_genes
# bands that are formed by the upper_cutoff and the lower_cutoff. We can print a band or dotted lines.
# Q: Should we point out the cells that are going to be excluded from the R side, or can this task be done on
# the UI side?
plots <- list()
plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)

# the result object will have to conform to this format: {data, config, plotData : {plot1}}
result <- list(
data = scdata.filtered,
config = config,
plotData = list(
# Scatter plot which is composed of:
# x-axis: log_10_UMIs
# y-axis: log_10_genes
# bands that are conformed with the upper_cutoff and the lower_cutoff. We can print a band or dotted lines.
# Q: Should we return the point out the cells that are going to be excluded from the R side or this task can be done in
# the UI side.
featuresvsUMIsscatterplot = plot1_data
)
plotData = plots
)

return(result)
Expand Down
10 changes: 7 additions & 3 deletions remoter-server/src/test_fn.r
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,30 @@
# Its only purpose is to demonstrate the input and output format
# that you will expect to your pipeline steps.

source('utils.r')

# some dummy function to check that imports do not import additional
# functions from a file into the namespace
some_other_stuff <- function(a) {
  # Dummy helper: offsets the input by a fixed amount of 5.
  # It is only here to verify that importing `task` from this file does not
  # also pull unrelated functions into the caller's namespace.
  offset_value <- a + 5
  offset_value
}

# a sample task
task <- function(input_data, input_config) {
task <- function(input_data, input_config, task_name, sample_id) {

# example where after coming up with sensible defaults the configuration
# will be changed to a different number, say, 420
config <- input_config
config$filterSettings[["minCellSize"]] <- 420

plots <- list()
plots[generate_plotuuid(sample_id, task_name, 0)] = c(1, 2, 3)
plots[generate_plotuuid(sample_id, task_name, 1)] = c(4, 5, 6)

# the result object will have to conform to this format.
result <- list(
data = input_data,
config = config,
plotData = list()
plotData = plots
)

return(result)
Expand Down
Loading

0 comments on commit 267f6d1

Please sign in to comment.