Pass sample id into lambda (#13)

* dynamically generate plotUuid
hms-dbmi-cellenics · Mar 24, 2021 · 267f6d1 · 267f6d1
1 parent 32d8295
commit 267f6d1
Show file tree

Hide file tree

Showing 15 changed files with 106 additions and 63 deletions.
diff --git a/local-runner/cf-local-container-launcher.yaml b/local-runner/cf-local-container-launcher.yaml
@@ -38,6 +38,7 @@ Resources:
               "__DOCKER_GATEWAY_HOST__",
               f"--name {event['name']}-{random_string(10)}", 
               f"{'-d -p 6969:6969' if event['detached'] else ''}", 
+              f"--env SAMPLE_ID={event.get('sampleUuid', '')}",
               event['image'],
               task 
             ])

diff --git a/local-runner/package.json b/local-runner/package.json
@@ -11,6 +11,7 @@
     "dev": "nodemon src/app.js",
     "build": "docker build -t biomage-remoter-client ../remoter-client && docker build -t biomage-remoter-server ../remoter-server",
     "start": "node src/app.js",
+    "restart": "npm run build && npm start",
     "lint": "eslint ./src",
     "detect-secrets": "pip3 install detect-secrets && git diff --staged --name-only | xargs detect-secrets-hook --baseline .secrets.baseline"
   },
@@ -28,4 +29,4 @@
     "nodemon": "^2.0.7",
     "prepend-transform": "0.0.1019"
   }
-}
+}
diff --git a/remoter-client/.DS_Store b/remoter-client/.DS_Store
diff --git a/remoter-client/remoter b/remoter-client/remoter
diff --git a/remoter-client/src/init.r b/remoter-client/src/init.r
@@ -2,6 +2,8 @@ require("RJSONIO")
 require("remoter")
 require("ids")
 
+PORT=6969
+
 # get request from the arguments of the docker run command, and escape all quotes.
 request <- commandArgs(trailingOnly = TRUE)[1]
 run_id <- ids::random_id()
@@ -16,16 +18,19 @@ if (parsed$server == "host.docker.internal") {
     }
 }
 
+# Get sample ids
+sample_id = Sys.getenv("SAMPLE_ID", "")
+
 # load wrapper in case it changed from last run
 message("Loading wrapper for server ", parsed$server, "...")
-remoter::batch(addr = parsed$server, port = 6969, file = "./wrapper.r")
+remoter::batch(addr = parsed$server, port = PORT, file = "./wrapper.r")
 
 message('')
 message('Copying request...')
 message(sprintf("c2s(request, 'request_%s')", run_id))
-remoter::batch(addr = parsed$server, port = 6969, script = sprintf("c2s(request, 'request_%s')", run_id))
+remoter::batch(addr = parsed$server, port = PORT, script = sprintf("c2s(request, 'request_%s')", run_id))
 
-message('Launching work...')
-message(sprintf("wrapper(request_%s)", run_id))
-remoter::batch(addr = parsed$server, port = 6969, script = sprintf("wrapper(request_%s)", run_id))
+message(sprintf('Launching work for sample %s...', sample_id))
+message(sprintf("wrapper(request_%s, '%s')", run_id, sample_id))
+remoter::batch(addr = parsed$server, port = PORT, script = sprintf("wrapper(request_%s, '%s')", run_id, sample_id))
 message('Exiting...')
diff --git a/remoter-client/src/wrapper.r b/remoter-client/src/wrapper.r
@@ -57,7 +57,8 @@ reload_from_s3 <- function(pipeline_config, experiment_id) {
 }
 
 
-run_step <- function(task_name, scdata, config) {
+run_step <- function(scdata, config, task_name, sample_id) {
+
     switch(task_name,
         cellSizeDistribution = {
             import::here("/src/cellSizeDistribution.r", task)
@@ -82,7 +83,8 @@ run_step <- function(task_name, scdata, config) {
         },
         stop(paste("Invalid task name given:", task_name))
     )
-    out <- task(scdata, config)
+
+    out <- task(scdata, config, task_name, sample_id)
     return(out)
 }
 
@@ -168,15 +170,18 @@ send_plot_data_to_s3 <- function(pipeline_config, experiment_id, output) {
 }
 
 
-wrapper <- function(input_json) {
+wrapper <- function(input_json, sample_id) {
+
     # Get data from state machine input.
     input <- RJSONIO::fromJSON(input_json)
+
     c(
         experiment_id = experimentId,
         task_name = taskName,
         config = config,
         server = server
     ) %<-% input
+
     input <- input[names(input) != "server"]
 
     pipeline_config <- load_config(server)
@@ -190,10 +195,11 @@ wrapper <- function(input_json) {
 
         message("Single-cell data loaded.")
     }
+
     # call function to run and update global variable
     c(
         data, ...rest_of_results
-    ) %<-% run_step(task_name, scdata, config)
+    ) %<-% run_step(scdata, config, task_name, sample_id)
 
     assign("scdata", data, pos = ".GlobalEnv")
 
@@ -206,4 +212,4 @@ wrapper <- function(input_json) {
     return(message_id)
 }
 
-message("Wrapper loaded.")
+message("New wrapper loaded.")
diff --git a/remoter-server/.DS_Store b/remoter-server/.DS_Store
diff --git a/remoter-server/remoter b/remoter-server/remoter
diff --git a/remoter-server/src/cellSizeDistribution.r b/remoter-server/src/cellSizeDistribution.r
@@ -17,6 +17,7 @@
 #'                  - binStep: Integer. Bin size for the histogram
 #' @export return a list with the filtered seurat object by cell size distribution, the config and the plot values
 
+source('utils.r')
 
 # CalculateBarcodeInflections calculates an adaptive inflection point ("knee")
 # of the barcode distribution for each sample group. This is
@@ -49,7 +50,7 @@ generate_default_values_cellSizeDistribution <- function(seurat_obj, config) {
   return(tmp$nCount_RNA)
 }
 
-task <- function(seurat_obj, config) {
+task <- function(seurat_obj, config, task_name, sample_id) {
     import::here(map2, .from = purrr)
     minCellSize <- as.numeric(config$filterSettings["minCellSize"])
     # extract plotting data of original data to return to plot slot later
@@ -78,21 +79,19 @@ task <- function(seurat_obj, config) {
     }
     # update config
     config$filterSettings$minCellSize <- minCellSize
+    # the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}
+
+    plots <-list()
+    plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)
+
+     # plot2 = list(u = seurat_obj$nCount_RNA, rank = order(seurat_obj$nCount_RNA))
+    plots[generate_plotuuid(sample_id, task_name, 1)] <- list(plot2_data)
+
     # the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}
     result <- list(
         data = seurat_obj,
         config = config,
-        plotData = list(
-            cellSizeDistributionHistogram = plot1_data,
-            # Q: are both plots updated for this filter?
-            # Q: what is the format of plot2?
-            # knee-plot: this is on a log-log scale, are logs calucated here or on the UI?
-            # cells are ordered on the x-axis according to the number of distinct UMIs observed. 
-            # The y-axis displays the number of distinct UMIs for each barcode (here barcodes are proxies for cells).
-            # cellRank_sorted.json: [{"u": 0, "rank": 17852}, {"u": 1, "rank": 17412},...]  -> this should be {"u": 11852, "rank": 0}, {"u": 15123, "rank": 1}
-            # plot2 = list(u = seurat_obj$nCount_RNA, rank = order(seurat_obj$nCount_RNA))
-            cellSizeDistributionKneePlot = plot2_data
-        )
+        plotData = plots
     )
 
     return(result)

diff --git a/remoter-server/src/classifier.r b/remoter-server/src/classifier.r
@@ -13,6 +13,8 @@
 #'                  - filterThreshold: 
 #' @export return a list with the filtered seurat object by probabilities classifier, the config and the plot values
 
+source('utils.r')
+
 generate_default_values_classifier <- function(seurat_obj, config) {
 
         # HARDCODE
@@ -36,7 +38,7 @@ generate_default_values_classifier <- function(seurat_obj, config) {
 #' @export return a list with the filtered seurat object by mitochondrial content, the config and the plot values
 
 
-task <- function(seurat_obj, config){
+task <- function(seurat_obj, config, task_name, sample_id){
     # config$filterSettings = list(minProbability=0.82, bandwidth=-1, filterThreshold=-1)
     # Check whether the filter is set to true or false
     # For some reason the last children of named lists are computed as vectors, so we can't access them as recursive objects. 
@@ -67,13 +69,14 @@ task <- function(seurat_obj, config){
     # update config
     config$filterSettings$minProbability <- minProbability
 
+    plots <-list()
+    plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)
+
     # the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}
     result <- list(
         data = seurat_obj.filtered,
         config = config,
-        plotData = list(
-            classifierEmptyDropsPlot = plot1_data
-        )
+        plotData = plots
     )
 
     return(result)

diff --git a/remoter-server/src/doubletScores.r b/remoter-server/src/doubletScores.r
@@ -5,6 +5,7 @@
 # To separate cells with a low droplet score from the ones that have a high droplet score, which makes us think that they are mistakenly considered a single cell when they are actually two or more.
 # This can be a useful first guess. The settings for such a filter can also contain a simple "probabilityThreshold" setting. 
 
+source('utils.r')
 
 # The most uses values in doublet scores reporting in the scrublet paper [1] are around 0.25. There are not too much literature about how to compute
 # a threshold. For now, we will offer two methods:
@@ -28,7 +29,7 @@ generate_default_values_doubletScores <- function(scdata, config) {
 #'                  - binStep: Float. Bin size for the histogram
 #' @export return a list with the filtered seurat object by doublet score, the config and the plot values
 
-task <- function(scdata, config){
+task <- function(scdata, config, task_name, sample_id){
     # Check if the experiment has doubletScores
     if (!"doublet_scores"%in%colnames(scdata@meta.data)){
         message("Warning! No doubletScores scores has been computed for this experiment!")
@@ -51,15 +52,18 @@ task <- function(scdata, config){
     # update config
     config$filterSettings$probabilityThreshold <- probabilityThreshold
     plot1_data <- lapply(unname(scdata$doublet_scores),function(x) {c("doubletP"=x)})
+
+    plots <-list()
+
+    # plot 1: histogram of doublet scores
+    #              [0.161,              0.198,              0.284,  ...]
+    plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)
+
     # the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}
     result <- list(
         data = scdata.filtered,
         config = config,
-        plotData = list(
-            # plot 1: histgram of doublet scores
-            #              [0.161,              0.198,              0.284,  ...]
-            doubletFilterHistogram = plot1_data
-        )
+        plotData = plots
     )
     return(result)
 }

diff --git a/remoter-server/src/mitochondrialContent.r b/remoter-server/src/mitochondrialContent.r
@@ -5,6 +5,8 @@
 # To separate cells with low MT-content from the ones that have a high MT-content what makes us think that are dead.  
 # This can be a useful first guess. The settings for such a filter can also contain a simple "probabilityThreshold" setting. 
 
+source('utils.r')
+
 # The most uses values in MT-content are between [0.1, 0.2]. There are not too much literature about how to compute
 # a threshold. For now, we will offer two methods:
 # --> Absolute threshold: In order not to be too restrictive, the threshold is set to 0.1
@@ -31,7 +33,7 @@ generate_default_values_mitochondrialContent <- function(scdata, config) {
 #'                          * we are supposed to add more methods ....
 #' @export return a list with the filtered seurat object by mitochondrial content, the config and the plot values
 
-task <- function(scdata, config){
+task <- function(scdata, config, task_name, sample_id){
     print(config)
     # Check if the experiment has MT-content
     if (!"percent.mt"%in%colnames(scdata@meta.data)){
@@ -63,26 +65,29 @@ task <- function(scdata, config){
     # update config
     config$filterSettings$methodSettings[[config$filterSettings$method]][["maxFraction"]] <- maxFraction
 
+    plots <- list()
+
+    # plot 1: histogram of MT-content
+    # AAACCCAAGCGCCCAT-1 AAACCCAAGGTTCCGC-1 AAACCCACAGAGTTGG-1
+    #              0.161              0.198              0.284  ...
+    plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)
+
+    # plot 2: There are two alternatives:
+    #           - Scatter plot with UMIs in the x-axis and MT-content in the y-axis
+    #           --> code: plot2 = list(u=scdata$nCount_RNA.mt, "MT-content" = scdata$percent.mt)
+    #           - Barplot representing in the x-axis the log10(UMIs) and in the y-axis the MT-content. This option is the one 
+    #           that is shown in the mockup.
+    #           --> code: plot2 = list(log_10_UMIs=log10(scdata$nCount_RNA), MT_content =mscdata$percent.mt)
+    # We have decided to use the scatter plot, but I am temporarily leaving the other option in the comments.
+    # Q: Should we return from the R side the cells that are going to be removed? For this plot it is interesting to color the
+    # cells that are going to be excluded. 
+    plots[generate_plotuuid(sample_id, task_name, 1)] <- list(plot2_data)
+
     # the result object will have to conform to this format: {data, config, plotData : {plot1, plot2}}
     result <- list(
         data = scdata.filtered, # scdata filter
         config = config,
-        plotData = list(
-            # plot 1: histgram of MT-content
-            # AAACCCAAGCGCCCAT-1 AAACCCAAGGTTCCGC-1 AAACCCACAGAGTTGG-1
-            #              0.161              0.198              0.284  ...
-            mitochondrialFractionHistogram = plot1_data,
-            # plot 2: There are two alternavitive:
-            #           - Scatter plot with UMIs in the x-axis and MT-content in the y-axis
-            #           --> code: plot2 = list(u=scdata$nCount_RNA.mt, "MT-content" = scdata$percent.mt)
-            #           - Barplot representing in the x-axis the log10(UMIs) and in the y-axis the MT-content. This option is the one 
-            #           that is shown in the mockup.
-            #           --> code: plot2 = list(log_10_UMIs=log10(scdata$nCount_RNA), MT_content =mscdata$percent.mt)
-            # We have decided to use the scatter plot, but I temporaly leave the other option in the comments. 
-            # Q: Should we return from the R side the cells that are going to be removed? For this plot it is interesting to color the
-            # cells that are going to be excluded. 
-            mitochondrialFractionLogHistogram = plot2_data
-        )
+        plotData = plots
     )
     return(result)
 }
diff --git a/remoter-server/src/numGenesVsNumUmis.r b/remoter-server/src/numGenesVsNumUmis.r
@@ -37,6 +37,8 @@
 #   }
 
 
+source('utils.r')
+
 #' @description Filters seurat object based on classifier filter
 #' @param config list containing the following information
 #'          - enable: true/false. Refering to apply or not the filter.
@@ -48,7 +50,8 @@
 #'                          - gam: for the gam option there is only one element: 
 #'                                - p.level: which refers to  confidence level for deviation from the main trend
 #' @export return a list with the filtered seurat object by numGenesVsNumUmis, the config and the plot values
-task <- function(scdata, config){
+
+task <- function(scdata, config, task_name, sample_id){
     # Check whether the filter is set to true or false
     if (!as.logical(toupper(config$enabled)))
         return(scdata)
@@ -92,19 +95,20 @@ task <- function(scdata, config){
     plot1_data <- purrr::map2(plot1_data,unname(pb$lwr),function(x,y){append(x,c("lower_cutoff"=y))})
     plot1_data <- purrr::map2(plot1_data,unname(pb$upr),function(x,y){append(x,c("upper_cutoff"=y))})
 
+    # Scatter plot which is composed of:
+    # x-axis: log_10_UMIs
+    # y-axis: log_10_genes
+    # bands that are formed by the upper_cutoff and the lower_cutoff. We can print a band or dotted lines.
+    # Q: Should the R side point out the cells that are going to be excluded, or can this task be done on
+    # the UI side?
+    plots <- list()
+    plots[generate_plotuuid(sample_id, task_name, 0)] <- list(plot1_data)
+
     # the result object will have to conform to this format: {data, config, plotData : {plot1}}
     result <- list(
         data = scdata.filtered,
         config = config,
-        plotData = list(
-            # Scatter plot which is composed of:
-            # x-axis: log_10_UMIs
-            # y-axis: log_10_genes
-            # bands that are conformed with the upper_cutoff and the lower_cutoff. We can print a band or dotted lines. 
-            # Q: Should we return the point out the cells that are going to be excluded from the R side or this task can be done in 
-            # the UI side.  
-            featuresvsUMIsscatterplot = plot1_data
-        )
+        plotData = plots
     )
 
     return(result)

diff --git a/remoter-server/src/test_fn.r b/remoter-server/src/test_fn.r
@@ -2,26 +2,30 @@
 # Its only purpose is to demonstrate the input and output format
 # that you will expect to your pipeline steps.
 
+source('utils.r')
+
 # some dummy function to check that imports do not import additional
 # functions from a file into the namespace
 some_other_stuff <- function(a) {
     a + 5
 }
 
 # a sample task
-task <- function(input_data, input_config) {
+task <- function(input_data, input_config, task_name, sample_id) {
 
     # example where after coming up with sensible defaults the configuration
     # will be changed to a different number, say, 420
     config <- input_config
     config$filterSettings[["minCellSize"]] <- 420
-
+    plots <- list()
+    plots[generate_plotuuid(sample_id, task_name, 0)] = c(1, 2, 3)
+    plots[generate_plotuuid(sample_id, task_name, 1)] = c(4, 5, 6)
 
     # the result object will have to conform to this format.
     result <- list(
         data = input_data,
         config = config,
-        plotData = list()
+        plotData = plots
     )
 
     return(result)