From 909c11207c555e4bc1bff9a9bcd55fa144a722e7 Mon Sep 17 00:00:00 2001 From: ogibson-biomage <76957896+ogibson-biomage@users.noreply.github.com> Date: Thu, 27 May 2021 16:51:15 -0300 Subject: [PATCH] [BIOMAGE-1016] Sample uuids fix (#72) * Sample ids property sent as list instead of string * Fix * Draft fix for uuids * Fix * fix * fix * checks Co-authored-by: Martin Fosco Co-authored-by: Oliver Gibson Co-authored-by: Martin Fosco --- .../src/data-ingest/0-download_gem2s.r | 4 +- .../src/data-ingest/5_Upload-to-aws.r | 97 +------------------ 2 files changed, 7 insertions(+), 94 deletions(-) diff --git a/pipeline-runner/src/data-ingest/0-download_gem2s.r b/pipeline-runner/src/data-ingest/0-download_gem2s.r index 8af4cb32..1bdf05bd 100644 --- a/pipeline-runner/src/data-ingest/0-download_gem2s.r +++ b/pipeline-runner/src/data-ingest/0-download_gem2s.r @@ -20,7 +20,7 @@ task <- function(input, pipeline_config) { message(gem_key) sample_name = sample_names[[match(sample,sample_uuids)]] #Preparing directories - local_dir <- file.path('/input',sample_name) + local_dir <- file.path('/input',sample) #unlink(local_dir, recursive = TRUE) dir.create('/input') dir.create(local_dir) @@ -50,7 +50,7 @@ task <- function(input, pipeline_config) { # Key = meta_key #) #writeBin(body, con = "/input/meta.json") - config <- list(name = input$experimentName, samples=input$sampleNames, + config <- list(name = input$experimentName, samples=input$sampleIds, organism = input$organism, input = list(type="10x") ) diff --git a/pipeline-runner/src/data-ingest/5_Upload-to-aws.r b/pipeline-runner/src/data-ingest/5_Upload-to-aws.r index f3432bac..2be74423 100644 --- a/pipeline-runner/src/data-ingest/5_Upload-to-aws.r +++ b/pipeline-runner/src/data-ingest/5_Upload-to-aws.r @@ -4,87 +4,12 @@ color_pool <- RJSONIO::fromJSON("/src/data-ingest/color_pool.json") input_dir <- '/input' output_dir <- '/output' -# creates the table information for samples -create_samples_table <- function(config, experiment_id, project_id) { - # In samples_table we are going to add the core of the information - samples_table <- list() - - # flag_filtered information - df_prefiltered <- read.csv(file.path(output_dir, 'df_flag_filtered.txt'), - sep = '\t', - row.names = 'samples') - - samples <- row.names(df_prefiltered) - samples_table$ids <- lapply(samples, function(x) paste0("sample-", x, sep = "")) - - # For the current datasets it could happen that they are not in the gz format, so we leave the alternative tsv format. - mime_options = c( - "tsv" = "application/tsv", - "gz" = "application/gzip", - "mtx" = "application/mtx" - ) - - for (sample in samples) { - - prefiltered <- df_prefiltered[sample, 'flag_filtered'] == 'Filtered' - - # Identify datetime - cdate <- mdate <- Sys.time() - fnames <- list() - - # files that are not hidden - sample_files <- file.path( - sample, - list.files(file.path(input_dir, sample)) - ) - - # Iterate over each file to create the slot - for (sample_file in sample_files) { - - fext <- tail(strsplit(sample_file, '[.]')[[1]], 1) - - fnames[[sample_file]] <- list( - objectKey = '', - name = sample_file, - size = file.info(file.path(input_dir, sample_file))$size, - mime = mime_options[[fext]], - success = TRUE, - error = FALSE - ) - } - - # Add the whole information to each sample - samples_table[[paste0("sample-", sample)]] <- list( - name = sample, - uuid = uuid::UUIDgenerate(), - species = config$organism, - type = config$input[['type']], - createdDate = strftime(cdate, usetz = TRUE), - lastModified = strftime(mdate, usetz = TRUE), - complete = TRUE, - error = FALSE, - fileNames = sample_files, - files = fnames, - preFiltered = prefiltered - ) - - } - - - return(list( - "experimentId" = experiment_id, - "samples" = samples_table, - "projectUuid" = project_id - )) -} - - -samples_sets <- function(){ +samples_sets <- function(config){ sample_annotations <- read.csv(file.path(output_dir, "samples-cells.csv"), sep = "\t", col.names = c("Cells_ID","Value"), na.strings = "None") - + cell_set <- list(key = "sample", name = "Samples", rootNode = TRUE, @@ -95,15 +20,14 @@ samples_sets <- function(){ for (sample in samples) { view <- sample_annotations[sample_annotations$Value == sample, "Cells_ID"] - child <- list(key = paste0("sample-", sample), - name = sample, + child <- list(key = paste0(sample), + name = config$sampleNames[[match(sample,config$sampleIds)]], color = color_pool[1], cellIds = view) color_pool <- color_pool[-1] cell_set$children[[length(cell_set$children)+1]] <- child } - return(cell_set) } @@ -151,8 +75,6 @@ task <- function(input, pipeline_config) { experiment_id <- input$experimentId project_id <- input$projectId - sample_names <- input$sampleNames - sample_uuids <- input$sampleUuids # save experiment_id for record-keeping writeLines(experiment_id, file.path(output_dir, "experiment_id.txt")) @@ -172,9 +94,7 @@ task <- function(input, pipeline_config) { type = "cellSets" ) - # TODO: maybe we don't need samples_data - samples_data <- create_samples_table(config, experiment_id, project_id) - samples_set <- samples_sets() + samples_set <- samples_sets(input) # Design cell_set meta_data for DynamoDB cell_sets <- list(scratchpad,samples_set) @@ -220,13 +140,6 @@ task <- function(input, pipeline_config) { item = experiment_data, task_name = "uploadToAWS") - # samples data to dynamodb - send_dynamodb_item_to_api(pipeline_config, - experiment_id = experiment_id, - table = pipeline_config$samples_table, - item = samples_data, - task_name = "uploadToAWS") - if (cluster_env == "production") print(sprintf("https://scp.biomage.net/experiments/%s/data-exploration", experiment_id))