From 0475d2df6684eb7f497ccff1d4caa80c18f69cb5 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 2 Dec 2022 18:18:52 -0300 Subject: [PATCH 01/34] add infra to init-functions, draft subset step, improve data handling --- .../R/gem2s-6-prepare_experiment.R | 5 +- pipeline-runner/R/gem2s-X-subset_experiment.R | 55 ++++++++++++++++++ pipeline-runner/R/handle_data.R | 12 ++-- pipeline-runner/R/init-functions.R | 38 +++++++++++- pipeline-runner/R/sysdata.rda | Bin 3116 -> 3123 bytes pipeline-runner/data-raw/sysdata.R | 7 ++- 6 files changed, 106 insertions(+), 11 deletions(-) create mode 100644 pipeline-runner/R/gem2s-X-subset_experiment.R diff --git a/pipeline-runner/R/gem2s-6-prepare_experiment.R b/pipeline-runner/R/gem2s-6-prepare_experiment.R index 5688a94d..8c636c6b 100644 --- a/pipeline-runner/R/gem2s-6-prepare_experiment.R +++ b/pipeline-runner/R/gem2s-6-prepare_experiment.R @@ -1,8 +1,7 @@ #' Prepare experiment for upload to AWS #' -#' 1) Merges the samples for the current experiment -#' 2) Adds metadata: cellsId, color_pool, and gene annotation -#' 3) Preparing QC configuration +#' 1) Adds metadata: cellsId, color_pool, and gene annotation +#' 2) Prepares QC configuration #' #' @inheritParams download_user_files #' @param prev_out 'output' slot from call to \code{create_seurat} diff --git a/pipeline-runner/R/gem2s-X-subset_experiment.R b/pipeline-runner/R/gem2s-X-subset_experiment.R new file mode 100644 index 00000000..95b82c74 --- /dev/null +++ b/pipeline-runner/R/gem2s-X-subset_experiment.R @@ -0,0 +1,55 @@ + +download_cellsets_file <- function(parent_experiment_id) { + # download parent experiment cellsets file from S3 +} + +parse_cellsets <- function(cellset_path, cellset_type) { + cellsets <- jsonlite::fromJSON(cellset_path, flatten = T) + + cellsets$cellSets |> + filter(key == cellset_type) %>% + .$children |> + as.data.frame() |> + as_tibble() |> + select(key, name, cellIds) + +} + +create_subset_experiment <- function(input, pipeline_config) { + + parent_experiment_id <- input$parentExperimentId + subset_experiment_id <- input$subsetExperimentId + cellset_keys <- input$cellSetKeys + + # load parent processed scdata and cellsets + s3 <- paws::s3(config = pipeline_config$aws_config) + parent_scdata <- load_processed_scdata(s3, pipeline_config, parent_experiment_id) + parent_cellsets <- load_cellsets(s3, pipeline_config, parent_experiment_id) + + cell_ids_to_keep <- get_cell_sets(parent_cellsets, cellset_keys) + + sample_id_mapping <- input$sampleIdMapping + + # subset seurat object + scdata <- subset_ids(scdata, cell_ids_to_keep) + + # add subset experiment name to the subset seurat object + scdata$project <- input$name + + # add new sample_ids, keep originals in a new variable + scdata$parent_samples <- scdata$samples + scdata$samples <- sample_id_mapping[match(parent_samples, sample_id_mapping)] + + # split by sample + scdata_list <- Seurat::SplitObject(scdata, split.by = "samples") + + prev_out$scdata_list <- scdata_list + prev_out$annot <- scdata@misc + res <- list( + data = list(), + output = prev_out + ) + + message("\nSubsetting of Seurat object step complete.") + return(res) +} diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index 44018335..8082dd6e 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -25,7 +25,7 @@ upload_cells_id <- function(pipeline_config, object_key, cells_id) { return(object_key) } -reload_scdata_from_s3 <- function (s3, pipeline_config, experiment_id) { +load_processed_scdata <- function (s3, pipeline_config, experiment_id) { bucket <- pipeline_config$processed_bucket message(bucket) message(paste(experiment_id, "r.rds", sep = "/")) @@ -45,10 +45,10 @@ get_nnzero <- function (x) { } order_by_size <- function(scdata_list) { - return(scdata_list <- scdata_list[ order( sapply(scdata_list, get_nnzero)) ]) + return(scdata_list[order(sapply(scdata_list, get_nnzero))]) } -reload_scdata_list_from_s3 <- function (s3, pipeline_config, experiment_id) { +load_source_scdata_list <- function (s3, pipeline_config, experiment_id) { bucket <- pipeline_config$source_bucket objects <- s3$list_objects( Bucket = bucket, @@ -83,11 +83,11 @@ reload_data_from_s3 <- function(pipeline_config, experiment_id, task_name, tasks # If the task is after data integration, we need to get scdata from processed_matrix if (match(task_name, task_names) > integration_index) { - return(reload_scdata_from_s3(s3, pipeline_config, experiment_id)) + return(load_processed_scdata(s3, pipeline_config, experiment_id)) } # Otherwise, return scdata_list - return(reload_scdata_list_from_s3(s3, pipeline_config, experiment_id)) + return(load_source_scdata_list(s3, pipeline_config, experiment_id)) } @@ -200,7 +200,7 @@ send_gem2s_update_to_api <- function(pipeline_config, experiment_id, task_name, sns <- paws::sns(config = pipeline_config$aws_config) # TODO -REMOVE DUPLICATE AUTHJWT IN RESPONSE msg <- c( - data, + data, taskName = list(task_name), experimentId = list(experiment_id), authJWT = list(input$auth_JWT), diff --git a/pipeline-runner/R/init-functions.R b/pipeline-runner/R/init-functions.R index 25df328b..340595d6 100644 --- a/pipeline-runner/R/init-functions.R +++ b/pipeline-runner/R/init-functions.R @@ -249,6 +249,40 @@ call_gem2s <- function(task_name, input, pipeline_config) { } +#' Call subset gem2s +#' +#' Runs step `task_name` of the subset GEM2S pipeline, sends output message to the API +#' +#' @param task_name character name of the step +#' @param input list containing +#' - parentExperimentId +#' - childExperimentId +#' - sample IDs, and names +#' @param pipeline_config list as defined by load_config +#' +#' @return character message id +#' +call_subset_gem2s <- function(task_name, input, pipeline_config) { + experiment_id <- input$experimentId + # remove when it's added to the input + input$subset_experiment <- TRUE + + if (!exists("prev_out")) { + remove_cell_ids(pipeline_config, experiment_id) + assign("prev_out", NULL, pos = ".GlobalEnv") + } + + check_input(input) + tasks <- lapply(SUBSET_GEM2S_TASK_LIST, get) + + c(data, task_out) %<-% run_gem2s_step(prev_out, input, pipeline_config, tasks, task_name) + assign("prev_out", task_out, pos = ".GlobalEnv") + + message_id <- send_gem2s_update_to_api(pipeline_config, experiment_id, task_name, data, input) + + return(message_id) +} + #' Call QC pipeline #' #' Runs step `task_name` of the data processing pipeline, sends plot data to s3 @@ -258,7 +292,7 @@ call_gem2s <- function(task_name, input, pipeline_config) { #' @param input list containing: #' - step parameters for all samples #' - current sample UUID -#' - uploadCountMatrix (wether or not to upload matrix after step) +#' - uploadCountMatrix (whether or not to upload matrix after step) #' @param pipeline_config list as defined by load_config #' #' @return character message id @@ -450,6 +484,8 @@ wrapper <- function(input, pipeline_config) { message_id <- call_qc(task_name, input, pipeline_config) } else if (process_name == "gem2s") { message_id <- call_gem2s(task_name, input, pipeline_config) + } else if (process_name == "subsetGem2s") { + message_id <- call_subset_gem2s(task_name, input, pipeline_config) } else { stop("Process name not recognized.") } diff --git a/pipeline-runner/R/sysdata.rda b/pipeline-runner/R/sysdata.rda index 7ddc51f6298c99a3304cba1eff0aa729041a1bd8..58e71f4714dfaaad1c80ce19cd060f1c220a1bd5 100644 GIT binary patch delta 3122 zcmV-249)Yb7_%4-LRx4!F+o`-Q&}>=HW85yB7gYCYklbYUwmBjeF_`u8cloMPrGdP z?(r=(v;-2SiK3oODu0yP0X(XIrh=Z*1w1L?q{=-{Ow}MQ1u3Y15E~i0000013&;ssMS11gq~5S>Q7K;0004@ zpaz3Ss0X7+&}h@j84N=p03?V_MofXJlLYtP*(jHLLLqGr;03M(K003w-{V4T1 zoqUd$_`9cOn!y+_ZLy4FZLk|{4Wu^8jDKZhh?eChL?WmKfP^0b5D;M<(m=5cBoB<4 zG9axX2q4@U9sJxyV*!+4$NJ_kkrRGCcl~K0WEe>c(e}9gc5Le*p!wf_aTOO?Sm3>&b$x;??KLTp$QGIwavqMb?R zLPAQb1d?n42+|^$#y7Yp&@?&e&()#cg|QhHb2=hVUD6T&@9E0cw{5?BTHQYL@%C$6 zYcD?8nVUb3bLO^$swLo&Q9I-G8?mq2Q}c zw4>p2CcQ@99Gy;BE-JD2so2}rZM3JQmg1IzGiB2o$w|e$WVz``*b%SD(7-$sy{a=A z&$|tX*+>q=j~Q2gn;am3Nc_m;^g@?RxsF=rBoHW$B&DmY>NiIdZuW_|Td(D9H0<+) zO)Wb#=Fm)#gvjM7GAyFvMt`Vf4km^WYLP1{21f90X}XzrX0+nesJO>j08$(MG{l(^ zJL6k63=@+E7EbfZaS^RG6sn*h^@XWAy@8>vRa3NC1t91PBEU{S%&h>TOk~i|uFi5o zrcVN<@~qftdS@e5YWujjL>k!$0LX-rOfUd}hyX=YBn>E%2Ag6UM1Mj67)HcIgv63S zM$nrOA|`|(B8H}7LHlA!AdsMOFkk@!wM~}LY}jA#BeO`Q$1oHw9svyv)QpRVAjQ)9 z-Os0c95l*};&PN>K>5-_1aZhpQWBt~5jzRs^>;ztkFcW%r06D%S)R^?V5by=2s=F9 z98}cN(43`+;@#oi;D6v|;EC^N2fFO%qF&dFpP3m$DA^Iqs$N~>*zJsNoOJDNjUyD2 zMBObxypA!%YGqB~tFS0->Calp5rGWI48%3gIpZwndbzMd=>lZ1s|gcw1Hu9G;akpf zCQNaAL&8GFq}G?SPItN%GJ-2bawmeJt%8#mW(|%5#C$5JeSdT?5P)q=yj6y6NnF)1 zLRp88Ucc?YKK#Jeju^&s)399JU+@^*cG>Q3$ysn0M&V z9=X*uV~sw&LJbVIbk8uyDT9b;&zkK?2V`$*z3(=dF+v^lz%Ih>ChHGol^H0+(zrw5 z`Sas9oy@IgcYhLMI7>%7h#?c__#N7AiE+rWn+rp(o5cRiCKbgrQq#@^KM zbvlN2SQ6^_Y0XL*P#mzcfZ?3O2qFU_Un@Km`y;H{*nilIXO)%1Cs}1E*mrGe)N4bz zwun}YESiikgyuy}juARS#~X$f!sWSnD0~n&-xqkv2C72p({{c|4iYjATO7cx`~xB< z5sYJY`QN~BbrV)8wyT!ILzfi+q8yY2A`2TC*A5yj8nHlTqFzRMtrlHCYRjxS7{$vV zbn-pOhJSB2xfZiS7l;dOXukU0m{O3Mg}D(*wzbq1TrPNIL{^A3v>pgVLLMbB4Gn2! zRSK}UL3B8$Vl5uRS!)i&gjcNOA+Ib;+!+}b-C$(o*?$x*6B|(MRKU%A8Qt(E0}9>D z&W2A)8a*LmDCVkCFe-*(aYQnTJr!7x$rgrU=YJK~Q`E%(V1>Cx12@!apsQh?J!TU-=DhFu6{3MC#`?u*RAD*-G7!!A%%nHM!+ zMp9ZvZwy=yiHI&GQ%aj4gu|nZ8f0qItbsVOgFxz*V8bBz5Z2Q)D^^%DQLNBhU6ZC~ zn}5=BrXWj%5M)Mf-rRNFjet85v^j7=-S;%{@y9Sjf+kQW%Y)gvp!CqaBWG*mTvbAt^B2=xd4zhnGzYPMvKNNdbK>&#&!l^c4j;{ zKMT&%y0W==Ffej)uB)Ccv&cg2t_QiDZ+~25a^0&vfyq~8*q+Bk_5=7(pjYkyh@LGo zq)ALJ$1Q3q@MHtUajbz60$4}OxmFB07=(khNXpV)rY8RpO~2DTWUd6WXP3P&8Z zZ{B!J-1xAFhy|E5gofJ$#=sbq(ti~Q0FsIhm3>h$eokugP7GW&8f`O`^CyDB4-0Ij z4XVMeAeFt2T zRgkU`t1RG}vK7U^4dT8g3R z3Wh$+S)$5Zj=)t{U5kXYnwcTp2|p1VyXMNA?4UHr7}j8vDW#hfB6b!P2b9N42X#?N zzohEWPq7IZ3ooqB-NToL3d3WoR6<2$O;I-~=>l-Nc=sbB!~*RV@u8q^+ym6&EYR zicpHjy^2Be9`3E}kgEWdAUV1uhcd@O>vp!1q<5Z~NU4+=Sf{TWFT}do8C*wTB3=Qg zAV~y7?3U)Dv!3+8p?~YKnT$1yS%VjHSv|?n%r1A}$aHI?*j~1cORBLG0 z0m!f*Qf?H)(&auzW3n0PL3SC9QyP%MVx-t}jr-cRVvY(xT*8A^Ukr?CqCiH@oK-AD z;X^|OvTz(ICVzJn@tm7UP&2cUGbsfkfxtTgF3MV;7A%O_Bz4FasSp*Mf?hS14<*^b1=Eb$avSscd2pcTP;|S`SO@_li2udh MkxmpO46qG^K$4%PF8}}l delta 3115 zcmV+`4Ak?p7_1l$LRx4!F+o`-Q&~#NT7QuaB7Z)_&G)T+FCTrq^hlxKX`z{Rqh6i2 z+}2E>^w$YfMA0!5$)E<4&?X25L7<~*dSafD=`$iU0000QWEub(2qQoNG-#&Lq}o#+ zqct@Lq%_k*AQ}Jw00000&=FJc1WJ0RwNFGeGynhqG&BGJ000008UO$Wr;vhZflpM= zQGck}A+%_in1`qu8Vvvd000008UO-CCQT{m)f-dFKS}@^00000000000000qG!lq} zo>Xn5^)%Y2=`=tY01Td{ng9ScpdOD%5u-911ZebzY95>}H(P;_d4Ac7))9jS+Ze_+ z+X1%F+Cz4t`C%d^3rUd(szG9uQ4CTbAb-3<2_RWPi3Bf`CPWqjiYP&q8aFe5iv|Uw z3;S{qjtILxpZxLBC<-DVp1i4kDzt9z))}p7{ItYvCoMVlqFDN^zKwYgCzgN=##_PO zjpvLYwh>j{C;+;*R`!;&gbec+(oy)STwcU9DH-y zkb~D6R-~>;63Bok0K*LzaFD2XV}DmJB}};ahsN6O+^Go(D;Ok`U)XuRX-{J<#VsUf%c?h0lZ$wXZ>1w(My|6<0PRG>jjg&7hef36aWDWLZVT zjZn%QO$;E_B34ukjo{kTb$>GM&1uD{QE`rB0Z4K>sfjWocgD7C7$+tSES=|+;v+QF zQmTN5%q>aP>CZ*QB1L^`2yKK+dm4%jh3#Is9Qj8{8dpgpZ(xFzHW zh!U8DkhTa(3S{YIu?lPK$>K^-B`O<@ah$0sgR3*y(5w{VkYNXBo5PBlnmQAdu^jF{ zD|dT&n|ZF`j7}`h%76C8zK!*}6R~bcX1MljE8Kr^+0$8?-kC9FZEEi-a@-c@;72gC zD(;R?G@-Y%9ot$=MaX7AXd$sVX^h%UGuaFh1j$>eR}v=V2$}))VQbEECQNaAL&8GF zq}G*D7r_ddK@_6EiP=!r!AXm=2FC!$-dkXiW8x2ugM)@bbAOvushLVRGGDhX3CNf4 zuJzm3yBW~UO-L76l+2kt5pQJOuUXb?*we#+yu%vY#^W4Ii)&VnA{WyW%>nD8m%RSI zK;AYo;q0Dqj#CE^(W5=uk`Bn;)qCD;Fk*x|=zv{?+)dUKpv67R>1caCeD|Bq=2o-2 zi7^|+qkQDfvVTJ(E?E&gFmg1#vQZF@TyDpKeuEsvdae0pT&!@z3^2Wk_i~(c%vi6* z6!-4k(JgxtuG@$*tajbpCvh7WtJAZM%aH_7;LT=da)`QDPK_gPYIb!xhIUpZ)){Qi zN*Pcbu(N>SoWlqr10r85JQVyf#oE`dEuvOe44q|^qkmhwX01kOcQ(@s(S?&yh7g>{ zsj?FE*E*`WVZ1UTD?|~k@OU8+2#1uoXlqMrQC1fSE{_=P zMWfnFEos`2iuN3YHRXwX0lmFVla0G)qR~mI(#4&usm`D|?|n?EM6Y_P!BOynvFQsD zM>SHBfmAaSiXoIy=&HnqNVGE-II_yUSfC7KLVskHjYul)5RfJWcLmpJ^wqh-DN3r7 z0G)_vNRGI;u9-=SP#S1UeZfQ+WzdFDqEY3K1YTwlSP5Vz8FGT8$hoTmGLq6WxyZm6 zSVF>8TEk%EOgcEhrbewg$P2Gb%#93(ce8l%vkP#$w*>qJkjIY8&5RC)- z<6Q?lJ7$s=YiN6w*7&0%mdy#O4obSN#DDfHvs4e=K>%N{0EnP2Go&_|FU^jnD>Ax4 z0cCa&A|Ol&2kz0Z>QHY6^Nmat3G|#(Wr2!huNf{E#f3WO92+`C-fpi#VN9{lm# zM~%6RFklf(2*!vVo)hyVCcsZ;G1k)`s3;7tOOgm%r$xjsy7bGE>03lmfCz~yHGd%n zr+ZMUJeJX2QJf^LS-7I3coG93L_nJnoJcy$IGeyS&2t1Y;gc~gWx_hhbsS9a<27eZ zT0{}+P-gY-&7}b&K>o5q`g0wRkGIXL9jIud33H3VvBoBvRRL!dj+5%wkn+nG8Jt8& zBF2!A+hCa30}`r|=@1f7i^RQvM1Lp}4gSd4UfITbMqxIm(>PRHXM&#wxmMPM*nt|2 z24KOg;%xFcxfrobH=C3IL{E+33~Y&H90nT$LCR--mX=MIisXna6={s4aIUr`h1b~8 z-KaR5OSsOabwu8FBpTqnVyx?&c@yF}x@Wk_U699VahyFjCGVZ$m1QO1pntJ3#xEgd zKv@B3GJ84^VSU|u0v!_6 z>Jv@P)wa_Mpt81!(bNJOp?=Gi%Jq5^QtY<+kYtN-1|Us>uNvJszmFGO)S@+OF4}`e z`?-{0EfFiGj&r}y>+oGK5`PwU(;5Qi8-@gU{B=c%Zoa`F*Zp&Q6mpET z+yuqx-IPm=wUT~tYV(dic*P3Kj1l35C0GWs2oWS>hMtiKc~qx67l1%9F966N;z z{N~B8Jf^5)7mbTxyB4YjXo(c~nIQ;ezk14_l+aB<`;pEkDC#83+Z7ih!-`Of#k;9U ze8rzF&48f*kic^D(x2Es;2(xgBa)pj&_-Ga4pk@V9T@0J^(~;?0YYjGFko2<2H}xi zz9t;xf`=s2nV70zTz>+Z`2_E|?m)x=w*d)!AP)S%%)0>p$uDJ{^|smA5~Y1k*I z!3_<1wrsXt5;#L~EY2blQDYHREv;WIR&{CPDXT|A}M!Cp~RK!(mLfvsD`Nr8vu}- zx>A%ywD{edl1wQ#z0Nezd)S1F7#%&SWG;cNyK(#vWNMFW2# z`2!R~Fbzs}6d`|7vpr(Nh@4`8lNjheN?R^-E?m4o-+#{4xLkcUC9*TLP0Aj FY5epZls^Cf diff --git a/pipeline-runner/data-raw/sysdata.R b/pipeline-runner/data-raw/sysdata.R index abd14a09..884288ec 100644 --- a/pipeline-runner/data-raw/sysdata.R +++ b/pipeline-runner/data-raw/sysdata.R @@ -24,6 +24,12 @@ GEM2S_TASK_LIST <- list( "uploadToAWS" = "upload_to_aws" ) +SUBSET_GEM2S_TASK_LIST <- list( + "subsetSeurat" = "create_subset_experiment", + "prepareExperiment" = "prepare_experiment", + "uploadToAWS" = "upload_to_aws" +) + # vector of task functions named by task name QC_TASK_LIST <- list( "classifier" = "filter_emptydrops", @@ -38,7 +44,6 @@ QC_TASK_LIST <- list( # directory where download_user_files downloads user files INPUT_DIR <- "/input" - # constants used in GEM2S gem2s <- list( max.edrops.fdr = 0.001, From 7321a3898baa78ccd2b1350c6c9fde94d574f3f7 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Mon, 5 Dec 2022 11:05:58 -0300 Subject: [PATCH 02/34] add func to download and parse cellsets, start tests --- pipeline-runner/R/gem2s-X-subset_experiment.R | 45 ++++++++++++++----- pipeline-runner/R/handle_data.R | 17 +++++++ .../testthat/test-gem2s-X-subset_experiment.R | 13 ++++++ 3 files changed, 65 insertions(+), 10 deletions(-) create mode 100644 pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R diff --git a/pipeline-runner/R/gem2s-X-subset_experiment.R b/pipeline-runner/R/gem2s-X-subset_experiment.R index 95b82c74..cf116a37 100644 --- a/pipeline-runner/R/gem2s-X-subset_experiment.R +++ b/pipeline-runner/R/gem2s-X-subset_experiment.R @@ -1,20 +1,45 @@ - -download_cellsets_file <- function(parent_experiment_id) { - # download parent experiment cellsets file from S3 -} - -parse_cellsets <- function(cellset_path, cellset_type) { - cellsets <- jsonlite::fromJSON(cellset_path, flatten = T) +#' Extract cellset type as data.frame +#' +#' Gets the cellsets list and converts it to tidy tibble, keeping only the +#' the required cellset type +#' +#' @param cellsets +#' @param cellset_type +#' +#' @return +#' @export +#' +parse_cellsets <- function(cellsets, cellset_type) { cellsets$cellSets |> - filter(key == cellset_type) %>% + dplyr::filter(key == cellset_type) %>% .$children |> as.data.frame() |> - as_tibble() |> - select(key, name, cellIds) + tibble::as_tibble() |> + dplyr::select(key, name, cellIds) +} + + +#' Filters cellsets, getting vector of cell_ids to keep +#' +#' @param cellsets_df data.frame +#' @param cellset_keys character +#' +#' @return +#' @export +#' +get_cell_ids <- function(cellsets_df, cellset_keys) { + + cellsets_df |> + dplyr::filter(key %in% cellset_keys) |> + tidyr::unnest(cellIds) |> + dplyr::pull(cellIds) |> + unique() } + + create_subset_experiment <- function(input, pipeline_config) { parent_experiment_id <- input$parentExperimentId diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index 8082dd6e..3f0f7fda 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -398,3 +398,20 @@ upload_multipart_parts <- function(s3, bucket, object, key, upload_id) { return(parts) } + + +load_cellsets_file <- function(pipeline_config, experiment_id) { + message("loading cellsets file") + s3 <- paws::s3(config = pipeline_config$aws_config) + + bucket <- pipeline_config$cell_sets_bucket + + c(body, ...rest) %<-% s3$get_object( + Bucket = bucket, + Key = experiment_id + ) + + obj <- jsonlite::fromJSON(rawConnection(body), flatten = T) + return(obj) + +} diff --git a/pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R b/pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R new file mode 100644 index 00000000..c0292edd --- /dev/null +++ b/pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R @@ -0,0 +1,13 @@ +mock_scdata <- function(){ + processed_path <- "/Users/german/bm/cellenics/data/8ecc9d20-30e4-49eb-b536-a0d1f0ba420d/processed_r.rds" + + readRDS(processed_path) +} + +mock_cellsets <- function(){ + cellsets_path <- "/Users/german/bm/cellenics/data/8ecc9d20-30e4-49eb-b536-a0d1f0ba420d/cellsets.json" + +} + +mock_sample_id_mapping <- function(){} + From 69fb01cac00476465bd0e393002aca26f186d72c Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Mon, 12 Dec 2022 20:13:52 -0300 Subject: [PATCH 03/34] migrate to data.table --- pipeline-runner/R/gem2s-X-subset_experiment.R | 46 +------------------ pipeline-runner/R/handle_data.R | 32 ++++++++++++- 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/pipeline-runner/R/gem2s-X-subset_experiment.R b/pipeline-runner/R/gem2s-X-subset_experiment.R index cf116a37..52c900f2 100644 --- a/pipeline-runner/R/gem2s-X-subset_experiment.R +++ b/pipeline-runner/R/gem2s-X-subset_experiment.R @@ -1,45 +1,3 @@ -#' Extract cellset type as data.frame -#' -#' Gets the cellsets list and converts it to tidy tibble, keeping only the -#' the required cellset type -#' -#' @param cellsets -#' @param cellset_type -#' -#' @return -#' @export -#' -parse_cellsets <- function(cellsets, cellset_type) { - - cellsets$cellSets |> - dplyr::filter(key == cellset_type) %>% - .$children |> - as.data.frame() |> - tibble::as_tibble() |> - dplyr::select(key, name, cellIds) -} - - -#' Filters cellsets, getting vector of cell_ids to keep -#' -#' @param cellsets_df data.frame -#' @param cellset_keys character -#' -#' @return -#' @export -#' -get_cell_ids <- function(cellsets_df, cellset_keys) { - - cellsets_df |> - dplyr::filter(key %in% cellset_keys) |> - tidyr::unnest(cellIds) |> - dplyr::pull(cellIds) |> - unique() - -} - - - create_subset_experiment <- function(input, pipeline_config) { parent_experiment_id <- input$parentExperimentId @@ -49,9 +7,9 @@ create_subset_experiment <- function(input, pipeline_config) { # load parent processed scdata and cellsets s3 <- paws::s3(config = pipeline_config$aws_config) parent_scdata <- load_processed_scdata(s3, pipeline_config, parent_experiment_id) - parent_cellsets <- load_cellsets(s3, pipeline_config, parent_experiment_id) + parent_cellsets <- parse_cellsets(load_cellsets(s3, pipeline_config, parent_experiment_id)) - cell_ids_to_keep <- get_cell_sets(parent_cellsets, cellset_keys) + cell_ids_to_keep <- parent_cellsets[key %in% cellset_keys, cell_id] sample_id_mapping <- input$sampleIdMapping diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index cad76407..c5253e9c 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -403,9 +403,17 @@ upload_multipart_parts <- function(s3, bucket, object, key, upload_id) { } -load_cellsets_file <- function(pipeline_config, experiment_id) { +#' Load cellsets object from s3 +#' +#' @param s3 +#' @param pipeline_config +#' @param experiment_id +#' +#' @return +#' @export +#' +load_cellsets <- function(s3, pipeline_config, experiment_id) { message("loading cellsets file") - s3 <- paws::s3(config = pipeline_config$aws_config) bucket <- pipeline_config$cell_sets_bucket @@ -418,3 +426,23 @@ load_cellsets_file <- function(pipeline_config, experiment_id) { return(obj) } + + +#' Parse cellsets object to data.table +#' +#' Gets the cellsets list and converts it to a tidy data.table +#' +#' @param cellsets list +#' +#' @return +#' @export +#' +parse_cellsets <- function(cellsets) { + + data.table::setDT(cellsets$cellSets) + # fill columns in case there are empty cellset classes + dt <- data.table::rbindlist(cellsets$cellSets$children, fill = TRUE) + # unnest, and change column name + dt[, setNames(.(unlist(cellIds)), "cell_id"), by = .(key, name)] + +} From 7216814aec3cfd5d886a6b50e3f332fd584e3320 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Thu, 15 Dec 2022 19:11:23 -0300 Subject: [PATCH 04/34] finish subset step, add docs. init tests --- pipeline-runner/NAMESPACE | 6 + pipeline-runner/R/gem2s-X-subset_experiment.R | 107 +++++++++++++++--- pipeline-runner/R/handle_data.R | 10 +- pipeline-runner/man/add_new_sample_ids.Rd | 19 ++++ pipeline-runner/man/call_qc.Rd | 2 +- pipeline-runner/man/call_subset_gem2s.Rd | 26 +++++ pipeline-runner/man/construct_qc_config.Rd | 5 +- pipeline-runner/man/create_sample_id_map.Rd | 20 ++++ .../man/create_subset_experiment.Rd | 27 +++++ pipeline-runner/man/diet_scdata.Rd | 18 +++ pipeline-runner/man/load_cellsets.Rd | 21 ++++ pipeline-runner/man/parse_cellsets.Rd | 17 +++ pipeline-runner/man/prepare_experiment.Rd | 3 +- .../testthat/test-gem2s-X-subset_experiment.R | 18 ++- 14 files changed, 270 insertions(+), 29 deletions(-) create mode 100644 pipeline-runner/man/add_new_sample_ids.Rd create mode 100644 pipeline-runner/man/call_subset_gem2s.Rd create mode 100644 pipeline-runner/man/create_sample_id_map.Rd create mode 100644 pipeline-runner/man/create_subset_experiment.Rd create mode 100644 pipeline-runner/man/diet_scdata.Rd create mode 100644 pipeline-runner/man/load_cellsets.Rd create mode 100644 pipeline-runner/man/parse_cellsets.Rd diff --git a/pipeline-runner/NAMESPACE b/pipeline-runner/NAMESPACE index 8173b447..fc511d60 100644 --- a/pipeline-runner/NAMESPACE +++ b/pipeline-runner/NAMESPACE @@ -1,10 +1,14 @@ # Generated by roxygen2: do not edit by hand export(add_metadata) +export(add_new_sample_ids) export(build_cc_gene_list) export(build_metadata_cellsets) +export(create_sample_id_map) export(create_scdata) export(create_seurat) +export(create_subset_experiment) +export(diet_scdata) export(download_user_files) export(embed_and_cluster) export(filter_doublets) @@ -25,12 +29,14 @@ export(integrate_from_sketch) export(integrate_scdata) export(learn_from_sketches) export(list_exclude_genes) +export(load_cellsets) export(load_user_files) export(log_normalize) export(make_annot_with_ids) export(merge_scdata_list) export(normalize_annotation_types) export(normalize_data) +export(parse_cellsets) export(prepare_experiment) export(prepare_sct_integration) export(read_10x_annotations) diff --git a/pipeline-runner/R/gem2s-X-subset_experiment.R b/pipeline-runner/R/gem2s-X-subset_experiment.R index 52c900f2..81b84169 100644 --- a/pipeline-runner/R/gem2s-X-subset_experiment.R +++ b/pipeline-runner/R/gem2s-X-subset_experiment.R @@ -1,38 +1,111 @@ +#' create a subset experiment +#' +#' This is the first step of a subset pipeline, which basically takes the parent +#' experiment ID and cellset keys to keep as input, extracts the cell ids to keep +#' and subsets and slims down the parent seurat object. +#' +#' @param input list containing: +#' - parentExperimentId character +#' - subsetExperimentId character +#' - cellSetKeys character vector of cellset keys to subset +#' - experimentName character +#' @param pipeline_config list +#' +#' @return list containing scdata_list, annotations and sample_id_map +#' @export +#' create_subset_experiment <- function(input, pipeline_config) { - parent_experiment_id <- input$parentExperimentId - subset_experiment_id <- input$subsetExperimentId - cellset_keys <- input$cellSetKeys - # load parent processed scdata and cellsets s3 <- paws::s3(config = pipeline_config$aws_config) - parent_scdata <- load_processed_scdata(s3, pipeline_config, parent_experiment_id) - parent_cellsets <- parse_cellsets(load_cellsets(s3, pipeline_config, parent_experiment_id)) - - cell_ids_to_keep <- parent_cellsets[key %in% cellset_keys, cell_id] + parent_scdata <- load_processed_scdata(s3, pipeline_config, input$parentExperimentId) + parent_cellsets <- parse_cellsets(load_cellsets(s3, pipeline_config, input$parentExperimentId)) - sample_id_mapping <- input$sampleIdMapping + cell_ids_to_keep <- parent_cellsets[key %in% input$cellSetKeys, cell_id] - # subset seurat object - scdata <- subset_ids(scdata, cell_ids_to_keep) + # subset seurat object, remove unnecesary data + scdata <- subset_ids(parent_scdata, cell_ids_to_keep) + scdata <- diet_scdata(scdata) + scdata@misc$experimentId <- input$subsetExperimentId - # add subset experiment name to the subset seurat object - scdata$project <- input$name + # delete parent_scdata to free memory + rm(parent_scdata) # add new sample_ids, keep originals in a new variable scdata$parent_samples <- scdata$samples - scdata$samples <- sample_id_mapping[match(parent_samples, sample_id_mapping)] + sample_id_map <- create_sample_id_map(unique(scdata$parent_samples)) + scdata <- add_new_sample_ids(scdata, sample_id_map) # split by sample scdata_list <- Seurat::SplitObject(scdata, split.by = "samples") - prev_out$scdata_list <- scdata_list - prev_out$annot <- scdata@misc + # structure step output res <- list( data = list(), - output = prev_out + output = list(scdata_list = scdata_list, + annot = scdata@misc$gene_annotations, + sample_id_map = sample_id_map) ) message("\nSubsetting of Seurat object step complete.") return(res) } + + +#' generate a sample id mapping for remaining samples after subset +#' +#' New sample ids must be created, but the number of samples depends on which +#' cells have been subset by the user. Sample Ids that belong to the parent +#' experiment are also kept, which is useful for the addition of the new subclusters +#' to the parent experiment. +#' +#' @param parent_sample_id character vector of unique parent sample ids +#' +#' @return data.table with sample id map +#' @export +#' +create_sample_id_map <- function(parent_sample_id) { + subset_sample_id <- uuid::UUIDgenerate(n = length(parent_sample_id)) + sample_id_map <-data.table::data.table(parent_sample_id = parent_sample_id, + subset_sample_id = subset_sample_id) + + return(sample_id_map) +} + + +#' Add new sample ids to the subset Seurat Object +#' +#' @param scdata Seurat Object +#' @param sample_id_map data.table of parent/subset sample id map +#' +#' @return SeuratObject with new sample ids +#' @export +#' +add_new_sample_ids <- function(scdata, sample_id_map) { + sample_map_idx <- match(scdata$parent_samples, sample_id_map$parent_sample_id) + scdata$samples <- sample_id_map$subset_sample_id[sample_map_idx] + return(scdata) +} + + +#' Remove all unnecessary data from the parent seurat object +#' +#' Seurat::DietSeurat is not able to remove certain slots from a seurat object. +#' This function also removes elements from the misc slot which are not necessary +#' +#' @param scdata SeuratObject +#' +#' @return leaner SeuratObject +#' @export +#' +diet_scdata <- function(scdata) { + lean_scdata <- Seurat::CreateSeuratObject(counts = scdata@assays$RNA@counts, + meta.data = scdata@meta.data, + min.cells = 0, + min.features = 0) + + lean_scdata@misc <- list(gene_annotations = scdata@misc$gene_annotations, + parent_experimentId = scdata@misc$experimentId) + + return(lean_scdata) +} diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index c5253e9c..73fed0ee 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -405,11 +405,11 @@ upload_multipart_parts <- function(s3, bucket, object, key, upload_id) { #' Load cellsets object from s3 #' -#' @param s3 -#' @param pipeline_config -#' @param experiment_id +#' @param s3 paws::s3 object +#' @param pipeline_config list +#' @param experiment_id character #' -#' @return +#' @return cellsets list #' @export #' load_cellsets <- function(s3, pipeline_config, experiment_id) { @@ -434,7 +434,7 @@ load_cellsets <- function(s3, pipeline_config, experiment_id) { #' #' @param cellsets list #' -#' @return +#' @return data.table of cellset keys, names and corresponding cell_ids #' @export #' parse_cellsets <- function(cellsets) { diff --git a/pipeline-runner/man/add_new_sample_ids.Rd b/pipeline-runner/man/add_new_sample_ids.Rd new file mode 100644 index 00000000..d7af121f --- /dev/null +++ b/pipeline-runner/man/add_new_sample_ids.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gem2s-X-subset_experiment.R +\name{add_new_sample_ids} +\alias{add_new_sample_ids} +\title{Add new sample ids to the subset Seurat Object} +\usage{ +add_new_sample_ids(scdata, sample_id_map) +} +\arguments{ +\item{scdata}{Seurat Object} + +\item{sample_id_map}{data.table of parent/subset sample id map} +} +\value{ +SeuratObject with new sample ids +} +\description{ +Add new sample ids to the subset Seurat Object +} diff --git a/pipeline-runner/man/call_qc.Rd b/pipeline-runner/man/call_qc.Rd index 0daaef32..ccce56a7 100644 --- a/pipeline-runner/man/call_qc.Rd +++ b/pipeline-runner/man/call_qc.Rd @@ -13,7 +13,7 @@ call_qc(task_name, input, pipeline_config) \itemize{ \item step parameters for all samples \item current sample UUID -\item uploadCountMatrix (wether or not to upload matrix after step) +\item uploadCountMatrix (whether or not to upload matrix after step) }} \item{pipeline_config}{list as defined by load_config} diff --git a/pipeline-runner/man/call_subset_gem2s.Rd b/pipeline-runner/man/call_subset_gem2s.Rd new file mode 100644 index 00000000..60350a77 --- /dev/null +++ b/pipeline-runner/man/call_subset_gem2s.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/init-functions.R +\name{call_subset_gem2s} +\alias{call_subset_gem2s} +\title{Call subset gem2s} +\usage{ +call_subset_gem2s(task_name, input, pipeline_config) +} +\arguments{ +\item{task_name}{character name of the step} + +\item{input}{list containing +\itemize{ +\item parentExperimentId +\item childExperimentId +\item sample IDs, and names +}} + +\item{pipeline_config}{list as defined by load_config} +} +\value{ +character message id +} +\description{ +Runs step \code{task_name} of the subset GEM2S pipeline, sends output message to the API +} diff --git a/pipeline-runner/man/construct_qc_config.Rd b/pipeline-runner/man/construct_qc_config.Rd index e5306805..ae11ef07 100644 --- a/pipeline-runner/man/construct_qc_config.Rd +++ b/pipeline-runner/man/construct_qc_config.Rd @@ -4,12 +4,15 @@ \alias{construct_qc_config} \title{Constructs default QC configuration} \usage{ -construct_qc_config(scdata_list, any_filtered) +construct_qc_config(scdata_list, any_filtered, disable_qc_filters) } \arguments{ \item{scdata_list}{list of seurat objects} \item{any_filtered}{bool indicating if barcodes were filtered by emptyDrops} + +\item{disable_qc_filters}{bool indicating if the data derives from the +subsetting of another experiment} } \value{ list of QC configuration parameters diff --git a/pipeline-runner/man/create_sample_id_map.Rd b/pipeline-runner/man/create_sample_id_map.Rd new file mode 100644 index 00000000..9f3c1382 --- /dev/null +++ b/pipeline-runner/man/create_sample_id_map.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gem2s-X-subset_experiment.R +\name{create_sample_id_map} +\alias{create_sample_id_map} +\title{generate a sample id mapping for remaining samples after subset} +\usage{ +create_sample_id_map(parent_sample_id) +} +\arguments{ +\item{parent_sample_id}{character vector of unique parent sample ids} +} +\value{ +data.table with sample id map +} +\description{ +New sample ids must be created, but the number of samples depends on which +cells have been subset by the user. Sample Ids that belong to the parent +experiment are also kept, which is useful for the addition of the new subclusters +to the parent experiment. +} diff --git a/pipeline-runner/man/create_subset_experiment.Rd b/pipeline-runner/man/create_subset_experiment.Rd new file mode 100644 index 00000000..3a679b7b --- /dev/null +++ b/pipeline-runner/man/create_subset_experiment.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gem2s-X-subset_experiment.R +\name{create_subset_experiment} +\alias{create_subset_experiment} +\title{create a subset experiment} +\usage{ +create_subset_experiment(input, pipeline_config) +} +\arguments{ +\item{input}{list containing: +\itemize{ +\item parentExperimentId character +\item subsetExperimentId character +\item cellSetKeys character vector of cellset keys to subset +\item experimentName character +}} + +\item{pipeline_config}{list} +} +\value{ +list containing scdata_list, annotations and sample_id_map +} +\description{ +This is the first step of a subset pipeline, which basically takes the parent +experiment ID and cellset keys to keep as input, extracts the cell ids to keep +and subsets and slims down the parent seurat object. +} diff --git a/pipeline-runner/man/diet_scdata.Rd b/pipeline-runner/man/diet_scdata.Rd new file mode 100644 index 00000000..5039fc32 --- /dev/null +++ b/pipeline-runner/man/diet_scdata.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gem2s-X-subset_experiment.R +\name{diet_scdata} +\alias{diet_scdata} +\title{Remove all unnecessary data from the parent seurat object} +\usage{ +diet_scdata(scdata) +} +\arguments{ +\item{scdata}{SeuratObject} +} +\value{ +leaner SeuratObject +} +\description{ +Seurat::DietSeurat is not able to remove certain slots from a seurat object. +This function also removes elements from the misc slot which are not necessary +} diff --git a/pipeline-runner/man/load_cellsets.Rd b/pipeline-runner/man/load_cellsets.Rd new file mode 100644 index 00000000..27a970ce --- /dev/null +++ b/pipeline-runner/man/load_cellsets.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/handle_data.R +\name{load_cellsets} +\alias{load_cellsets} +\title{Load cellsets object from s3} +\usage{ +load_cellsets(s3, pipeline_config, experiment_id) +} +\arguments{ +\item{s3}{paws::s3 object} + +\item{pipeline_config}{list} + +\item{experiment_id}{character} +} +\value{ +cellsets list +} +\description{ +Load cellsets object from s3 +} diff --git a/pipeline-runner/man/parse_cellsets.Rd b/pipeline-runner/man/parse_cellsets.Rd new file mode 100644 index 00000000..1bf0ed9b --- /dev/null +++ b/pipeline-runner/man/parse_cellsets.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/handle_data.R +\name{parse_cellsets} +\alias{parse_cellsets} +\title{Parse cellsets object to data.table} +\usage{ +parse_cellsets(cellsets) +} +\arguments{ +\item{cellsets}{list} +} +\value{ +data.table of cellset keys, names and corresponding cell_ids +} +\description{ +Gets the cellsets list and converts it to a tidy data.table +} diff --git a/pipeline-runner/man/prepare_experiment.Rd b/pipeline-runner/man/prepare_experiment.Rd index cc8860d0..01cfb4d9 100644 --- a/pipeline-runner/man/prepare_experiment.Rd +++ b/pipeline-runner/man/prepare_experiment.Rd @@ -19,8 +19,7 @@ prev_out \code{prev_out} with added slots 'scdata' containing merged } \description{ \enumerate{ -\item Merges the samples for the current experiment \item Adds metadata: cellsId, color_pool, and gene annotation -\item Preparing QC configuration +\item Prepares QC configuration } } diff --git a/pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R b/pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R index c0292edd..fcae2442 100644 --- a/pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R +++ b/pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R @@ -1,13 +1,25 @@ mock_scdata <- function(){ processed_path <- "/Users/german/bm/cellenics/data/8ecc9d20-30e4-49eb-b536-a0d1f0ba420d/processed_r.rds" - readRDS(processed_path) } mock_cellsets <- function(){ cellsets_path <- "/Users/german/bm/cellenics/data/8ecc9d20-30e4-49eb-b536-a0d1f0ba420d/cellsets.json" - + jsonlite::fromJSON(cellsets_path, flatten = TRUE) } -mock_sample_id_mapping <- function(){} +mock_input <- function() { + input <- list( + name = "mock_subset_experiment_name", + parentExperimentId = "mock_parent_experiment_id", + subsetExperimentId = "mock_subset_experiment_id", + cellSetKeys = c("louvain-0", "louvain-1") + ) + + return(input) +} +parent_scdata <- mock_scdata() +parent_cellsets <- parse_cellsets(mock_cellsets()) +sample_mapping <- mock_sample_id_mapping() +input <- mock_input() From dd92e9879fc6877cbd50c327d39a6487c5ef13cf Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 16 Dec 2022 09:28:07 -0300 Subject: [PATCH 05/34] rename step --- .../R/{gem2s-X-subset_experiment.R => subset-1-subset_seurat.R} | 0 ...-gem2s-X-subset_experiment.R => test-subset-1-subset_seurat.R} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename pipeline-runner/R/{gem2s-X-subset_experiment.R => subset-1-subset_seurat.R} (100%) rename pipeline-runner/tests/testthat/{test-gem2s-X-subset_experiment.R => test-subset-1-subset_seurat.R} (100%) diff --git a/pipeline-runner/R/gem2s-X-subset_experiment.R b/pipeline-runner/R/subset-1-subset_seurat.R similarity index 100% rename from pipeline-runner/R/gem2s-X-subset_experiment.R rename to pipeline-runner/R/subset-1-subset_seurat.R diff --git a/pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R b/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R similarity index 100% rename from pipeline-runner/tests/testthat/test-gem2s-X-subset_experiment.R rename to pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R From 018ae4c91aff066766c054056f98c31677e3f226 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 16 Dec 2022 09:35:53 -0300 Subject: [PATCH 06/34] rename subset pipeline --- pipeline-runner/R/init-functions.R | 15 +++++++-------- pipeline-runner/man/add_new_sample_ids.Rd | 2 +- ...call_subset_gem2s.Rd => call_subset_seurat.Rd} | 10 +++++----- pipeline-runner/man/create_sample_id_map.Rd | 2 +- pipeline-runner/man/create_subset_experiment.Rd | 2 +- pipeline-runner/man/diet_scdata.Rd | 2 +- 6 files changed, 16 insertions(+), 17 deletions(-) rename pipeline-runner/man/{call_subset_gem2s.Rd => call_subset_seurat.Rd} (63%) diff --git a/pipeline-runner/R/init-functions.R b/pipeline-runner/R/init-functions.R index 5c2528f4..86ee1cb6 100644 --- a/pipeline-runner/R/init-functions.R +++ b/pipeline-runner/R/init-functions.R @@ -94,7 +94,7 @@ load_config <- function(development_aws_server) { } # batch does not have access to the internal EKS cluster api URL, use the public one - if(running_in_batch == "true" && domain_name != "") { + if (running_in_batch == "true" && domain_name != "") { config$api_url <- config$public_api_url } @@ -205,7 +205,6 @@ run_qc_step <- function(scdata, config, tasks, task_name, cells_id, sample_id, d #' @return list of task results #' run_gem2s_step <- function(prev_out, input, pipeline_config, tasks, task_name) { - if (!task_name %in% names(tasks)) { stop("Invalid task name given: ", task_name) } @@ -252,9 +251,9 @@ call_gem2s <- function(task_name, input, pipeline_config) { } -#' Call subset gem2s +#' Call subset seurat #' -#' Runs step `task_name` of the subset GEM2S pipeline, sends output message to the API +#' Runs step `task_name` of the subset seurat pipeline, sends output message to the API #' #' @param task_name character name of the step #' @param input list containing @@ -265,7 +264,7 @@ call_gem2s <- function(task_name, input, pipeline_config) { #' #' @return character message id #' -call_subset_gem2s <- function(task_name, input, pipeline_config) { +call_subset_seurat <- function(task_name, input, pipeline_config) { experiment_id <- input$experimentId # remove when it's added to the input input$subset_experiment <- TRUE @@ -276,7 +275,7 @@ call_subset_gem2s <- function(task_name, input, pipeline_config) { } check_input(input) - tasks <- lapply(SUBSET_GEM2S_TASK_LIST, get) + tasks <- lapply(SUBSET_SEURAT_TASK_LIST, get) c(data, task_out) %<-% run_gem2s_step(prev_out, input, pipeline_config, tasks, task_name) assign("prev_out", task_out, pos = ".GlobalEnv") @@ -449,14 +448,14 @@ pipeline_heartbeat <- function(task_token, aws_config) { start_heartbeat <- function(task_token, aws_config) { message("Starting heartbeat") - heartbeat_proc <- callr::r_bg( + heartbeat_proc <- callr::r_bg( func = pipeline_heartbeat, args = list( task_token, aws_config ), stdout = "/tmp/out", stderr = "/tmp/err" ) - return(heartbeat_proc) + return(heartbeat_proc) } diff --git a/pipeline-runner/man/add_new_sample_ids.Rd b/pipeline-runner/man/add_new_sample_ids.Rd index d7af121f..c1f8875b 100644 --- a/pipeline-runner/man/add_new_sample_ids.Rd +++ b/pipeline-runner/man/add_new_sample_ids.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gem2s-X-subset_experiment.R +% Please edit documentation in R/subset-1-subset_seurat.R \name{add_new_sample_ids} \alias{add_new_sample_ids} \title{Add new sample ids to the subset Seurat Object} diff --git a/pipeline-runner/man/call_subset_gem2s.Rd b/pipeline-runner/man/call_subset_seurat.Rd similarity index 63% rename from pipeline-runner/man/call_subset_gem2s.Rd rename to pipeline-runner/man/call_subset_seurat.Rd index 60350a77..3a194eb5 100644 --- a/pipeline-runner/man/call_subset_gem2s.Rd +++ b/pipeline-runner/man/call_subset_seurat.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/init-functions.R -\name{call_subset_gem2s} -\alias{call_subset_gem2s} -\title{Call subset gem2s} +\name{call_subset_seurat} +\alias{call_subset_seurat} +\title{Call subset seurat} \usage{ -call_subset_gem2s(task_name, input, pipeline_config) +call_subset_seurat(task_name, input, pipeline_config) } \arguments{ \item{task_name}{character name of the step} @@ -22,5 +22,5 @@ call_subset_gem2s(task_name, input, pipeline_config) character message id } \description{ -Runs step \code{task_name} of the subset GEM2S pipeline, sends output message to the API +Runs step \code{task_name} of the subset seurat pipeline, sends output message to the API } diff --git a/pipeline-runner/man/create_sample_id_map.Rd b/pipeline-runner/man/create_sample_id_map.Rd index 9f3c1382..4926d873 100644 --- a/pipeline-runner/man/create_sample_id_map.Rd +++ b/pipeline-runner/man/create_sample_id_map.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gem2s-X-subset_experiment.R +% Please edit documentation in R/subset-1-subset_seurat.R \name{create_sample_id_map} \alias{create_sample_id_map} \title{generate a sample id mapping for remaining samples after subset} diff --git a/pipeline-runner/man/create_subset_experiment.Rd b/pipeline-runner/man/create_subset_experiment.Rd index 3a679b7b..964e1a47 100644 --- a/pipeline-runner/man/create_subset_experiment.Rd +++ b/pipeline-runner/man/create_subset_experiment.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gem2s-X-subset_experiment.R +% Please edit documentation in R/subset-1-subset_seurat.R \name{create_subset_experiment} \alias{create_subset_experiment} \title{create a subset experiment} diff --git a/pipeline-runner/man/diet_scdata.Rd b/pipeline-runner/man/diet_scdata.Rd index 5039fc32..0d3b5a6c 100644 --- a/pipeline-runner/man/diet_scdata.Rd +++ b/pipeline-runner/man/diet_scdata.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gem2s-X-subset_experiment.R +% Please edit documentation in R/subset-1-subset_seurat.R \name{diet_scdata} \alias{diet_scdata} \title{Remove all unnecessary data from the parent seurat object} From 2ebeead7c23a5c2a9cbd194fdc69f2bfe98d3b33 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 16 Dec 2022 09:50:58 -0300 Subject: [PATCH 07/34] update sysdata --- pipeline-runner/R/init-functions.R | 6 +++--- pipeline-runner/R/sysdata.rda | Bin 3123 -> 3125 bytes pipeline-runner/man/call_subset_seurat.Rd | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pipeline-runner/R/init-functions.R b/pipeline-runner/R/init-functions.R index 86ee1cb6..ed79bd8f 100644 --- a/pipeline-runner/R/init-functions.R +++ b/pipeline-runner/R/init-functions.R @@ -264,7 +264,7 @@ call_gem2s <- function(task_name, input, pipeline_config) { #' #' @return character message id #' -call_subset_seurat <- function(task_name, input, pipeline_config) { +call_subset <- function(task_name, input, pipeline_config) { experiment_id <- input$experimentId # remove when it's added to the input input$subset_experiment <- TRUE @@ -486,8 +486,8 @@ wrapper <- function(input, pipeline_config) { message_id <- call_qc(task_name, input, pipeline_config) } else if (process_name == "gem2s") { message_id <- call_gem2s(task_name, input, pipeline_config) - } else if (process_name == "subsetGem2s") { - message_id <- call_subset_gem2s(task_name, input, pipeline_config) + } else if (process_name == "subset") { + message_id <- call_subset(task_name, input, pipeline_config) } else { stop("Process name not recognized.") } diff --git a/pipeline-runner/R/sysdata.rda b/pipeline-runner/R/sysdata.rda index 58e71f4714dfaaad1c80ce19cd060f1c220a1bd5..1a20befed27612d8290a1b9312994767a950807d 100644 GIT binary patch delta 3124 zcmV-449oMg7_}G00u)w zjWPfY00w{n05lI!&;XPnH5oFVnrcr|#6o&$h%^sTG{Ryr1^@w|0Aw1PU^NW%lhcdI z&Efx#H`et3Z zj|8l9oPmxi97wQ);(>Hfx$?p+k`wple{f1dwlO3wes5!{-Rk&m&4|R$zXcs6ugma6 zvG&aJHtHTvEif5`n)ftXY72rW#vq|eQ#G~57shPIv>CM?*n4vnhB3G|Cg{gEW`FR= zH>UW>H|9~6a1UhPJ7)pHaT!kG#(PJ$avVn~?i|NZ=QA13>E1olIqw<{gP7<&^S*N( z^WZ)c;xiu^@*V@_G#U?>`Ol#GPYK~XMv0_)4@v4hXL!>;qu6-OFYM4ee&2V$;qm!< zbaZ}tdS1Ox@~`%z6rW1-rMmoBkAGqyfu{Kra7zS28q5g5C~MtoUNwPjN_^=PWmgm9 zZvw4Jp&=z%f=M<2gkK>uFzaq==Nk^*4BYwL;9C)p<})HDtyc;|;m1T%Ax)nnw0?9n>3A~!OVN)<*ZoAevVgs|@w6O|B+z{Pgd zpcdrn*7;0FKrRDy5DlbwsZK7j_`v{?`H`c;2`-Uh`{~U@2nrFk5?c9toFi)R`e?js z*K)U(D$wXm>lIjqa%6-iM}H|vk!2SWHA5(IG%$lyiCIuGH-l=!Bo<{_!`5^c7-T>S zAmZsFl4L|*3RP^O7?&=XvqmuP`5_*3NrD?7tm2JOW@$vCGQChFgP<&n0XYLQv;v7S zlS4wgImrr{JPMo2v$awcD5PrZt1y7Egc{a_09!&yCNKd55CDqkNPikoNE&U3X%G+q z!Yo7(n36~cv?jy|gF+CIajCH&{jnsFQ!Wk+7ywADYBfVA#QNY_~tztWwHyR0%Vq=i4$@I zIsnm*IAMnv3IieyE*!FLhaq))xbB@Y)00~$6`C7 zVn+ejHE`6;HNE5O;v8lSw{*r)hA<8RUp8B;E@qHrc&2i6N!E`hlP0{&5X4Z`R*I~}2(l%Skj?pg zn5!gED=Ji$OJSoV=4CK(wO9rU#nd?t6$xx=&QY|ESAQpSaqF5@ONB;EaRK)kmnA4N zJ29?zO1-L!H5PPZB9Nxf*iMj`e?;dUQkTKAk3G;D^(dvwzbt&!vn)2BD6uRpzuN>5b-H| zXlqLo`6)&@126ubu ze8RVLGoh2xhL1>CiaD#LO$)NAT3MATY1!Tpm46xRs%Esq3(oMA3Tj1aj|%87If^w3 zgk0l3eSBBXF-uzR=>V<_tc=dMaJpWYr2(ddwzw#R47w1?6iPg?;ERmHD*-G7!!A%% znHM!+Mp9ZvZxmn)w?%}iSW}`1Fd`fhA#kQ8+AAPVDqPSqPRbNiHHcd2gcPEsm1eb9 zEPpPj$g0rOiBA9}#t1SaFFw5LYJQvzqj+r2gS_sk;^MAwhayNRS8+Lxq_U{YpdU4P zka!c#!l3MK3b^(;)(d1Bd4@D;=7rXbM6&mG-r~kuOArYIc~Fq6Di|#lL#s1IEGsA~ zgDR=|Ik2>ACz|UPf|U(k^(J4TuufwO`F~>TwVj$pIbk-7Lh?(SL|gi_S_kYvfG^Yl zL{Ao((iu#bVUD#FGqynS3gAG9fh;59)mC<#%;wx_w7pEFBT8=5>{Bf|Vw|8@Rh0c^ z!qijTl-#C6brw4APPC63Ffg&0B8&$PgU%g@ut&|9PY#Q0rQVpJF1;;Bp=vrUA%AtZ zCVTCblt$nI5hX^V4`Ov{6*~JUuBRL1t}?`lY0wo$gpjFCxxmYfg1%E%v~nt8UAbYo zHF0ZE?F;zOc}0mMRk8|bM5DJh<}npYOjEHnyX$f`b#|3Mo&5?kOOUu4+G1&?Pzsz< zIHIe5^Uo}KuxSwx3ovO34Ymo5fPXP5(<>4IB?&wWSR~6OQ!n;2>fX>~R$nLC#+nB+ ztD;vfW^Jce0kha>GzC#bbp-hnNei!aiOi^80TCl+BPpL@(;13FRbzr-vvQ5D>C#d( z77DV)RGgJ;OAD8GCoN&5+Fb19XCy7~u@;KpOT_UV66I0HbUJio*sVxoxPMh=8)3_Q zCupXt(=7y68!<~{Ay5!_uo*_8#WF?C9zqo<l)=(wC_NDN)cr4?Wxt`urDDJ^`de&wmT5M*m<1|VC#UwLjE zK8+mqDLT*?fdvzux!7Jo=p$5X6l;^^JDT_ZI*sf7W9TMSh7O12TX0@?%Cd{s28 zs^hW{7&>p|haqAr!A)8#SS?{J;-$6_Y$NRD8M866aIqX*OThd=u%MVfl{ggd z<_{a04t9vN0Ksscg3gDr1famAc4EAa^Ko2eW~RVtv>yS{5Peu<>3_65`_SQD%M@zw zSi!6}CZD1q$+~?^TNH(bB_TKziV2?^K;xiRP6yJE2{PBYfjc40G)(O@#v>z?j~@W2 zdrplhLMsQWenH~~-Z}O{o8mzEzr`gis~iVq)7nTPo!OL%nL&|?c5`LuR~-zhBae`- zfwT}LeiGWEW95^ZW`C8$jXesgTbxY7nzzLf$Bc|Uf+U2X*mpYzYTS?Rp!s4l5fqbC z*R9!Vj;59BN-sfAV!o!McBX;B`)h_mF(nXNy-4C)(87||`2HY=3Znun=7bNO^zE+e zYRtRH*`rpQbbfKUFfAZg9NCGGa%Sb;v~B_v7xHY$Ao}gXDt|IV128xkQZt(n#l$Zf zl&C@2gzu%NF8^%QMk*mKCFQ(@BMV$-Yc5c|YEc%^VA?FbLUJ>t@e#|Ym>Nl64w2U? zHO4i_HY^DVnu=14xX)v7+a_vIU7mAPrlc^KDK?zLe)!hh5y40clu&BN;bUVoNd}?+Oi{SE7rihD1fZkBH~$4 z@?C(GxwtS*>soQd1i{An(Oj2?mY5L*Ypm9c3PE{6zJU=FBZ9-e#PvoP?W)SW5H|TN zaWz(;)KD9@tH>Cl8Gvdj)=z!i-DZ2@2N^WQ0HQ_!;~hZ6Yi+f)t)OjWI3Jf>2LS^K O_ZM@Erogj delta 3122 zcmV-249)Yk7_%4-LRx4!F+o`-Q&}>=HW85yB7gYCYklbYUwmBjeF_`u8cloMPrGdP z?(r=(v;-2SiK3oODu0yP0X(XIrh=Z*1w1L?q{=-{Ow}MQ1u3Y15E~i0000013&;ssMS11gq~5S>Q7K;0004@ zpaz3Ss0X7+&}h@j84N=p03?V_MofXJlLYtP*(jHLLLqGr;03M(K003w-{V4T1 zoqUd$_`9cOn!y+_ZLy4FZLk|{4Wu^8jDKZhh?eChL?WmKfP^0b5D;M<(m=5cBoB<4 zG9axX2q4@U9sJxyV*!+4$NJ_kkrRGCcl~K0WEe>c(e}9gc5Le*p!wf_aTOO?Sm3>&b$x;??KLTp$QGIwavqMb?R zLPAQb1d?n42+|^$#y7Yp&@?&e&()#cg|QhHb2=hVUD6T&@9E0cw{5?BTHQYL@%C$6 zYcD?8nVUb3bLO^$swLo&Q9I-G8?mq2Q}c zw4>p2CcQ@99Gy;BE-JD2so2}rZM3JQmg1IzGiB2o$w|e$WVz``*b%SD(7-$sy{a=A z&$|tX*+>q=j~Q2gn;am3Nc_m;^g@?RxsF=rBoHW$B&DmY>NiIdZuW_|Td(D9H0<+) zO)Wb#=Fm)#gvjM7GAyFvMt`Vf4km^WYLP1{21f90X}XzrX0+nesJO>j08$(MG{l(^ zJL6k63=@+E7EbfZaS^RG6sn*h^@XWAy@8>vRa3NC1t91PBEU{S%&h>TOk~i|uFi5o zrcVN<@~qftdS@e5YWujjL>k!$0LX-rOfUd}hyX=YBn>E%2Ag6UM1Mj67)HcIgv63S zM$nrOA|`|(B8H}7LHlA!AdsMOFkk@!wM~}LY}jA#BeO`Q$1oHw9svyv)QpRVAjQ)9 z-Os0c95l*};&PN>K>5-_1aZhpQWBt~5jzRs^>;ztkFcW%r06D%S)R^?V5by=2s=F9 z98}cN(43`+;@#oi;D6v|;EC^N2fFO%qF&dFpP3m$DA^Iqs$N~>*zJsNoOJDNjUyD2 zMBObxypA!%YGqB~tFS0->Calp5rGWI48%3gIpZwndbzMd=>lZ1s|gcw1Hu9G;akpf zCQNaAL&8GFq}G?SPItN%GJ-2bawmeJt%8#mW(|%5#C$5JeSdT?5P)q=yj6y6NnF)1 zLRp88Ucc?YKK#Jeju^&s)399JU+@^*cG>Q3$ysn0M&V z9=X*uV~sw&LJbVIbk8uyDT9b;&zkK?2V`$*z3(=dF+v^lz%Ih>ChHGol^H0+(zrw5 z`Sas9oy@IgcYhLMI7>%7h#?c__#N7AiE+rWn+rp(o5cRiCKbgrQq#@^KM zbvlN2SQ6^_Y0XL*P#mzcfZ?3O2qFU_Un@Km`y;H{*nilIXO)%1Cs}1E*mrGe)N4bz zwun}YESiikgyuy}juARS#~X$f!sWSnD0~n&-xqkv2C72p({{c|4iYjATO7cx`~xB< z5sYJY`QN~BbrV)8wyT!ILzfi+q8yY2A`2TC*A5yj8nHlTqFzRMtrlHCYRjxS7{$vV zbn-pOhJSB2xfZiS7l;dOXukU0m{O3Mg}D(*wzbq1TrPNIL{^A3v>pgVLLMbB4Gn2! zRSK}UL3B8$Vl5uRS!)i&gjcNOA+Ib;+!+}b-C$(o*?$x*6B|(MRKU%A8Qt(E0}9>D z&W2A)8a*LmDCVkCFe-*(aYQnTJr!7x$rgrU=YJK~Q`E%(V1>Cx12@!apsQh?J!TU-=DhFu6{3MC#`?u*RAD*-G7!!A%%nHM!+ zMp9ZvZwy=yiHI&GQ%aj4gu|nZ8f0qItbsVOgFxz*V8bBz5Z2Q)D^^%DQLNBhU6ZC~ zn}5=BrXWj%5M)Mf-rRNFjet85v^j7=-S;%{@y9Sjf+kQW%Y)gvp!CqaBWG*mTvbAt^B2=xd4zhnGzYPMvKNNdbK>&#&!l^c4j;{ zKMT&%y0W==Ffej)uB)Ccv&cg2t_QiDZ+~25a^0&vfyq~8*q+Bk_5=7(pjYkyh@LGo zq)ALJ$1Q3q@MHtUajbz60$4}OxmFB07=(khNXpV)rY8RpO~2DTWUd6WXP3P&8Z zZ{B!J-1xAFhy|E5gofJ$#=sbq(ti~Q0FsIhm3>h$eokugP7GW&8f`O`^CyDB4-0Ij z4XVMeAeFt2T zRgkU`t1RG}vK7U^4dT8g3R z3Wh$+S)$5Zj=)t{U5kXYnwcTp2|p1VyXMNA?4UHr7}j8vDW#hfB6b!P2b9N42X#?N zzohEWPq7IZ3ooqB-NToL3d3WoR6<2$O;I-~=>l-Nc=sbB!~*RV@u8q^+ym6&EYR zicpHjy^2Be9`3E}kgEWdAUV1uhcd@O>vp!1q<5Z~NU4+=Sf{TWFT}do8C*wTB3=Qg zAV~y7?3U)Dv!3+8p?~YKnT$1yS%VjHSv|?n%r1A}$aHI?*j~1cORBLG0 z0m!f*Qf?H)(&auzW3n0PL3SC9QyP%MVx-t}jr-cRVvY(xT*8A^Ukr?CqCiH@oK-AD z;X^|OvTz(ICVzJn@tm7UP&2cUGbsfkfxtTgF3MV;7A%O_Bz4FasSp*Mf?hS14<*^b1=Eb$avSscd2pcTP;|S`SO@_li2udh MkxmpO46qG^Kmc&2H~;_u diff --git a/pipeline-runner/man/call_subset_seurat.Rd b/pipeline-runner/man/call_subset_seurat.Rd index 3a194eb5..02ca6a98 100644 --- a/pipeline-runner/man/call_subset_seurat.Rd +++ b/pipeline-runner/man/call_subset_seurat.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/init-functions.R -\name{call_subset_seurat} -\alias{call_subset_seurat} +\name{call_subset} +\alias{call_subset} \title{Call subset seurat} \usage{ -call_subset_seurat(task_name, input, pipeline_config) +call_subset(task_name, input, pipeline_config) } \arguments{ \item{task_name}{character name of the step} From 34b8f9a7d8ff60b780b86dc47fa212aa49e8764c Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 16 Dec 2022 09:53:45 -0300 Subject: [PATCH 08/34] fix sysdata --- pipeline-runner/R/sysdata.rda | Bin 3125 -> 3169 bytes pipeline-runner/data-raw/sysdata.R | 1 + 2 files changed, 1 insertion(+) diff --git a/pipeline-runner/R/sysdata.rda b/pipeline-runner/R/sysdata.rda index 1a20befed27612d8290a1b9312994767a950807d..c750a90d5513b4f53e06689742df642eb0dbfa12 100644 GIT binary patch literal 3169 zcmV-n44(5sT4*^jL0KkKSycq{E&vmA|MLI;|NsC0|NsC0|NsC0|L{OS03ZM$5C9+r z;0zx5G8cT%d&h3kB8R@qZBr}HVfT8jyhxYZb?wRtQ$)}bQ~eDQn3FUnX#qBx8VRC$ zYIsQX4=4=)00006fEoxOX_HeXjU=C{WM+|*!&LP>Lp3z?fEoY*0AgqW0MO78Q}Qr{ zC#dy3Q`9tIjRu-z3?n8G8X7biG+<3M3?m_8mHtFLMfD!@;0ZcndDQ$RP`~b zqerO8pfgiJpa1{>00HU%0!2YhsLfMNsiX8L(9jwH01W^DJwR+2SEYG5iu1`W0`jBU1o zw$R!`VVg27DJCFYn2`iPA`2MzyBPowSUJfcSq2gZT1b%qR}Xv8UNSqmI7K5M%rtB+ zQ!q%9dOi>K85p)h5<>CUv5?(Kc5eq=3@7I40*7f8*sZruI}-eH&SsqbNl-1U5uAggb`Vyr$up-8YT%Ilyq7M;YBY4)NbSla%lr$CPuO z2T9a;Pb~8ebFA}EIM+P$u`^s#r3QN$Na@tjj%zw5(P=$Gprw z25FjnOC_V^XCE`@WtPnIK+G*@mJF>guiECGrj@HVbM0Hz`_cSSFJFGJ@rkF!dhwu= zL2f9{3uXjpQR_eo1Pm!dLXiZ6p%k7di5i~jl3bb!6XXyif>tKbg$h3rZ=GiD{oc`` z%};M-hk2I7WPOKXB5Bxks6ri8rfXZg-@UDE-cvSjq{XS~Qx#R0mU4|ZB2WvaVuOcK zHxA9GEjFyf)sG%wtK2PVuPi;v?srvo`+VJ=zI|5c=H9WMx?ab|W~68l{AmCp z_?U1sG=L%~%Zs@iG~?D}un=z=Dsl6WWXaueqv!S^0V<=KwaJ+~UAR2fxcH9PNeB_h zgpj04W+OQoA%YNb2tAq;!ZSolHfDNsDhhFEFoUzp!-_TQDugYjnvV~6!QRP{jL6q< zRuhXWv%RyipM7q|?0Ku1Z#|b1^Ff){?@M~+((x+W3%938uR`Q`#hF{YI=u3SM?U(k z5f~wv0l0?A*L-cf{fga$GLR=lieaHSAUq%+ek(IN6Qdno@bHkiqH5HlF@z}T1ZJtS zMX;#f0cge9gH(uQ4=u3PSpb9sVc_AcGr3+vAm$2h&oob>U%t98QAcYz*@p}(vdaZD z`VsF_yK70-Y}>!7fT4vea~q6tDO*~!aS)+9_Gk}V?A==HZ`afy$KZrh z%UeL~j&Dz#%SbU|9rwU4!!9M(Cc{+uGi$@$_ioI*2-I|OoI08e?66C$?a#W_I#?aCcLBpWh7d#sM84N} zDfkaFk1s4=ewSGcoo%*}%I@5iLy{fM`p~Z!x@$1RCpr}@aT91->yW~+lHIM8J`g(R zS9pR2p%7M-xs+7a*4I}<8%u)oG>1$;PII1qVdJ)S(;FxX%9HV4fGboYaH22)-20*@}^!XIvysPHvsbBY@O)I}|u@=D#4`v##g5 zcv{Z;L`8=+s$JeDsiSzcB1olZK5>hVGfF~eDhyc|j+y5bn2rfErh7cro1hGoj*4E) z!ddkcV_I8Q%9)>Ei7ahQc5wyw^d?Ho8+_g=AqH^Pnf z5Ja2V(YvaH)(Gkvk|2q&K>(0oq{SFORmOnyGB~xJlTZ~^MoP?7HXMl#jepbyVE7@g&nQ(g!!?@i1;$xAW_f!~medVBupsC}cxmrw+il=)4MS#>99|5! zx2I@Btt%-m{k9npgM_fS)wgW+}$`# z+Nm*H1vb?WPuh{gx@jC2XFmIsxmNWSmA#-xeHJ|Q@L5t#VpJS0DQrN1xe7_(dR4;> z1#WKH*UFz{L^6H5lB6z4#{wDf&YE$;#>R?31Von_kg2KBxLr;+#&=S6S2bp}7aPE= z83G~%+6?GH#N}If0Fs{$0nBku`b}(2K+ZXiS?{jCl#2|EI2Oa-X7bLR(%{_wpQJulFmvcGmi8Oi z-5xe5GSk*Q!CeL)%8BH zIL*z29L3;8q8KG|(Q8MgH7K6vS`3kHAjAu}ba@u(&;0tmmt>6A(p|L&26uN9VJ0J2 zhPGp~qs9DjUoa9AWlV^ngPSZ0ES6S|_U%0<3o$;wqL>*)sMxs!u8vr&*Q1?i7KD)oExG0fxbdW-hB8SYm>1ruR&mBSAsPk5 zERlKj5Rl6XOz>`X7_n4~fL>ez{8U;-WmG0p^(au$)h@opnl63lb@*}?)UqLVS$Zd7-yMp1?5l=DPY~Q&kK&1BVhWAa9s>Y^OFHzw!yJ)$DCcV}Mex?zhKb3S+#HGr zFA%*!sSwy1>ZmFx%gvE6+RIx4ixrAam?u4Ru5F~MN}3?2h^hWeg!GxIv}l+~l=PmH z(teSs)6*K7k16I+AEh)610Vol4AcP7f-(TmMk+i_DdI-d^*=_bvW+wu01r?N00000 z004@gfFw;cr>GvG>KXt6>Hq)$0000000003Q_vtp6+KNqB-F(9Jx^(pnm<%%Xfyx- z00E!?007Vc5-3r%ObU99JWU~>00u)wjWPfY00w{n05lI!&;XPnH5oFVnrcr|#6o&$ zh%^sTG{Ryr1^@w|0Aw1PU^NW%lhcdI&Efx#H`et{V6ZhtSa7se9 zF(fX2Z)2+6>iBNWh{Vso1sx=>%kV_8_RR7&>K;!mFd2lJ_cU5+3xX)dAfZZAHMPbU z#%#y58MPkRdvg?qF}OD-=*Kr^@W?l&_{lfsQI>EIWZyey0m5+^PT|ITN49btM=9GJ`>_I9~tr<1LZUt519GSp!!b<;XFo(qOE(8(?6rwc+D^D&^vzLcfaBB`FnJ9etLRdy-)J5_M;S^O7o?<{8*1-Ac3a&6L3oe zLK@5nz$k0oYhE>hZAyIU6J=Ku<8K13NuePnS%OJ60EAy5GcfCJYUdjc-VEIN+~8Xg zkmfTYCaqTrL*d6#%Wb!`-{{+I)b1v?8DxUO=S|}?w}>|0o8(XzH-Zp1c*Uw3^GtAc znTGv%H8D}~q_L^ZP~=VC(Ied4w+p23RifHa@?43pNypma>T%Ax)nnw0?9n>3A~!OV zN)<*ZoAevVgs|@w6O|B+z{Pgdpcdrn*7;0FKrRDy5DlbwsZK7j_`v{?`H`c;2`-Uh z`{~U@2nrFk5?c9toFi)R`e?js*K)U(D$wXm>lIjqa%6-iM=42>Wfu}PLnv`HFoRTy zSx_=JgKEPh7G+w))^rybWIzfa;^`uiWJF&IRcxUcmoAvIMlkOAAs%!|f*T;L;*C&d zX+)wjy-*~Bpe&04IRi4Z0*Nt`LqfYb$qJb~3Y*HawNe!*q-yJ{Fo3ay8rFmWTS7@D zFaZM)0E*~H8c;|YZHQ?Q5CFm~L=c#gNC>nh#0Y~z5Rq}Iu^|1iB#=`s4h$FoNUUl# zLuQ)B$~UH6$1SN?IHRyH=J*y3QA}d`Q-daq4#e;bZW5%wS9)~%(MceT6 za-(Q%8(=u}#mBsxUA3|5$5%&4)G$UdIwhn>vBo$}RVS!x>&gyhoYbU{ix8@SWg%{G z%=qR#8fCHzq5@==qKOl713CcNtxLp6nJK}l?C64~gwk|JdszhxoQu+nawlY=t%6BQ zDuu$~YczXjLD76z_~aYCh{knfs$(x=qH9n(iA4*$a_!#hGP3>m*IAMnv3IieyE*!F zLhaq))xbB@Y)00~$6`C7Vn+ejHE`6;HNE5O;v8lSw{*r)hA<8RUp8B;DpZzBVWTAGWiWBISOyBk)Hx0n32bW4QM8U%Cv$P@ znpI1MMoe)5_ZgQZC^9=Su6Ihks*5!ibYmirrq9?;keK6uQCr|wez$<$vc?W#1*}qHUe=7LKp8IZS#@TOJJtzuUkzHD^MCC$v{FNvbjfGSg5pW z#Q~a$aT(^cS#<%c3dE#CO~rA0RS=sSRT+vApj>h}6dE*cS0K!n!z)!8O18DtRl@_r zA|kXwt)TEiA`tN@d}wP+DyUV3!V96rI}vF163bY2BqF_MAq{b2T`&elMYt>sc3qVsb2Fin(uR*nSc*BTrA-U6sajc;DQVf>5tSM2s%Esq3(oMA z3Tj1aj|%87If^w3gk0l3eSBBXF-uzR=>V<_tc=dMaJpWYr2(ddwzw#R47w1?6iPg? z;ERmHD*-G7!!A%%nHM!+Mp9ZvZxmn)w?%}iSW}`1Fd`fhA#kQ8+AAPVDqPSqPRbNi zHHcd2gcPEsm1eb9EH0?Xs?gJkPXHyx2r?rtKD_E`ew+=Xcx=vtyzZ&u;;wLqB1kD$ zaXF5pvZ&0UA2oT9coWUSpzLl6xb`{L3uGF3hBRsBh1QHjviEl0;>KD_5D5c$P>`!C z7%de;t20F`D<~?1DyjN8u(WF@n(G#Vl?`6?CSRejPGbxCV(Yb?nngKbHj6^?OPfSn z`m|aH>_C7o)Br?J7Mao+OqXGfwG=b9K=BITK!|}XBjVLocAU)S+-bDEOr;}AZqw{j zEjnVHpjcIu{bs_{Q{0r?rbBfWI_^%ij~g&Bv6v!^2M&YI9f+_;&6rOPi)^Ldn4m7b zEk~hhIxQh}w5O}W6!je@>Y zSF~~}VO_alxixWXQSA%((0N6PBUQ2rXhfs8Hs&!EN=#F+HM{F_Hg$HDKb`#wGfR-T z8rouMrBDi-QaGZke)G>Pd9Y~_5DPGA2@SRhjes#J(<>4IB?&wWSR~6OQ!n;2>fX>~ zR$nLC#+nB+tD;vfW^Jce0kha>GzC#bbp-hnNei!aiOi^80TCl+BPpL@(;13FRbzr- zvvQ5D>C#d(77DV)RGgJ;OAD8GCoN&5+Fb19XCy7~u@;KpOT_UV66I0HbUJio*sVxo zxK(ExVat3cXr`;vEd*8@F-v42P!M>q8AQIEeOs;UKq1jFbxNV5r=rnVqX1Ya&afFC z$$YCC_aR4MW?p@|?=mKLAB0E-5`$CdxSVE43|+~k6<{E)6lygoEqtkd<*B(4WNT># zAX~j(d2Sp&jU4vmgxAwt$_$>fn`$o>NL9yEtY_lr-MC#NH9Dz<0fSo%RP{=>5xD}| z1J-<1G_0!QvJe#}Vk*H+S}RyBVJzaMwh(M1?ByA=F|u&699&Dm2TLlgPI<7P zm_L;`6z}E_8<`Gvh_nE~aGrwBhp_~pz@&C!ypHp6TxMpbz-hD}0nreBSYzq5Jp0h$ zUdt3}?^waCHzuE=A<4RZOj{I%g(V?46p9I-8$jcrR!#@fkO?x^xq&+&%rs2xG{z$% zl#d?(sC!P0DMBj;tA0V_2HrXLLYv}1`M<>_EUO#`Wz*V7BAwZkikU%?igt5l=vN&K zsw0n(u7R`=Bz_XwqGRQgn`V{7jXesgTbxY7nzzLf$Bc|Uf+U2X*mpYzYTS?Rp!s4l z5fqbC*R9!Vj;59BN-sfAV!o!McBX;B`)h_mF(nXNy-4C)(87||`2HY=3Znun=7bNO z^zE+eYRtRH*`rpQbbfKUFfAZg9NCGGa%Sb;v~B_v7xHY$Ao}gXDl$U@FgO@eGn){_ z#4j3@s6p6-@1>_M|7_GoDj_W;<-CO>3tVSwE>OK{Q5Ml)+AO_7axDq%fE%Hk`wL_}1JJ!AJ{~P-@5FV`DT) z1$K0+R8@(BhD{c;faIW?)TfBq&`N=wn-X;(q)<2qm@9TMRQlSoBWo+xz`ZDdtk@#r zSy1v_fRwqoFiq=Pal{0{#`)1)mxh*@5d~|k){F{4c|pE`5fdYV!@k7zMi}j?%DfOZ z`7CiYR-n{S8@H>-7@`?~YAM!Fecjz=d*TNfG{pd-MgZeL#A|J}wXL9SWH=v}Tn7OI P3HKLrML1B9a}m^!3Z9E* diff --git a/pipeline-runner/data-raw/sysdata.R b/pipeline-runner/data-raw/sysdata.R index 51bc8ae7..9c6e8f50 100644 --- a/pipeline-runner/data-raw/sysdata.R +++ b/pipeline-runner/data-raw/sysdata.R @@ -90,6 +90,7 @@ usethis::use_data( debug_timestamp, bucket_list, gem2s, + SUBSET_SEURAT_TASK_LIST, GEM2S_TASK_LIST, QC_TASK_LIST, INPUT_DIR, From 20a3e5b876b80c5075549cc11a57abfa230980b9 Mon Sep 17 00:00:00 2001 From: cosa65 Date: Fri, 16 Dec 2022 10:17:35 -0300 Subject: [PATCH 09/34] Some small fixes --- pipeline-runner/R/handle_data.R | 1 + pipeline-runner/R/init-functions.R | 6 +++--- pipeline-runner/R/subset-1-subset_seurat.R | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index 115c6c4e..18398920 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -27,6 +27,7 @@ upload_cells_id <- function(pipeline_config, object_key, cells_id) { load_processed_scdata <- function (s3, pipeline_config, experiment_id) { bucket <- pipeline_config$processed_bucket + message("Loading processed scdata") message(bucket) message(paste(experiment_id, "r.rds", sep = "/")) diff --git a/pipeline-runner/R/init-functions.R b/pipeline-runner/R/init-functions.R index ed79bd8f..56719f26 100644 --- a/pipeline-runner/R/init-functions.R +++ b/pipeline-runner/R/init-functions.R @@ -204,7 +204,7 @@ run_qc_step <- function(scdata, config, tasks, task_name, cells_id, sample_id, d #' #' @return list of task results #' -run_gem2s_step <- function(prev_out, input, pipeline_config, tasks, task_name) { +run_pipeline_step <- function(prev_out, input, pipeline_config, tasks, task_name) { if (!task_name %in% names(tasks)) { stop("Invalid task name given: ", task_name) } @@ -242,7 +242,7 @@ call_gem2s <- function(task_name, input, pipeline_config) { check_input(input) tasks <- lapply(GEM2S_TASK_LIST, get) - c(data, task_out) %<-% run_gem2s_step(prev_out, input, pipeline_config, tasks, task_name) + c(data, task_out) %<-% run_pipeline_step(prev_out, input, pipeline_config, tasks, task_name) assign("prev_out", task_out, pos = ".GlobalEnv") message_id <- send_gem2s_update_to_api(pipeline_config, experiment_id, task_name, data, input) @@ -277,7 +277,7 @@ call_subset <- function(task_name, input, pipeline_config) { check_input(input) tasks <- lapply(SUBSET_SEURAT_TASK_LIST, get) - c(data, task_out) %<-% run_gem2s_step(prev_out, input, pipeline_config, tasks, task_name) + c(data, task_out) %<-% run_pipeline_step(prev_out, input, pipeline_config, tasks, task_name) assign("prev_out", task_out, pos = ".GlobalEnv") message_id <- send_gem2s_update_to_api(pipeline_config, experiment_id, task_name, data, input) diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index 81b84169..deadf5e8 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -10,12 +10,12 @@ #' - cellSetKeys character vector of cellset keys to subset #' - experimentName character #' @param pipeline_config list +#' @param prev_out list, ignored because this is the first step in the subset pipeline #' #' @return list containing scdata_list, annotations and sample_id_map #' @export #' -create_subset_experiment <- function(input, pipeline_config) { - +create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { # load parent processed scdata and cellsets s3 <- paws::s3(config = pipeline_config$aws_config) parent_scdata <- load_processed_scdata(s3, pipeline_config, input$parentExperimentId) From e04502736f68a74d07cc854d1b657183c15a8995 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 16 Dec 2022 14:06:26 -0300 Subject: [PATCH 10/34] add config, format --- pipeline-runner/R/subset-1-subset_seurat.R | 47 +++++++++++++++------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index 81b84169..a3821e52 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -14,8 +14,7 @@ #' @return list containing scdata_list, annotations and sample_id_map #' @export #' -create_subset_experiment <- function(input, pipeline_config) { - +create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { # load parent processed scdata and cellsets s3 <- paws::s3(config = pipeline_config$aws_config) parent_scdata <- load_processed_scdata(s3, pipeline_config, input$parentExperimentId) @@ -26,7 +25,7 @@ create_subset_experiment <- function(input, pipeline_config) { # subset seurat object, remove unnecesary data scdata <- subset_ids(parent_scdata, cell_ids_to_keep) scdata <- diet_scdata(scdata) - scdata@misc$experimentId <- input$subsetExperimentId + scdata@misc$experimentId <- input$experimentId # delete parent_scdata to free memory rm(parent_scdata) @@ -39,12 +38,24 @@ create_subset_experiment <- function(input, pipeline_config) { # split by sample scdata_list <- Seurat::SplitObject(scdata, split.by = "samples") + # TODO: remove from here and refactor all pipeline. + config <- list( + name = input$experimentName, + samples = input$sampleIds, + organism = input$organism, + input = list(type = input$input$type), + sampleOptions = input$sampleOptions + ) + # structure step output res <- list( data = list(), - output = list(scdata_list = scdata_list, - annot = scdata@misc$gene_annotations, - sample_id_map = sample_id_map) + output = list( + scdata_list = scdata_list, + annot = scdata@misc$gene_annotations, + sample_id_map = sample_id_map, + config = config + ) ) message("\nSubsetting of Seurat object step complete.") @@ -65,9 +76,11 @@ create_subset_experiment <- function(input, pipeline_config) { #' @export #' create_sample_id_map <- function(parent_sample_id) { - subset_sample_id <- uuid::UUIDgenerate(n = length(parent_sample_id)) - sample_id_map <-data.table::data.table(parent_sample_id = parent_sample_id, - subset_sample_id = subset_sample_id) + subset_sample_id <- uuid::UUIDgenerate(n = length(parent_sample_id)) + sample_id_map <- data.table::data.table( + parent_sample_id = parent_sample_id, + subset_sample_id = subset_sample_id + ) return(sample_id_map) } @@ -99,13 +112,17 @@ add_new_sample_ids <- function(scdata, sample_id_map) { #' @export #' diet_scdata <- function(scdata) { - lean_scdata <- Seurat::CreateSeuratObject(counts = scdata@assays$RNA@counts, - meta.data = scdata@meta.data, - min.cells = 0, - min.features = 0) + lean_scdata <- Seurat::CreateSeuratObject( + counts = scdata@assays$RNA@counts, + meta.data = scdata@meta.data, + min.cells = 0, + min.features = 0 + ) - lean_scdata@misc <- list(gene_annotations = scdata@misc$gene_annotations, - parent_experimentId = scdata@misc$experimentId) + lean_scdata@misc <- list( + gene_annotations = scdata@misc$gene_annotations, + parent_experimentId = scdata@misc$experimentId + ) return(lean_scdata) } From fc968011eefd728533e582659b1edc2fe0e84aaa Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Wed, 21 Dec 2022 14:08:08 -0300 Subject: [PATCH 11/34] make everything work --- pipeline-runner/R/gem2s-6-prepare_experiment.R | 2 +- pipeline-runner/R/gem2s-7-upload_to_aws.R | 6 +++++- pipeline-runner/R/init-functions.R | 10 +++++++++- pipeline-runner/R/subset-1-subset_seurat.R | 9 ++++----- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/pipeline-runner/R/gem2s-6-prepare_experiment.R b/pipeline-runner/R/gem2s-6-prepare_experiment.R index 0d9b13b4..72e58a38 100644 --- a/pipeline-runner/R/gem2s-6-prepare_experiment.R +++ b/pipeline-runner/R/gem2s-6-prepare_experiment.R @@ -14,7 +14,7 @@ prepare_experiment <- function(input, pipeline_config, prev_out) { message("Preparing experiment ...") - check_names <- c("config", "counts_list", "annot", "doublet_scores", "scdata_list", "disable_qc_filters") + check_names <- c("config", "edrops", "annot", "scdata_list", "disable_qc_filters") check_prev_out(prev_out, check_names) scdata_list <- prev_out$scdata_list diff --git a/pipeline-runner/R/gem2s-7-upload_to_aws.R b/pipeline-runner/R/gem2s-7-upload_to_aws.R index 913fcc86..fccc8cd7 100644 --- a/pipeline-runner/R/gem2s-7-upload_to_aws.R +++ b/pipeline-runner/R/gem2s-7-upload_to_aws.R @@ -10,7 +10,7 @@ #' upload_to_aws <- function(input, pipeline_config, prev_out) { message("Uploading to AWS ...") - check_names <- c("config", "counts_list", "annot", "doublet_scores", "scdata_list", "qc_config", "disable_qc_filters") + check_names <- c("config", "scdata_list", "qc_config", "disable_qc_filters") check_prev_out(prev_out, check_names) experiment_id <- input$experimentId @@ -21,6 +21,10 @@ upload_to_aws <- function(input, pipeline_config, prev_out) { config <- prev_out$config qc_config <- prev_out$qc_config disable_qc_filters <- prev_out$disable_qc_filters + if("sample_id_map" %in% names(prev_out)) { + input$sampleIds <- names(scdata_list) + input$sampleNames <- names(scdata_list) + } message("Constructing cell sets ...") cell_sets <- get_cell_sets(scdata_list, input) diff --git a/pipeline-runner/R/init-functions.R b/pipeline-runner/R/init-functions.R index 56719f26..fe03f867 100644 --- a/pipeline-runner/R/init-functions.R +++ b/pipeline-runner/R/init-functions.R @@ -280,6 +280,15 @@ call_subset <- function(task_name, input, pipeline_config) { c(data, task_out) %<-% run_pipeline_step(prev_out, input, pipeline_config, tasks, task_name) assign("prev_out", task_out, pos = ".GlobalEnv") + if (task_name == names(tasks)[1]) { + assign("cells_id", generate_first_step_ids(prev_out$scdata_list), pos = ".GlobalEnv") + next_task <- "dataIntegration" + for(sample_id in names(prev_out$scdata_list)) { + object_key <- paste0(experiment_id, "/", next_task, "/", sample_id, ".rds") + upload_cells_id(pipeline_config, object_key, cells_id) + } + } + message_id <- send_gem2s_update_to_api(pipeline_config, experiment_id, task_name, data, input) return(message_id) @@ -474,7 +483,6 @@ wrapper <- function(input, pipeline_config) { message("\n------\nStarting task: ", task_name, "\n") message("Input:") str(input) - message("") # common to gem2s and data processing server <- input$server diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index d0b607d4..b5de9cf0 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -42,10 +42,7 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { # TODO: remove from here and refactor all pipeline. config <- list( name = input$experimentName, - samples = input$sampleIds, - organism = input$organism, - input = list(type = input$input$type), - sampleOptions = input$sampleOptions + samples = sample_id_map$subset_sample_id ) # structure step output @@ -54,8 +51,10 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { output = list( scdata_list = scdata_list, annot = scdata@misc$gene_annotations, + edrops = NULL, sample_id_map = sample_id_map, - config = config + config = config, + disable_qc_filters = TRUE ) ) From 8e6218a84636e54573c0b80a3f9f0f7df1f7893a Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Wed, 21 Dec 2022 14:33:17 -0300 Subject: [PATCH 12/34] send sample_id_map to API --- pipeline-runner/R/gem2s-7-upload_to_aws.R | 1 + pipeline-runner/R/subset-1-subset_seurat.R | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pipeline-runner/R/gem2s-7-upload_to_aws.R b/pipeline-runner/R/gem2s-7-upload_to_aws.R index fccc8cd7..37294f98 100644 --- a/pipeline-runner/R/gem2s-7-upload_to_aws.R +++ b/pipeline-runner/R/gem2s-7-upload_to_aws.R @@ -21,6 +21,7 @@ upload_to_aws <- function(input, pipeline_config, prev_out) { config <- prev_out$config qc_config <- prev_out$qc_config disable_qc_filters <- prev_out$disable_qc_filters + if("sample_id_map" %in% names(prev_out)) { input$sampleIds <- names(scdata_list) input$sampleNames <- names(scdata_list) diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index b5de9cf0..6608cd57 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -47,7 +47,9 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { # structure step output res <- list( - data = list(), + data = list( + sample_id_map = sample_id_map + ), output = list( scdata_list = scdata_list, annot = scdata@misc$gene_annotations, From 97a51da74ff37caf3b08fd9c295a0a8032128492 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Wed, 21 Dec 2022 14:42:34 -0300 Subject: [PATCH 13/34] better sample mapping --- pipeline-runner/R/init-functions.R | 2 -- pipeline-runner/R/subset-1-subset_seurat.R | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pipeline-runner/R/init-functions.R b/pipeline-runner/R/init-functions.R index fe03f867..9080a5b7 100644 --- a/pipeline-runner/R/init-functions.R +++ b/pipeline-runner/R/init-functions.R @@ -266,8 +266,6 @@ call_gem2s <- function(task_name, input, pipeline_config) { #' call_subset <- function(task_name, input, pipeline_config) { experiment_id <- input$experimentId - # remove when it's added to the input - input$subset_experiment <- TRUE if (!exists("prev_out")) { remove_cell_ids(pipeline_config, experiment_id) diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index 6608cd57..de20cf6c 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -48,7 +48,7 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { # structure step output res <- list( data = list( - sample_id_map = sample_id_map + sampleIdMap = sample_id_map ), output = list( scdata_list = scdata_list, @@ -79,10 +79,9 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { #' create_sample_id_map <- function(parent_sample_id) { subset_sample_id <- uuid::UUIDgenerate(n = length(parent_sample_id)) - sample_id_map <- data.table::data.table( - parent_sample_id = parent_sample_id, - subset_sample_id = subset_sample_id - ) + + sample_id_map <- as.list(subset_sample_id) + names(sample_id_map) <- parent_sample_id return(sample_id_map) } From ba84375480dda67406642e316fc8f13dd139d496 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Thu, 22 Dec 2022 12:57:16 -0300 Subject: [PATCH 14/34] add cellset type to cellset data.table --- pipeline-runner/R/handle_data.R | 47 ++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index 18398920..9b80d7f2 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -434,6 +434,37 @@ load_cellsets <- function(s3, pipeline_config, experiment_id) { } +#' Bind columns not failing if there's an empty data.table +#' +#' @param dt data.table +#' @param ... columns to add +#' +#' @return data.table with new columns +#' @export +#' +safe_cbind <- function(dt, ...) { + if (nrow(dt) > 0) { + dt <- cbind(dt, ...) + } + return(dt) +} + + +#' add cellset type column to cellsets data.table +#' +#' helper to correctly name the cellset_type column. +#' +#' @param dt data.table +#' @param col string of corresponding cellset type +#' +#' @return +#' @export +#' +cbind_cellset_type <- function(dt, col) { + dt <- safe_cbind(dt, cellset_type = col) +} + + #' Parse cellsets object to data.table #' #' Gets the cellsets list and converts it to a tidy data.table @@ -445,10 +476,18 @@ load_cellsets <- function(s3, pipeline_config, experiment_id) { #' parse_cellsets <- function(cellsets) { - data.table::setDT(cellsets$cellSets) + dt_list <- cellsets$cellSets$children + + lapply(dt_list, data.table::setDT) + dt_list <- purrr::map2(dt_list, cellsets$cellSets$key, cbind_cellset_type) + # fill columns in case there are empty cellset classes - dt <- data.table::rbindlist(cellsets$cellSets$children, fill = TRUE) - # unnest, and change column name - dt[, setNames(.(unlist(cellIds)), "cell_id"), by = .(key, name)] + dt <- data.table::rbindlist(dt_list, fill = TRUE) + + # rename cellset type to metadata in case of metadata cellsets + dt[!cellset_type%in% c("louvain", "scratchpad", "sample"), cellset_type := "metadata"] + # unnest, and change column name + dt[, setNames(.(unlist(cellIds)), "cell_id"), by = .(key, name, cellset_type)] } + From a057e5ab6c96531200cda827aa788596ada6f015 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Thu, 22 Dec 2022 15:49:35 -0300 Subject: [PATCH 15/34] add unittest for helpers --- .../tests/testthat/test-handle_data.R | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/pipeline-runner/tests/testthat/test-handle_data.R b/pipeline-runner/tests/testthat/test-handle_data.R index cfd59197..e5c3ac42 100644 --- a/pipeline-runner/tests/testthat/test-handle_data.R +++ b/pipeline-runner/tests/testthat/test-handle_data.R @@ -6,6 +6,12 @@ mock_sns <- function(config) { )) } +mock_cellsets <- function(){ + # get a snapshot cellsets json + jsonlite::fromJSON("tests/testthat/_snaps/gem2s/gem2s-7-mock_experiment_id-cellsets.json", flatten = TRUE) + +} + test_that("send_gem2s_update_to_api completes successfully", { pipeline_config <- list( sns_topic = 'ExampleTopic', @@ -88,3 +94,79 @@ test_that("send_output_to_api completes successfully", { expect_true(response == 'ok') }) + + +test_that("safe_cbind returns empty data.table when binding an empty data.table with a vector", { + + dt_empty <- data.table::data.table() + col <- c(a_col = "a_value") + + res <- safe_cbind(dt_empty, col) + + expect_identical(res, dt_empty) + +}) + + +test_that("safe_cbind adds a column to a non-empty data.table", { + dt <- data.table::data.table(col1 = 1:10, col2 = 11:20) + values <- seq(1, 20, 2) + res <- safe_cbind(dt, bound_col = values) + + expect_identical(res[,bound_col], values) + expect_equal(ncol(res), ncol(dt) + 1) +}) + + +test_that("safe_cbind names bound column as expected", { + dt <- data.table::data.table(col1 = 1:10, col2 = 11:20) + values <- seq(1, 20, 2) + res <- safe_cbind(dt, my_expected_column_name = values) + + expect_true("my_expected_column_name" %in% names(res)) + expect_identical(res[,my_expected_column_name], values) + + +}) + + +test_that("safe_cbind binds more than one column and names accordingly", { + + dt <- data.table::data.table(col1 = 1:10, col2 = 11:20) + values_1 <- seq(1, 20, 2) + values_2 <- values_1 + 2 + + res <- safe_cbind(dt, an_interesting_variable = values_1, an_interesting_variable_plus_2 = values_2) + + expect_true("an_interesting_variable" %in% names(res)) + expect_identical(res[,an_interesting_variable], values_1) + + expect_true("an_interesting_variable_plus_2" %in% names(res)) + expect_identical(res[,an_interesting_variable_plus_2], values_2) + +}) + + +test_that("cbind_cellset_type names the bound column correctly", { + + dt <- data.table::data.table(col1 = 1:10, col2 = 11:20) + values <- seq(1, 20, 2) + + res <- cbind_cellset_type(dt, values_1) + + expect_true("cellset_type" %in% names(res)) + expect_identical(res[,cellset_type], values) + +}) + + +test_that("parse_cellsets parses a cellset object", { + + cellsets <- mock_cellsets() + + res <- parse_cellsets(cellsets) + + expect_s3_class(res, "data.table") + expect_identical(names(res), c("key", "name", "cellset_type", "cell_id")) + +}) From 723b3df63b4536b8a3be185f25555719ebaf2ba9 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Thu, 22 Dec 2022 16:22:22 -0300 Subject: [PATCH 16/34] refactor, fix addition of sample_ids --- pipeline-runner/R/subset-1-subset_seurat.R | 43 ++++++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index de20cf6c..9cc73791 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -16,21 +16,16 @@ #' @export #' create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { - # load parent processed scdata and cellsets - s3 <- paws::s3(config = pipeline_config$aws_config) - parent_scdata <- load_processed_scdata(s3, pipeline_config, input$parentExperimentId) - parent_cellsets <- parse_cellsets(load_cellsets(s3, pipeline_config, input$parentExperimentId)) - cell_ids_to_keep <- parent_cellsets[key %in% input$cellSetKeys, cell_id] + parent <- load_parental_data(input, pipeline_config) + + cell_ids_to_keep <- parent$cellsets[key %in% input$cellSetKeys, cell_id] # subset seurat object, remove unnecesary data - scdata <- subset_ids(parent_scdata, cell_ids_to_keep) + scdata <- subset_ids(parent$scdata, cell_ids_to_keep) scdata <- diet_scdata(scdata) scdata@misc$experimentId <- input$experimentId - # delete parent_scdata to free memory - rm(parent_scdata) - # add new sample_ids, keep originals in a new variable scdata$parent_samples <- scdata$samples sample_id_map <- create_sample_id_map(unique(scdata$parent_samples)) @@ -56,7 +51,8 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { edrops = NULL, sample_id_map = sample_id_map, config = config, - disable_qc_filters = TRUE + disable_qc_filters = TRUE, + parent_cellsets = parent$cellsets ) ) @@ -65,6 +61,29 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { } +#' load parent experiment data +#' +#' Loads the processed rds and cellsets file from the parent experiment from s3. +#' +#' @param input list of input parameters +#' @param pipelne_config list of pipeline parameters +#' +#' @return list with scdata and parsed cellsets +#' @export +#' +load_parental_data <- function(input, pipelne_config) { + # load parent processed scdata and cellsets + s3 <- paws::s3(config = pipeline_config$aws_config) + parent_scdata <- + load_processed_scdata(s3, pipeline_config, input$parentExperimentId) + parent_cellsets <- + parse_cellsets(load_cellsets(s3, pipeline_config, input$parentExperimentId)) + + return(list(scdata = parent_scdata, cellsets = parent_cellsets)) + +} + + #' generate a sample id mapping for remaining samples after subset #' #' New sample ids must be created, but the number of samples depends on which @@ -96,8 +115,8 @@ create_sample_id_map <- function(parent_sample_id) { #' @export #' add_new_sample_ids <- function(scdata, sample_id_map) { - sample_map_idx <- match(scdata$parent_samples, sample_id_map$parent_sample_id) - scdata$samples <- sample_id_map$subset_sample_id[sample_map_idx] + sample_map_idx <- match(scdata$parent_samples, names(sample_id_map)) + scdata$samples <- unname(unlist(sample_id_map[sample_map_idx])) return(scdata) } From 624f0ad8bf19cb07c9002179d4d31dc965d09176 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Thu, 22 Dec 2022 16:42:36 -0300 Subject: [PATCH 17/34] more refactor --- pipeline-runner/R/subset-1-subset_seurat.R | 93 +++++++++++++++------- 1 file changed, 63 insertions(+), 30 deletions(-) diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index 9cc73791..810f38c6 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -19,19 +19,10 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { parent <- load_parental_data(input, pipeline_config) - cell_ids_to_keep <- parent$cellsets[key %in% input$cellSetKeys, cell_id] - - # subset seurat object, remove unnecesary data - scdata <- subset_ids(parent$scdata, cell_ids_to_keep) - scdata <- diet_scdata(scdata) - scdata@misc$experimentId <- input$experimentId - - # add new sample_ids, keep originals in a new variable - scdata$parent_samples <- scdata$samples - sample_id_map <- create_sample_id_map(unique(scdata$parent_samples)) - scdata <- add_new_sample_ids(scdata, sample_id_map) + scdata <- subset_experiment(input, parent) + sample_id_map <- create_sample_id_map(unique(scdata$samples)) + scdata <- add_subset_metadata(input, scdata, sample_id_map) - # split by sample scdata_list <- Seurat::SplitObject(scdata, split.by = "samples") # TODO: remove from here and refactor all pipeline. @@ -80,7 +71,54 @@ load_parental_data <- function(input, pipelne_config) { parse_cellsets(load_cellsets(s3, pipeline_config, input$parentExperimentId)) return(list(scdata = parent_scdata, cellsets = parent_cellsets)) +} + +#' Remove all unnecessary data from the parent seurat object +#' +#' Seurat::DietSeurat is not able to remove certain slots from a seurat object. +#' This function also removes elements from the misc slot which are not necessary +#' +#' @param scdata SeuratObject +#' +#' @return leaner SeuratObject +#' @export +#' +diet_scdata <- function(scdata) { + lean_scdata <- Seurat::CreateSeuratObject( + counts = scdata@assays$RNA@counts, + meta.data = scdata@meta.data, + min.cells = 0, + min.features = 0 + ) + + lean_scdata@misc <- list( + gene_annotations = scdata@misc$gene_annotations, + parent_experimentId = scdata@misc$experimentId + ) + + return(lean_scdata) +} + + +#' Subset seurat object by the input cellset keys +#' +#' This function takes the cellset keys sent by the API, extracts the cell_ids +#' that belong to them, subsets the seurat object and removes all unnecessary +#' data from it. +#' +#' @param input list of input parameters, containing cellSetKeys to subset +#' @param parent list containing parent scdata and parsed cellsets +#' +#' @return subset seurat object +#' @export +#' +subset_experiment <- function(input, parent) { + # subset seurat object, remove unnecesary data + cell_ids_to_keep <- parent$cellsets[key %in% input$cellSetKeys, cell_id] + scdata <- subset_ids(parent$scdata, cell_ids_to_keep) + scdata <- diet_scdata(scdata) + return(scdata) } @@ -115,34 +153,29 @@ create_sample_id_map <- function(parent_sample_id) { #' @export #' add_new_sample_ids <- function(scdata, sample_id_map) { + sample_map_idx <- match(scdata$parent_samples, names(sample_id_map)) scdata$samples <- unname(unlist(sample_id_map[sample_map_idx])) return(scdata) } -#' Remove all unnecessary data from the parent seurat object +#' add experiment level metadata to subset seurat object #' -#' Seurat::DietSeurat is not able to remove certain slots from a seurat object. -#' This function also removes elements from the misc slot which are not necessary -#' -#' @param scdata SeuratObject +#' @param input list of input params, containing the experimentId +#' @param scdata seurat object +#' @param sample_id_map list with mapping between sample_ids from +#' parent and subset experiments #' -#' @return leaner SeuratObject +#' @return scdata with additional metadata #' @export #' -diet_scdata <- function(scdata) { - lean_scdata <- Seurat::CreateSeuratObject( - counts = scdata@assays$RNA@counts, - meta.data = scdata@meta.data, - min.cells = 0, - min.features = 0 - ) +add_subset_metadata <- function(input, scdata, sample_id_map) { - lean_scdata@misc <- list( - gene_annotations = scdata@misc$gene_annotations, - parent_experimentId = scdata@misc$experimentId - ) + # add new sample_ids, keep originals in a new variable + scdata$parent_samples <- scdata$samples + scdata <- add_new_sample_ids(scdata, sample_id_map) + scdata@misc$experimentId <- input$experimentId - return(lean_scdata) + return(scdata) } From e73e87fabef0f5d3e202669dd134165e5a2bc0f2 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Thu, 22 Dec 2022 16:47:16 -0300 Subject: [PATCH 18/34] document --- pipeline-runner/NAMESPACE | 5 +++++ pipeline-runner/R/handle_data.R | 2 +- pipeline-runner/man/add_subset_metadata.Rd | 22 +++++++++++++++++++ .../{call_subset_seurat.Rd => call_subset.Rd} | 0 pipeline-runner/man/cbind_cellset_type.Rd | 19 ++++++++++++++++ .../man/create_subset_experiment.Rd | 4 +++- pipeline-runner/man/load_parental_data.Rd | 19 ++++++++++++++++ ...run_gem2s_step.Rd => run_pipeline_step.Rd} | 6 ++--- pipeline-runner/man/safe_cbind.Rd | 19 ++++++++++++++++ pipeline-runner/man/subset_experiment.Rd | 21 ++++++++++++++++++ 10 files changed, 112 insertions(+), 5 deletions(-) create mode 100644 pipeline-runner/man/add_subset_metadata.Rd rename pipeline-runner/man/{call_subset_seurat.Rd => call_subset.Rd} (100%) create mode 100644 pipeline-runner/man/cbind_cellset_type.Rd create mode 100644 pipeline-runner/man/load_parental_data.Rd rename pipeline-runner/man/{run_gem2s_step.Rd => run_pipeline_step.Rd} (84%) create mode 100644 pipeline-runner/man/safe_cbind.Rd create mode 100644 pipeline-runner/man/subset_experiment.Rd diff --git a/pipeline-runner/NAMESPACE b/pipeline-runner/NAMESPACE index fc511d60..c111a52a 100644 --- a/pipeline-runner/NAMESPACE +++ b/pipeline-runner/NAMESPACE @@ -2,8 +2,10 @@ export(add_metadata) export(add_new_sample_ids) +export(add_subset_metadata) export(build_cc_gene_list) export(build_metadata_cellsets) +export(cbind_cellset_type) export(create_sample_id_map) export(create_scdata) export(create_seurat) @@ -30,6 +32,7 @@ export(integrate_scdata) export(learn_from_sketches) export(list_exclude_genes) export(load_cellsets) +export(load_parental_data) export(load_user_files) export(log_normalize) export(make_annot_with_ids) @@ -46,7 +49,9 @@ export(runClusters) export(run_emptydrops) export(run_geosketch) export(run_pca) +export(safe_cbind) export(score_doublets) +export(subset_experiment) export(subset_ids) export(subset_safe) export(sym_to_ids) diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index 9b80d7f2..f180b4af 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -457,7 +457,7 @@ safe_cbind <- function(dt, ...) { #' @param dt data.table #' @param col string of corresponding cellset type #' -#' @return +#' @return data.table with cellset_type #' @export #' cbind_cellset_type <- function(dt, col) { diff --git a/pipeline-runner/man/add_subset_metadata.Rd b/pipeline-runner/man/add_subset_metadata.Rd new file mode 100644 index 00000000..d4e5c61c --- /dev/null +++ b/pipeline-runner/man/add_subset_metadata.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/subset-1-subset_seurat.R +\name{add_subset_metadata} +\alias{add_subset_metadata} +\title{add experiment level metadata to subset seurat object} +\usage{ +add_subset_metadata(input, scdata, sample_id_map) +} +\arguments{ +\item{input}{list of input params, containing the experimentId} + +\item{scdata}{seurat object} + +\item{sample_id_map}{list with mapping between sample_ids from +parent and subset experiments} +} +\value{ +scdata with additional metadata +} +\description{ +add experiment level metadata to subset seurat object +} diff --git a/pipeline-runner/man/call_subset_seurat.Rd b/pipeline-runner/man/call_subset.Rd similarity index 100% rename from pipeline-runner/man/call_subset_seurat.Rd rename to pipeline-runner/man/call_subset.Rd diff --git a/pipeline-runner/man/cbind_cellset_type.Rd b/pipeline-runner/man/cbind_cellset_type.Rd new file mode 100644 index 00000000..27b345c5 --- /dev/null +++ b/pipeline-runner/man/cbind_cellset_type.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/handle_data.R +\name{cbind_cellset_type} +\alias{cbind_cellset_type} +\title{add cellset type column to cellsets data.table} +\usage{ +cbind_cellset_type(dt, col) +} +\arguments{ +\item{dt}{data.table} + +\item{col}{string of corresponding cellset type} +} +\value{ +data.table with cellset_type +} +\description{ +helper to correctly name the cellset_type column. +} diff --git a/pipeline-runner/man/create_subset_experiment.Rd b/pipeline-runner/man/create_subset_experiment.Rd index 964e1a47..cb8321ef 100644 --- a/pipeline-runner/man/create_subset_experiment.Rd +++ b/pipeline-runner/man/create_subset_experiment.Rd @@ -4,7 +4,7 @@ \alias{create_subset_experiment} \title{create a subset experiment} \usage{ -create_subset_experiment(input, pipeline_config) +create_subset_experiment(input, pipeline_config, prev_out = NULL) } \arguments{ \item{input}{list containing: @@ -16,6 +16,8 @@ create_subset_experiment(input, pipeline_config) }} \item{pipeline_config}{list} + +\item{prev_out}{list, ignored because this is the first step in the subset pipeline} } \value{ list containing scdata_list, annotations and sample_id_map diff --git a/pipeline-runner/man/load_parental_data.Rd b/pipeline-runner/man/load_parental_data.Rd new file mode 100644 index 00000000..4fedfe97 --- /dev/null +++ b/pipeline-runner/man/load_parental_data.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/subset-1-subset_seurat.R +\name{load_parental_data} +\alias{load_parental_data} +\title{load parent experiment data} +\usage{ +load_parental_data(input, pipelne_config) +} +\arguments{ +\item{input}{list of input parameters} + +\item{pipelne_config}{list of pipeline parameters} +} +\value{ +list with scdata and parsed cellsets +} +\description{ +Loads the processed rds and cellsets file from the parent experiment from s3. +} diff --git a/pipeline-runner/man/run_gem2s_step.Rd b/pipeline-runner/man/run_pipeline_step.Rd similarity index 84% rename from pipeline-runner/man/run_gem2s_step.Rd rename to pipeline-runner/man/run_pipeline_step.Rd index b34f0348..924f7e33 100644 --- a/pipeline-runner/man/run_gem2s_step.Rd +++ b/pipeline-runner/man/run_pipeline_step.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/init-functions.R -\name{run_gem2s_step} -\alias{run_gem2s_step} +\name{run_pipeline_step} +\alias{run_pipeline_step} \title{Run GEM2S step} \usage{ -run_gem2s_step(prev_out, input, pipeline_config, tasks, task_name) +run_pipeline_step(prev_out, input, pipeline_config, tasks, task_name) } \arguments{ \item{prev_out}{list output from previous step} diff --git a/pipeline-runner/man/safe_cbind.Rd b/pipeline-runner/man/safe_cbind.Rd new file mode 100644 index 00000000..c9182e73 --- /dev/null +++ b/pipeline-runner/man/safe_cbind.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/handle_data.R +\name{safe_cbind} +\alias{safe_cbind} +\title{Bind columns not failing if there's an empty data.table} +\usage{ +safe_cbind(dt, ...) +} +\arguments{ +\item{dt}{data.table} + +\item{...}{columns to add} +} +\value{ +data.table with new columns +} +\description{ +Bind columns not failing if there's an empty data.table +} diff --git a/pipeline-runner/man/subset_experiment.Rd b/pipeline-runner/man/subset_experiment.Rd new file mode 100644 index 00000000..ba3dc297 --- /dev/null +++ b/pipeline-runner/man/subset_experiment.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/subset-1-subset_seurat.R +\name{subset_experiment} +\alias{subset_experiment} +\title{Subset seurat object by the input cellset keys} +\usage{ +subset_experiment(input, parent) +} +\arguments{ +\item{input}{list of input parameters, containing cellSetKeys to subset} + +\item{parent}{list containing parent scdata and parsed cellsets} +} +\value{ +subset seurat object +} +\description{ +This function takes the cellset keys sent by the API, extracts the cell_ids +that belong to them, subsets the seurat object and removes all unnecessary +data from it. +} From c6f531b1bc05c70b6a1512ae083e0a101cc299f1 Mon Sep 17 00:00:00 2001 From: cosa65 Date: Thu, 22 Dec 2022 17:10:31 -0300 Subject: [PATCH 19/34] Fix parameter mistyped --- pipeline-runner/R/subset-1-subset_seurat.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index 810f38c6..82a886bb 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -62,7 +62,7 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { #' @return list with scdata and parsed cellsets #' @export #' -load_parental_data <- function(input, pipelne_config) { +load_parental_data <- function(input, pipeline_config) { # load parent processed scdata and cellsets s3 <- paws::s3(config = pipeline_config$aws_config) parent_scdata <- From 7e4ce6a9191cf69fe40cbbb9b52a5f8e561f2d9d Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 12:35:29 -0300 Subject: [PATCH 20/34] fix some cellsets have type column --- pipeline-runner/R/handle_data.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index f180b4af..03e24a84 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -452,12 +452,13 @@ safe_cbind <- function(dt, ...) { #' add cellset type column to cellsets data.table #' -#' helper to correctly name the cellset_type column. +#' helper to correctly name the cellset type column. some cellsets already +#' contain a "type" slot, which complicates matters, so we chose `cellset_type`, #' #' @param dt data.table #' @param col string of corresponding cellset type #' -#' @return data.table with cellset_type +#' @return data.table with cellset_type column #' @export #' cbind_cellset_type <- function(dt, col) { @@ -484,10 +485,13 @@ parse_cellsets <- function(cellsets) { # fill columns in case there are empty cellset classes dt <- data.table::rbindlist(dt_list, fill = TRUE) - # rename cellset type to metadata in case of metadata cellsets - dt[!cellset_type%in% c("louvain", "scratchpad", "sample"), cellset_type := "metadata"] + # change cellset type to more generic names + dt[cellset_type %in% c("louvain", "leiden"), cellset_type := "cluster"] + dt[!cellset_type %in% c("cluster", "scratchpad", "sample"), cellset_type := "metadata"] # unnest, and change column name - dt[, setNames(.(unlist(cellIds)), "cell_id"), by = .(key, name, cellset_type)] + dt <- dt[, setNames(.(unlist(cellIds)), "cell_id"), by = .(key, name, cellset_type)] + data.table::setnames(dt, "cellset_type", "type") + return(dt) } From ca96c112d2a65eea38a9f98133ca1e457d093174 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 12:35:55 -0300 Subject: [PATCH 21/34] fix handle data test --- pipeline-runner/tests/testthat/test-handle_data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline-runner/tests/testthat/test-handle_data.R b/pipeline-runner/tests/testthat/test-handle_data.R index e5c3ac42..0869f5fb 100644 --- a/pipeline-runner/tests/testthat/test-handle_data.R +++ b/pipeline-runner/tests/testthat/test-handle_data.R @@ -167,6 +167,6 @@ test_that("parse_cellsets parses a cellset object", { res <- parse_cellsets(cellsets) expect_s3_class(res, "data.table") - expect_identical(names(res), c("key", "name", "cellset_type", "cell_id")) + expect_identical(names(res), c("key", "name", "type", "cell_id")) }) From a9f5dac3e368efdda318441a635ffdb192dc1e7e Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 12:36:25 -0300 Subject: [PATCH 22/34] add unique for non-mutually-exclusive cellsets --- pipeline-runner/R/subset-1-subset_seurat.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index 82a886bb..7f6495cc 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -115,7 +115,7 @@ diet_scdata <- function(scdata) { #' subset_experiment <- function(input, parent) { # subset seurat object, remove unnecesary data - cell_ids_to_keep <- parent$cellsets[key %in% input$cellSetKeys, cell_id] + cell_ids_to_keep <- unique(parent$cellsets[key %in% input$cellSetKeys, cell_id]) scdata <- subset_ids(parent$scdata, cell_ids_to_keep) scdata <- diet_scdata(scdata) return(scdata) From 1fea094fb8e1282843efcef9dd5a60a99a8c9b57 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 12:50:38 -0300 Subject: [PATCH 23/34] remove unnecessary block --- pipeline-runner/R/init-functions.R | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pipeline-runner/R/init-functions.R b/pipeline-runner/R/init-functions.R index 9080a5b7..414a463d 100644 --- a/pipeline-runner/R/init-functions.R +++ b/pipeline-runner/R/init-functions.R @@ -278,20 +278,12 @@ call_subset <- function(task_name, input, pipeline_config) { c(data, task_out) %<-% run_pipeline_step(prev_out, input, pipeline_config, tasks, task_name) assign("prev_out", task_out, pos = ".GlobalEnv") - if (task_name == names(tasks)[1]) { - assign("cells_id", generate_first_step_ids(prev_out$scdata_list), pos = ".GlobalEnv") - next_task <- "dataIntegration" - for(sample_id in names(prev_out$scdata_list)) { - object_key <- paste0(experiment_id, "/", next_task, "/", sample_id, ".rds") - upload_cells_id(pipeline_config, object_key, cells_id) - } - } - message_id <- send_gem2s_update_to_api(pipeline_config, experiment_id, task_name, data, input) return(message_id) } + #' Call QC pipeline #' #' Runs step `task_name` of the data processing pipeline, sends plot data to s3 @@ -443,6 +435,7 @@ pipeline_heartbeat <- function(task_token, aws_config) { } } + #' Start heartbeat as a background process #' #' messages inside the background process will ONLY be printed into @@ -501,6 +494,7 @@ wrapper <- function(input, pipeline_config) { return(message_id) } + #' Pipeline error handler #' #' Pretty prints errors, sends roport to the API, and uploads debug output to From 1ee3b7a88e0fa9a5bc958373c246328d373d0d6d Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 12:50:53 -0300 Subject: [PATCH 24/34] document --- pipeline-runner/man/cbind_cellset_type.Rd | 5 +++-- pipeline-runner/man/load_parental_data.Rd | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pipeline-runner/man/cbind_cellset_type.Rd b/pipeline-runner/man/cbind_cellset_type.Rd index 27b345c5..c64160db 100644 --- a/pipeline-runner/man/cbind_cellset_type.Rd +++ b/pipeline-runner/man/cbind_cellset_type.Rd @@ -12,8 +12,9 @@ cbind_cellset_type(dt, col) \item{col}{string of corresponding cellset type} } \value{ -data.table with cellset_type +data.table with cellset_type column } \description{ -helper to correctly name the cellset_type column. +helper to correctly name the cellset type column. some cellsets already +contain a "type" slot, which complicates matters, so we chose \code{cellset_type}, } diff --git a/pipeline-runner/man/load_parental_data.Rd b/pipeline-runner/man/load_parental_data.Rd index 4fedfe97..089dc3b2 100644 --- a/pipeline-runner/man/load_parental_data.Rd +++ b/pipeline-runner/man/load_parental_data.Rd @@ -4,7 +4,7 @@ \alias{load_parental_data} \title{load parent experiment data} \usage{ -load_parental_data(input, pipelne_config) +load_parental_data(input, pipeline_config) } \arguments{ \item{input}{list of input parameters} From 06d426b9105a3242ba9f375c754c50ecf7ed3e58 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 13:46:24 -0300 Subject: [PATCH 25/34] rename function --- pipeline-runner/R/subset-1-subset_seurat.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index 7f6495cc..c25dee55 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -17,7 +17,7 @@ #' create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { - parent <- load_parental_data(input, pipeline_config) + parent <- load_parent_experiment_data(input, pipeline_config) scdata <- subset_experiment(input, parent) sample_id_map <- create_sample_id_map(unique(scdata$samples)) @@ -62,7 +62,7 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { #' @return list with scdata and parsed cellsets #' @export #' -load_parental_data <- function(input, pipeline_config) { +load_parent_experiment_data <- function(input, pipeline_config) { # load parent processed scdata and cellsets s3 <- paws::s3(config = pipeline_config$aws_config) parent_scdata <- From e9fcfb50e88a04f1470fa5468b27d360c890195c Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 13:49:50 -0300 Subject: [PATCH 26/34] document --- pipeline-runner/NAMESPACE | 2 +- ...load_parental_data.Rd => load_parent_experiment_data.Rd} | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename pipeline-runner/man/{load_parental_data.Rd => load_parent_experiment_data.Rd} (76%) diff --git a/pipeline-runner/NAMESPACE b/pipeline-runner/NAMESPACE index c111a52a..6a1e6117 100644 --- a/pipeline-runner/NAMESPACE +++ b/pipeline-runner/NAMESPACE @@ -32,7 +32,7 @@ export(integrate_scdata) export(learn_from_sketches) export(list_exclude_genes) export(load_cellsets) -export(load_parental_data) +export(load_parent_experiment_data) export(load_user_files) export(log_normalize) export(make_annot_with_ids) diff --git a/pipeline-runner/man/load_parental_data.Rd b/pipeline-runner/man/load_parent_experiment_data.Rd similarity index 76% rename from pipeline-runner/man/load_parental_data.Rd rename to pipeline-runner/man/load_parent_experiment_data.Rd index 089dc3b2..953aebea 100644 --- a/pipeline-runner/man/load_parental_data.Rd +++ b/pipeline-runner/man/load_parent_experiment_data.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/subset-1-subset_seurat.R -\name{load_parental_data} -\alias{load_parental_data} +\name{load_parent_experiment_data} +\alias{load_parent_experiment_data} \title{load parent experiment data} \usage{ -load_parental_data(input, pipeline_config) +load_parent_experiment_data(input, pipeline_config) } \arguments{ \item{input}{list of input parameters} From 67a4311da6dac50f6cf39ee9627b592ac8774889 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 13:54:50 -0300 Subject: [PATCH 27/34] comment WIP tests --- .../testthat/test-subset-1-subset_seurat.R | 58 ++++++++++++------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R b/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R index fcae2442..171553df 100644 --- a/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R +++ b/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R @@ -1,25 +1,39 @@ -mock_scdata <- function(){ - processed_path <- "/Users/german/bm/cellenics/data/8ecc9d20-30e4-49eb-b536-a0d1f0ba420d/processed_r.rds" - readRDS(processed_path) -} +# mock_scdata <- function(){ +# paths <- path_setup() +# source("tests/testthat/_snaps/qc/mock_experiment_id-integrated_scdata.R") +# scdata <- snap_list$data +# rm(snap_list, envir = parent.frame()) +# return(scdata) +# } +# +# mock_cellsets <- function(){ +# +# jsonlite::fromJSON("tests/testthat/_snaps/gem2s/gem2s-7-mock_experiment_id-cellsets.json", flatten = TRUE) +# +# } +# +# mock_cluster_cellsets <- function(cellsets) { +# +# } +# +# mock_input <- function() { +# input <- list( +# name = "mock_subset_experiment_name", +# parentExperimentId = "mock_parent_experiment_id", +# subsetExperimentId = "mock_subset_experiment_id", +# cellSetKeys = c("louvain-0", "louvain-1") +# ) +# +# return(input) +# } +# +# parent_scdata <- mock_scdata() +# cellsets <- mock_cellsets() +# parent_cellsets <- parse_cellsets(mock_cellsets()) +# +# parent <- list(scdata = parent_scdata, cellsets = parent_cellsets) +# +# input <- mock_input() -mock_cellsets <- function(){ - cellsets_path <- "/Users/german/bm/cellenics/data/8ecc9d20-30e4-49eb-b536-a0d1f0ba420d/cellsets.json" - jsonlite::fromJSON(cellsets_path, flatten = TRUE) -} -mock_input <- function() { - input <- list( - name = "mock_subset_experiment_name", - parentExperimentId = "mock_parent_experiment_id", - subsetExperimentId = "mock_subset_experiment_id", - cellSetKeys = c("louvain-0", "louvain-1") - ) - return(input) -} - -parent_scdata <- mock_scdata() -parent_cellsets <- parse_cellsets(mock_cellsets()) -sample_mapping <- mock_sample_id_mapping() -input <- mock_input() From df596e1e4bacf0d3c7d874d860a3712049c76ad1 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 14:09:34 -0300 Subject: [PATCH 28/34] fix tests --- pipeline-runner/tests/testthat/test-handle_data.R | 5 +++-- pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pipeline-runner/tests/testthat/test-handle_data.R b/pipeline-runner/tests/testthat/test-handle_data.R index 0869f5fb..fe96b8de 100644 --- a/pipeline-runner/tests/testthat/test-handle_data.R +++ b/pipeline-runner/tests/testthat/test-handle_data.R @@ -8,7 +8,8 @@ mock_sns <- function(config) { mock_cellsets <- function(){ # get a snapshot cellsets json - jsonlite::fromJSON("tests/testthat/_snaps/gem2s/gem2s-7-mock_experiment_id-cellsets.json", flatten = TRUE) + paths <- setup_test_paths() + jsonlite::fromJSON(file.path(paths$snaps, "gem2s", "gem2s-7-mock_experiment_id-cellsets.json"), flatten = TRUE) } @@ -152,7 +153,7 @@ test_that("cbind_cellset_type names the bound column correctly", { dt <- data.table::data.table(col1 = 1:10, col2 = 11:20) values <- seq(1, 20, 2) - res <- cbind_cellset_type(dt, values_1) + res <- cbind_cellset_type(dt, values) expect_true("cellset_type" %in% names(res)) expect_identical(res[,cellset_type], values) diff --git a/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R b/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R index 171553df..19d75f32 100644 --- a/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R +++ b/pipeline-runner/tests/testthat/test-subset-1-subset_seurat.R @@ -34,6 +34,3 @@ # parent <- list(scdata = parent_scdata, cellsets = parent_cellsets) # # input <- mock_input() - - - From 9c851a9558e02c00f445745e77dd3b4e2e205c50 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 14:16:21 -0300 Subject: [PATCH 29/34] rename function call in test --- pipeline-runner/tests/testthat/test-gem2s.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline-runner/tests/testthat/test-gem2s.R b/pipeline-runner/tests/testthat/test-gem2s.R index fb17e345..3d4abdc7 100644 --- a/pipeline-runner/tests/testthat/test-gem2s.R +++ b/pipeline-runner/tests/testthat/test-gem2s.R @@ -74,7 +74,7 @@ test_gem2s <- function(experiment_id) { res <- list() for (task_name in names(tasks)) { - res <- run_gem2s_step(res$output, + res <- run_pipeline_step(res$output, input, pipeline_config, tasks, From 90a75c4ac3a0f3c62bd1683f248572221b7a4d3c Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Fri, 23 Dec 2022 14:26:23 -0300 Subject: [PATCH 30/34] update snaps --- .../_snaps/gem2s-6-prepare_experiment.md | 63 ++--- .../tests/testthat/_snaps/gem2s.md | 256 ++++++------------ .../gem2s/gem2s-6-mock_experiment_id-out.R | 51 ++-- .../gem2s-6-mock_experiment_id-qc_config.R | 51 ++-- 4 files changed, 139 insertions(+), 282 deletions(-) diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s-6-prepare_experiment.md b/pipeline-runner/tests/testthat/_snaps/gem2s-6-prepare_experiment.md index c3859e51..054f2e31 100644 --- a/pipeline-runner/tests/testthat/_snaps/gem2s-6-prepare_experiment.md +++ b/pipeline-runner/tests/testthat/_snaps/gem2s-6-prepare_experiment.md @@ -5,52 +5,34 @@ Output List of 7 $ cellSizeDistribution:List of 1 - ..$ sample_a:List of 4 - .. ..$ enabled : logi FALSE - .. ..$ auto : logi TRUE - .. ..$ filterSettings :List of 2 - .. .. ..$ minCellSize: num 10 - .. .. ..$ binStep : num 200 - .. ..$ defaultFilterSettings:List of 2 + ..$ sample_a:List of 3 + .. ..$ enabled : logi FALSE + .. ..$ auto : logi TRUE + .. ..$ filterSettings:List of 2 .. .. ..$ minCellSize: num 10 .. .. ..$ binStep : num 200 $ mitochondrialContent:List of 1 - ..$ sample_a:List of 4 - .. ..$ enabled : logi TRUE - .. ..$ auto : logi TRUE - .. ..$ filterSettings :List of 2 - .. .. ..$ method : chr "absoluteThreshold" - .. .. ..$ methodSettings:List of 1 - .. .. .. ..$ absoluteThreshold:List of 2 - .. .. .. .. ..$ maxFraction: num 0 - .. .. .. .. ..$ binStep : num 0.3 - .. ..$ defaultFilterSettings:List of 2 + ..$ sample_a:List of 3 + .. ..$ enabled : logi TRUE + .. ..$ auto : logi TRUE + .. ..$ filterSettings:List of 2 .. .. ..$ method : chr "absoluteThreshold" .. .. ..$ methodSettings:List of 1 .. .. .. ..$ absoluteThreshold:List of 2 .. .. .. .. ..$ maxFraction: num 0 .. .. .. .. ..$ binStep : num 0.3 $ classifier :List of 1 - ..$ sample_a:List of 5 - .. ..$ enabled : logi TRUE - .. ..$ prefiltered : logi FALSE - .. ..$ auto : logi TRUE - .. ..$ filterSettings :List of 1 - .. .. ..$ FDR: num 0.01 - .. ..$ defaultFilterSettings:List of 1 + ..$ sample_a:List of 4 + .. ..$ enabled : logi TRUE + .. ..$ prefiltered : logi FALSE + .. ..$ auto : logi TRUE + .. ..$ filterSettings:List of 1 .. .. ..$ FDR: num 0.01 $ numGenesVsNumUmis :List of 1 - ..$ sample_a:List of 4 - .. ..$ enabled : logi TRUE - .. ..$ auto : logi TRUE - .. ..$ filterSettings :List of 2 - .. .. ..$ regressionType : chr "linear" - .. .. ..$ regressionTypeSettings:List of 2 - .. .. .. ..$ linear:List of 1 - .. .. .. .. ..$ p.level: num 0.000132 - .. .. .. ..$ spline:List of 1 - .. .. .. .. ..$ p.level: num 0.001 - .. ..$ defaultFilterSettings:List of 2 + ..$ sample_a:List of 3 + .. ..$ enabled : logi TRUE + .. ..$ auto : logi TRUE + .. ..$ filterSettings:List of 2 .. .. ..$ regressionType : chr "linear" .. .. ..$ regressionTypeSettings:List of 2 .. .. .. ..$ linear:List of 1 @@ -58,13 +40,10 @@ .. .. .. ..$ spline:List of 1 .. .. .. .. ..$ p.level: num 0.001 $ doubletScores :List of 1 - ..$ sample_a:List of 4 - .. ..$ enabled : logi TRUE - .. ..$ auto : logi TRUE - .. ..$ filterSettings :List of 2 - .. .. ..$ probabilityThreshold: num 0.8 - .. .. ..$ binStep : num 0.02 - .. ..$ defaultFilterSettings:List of 2 + ..$ sample_a:List of 3 + .. ..$ enabled : logi TRUE + .. ..$ auto : logi TRUE + .. ..$ filterSettings:List of 2 .. .. ..$ probabilityThreshold: num 0.8 .. .. ..$ binStep : num 0.02 $ dataIntegration :List of 2 diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s.md b/pipeline-runner/tests/testthat/_snaps/gem2s.md index 08e6fb81..c8c5f0d5 100644 --- a/pipeline-runner/tests/testthat/_snaps/gem2s.md +++ b/pipeline-runner/tests/testthat/_snaps/gem2s.md @@ -482,7 +482,7 @@ Code rlang::hash(res) Output - [1] "988133ae29394ba9af3dcf9c03e0dba7" + [1] "e449dea4dd4b69f18d8fc7fd7fb58063" Code str(res) Output @@ -697,101 +697,65 @@ ..$ disable_qc_filters: logi FALSE ..$ qc_config :List of 7 .. ..$ cellSizeDistribution:List of 2 - .. .. ..$ mock_sample_2_id:List of 4 - .. .. .. ..$ enabled : logi FALSE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. ..$ minCellSize: num 17 - .. .. .. .. ..$ binStep : num 200 - .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. ..$ mock_sample_2_id:List of 3 + .. .. .. ..$ enabled : logi FALSE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 2 .. .. .. .. ..$ minCellSize: num 17 .. .. .. .. ..$ binStep : num 200 - .. .. ..$ mock_sample_1_id:List of 4 - .. .. .. ..$ enabled : logi FALSE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. ..$ minCellSize: num 27 - .. .. .. .. ..$ binStep : num 200 - .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. ..$ mock_sample_1_id:List of 3 + .. .. .. ..$ enabled : logi FALSE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 2 .. .. .. .. ..$ minCellSize: num 27 .. .. .. .. ..$ binStep : num 200 .. ..$ mitochondrialContent:List of 2 - .. .. ..$ mock_sample_2_id:List of 4 - .. .. .. ..$ enabled : logi TRUE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 2 + .. .. ..$ mock_sample_2_id:List of 3 + .. .. .. ..$ enabled : logi TRUE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 2 .. .. .. .. ..$ method : chr "absoluteThreshold" .. .. .. .. ..$ methodSettings:List of 1 .. .. .. .. .. ..$ absoluteThreshold:List of 2 .. .. .. .. .. .. ..$ maxFraction: num 0.521 .. .. .. .. .. .. ..$ binStep : num 0.3 - .. .. .. ..$ defaultFilterSettings:List of 2 - .. .. .. .. ..$ method : chr "absoluteThreshold" - .. .. .. .. ..$ methodSettings:List of 1 - .. .. .. .. .. ..$ absoluteThreshold:List of 2 - .. .. .. .. .. .. ..$ maxFraction: num 0.521 - .. .. .. .. .. .. ..$ binStep : num 0.3 - .. .. ..$ mock_sample_1_id:List of 4 - .. .. .. ..$ enabled : logi TRUE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. ..$ method : chr "absoluteThreshold" - .. .. .. .. ..$ methodSettings:List of 1 - .. .. .. .. .. ..$ absoluteThreshold:List of 2 - .. .. .. .. .. .. ..$ maxFraction: num 0.603 - .. .. .. .. .. .. ..$ binStep : num 0.3 - .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. ..$ mock_sample_1_id:List of 3 + .. .. .. ..$ enabled : logi TRUE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 2 .. .. .. .. ..$ method : chr "absoluteThreshold" .. .. .. .. ..$ methodSettings:List of 1 .. .. .. .. .. ..$ absoluteThreshold:List of 2 .. .. .. .. .. .. ..$ maxFraction: num 0.603 .. .. .. .. .. .. ..$ binStep : num 0.3 .. ..$ classifier :List of 2 - .. .. ..$ mock_sample_2_id:List of 5 - .. .. .. ..$ enabled : logi TRUE - .. .. .. ..$ prefiltered : logi FALSE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 1 - .. .. .. .. ..$ FDR: num 0.01 - .. .. .. ..$ defaultFilterSettings:List of 1 - .. .. .. .. ..$ FDR: num 0.01 - .. .. ..$ mock_sample_1_id:List of 5 - .. .. .. ..$ enabled : logi TRUE - .. .. .. ..$ prefiltered : logi FALSE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 1 + .. .. ..$ mock_sample_2_id:List of 4 + .. .. .. ..$ enabled : logi TRUE + .. .. .. ..$ prefiltered : logi FALSE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 1 .. .. .. .. ..$ FDR: num 0.01 - .. .. .. ..$ defaultFilterSettings:List of 1 + .. .. ..$ mock_sample_1_id:List of 4 + .. .. .. ..$ enabled : logi TRUE + .. .. .. ..$ prefiltered : logi FALSE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 1 .. .. .. .. ..$ FDR: num 0.01 .. ..$ numGenesVsNumUmis :List of 2 - .. .. ..$ mock_sample_2_id:List of 4 - .. .. .. ..$ enabled : logi TRUE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 2 + .. .. ..$ mock_sample_2_id:List of 3 + .. .. .. ..$ enabled : logi TRUE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 2 .. .. .. .. ..$ regressionType : chr "linear" .. .. .. .. ..$ regressionTypeSettings:List of 2 .. .. .. .. .. ..$ linear:List of 1 .. .. .. .. .. .. ..$ p.level: num 0.001 .. .. .. .. .. ..$ spline:List of 1 .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. .. ..$ defaultFilterSettings:List of 2 - .. .. .. .. ..$ regressionType : chr "linear" - .. .. .. .. ..$ regressionTypeSettings:List of 2 - .. .. .. .. .. ..$ linear:List of 1 - .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. .. .. .. ..$ spline:List of 1 - .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. ..$ mock_sample_1_id:List of 4 - .. .. .. ..$ enabled : logi TRUE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. ..$ regressionType : chr "linear" - .. .. .. .. ..$ regressionTypeSettings:List of 2 - .. .. .. .. .. ..$ linear:List of 1 - .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. .. .. .. ..$ spline:List of 1 - .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. ..$ mock_sample_1_id:List of 3 + .. .. .. ..$ enabled : logi TRUE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 2 .. .. .. .. ..$ regressionType : chr "linear" .. .. .. .. ..$ regressionTypeSettings:List of 2 .. .. .. .. .. ..$ linear:List of 1 @@ -799,22 +763,16 @@ .. .. .. .. .. ..$ spline:List of 1 .. .. .. .. .. .. ..$ p.level: num 0.001 .. ..$ doubletScores :List of 2 - .. .. ..$ mock_sample_2_id:List of 4 - .. .. .. ..$ enabled : logi TRUE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 2 + .. .. ..$ mock_sample_2_id:List of 3 + .. .. .. ..$ enabled : logi TRUE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 2 .. .. .. .. ..$ probabilityThreshold: num 0.979 .. .. .. .. ..$ binStep : num 0.02 - .. .. .. ..$ defaultFilterSettings:List of 2 - .. .. .. .. ..$ probabilityThreshold: num 0.979 - .. .. .. .. ..$ binStep : num 0.02 - .. .. ..$ mock_sample_1_id:List of 4 - .. .. .. ..$ enabled : logi TRUE - .. .. .. ..$ auto : logi TRUE - .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. ..$ probabilityThreshold: num 0.84 - .. .. .. .. ..$ binStep : num 0.02 - .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. ..$ mock_sample_1_id:List of 3 + .. .. .. ..$ enabled : logi TRUE + .. .. .. ..$ auto : logi TRUE + .. .. .. ..$ filterSettings:List of 2 .. .. .. .. ..$ probabilityThreshold: num 0.84 .. .. .. .. ..$ binStep : num 0.02 .. ..$ dataIntegration :List of 2 @@ -862,7 +820,7 @@ Code rlang::hash(res) Output - [1] "b43eb24698e7e929755e256573b8b817" + [1] "4afaaf94e9fd057c28b870c5eca95131" Code str(res) Output @@ -877,101 +835,65 @@ .. .. ..$ type : chr "10x" .. ..$ processingConfig:List of 7 .. .. ..$ cellSizeDistribution:List of 2 - .. .. .. ..$ mock_sample_2_id:List of 4 - .. .. .. .. ..$ enabled : logi FALSE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. .. ..$ minCellSize: num 17 - .. .. .. .. .. ..$ binStep : num 200 - .. .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. .. ..$ mock_sample_2_id:List of 3 + .. .. .. .. ..$ enabled : logi FALSE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 2 .. .. .. .. .. ..$ minCellSize: num 17 .. .. .. .. .. ..$ binStep : num 200 - .. .. .. ..$ mock_sample_1_id:List of 4 - .. .. .. .. ..$ enabled : logi FALSE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. .. ..$ minCellSize: num 27 - .. .. .. .. .. ..$ binStep : num 200 - .. .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. .. ..$ mock_sample_1_id:List of 3 + .. .. .. .. ..$ enabled : logi FALSE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 2 .. .. .. .. .. ..$ minCellSize: num 27 .. .. .. .. .. ..$ binStep : num 200 .. .. ..$ mitochondrialContent:List of 2 - .. .. .. ..$ mock_sample_2_id:List of 4 - .. .. .. .. ..$ enabled : logi TRUE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 2 + .. .. .. ..$ mock_sample_2_id:List of 3 + .. .. .. .. ..$ enabled : logi TRUE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 2 .. .. .. .. .. ..$ method : chr "absoluteThreshold" .. .. .. .. .. ..$ methodSettings:List of 1 .. .. .. .. .. .. ..$ absoluteThreshold:List of 2 .. .. .. .. .. .. .. ..$ maxFraction: num 0.521 .. .. .. .. .. .. .. ..$ binStep : num 0.3 - .. .. .. .. ..$ defaultFilterSettings:List of 2 - .. .. .. .. .. ..$ method : chr "absoluteThreshold" - .. .. .. .. .. ..$ methodSettings:List of 1 - .. .. .. .. .. .. ..$ absoluteThreshold:List of 2 - .. .. .. .. .. .. .. ..$ maxFraction: num 0.521 - .. .. .. .. .. .. .. ..$ binStep : num 0.3 - .. .. .. ..$ mock_sample_1_id:List of 4 - .. .. .. .. ..$ enabled : logi TRUE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. .. ..$ method : chr "absoluteThreshold" - .. .. .. .. .. ..$ methodSettings:List of 1 - .. .. .. .. .. .. ..$ absoluteThreshold:List of 2 - .. .. .. .. .. .. .. ..$ maxFraction: num 0.603 - .. .. .. .. .. .. .. ..$ binStep : num 0.3 - .. .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. .. ..$ mock_sample_1_id:List of 3 + .. .. .. .. ..$ enabled : logi TRUE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 2 .. .. .. .. .. ..$ method : chr "absoluteThreshold" .. .. .. .. .. ..$ methodSettings:List of 1 .. .. .. .. .. .. ..$ absoluteThreshold:List of 2 .. .. .. .. .. .. .. ..$ maxFraction: num 0.603 .. .. .. .. .. .. .. ..$ binStep : num 0.3 .. .. ..$ classifier :List of 2 - .. .. .. ..$ mock_sample_2_id:List of 5 - .. .. .. .. ..$ enabled : logi TRUE - .. .. .. .. ..$ prefiltered : logi FALSE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 1 - .. .. .. .. .. ..$ FDR: num 0.01 - .. .. .. .. ..$ defaultFilterSettings:List of 1 - .. .. .. .. .. ..$ FDR: num 0.01 - .. .. .. ..$ mock_sample_1_id:List of 5 - .. .. .. .. ..$ enabled : logi TRUE - .. .. .. .. ..$ prefiltered : logi FALSE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 1 + .. .. .. ..$ mock_sample_2_id:List of 4 + .. .. .. .. ..$ enabled : logi TRUE + .. .. .. .. ..$ prefiltered : logi FALSE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 1 .. .. .. .. .. ..$ FDR: num 0.01 - .. .. .. .. ..$ defaultFilterSettings:List of 1 + .. .. .. ..$ mock_sample_1_id:List of 4 + .. .. .. .. ..$ enabled : logi TRUE + .. .. .. .. ..$ prefiltered : logi FALSE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 1 .. .. .. .. .. ..$ FDR: num 0.01 .. .. ..$ numGenesVsNumUmis :List of 2 - .. .. .. ..$ mock_sample_2_id:List of 4 - .. .. .. .. ..$ enabled : logi TRUE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 2 + .. .. .. ..$ mock_sample_2_id:List of 3 + .. .. .. .. ..$ enabled : logi TRUE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 2 .. .. .. .. .. ..$ regressionType : chr "linear" .. .. .. .. .. ..$ regressionTypeSettings:List of 2 .. .. .. .. .. .. ..$ linear:List of 1 .. .. .. .. .. .. .. ..$ p.level: num 0.001 .. .. .. .. .. .. ..$ spline:List of 1 .. .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. .. .. ..$ defaultFilterSettings:List of 2 - .. .. .. .. .. ..$ regressionType : chr "linear" - .. .. .. .. .. ..$ regressionTypeSettings:List of 2 - .. .. .. .. .. .. ..$ linear:List of 1 - .. .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. .. .. .. .. ..$ spline:List of 1 - .. .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. .. ..$ mock_sample_1_id:List of 4 - .. .. .. .. ..$ enabled : logi TRUE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. .. ..$ regressionType : chr "linear" - .. .. .. .. .. ..$ regressionTypeSettings:List of 2 - .. .. .. .. .. .. ..$ linear:List of 1 - .. .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. .. .. .. .. ..$ spline:List of 1 - .. .. .. .. .. .. .. ..$ p.level: num 0.001 - .. .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. .. ..$ mock_sample_1_id:List of 3 + .. .. .. .. ..$ enabled : logi TRUE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 2 .. .. .. .. .. ..$ regressionType : chr "linear" .. .. .. .. .. ..$ regressionTypeSettings:List of 2 .. .. .. .. .. .. ..$ linear:List of 1 @@ -979,22 +901,16 @@ .. .. .. .. .. .. ..$ spline:List of 1 .. .. .. .. .. .. .. ..$ p.level: num 0.001 .. .. ..$ doubletScores :List of 2 - .. .. .. ..$ mock_sample_2_id:List of 4 - .. .. .. .. ..$ enabled : logi TRUE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 2 + .. .. .. ..$ mock_sample_2_id:List of 3 + .. .. .. .. ..$ enabled : logi TRUE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 2 .. .. .. .. .. ..$ probabilityThreshold: num 0.979 .. .. .. .. .. ..$ binStep : num 0.02 - .. .. .. .. ..$ defaultFilterSettings:List of 2 - .. .. .. .. .. ..$ probabilityThreshold: num 0.979 - .. .. .. .. .. ..$ binStep : num 0.02 - .. .. .. ..$ mock_sample_1_id:List of 4 - .. .. .. .. ..$ enabled : logi TRUE - .. .. .. .. ..$ auto : logi TRUE - .. .. .. .. ..$ filterSettings :List of 2 - .. .. .. .. .. ..$ probabilityThreshold: num 0.84 - .. .. .. .. .. ..$ binStep : num 0.02 - .. .. .. .. ..$ defaultFilterSettings:List of 2 + .. .. .. ..$ mock_sample_1_id:List of 3 + .. .. .. .. ..$ enabled : logi TRUE + .. .. .. .. ..$ auto : logi TRUE + .. .. .. .. ..$ filterSettings:List of 2 .. .. .. .. .. ..$ probabilityThreshold: num 0.84 .. .. .. .. .. ..$ binStep : num 0.02 .. .. ..$ dataIntegration :List of 2 diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s/gem2s-6-mock_experiment_id-out.R b/pipeline-runner/tests/testthat/_snaps/gem2s/gem2s-6-mock_experiment_id-out.R index 3a2b9164..2e018970 100644 --- a/pipeline-runner/tests/testthat/_snaps/gem2s/gem2s-6-mock_experiment_id-out.R +++ b/pipeline-runner/tests/testthat/_snaps/gem2s/gem2s-6-mock_experiment_id-out.R @@ -49746,53 +49746,34 @@ list(data = list(), output = list(config = list(name = "mock_experiment", )), commands = list(), tools = list(flag_filtered = FALSE))), disable_qc_filters = FALSE, qc_config = list(cellSizeDistribution = list( mock_sample_2_id = list(enabled = FALSE, auto = TRUE, - filterSettings = list(minCellSize = 17, binStep = 200), - defaultFilterSettings = list(minCellSize = 17, binStep = 200)), + filterSettings = list(minCellSize = 17, binStep = 200)), mock_sample_1_id = list(enabled = FALSE, auto = TRUE, - filterSettings = list(minCellSize = 27, binStep = 200), - defaultFilterSettings = list(minCellSize = 27, binStep = 200))), + filterSettings = list(minCellSize = 27, binStep = 200))), mitochondrialContent = list(mock_sample_2_id = list(enabled = TRUE, auto = TRUE, filterSettings = list(method = "absoluteThreshold", - methodSettings = list(absoluteThreshold = list( - maxFraction = 0.52054794520547942, binStep = 0.29999999999999999))), - defaultFilterSettings = list(method = "absoluteThreshold", methodSettings = list(absoluteThreshold = list( maxFraction = 0.52054794520547942, binStep = 0.29999999999999999)))), mock_sample_1_id = list(enabled = TRUE, auto = TRUE, filterSettings = list(method = "absoluteThreshold", - methodSettings = list(absoluteThreshold = list( - maxFraction = 0.60256410256410253, binStep = 0.29999999999999999))), - defaultFilterSettings = list(method = "absoluteThreshold", methodSettings = list(absoluteThreshold = list( maxFraction = 0.60256410256410253, binStep = 0.29999999999999999))))), classifier = list(mock_sample_2_id = list(enabled = TRUE, prefiltered = FALSE, auto = TRUE, filterSettings = list( - FDR = 0.01), defaultFilterSettings = list(FDR = 0.01)), - mock_sample_1_id = list(enabled = TRUE, prefiltered = FALSE, - auto = TRUE, filterSettings = list(FDR = 0.01), - defaultFilterSettings = list(FDR = 0.01))), numGenesVsNumUmis = list( + FDR = 0.01)), mock_sample_1_id = list(enabled = TRUE, + prefiltered = FALSE, auto = TRUE, filterSettings = list( + FDR = 0.01))), numGenesVsNumUmis = list(mock_sample_2_id = list( + enabled = TRUE, auto = TRUE, filterSettings = list( + regressionType = "linear", regressionTypeSettings = list( + linear = list(p.level = 0.001), spline = list( + p.level = 0.001)))), mock_sample_1_id = list( + enabled = TRUE, auto = TRUE, filterSettings = list( + regressionType = "linear", regressionTypeSettings = list( + linear = list(p.level = 0.001), spline = list( + p.level = 0.001))))), doubletScores = list( mock_sample_2_id = list(enabled = TRUE, auto = TRUE, - filterSettings = list(regressionType = "linear", - regressionTypeSettings = list(linear = list( - p.level = 0.001), spline = list(p.level = 0.001))), - defaultFilterSettings = list(regressionType = "linear", - regressionTypeSettings = list(linear = list( - p.level = 0.001), spline = list(p.level = 0.001)))), - mock_sample_1_id = list(enabled = TRUE, auto = TRUE, - filterSettings = list(regressionType = "linear", - regressionTypeSettings = list(linear = list( - p.level = 0.001), spline = list(p.level = 0.001))), - defaultFilterSettings = list(regressionType = "linear", - regressionTypeSettings = list(linear = list( - p.level = 0.001), spline = list(p.level = 0.001))))), - doubletScores = list(mock_sample_2_id = list(enabled = TRUE, - auto = TRUE, filterSettings = list(probabilityThreshold = 0.97920405864715576, - binStep = 0.02), defaultFilterSettings = list( - probabilityThreshold = 0.97920405864715576, binStep = 0.02)), - mock_sample_1_id = list(enabled = TRUE, auto = TRUE, - filterSettings = list(probabilityThreshold = 0.83960545063018799, - binStep = 0.02), defaultFilterSettings = list( - probabilityThreshold = 0.83960545063018799, + filterSettings = list(probabilityThreshold = 0.97920405864715576, + binStep = 0.02)), mock_sample_1_id = list(enabled = TRUE, + auto = TRUE, filterSettings = list(probabilityThreshold = 0.83960545063018799, binStep = 0.02))), dataIntegration = list(dataIntegration = list( method = "harmony", methodSettings = list(seuratv4 = list( numGenes = 2000, normalisation = "logNormalize"), diff --git a/pipeline-runner/tests/testthat/_snaps/gem2s/gem2s-6-mock_experiment_id-qc_config.R b/pipeline-runner/tests/testthat/_snaps/gem2s/gem2s-6-mock_experiment_id-qc_config.R index 3cced61a..198f146c 100644 --- a/pipeline-runner/tests/testthat/_snaps/gem2s/gem2s-6-mock_experiment_id-qc_config.R +++ b/pipeline-runner/tests/testthat/_snaps/gem2s/gem2s-6-mock_experiment_id-qc_config.R @@ -1,52 +1,33 @@ qc_config <- list(cellSizeDistribution = list(mock_sample_2_id = list(enabled = FALSE, - auto = TRUE, filterSettings = list(minCellSize = 17, binStep = 200), - defaultFilterSettings = list(minCellSize = 17, binStep = 200)), + auto = TRUE, filterSettings = list(minCellSize = 17, binStep = 200)), mock_sample_1_id = list(enabled = FALSE, auto = TRUE, filterSettings = list( - minCellSize = 27, binStep = 200), defaultFilterSettings = list( minCellSize = 27, binStep = 200))), mitochondrialContent = list( mock_sample_2_id = list(enabled = TRUE, auto = TRUE, filterSettings = list( method = "absoluteThreshold", methodSettings = list(absoluteThreshold = list( - maxFraction = 0.52054794520547942, binStep = 0.29999999999999999))), - defaultFilterSettings = list(method = "absoluteThreshold", - methodSettings = list(absoluteThreshold = list(maxFraction = 0.52054794520547942, - binStep = 0.29999999999999999)))), mock_sample_1_id = list( - enabled = TRUE, auto = TRUE, filterSettings = list(method = "absoluteThreshold", - methodSettings = list(absoluteThreshold = list(maxFraction = 0.60256410256410253, - binStep = 0.29999999999999999))), defaultFilterSettings = list( - method = "absoluteThreshold", methodSettings = list( - absoluteThreshold = list(maxFraction = 0.60256410256410253, - binStep = 0.29999999999999999))))), classifier = list( - mock_sample_2_id = list(enabled = TRUE, prefiltered = FALSE, - auto = TRUE, filterSettings = list(FDR = 0.01), defaultFilterSettings = list( + maxFraction = 0.52054794520547942, binStep = 0.29999999999999999)))), + mock_sample_1_id = list(enabled = TRUE, auto = TRUE, filterSettings = list( + method = "absoluteThreshold", methodSettings = list(absoluteThreshold = list( + maxFraction = 0.60256410256410253, binStep = 0.29999999999999999))))), + classifier = list(mock_sample_2_id = list(enabled = TRUE, + prefiltered = FALSE, auto = TRUE, filterSettings = list( FDR = 0.01)), mock_sample_1_id = list(enabled = TRUE, prefiltered = FALSE, auto = TRUE, filterSettings = list( - FDR = 0.01), defaultFilterSettings = list(FDR = 0.01))), - numGenesVsNumUmis = list(mock_sample_2_id = list(enabled = TRUE, - auto = TRUE, filterSettings = list(regressionType = "linear", + FDR = 0.01))), numGenesVsNumUmis = list(mock_sample_2_id = list( + enabled = TRUE, auto = TRUE, filterSettings = list(regressionType = "linear", regressionTypeSettings = list(linear = list(p.level = 0.001), - spline = list(p.level = 0.001))), defaultFilterSettings = list( - regressionType = "linear", regressionTypeSettings = list( - linear = list(p.level = 0.001), spline = list( - p.level = 0.001)))), mock_sample_1_id = list( + spline = list(p.level = 0.001)))), mock_sample_1_id = list( enabled = TRUE, auto = TRUE, filterSettings = list(regressionType = "linear", regressionTypeSettings = list(linear = list(p.level = 0.001), - spline = list(p.level = 0.001))), defaultFilterSettings = list( - regressionType = "linear", regressionTypeSettings = list( - linear = list(p.level = 0.001), spline = list( - p.level = 0.001))))), doubletScores = list( + spline = list(p.level = 0.001))))), doubletScores = list( mock_sample_2_id = list(enabled = TRUE, auto = TRUE, filterSettings = list(probabilityThreshold = 0.97920405864715576, - binStep = 0.02), defaultFilterSettings = list( - probabilityThreshold = 0.97920405864715576, binStep = 0.02)), - mock_sample_1_id = list(enabled = TRUE, auto = TRUE, - filterSettings = list(probabilityThreshold = 0.83960545063018799, - binStep = 0.02), defaultFilterSettings = list( - probabilityThreshold = 0.83960545063018799, binStep = 0.02))), - dataIntegration = list(dataIntegration = list(method = "harmony", - methodSettings = list(seuratv4 = list(numGenes = 2000, - normalisation = "logNormalize"), unisample = list( + binStep = 0.02)), mock_sample_1_id = list(enabled = TRUE, + auto = TRUE, filterSettings = list(probabilityThreshold = 0.83960545063018799, + binStep = 0.02))), dataIntegration = list(dataIntegration = list( + method = "harmony", methodSettings = list(seuratv4 = list( numGenes = 2000, normalisation = "logNormalize"), + unisample = list(numGenes = 2000, normalisation = "logNormalize"), harmony = list(numGenes = 2000, normalisation = "logNormalize"), fastmnn = list(numGenes = 2000, normalisation = "logNormalize"))), dimensionalityReduction = list(method = "rpca", numPCs = NULL, From 1124f30324ded8b51ed631c31105d75a965c7b5d Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Mon, 26 Dec 2022 12:25:22 -0300 Subject: [PATCH 31/34] add comment --- pipeline-runner/R/handle_data.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index 03e24a84..a2d74631 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -87,6 +87,8 @@ reload_data_from_s3 <- function(pipeline_config, experiment_id, task_name, tasks integration_index <- match("dataIntegration", task_names) s3 <- paws::s3(config = pipeline_config$aws_config) + # TODO: remove if block + # this never runs, because embed and cluster runs in the worker if modified. # If the task is after data integration, we need to get scdata from processed_matrix if (match(task_name, task_names) > integration_index) { return(load_processed_scdata(s3, pipeline_config, experiment_id)) From 6fe9f8f616042125f1162914e46c27ef9fa26489 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Mon, 26 Dec 2022 12:52:46 -0300 Subject: [PATCH 32/34] address martin's comments --- pipeline-runner/NAMESPACE | 2 +- pipeline-runner/R/handle_data.R | 7 ++- pipeline-runner/R/subset-1-subset_seurat.R | 45 +++++++++---------- pipeline-runner/data-raw/sysdata.R | 2 +- pipeline-runner/man/add_new_sample_ids.Rd | 6 +-- pipeline-runner/man/add_subset_metadata.Rd | 6 +-- pipeline-runner/man/safe_cbind.Rd | 7 ++- pipeline-runner/man/subset_experiment.Rd | 2 +- ..._subset_experiment.Rd => subset_seurat.Rd} | 6 +-- 9 files changed, 44 insertions(+), 39 deletions(-) rename pipeline-runner/man/{create_subset_experiment.Rd => subset_seurat.Rd} (85%) diff --git a/pipeline-runner/NAMESPACE b/pipeline-runner/NAMESPACE index 6a1e6117..290c77cb 100644 --- a/pipeline-runner/NAMESPACE +++ b/pipeline-runner/NAMESPACE @@ -9,7 +9,6 @@ export(cbind_cellset_type) export(create_sample_id_map) export(create_scdata) export(create_seurat) -export(create_subset_experiment) export(diet_scdata) export(download_user_files) export(embed_and_cluster) @@ -54,6 +53,7 @@ export(score_doublets) export(subset_experiment) export(subset_ids) export(subset_safe) +export(subset_seurat) export(sym_to_ids) export(upload_to_aws) import(data.table) diff --git a/pipeline-runner/R/handle_data.R b/pipeline-runner/R/handle_data.R index a2d74631..6323f8bc 100644 --- a/pipeline-runner/R/handle_data.R +++ b/pipeline-runner/R/handle_data.R @@ -436,7 +436,12 @@ load_cellsets <- function(s3, pipeline_config, experiment_id) { } -#' Bind columns not failing if there's an empty data.table +#' Bind columns not creating rows if there's an empty data.table +#' +#' `cbind` on `data.table` adds a row if binding an empty data.table to a non-empty +#' one. We do not want that behavior when parsing cellsets, because it implies +#' the "creation" of a cell that does not exists (i.e. when binding scratchpad +#' cellsets slots of an experiment without custom cellsets) #' #' @param dt data.table #' @param ... columns to add diff --git a/pipeline-runner/R/subset-1-subset_seurat.R b/pipeline-runner/R/subset-1-subset_seurat.R index c25dee55..537db54c 100644 --- a/pipeline-runner/R/subset-1-subset_seurat.R +++ b/pipeline-runner/R/subset-1-subset_seurat.R @@ -15,15 +15,14 @@ #' @return list containing scdata_list, annotations and sample_id_map #' @export #' -create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { +subset_seurat <- function(input, pipeline_config, prev_out = NULL) { + parent_data <- load_parent_experiment_data(input, pipeline_config) - parent <- load_parent_experiment_data(input, pipeline_config) + subset_scdata <- subset_experiment(input, parent_data) + sample_id_map <- create_sample_id_map(unique(subset_scdata$samples)) + subset_scdata <- add_subset_metadata(input, subset_scdata, sample_id_map) - scdata <- subset_experiment(input, parent) - sample_id_map <- create_sample_id_map(unique(scdata$samples)) - scdata <- add_subset_metadata(input, scdata, sample_id_map) - - scdata_list <- Seurat::SplitObject(scdata, split.by = "samples") + subset_scdata_list <- Seurat::SplitObject(subset_scdata, split.by = "samples") # TODO: remove from here and refactor all pipeline. config <- list( @@ -37,13 +36,13 @@ create_subset_experiment <- function(input, pipeline_config, prev_out = NULL) { sampleIdMap = sample_id_map ), output = list( - scdata_list = scdata_list, - annot = scdata@misc$gene_annotations, + scdata_list = subset_scdata_list, + annot = subset_scdata@misc$gene_annotations, edrops = NULL, sample_id_map = sample_id_map, config = config, disable_qc_filters = TRUE, - parent_cellsets = parent$cellsets + parent_cellsets = parent_data$cellsets ) ) @@ -113,10 +112,10 @@ diet_scdata <- function(scdata) { #' @return subset seurat object #' @export #' -subset_experiment <- function(input, parent) { +subset_experiment <- function(input, parent_data) { # subset seurat object, remove unnecesary data - cell_ids_to_keep <- unique(parent$cellsets[key %in% input$cellSetKeys, cell_id]) - scdata <- subset_ids(parent$scdata, cell_ids_to_keep) + cell_ids_to_keep <- unique(parent_data$cellsets[key %in% input$cellSetKeys, cell_id]) + scdata <- subset_ids(parent_data$scdata, cell_ids_to_keep) scdata <- diet_scdata(scdata) return(scdata) } @@ -152,11 +151,10 @@ create_sample_id_map <- function(parent_sample_id) { #' @return SeuratObject with new sample ids #' @export #' -add_new_sample_ids <- function(scdata, sample_id_map) { - - sample_map_idx <- match(scdata$parent_samples, names(sample_id_map)) - scdata$samples <- unname(unlist(sample_id_map[sample_map_idx])) - return(scdata) +add_new_sample_ids <- function(subset_scdata, sample_id_map) { + sample_map_idx <- match(subset_scdata$parent_samples, names(sample_id_map)) + subset_scdata$samples <- unname(unlist(sample_id_map[sample_map_idx])) + return(subset_scdata) } @@ -170,12 +168,11 @@ add_new_sample_ids <- function(scdata, sample_id_map) { #' @return scdata with additional metadata #' @export #' -add_subset_metadata <- function(input, scdata, sample_id_map) { - +add_subset_metadata <- function(input, subset_scdata, sample_id_map) { # add new sample_ids, keep originals in a new variable - scdata$parent_samples <- scdata$samples - scdata <- add_new_sample_ids(scdata, sample_id_map) - scdata@misc$experimentId <- input$experimentId + subset_scdata$parent_samples <- subset_scdata$samples + subset_scdata <- add_new_sample_ids(subset_scdata, sample_id_map) + subset_scdata@misc$experimentId <- input$experimentId - return(scdata) + return(subset_scdata) } diff --git a/pipeline-runner/data-raw/sysdata.R b/pipeline-runner/data-raw/sysdata.R index 9c6e8f50..9b6a6171 100644 --- a/pipeline-runner/data-raw/sysdata.R +++ b/pipeline-runner/data-raw/sysdata.R @@ -25,7 +25,7 @@ GEM2S_TASK_LIST <- list( ) SUBSET_SEURAT_TASK_LIST <- list( - "subsetSeurat" = "create_subset_experiment", + "subsetSeurat" = "subset_seurat", "prepareExperiment" = "prepare_experiment", "uploadToAWS" = "upload_to_aws" ) diff --git a/pipeline-runner/man/add_new_sample_ids.Rd b/pipeline-runner/man/add_new_sample_ids.Rd index c1f8875b..b6c7aaaf 100644 --- a/pipeline-runner/man/add_new_sample_ids.Rd +++ b/pipeline-runner/man/add_new_sample_ids.Rd @@ -4,12 +4,12 @@ \alias{add_new_sample_ids} \title{Add new sample ids to the subset Seurat Object} \usage{ -add_new_sample_ids(scdata, sample_id_map) +add_new_sample_ids(subset_scdata, sample_id_map) } \arguments{ -\item{scdata}{Seurat Object} - \item{sample_id_map}{data.table of parent/subset sample id map} + +\item{scdata}{Seurat Object} } \value{ SeuratObject with new sample ids diff --git a/pipeline-runner/man/add_subset_metadata.Rd b/pipeline-runner/man/add_subset_metadata.Rd index d4e5c61c..688edcd5 100644 --- a/pipeline-runner/man/add_subset_metadata.Rd +++ b/pipeline-runner/man/add_subset_metadata.Rd @@ -4,15 +4,15 @@ \alias{add_subset_metadata} \title{add experiment level metadata to subset seurat object} \usage{ -add_subset_metadata(input, scdata, sample_id_map) +add_subset_metadata(input, subset_scdata, sample_id_map) } \arguments{ \item{input}{list of input params, containing the experimentId} -\item{scdata}{seurat object} - \item{sample_id_map}{list with mapping between sample_ids from parent and subset experiments} + +\item{scdata}{seurat object} } \value{ scdata with additional metadata diff --git a/pipeline-runner/man/safe_cbind.Rd b/pipeline-runner/man/safe_cbind.Rd index c9182e73..f8471e22 100644 --- a/pipeline-runner/man/safe_cbind.Rd +++ b/pipeline-runner/man/safe_cbind.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/handle_data.R \name{safe_cbind} \alias{safe_cbind} -\title{Bind columns not failing if there's an empty data.table} +\title{Bind columns not creating rows if there's an empty data.table} \usage{ safe_cbind(dt, ...) } @@ -15,5 +15,8 @@ safe_cbind(dt, ...) data.table with new columns } \description{ -Bind columns not failing if there's an empty data.table +\code{cbind} on \code{data.table} adds a row if binding an empty data.table to a non-empty +one. We do not want that behavior when parsing cellsets, because it implies +the "creation" of a cell that does not exists (i.e. when binding scratchpad +cellsets slots of an experiment without custom cellsets) } diff --git a/pipeline-runner/man/subset_experiment.Rd b/pipeline-runner/man/subset_experiment.Rd index ba3dc297..16c10453 100644 --- a/pipeline-runner/man/subset_experiment.Rd +++ b/pipeline-runner/man/subset_experiment.Rd @@ -4,7 +4,7 @@ \alias{subset_experiment} \title{Subset seurat object by the input cellset keys} \usage{ -subset_experiment(input, parent) +subset_experiment(input, parent_data) } \arguments{ \item{input}{list of input parameters, containing cellSetKeys to subset} diff --git a/pipeline-runner/man/create_subset_experiment.Rd b/pipeline-runner/man/subset_seurat.Rd similarity index 85% rename from pipeline-runner/man/create_subset_experiment.Rd rename to pipeline-runner/man/subset_seurat.Rd index cb8321ef..f1eddefc 100644 --- a/pipeline-runner/man/create_subset_experiment.Rd +++ b/pipeline-runner/man/subset_seurat.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/subset-1-subset_seurat.R -\name{create_subset_experiment} -\alias{create_subset_experiment} +\name{subset_seurat} +\alias{subset_seurat} \title{create a subset experiment} \usage{ -create_subset_experiment(input, pipeline_config, prev_out = NULL) +subset_seurat(input, pipeline_config, prev_out = NULL) } \arguments{ \item{input}{list containing: From 7d4b71b436ef3d9243fa5842d6d20a93551271fe Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Mon, 26 Dec 2022 12:56:48 -0300 Subject: [PATCH 33/34] fix docs --- pipeline-runner/NAMESPACE | 1 + pipeline-runner/R/gem2s-3-run_emptydrops.R | 6 ++++ pipeline-runner/R/init-functions.R | 6 ++-- pipeline-runner/R/qc-1-filter_emptydrops.R | 5 ++- pipeline-runner/filter_emptydrops.Rd | 0 pipeline-runner/man/compute_sample_edrops.Rd | 17 ++++++++++ pipeline-runner/man/filter_emptydrops.Rd | 34 ++++++++++++++++++++ pipeline-runner/man/run_pipeline_step.Rd | 6 ++-- 8 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 pipeline-runner/filter_emptydrops.Rd create mode 100644 pipeline-runner/man/compute_sample_edrops.Rd create mode 100644 pipeline-runner/man/filter_emptydrops.Rd diff --git a/pipeline-runner/NAMESPACE b/pipeline-runner/NAMESPACE index 290c77cb..bbf2a269 100644 --- a/pipeline-runner/NAMESPACE +++ b/pipeline-runner/NAMESPACE @@ -6,6 +6,7 @@ export(add_subset_metadata) export(build_cc_gene_list) export(build_metadata_cellsets) export(cbind_cellset_type) +export(compute_sample_edrops) export(create_sample_id_map) export(create_scdata) export(create_seurat) diff --git a/pipeline-runner/R/gem2s-3-run_emptydrops.R b/pipeline-runner/R/gem2s-3-run_emptydrops.R index 462d1478..956f31a1 100644 --- a/pipeline-runner/R/gem2s-3-run_emptydrops.R +++ b/pipeline-runner/R/gem2s-3-run_emptydrops.R @@ -33,7 +33,13 @@ run_emptydrops <- function(input, pipeline_config, prev_out) { } +#' Calculate empty drops scores for sample +#' #' @param sample_counts dgCMatrix with counts for one sample. +#' +#' @return data.frame with edrops scores +#' @export +#' compute_sample_edrops <- function(sample_counts) { # check if filtered num_empty_drops <- sum(Matrix::colSums(sample_counts) < gem2s$max.empty.counts) diff --git a/pipeline-runner/R/init-functions.R b/pipeline-runner/R/init-functions.R index 414a463d..58e8aba5 100644 --- a/pipeline-runner/R/init-functions.R +++ b/pipeline-runner/R/init-functions.R @@ -187,12 +187,12 @@ run_qc_step <- function(scdata, config, tasks, task_name, cells_id, sample_id, d } -#' Run GEM2S step +#' Run pipeline step #' -#' Calls the corresponding task_name GEM2S step function. +#' Calls the corresponding `task_name` pipeline step function. #' #' The input list only contains experiment level parameters, such as project ID, -#' and sample names. It's only used for downloading user files. +#' and sample names and it's only used for downloading user files. #' #' @param task_name character #' @param input list diff --git a/pipeline-runner/R/qc-1-filter_emptydrops.R b/pipeline-runner/R/qc-1-filter_emptydrops.R index e808e33f..b2dbfbab 100644 --- a/pipeline-runner/R/qc-1-filter_emptydrops.R +++ b/pipeline-runner/R/qc-1-filter_emptydrops.R @@ -1,6 +1,9 @@ # STEP 1. Classifier filter -#' @description Filters seurat object based on mitochondrialContent +#' Filter empty droplets +#' +#' filters seurat objects based on edrops scores. +#' #' @param config list containing the following information #' - enable: true/false. Refering to apply or not the filter. #' - auto: true/false. 'True' indicates that the filter setting need to be changed depending on some sensible value (it requires diff --git a/pipeline-runner/filter_emptydrops.Rd b/pipeline-runner/filter_emptydrops.Rd new file mode 100644 index 00000000..e69de29b diff --git a/pipeline-runner/man/compute_sample_edrops.Rd b/pipeline-runner/man/compute_sample_edrops.Rd new file mode 100644 index 00000000..0919993a --- /dev/null +++ b/pipeline-runner/man/compute_sample_edrops.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gem2s-3-run_emptydrops.R +\name{compute_sample_edrops} +\alias{compute_sample_edrops} +\title{Calculate empty drops scores for sample} +\usage{ +compute_sample_edrops(sample_counts) +} +\arguments{ +\item{sample_counts}{dgCMatrix with counts for one sample.} +} +\value{ +data.frame with edrops scores +} +\description{ +Calculate empty drops scores for sample +} diff --git a/pipeline-runner/man/filter_emptydrops.Rd b/pipeline-runner/man/filter_emptydrops.Rd new file mode 100644 index 00000000..66375661 --- /dev/null +++ b/pipeline-runner/man/filter_emptydrops.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qc-1-filter_emptydrops.R +\name{filter_emptydrops} +\alias{filter_emptydrops} +\title{Filter empty droplets} +\usage{ +filter_emptydrops( + scdata_list, + config, + sample_id, + cells_id, + task_name = "classifier", + num_cells_to_downsample = 6000 +) +} +\arguments{ +\item{config}{list containing the following information +- enable: true/false. Refering to apply or not the filter. +- auto: true/false. 'True' indicates that the filter setting need to be changed depending on some sensible value (it requires +to call generate_default_values_mitochondrialContent) +- filterSettings: slot with thresholds +- method: String. Method to be used {absoluteThreshold} +- methodSettings: List with the method as key and contain all the filterSettings for this specific method. +* absoluteThreshold: based on a cut-off threshold +- maxFraction: Float. maximun pct MT-content that we considere for a alive cell +- binStep: Float. Bin size for the histogram +* we are supposed to add more methods ....} +} +\value{ +a list with the filtered seurat object by mitochondrial content, the config and the plot values +} +\description{ +filters seurat objects based on edrops scores. +} diff --git a/pipeline-runner/man/run_pipeline_step.Rd b/pipeline-runner/man/run_pipeline_step.Rd index 924f7e33..f62d2b80 100644 --- a/pipeline-runner/man/run_pipeline_step.Rd +++ b/pipeline-runner/man/run_pipeline_step.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/init-functions.R \name{run_pipeline_step} \alias{run_pipeline_step} -\title{Run GEM2S step} +\title{Run pipeline step} \usage{ run_pipeline_step(prev_out, input, pipeline_config, tasks, task_name) } @@ -24,9 +24,9 @@ run_pipeline_step(prev_out, input, pipeline_config, tasks, task_name) list of task results } \description{ -Calls the corresponding task_name GEM2S step function. +Calls the corresponding \code{task_name} pipeline step function. } \details{ The input list only contains experiment level parameters, such as project ID, -and sample names. It's only used for downloading user files. +and sample names and it's only used for downloading user files. } From f937381d6f814d619cd421fff34818ccafa53fa8 Mon Sep 17 00:00:00 2001 From: German Beldorati Stark Date: Mon, 26 Dec 2022 13:18:21 -0300 Subject: [PATCH 34/34] fix sysdata --- pipeline-runner/R/sysdata.rda | Bin 3169 -> 3152 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/pipeline-runner/R/sysdata.rda b/pipeline-runner/R/sysdata.rda index c750a90d5513b4f53e06689742df642eb0dbfa12..819509cc1bfe8f44563a8060d604dbafe63294fd 100644 GIT binary patch delta 3151 zcmV-V46yUz7|<9FLRx4!F+o`-Q(5!=3(=7dB7gX_YkTj$!3}Rj?9Xeeo>o{6K>Y@lcW0077V>HugUng)Q4G%4w- zZ7`-WG}Q8%Jx!2JX-DdtBxqwIVH#w~fCRxZX|*<* z5xqe(Mn;1{=`?9Dm_rZ*iY6$~&}eFCq3UVnJ){sepfUge05UWH000IF`l0G%Ck`RO>k{C%Q5@r)_S^Ynl0QA335Ff%Yx zA(93dQThw2dFHMh9V4OJqocpl_n5XLA?PilCe@dU!~y2)UUk&gr?n9-;vTnCj4he9}VhPPz1vtC$p*C%;fcXwlFiGPNgAyk{-;g&Z! z^T^>fLIK<}&4ulI^5qO0KbBp>6*@)CbzySS=9UGB43#-tj-PmQTYJ(?z}(J0(@oxC z6I;#NWuZ&i)V)>S_cI2iG=DoQRI{+NL)!6p44SrSyttev9hD~h;_nF^>(#MnVn%564^&06Tn zFs0`x()_0K*9z$)e-LAY66T~F3`hXdgpy2YAV9%tAlLkOC%LI5aT;SuD?yyf$;A(Oh6Mag3zN2V5?5@0igb?SmAG ziy72wKubC;akwX4w#JiYG>PCm-M|?1>6_*j0M%Y z>fNE-7Ux`!VP;j|9In8jx5d?KQ4xX}kQ&4`M?F!aQQx+<5Xm4-7C3I=gyukrKt8@# z&Sr$sj)%NFBra@Ct7Q?E5V52YTC5Q_RW`U(V(h`Iz*lk#tABa1EPz4**70$~GqD|$ zaK}vd=*6{az zZEU6IBTL=*B7X%Ojsnx$9W3ig8cjK5MCFGgUGJqtLOOSHS#EYYME3FD!n^En%GS1} z552l_*(jqNFxGx)waaozSAE18#MQ^0!gmq5dp%!o1UV9J&1PqEh{9M-jbo9sH+41) z?5<0!XVmGfO)3tIUBGb8VT2I@ktbXl2_7(JvlrfxZGR9{gow(z=u6DyW4Ks`V;3Pp zPU)({5S-AdV~Co<&pd+*V8r)uN+FWC5oc9tl+00xLL7=n<)g~Jvr;=r`2s{z3p6-#R(BL(d0W@xVROF^83L>-QO98>7*C~Uo2=i$!8uiz8BZr1HUMn>^*s*IVbD|vgzP47PcfD0* zRD8H+t}Pa}D$xq&#vPcouk?>1uKEB zyGx`ym>zS=AxJHov!{B&+1aaWUf95!;YO;6B2DV(-PNGl7FWFrqA1}C1fvX2Q-~Il zC>o(8-#MFm!n(|rlv*1OM2AN>HPCF+Y65d(2BFn%!G?hNA+N@iEoN9|t6iYD$&;pM zmw&9%xq(gqL7@@khgMCd+y?;8ICCK7>s7zMWg!k^n5te1XFPb~ zA)g>sIy#my#>z$-jn?7Q3%HChQR~Z}Zi2wM0FXAB5*XrxYca1LhDx-tL;^rWk>E1t zr`#x;Dha@-!0qy-4D@(KCNay%{{_wUDt{Gd#*J5oXiSca_3}4#2UQ3cPz06YvqNS= zc`)hQ+}?_}RPYs++hPPn34tYky7M^bZ(EIxJ(>jeu$!V)CD8G^usPuXy8-xV++|xD z@Gd$8dxJpt=!N8Y?!?0e1d3olX_W2opNdOH1x>u?T~s|$5X&|klA&uj-a{q%6Mr^a z=WJwv0Em*aQWCW}YZb#S(XzN4vskRuqO*7t10X~|n<1QtIhK(}F_t&5OPdU|nXt<# z++m2^V%7>|bL^}%$FA{Du{QJG48=qHp#;_9cD^0HhBi&vn@w4$kYonO7@~>P1*B9r z#WL%;>ce|$ZzG~f1enqi8*CFB0Dodv=u`#(2#FYS*KvNZ6^aK#us4qGtVW!JIa9Lw z=_N(UOsac(ldbzucxLcGk4J+rVAdj>Xme`J)z8iW5i>#`G3}xM8w`LEa81 zUl{N_RxB`p7Am#tED_x4MY=lC!o>qm4Dv;`&0oo{jDKqiYu;Ie zkjHUGd7Pg~`41^Iie^xX+GjMh9DgS5waB32*7k)M%fu@0Y-u_>p1&RL8inSqAM zW<+){z<~Yln_{iyVyPI@FIGanrsR7nO z0whdu+VO^mc@7&Inm|n{-b(@IMU;-eQff9!$GqvD@mfJ%67)Ija^_+XCof*e*-fgFGZkd`emkmU*tJ`AudD z@8Pjjs-v8XbG9lT%|XQ|MHzKujC{+Za}MxVfJVS}xudPzSnwZ)UIUt~hv_3NBnMeZ z`vZCrOF8RsH%cO$@PE?=1+v85X=vXu2PzfBoH1D2GU68jlQ(u*O%$98pcE!n3!{UO z?BI?LG$161gb_)8zU{78C+k(mSw=S3&9ipvvfpdMT}f_%AeuI^9<+~Ss9{p~c^)vg zrm_J1#)L=NmaZ!@UtZBsa$Cw7*DY1TcIM;A26}72U_@5E{eM_aU`oSpvA;J4U0Yur z2w-3i(+Z95^MKYoWdmBRw!=6H%{^KNemJO%g0d6TUSy;y7~cas+mS|ADv@yufZ`Wr zQArt!ki1o`i9@5xTv@Q4r9(YZc82?a)jNq*l~q~Oee1ega>5LmsWqI{3??d^M?l*d zWMi>N1wdSaCV#p9Ho8MZm|1r1iu(ltMWaK?_ih~Z6S(W&Ht9n+^As#J6*J)}nXy1;! zz+npx%?FOb1xsYvpUgSJ;1qOq7IODyY@-V*Wz~lwfjH307s(mX48g@&Qm(x`?86@k pYrLk+Vx%M&&XKU=mb{S6VAlQbr20XCW%38H#xcu4gRC=CDr000Jn8VDe1lT#**B%i8e zW|5P_RP{YWH8k{q8UO$QVrT#W(9jW6@-T%bsP#Ql)HGm?2AX6HBPI|U8Z;R+U`;d( zBYz>IOqv)>8mHtFLMfD!@;0ZcndDQ$RP`~bqerO8pfgiJpa1{>00HU%0!2YhsLfMN zsiX8L(9jwH01W^DJwR+2SEYG5iu1`W0`jDKymfws`vLt&dTEh#1-U6_#sKq3nm_q!PY z5Lh|MAXx?y2Ukc_l6pQ5_8AzqLlQ#q*Rhb@Np^1s zT?{AY=>mso71*t}PdgI)an5F({Yg+ojACx8;Y@=RV&9D+B+OdF4M7cYHUu_AHh+XW zhSJcq2yJ%_ZcRz1hO%svKbntV$oqvU5FGw5ZO%=19ZEohbu ztuL?I=ANdNt2cA)Th;r~{82Aoet)p>iKoSS@t~4HZYa(RW&~(a>p%(w3@JlGkpzRG z6rL!F8lLKsT$&0KPj!G(BD~0h3yqZ!V;} zvtDBMp^>j50HizqM8usAlbveqh6%}o3uk%SVk44NT9tth$S-N9xHdIPleAq0ac2>8 zAz2BSnh;d7nwpDEn&`~;O9M>#Pc&M}9Ym%zQWd$taikmUBmiYXNq;6ZkRW0N2$cx~ zN&rJmw1x@+01P2uph99mAcdrx5F!Q%AtGj^Xc7Er03!I9a5OZ4A}Gs?xf?X&)?}~{ zZyG9b^N?i8-EpJm_8|c(qnfqJnLAy$Jl44Qj@U^E5y*s)q)KKZIT|5?5OD}SniIk^ zL`pVhdUPrZacD4uvwzFOiZ$yhge|6;j}Ldj-pP@S$k%aJ6N@Xey|b~OeQw6=d8?Uk zJ(m*mL7CU@OM2zf@haO3x2H(2LgabHnOnR%yz+-fKKiW@7$KPfxQ5Bsd~Lk_irs`V zkS9foVWBx7JRl!_D>FJ1qa9xG@Q}HpYSf}Jged6*W~s78uz#rE0cge9gH(uQ4=u3P zSpb9sVc_AcGr3+vAm$2h&oob>U%t98QAcYz*@p}(vdaZD`VsF_yK70-Y}>!7fT4ve za~q6tDO*~!aS)+9_Gk}V?A==HZ`afy$KZrh%UeL~j&Dz#%SbU|9rwU4 z!!9M(Cc{+uGkgQvID58n%a677T*fHb59CaX~jPu&JI_@}wI{W;&=_jQdo89U9AcrE2*{PY_ z_*o34l?c>yahy7u4D7H=tnJUb);d@nv3CK(Iff8K27g4p*LW%T4>ON1EMIgpbV6bieAjamCBZDM$MMS@gq~Q zi)&ePoe<}}_cG-cJ?g7ct78oI#YI*u(F*2_JAW~1!?LVf5fYt~#uddkcV_I8Q%9)>Ei7ahQc5wyw^d?Ho8+_g=AqH^Pnf5Ja2V(YvaH)(Gkv zk|2q&K>(0oq{SFORmOnyGB~xJlTZ~^MoP?7HXMl#jepbyVE7@g z&wnUYGQ%~R?FGhJI%au$PL|XSJ+L6?M0jcMXxnYzZw*6clpJ0Rx3{NgL#-<*F8#J3 z;ShqEmSJ79(1WKNcT`iWI8``on;dMkXx47cqi`=Z7+|B)t5)0#oVfsyII<)$#)FQd z%8#LyZ78AvAR^%j-AphtZcJoNBcQcYr194#qqK!CXlN#J@_!wm&)ZrRt$pMPXT zGJU&}q%KLv0vYhmnsLI$#)?1$M3)+nsj1PpT~0T~cT#m%HD+~z+ zOH3#k!Li063ls&kNN$s`x3fBQB!8t-Ugso`M#hkk+hCa30}{WIq<{%0L8!9j`p#HV zc0MV@*K$^EN}If0Fs{$ z0nBku`b}(2K+ZXiS?{jCl#2|EI2Oa-X7bLR(%{_wpQJulFmvcGmi8Oi-G3f7C^FO5 zJ;7ZDALQHDHX>!!s}V56W-zhhaQRKvYY3`hSPxm)<1(dffo%oBliAOsi#{GUV22nN z5nKkoe!K+AIReQ{9Kb?gLdZ_N9t#iIbDPYv?G8&{f87xSkf{B?rt7@lh&av7gB-=+ zMWPrba?xu?r8Ov?=UNPrZhs)e3%GQ77U|FY`n{KAjMma!wFd@wcNAeJBUgsDW3!{h z{Bd6}5))-ih@pdta818ZO@RS`5qt1yfz?jO4tA9p^HyeyY1~huo z%$qY#va>TjHjXt2KzM{&#j7D*UOC7if;v~U#TtB;@!}gMmc3Eioq2L>R4ghgqUmDT zF5K2bEM;{-CP;!Q@L!csPG>IYL3*p>>N?8$yuzdJyf~!@D<1Boga$p$TiYR_2$4c^ z^T|`8WI|cS|_U z%0<3o$;wqLdKpL z%bG5I=ymvV7Jt;TA$D1MCt=?mih1m-hC@#f+*^<0iK3NaclT3Q%s?p2Hl8qdH0+T( zhV9dDAtns)-$<`IEE5QDo@sKUZdowf5(zX(t<^Gy^M?xl*q3Vnu7wN4_*28Q?hLUT z1cH)wqNhKTITg^g7;n`641i(^jnp0k0D?<8^uNO#kx6+d=W8uR@YdRfiOHAT9Et}o z5WPXE5ZD>&s46JS&5=DBgt