From 8cd01ee453455597d91f7ba10976f425526b65d4 Mon Sep 17 00:00:00 2001 From: mtello <59661805+mtello22@users.noreply.github.com> Date: Fri, 8 Dec 2023 15:04:03 -0800 Subject: [PATCH] updated reactive doc version --- README.md | 15 ++- tSNEviz/data/GSE72056_cell_metadata.tsv | 4 +- tSNEviz/data/GSE72056_donor_metadata.tsv | 4 +- tSNEviz/data/GSE72056_exp_df.tsv | 4 +- tSNEviz/reactive_doc.Rmd | 114 ++++++------------ .../shinyapps.io/mtello/reactive_doc.dcf | 2 +- 6 files changed, 55 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index f99b044..9b8113c 100644 --- a/README.md +++ b/README.md @@ -29,4 +29,17 @@ The current shiny app has the following graphic elements: # Reactive version -The previous Shiny app was also updated to provide a platform to explore the dataset and filter it based on user-defined requirements. In this version, all cells labelled as "Unresolved" were removed for simplicity. +The previous Shiny app was also updated to provide a platform to explore the dataset and filter it based on user-defined requirements. The document to run the reactive version can be found at + +"./tSNEviz/reactive_doc.Rmd" + +Once the document is loaded in an Rstudio session, its possible to runt it by clicking the "Run document" button. + +The deployed version can be found at + +In this version, I only included non-malignant cells from two donors due to computational and storage limitations. by GitHub and ShinyApps.io. However, the code is able to run for the full dataset in a local computer with the following specs: + +**Processor:** 11th Gen Intel(R) Core(TM) i5-11400H \@ 2.70GHz, 2688 Mhz, 6 Core(s), 12 Logical Processor(s)\ +**Installed Physical Memory (RAM)**: 16.0 GB + +Therefore it is a good template to develop a local platform to analyze and filter single cell RNA-seq data. diff --git a/tSNEviz/data/GSE72056_cell_metadata.tsv b/tSNEviz/data/GSE72056_cell_metadata.tsv index bbfef22..69f18a7 100644 --- a/tSNEviz/data/GSE72056_cell_metadata.tsv +++ b/tSNEviz/data/GSE72056_cell_metadata.tsv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d2c6788bff7eb22313fb156fb249f5922aa471069fbd05f7cf50a5b2743587b -size 73231 +oid sha256:82799112873b57595617e5f9ed81dd89cb3eba25e769a90965db832bb42c1fdc +size 21795 diff --git a/tSNEviz/data/GSE72056_donor_metadata.tsv b/tSNEviz/data/GSE72056_donor_metadata.tsv index ccb94d6..4628dfe 100644 --- a/tSNEviz/data/GSE72056_donor_metadata.tsv +++ b/tSNEviz/data/GSE72056_donor_metadata.tsv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43104c9473b7482712594f3c04428572ce9a59135bca3d96e98fa3c6f2dd88fd -size 205 +oid sha256:225a4f569e4fc5cff05bbef9d0fb35cd65818b8d24a589e682f4bb26fc9fd9c9 +size 168 diff --git a/tSNEviz/data/GSE72056_exp_df.tsv b/tSNEviz/data/GSE72056_exp_df.tsv index c28277e..94710db 100644 --- a/tSNEviz/data/GSE72056_exp_df.tsv +++ b/tSNEviz/data/GSE72056_exp_df.tsv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b5b12e35c8b1c06bb74adba7c0851d7f590cab9c82b78f96cfe37f96f16168d -size 94770776 +oid sha256:b8bea98e95e3aa2d55aff5e68a1b56fd431f4ae2cbca950e7ad2698b19bcf16f +size 30114643 diff --git a/tSNEviz/reactive_doc.Rmd b/tSNEviz/reactive_doc.Rmd index 95f0b7e..e2fc54c 100644 --- a/tSNEviz/reactive_doc.Rmd +++ b/tSNEviz/reactive_doc.Rmd @@ -13,6 +13,7 @@ library(ggplot2) library(scales) library(RColorBrewer) library(dplyr) +library(irlba) knitr::opts_chunk$set(echo = FALSE) ``` @@ -23,16 +24,14 @@ cell_dt <- fread("data/GSE72056_cell_metadata.tsv") cell_dt <- cell_dt[malignant != "Unresolved"] cell_dt <- cell_dt[cell != "Unresolved"] cell_dt[, tumor_id := factor(tumor_id)] +cell_dt[, malignant := NULL] cell_dt[, cell := factor(cell, levels = c("T", "B", "Macrophage", "Endothelial", "CAF", "NaturalKiller"))] -cell_dt[, malignant := factor(malignant)] - donor_dt <- fread("data/GSE72056_donor_metadata.tsv") tumors <- as.character(unique(cell_dt$tumor_id)) -malignant <- unique(cell_dt$malignant) celltypes <- unique(cell_dt$cell) ``` @@ -41,18 +40,16 @@ celltypes <- unique(cell_dt$cell) # Goal of the current Shiny document: -The current document is aimed to provide an environment to visualize the single cell RNA-seq data from the study "Dissecting the multicellular ecosystem of metastatic melanoma by single-cell RNA-seq" by Tirosh, and collaborators 2016. - -After exploring the dataset and selecting the target cells to analyse, it is possible to download the filtered dataset. +The current document is aimed to provide an proof of concept for an interactive environment to visualize the single cell RNA-seq data from the study "Dissecting the multicellular ecosystem of metastatic melanoma by single-cell RNA-seq" by Tirosh, and collaborators 2016. # Dataset description. -The single cell dataset is composed of 4,645 cells derived from 18 different donors. Each cell was further classified into different cell types based on the expression of marker genes and into malignant status based on estimated copy number variation (CNV). +The single cell dataset is composed of 4,645 cells derived from 18 different donors. Each cell was further classified into different cell types based on the expression of marker genes and into malignant status based on estimated copy number variation (CNV). Due to the file size and limitations of an online Shiny app, I only included the non-malignant cells from two tumors. ```{r} -summary(cell_dt, maxsum = 20, na.strings = "") +summary(cell_dt[, .SD, .SDcols = c("tumor_id", "cell")], maxsum = 20, na.strings = "") ``` @@ -63,22 +60,15 @@ The first step is to visualize the distribution of cells by each category of mal ```{r} -selectInput("color_var", - label = "Choose a variable to color:", - choices = c("Malignant" = "malignant", - "Celltype" = "cell"), - selected = "cell") - - color_df <- data.table( - Variable = factor(x = c("Yes", "No", levels(cell_dt$cell)), - levels = c("Yes", "No", levels(cell_dt$cell))), - Color = factor(brewer.pal(8, "Set3")) + Variable = factor(x = levels(cell_dt$cell), + levels = levels(cell_dt$cell)), + Color = factor(brewer.pal(6, "Set2")) ) data_plot <- reactive({ - plot_dt <- cell_dt[, .SD, .SDcols = c("cell_id", "tumor_id", input$color_var)] + plot_dt <- cell_dt[, .SD, .SDcols = c("cell_id", "tumor_id", "cell")] plot_dt <- merge.data.table(x = plot_dt, y = color_df, - by.x = input$color_var, by.y = "Variable", + by.x = "cell", by.y = "Variable", all.x = TRUE) }) @@ -86,8 +76,8 @@ data_plot <- reactive({ renderPlot({ plot_dt <- data_plot() fill_dt <- data.table(Color = levels(droplevels(plot_dt$Color)), - Variable = levels(unlist(plot_dt[, .SD, .SDcols = input$color_var]))) - ggplot(plot_dt, aes(x = tumor_id, fill = !!sym(input$color_var))) + + Variable = levels(unlist(plot_dt[, .SD, .SDcols = "cell"]))) + ggplot(plot_dt, aes(x = tumor_id, fill = cell)) + geom_bar(position = "fill") + labs(title = element_blank(), y = "Fraction", x = "Tumor ID") + scale_y_continuous(labels = scales::percent) + @@ -98,8 +88,8 @@ renderPlot({ renderPlot({ plot_dt <- data_plot() fill_dt <- data.table(Color = levels(droplevels(plot_dt$Color)), - Variable = levels(unlist(plot_dt[, .SD, .SDcols = input$color_var]))) - ggplot(plot_dt, aes(x = tumor_id, fill = !!sym(input$color_var))) + + Variable = levels(unlist(plot_dt[, .SD, .SDcols = "cell"]))) + ggplot(plot_dt, aes(x = tumor_id, fill = cell)) + geom_bar(position = position_dodge()) + labs(title = element_blank(), y = "Number of cells", x = "Tumor ID") + scale_y_continuous(trans = "log10", labels = scales::comma) + @@ -119,23 +109,20 @@ The following code reproduces the tSNE plot for figure 1C, however it is possibl To calculate the tSNE plot based on the selected cells jsut click on "Generate tSNE plot". Running time could take up to one minute, depending on the amount of cells included (all boxes checked). -It is also possible to color the cells by "Malignant status" or "Cell type", changing this option won't recalculate the tSNE plot. +Cells are automatically colored by estimated cell type. -Please note that the subset of cells used to generate the last tSNE will be used in the following sections. For instance, if you want to visualize variability only between T and B cells you must select the corresponding boxes and then generate the corresponding tSNE plot. This decision is aimed to mantain a consistent data subset during the rest of visualizations. +Please note that the subset of cells used to generate the last tSNE will be used in the following sections. For instance, if you want to visualize variability only between T and B cells you must select the corresponding boxes and then generate the corresponding tSNE plot. This decision is aimed to maintain a consistent data subset during the rest of visualizations. ```{r} color_df <- data.table( - Variable = factor(x = c("Yes", "No", levels(cell_dt$cell), levels(cell_dt$tumor_id)), - levels = c("Yes", "No", levels(cell_dt$cell), levels(cell_dt$tumor_id))), - Color = factor(c(brewer.pal(8, "Set3"), brewer.pal(3, "Dark2"))) + Variable = factor(x = c(levels(cell_dt$cell), levels(cell_dt$tumor_id)), + levels = c(levels(cell_dt$cell), levels(cell_dt$tumor_id))), + Color = factor(c(brewer.pal(6, "Set2"), brewer.pal(3, "Dark2")[1:2])) ) inputPanel( - checkboxGroupInput("malignant", label = "Malignant status:", - choices = c("Yes", "No"), - selected = "Yes"), checkboxGroupInput("celltypes", label = "Cell types:", choices = celltypes, selected = celltypes), checkboxGroupInput("tumor_id", label = "Tumors to include:", @@ -146,7 +133,6 @@ inputPanel( inputPanel( selectInput("colorby", label = "Color cells by:", choices = c("Tumor"= "tumor_id", - "Malignant status" = "malignant", "Cell type" = "cell"), selected = "tumor_id") ) @@ -161,7 +147,7 @@ observeEvent(input$updatePlot, { updateTsnePlot <- function() { req(updatePlotClicked()) # Only proceed if the updatePlot button is clicked - cell2plot <- cell_dt[malignant %in% input$malignant][cell %in% input$celltypes][tumor_id %in% input$tumor_id, cell_id] + cell2plot <- cell_dt[cell %in% input$celltypes][tumor_id %in% input$tumor_id, cell_id] dt_tsne <- t(as.matrix(exp_dt[, .SD, .SDcols = cell2plot])) tsne_plot <- Rtsne(dt_tsne, partial_pca = TRUE, verbose = TRUE, num_threads = 1) @@ -200,25 +186,25 @@ renderPlot({ A common approach to reduce dimensionality is to focus on genes whose expression is highly variable across the cells in the dataset. The following section allows you to visualize the expression variability using median absolute deviation (MAD) scores. - - ```{r, error = FALSE} inputPanel( selectInput("n_breaks", label = "Number of bins:", - choices = c(10, 20, 35, 50), selected = 20) + choices = c(20, 50, 100), selected = 20) ) hist_dt_reactive <- reactiveVal(NULL) sliderInput("var_threshold", label = "Select variability threshold:", - min = 0, max = 1, value = 0) + min = 0, max = 5, value = 0) observeEvent(input$updatePlot, { hist_dt_reactive(updateHist()) + all_var <- hist_dt_reactive()$exp_var updateSliderInput(session, "var_threshold", - min = min(hist_dt_reactive()$exp_var), - max = round(max(hist_dt_reactive()$exp_var), 2), - value = 0) + min = 0, + max = round(max(all_var), 2), + value = 0, + step = 0.001) output$histogram <- renderPlot({ plot_dt <- hist_dt_reactive() ggplot(plot_dt, aes(x = exp_var)) + @@ -226,15 +212,20 @@ observeEvent(input$updatePlot, { scale_y_continuous(trans = "log10", labels = scales::comma) + geom_vline(xintercept = input$var_threshold, linetype = "dashed", color = "red") + - labs(y = "Number of genes", x = "MAD") + + labs(y = "Number of genes", x = "MAD scores", + title = paste("Current threshold will conserve ~", + round((1-(ecdf(all_var)(input$var_threshold)))*100), + "% of genes", + sep = "")) + theme_classic() }) }) updateHist <- function() { - cell2plot <- cell_dt[malignant %in% input$malignant][cell %in% input$celltypes][tumor_id %in% input$tumor_id, cell_id] + cell2plot <- cell_dt[cell %in% input$celltypes][tumor_id %in% input$tumor_id, cell_id] exp_mat <- as.matrix(exp_dt[, .SD, .SDcols = cell2plot]) gene_var <- apply(exp_mat, MARGIN = 1, mad, na.rm = TRUE) + gene_var <- round(gene_var, 2) + 0.001 hist_dt <- data.table(ENSG = exp_dt$ENSG, exp_var = gene_var) return(hist_dt) @@ -245,40 +236,3 @@ plotOutput("histogram") ``` - -# Output processed data - -Now that we selected the appropriate filtering parameters, let's download the filtered data - -```{r} -# Add the action button for downloading -actionButton("downloadButton", "Download Filtered Data") - -# Add a reactive expression for filtered data -filteredData <- reactive({ - cell2plot <- cell_dt[malignant %in% input$malignant][cell %in% input$celltypes][tumor_id %in% input$tumor_id, cell_id] - exp_mat <- as.matrix(exp_dt[, .SD, .SDcols = cell2plot]) - gene_var <- apply(exp_mat, MARGIN = 1, mad, na.rm = TRUE) - index_genes <- which(gene_var > input$var_threshold) - exp_mat_filtered <- exp_mat[index_genes, ] - exp_filtered <- data.table(ENSG = exp_dt$ENSG[index_genes], exp_mat_filtered) - colnames(exp_filtered) <- c("ENSG", cell2plot) - return(exp_filtered) -}) - -# Download handler for the button -output$downloadButton <- downloadHandler( - filename = function() { - paste("filtered_data.tsv", sep = "") - }, - content = function(file) { - write.table(filteredData(), file, sep = "\t", row.names = FALSE, quote = FALSE) - } -) - - -``` - - - - diff --git a/tSNEviz/rsconnect/documents/reactive_doc.Rmd/shinyapps.io/mtello/reactive_doc.dcf b/tSNEviz/rsconnect/documents/reactive_doc.Rmd/shinyapps.io/mtello/reactive_doc.dcf index e56e182..2479fc1 100644 --- a/tSNEviz/rsconnect/documents/reactive_doc.Rmd/shinyapps.io/mtello/reactive_doc.dcf +++ b/tSNEviz/rsconnect/documents/reactive_doc.Rmd/shinyapps.io/mtello/reactive_doc.dcf @@ -5,7 +5,7 @@ account: mtello server: shinyapps.io hostUrl: https://api.shinyapps.io/v1 appId: 10748803 -bundleId: 8023592 +bundleId: 8027003 url: https://mtello.shinyapps.io/reactive_doc/ version: 1 asMultiple: FALSE