diff --git a/R/pppr_output_plot.R b/R/pppr_output_plot.R new file mode 100644 index 0000000..b90de67 --- /dev/null +++ b/R/pppr_output_plot.R @@ -0,0 +1,117 @@ +#' PPR OUTPUT PLOT +#' +#' @description +#' Plots the predicted positon of your sample. +#' +#' @param sample.gss A GSS_OBJECT of the sample you wish to plot +#' @param col The colour that you'd like +#' @param overlay set to TRUE if you would like this plot to overlay a previous plot. +#' @param label string that you would like to assign as the label to the line. +#' @param genes_of_interest The names of any genes that you'd like to include the switching point of. +#' @param switching_genes a matrix containing the switching gene information as produced by GeneSwitches. +#' +#' @return nice plot highlighting the probable position of your sample on your trajectory. +#' @export +#' +ppr_output_plot <- function(sample.gss, col = "red", overlay = FALSE, label = "sample name", genes_of_interest = NULL, switching_genes = reference.sg){ + + if (!overlay) { + plot(x = 1:100, + y = (sample.gss$sample_flat/max(sample.gss$sample_flat)*100), + ylim = c(0,110), + xlim = c(0,100), + pch = 20, + cex = 0.8, + col = col, + type = "l", + xlab = "Pseudo-Time Index", + ylab = "GSS Score", + main = paste("Predicted Positions")) + + segments(which.max(colSums(sample.gss$sample_flat)), + -3.9, + which.max(colSums(sample.gss$sample_flat)), + (sample.gss$sample_flat[which.max(colSums(sample.gss$sample_flat))]/max(sample.gss$sample_flat)*100), + lwd = 1, + lty = 2, + col = col) + + text(x = which.max(colSums(sample.gss$sample_flat)), + y = (sample.gss$sample_flat[which.max(colSums(sample.gss$sample_flat))]/max(sample.gss$sample_flat)*100), + labels = label, + col = col, + pos = 3) + + if (length(genes_of_interest) > 0) { + for (gene_name in genes_of_interest) { + segments(switching_genes[gene_name,"switch_at_timeidx"], + -3.9, + switching_genes[gene_name,"switch_at_timeidx"], + -0.5, + lwd = 1, + lty = 2) + + text(x = switching_genes[gene_name,"switch_at_timeidx"] + 3, + y = 1, + labels = gene_name, + srt = -20, + cex = 0.86, + pos = 2) + + } + } + + + + } else { + lines(x = 1:100, + y = (sample.gss$sample_flat/max(sample.gss$sample_flat)*100), + ylim = c(0,110), + xlim = c(0,100), + pch = 20, + cex = 0.8, + col = col, + type = "l") + + segments(which.max(colSums(sample.gss$sample_flat)), + -3.9, + which.max(colSums(sample.gss$sample_flat)), + (sample.gss$sample_flat[which.max(colSums(sample.gss$sample_flat))]/max(sample.gss$sample_flat)*100), + lwd = 1, + lty = 2, + col = col) + + text(x = which.max(colSums(sample.gss$sample_flat)), + y = (sample.gss$sample_flat[which.max(colSums(sample.gss$sample_flat))]/max(sample.gss$sample_flat)*100), + labels = label, + col = col, + pos = 3) + + if (length(genes_of_interest) > 0) { + for (gene_name in genes_of_interest) { + segments(switching_genes[gene_name,"switch_at_timeidx"], + -3.9, + switching_genes[gene_name,"switch_at_timeidx"], + -0.5, + lwd = 1, + lty = 2) + + text(x = switching_genes[gene_name,"switch_at_timeidx"] + 3, + y = 1, + labels = gene_name, + srt = -20, + cex = 0.86, + pos = 2) + + } + } + } +} + + + + + + + + diff --git a/R/pppr_predict_position.R b/R/pppr_predict_position.R new file mode 100644 index 0000000..83cdf81 --- /dev/null +++ b/R/pppr_predict_position.R @@ -0,0 +1,88 @@ +#' @title PPPR Predict Position +#' +#' @description +#' Produces an estimate for the position on trajectory of each gene in each cell of a sample. +#' This can be later aggregated to estimate the position of the sample along the trajectory. +#' +#' @param reduced_binary_counts_matrix a matrix of your samples binary gene expression. +#' @param reference.sg Genes which switch through the trajectory as identified by GeneSwitches. +#' +#' @return A list of matrices: A matrix for each cell where the columns represent progress through a trajectory, +#' and the rows represent genes, values indicate a likely position of the cell upon the trajectory based that genes bianrized expression. +#' @export +#' +ppr_predict_position <- function(reduced_binary_counts_matrix,reference.sg) { + + ## The final output will be a ppr_obj (list) comprised of 3 objects. + # Make the list of length 3, and name the objects + ppr_obj <- vector("list", 3) + names(ppr_obj) <- c("genomic_expression_traces","cells_flat","sample_flat") + # Assign the ppr_obj class attribute to the list + class(ppr_obj) <- "PPR_OBJECT" + + ## Reorder reference.sg + # as the code relies on the rownames and idicies of the genes in reduced_binary_counts_matrix and reference.sg matching. + reference.sg <- reference.sg[rownames(reduced_binary_counts_matrix),] + + ## Input values: + number_of_cells <- dim(reduced_binary_counts_matrix)[2] + # Should we get the number of switching genes from here? or dim(reduced_binary_counts_matrix)[1] + # It is possible for dim(reduced_binary_counts_matrix)[1] < dim(reference.sg)[1] + number_of_switching_genes <- dim(reference.sg)[1] + + + # Different datasets store this information in different columns.. watch out. + # Maybe this should be an input to the function? + # as.numeric is needed/notneeded depending on dataset. + switching_time <- as.numeric(reference.sg$switch_at_timeidx) + switching_direction <- reference.sg$direction + + # Building the genomic_expression_traces list. (faster than building it dynamically.) + all_patients_cells_scored <- vector("list", number_of_cells) + names(all_patients_cells_scored) <- colnames(reduced_binary_counts_matrix) + + # Loop through all cells making matrices for each, + #which represent likely position of a cell on a trajectory based on the expression of each gene. + for (c in 1:number_of_cells) { + # Build the matrix of 0's which has genes as rows and pseudotime indecies as columns. + genomic_expression_mat <- matrix(0, nrow = number_of_switching_genes, ncol = 100) + rownames(genomic_expression_mat) <- rownames(reduced_binary_counts_matrix) + binarized_gene_expression_for_cell_c <- reduced_binary_counts_matrix[, c] + + up_indices <- which(binarized_gene_expression_for_cell_c == 1 & switching_direction == "up") + down_indices <- which(binarized_gene_expression_for_cell_c == 0 & switching_direction == "down") + not_up_indices <- which(binarized_gene_expression_for_cell_c == 0 & switching_direction == "up") + not_down_indices <- which(binarized_gene_expression_for_cell_c == 1 & switching_direction == "down") + + for (i in up_indices) { + genomic_expression_mat[i, switching_time[i]:100] <- 1 + } + + for (i in down_indices) { + genomic_expression_mat[i, switching_time[i]:100] <- 1 + } + + for (i in not_up_indices) { + genomic_expression_mat[i, 1:switching_time[i]] <- 1 + } + + for (i in not_down_indices) { + genomic_expression_mat[i, 1:switching_time[i]] <- 1 + } + + all_patients_cells_scored[[c]] <- genomic_expression_mat + } + + ppr_obj$genomic_expression_traces <- all_patients_cells_scored + +### GENOMIC EXPRESSION TRACES CREATED +# Now flatten: + +# Use lapply to calculate column sums for each matrix +ppr_obj$cells_flat <- do.call(rbind, lapply(all_patients_cells_scored, colSums)) +rownames(ppr_obj$cells_flat) <- names(all_patients_cells_scored) +# Combine each cells column sums into a single flat matrix. +ppr_obj$sample_flat <- matrix(colSums(ppr_obj$cells_flat), nrow = 1) + +return(ppr_obj) +} diff --git a/R/pppr_racinglines_plot.R b/R/pppr_racinglines_plot.R new file mode 100644 index 0000000..c076272 --- /dev/null +++ b/R/pppr_racinglines_plot.R @@ -0,0 +1,113 @@ +#' @title Identify and Visualise each cells position. +#' +#' @description +#' Produces a plot for each cell which helps visualize how PathPinPointR is predicting the cells position. +#' +#' @param reference.sg A selection of switching genes which are evenly distributed through pseudo-time. +#' @param lines logical, do you want to include lines to inidicate the predicted position of a cell +#' @param reference_reduced a matrix of your sample's binary gene expression. +#' @param cell_id The index or name of the cell of interest +#' +#' @return Timeline plot of selected cell +#' @import ggplot2 +#' @import ggrepel +#' +#' +#' @export +#' + +ppr_timeline_plot <- function(reference.sg, lines = TRUE, reference_reduced = NULL, cell_id = 1) { + + # Convert reference.sg to a data frame + reference.sg <- as.data.frame(reference.sg) + + # Add a new column direction_num and set it to -1 + reference.sg$direction_num <- -1 + + # If "up" is present in the direction column, set direction_num to 1 + if ("up" %in% reference.sg$direction) { + reference.sg[reference.sg$direction == "up", ]$direction_num <- 1 + } + + # Generate pseudotime data based on the specified parameters + timeidx_df <- data.frame(timeidx_range = c(0, 25, 50, 75, 100), timeidx_labs = c(0, 25, 50, 75, 100)) + + + # Create the initial ggplot object with x and y aesthetics, color, and labels + timeline_plot <- ggplot(reference.sg, aes(x = switch_at_timeidx, y = pseudoR2s * direction_num, label = rownames(reference.sg))) + + geom_point(size = 1) + xlab("Time-Index") + ylab("Quality of fitting (R^2)") + + # Add the classic theme to the plot + timeline_plot <- timeline_plot + theme_classic() + + # Add a horizontal black line for the timeline + timeline_plot <- timeline_plot + geom_hline(yintercept = 0, color = "black", linewidth = 0.6) + + # Add labels for pseudotime on the plot + timeline_plot <- timeline_plot + geom_label(data = timeidx_df, aes(x = timeidx_range, y = 0, label = timeidx_labs), + size = (3), color = "black") + + # Add text labels with repulsion to avoid overlap + timeline_plot <- timeline_plot + geom_text_repel(aes(x = switch_at_timeidx, y = pseudoR2s * direction_num, label = rownames(reference.sg)), + size = 3, show.legend = FALSE) + + # Customize the theme and legend appearance + timeline_plot <- timeline_plot + theme(legend.position = "bottom", legend.title = element_blank(), legend.key.size = unit(10, "pt"), + text = element_text(size = 12)) + + +if (lines) { + + if (is.character(cell_id)) { + cell_idx <- which(colnames(reference_reduced) == cell_id) + } else if (is.numeric(cell_id)) { + # Convert numeric to integer if needed + cell_idx <- as.integer(cell_id) + } else { + stop("cell_id must be character or numeric.") + } + + ## Reorder reference.sg + # as the code relies on the rownames and idicies of the genes in reference_reduced and reference.sg matching. + reference_reduced <- reference_reduced[rownames(reference.sg),] + + # Check if row names in reference.sg match those in reference_reduced + if (!setequal(rownames(reference.sg), rownames(reference_reduced))) { + stop("Row names in reference.sg do not match those in reference_reduced") + } + + #loop through all of the genes in reference.sg. + for (g in 1:dim(reference.sg)[1]) { + # if G is NOT expressed in C, and the switch is UP , then draw the line to the right. + if ((reference_reduced[rownames(reference.sg)[g], cell_idx] == 0) && (reference.sg$direction[g] == "up")) { + timeline_plot <- timeline_plot + geom_segment(aes_string(x = 0, xend = reference.sg$switch_at_timeidx[g], y = reference.sg$pseudoR2s[g], yend = reference.sg$pseudoR2s[g]), + color = "blue", linewidth = 0.6) + } + # if G is expressed in c, and the switch is UP , then draw the line to the right. + if ((reference_reduced[rownames(reference.sg)[g], cell_idx] == 1) && (reference.sg$direction[g] == "up")) { + timeline_plot <- timeline_plot + geom_segment(aes_string(x = reference.sg$switch_at_timeidx[g], xend = 100, y = reference.sg$pseudoR2s[g], yend = reference.sg$pseudoR2s[g]), + color = "blue", linewidth = 0.6) + } + # if G is NOT expressed in C, and the switch is Down, then draw the line to the Left. + if ((reference_reduced[rownames(reference.sg)[g], cell_idx] == 0) && (reference.sg$direction[g] == "down")) { + timeline_plot <- timeline_plot + geom_segment(aes_string(x = reference.sg$switch_at_timeidx[g], xend = 100, y = -reference.sg$pseudoR2s[g], yend = -reference.sg$pseudoR2s[g]), + color = "blue", linewidth = 0.6) + } + # if G is expressed in C, and the switch is Down, then draw the line to the Left. + if ((reference_reduced[rownames(reference.sg)[g], cell_idx] == 1) && (reference.sg$direction[g] == "down")) { + timeline_plot <- timeline_plot + geom_segment(aes_string(x = 0, xend = reference.sg$switch_at_timeidx[g], y = -reference.sg$pseudoR2s[g], yend = -reference.sg$pseudoR2s[g]), + color = "blue", linewidth = 0.6) + } + } + +# Title the plot with the name of the cell +timeline_plot <- timeline_plot + ggtitle(colnames(reference_reduced)[cell_idx]) + + +} + +# Return the final plot +return(timeline_plot) + +} + diff --git a/R/pppr_timeline_plot.R b/R/pppr_timeline_plot.R new file mode 100644 index 0000000..a74beb7 --- /dev/null +++ b/R/pppr_timeline_plot.R @@ -0,0 +1,110 @@ +#' @title Identify and Visualise each cells position. +#' +#' @description +#' Produces a plot for each cell which helps visualize how GSS is predicting the cells position. +#' +#' @param reference.sg A selection of switching genes. +#' @param genomic_expression_traces Logical, Do you want to plot the lines which indicate the predicted position of the selected cell. +#' @param reduced_binary_counts_matrix a matrix of your samples binary gene expression. +#' @param cell_id The index or name of the cell of interest +#' +#' @return Timeline plot of selected cell +#' @importFrom ggplot2 ggplot +#' @importFrom ggrepel geom_text_repel +#' +#' +#' @export +ppr_timeline_plot <- function(reference.sg, genomic_expression_traces = FALSE, reduced_binary_counts_matrix = NULL, cell_id = 1) { + + # Convert reference.sg to a data frame + reference.sg <- as.data.frame(reference.sg) + + # Add a new column direction_num and set it to -1 + reference.sg$direction_num <- -1 + + # If "up" is present in the direction column, set direction_num to 1 + if ("up" %in% reference.sg$direction) { + reference.sg[reference.sg$direction == "up", ]$direction_num <- 1 + } + + # Generate pseudotime data based on the specified parameters + timeidx_df <- data.frame(timeidx_range = c(0, 25, 50, 75, 100), timeidx_labs = c(0, 25, 50, 75, 100)) + + + # Create the initial ggplot object with x and y aesthetics, color, and labels + timeline_plot <- ggplot(reference.sg, aes(x = switch_at_timeidx, y = pseudoR2s * direction_num, label = rownames(reference.sg))) + + geom_point(size = 1) + xlab("Time-Index") + ylab("Quality of fitting (R^2)") + + # Add the classic theme to the plot + timeline_plot <- timeline_plot + theme_classic() + + # Add a horizontal black line for the timeline + timeline_plot <- timeline_plot + geom_hline(yintercept = 0, color = "black", linewidth = 0.6) + + # Add labels for pseudotime on the plot + timeline_plot <- timeline_plot + geom_label(data = timeidx_df, aes(x = timeidx_range, y = 0, label = timeidx_labs), + size = (3), color = "black") + + # Add text labels with repulsion to avoid overlap + timeline_plot <- timeline_plot + geom_text_repel(aes(x = switch_at_timeidx, y = pseudoR2s * direction_num, label = rownames(reference.sg)), + size = 3, show.legend = FALSE) + + # Customize the theme and legend appearance + timeline_plot <- timeline_plot + theme(legend.position = "bottom", legend.title = element_blank(), legend.key.size = unit(10, "pt"), + text = element_text(size = 12)) + + +if (genomic_expression_traces) { + + if (is.character(cell_id)) { + cell_idx <- which(colnames(reference_reduced) == cell_id) + } else if (is.numeric(cell_id)) { + # Convert numeric to integer if needed + cell_idx <- as.integer(cell_id) + } else { + stop("cell_id must be character or numeric.") + } + + # Check if row names in reference.sg match those in reduced_binary_counts_matrix + if (!identical(rownames(reference.sg), rownames(reduced_binary_counts_matrix))) { + stop("Row names in reference.sg do not match those in reduced_binary_counts_matrix") + } + + ## Reorder reference.sg + # as the code relies on the rownames and idicies of the genes in reduced_binary_counts_matrix and reference.sg matching. + reference.sg <- reference.sg[rownames(reduced_binary_counts_matrix),] + + #loop through all of the genes in reference.sg. + for (g in 1:dim(reference.sg)[1]) { + # if G is NOT expressed in C, and the switch is UP , then draw the line to the right. + if ((reduced_binary_counts_matrix[rownames(reference.sg)[g], cell_idx] == 0) && (reference.sg$direction[g] == "up")) { + timeline_plot <- timeline_plot + geom_segment(aes_string(x = 0, xend = reference.sg$switch_at_timeidx[g], y = reference.sg$pseudoR2s[g], yend = reference.sg$pseudoR2s[g]), + color = "blue", linewidth = 0.6) + } + # if G is expressed in c, and the switch is UP , then draw the line to the right. + if ((reduced_binary_counts_matrix[rownames(reference.sg)[g], cell_idx] == 1) && (reference.sg$direction[g] == "up")) { + timeline_plot <- timeline_plot + geom_segment(aes_string(x = reference.sg$switch_at_timeidx[g], xend = 100, y = reference.sg$pseudoR2s[g], yend = reference.sg$pseudoR2s[g]), + color = "blue", linewidth = 0.6) + } + # if G is NOT expressed in C, and the switch is Down, then draw the line to the Left. + if ((reduced_binary_counts_matrix[rownames(reference.sg)[g], cell_idx] == 0) && (reference.sg$direction[g] == "down")) { + timeline_plot <- timeline_plot + geom_segment(aes_string(x = reference.sg$switch_at_timeidx[g], xend = 100, y = -reference.sg$pseudoR2s[g], yend = -reference.sg$pseudoR2s[g]), + color = "blue", linewidth = 0.6) + } + # if G is expressed in C, and the switch is Down, then draw the line to the Left. + if ((reduced_binary_counts_matrix[rownames(reference.sg)[g], cell_idx] == 1) && (reference.sg$direction[g] == "down")) { + timeline_plot <- timeline_plot + geom_segment(aes_string(x = 0, xend = reference.sg$switch_at_timeidx[g], y = -reference.sg$pseudoR2s[g], yend = -reference.sg$pseudoR2s[g]), + color = "blue", linewidth = 0.6) + } + } + +# Title the plot with the name of the cell +timeline_plot <- timeline_plot + ggtitle(colnames(reduced_binary_counts_matrix)[cell_idx]) + +} + +# Return the final plot +return(timeline_plot) + +} + diff --git a/man/create_racing_lines.Rd b/man/create_racing_lines.Rd new file mode 100644 index 0000000..415f083 --- /dev/null +++ b/man/create_racing_lines.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pppr_create_racing_lines.R +\name{create_racing_lines} +\alias{create_racing_lines} +\title{Identify the "racing lines"} +\usage{ +create_racing_lines(reduced_binary_counts_matrix, reference.sg) +} +\arguments{ +\item{reduced_binary_counts_matrix}{a matrix of your samples binary gene expression.} + +\item{reference.sg}{Genes which switch through the trajectory as identified by GeneSwitches.} +} +\value{ +A list of matrices: A matrix for each cell where the columns represent progress through a trajectory, +and the rows represent genes, values indicate a likely position of the cell upon the trajectory based that genes bianrized expression. +} +\description{ +Produces an estimate for the position on trajectory of each gene in each cell of a sample. +This can be later aggregated to estimate the position of the sample along the trajectory. +} diff --git a/man/label_transfer.Rd b/man/label_transfer.Rd new file mode 100644 index 0000000..06ef1d2 --- /dev/null +++ b/man/label_transfer.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gss_label_transfer.R +\name{label_transfer} +\alias{label_transfer} +\title{Label Transfer +To make sure that your labels in your patient data match that used in the atlas.} +\usage{ +label_transfer(query, reference) +} +\arguments{ +\item{query}{The Query object. e.g. the patient data} + +\item{reference}{The reference Seurat object. e.g the atlas data} +} +\value{ +The Query object with new predicted cell types. +} +\description{ +Label Transfer +To make sure that your labels in your patient data match that used in the atlas. +} diff --git a/man/pppr_accuracy_test.Rd b/man/pppr_accuracy_test.Rd new file mode 100644 index 0000000..43a34b7 --- /dev/null +++ b/man/pppr_accuracy_test.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pppr_accuracy_test.R +\name{pppr_accuracy_test} +\alias{pppr_accuracy_test} +\title{Accuracy Tester} +\usage{ +pppr_accuracy_test(reference.pppr, reference.gs) +} +\arguments{ +\item{reference.pppr}{A "GSS_OBJECT" as outputted by create_racing_lines} + +\item{reference.gs}{The GS object containing Pseudotime.} +} +\value{ +A score for how accurate the results are as a data frame. +} +\description{ +Use this with "sample" cells taken from the reference. It will check how close the predicted position of the cells is to the real position. +} diff --git a/man/pppr_cell_plot.Rd b/man/pppr_cell_plot.Rd new file mode 100644 index 0000000..c958dcc --- /dev/null +++ b/man/pppr_cell_plot.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pppr_cell_plot.R +\name{pppr_cell_plot} +\alias{pppr_cell_plot} +\title{GSS INDERVIDUAL CELL PLOT} +\usage{ +pppr_cell_plot( + sample.pppr, + cell_idx = 1, + col = "red", + overlay = FALSE, + genes_of_interest = NULL, + switching_genes = NULL, + true_position = FALSE, + accuracy_data = NULL +) +} +\arguments{ +\item{sample.pppr}{A PPPR_OBJECT of the sample you wish to plot} + +\item{cell_idx}{The selected cell.} + +\item{col}{The colour that you'd like} + +\item{overlay}{set to TRUE if you would like this plot to overlay a previous plot.} + +\item{genes_of_interest}{The genes that you would like to plot} + +\item{switching_genes}{The data which includes all of the switching genes} + +\item{true_position}{do you wan to plot the "true position of your cell"} + +\item{accuracy_data}{dataset which contains the accuracy data} +} +\value{ +nice plot highlighting the probable position of your sample on your trajectory. +} +\description{ +Plots the predicted positon of a chosen cell. +} diff --git a/man/pppr_output_plot.Rd b/man/pppr_output_plot.Rd new file mode 100644 index 0000000..4682cdd --- /dev/null +++ b/man/pppr_output_plot.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pppr_output_plot.R +\name{pppr_output_plot} +\alias{pppr_output_plot} +\title{PPPR OUTPUT PLOT} +\usage{ +pppr_output_plot( + sample.gss, + col = "red", + overlay = FALSE, + label = "sample name", + genes_of_interest = NULL, + switching_genes = reference.sg +) +} +\arguments{ +\item{sample.gss}{A GSS_OBJECT of the sample you wish to plot} + +\item{col}{The colour that you'd like} + +\item{overlay}{set to TRUE if you would like this plot to overlay a previous plot.} + +\item{label}{string that you would like to assign as the label to the line.} + +\item{genes_of_interest}{The names of any genes that you'd like to include the switching point of.} + +\item{switching_genes}{a matrix containing the switching gene information as produced by GeneSwitches.} +} +\value{ +nice plot highlighting the probable position of your sample on your trajectory. +} +\description{ +Plots the predicted positon of your sample. +} diff --git a/man/pppr_precision.Rd b/man/pppr_precision.Rd new file mode 100644 index 0000000..3c8d572 --- /dev/null +++ b/man/pppr_precision.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pppr_precision.R +\name{pppr_precision} +\alias{pppr_precision} +\title{pppr_precision} +\usage{ +pppr_precision(sample.gs, range = seq(0.02, 0.04, 0.005)) +} +\arguments{ +\item{sample.gs}{GeneSwitches object that you have already run binarise and GLM on.} + +\item{range}{range of r2cutoffs to use} +} +\value{ +the precision dataframe. +} +\description{ +pppr_precision +} diff --git a/man/pppr_predict_position.Rd b/man/pppr_predict_position.Rd new file mode 100644 index 0000000..afa6ee4 --- /dev/null +++ b/man/pppr_predict_position.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pppr_predict_position.R +\name{pppr_predict_position} +\alias{pppr_predict_position} +\title{PPPR Predict Position} +\usage{ +pppr_predict_position(reduced_binary_counts_matrix, reference.sg) +} +\arguments{ +\item{reduced_binary_counts_matrix}{a matrix of your samples binary gene expression.} + +\item{reference.sg}{Genes which switch through the trajectory as identified by GeneSwitches.} +} +\value{ +A list of matrices: A matrix for each cell where the columns represent progress through a trajectory, +and the rows represent genes, values indicate a likely position of the cell upon the trajectory based that genes bianrized expression. +} +\description{ +Produces an estimate for the position on trajectory of each gene in each cell of a sample. +This can be later aggregated to estimate the position of the sample along the trajectory. +} diff --git a/man/pppr_timeline_plot.Rd b/man/pppr_timeline_plot.Rd new file mode 100644 index 0000000..99055b4 --- /dev/null +++ b/man/pppr_timeline_plot.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pppr_timeline_plot.R +\name{pppr_timeline_plot} +\alias{pppr_timeline_plot} +\title{Identify and Visualise each cells position.} +\usage{ +pppr_timeline_plot( + reference.sg, + genomic_expression_traces = FALSE, + reduced_binary_counts_matrix = NULL, + cell_id = 1 +) +} +\arguments{ +\item{reference.sg}{A selection of switching genes.} + +\item{genomic_expression_traces}{Logical, Do you want to plot the lines which indicate the predicted position of the selected cell.} + +\item{reduced_binary_counts_matrix}{a matrix of your samples binary gene expression.} + +\item{cell_id}{The index or name of the cell of interest} +} +\value{ +Timeline plot of selected cell +} +\description{ +Produces a plot for each cell which helps visualize how GSS is predicting the cells position. +} diff --git a/man/print.PPPR_OBJECT.Rd b/man/print.PPPR_OBJECT.Rd new file mode 100644 index 0000000..956a5d9 --- /dev/null +++ b/man/print.PPPR_OBJECT.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pppr_utils.R +\name{print.PPPR_OBJECT} +\alias{print.PPPR_OBJECT} +\title{Print PPPR Object Summary} +\usage{ +\method{print}{PPPR_OBJECT}(x) +} +\arguments{ +\item{x}{An object of class 'PPPR_OBJECT' to be summarized.} +} +\value{ +A summary of the contents of the 'PPPR_OBJECT'. +} +\description{ +Make calling the PPPR_OBJECT nicer by printing a summary of its contents. +} diff --git a/man/re_integrate.Rd b/man/re_integrate.Rd new file mode 100644 index 0000000..abe8d47 --- /dev/null +++ b/man/re_integrate.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gss_re_integrate.R +\name{re_integrate} +\alias{re_integrate} +\title{Re Integrate +Splits a Seurat object into a list of Seurat objects to integrate, +Performs SCTransform normalization separately for each Seurat object, +Runs the PrepSCTIntegration function on the object list, +Integrates datasets.} +\usage{ +re_integrate(object, ncell_cutoff) +} +\arguments{ +\item{object}{Subsetted Seurat object that you want to re-integrate} + +\item{ncell_cutoff}{Minimum number of cells per split object} +} +\value{ +Re-integrated Seurat Object. +} +\description{ +Re Integrate +Splits a Seurat object into a list of Seurat objects to integrate, +Performs SCTransform normalization separately for each Seurat object, +Runs the PrepSCTIntegration function on the object list, +Integrates datasets. +} diff --git a/vignettes/covid.Rmd b/vignettes/covid.Rmd new file mode 100644 index 0000000..5c5abff --- /dev/null +++ b/vignettes/covid.Rmd @@ -0,0 +1,518 @@ +--- +title: "wholeanalysis" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{wholeanalysis} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## Load Libraries +```{r, eval = FALSE} +# library(data.table) +# library(Matrix) +# library(reticulate) +library(Seurat) +# library(ggplot2) +# library(patchwork) +# library(RColorBrewer) +# library(parallel) +# library(GeneSwitches) +# library(slingshot) +``` + +### Loading in the downsampled atlas and covid objects +```{r, eval = FALSE} +atlas.seu <- readRDS("~/R Packages/mini_data/covid/downsample_1000_atlas.rds") +covid.seu <- readRDS("~/R Packages/mini_data/covid/downsample_1000_covidBlood.rds") +``` + +## sctransforming the downsampled atlas object +```{r, eval = FALSE} +atlas.seu <- SCTransform(atlas.seu, verbose = FALSE) +``` + + +## loading in the individual datasets +```{r, eval = FALSE} +PTCL_POST <- readRDS("/scratch/ojlr1u20/PTCL_POST.rds") +PTCL_PRE <- readRDS("/scratch/ojlr1u20/PTCL_PRE.rds") +``` + +### subsetting them to get more cells in each sample +```{r, eval = FALSE} +PTCL_POST <- subset(PTCL_POST, subset = nFeature_RNA > 200) +PTCL_PRE <- subset(PTCL_PRE, subset = nFeature_RNA > 200) +``` + + +#Label transfer so consistency is maintained from atlas and patient sample dataset annotation +```{r, eval = FALSE} +post_anchors <- FindTransferAnchors( + reference = atlas.seu, + query = PTCL_POST, + normalization.method = "SCT", + reference.reduction = "pca", + dims = 1:50 +) + +PTCL_POST <- MapQuery( + anchorset = post_anchors, + query = PTCL_POST, + reference = atlas.seu, + refdata = list( + cell_type = "cell_type" + ), + reference.reduction = "pca" +) + +pre_anchors <- FindTransferAnchors( + reference = atlas.seu, + query = PTCL_PRE, + normalization.method = "SCT", + reference.reduction = "pca", + dims = 1:50 +) + +PTCL_PRE <- MapQuery( + anchorset = pre_anchors, + query = PTCL_PRE, + reference = atlas.seu, + refdata = list( + cell_type = "cell_type" + ), + reference.reduction = "pca" +) +``` + + +## Setting a threshold +```{r, eval = FALSE} +PTCL_POST_ss <- subset(PTCL_POST, subset = predicted.cell_type.score > 0.65) +PTCL_PRE_ss <- subset(PTCL_PRE, subset = predicted.cell_type.score > 0.65) + +PTCL_POST_ss <- NormalizeData(PTCL_POST_ss) +PTCL_POST_ss <- FindVariableFeatures(PTCL_POST_ss) +PTCL_POST_ss <- ScaleData(PTCL_POST_ss) +PTCL_POST_ss <- RunPCA(PTCL_POST_ss) +PTCL_POST_ss <- FindNeighbors(PTCL_POST_ss) +PTCL_POST_ss <- FindClusters(PTCL_POST_ss) +PTCL_POST_ss <- RunUMAP(PTCL_POST_ss, dims = 1:30, n_neighbors = 20, min_dist = 0.3) + +DimPlot(PTCL_POST_ss,reduction="umap",label=T,pt.size=1,group.by="predicted.cell_type") + +PTCL_PRE_ss <- NormalizeData(PTCL_PRE_ss) +PTCL_PRE_ss <- FindVariableFeatures(PTCL_PRE_ss) +PTCL_PRE_ss <- ScaleData(PTCL_PRE_ss) +PTCL_PRE_ss <- RunPCA(PTCL_PRE_ss) +PTCL_PRE_ss <- FindNeighbors(PTCL_PRE_ss) +PTCL_PRE_ss <- FindClusters(PTCL_PRE_ss) +PTCL_PRE_ss <- RunUMAP(PTCL_PRE_ss, dims = 1:30, n_neighbors = 20, min_dist = 0.3) + +DimPlot(PTCL_PRE_ss,reduction="umap",label=T,pt.size=1,group.by="predicted.cell_type") +``` + + +### subsetting the downsampled atlas into three T cell types that fit the exhaustion trajectory +```{r, eval = FALSE} +atlas.seu <- readRDS("/scratch/ojlr1u20/TICAtlas_downsampled_1000.rds") +seu_pr <- subset(x = atlas.seu, idents = c("Naive T cells","Cytotoxic CD8 T cells", "Terminally exhausted CD8 T cells")) +seu_sources <- SplitObject(seu_pr, split.by = "source") +``` + +## Then, loop through the list of Seurat objects and remove any that have less than 100 cells + +## Create a new list to store the filtered Seurat objects +```{r, eval = FALSE} +filtered_seu_sources <- list() +``` + +# Loop over each Seurat object in the original list +```{r, eval = FALSE} +for (i in 1:length(seu_sources)) { + # Check the number of cells in the Seurat object + num_cells <- dim(seu_sources[[i]]@assays$RNA@counts)[2] + # If the number of cells is less than 100, skip this Seurat object + if (num_cells < 100) { + next + } + # Otherwise, add the Seurat object to the filtered list + filtered_seu_sources[[length(filtered_seu_sources)+1]] <- seu_sources[[i]] +} + +for (i in 1:length(filtered_seu_sources)) { + filtered_seu_sources[[i]] = NormalizeData(filtered_seu_sources[[i]], normalization.method = "LogNormalize", + scale.factor = 10000, verbose = FALSE) + filtered_seu_sources[[i]] = FindVariableFeatures(filtered_seu_sources[[i]], selection.method = "vst", + nfeatures = 1500, verbose = FALSE) +} +``` + +## All are default settings except anchor.features +```{r, eval = FALSE} +seuAnch = FindIntegrationAnchors(filtered_seu_sources, anchor.features = 1500, + normalization.method = "LogNormalize", + reduction = "cca", dims = 1:30, + k.anchor = 5, k.filter = 200, + k.score = 30, max.features = 200) +``` + + +## Here, we integrate all genes common between snRNA and scRNA +```{r, eval = FALSE} +seu = IntegrateData(anchorset = seuAnch, dims = 1:30, + normalization.method = "LogNormalize") + +DefaultAssay(seu) = "integrated" +seu <- ScaleData(seu, verbose = FALSE) +seu <- FindVariableFeatures(seu) +seu <- RunPCA(seu, npcs = 30, verbose = FALSE) +seu <- RunUMAP(seu, reduction = "pca", dims = 1:30) +seu <- FindNeighbors(seu, reduction = "pca", dims = 1:30) +seu <- FindClusters(seu, resolution = 0.5) + +p1 <- DimPlot(seu, reduction = "umap", group.by = "cell_type") +p2 <- DimPlot(seu, reduction = "umap", label = TRUE, repel = TRUE) +p1 + p2 +``` + +### switch to SCE +```{r, eval = FALSE} +sce <- as.SingleCellExperiment(seu) +``` + + + + + + +#UMAP +## run slingshot +```{r, eval = FALSE} +sce <- slingshot(sce, clusterLabels = "cell_type", reducedDim = 'UMAP') +``` + +## check it looks ok +```{r, eval = FALSE} +summary(sce$slingPseudotime_1) +``` + +## plot +#### *MTN - not sure if we need grDevices* +```{r, eval = FALSE} +library(grDevices) +colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100) +plotcol <- colors[cut(sce$slingPseudotime_1, breaks=100)] +plot(reducedDims(sce)$UMAP, col = plotcol, pch=16, asp = 1) +lines(SlingshotDataSet(sce), lwd=2, col='black') +``` + +## create GS object +```{r, eval = FALSE} +counts <- exp(logcounts(sce)) - 1 +assay(sce, "counts") <- counts + +sce_gs <- SingleCellExperiment(assays = List(expdata = logcounts(sce))) +colData(sce_gs)$Pseudotime <- -sce$slingPseudotime_1 +reducedDims(sce_gs) <- SimpleList(UMAP = reducedDim(sce, "UMAP", withDimnames=TRUE)) +``` + +## Binarize +```{r, eval = FALSE} +bn_cutoff <- 1.5 +sce_gs <- binarize_exp(sce_gs, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff) +sce_gs <- find_switch_logistic_fastglm(sce_gs, downsample = TRUE, show_warning = FALSE) +``` + +## filter top 15 best fitting switching genes among all the genes +```{r, eval = FALSE} +sg_allgenes_umap <- filter_switchgenes(sce_gs, allgenes = TRUE, topnum = 100) +``` +## plot +```{r, eval = FALSE} +plot_timeline_ggplot(sg_allgenes_umap, timedata = sce_gs$Pseudotime, txtsize = 3) +``` + +## Select an evenly distributed selection of genes from a GeneSwitches Object. +# select_evenly_distributed_switching_genes +```{r, eval = FALSE} +select_evenly_distributed_switching_genes <- function(sg_allgenes, min_time_spacing){ + + ## Sort sg_allgenes by pseudoR2s + sg_allgenes <- sg_allgenes[order(-sg_allgenes$pseudoR2s),] + + # Initialize the time value of the previously selected gene for both "up"'s and "down"'s + ups <- sg_allgenes[sg_allgenes$direction == "up", ] + prev_ups_times <- ups$switch_at_timeidx[1] + downs <- sg_allgenes[sg_allgenes$direction == "down", ] + prev_downs_times <- downs$switch_at_timeidx[1] + + # Initialize the subsetted matrix with the first "up" and "down" gene + subsetted_matrix <- downs[1, ] + subsetted_matrix <- rbind(subsetted_matrix, ups[1, ]) + + # Loop over the remaining "up" genes and add them to the subsetted matrix if they meet the criteria + for (i in 2:nrow(ups)) { + # Check if the time value of the current gene is spaced by at least min_time_spacing from all previously selected genes + if (all(abs(ups$switch_at_timeidx[i] - prev_ups_times) >= min_time_spacing)) { + # Add the current gene to the subsetted matrix + subsetted_matrix <- rbind(subsetted_matrix, ups[i, ]) + # Update the previous time values + prev_ups_times <- c(prev_ups_times, ups$switch_at_timeidx[i]) + } + } + + # Loop over the remaining "down" genes and add them to the subsetted matrix if they meet the criteria + for (i in 2:nrow(downs)) { + # Check if the time value of the current gene is spaced by at least min_time_spacing from all previously selected genes + if (all(abs(downs$switch_at_timeidx[i] - prev_downs_times) >= min_time_spacing)) { + # Add the current gene to the subsetted matrix + subsetted_matrix <- rbind(subsetted_matrix, downs[i, ]) + # Update the previous time values + prev_downs_times <- c(prev_downs_times, downs$switch_at_timeidx[i]) + } + } + + # return the subsetted matrix + gs_scorer_genes <- subsetted_matrix + return(gs_scorer_genes) +} +``` + +# Create a reduced binary expression matrix for only the selected switching genes, +# binary_counts_matrix is from the Patient DATA and gs_scorer_genes is from Atlas Data +```{r, eval = FALSE} +filter_gene_expression_for_switching_genes<-function(binary_counts_matrix, gs_scorer_genes) { + indices_of_switching_genes<-which(rownames(binary_counts_matrix) %in% gs_scorer_genes[,1]) + reduced_binary_counts_matrix<- binary_counts_matrix[indices_of_switching_genes,] + gs_scorer_genes_to_keep<-which(gs_scorer_genes[,1] %in% rownames(reduced_binary_counts_matrix)) + gs_scorer_genes<- gs_scorer_genes[gs_scorer_genes_to_keep,] + #print(gs_scorer_genes_to_keep) + returnlist<-list(reduced_binary_counts_matrix, gs_scorer_genes) + return(returnlist) +} +``` + +#### Old? +```{r, eval = FALSE} +#filtered_return<-filter_gene_expression_for_switching_genes(binary_counts_matrix, gs_scorer_genes) +#reduced_binary_counts_matrix<-filtered_return[[1]] +#gs_scorer_genes<-filtered_return[[2]] +``` + +## Identify the "racing lines" +### TODO make this ^^ comment more verbose. +```{r, eval = FALSE} +create_racing_lines<-function(reduced_binary_counts_matrix,gs_scorer_genes) { + all_patients_cells_scored<-list() + number_of_cells<-dim(reduced_binary_counts_matrix)[2] + number_of_switching_genes<-dim(gs_scorer_genes)[1] + #for each patient cell C + for (c in 1:number_of_cells){ + print(paste(c,"out of", number_of_cells, "cells")) + racing_mat<-matrix(0,nrow = number_of_switching_genes, ncol = 100) + + #for each switching gene G + for (g in 1:number_of_switching_genes){ + print(paste(g,"out of", number_of_switching_genes, "switching genes")) + #find out the switch time Gt, and direction Gd + switching_time <- as.numeric(gs_scorer_genes[g,12]) + switching_direction <- gs_scorer_genes[g,10] + #find out if its expressed Ct + is_expressed <- reduced_binary_counts_matrix[g,c] + #If Ct = TRUE + if(is_expressed == 1){ + #If Gd = UP + if(switching_direction == "up"){ + # [Gt:100] = 1 + racing_mat[g,switching_time:100]<-1 + }else{ + #If Gd = DOWN + # [0:Gt] = 1 + racing_mat[g,0:switching_time]<-1 + } + #If Ct = FALSE + }else{ + if(switching_direction == "up"){ + #If Gd = UP + #[0:Gt] = 1 + racing_mat[g,0:switching_time]<-1 + }else{ + #If Gd = DOWN + # [Gt:100] = 1 + racing_mat[g,switching_time:100]<-1 + } + } + } + all_patients_cells_scored[[c]]<-racing_mat + } + return(all_patients_cells_scored) +} +``` + +## Create list_of_cell_position_frequencies using valid indices +## Does this need to be included in a function? +```{r, eval = FALSE} +#list_of_cell_position_frequencies <- create_racing_lines(reduced_binary_counts_matrix, gs_scorer_genes) +``` + +## Combining all cells racing lines after binerization (Owen's way) +#### *TODO make the high-points into single points in order to makae the yaxis of the final plot more intuitive. * +```{r, eval = FALSE} +flatten_cell_frequencies_owen <- function(list_of_cell_position_frequencies) { + all_patient_cells_scored_flat <- list() + length_of_list <- length(list_of_cell_position_frequencies) + for(i in 1:length_of_list){ + location_of_highpoint <- as.numeric(colSums(list_of_cell_position_frequencies[[i]]) == max(colSums(list_of_cell_position_frequencies[[i]]))) + all_patient_cells_scored_flat[[i]] <- location_of_highpoint + } + flat_matrix <- do.call(rbind, all_patient_cells_scored_flat) + return(flat_matrix) +} +``` + +## Combining all cells racing lines after binerization (Owen's way) +#### *DONE: make the high-points into single points in order to mkae the yaxis of the final plot more intuitive. * +#### *this is an inelegant solution as it chooses the FIRST column index of the highpoint.* +```{r, eval = FALSE} +flatten_cell_frequencies_owen_done <- function(list_of_cell_position_frequencies) { + all_patient_cells_scored_flat <- list() + length_of_list <- length(list_of_cell_position_frequencies) + for(i in 1:length_of_list){ + location_of_highpoint <- as.numeric(colSums(list_of_cell_position_frequencies[[i]]) == max(colSums(list_of_cell_position_frequencies[[i]]))) + indices <- which(location_of_highpoint == 1)[-1] + location_of_highpoint[indices] <- 0 + all_patient_cells_scored_flat[[i]] <- location_of_highpoint + } + flat_matrix <- do.call(rbind, all_patient_cells_scored_flat) + return(flat_matrix) +} +``` + +## Combining all cells racing lines without binerization (Moi's way). +```{r, eval = FALSE} +flatten_cell_frequencies_moi <- function(list_of_cell_position_frequencies) { + #making an empty flat matrix + all_patient_cells_scored_flat <- matrix(0, nrow = 1, ncol = 100) + # + length_of_list <- length(list_of_cell_position_frequencies) + # for every cells matrix calculate the colsums and add them to the flat matrix + for (i in 1:length_of_list) { + all_patient_cells_scored_flat <- all_patient_cells_scored_flat + colSums(list_of_cell_position_frequencies[[i]]) + } + # divide the values in the flat matrix by the number of cells in an attempt to make the yaxis more informative. + # This may be the wrong approach but it shouldnt change the shape of the final plot + all_patient_cells_scored_flat <- all_patient_cells_scored_flat/length_of_list + + return(all_patient_cells_scored_flat) +} +``` + +## select_evenly_distributed_switching_genes +```{r, eval = FALSE} +gs_scorer_genes <- select_evenly_distributed_switching_genes(sg_allgenes_umap, min_time_spacing = 5) +``` +## plot +```{r, eval = FALSE} +plot_timeline_ggplot(gs_scorer_genes, timedata = sce_gs$Pseudotime, txtsize = 3) +``` + +## subsetting the pre-treatment patient data to only include the exhaustion trajectory. +```{r, eval = FALSE} +pre_T.seu <- subset(x = PTCL_PRE_ss, subset = predicted.cell_type %in% c("Naive T cells","Cytotoxic CD8 T cells", "Terminally exhausted CD8 T cells")) +``` +## subsetting the post treatment patient data to only include the exhaustion trajectory. +```{r, eval = FALSE} +post_T.seu <- subset(x = PTCL_POST_ss, subset = predicted.cell_type %in% c("Naive T cells","Cytotoxic CD8 T cells", "Terminally exhausted CD8 T cells")) +``` + +## converting the pre-treatment patient data to SCE. +```{r, eval = FALSE} +pre.sce <- as.SingleCellExperiment(pre_T.seu) +``` + +## converting the pre-treatment patient data to SCE. +```{r, eval = FALSE} +post.sce <- as.SingleCellExperiment(post_T.seu) +``` + +## convert the pre treatment patient data to a GS obj. +```{r, eval = FALSE} +gs_pre <- SingleCellExperiment(assays = List(expdata = logcounts(pre.sce))) +``` +## convert the post treatment patient data to a GS obj. +```{r, eval = FALSE} +gs_post <- SingleCellExperiment(assays = List(expdata = logcounts(post.sce))) +``` +## TODO check if this cutoff is suitable for the patient data. +```{r, eval = FALSE} +bn_cutoff <- 0.2 +# binarize pre treatment patient data +gs_pre<- binarize_exp(gs_pre, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff) +# binarize post treatment patient data +gs_post<- binarize_exp(gs_post, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff) + +binary_counts_matrix_pre<-assays(gs_pre)$binary +``` + +#create a reduced binary expression matrix for only the switching genes, binary counts is from the patient and gs scorer is from atlas +```{r, eval = FALSE} +filtered_return_pre<-filter_gene_expression_for_switching_genes(binary_counts_matrix_pre, gs_scorer_genes) +``` + +# MTN - I'm not sure what this does +```{r, eval = FALSE} +reduced_binary_counts_matrix_pre<-filtered_return_pre[[1]] +``` +# MTN - or this. +```{r, eval = FALSE} +gs_scorer_genes<-filtered_return_pre[[2]] +``` + +#Create list_of_cell_position_frequencies using valid indices +```{r, eval = FALSE} +list_of_cell_position_frequencies <- create_racing_lines(reduced_binary_counts_matrix_pre, gs_scorer_genes) +``` +# Combining all cells racing lines after binerization (Owen's way) +```{r, eval = FALSE} +preflat <- flatten_cell_frequencies_moi(list_of_cell_position_frequencies) +plot(colSums(preflat)) + +binary_counts_matrix_post<-assays(gs_post)$binary +``` + +#create a reduced binary expression matrix for only the switching genes, binary counts is from the patient and gs scorer is from atlas +```{r, eval = FALSE} +filtered_return_post<-filter_gene_expression_for_switching_genes(binary_counts_matrix_post, gs_scorer_genes) +``` + +# MTN - I'm not sure what this does +```{r, eval = FALSE} +reduced_binary_counts_matrix_post<-filtered_return_post[[1]] +``` + +# MTN - or this. +```{r, eval = FALSE} +gs_scorer_genes<-filtered_return_post[[2]] +``` + +#Create list_of_cell_position_frequencies using valid indices +```{r, eval = FALSE} +list_of_cell_position_frequencies <- create_racing_lines(reduced_binary_counts_matrix_post, gs_scorer_genes) +``` + +# Combining all cells racing lines after binerization and plot +```{r, eval = FALSE} +postflat <- flatten_cell_frequencies_moi(list_of_cell_position_frequencies) +plot(colSums(postflat)) +``` + +## view the differnece between patients +```{r, eval = FALSE} +patient_diff = colSums(postflat) - colSums(preflat) +plot(patient_diff) +```