diff --git a/R/pppr_output_plot.R b/R/pppr_output_plot.R
new file mode 100644
index 0000000..b90de67
--- /dev/null
+++ b/R/pppr_output_plot.R
@@ -0,0 +1,117 @@
+#' PPR OUTPUT PLOT
+#'
+#' @description
+#' Plots the predicted positon of your sample.
+#'
+#' @param sample.gss A GSS_OBJECT of the sample you wish to plot
+#' @param col The colour that you'd like
+#' @param overlay set to TRUE if you would like this plot to overlay a previous plot.
+#' @param label string that you would like to assign as the label to the line.
+#' @param genes_of_interest The names of any genes that you'd like to include the switching point of.
+#' @param switching_genes a matrix containing the switching gene information as produced by GeneSwitches.
+#'
+#' @return nice plot highlighting the probable position of your sample on your trajectory.
+#' @export
+#'
+ppr_output_plot <- function(sample.gss, col = "red", overlay = FALSE, label = "sample name", genes_of_interest = NULL, switching_genes = reference.sg){
+
+ if (!overlay) {
+  plot(x = 1:100,
+       y = (sample.gss$sample_flat/max(sample.gss$sample_flat)*100),
+       ylim = c(0,110),
+       xlim = c(0,100),
+       pch = 20,
+       cex = 0.8,
+       col = col,
+       type = "l",
+       xlab = "Pseudo-Time Index",
+       ylab = "GSS Score",
+       main = paste("Predicted Positions"))
+
+  segments(which.max(colSums(sample.gss$sample_flat)),
+           -3.9,
+           which.max(colSums(sample.gss$sample_flat)),
+           (sample.gss$sample_flat[which.max(colSums(sample.gss$sample_flat))]/max(sample.gss$sample_flat)*100),
+           lwd = 1,
+           lty = 2,
+           col = col)
+
+  text(x = which.max(colSums(sample.gss$sample_flat)),
+       y = (sample.gss$sample_flat[which.max(colSums(sample.gss$sample_flat))]/max(sample.gss$sample_flat)*100),
+       labels = label,
+       col = col,
+       pos = 3)
+
+  if (length(genes_of_interest) > 0) {
+    for (gene_name in genes_of_interest) {
+      segments(switching_genes[gene_name,"switch_at_timeidx"],
+               -3.9,
+               switching_genes[gene_name,"switch_at_timeidx"],
+               -0.5,
+               lwd = 1,
+               lty = 2)
+
+      text(x = switching_genes[gene_name,"switch_at_timeidx"] + 3,
+           y = 1,
+           labels = gene_name,
+           srt = -20,
+           cex = 0.86,
+           pos = 2)
+
+    }
+  }
+
+
+
+  } else {
+    lines(x = 1:100,
+         y = (sample.gss$sample_flat/max(sample.gss$sample_flat)*100),
+         ylim = c(0,110),
+         xlim = c(0,100),
+         pch = 20,
+         cex = 0.8,
+         col = col,
+         type = "l")
+
+    segments(which.max(colSums(sample.gss$sample_flat)),
+             -3.9,
+             which.max(colSums(sample.gss$sample_flat)),
+             (sample.gss$sample_flat[which.max(colSums(sample.gss$sample_flat))]/max(sample.gss$sample_flat)*100),
+             lwd = 1,
+             lty = 2,
+             col = col)
+
+    text(x = which.max(colSums(sample.gss$sample_flat)),
+         y = (sample.gss$sample_flat[which.max(colSums(sample.gss$sample_flat))]/max(sample.gss$sample_flat)*100),
+         labels = label,
+         col = col,
+         pos = 3)
+
+    if (length(genes_of_interest) > 0) {
+      for (gene_name in genes_of_interest) {
+        segments(switching_genes[gene_name,"switch_at_timeidx"],
+                 -3.9,
+                 switching_genes[gene_name,"switch_at_timeidx"],
+                 -0.5,
+                 lwd = 1,
+                 lty = 2)
+
+        text(x = switching_genes[gene_name,"switch_at_timeidx"] + 3,
+             y = 1,
+             labels = gene_name,
+             srt = -20,
+             cex = 0.86,
+             pos = 2)
+
+   }
+  }
+ }
+}
+
+
+
+
+
+
+
+
diff --git a/R/pppr_predict_position.R b/R/pppr_predict_position.R
new file mode 100644
index 0000000..83cdf81
--- /dev/null
+++ b/R/pppr_predict_position.R
@@ -0,0 +1,88 @@
+#' @title PPPR Predict Position
+#'
+#' @description
+#' Produces an estimate for the position on trajectory of each gene in each cell of a sample.
+#'     This can be later aggregated to estimate the position of the sample along the trajectory.
+#'
+#' @param reduced_binary_counts_matrix a matrix of your samples binary gene expression.
+#' @param reference.sg Genes which switch through the trajectory as identified by GeneSwitches.
+#'
+#' @return A list of matrices: A matrix for each cell where the columns represent progress through a trajectory,
+#'     and the rows represent genes, values indicate a likely position of the cell upon the trajectory based that genes bianrized expression.
+#' @export
+#'
+ppr_predict_position <- function(reduced_binary_counts_matrix,reference.sg) {
+
+  ## The final output will be a ppr_obj (list) comprised of 3 objects.
+  # Make the list of length 3, and name the objects
+  ppr_obj <- vector("list", 3)
+  names(ppr_obj) <- c("genomic_expression_traces","cells_flat","sample_flat")
+  # Assign the ppr_obj class attribute to the list
+  class(ppr_obj) <- "PPR_OBJECT"
+
+  ## Reorder reference.sg
+  # as the code relies on the rownames and idicies of the genes in reduced_binary_counts_matrix and reference.sg matching.
+  reference.sg <- reference.sg[rownames(reduced_binary_counts_matrix),]
+
+  ## Input values:
+  number_of_cells <- dim(reduced_binary_counts_matrix)[2]
+  # Should we get the number of switching genes from here? or dim(reduced_binary_counts_matrix)[1]
+  # It is possible for dim(reduced_binary_counts_matrix)[1] <  dim(reference.sg)[1]
+  number_of_switching_genes <- dim(reference.sg)[1]
+
+
+  # Different datasets store this information in different columns.. watch out.
+  # Maybe this should be an input to the function?
+  # as.numeric is needed/notneeded depending on dataset.
+  switching_time <- as.numeric(reference.sg$switch_at_timeidx)
+  switching_direction <- reference.sg$direction
+
+  # Building the genomic_expression_traces list. (faster than building it dynamically.)
+  all_patients_cells_scored <- vector("list", number_of_cells)
+  names(all_patients_cells_scored) <- colnames(reduced_binary_counts_matrix)
+
+  # Loop through all cells making matrices for each,
+  #which represent likely position of a cell on a trajectory based on the expression of each gene.
+  for (c in 1:number_of_cells) {
+    # Build the matrix of 0's which has genes as rows and pseudotime indecies as columns.
+   genomic_expression_mat <- matrix(0, nrow = number_of_switching_genes, ncol = 100)
+   rownames(genomic_expression_mat) <- rownames(reduced_binary_counts_matrix)
+    binarized_gene_expression_for_cell_c <- reduced_binary_counts_matrix[, c]
+
+    up_indices <- which(binarized_gene_expression_for_cell_c == 1 & switching_direction == "up")
+    down_indices <- which(binarized_gene_expression_for_cell_c == 0 & switching_direction == "down")
+    not_up_indices <- which(binarized_gene_expression_for_cell_c == 0 & switching_direction == "up")
+    not_down_indices <- which(binarized_gene_expression_for_cell_c == 1 & switching_direction == "down")
+
+    for (i in up_indices) {
+      genomic_expression_mat[i, switching_time[i]:100] <- 1
+    }
+
+    for (i in down_indices) {
+      genomic_expression_mat[i, switching_time[i]:100] <- 1
+    }
+
+    for (i in not_up_indices) {
+      genomic_expression_mat[i, 1:switching_time[i]] <- 1
+    }
+
+    for (i in not_down_indices) {
+      genomic_expression_mat[i, 1:switching_time[i]] <- 1
+    }
+
+    all_patients_cells_scored[[c]] <- genomic_expression_mat
+  }
+
+  ppr_obj$genomic_expression_traces <- all_patients_cells_scored
+
+### GENOMIC EXPRESSION TRACES CREATED
+# Now flatten:
+
+# Use lapply to calculate column sums for each matrix
+ppr_obj$cells_flat <- do.call(rbind, lapply(all_patients_cells_scored, colSums))
+rownames(ppr_obj$cells_flat) <- names(all_patients_cells_scored)
+# Combine each cells column sums into a single flat matrix.
+ppr_obj$sample_flat <- matrix(colSums(ppr_obj$cells_flat), nrow = 1)
+
+return(ppr_obj)
+}
diff --git a/R/pppr_racinglines_plot.R b/R/pppr_racinglines_plot.R
new file mode 100644
index 0000000..c076272
--- /dev/null
+++ b/R/pppr_racinglines_plot.R
@@ -0,0 +1,113 @@
+#' @title Identify and Visualise each cells position.
+#'
+#' @description
+#' Produces a plot for each cell which helps visualize how PathPinPointR is predicting the cells position.
+#'
+#' @param reference.sg A selection of switching genes which are evenly distributed through pseudo-time.
+#' @param lines logical, do you want to include lines to inidicate the predicted position of a cell
+#' @param reference_reduced a matrix of your sample's binary gene expression.
+#' @param cell_id The index or name of the cell of interest
+#'
+#' @return Timeline plot of selected cell
+#' @import ggplot2
+#' @import ggrepel
+#'
+#'
+#' @export
+#'
+
+ppr_timeline_plot <- function(reference.sg, lines = TRUE, reference_reduced = NULL, cell_id = 1) {
+
+  # Convert reference.sg to a data frame
+  reference.sg <- as.data.frame(reference.sg)
+
+  # Add a new column direction_num and set it to -1
+  reference.sg$direction_num <- -1
+
+  # If "up" is present in the direction column, set direction_num to 1
+  if ("up" %in% reference.sg$direction) {
+    reference.sg[reference.sg$direction == "up", ]$direction_num <- 1
+  }
+
+  # Generate pseudotime data based on the specified parameters
+    timeidx_df <- data.frame(timeidx_range = c(0, 25, 50, 75, 100), timeidx_labs = c(0, 25, 50, 75, 100))
+
+
+  # Create the initial ggplot object with x and y aesthetics, color, and labels
+  timeline_plot <- ggplot(reference.sg, aes(x = switch_at_timeidx, y = pseudoR2s * direction_num, label = rownames(reference.sg))) +
+    geom_point(size = 1) + xlab("Time-Index") + ylab("Quality of fitting (R^2)")
+
+  # Add the classic theme to the plot
+  timeline_plot <- timeline_plot + theme_classic()
+
+  # Add a horizontal black line for the timeline
+  timeline_plot <- timeline_plot + geom_hline(yintercept = 0, color = "black", linewidth = 0.6)
+
+  # Add labels for pseudotime on the plot
+  timeline_plot <- timeline_plot + geom_label(data = timeidx_df, aes(x = timeidx_range, y = 0, label = timeidx_labs),
+                                    size = (3), color = "black")
+
+  # Add text labels with repulsion to avoid overlap
+  timeline_plot <- timeline_plot + geom_text_repel(aes(x = switch_at_timeidx, y = pseudoR2s * direction_num, label = rownames(reference.sg)),
+                                         size = 3, show.legend = FALSE)
+
+  # Customize the theme and legend appearance
+  timeline_plot <- timeline_plot + theme(legend.position = "bottom", legend.title = element_blank(), legend.key.size = unit(10, "pt"),
+                               text = element_text(size = 12))
+
+
+if (lines) {
+
+  if (is.character(cell_id)) {
+    cell_idx <- which(colnames(reference_reduced) == cell_id)
+  } else if (is.numeric(cell_id)) {
+    # Convert numeric to integer if needed
+    cell_idx <- as.integer(cell_id)
+  } else {
+    stop("cell_id must be character or numeric.")
+  }
+
+  ## Reorder reference.sg
+  # as the code relies on the rownames and idicies of the genes in reference_reduced and reference.sg matching.
+  reference_reduced <- reference_reduced[rownames(reference.sg),]
+
+  # Check if row names in reference.sg match those in reference_reduced
+  if (!setequal(rownames(reference.sg), rownames(reference_reduced))) {
+    stop("Row names in reference.sg do not match those in reference_reduced")
+  }
+
+  #loop through all of the genes in reference.sg.
+    for (g in 1:dim(reference.sg)[1]) {
+      # if G is NOT expressed in  C, and the switch is UP  , then draw the line to the right.
+      if ((reference_reduced[rownames(reference.sg)[g], cell_idx] == 0) && (reference.sg$direction[g] == "up")) {
+        timeline_plot <- timeline_plot + geom_segment(aes_string(x = 0, xend = reference.sg$switch_at_timeidx[g], y = reference.sg$pseudoR2s[g], yend = reference.sg$pseudoR2s[g]),
+                                                      color = "blue", linewidth = 0.6)
+      }
+      # if G is expressed in      c, and the switch is UP  , then draw the line to the right.
+      if ((reference_reduced[rownames(reference.sg)[g], cell_idx] == 1) && (reference.sg$direction[g] == "up")) {
+        timeline_plot <- timeline_plot + geom_segment(aes_string(x = reference.sg$switch_at_timeidx[g], xend = 100, y = reference.sg$pseudoR2s[g], yend = reference.sg$pseudoR2s[g]),
+                                                      color = "blue", linewidth = 0.6)
+      }
+      # if G is NOT expressed in  C, and the switch is Down, then draw the line to the Left.
+      if ((reference_reduced[rownames(reference.sg)[g], cell_idx] == 0) && (reference.sg$direction[g] == "down")) {
+        timeline_plot <- timeline_plot + geom_segment(aes_string(x = reference.sg$switch_at_timeidx[g], xend = 100, y = -reference.sg$pseudoR2s[g], yend = -reference.sg$pseudoR2s[g]),
+                                                      color = "blue", linewidth = 0.6)
+      }
+      # if G is expressed in      C, and the switch is Down, then draw the line to the Left.
+      if ((reference_reduced[rownames(reference.sg)[g], cell_idx] == 1) && (reference.sg$direction[g] == "down")) {
+        timeline_plot <- timeline_plot + geom_segment(aes_string(x = 0, xend = reference.sg$switch_at_timeidx[g], y = -reference.sg$pseudoR2s[g], yend = -reference.sg$pseudoR2s[g]),
+                                                      color = "blue", linewidth = 0.6)
+      }
+    }
+
+# Title the plot with the name of the cell
+timeline_plot <- timeline_plot + ggtitle(colnames(reference_reduced)[cell_idx])
+
+
+}
+
+# Return the final plot
+return(timeline_plot)
+
+}
+
diff --git a/R/pppr_timeline_plot.R b/R/pppr_timeline_plot.R
new file mode 100644
index 0000000..a74beb7
--- /dev/null
+++ b/R/pppr_timeline_plot.R
@@ -0,0 +1,110 @@
+#' @title Identify and Visualise each cells position.
+#'
+#' @description
+#' Produces a plot for each cell which helps visualize how GSS is predicting the cells position.
+#'
+#' @param reference.sg A selection of switching genes.
+#' @param genomic_expression_traces Logical, Do you want to plot the lines which indicate the predicted position of the selected cell.
+#' @param reduced_binary_counts_matrix a matrix of your samples binary gene expression.
+#' @param cell_id The index or name of the cell of interest
+#'
+#' @return Timeline plot of selected cell
+#' @importFrom ggplot2 ggplot
+#' @importFrom ggrepel geom_text_repel
+#'
+#'
+#' @export
+ppr_timeline_plot <- function(reference.sg, genomic_expression_traces = FALSE, reduced_binary_counts_matrix = NULL, cell_id = 1) {
+
+  # Convert reference.sg to a data frame
+  reference.sg <- as.data.frame(reference.sg)
+
+  # Add a new column direction_num and set it to -1
+  reference.sg$direction_num <- -1
+
+  # If "up" is present in the direction column, set direction_num to 1
+  if ("up" %in% reference.sg$direction) {
+    reference.sg[reference.sg$direction == "up", ]$direction_num <- 1
+  }
+
+  # Generate pseudotime data based on the specified parameters
+    timeidx_df <- data.frame(timeidx_range = c(0, 25, 50, 75, 100), timeidx_labs = c(0, 25, 50, 75, 100))
+
+
+  # Create the initial ggplot object with x and y aesthetics, color, and labels
+  timeline_plot <- ggplot(reference.sg, aes(x = switch_at_timeidx, y = pseudoR2s * direction_num, label = rownames(reference.sg))) +
+    geom_point(size = 1) + xlab("Time-Index") + ylab("Quality of fitting (R^2)")
+
+  # Add the classic theme to the plot
+  timeline_plot <- timeline_plot + theme_classic()
+
+  # Add a horizontal black line for the timeline
+  timeline_plot <- timeline_plot + geom_hline(yintercept = 0, color = "black", linewidth = 0.6)
+
+  # Add labels for pseudotime on the plot
+  timeline_plot <- timeline_plot + geom_label(data = timeidx_df, aes(x = timeidx_range, y = 0, label = timeidx_labs),
+                                    size = (3), color = "black")
+
+  # Add text labels with repulsion to avoid overlap
+  timeline_plot <- timeline_plot + geom_text_repel(aes(x = switch_at_timeidx, y = pseudoR2s * direction_num, label = rownames(reference.sg)),
+                                         size = 3, show.legend = FALSE)
+
+  # Customize the theme and legend appearance
+  timeline_plot <- timeline_plot + theme(legend.position = "bottom", legend.title = element_blank(), legend.key.size = unit(10, "pt"),
+                               text = element_text(size = 12))
+
+
+if (genomic_expression_traces) {
+
+  if (is.character(cell_id)) {
+    cell_idx <- which(colnames(reference_reduced) == cell_id)
+  } else if (is.numeric(cell_id)) {
+    # Convert numeric to integer if needed
+    cell_idx <- as.integer(cell_id)
+  } else {
+    stop("cell_id must be character or numeric.")
+  }
+
+  # Check if row names in reference.sg match those in reduced_binary_counts_matrix
+  if (!identical(rownames(reference.sg), rownames(reduced_binary_counts_matrix))) {
+    stop("Row names in reference.sg do not match those in reduced_binary_counts_matrix")
+  }
+
+  ## Reorder reference.sg
+  # as the code relies on the rownames and idicies of the genes in reduced_binary_counts_matrix and reference.sg matching.
+  reference.sg <- reference.sg[rownames(reduced_binary_counts_matrix),]
+
+  #loop through all of the genes in reference.sg.
+    for (g in 1:dim(reference.sg)[1]) {
+      # if G is NOT expressed in  C, and the switch is UP  , then draw the line to the right.
+      if ((reduced_binary_counts_matrix[rownames(reference.sg)[g], cell_idx] == 0) && (reference.sg$direction[g] == "up")) {
+        timeline_plot <- timeline_plot + geom_segment(aes_string(x = 0, xend = reference.sg$switch_at_timeidx[g], y = reference.sg$pseudoR2s[g], yend = reference.sg$pseudoR2s[g]),
+                                                      color = "blue", linewidth = 0.6)
+      }
+      # if G is expressed in      c, and the switch is UP  , then draw the line to the right.
+      if ((reduced_binary_counts_matrix[rownames(reference.sg)[g], cell_idx] == 1) && (reference.sg$direction[g] == "up")) {
+        timeline_plot <- timeline_plot + geom_segment(aes_string(x = reference.sg$switch_at_timeidx[g], xend = 100, y = reference.sg$pseudoR2s[g], yend = reference.sg$pseudoR2s[g]),
+                                                      color = "blue", linewidth = 0.6)
+      }
+      # if G is NOT expressed in  C, and the switch is Down, then draw the line to the Left.
+      if ((reduced_binary_counts_matrix[rownames(reference.sg)[g], cell_idx] == 0) && (reference.sg$direction[g] == "down")) {
+        timeline_plot <- timeline_plot + geom_segment(aes_string(x = reference.sg$switch_at_timeidx[g], xend = 100, y = -reference.sg$pseudoR2s[g], yend = -reference.sg$pseudoR2s[g]),
+                                                      color = "blue", linewidth = 0.6)
+      }
+      # if G is expressed in      C, and the switch is Down, then draw the line to the Left.
+      if ((reduced_binary_counts_matrix[rownames(reference.sg)[g], cell_idx] == 1) && (reference.sg$direction[g] == "down")) {
+        timeline_plot <- timeline_plot + geom_segment(aes_string(x = 0, xend = reference.sg$switch_at_timeidx[g], y = -reference.sg$pseudoR2s[g], yend = -reference.sg$pseudoR2s[g]),
+                                                      color = "blue", linewidth = 0.6)
+      }
+    }
+
+# Title the plot with the name of the cell
+timeline_plot <- timeline_plot + ggtitle(colnames(reduced_binary_counts_matrix)[cell_idx])
+
+}
+
+# Return the final plot
+return(timeline_plot)
+
+}
+
diff --git a/man/create_racing_lines.Rd b/man/create_racing_lines.Rd
new file mode 100644
index 0000000..415f083
--- /dev/null
+++ b/man/create_racing_lines.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pppr_create_racing_lines.R
+\name{create_racing_lines}
+\alias{create_racing_lines}
+\title{Identify the "racing lines"}
+\usage{
+create_racing_lines(reduced_binary_counts_matrix, reference.sg)
+}
+\arguments{
+\item{reduced_binary_counts_matrix}{a matrix of your samples binary gene expression.}
+
+\item{reference.sg}{Genes which switch through the trajectory as identified by GeneSwitches.}
+}
+\value{
+A list of matrices: A matrix for each cell where the columns represent progress through a trajectory,
+and the rows represent genes, values indicate a likely position of the cell upon the trajectory based that genes bianrized expression.
+}
+\description{
+Produces an estimate for the position on trajectory of each gene in each cell of a sample.
+This can be later aggregated to estimate the position of the sample along the trajectory.
+}
diff --git a/man/label_transfer.Rd b/man/label_transfer.Rd
new file mode 100644
index 0000000..06ef1d2
--- /dev/null
+++ b/man/label_transfer.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gss_label_transfer.R
+\name{label_transfer}
+\alias{label_transfer}
+\title{Label Transfer
+To make sure that your labels in your patient data match that used in the atlas.}
+\usage{
+label_transfer(query, reference)
+}
+\arguments{
+\item{query}{The Query object. e.g. the patient data}
+
+\item{reference}{The reference Seurat object. e.g the atlas data}
+}
+\value{
+The Query object with new predicted cell types.
+}
+\description{
+Label Transfer
+To make sure that your labels in your patient data match that used in the atlas.
+}
diff --git a/man/pppr_accuracy_test.Rd b/man/pppr_accuracy_test.Rd
new file mode 100644
index 0000000..43a34b7
--- /dev/null
+++ b/man/pppr_accuracy_test.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pppr_accuracy_test.R
+\name{pppr_accuracy_test}
+\alias{pppr_accuracy_test}
+\title{Accuracy Tester}
+\usage{
+pppr_accuracy_test(reference.pppr, reference.gs)
+}
+\arguments{
+\item{reference.pppr}{A "GSS_OBJECT" as outputted by create_racing_lines}
+
+\item{reference.gs}{The GS object containing Pseudotime.}
+}
+\value{
+A score for how accurate the results are as a data frame.
+}
+\description{
+Use this with "sample" cells taken from the reference. It will check how close the predicted position of the cells is to the real position.
+}
diff --git a/man/pppr_cell_plot.Rd b/man/pppr_cell_plot.Rd
new file mode 100644
index 0000000..c958dcc
--- /dev/null
+++ b/man/pppr_cell_plot.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pppr_cell_plot.R
+\name{pppr_cell_plot}
+\alias{pppr_cell_plot}
+\title{GSS INDERVIDUAL CELL PLOT}
+\usage{
+pppr_cell_plot(
+  sample.pppr,
+  cell_idx = 1,
+  col = "red",
+  overlay = FALSE,
+  genes_of_interest = NULL,
+  switching_genes = NULL,
+  true_position = FALSE,
+  accuracy_data = NULL
+)
+}
+\arguments{
+\item{sample.pppr}{A PPPR_OBJECT of the sample you wish to plot}
+
+\item{cell_idx}{The selected cell.}
+
+\item{col}{The colour that you'd like}
+
+\item{overlay}{set to TRUE if you would like this plot to overlay a previous plot.}
+
+\item{genes_of_interest}{The genes that you would like to plot}
+
+\item{switching_genes}{The data which includes all of the switching genes}
+
+\item{true_position}{do you wan to plot the "true position of your cell"}
+
+\item{accuracy_data}{dataset which contains the accuracy data}
+}
+\value{
+nice plot highlighting the probable position of your sample on your trajectory.
+}
+\description{
+Plots the predicted positon of a chosen cell.
+}
diff --git a/man/pppr_output_plot.Rd b/man/pppr_output_plot.Rd
new file mode 100644
index 0000000..4682cdd
--- /dev/null
+++ b/man/pppr_output_plot.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pppr_output_plot.R
+\name{pppr_output_plot}
+\alias{pppr_output_plot}
+\title{PPPR OUTPUT PLOT}
+\usage{
+pppr_output_plot(
+  sample.gss,
+  col = "red",
+  overlay = FALSE,
+  label = "sample name",
+  genes_of_interest = NULL,
+  switching_genes = reference.sg
+)
+}
+\arguments{
+\item{sample.gss}{A GSS_OBJECT of the sample you wish to plot}
+
+\item{col}{The colour that you'd like}
+
+\item{overlay}{set to TRUE if you would like this plot to overlay a previous plot.}
+
+\item{label}{string that you would like to assign as the label to the line.}
+
+\item{genes_of_interest}{The names of any genes that you'd like to include the switching point of.}
+
+\item{switching_genes}{a matrix containing the switching gene information as produced by GeneSwitches.}
+}
+\value{
+nice plot highlighting the probable position of your sample on your trajectory.
+}
+\description{
+Plots the predicted positon of your sample.
+}
diff --git a/man/pppr_precision.Rd b/man/pppr_precision.Rd
new file mode 100644
index 0000000..3c8d572
--- /dev/null
+++ b/man/pppr_precision.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pppr_precision.R
+\name{pppr_precision}
+\alias{pppr_precision}
+\title{pppr_precision}
+\usage{
+pppr_precision(sample.gs, range = seq(0.02, 0.04, 0.005))
+}
+\arguments{
+\item{sample.gs}{GeneSwitches object that you have already run binarise and GLM on.}
+
+\item{range}{range of r2cutoffs to use}
+}
+\value{
+the precision dataframe.
+}
+\description{
+pppr_precision
+}
diff --git a/man/pppr_predict_position.Rd b/man/pppr_predict_position.Rd
new file mode 100644
index 0000000..afa6ee4
--- /dev/null
+++ b/man/pppr_predict_position.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pppr_predict_position.R
+\name{pppr_predict_position}
+\alias{pppr_predict_position}
+\title{PPPR Predict Position}
+\usage{
+pppr_predict_position(reduced_binary_counts_matrix, reference.sg)
+}
+\arguments{
+\item{reduced_binary_counts_matrix}{a matrix of your samples binary gene expression.}
+
+\item{reference.sg}{Genes which switch through the trajectory as identified by GeneSwitches.}
+}
+\value{
+A list of matrices: A matrix for each cell where the columns represent progress through a trajectory,
+and the rows represent genes, values indicate a likely position of the cell upon the trajectory based that genes bianrized expression.
+}
+\description{
+Produces an estimate for the position on trajectory of each gene in each cell of a sample.
+This can be later aggregated to estimate the position of the sample along the trajectory.
+}
diff --git a/man/pppr_timeline_plot.Rd b/man/pppr_timeline_plot.Rd
new file mode 100644
index 0000000..99055b4
--- /dev/null
+++ b/man/pppr_timeline_plot.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pppr_timeline_plot.R
+\name{pppr_timeline_plot}
+\alias{pppr_timeline_plot}
+\title{Identify and Visualise each cells position.}
+\usage{
+pppr_timeline_plot(
+  reference.sg,
+  genomic_expression_traces = FALSE,
+  reduced_binary_counts_matrix = NULL,
+  cell_id = 1
+)
+}
+\arguments{
+\item{reference.sg}{A selection of switching genes.}
+
+\item{genomic_expression_traces}{Logical, Do you want to plot the lines which indicate the predicted position of the selected cell.}
+
+\item{reduced_binary_counts_matrix}{a matrix of your samples binary gene expression.}
+
+\item{cell_id}{The index or name of the cell of interest}
+}
+\value{
+Timeline plot of selected cell
+}
+\description{
+Produces a plot for each cell which helps visualize how GSS is predicting the cells position.
+}
diff --git a/man/print.PPPR_OBJECT.Rd b/man/print.PPPR_OBJECT.Rd
new file mode 100644
index 0000000..956a5d9
--- /dev/null
+++ b/man/print.PPPR_OBJECT.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pppr_utils.R
+\name{print.PPPR_OBJECT}
+\alias{print.PPPR_OBJECT}
+\title{Print PPPR Object Summary}
+\usage{
+\method{print}{PPPR_OBJECT}(x)
+}
+\arguments{
+\item{x}{An object of class 'PPPR_OBJECT' to be summarized.}
+}
+\value{
+A summary of the contents of the 'PPPR_OBJECT'.
+}
+\description{
+Make calling the PPPR_OBJECT nicer by printing a summary of its contents.
+}
diff --git a/man/re_integrate.Rd b/man/re_integrate.Rd
new file mode 100644
index 0000000..abe8d47
--- /dev/null
+++ b/man/re_integrate.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gss_re_integrate.R
+\name{re_integrate}
+\alias{re_integrate}
+\title{Re Integrate
+Splits a Seurat object into a list of Seurat objects to integrate,
+Performs SCTransform normalization separately for each Seurat object,
+Runs the PrepSCTIntegration function on the object list,
+Integrates datasets.}
+\usage{
+re_integrate(object, ncell_cutoff)
+}
+\arguments{
+\item{object}{Subsetted Seurat object that you want to re-integrate}
+
+\item{ncell_cutoff}{Minimum number of cells per split object}
+}
+\value{
+Re-integrated Seurat Object.
+}
+\description{
+Re Integrate
+Splits a Seurat object into a list of Seurat objects to integrate,
+Performs SCTransform normalization separately for each Seurat object,
+Runs the PrepSCTIntegration function on the object list,
+Integrates datasets.
+}
diff --git a/vignettes/covid.Rmd b/vignettes/covid.Rmd
new file mode 100644
index 0000000..5c5abff
--- /dev/null
+++ b/vignettes/covid.Rmd
@@ -0,0 +1,518 @@
+---
+title: "wholeanalysis"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{wholeanalysis}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+## Load Libraries
+```{r, eval = FALSE}
+# library(data.table)
+# library(Matrix)
+# library(reticulate)
+library(Seurat)
+# library(ggplot2)
+# library(patchwork)
+# library(RColorBrewer)
+# library(parallel)
+# library(GeneSwitches)
+# library(slingshot)
+```
+
+### Loading in the downsampled atlas and covid objects
+```{r, eval = FALSE}
+atlas.seu <- readRDS("~/R Packages/mini_data/covid/downsample_1000_atlas.rds")
+covid.seu <- readRDS("~/R Packages/mini_data/covid/downsample_1000_covidBlood.rds")
+```
+
+## sctransforming the downsampled atlas object
+```{r, eval = FALSE}
+atlas.seu <- SCTransform(atlas.seu, verbose = FALSE)
+```
+
+
+## loading in the individual datasets
+```{r, eval = FALSE}
+PTCL_POST <-  readRDS("/scratch/ojlr1u20/PTCL_POST.rds")
+PTCL_PRE <-  readRDS("/scratch/ojlr1u20/PTCL_PRE.rds")
+```
+
+### subsetting them to get more cells in each sample
+```{r, eval = FALSE}
+PTCL_POST <- subset(PTCL_POST, subset = nFeature_RNA > 200)
+PTCL_PRE <- subset(PTCL_PRE, subset = nFeature_RNA > 200)
+```
+
+
+#Label transfer so consistency is maintained from atlas and patient sample dataset annotation
+```{r, eval = FALSE}
+post_anchors <- FindTransferAnchors(
+  reference = atlas.seu,
+  query = PTCL_POST,
+  normalization.method = "SCT",
+  reference.reduction = "pca",
+  dims = 1:50
+)
+
+PTCL_POST <- MapQuery(
+  anchorset = post_anchors,
+  query = PTCL_POST,
+  reference = atlas.seu,
+  refdata = list(
+    cell_type = "cell_type"
+  ),
+  reference.reduction = "pca"
+)
+
+pre_anchors <- FindTransferAnchors(
+  reference = atlas.seu,
+  query = PTCL_PRE,
+  normalization.method = "SCT",
+  reference.reduction = "pca",
+  dims = 1:50
+)
+
+PTCL_PRE <- MapQuery(
+  anchorset = pre_anchors,
+  query = PTCL_PRE,
+  reference = atlas.seu,
+  refdata = list(
+    cell_type = "cell_type"
+  ),
+  reference.reduction = "pca"
+)
+```
+
+
+## Setting a threshold
+```{r, eval = FALSE}
+PTCL_POST_ss <- subset(PTCL_POST, subset = predicted.cell_type.score > 0.65)
+PTCL_PRE_ss <- subset(PTCL_PRE, subset = predicted.cell_type.score > 0.65)
+
+PTCL_POST_ss <- NormalizeData(PTCL_POST_ss)
+PTCL_POST_ss <- FindVariableFeatures(PTCL_POST_ss)
+PTCL_POST_ss <- ScaleData(PTCL_POST_ss)
+PTCL_POST_ss <- RunPCA(PTCL_POST_ss)
+PTCL_POST_ss <- FindNeighbors(PTCL_POST_ss)
+PTCL_POST_ss <- FindClusters(PTCL_POST_ss)
+PTCL_POST_ss <- RunUMAP(PTCL_POST_ss, dims = 1:30, n_neighbors = 20, min_dist = 0.3)
+
+DimPlot(PTCL_POST_ss,reduction="umap",label=T,pt.size=1,group.by="predicted.cell_type")
+
+PTCL_PRE_ss <- NormalizeData(PTCL_PRE_ss)
+PTCL_PRE_ss <- FindVariableFeatures(PTCL_PRE_ss)
+PTCL_PRE_ss <- ScaleData(PTCL_PRE_ss)
+PTCL_PRE_ss <- RunPCA(PTCL_PRE_ss)
+PTCL_PRE_ss <- FindNeighbors(PTCL_PRE_ss)
+PTCL_PRE_ss <- FindClusters(PTCL_PRE_ss)
+PTCL_PRE_ss <- RunUMAP(PTCL_PRE_ss, dims = 1:30, n_neighbors = 20, min_dist = 0.3)
+
+DimPlot(PTCL_PRE_ss,reduction="umap",label=T,pt.size=1,group.by="predicted.cell_type")
+```
+
+
+### subsetting the downsampled atlas into three T cell types that fit the exhaustion trajectory
+```{r, eval = FALSE}
+atlas.seu <- readRDS("/scratch/ojlr1u20/TICAtlas_downsampled_1000.rds")
+seu_pr <- subset(x = atlas.seu, idents = c("Naive T cells","Cytotoxic CD8 T cells", "Terminally exhausted CD8 T cells"))
+seu_sources <- SplitObject(seu_pr, split.by = "source")
+```
+
+## Then, loop through the list of Seurat objects and remove any that have less than 100 cells
+
+## Create a new list to store the filtered Seurat objects
+```{r, eval = FALSE}
+filtered_seu_sources <- list()
+```
+
+# Loop over each Seurat object in the original list
+```{r, eval = FALSE}
+for (i in 1:length(seu_sources)) {
+  # Check the number of cells in the Seurat object
+  num_cells <- dim(seu_sources[[i]]@assays$RNA@counts)[2] 
+  # If the number of cells is less than 100, skip this Seurat object
+  if (num_cells < 100) {
+    next
+  }
+  # Otherwise, add the Seurat object to the filtered list
+  filtered_seu_sources[[length(filtered_seu_sources)+1]] <- seu_sources[[i]]
+}
+
+for (i in 1:length(filtered_seu_sources)) {
+  filtered_seu_sources[[i]] = NormalizeData(filtered_seu_sources[[i]], normalization.method = "LogNormalize",
+                                            scale.factor = 10000, verbose = FALSE)
+  filtered_seu_sources[[i]] = FindVariableFeatures(filtered_seu_sources[[i]], selection.method = "vst", 
+                                                   nfeatures = 1500, verbose = FALSE)
+}
+```
+
+## All are default settings except anchor.features
+```{r, eval = FALSE}
+seuAnch = FindIntegrationAnchors(filtered_seu_sources, anchor.features = 1500, 
+                                 normalization.method = "LogNormalize", 
+                                 reduction = "cca", dims = 1:30,
+                                 k.anchor = 5, k.filter = 200,
+                                 k.score = 30, max.features = 200)
+```
+
+
+## Here, we integrate all genes common between snRNA and scRNA
+```{r, eval = FALSE}
+seu = IntegrateData(anchorset = seuAnch, dims = 1:30,
+                    normalization.method = "LogNormalize")
+
+DefaultAssay(seu) = "integrated"
+seu <- ScaleData(seu, verbose = FALSE)
+seu <- FindVariableFeatures(seu)
+seu <- RunPCA(seu, npcs = 30, verbose = FALSE)
+seu <- RunUMAP(seu, reduction = "pca", dims = 1:30)
+seu <- FindNeighbors(seu, reduction = "pca", dims = 1:30)
+seu <- FindClusters(seu, resolution = 0.5)
+
+p1 <- DimPlot(seu, reduction = "umap", group.by = "cell_type")
+p2 <- DimPlot(seu, reduction = "umap", label = TRUE, repel = TRUE)
+p1 + p2
+```
+
+### switch to SCE
+```{r, eval = FALSE}
+sce <- as.SingleCellExperiment(seu)
+```
+
+
+
+
+
+
+#UMAP
+## run slingshot
+```{r, eval = FALSE}
+sce <- slingshot(sce, clusterLabels = "cell_type", reducedDim = 'UMAP')
+```
+
+## check it looks ok
+```{r, eval = FALSE}
+summary(sce$slingPseudotime_1)
+```
+
+## plot
+#### *MTN - not sure if we need grDevices*
+```{r, eval = FALSE}
+library(grDevices)
+colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
+plotcol <- colors[cut(sce$slingPseudotime_1, breaks=100)]
+plot(reducedDims(sce)$UMAP, col = plotcol, pch=16, asp = 1)
+lines(SlingshotDataSet(sce), lwd=2, col='black')
+```
+
+## create GS object
+```{r, eval = FALSE}
+counts <- exp(logcounts(sce)) - 1
+assay(sce, "counts") <- counts
+
+sce_gs <- SingleCellExperiment(assays = List(expdata = logcounts(sce)))
+colData(sce_gs)$Pseudotime <- -sce$slingPseudotime_1
+reducedDims(sce_gs) <- SimpleList(UMAP = reducedDim(sce, "UMAP", withDimnames=TRUE))
+```
+
+## Binarize
+```{r, eval = FALSE}
+bn_cutoff <- 1.5
+sce_gs <- binarize_exp(sce_gs, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff)
+sce_gs <- find_switch_logistic_fastglm(sce_gs, downsample = TRUE, show_warning = FALSE)
+```
+
+## filter top 15 best fitting switching genes among all the genes
+```{r, eval = FALSE}
+sg_allgenes_umap <- filter_switchgenes(sce_gs, allgenes = TRUE, topnum = 100)
+```
+## plot
+```{r, eval = FALSE}
+plot_timeline_ggplot(sg_allgenes_umap, timedata = sce_gs$Pseudotime, txtsize = 3)
+```
+
+## Select an evenly distributed selection of genes from a GeneSwitches Object.
+# select_evenly_distributed_switching_genes
+```{r, eval = FALSE}
+select_evenly_distributed_switching_genes <- function(sg_allgenes, min_time_spacing){
+  
+  ## Sort sg_allgenes by pseudoR2s 
+  sg_allgenes <- sg_allgenes[order(-sg_allgenes$pseudoR2s),]
+  
+  # Initialize the time value of the previously selected gene for both "up"'s and "down"'s
+  ups <- sg_allgenes[sg_allgenes$direction == "up", ]
+  prev_ups_times <- ups$switch_at_timeidx[1]
+  downs <- sg_allgenes[sg_allgenes$direction == "down", ]
+  prev_downs_times <- downs$switch_at_timeidx[1]
+  
+  # Initialize the subsetted matrix with the first "up" and "down" gene
+  subsetted_matrix <- downs[1, ]
+  subsetted_matrix <- rbind(subsetted_matrix, ups[1, ])
+  
+  # Loop over the remaining "up" genes and add them to the subsetted matrix if they meet the criteria
+  for (i in 2:nrow(ups)) {
+    # Check if the time value of the current gene is spaced by at least min_time_spacing from all previously selected genes
+    if (all(abs(ups$switch_at_timeidx[i] - prev_ups_times) >= min_time_spacing)) {
+      # Add the current gene to the subsetted matrix
+      subsetted_matrix <- rbind(subsetted_matrix, ups[i, ])
+      # Update the previous time values
+      prev_ups_times <- c(prev_ups_times, ups$switch_at_timeidx[i])
+    }
+  }
+  
+  # Loop over the remaining "down" genes and add them to the subsetted matrix if they meet the criteria
+  for (i in 2:nrow(downs)) {
+    # Check if the time value of the current gene is spaced by at least min_time_spacing from all previously selected genes
+    if (all(abs(downs$switch_at_timeidx[i] - prev_downs_times) >= min_time_spacing)) {
+      # Add the current gene to the subsetted matrix
+      subsetted_matrix <- rbind(subsetted_matrix, downs[i, ])
+      # Update the previous time values
+      prev_downs_times <- c(prev_downs_times, downs$switch_at_timeidx[i])
+    }
+  }
+  
+  # return the subsetted matrix
+  gs_scorer_genes <- subsetted_matrix
+  return(gs_scorer_genes)
+}
+```
+
+# Create a reduced binary expression matrix for only the selected switching genes, 
+# binary_counts_matrix is from the Patient DATA and gs_scorer_genes is from Atlas Data
+```{r, eval = FALSE}
+filter_gene_expression_for_switching_genes<-function(binary_counts_matrix, gs_scorer_genes) {
+  indices_of_switching_genes<-which(rownames(binary_counts_matrix) %in% gs_scorer_genes[,1])
+  reduced_binary_counts_matrix<- binary_counts_matrix[indices_of_switching_genes,]
+  gs_scorer_genes_to_keep<-which(gs_scorer_genes[,1] %in% rownames(reduced_binary_counts_matrix))
+  gs_scorer_genes<- gs_scorer_genes[gs_scorer_genes_to_keep,]
+  #print(gs_scorer_genes_to_keep)
+  returnlist<-list(reduced_binary_counts_matrix, gs_scorer_genes)
+  return(returnlist)
+}
+```
+
+#### Old?
+```{r, eval = FALSE}
+#filtered_return<-filter_gene_expression_for_switching_genes(binary_counts_matrix, gs_scorer_genes)
+#reduced_binary_counts_matrix<-filtered_return[[1]]
+#gs_scorer_genes<-filtered_return[[2]]
+```
+
+## Identify the "racing lines" 
+### TODO make this ^^ comment more verbose.
+```{r, eval = FALSE}
+create_racing_lines<-function(reduced_binary_counts_matrix,gs_scorer_genes) {
+  all_patients_cells_scored<-list()
+  number_of_cells<-dim(reduced_binary_counts_matrix)[2]
+  number_of_switching_genes<-dim(gs_scorer_genes)[1]
+  #for each patient cell C 
+  for (c in 1:number_of_cells){
+    print(paste(c,"out of", number_of_cells, "cells"))
+    racing_mat<-matrix(0,nrow = number_of_switching_genes, ncol = 100)
+    
+    #for each switching gene G
+    for (g in 1:number_of_switching_genes){
+      print(paste(g,"out of", number_of_switching_genes, "switching genes"))
+      #find out the switch time Gt, and direction Gd
+      switching_time <- as.numeric(gs_scorer_genes[g,12])
+      switching_direction <- gs_scorer_genes[g,10]
+      #find out if its expressed Ct
+      is_expressed <- reduced_binary_counts_matrix[g,c]
+      #If Ct = TRUE
+      if(is_expressed == 1){
+        #If Gd = UP
+        if(switching_direction == "up"){
+          # [Gt:100] = 1
+          racing_mat[g,switching_time:100]<-1
+        }else{
+          #If Gd = DOWN
+          # [0:Gt] = 1
+          racing_mat[g,0:switching_time]<-1
+        }
+        #If Ct = FALSE    
+      }else{
+        if(switching_direction == "up"){
+          #If Gd = UP
+          #[0:Gt] = 1
+          racing_mat[g,0:switching_time]<-1
+        }else{
+          #If Gd = DOWN
+          # [Gt:100] = 1
+          racing_mat[g,switching_time:100]<-1
+        }
+      }
+    }
+    all_patients_cells_scored[[c]]<-racing_mat
+  }
+  return(all_patients_cells_scored)
+}
+```
+
+## Create list_of_cell_position_frequencies using valid indices
+## Does this need to be included in a function?
+```{r, eval = FALSE}
+#list_of_cell_position_frequencies <- create_racing_lines(reduced_binary_counts_matrix, gs_scorer_genes)
+```
+
+## Combining all cells racing lines after binerization (Owen's way)
+#### *TODO make the high-points into single points in order to makae the yaxis of the final plot more intuitive. *
+```{r, eval = FALSE}
+flatten_cell_frequencies_owen <- function(list_of_cell_position_frequencies) {
+  all_patient_cells_scored_flat <- list()
+  length_of_list <- length(list_of_cell_position_frequencies)
+  for(i in 1:length_of_list){
+    location_of_highpoint <- as.numeric(colSums(list_of_cell_position_frequencies[[i]]) == max(colSums(list_of_cell_position_frequencies[[i]])))
+    all_patient_cells_scored_flat[[i]] <- location_of_highpoint
+  }
+  flat_matrix <- do.call(rbind, all_patient_cells_scored_flat)
+  return(flat_matrix)
+}
+```
+
+## Combining all cells racing lines after binerization (Owen's way)
+#### *DONE: make the high-points into single points in order to mkae the yaxis of the final plot more intuitive. *
+#### *this is an inelegant solution as it chooses the FIRST column index of the highpoint.*
+```{r, eval = FALSE}
+flatten_cell_frequencies_owen_done <- function(list_of_cell_position_frequencies) {
+  all_patient_cells_scored_flat <- list()
+  length_of_list <- length(list_of_cell_position_frequencies)
+  for(i in 1:length_of_list){
+    location_of_highpoint <- as.numeric(colSums(list_of_cell_position_frequencies[[i]]) == max(colSums(list_of_cell_position_frequencies[[i]])))
+    indices <- which(location_of_highpoint == 1)[-1]
+    location_of_highpoint[indices] <- 0
+    all_patient_cells_scored_flat[[i]] <- location_of_highpoint
+  }
+  flat_matrix <- do.call(rbind, all_patient_cells_scored_flat)
+  return(flat_matrix)
+}
+```
+
+## Combining all cells racing lines without binerization (Moi's way). 
+```{r, eval = FALSE}
+flatten_cell_frequencies_moi <- function(list_of_cell_position_frequencies) {
+  #making an empty flat matrix
+  all_patient_cells_scored_flat <- matrix(0, nrow = 1, ncol = 100)
+  #
+  length_of_list <- length(list_of_cell_position_frequencies)
+  # for every cells matrix calculate the colsums and add them to the flat matrix
+  for (i in 1:length_of_list) {
+    all_patient_cells_scored_flat <- all_patient_cells_scored_flat + colSums(list_of_cell_position_frequencies[[i]])
+  }
+  # divide the values in the flat matrix by the number of cells in an attempt to make the yaxis more informative.
+  # This may be the wrong approach but it shouldnt change the shape of the final plot
+  all_patient_cells_scored_flat <- all_patient_cells_scored_flat/length_of_list
+  
+  return(all_patient_cells_scored_flat)
+}
+```
+
+## select_evenly_distributed_switching_genes
+```{r, eval = FALSE}
+gs_scorer_genes <- select_evenly_distributed_switching_genes(sg_allgenes_umap, min_time_spacing = 5)
+```
+## plot
+```{r, eval = FALSE}
+plot_timeline_ggplot(gs_scorer_genes, timedata = sce_gs$Pseudotime, txtsize = 3)
+```
+
+## subsetting the pre-treatment patient data to only include the exhaustion trajectory.
+```{r, eval = FALSE}
+pre_T.seu <- subset(x = PTCL_PRE_ss, subset = predicted.cell_type %in% c("Naive T cells","Cytotoxic CD8 T cells", "Terminally exhausted CD8 T cells"))
+```
+## subsetting the post treatment patient data to only include the exhaustion trajectory.
+```{r, eval = FALSE}
+post_T.seu <- subset(x = PTCL_POST_ss, subset = predicted.cell_type %in% c("Naive T cells","Cytotoxic CD8 T cells", "Terminally exhausted CD8 T cells"))
+```
+
+## converting the pre-treatment patient data to SCE.
+```{r, eval = FALSE}
+pre.sce <- as.SingleCellExperiment(pre_T.seu)
+```
+
+## converting the pre-treatment patient data to SCE.
+```{r, eval = FALSE}
+post.sce <- as.SingleCellExperiment(post_T.seu)
+```
+
+## convert the pre treatment patient data to a GS obj. 
+```{r, eval = FALSE}
+gs_pre <- SingleCellExperiment(assays = List(expdata = logcounts(pre.sce)))
+```
+## convert the post treatment patient data to a GS obj. 
+```{r, eval = FALSE}
+gs_post <- SingleCellExperiment(assays = List(expdata = logcounts(post.sce)))
+```
+## TODO check if this cutoff is suitable for the patient data. 
+```{r, eval = FALSE}
+bn_cutoff <- 0.2
+# binarize pre treatment patient data
+gs_pre<- binarize_exp(gs_pre, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff)
+# binarize post treatment patient data
+gs_post<- binarize_exp(gs_post, fix_cutoff = TRUE, binarize_cutoff = bn_cutoff)
+
+binary_counts_matrix_pre<-assays(gs_pre)$binary
+```
+
+#create a reduced binary expression matrix for only the switching genes, binary counts is from the patient and gs scorer is from atlas
+```{r, eval = FALSE}
+filtered_return_pre<-filter_gene_expression_for_switching_genes(binary_counts_matrix_pre, gs_scorer_genes)
+```
+
+# MTN - I'm not sure what this does
+```{r, eval = FALSE}
+reduced_binary_counts_matrix_pre<-filtered_return_pre[[1]]
+```
+# MTN - or this.
+```{r, eval = FALSE}
+gs_scorer_genes<-filtered_return_pre[[2]]
+```
+
+#Create list_of_cell_position_frequencies using valid indices
+```{r, eval = FALSE}
+list_of_cell_position_frequencies <- create_racing_lines(reduced_binary_counts_matrix_pre, gs_scorer_genes)
+```
+# Combining all cells racing lines after binerization (Owen's way)
+```{r, eval = FALSE}
+preflat <- flatten_cell_frequencies_moi(list_of_cell_position_frequencies)
+plot(colSums(preflat))
+
+binary_counts_matrix_post<-assays(gs_post)$binary
+```
+
+#create a reduced binary expression matrix for only the switching genes, binary counts is from the patient and gs scorer is from atlas
+```{r, eval = FALSE}
+filtered_return_post<-filter_gene_expression_for_switching_genes(binary_counts_matrix_post, gs_scorer_genes)
+```
+
+# MTN - I'm not sure what this does
+```{r, eval = FALSE}
+reduced_binary_counts_matrix_post<-filtered_return_post[[1]]
+```
+
+# MTN - or this.
+```{r, eval = FALSE}
+gs_scorer_genes<-filtered_return_post[[2]]
+```
+
+#Create list_of_cell_position_frequencies using valid indices
+```{r, eval = FALSE}
+list_of_cell_position_frequencies <- create_racing_lines(reduced_binary_counts_matrix_post, gs_scorer_genes)
+```
+
+# Combining all cells racing lines after binerization and plot
+```{r, eval = FALSE}
+postflat <- flatten_cell_frequencies_moi(list_of_cell_position_frequencies)
+plot(colSums(postflat))
+```
+
+## view the differnece between patients
+```{r, eval = FALSE}
+patient_diff = colSums(postflat) - colSums(preflat)
+plot(patient_diff)
+```