From f92e656bccfc4eb3961ad0a479ce3554df4c5902 Mon Sep 17 00:00:00 2001
From: Atlantis-Real
Date: Tue, 3 Sep 2024 14:02:54 -0700
Subject: [PATCH 1/8] mirrored_lollipop_plot.R

---
 R/mirrored_lollipop_plot.R | 314 +++++++++++++++++++++++++++++++++++++
 1 file changed, 314 insertions(+)
 create mode 100644 R/mirrored_lollipop_plot.R

diff --git a/R/mirrored_lollipop_plot.R b/R/mirrored_lollipop_plot.R
new file mode 100644
index 0000000..05ccd35
--- /dev/null
+++ b/R/mirrored_lollipop_plot.R
@@ -0,0 +1,314 @@
+#' @title Mirrored Lollipop Plot.
+#'
+#' @description Generates a visually appealing mirrored lollipop plot.
+#'
+#' @details Retrieve two maf data files of a specific sample or a set of samples for comparison.
+#' A gene of interest can then be visualized with the given maf data files. Silent mutations can
+#' be visualized setting include_silent to TRUE.
+#'
+#' @param maf_df1 A data frame containing the mutation data from a given cohort or pathology, etc.
+#' @param maf_df2 A data frame containing the mutation data from another cohort or pathology, etc.
+#' @param gene Required. The gene symbol to plot.
+#' @param plot_title Optional, the title of the plot. Default is gene.
+#' @param include_silent Logical parameter indicating whether to include silent mutations (Silent and Splice_Region) into coding mutations. Default is FALSE.
+#'
+#' @return A mirrored lollipop plot.
+#'
+#' @import dplyr ggplot2
+#' @export
+#'
+#' @examples
+#' library(GAMBLR.data)
+#'
+#' metadata <- get_gambl_metadata()
+#' metadata1 <- metadata %>%
+#'     filter(cohort == "DLBCL_Hilton")
+#' metadata2 <- metadata %>%
+#'     filter(cohort == "DLBCL_Thomas")
+#'
+#' maf_df1 <- get_ssm_by_samples(
+#'     these_samples_metadata = metadata1
+#' )
+#'
+#' maf_df2 <- get_ssm_by_samples(
+#'     these_samples_metadata = metadata2
+#' )
+#'
+#' #construct mirrored_lollipop_plot
+#' mirrored_lollipop_result <- mirrored_lollipop_plot(maf_df1, maf_df2, "IGLL5")
+#'
+
+mirrored_lollipop_plot <- function(
+    maf_df1,
+    maf_df2,
+    gene = NULL,
+    plot_title,
+    include_silent = FALSE
+) {
+    if(missing(gene) || is.null(gene)){
+        stop("Please provide a gene...")
+    }
+
+    if(missing(plot_title)){
+        plot_title <- gene
+    }
+
+    maf_df2 <- as.data.frame(maf_df2)
+    maf_df1 <- as.data.frame(maf_df1)
+
+    # coding_class is a bundled object with GAMBLR.helpers; drop Silent/Splice_Region unless include_silent is TRUE
+    if(include_silent){
+        variants <- coding_class
+    } else {
+        variants <- coding_class[!coding_class %in% c(
+            "Silent",
+            "Splice_Region"
+        )]
+    }
+
+    nc_maf_df2 <- maf_df2 %>%
+        filter(
+            Hugo_Symbol == gene
+        ) %>%
+        filter(
+            Variant_Classification %in% variants
+        )
+
+    nc_maf_df1 <- maf_df1 %>%
+        filter(
+            Hugo_Symbol == gene
+        ) %>%
+        filter(
+            Variant_Classification %in% variants
+        )
+
+    # Parse the amino acid position (first run of digits in HGVSp_Short) for each mutation
+    gene_df2 <- nc_maf_df2 %>%
+        mutate(
+            AA = as.numeric(
+                gsub(
+                    "[^0-9]+",
+                    "",
+                    gsub(
+                        "([0-9]+).*",
+                        "\\1",
+                        HGVSp_Short
+                    )
+                )
+            )
+        ) %>%
+        arrange(AA)
+
+    gene_counts2 <- gene_df2 %>%
+        group_by(
+            AA,
+            Start_Position,
+            End_Position,
+            Variant_Classification,
+            Reference_Allele,
+            Tumor_Seq_Allele2
+        ) %>%
+        arrange(AA) %>%
+        summarise(mutation_count = n())
+
+    gene_df1 <- nc_maf_df1 %>%
+        mutate(
+            AA = as.numeric(
+                gsub(
+                    "[^0-9]+",
+                    "",
+                    gsub(
+                        "([0-9]+).*",
+                        "\\1",
+                        HGVSp_Short
+                    )
+                )
+            )
+        ) %>%
+        arrange(AA)
+
+    gene_counts1 <- gene_df1 %>%
+        group_by(
+            AA,
+            Start_Position,
+            End_Position,
+            Variant_Classification,
+            Reference_Allele,
+            Tumor_Seq_Allele2
+        ) %>%
+        arrange(AA) %>%
+        summarise(mutation_count = n())
+
+    # protein_domains a bundled object with GAMBLR.data
+    protein_domain_subset <- subset(
+        protein_domains,
+        HGNC == gene
+    )
+
+    domain_data <- protein_domain_subset %>%
+        data.frame(
+            start.points = protein_domain_subset$Start,
+            end.points = protein_domain_subset$End,
+            text.label = protein_domain_subset$Label,
+            color = protein_domain_subset$Label
+        )
+
+    domain_data$text.position <- (domain_data$start.points + domain_data$end.points) / 2
+
+    # Determine the x-axis range
+    x_max <- max(
+        max(
+            domain_data$end.points
+        ),
+        max(
+            gene_counts1$AA,
+            na.rm = TRUE
+        ),
+        max(
+            gene_counts2$AA,
+            na.rm = TRUE
+        )
+    )
+    x_min <- 0
+
+    # get_gambl_colours() from GAMBLR.helpers
+    colours_manual <- get_gambl_colours("mutation")
+
+    # Somatic mutation statistic
+    Somatic_Mutation_Numerator2 <- maf_df2 %>%
+        filter(
+            Hugo_Symbol == gene
+        ) %>%
+        filter(
+            Variant_Classification %in% variants
+        ) %>%
+        distinct(Tumor_Sample_Barcode) %>%
+        nrow()
+
+    Somatic_Mutation_Denominator2 <- length(unique(maf_df2$Tumor_Sample_Barcode))
+
+    Somatic_Mutation_Rate2 <- Somatic_Mutation_Numerator2/Somatic_Mutation_Denominator2 *100
+    Somatic_Mutation_Rate2 <- round(Somatic_Mutation_Rate2, 2)
+
+    Somatic_Mutation_Numerator1 <- maf_df1 %>%
+        filter(
+            Hugo_Symbol == gene
+        ) %>%
+        filter(
+            Variant_Classification %in% variants
+        ) %>%
+        distinct(Tumor_Sample_Barcode) %>%
+        nrow()
+
+    Somatic_Mutation_Denominator1 <- length(unique(maf_df1$Tumor_Sample_Barcode))
+
+    Somatic_Mutation_Rate1 <- Somatic_Mutation_Numerator1/Somatic_Mutation_Denominator1 *100
+    Somatic_Mutation_Rate1 <- round(Somatic_Mutation_Rate1, 2)
+
+    plot <- ggplot() +
+        geom_segment(
+            data = gene_counts2,
+            aes(
+                x = AA,
+                xend = AA,
+                y = 0,
+                yend = -mutation_count
+            )
+        ) +
+        geom_segment(
+            data = gene_counts1,
+            aes(
+                x = AA,
+                xend = AA,
+                y = 0,
+                yend = mutation_count
+            )
+        ) +
+        geom_point(
+            data = gene_counts2,
+            aes(
+                x = AA,
+                y = -mutation_count,
+                color = Variant_Classification,
+                size = mutation_count
+            )
+        ) +
+        annotate(
+            "text",
+            x = 0,
+            y = max(gene_counts2$mutation_count) * -1.1,
+            label = paste0("Sample 2: ", Somatic_Mutation_Rate2, "%"),
+            hjust = 0
+        ) +
+        geom_point(
+            data = gene_counts1,
+            aes(
+                x = AA,
+                y = mutation_count,
+                color = Variant_Classification,
+                size = mutation_count
+            )
+        ) +
+        annotate(
+            "text",
+            x = 0,
+            y = max(gene_counts1$mutation_count) * 1.1,
+            label = paste0("Sample 1: ", Somatic_Mutation_Rate1, "%"),
+            hjust = 0
+        ) +
+        # Background rectangle for regions without domain data
+        geom_rect(
+            aes(
+                xmin = x_min,
+                xmax = x_max,
+                ymin = -0.2,
+                ymax = 0.2
+            ),
+            fill = "black",
+            color = "black"
+        ) +
+        # Domain rectangles
+        geom_rect(
+            data = domain_data,
+            aes(
+                xmin = start.points,
+                xmax = end.points,
+                ymin = -0.4,
+                ymax = 0.4,
+                fill = color
+            ),
+            color = "black",
+            show.legend = FALSE
+        ) +
+        geom_text(
+            data = domain_data,
+            aes(
+                x = text.position,
+                y = 0,
+                label = text.label
+            )
+        ) +
+        labs(
+            x = "AA Position",
+            y = "Mutation Count",
+            title = paste0(
+                plot_title
+            )
+        ) +
+        theme_bw() +
+        theme(
+            plot.title = element_text(
+                hjust = 0.5
+            ),
+            axis.text.x = element_text(
+                angle = 45,
+                hjust = 1
+            )
+        ) +
+        scale_color_manual(
+            name = "Legend",
+            values = colours_manual
+        )
+
+    return(plot)
+}
From 33b69f9287cc933db1eec9a670b0fc66b4b40716 Mon Sep 17 00:00:00 2001
From: Atlantis-Real
Date: Tue, 3 Sep 2024 14:16:56 -0700
Subject: [PATCH 2/8] mirrored_lollipop_plot.R doc

---
 NAMESPACE                     |  1 +
 man/mirrored_lollipop_plot.Rd | 57 +++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 man/mirrored_lollipop_plot.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 6583d9e..150c9ce 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,6 +20,7 @@ export(fancy_v_sizedis)
 export(focal_cn_plot)
 export(heatmap_mutation_frequency_bin)
export(map_metadata_to_colours) +export(mirrored_lollipop_plot) export(plot_sample_circos) export(prettyChromoplot) export(prettyCoOncoplot) diff --git a/man/mirrored_lollipop_plot.Rd b/man/mirrored_lollipop_plot.Rd new file mode 100644 index 0000000..b218c24 --- /dev/null +++ b/man/mirrored_lollipop_plot.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mirrored_lollipop_plot.R +\name{mirrored_lollipop_plot} +\alias{mirrored_lollipop_plot} +\title{Mirrored Lollipop Plot.} +\usage{ +mirrored_lollipop_plot( + maf_df1, + maf_df2, + gene = NULL, + plot_title, + include_silent = FALSE +) +} +\arguments{ +\item{maf_df1}{A data frame containing the mutation data from a given cohort or pathology, etc.} + +\item{maf_df2}{A data frame containing the mutation data from another cohort or pathology, etc.} + +\item{gene}{The gene symbol to plot.} + +\item{plot_title}{Optional, the title of the plot. Default is gene.} + +\item{include_silent}{Logical parameter indicating whether to include silent mutations into coding mutations. Default is FALSE.} +} +\value{ +A mirrored lollipop plot. +} +\description{ +Generates a visually appealing mirrored lollipop plot. +} +\details{ +Retrieve two maf data files of a specific sample or a set of samples for comparison. +A gene of interest can then be visualized with the given maf data files. Silent mutations can +be visualized setting include_silent to TRUE. 
+} +\examples{ +library(GAMBLR.data) + +metadata <- get_gambl_metadata() +metadata1 <- metadata \%>\% + filter(cohort == "DLBCL_Hilton") +metadata2 <- metadata \%>\% + filter(cohort == "DLBCL_Thomas") + +maf_df1 <- get_ssm_by_samples( + these_samples_metadata = metadata1 +) + +maf_df2 <- get_ssm_by_samples( + these_samples_metadata = metadata2 +) + +#construct mirrored_lollipop_plot +mirrored_lollipop_result <- mirrored_lollipop_plot(maf_df1, maf_df2, "IGLL5") + +} From 9360b2c55b4d37b3f7db684490a17e03c206343c Mon Sep 17 00:00:00 2001 From: atlantis-real Date: Thu, 3 Oct 2024 18:59:17 -0700 Subject: [PATCH 3/8] pretty_lollipop_plot and pretty_colollipop_plot for review --- NAMESPACE | 1 + R/pretty_colollipop_plot.r | 155 +++++++++++++++++++++++++++++++ R/pretty_lollipop_plot.R | 169 ++++++++++++++++++++++++++++------ man/pretty_colollipop_plot.Rd | 57 ++++++++++++ man/pretty_lollipop_plot.Rd | 28 +++++- 5 files changed, 383 insertions(+), 27 deletions(-) create mode 100644 R/pretty_colollipop_plot.r create mode 100644 man/pretty_colollipop_plot.Rd diff --git a/NAMESPACE b/NAMESPACE index 150c9ce..89dee60 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(prettyForestPlot) export(prettyGeneCloud) export(prettyOncoplot) export(prettyRainfallPlot) +export(pretty_colollipop_plot) export(pretty_lollipop_plot) export(splendidHeatmap) import(ComplexHeatmap) diff --git a/R/pretty_colollipop_plot.r b/R/pretty_colollipop_plot.r new file mode 100644 index 0000000..ef09048 --- /dev/null +++ b/R/pretty_colollipop_plot.r @@ -0,0 +1,155 @@ +#' @title Pretty CoLollipop Plot. +#' +#' @description Generates a ggplot-compatible figure of 2 [GAMBLR.viz::pretty_lollipop_plot] mirrored. +#' +#' @details Retrieve maf data of a specific sample or a set of samples for comparison. A gene of interest +#' can then be visualized with the given maf data files, and comparison commands. Silent mutations can be +#' visualized setting include_silent to TRUE. 
+#'
+#' @param maf_df A data frame containing the mutation data.
+#' @param metadata Required argument. A data.frame with metadata for the CoLollipop Plot.
+#' @param comparison_column Required: the name of the metadata column containing the comparison values.
+#' @param comparison_values Optional: If the comparison column contains more than two values or is not a factor, specify a character vector of length two in the order you would like the factor levels to be set, reference group first.
+#' @param gene Required. The gene symbol to plot.
+#' @param plot_title Optional, the title of the plot. Default is gene.
+#' @param ... `pretty_lollipop_plot` arguments, see that function for more info on available parameters.
+#'
+#' @return A mirrored lollipop plot.
+#'
+#' @import dplyr ggplot2
+#' @export
+#'
+#' @examples
+#' library(GAMBLR.data)
+#'
+#' #get meta data (DLBCL_Hilton)
+#' meta = GAMBLR.data::get_gambl_metadata()
+#' metadata = dplyr::filter(meta, cohort %in% "DLBCL_Hilton")
+#' maf_df = GAMBLR.data::get_coding_ssm(
+#'     these_samples_metadata = metadata
+#' )
+#' pretty_colollipop_plot_result <- pretty_colollipop_plot(maf_df = maf_df,
+#'                                metadata = metadata,
+#'                                comparison_column = "sex",
+#'                                comparison_values = c("M", "F"),
+#'                                gene = "IGLL5")
+pretty_colollipop_plot <- function(
+    maf_df,
+    metadata,
+    comparison_column,
+    comparison_values,
+    gene = NULL,
+    plot_title,
+    ...
+) {
+
+    # check for required arguments
+    required <- c(
+        "maf_df",
+        "metadata",
+        "comparison_column"
+    )
+
+    defined <- names(as.list(match.call())[-1])
+
+    if(any(!required %in% defined) || is.null(gene)) {
+        stop("Please provide mutation data (maf_df), metadata, a comparison_column and a gene for the pretty CoLollipop plot.")
+    }
+
+    # If no comparison_values are specified, derive the comparison_values from the specified comparison_column
+    if(missing(comparison_values)){
+        if(is.factor(metadata[[comparison_column]])){
+            comparison_values <- levels(metadata[[comparison_column]])
+        } else {
+            comparison_values <- unique(metadata[[comparison_column]])
+        }
+    }
+
+    # Ensure there are only two comparison_values
+    {
+        if(length(comparison_values) != 2)
+            stop(paste0("Your comparison must have two values. \nEither specify comparison_values as a vector of length 2 or subset your metadata so your comparison_column has only two unique values or factor levels."))
+    }
+
+    # Subset the metadata to the specified comparison_values and the maf to the remaining sample_ids
+    meta1 <- metadata[metadata[[comparison_column]] %in% comparison_values[1], ]
+    meta2 <- metadata[metadata[[comparison_column]] %in% comparison_values[2], ]
+
+    # Subset maf to only samples in the comparison values
+    ssm1 <- maf_df %>%
+        dplyr::filter(
+            Tumor_Sample_Barcode %in% meta1$Tumor_Sample_Barcode
+        )
+
+    ssm2 <- maf_df %>%
+        dplyr::filter(
+            Tumor_Sample_Barcode %in% meta2$Tumor_Sample_Barcode
+        )
+
+    # Ensure dimensions are greater than zero otherwise no variants are returned.
+    dim1 <- dim(ssm1)[1]
+    dim2 <- dim(ssm2)[1]
+
+    if(dim1 == 0 || dim2 == 0) {
+        stop(paste0("Ensure all variants in metadata are accounted for. \nEither ensure all variants are loaded into metadata as one dataframe, or subset your metadata so that all variants are included."))
+    }
+
+    # Arguments to pass into pretty_lollipop_plot
+    lollipopplot_args <- list(...)
+
+    # Get gene_counts data for the first plot (without generating the plot)
+    lp1_gene_counts <- do.call(pretty_lollipop_plot, c(
+        list(
+            maf_df = ssm1,
+            gene = gene,
+            plotarg = FALSE
+        ),
+        lollipopplot_args
+    ))
+    lp1_gene_counts_data <- as.data.frame(lp1_gene_counts)
+
+    # Get gene_counts data for the second plot
+    lp2_gene_counts <- do.call(pretty_lollipop_plot, c(
+        list(
+            maf_df = ssm2,
+            gene = gene,
+            plotarg = FALSE
+        ),
+        lollipopplot_args
+    ))
+    lp2_gene_counts_data <- as.data.frame(lp2_gene_counts)
+    lp2_gene_counts_data <- lp2_gene_counts_data %>%
+        mutate(mutation_count = mutation_count * -1)
+
+    # Combine data for both plots
+    lp1_gene_counts_data <- lp1_gene_counts_data %>%
+        mutate(source = "Sample 1")
+    lp2_gene_counts_data <- lp2_gene_counts_data %>%
+        mutate(source = "Sample 2")
+
+    combined_gene_counts <- rbind(
+        lp1_gene_counts_data,
+        lp2_gene_counts_data
+    )
+
+    # Setting Somatic Mutation Statistic
+    meta1_counter <- length(unique(meta1$Tumor_Sample_Barcode))
+    meta2_counter <- length(unique(meta2$Tumor_Sample_Barcode))
+
+    # Pass combined_gene_counts to pretty_lollipop_plot for plotting
+    combined_plot <- pretty_lollipop_plot(
+        maf_df = maf_df,
+        gene = gene,
+        plot_title = plot_title,
+        include_silent = isTRUE(lollipopplot_args[["include_silent"]]),
+        plotarg = TRUE,
+        mirrorarg = TRUE,
+        combined_gene_counts = combined_gene_counts,
+        meta1_counter = meta1_counter,
+        meta2_counter = meta2_counter,
+        Sample1 = comparison_values[1],
+        Sample2 = comparison_values[2]
+    )
+
+    return(combined_plot)
+}
diff --git a/R/pretty_lollipop_plot.R b/R/pretty_lollipop_plot.R
index b9d7a71..dd21b48 100644
--- a/R/pretty_lollipop_plot.R
+++ b/R/pretty_lollipop_plot.R
@@ -10,6 +10,13 @@
 #' @param gene The gene symbol to plot.
 #' @param plot_title Optional, the title of the plot. Default is gene.
 #' @param include_silent Logical parameter indicating whether to include silent mutations into coding mutations. Default is FALSE.
+#' @param plotarg Logical parameter indicating whether to return the lollipop plot (TRUE) or the underlying gene counts data frame (FALSE). Default is TRUE. +#' @param mirrorarg Logical parameter; set to TRUE by `pretty_colollipop_plot` to draw the mirrored plot from `combined_gene_counts`. Default is FALSE. +#' @param combined_gene_counts A data frame of mutation rows for both comparison groups, labelled by a `source` column ("Sample 1" / "Sample 2"); only used when `mirrorarg` is TRUE. +#' @param meta1_counter The number of unique samples in the first comparison group, used as the denominator of its Somatic Mutation Rate when `mirrorarg` is TRUE. +#' @param meta2_counter The number of unique samples in the second comparison group, used as the denominator of its Somatic Mutation Rate when `mirrorarg` is TRUE. +#' @param Sample1 A label for the first comparison group, displayed with its Somatic Mutation Rate when `mirrorarg` is TRUE. +#' @param Sample2 A label for the second comparison group, displayed with its Somatic Mutation Rate when `mirrorarg` is TRUE. #' #' @return A lollipop plot. #' @@ -31,17 +38,24 @@ #' lolipop_result <- pretty_lollipop_plot(maf_df, "MYC") #' pretty_lollipop_plot <- function( - maf_df, + maf_df = NULL, gene = NULL, plot_title, - include_silent = FALSE + include_silent = FALSE, + plotarg = TRUE, + mirrorarg = FALSE, + combined_gene_counts = NULL, + meta1_counter = NULL, + meta2_counter = NULL, + Sample1 = Sample1, + Sample2 = Sample2 ) { if(missing(gene)){ stop("Please provide a gene...") } if(missing(plot_title)){ - plot_title=gene + plot_title = gene } maf_df <- as.data.frame(maf_df) @@ -91,7 +105,13 @@ pretty_lollipop_plot <- function( Tumor_Seq_Allele2 ) %>% arrange(AA) %>% - summarise(mutation_count = n()) + mutate(mutation_count = n()) + + if (mirrorarg == TRUE){ + gene_counts <- combined_gene_counts + } else { + gene_counts <- gene_counts + } # protein_domains a bundled object with GAMBLR.data protein_domain_subset <- subset( @@ -120,22 +140,64 @@ pretty_lollipop_plot <- function( # get_gambl_colours() from GAMBLR.helpers colours_manual <- get_gambl_colours("mutation") - + # Somatic mutation statistic - Somatic_Mutation_Numerator <- maf_df %>% - filter( - Hugo_Symbol == gene - ) %>% - filter( - Variant_Classification %in% variants - ) %>% - 
distinct(Tumor_Sample_Barcode) %>% - nrow() + if (mirrorarg == TRUE){ + + # Initialize vectors + Somatic_Mutation_Numerator <- numeric(2) + Somatic_Mutation_Denominator <- numeric(2) + Somatic_Mutation_Rate <- numeric(2) + + # Somatic Mutation rate for lp1 + Somatic_Mutation_Numerator[1] <- combined_gene_counts %>% + filter( + Hugo_Symbol == gene, + source == "Sample 1" + ) %>% + filter( + Variant_Classification %in% variants + ) %>% + distinct(Tumor_Sample_Barcode) %>% + nrow() + + Somatic_Mutation_Denominator[1] <- meta1_counter + + Somatic_Mutation_Rate[1] <- Somatic_Mutation_Numerator[1]/Somatic_Mutation_Denominator[1] *100 + Somatic_Mutation_Rate[1] <- round(Somatic_Mutation_Rate[1], 2) + + # Somatic Mutation rate for lp2 + Somatic_Mutation_Numerator[2] <- combined_gene_counts %>% + filter( + Hugo_Symbol == gene, + source == "Sample 2" + ) %>% + filter( + Variant_Classification %in% variants + ) %>% + distinct(Tumor_Sample_Barcode) %>% + nrow() + + Somatic_Mutation_Denominator[2] <- meta2_counter - Somatic_Mutation_Denominator <- length(unique(maf_df$Tumor_Sample_Barcode)) + Somatic_Mutation_Rate[2] <- Somatic_Mutation_Numerator[2]/Somatic_Mutation_Denominator[2] *100 + Somatic_Mutation_Rate[2] <- round(Somatic_Mutation_Rate[2], 2) + } else { + Somatic_Mutation_Numerator <- maf_df %>% + filter( + Hugo_Symbol == gene + ) %>% + filter( + Variant_Classification %in% variants + ) %>% + distinct(Tumor_Sample_Barcode) %>% + nrow() + + Somatic_Mutation_Denominator <- length(unique(maf_df$Tumor_Sample_Barcode)) - Somatic_Mutation_Rate <- Somatic_Mutation_Numerator/Somatic_Mutation_Denominator *100 - Somatic_Mutation_Rate <- round(Somatic_Mutation_Rate, 2) + Somatic_Mutation_Rate <- Somatic_Mutation_Numerator/Somatic_Mutation_Denominator *100 + Somatic_Mutation_Rate <- round(Somatic_Mutation_Rate, 2) + } plot <- ggplot() + geom_segment( @@ -153,7 +215,7 @@ pretty_lollipop_plot <- function( x = AA, y = mutation_count, color = Variant_Classification, - size = 
mutation_count + size = abs(mutation_count) ) ) + # Background rectangle for regions without domain data @@ -192,26 +254,81 @@ pretty_lollipop_plot <- function( x = "AA Position", y = "Mutation Count", title = paste0( - plot_title, - "\n[Somatic Mutation Rate: ", - Somatic_Mutation_Rate, - "%]" + plot_title ) - ) + + ) + + if (mirrorarg == TRUE) { + plot <- plot + + annotate( + "text", + x = 0, + y = max(gene_counts$mutation_count) * 1.1, + label = paste0( + "Somatic Mutation Rate ", + Somatic_Mutation_Rate[1], + "%", + "\n", + '"', + Sample1, + '"' + ), + hjust = 0 + ) + + annotate( + "text", + x = 0, + y = min(gene_counts$mutation_count) * 1.1, + label = paste0( + "Somatic Mutation Rate ", + Somatic_Mutation_Rate[2], + "%", + "\n", + '"', + Sample2, + '"' + ), + hjust = 0 + ) + } else { + plot <- plot + + annotate( + "text", + x = 0, + y = max(gene_counts$mutation_count) * 1.1, + label = paste0( + "Somatic Mutation Rate ", + Somatic_Mutation_Rate, + "%" + ), + hjust = 0 + ) + } + + plot <- plot + + scale_size_continuous( + name = "Mutation Count", + labels = function(x) abs(x) + ) + theme_bw() + theme( plot.title = element_text( hjust = 0.5 ), axis.text.x = element_text( - angle = 45, + angle = 90, hjust = 1 ) ) + scale_color_manual( name = "Legend", values = colours_manual - ) + ) + + if (plotarg == TRUE) { + return(plot) + } else { + return(gene_counts) + } - return(plot) } diff --git a/man/pretty_colollipop_plot.Rd b/man/pretty_colollipop_plot.Rd new file mode 100644 index 0000000..1954e65 --- /dev/null +++ b/man/pretty_colollipop_plot.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pretty_colollipop_plot.r +\name{pretty_colollipop_plot} +\alias{pretty_colollipop_plot} +\title{Pretty CoLollipop Plot.} +\usage{ +pretty_colollipop_plot( + maf_df, + metadata, + comparison_column, + comparison_values, + gene = NULL, + plot_title, + ... 
+) +} +\arguments{ +\item{maf_df}{A data frame containing the mutation data.} + +\item{metadata}{Required argument. A data.frame with metadata for the CoLollipop Plot.} + +\item{comparison_column}{Required: the name of the metadata column containing the comparison values.} + +\item{comparison_values}{Optional: If the comparison column contains more than two values or is not a factor, specify a character vector of length two in the order you would like the factor levels to be set, reference group first.} + +\item{gene}{The gene symbol to plot.} + +\item{plot_title}{Optional, the title of the plot. Default is gene.} + +\item{...}{`pretty_lollipop_plot` arguments, see that function for more info on available parameters.} +} +\value{ +A mirrored lollipop plot. +} +\description{ +Generates a ggplot-compatible figure of 2 [GAMBLR.viz::pretty_lollipop_plot] mirrored. +} +\details{ +Retrieve maf data of a specific sample or a set of samples for comparison. A gene of interest +can then be visualized with the given maf data files, and comparison commands. Silent mutations can be +visualized setting include_silent to TRUE. 
+} +\examples{ +library(GAMBLR.data) + +#get meta data (DLBCL_Hilton) +meta = GAMBLR.data::get_gambl_metadata() +metadata = dplyr::filter(meta, cohort \%in\% "DLBCL_Hilton") +maf_df = GAMBLR.data::get_coding_ssm( + these_samples_metadata = metadata +) +pretty_colollipop_plot_result <- pretty_colollipop_plot(maf_df = maf_df, + metadata = metadata, + comparison_column = "sex", + comparison_values = c("M", "F"), + gene = "IGLL5") +} diff --git a/man/pretty_lollipop_plot.Rd b/man/pretty_lollipop_plot.Rd index 8ac705c..0207583 100644 --- a/man/pretty_lollipop_plot.Rd +++ b/man/pretty_lollipop_plot.Rd @@ -4,7 +4,19 @@ \alias{pretty_lollipop_plot} \title{Pretty Lollipop Plot.} \usage{ -pretty_lollipop_plot(maf_df, gene = NULL, plot_title, include_silent = FALSE) +pretty_lollipop_plot( + maf_df = NULL, + gene = NULL, + plot_title, + include_silent = FALSE, + plotarg = TRUE, + mirrorarg = FALSE, + combined_gene_counts = NULL, + meta1_counter = NULL, + meta2_counter = NULL, + Sample1 = Sample1, + Sample2 = Sample2 +) } \arguments{ \item{maf_df}{A data frame containing the mutation data.} @@ -14,6 +26,20 @@ pretty_lollipop_plot(maf_df, gene = NULL, plot_title, include_silent = FALSE) \item{plot_title}{Optional, the title of the plot. Default is gene.} \item{include_silent}{Logical parameter indicating whether to include silent mutations into coding mutations. Default is FALSE.} + +\item{plotarg}{Logical parameter indicating whether to plot the lollipopplot or return the data in data frame format. Default is TRUE.} + +\item{mirrorarg}{Logical paramter for when mirroring lollipop data in prety_co_lollipop plot. 
Default is FALSE.} + +\item{combined_gene_counts}{A dataframe containing data for a mirrored lollipop analysis.} + +\item{meta1_counter}{A dataframe for calculating Somatic Mutation Rate in `pretty_lollipop_plot`.} + +\item{meta2_counter}{A dataframe for calculating Somatic Mutation Rate in `pretty_lollipop_plot`.} + +\item{Sample1}{A label for displaying Somatic Mutation Rate in `pretty_lollipop_plot`.} + +\item{Sample2}{A label for displaying Somatic Mutation Rate in `pretty_lollipop_plot`.} } \value{ A lollipop plot. From 69e416d79ffa8fb299b1ff9825ac498c858924df Mon Sep 17 00:00:00 2001 From: atlantis-real Date: Sun, 20 Oct 2024 00:28:47 -0700 Subject: [PATCH 4/8] KS test for gene and for domain(s) update --- R/pretty_lollipop_plot.R | 133 +++++++++++++++++++++++++++++++++++---- 1 file changed, 122 insertions(+), 11 deletions(-) diff --git a/R/pretty_lollipop_plot.R b/R/pretty_lollipop_plot.R index dd21b48..ea98f43 100644 --- a/R/pretty_lollipop_plot.R +++ b/R/pretty_lollipop_plot.R @@ -199,6 +199,92 @@ pretty_lollipop_plot <- function( Somatic_Mutation_Rate <- round(Somatic_Mutation_Rate, 2) } + # KS-Test + if(mirrorarg == TRUE){ + # Gene KS-Test + aa_frequency_data <- combined_gene_counts %>% + group_by(AA, source) %>% + summarise(freq = n()) + + ks_test_result <- ks.test( + aa_frequency_data$freq[aa_frequency_data$source == "Sample 1"], + aa_frequency_data$freq[aa_frequency_data$source == "Sample 2"] + ) + + gene_p_value <- ks_test_result$p.value + + # Domain(s) KS-Test + domain_names <- c() + domain_list <- list() + domain_data$p_value <- NA + + for (i in 1:nrow(domain_data)) { + domain_name <- domain_data$text.label[i] + min_val <- domain_data$start.points[i] + max_val <- domain_data$end.points[i] + + # Subset data for the current domain + domain_subset <- combined_gene_counts %>% + filter(AA >= min_val & AA <= max_val) + + if (nrow(domain_subset) > 0) { + + # Mutation counts for Sample 1 and Sample 2 + domain_mutation1 <- domain_subset %>% + filter( + 
Variant_Classification %in% variants, + source == "Sample 1" + ) %>% + nrow() + + domain_mutation2 <- domain_subset %>% + filter( + Variant_Classification %in% variants, + source == "Sample 2" + ) %>% + nrow() + + domain_frequency_data <- domain_subset %>% + group_by( + AA, + source + ) %>% + summarise(freq = n()) + + domain_ks_test <- tryCatch({ + ks.test( + domain_frequency_data$freq[domain_frequency_data$source == "Sample 1"], + domain_frequency_data$freq[domain_frequency_data$source == "Sample 2"] + ) + }, error = function(e) { + list(p.value = NA) + }) + + # Save domain-specific results in a list + unique_domain_name <- paste0( + "Domain_", + domain_name + ) + domain_list[[unique_domain_name]] <- list( + mutation_count_sample1 = domain_mutation1, + mutation_count_sample2 = domain_mutation2, + ks_p_value = domain_ks_test$p.value + ) + + # Store the p-value in domain_data + domain_data$p_value[i] <- domain_ks_test$p.value + + # Store the unique domain name + domain_names <- c(domain_names, unique_domain_name) + } else { + print(paste( + "No mutation data for domain", + domain_name + )) + } + } + } + plot <- ggplot() + geom_segment( data = gene_counts, @@ -246,16 +332,9 @@ pretty_lollipop_plot <- function( data = domain_data, aes( x = text.position, - y = 0, + y = 0.1, label = text.label ) - ) + - labs( - x = "AA Position", - y = "Mutation Count", - title = paste0( - plot_title - ) ) if (mirrorarg == TRUE) { @@ -267,8 +346,10 @@ pretty_lollipop_plot <- function( label = paste0( "Somatic Mutation Rate ", Somatic_Mutation_Rate[1], - "%", - "\n", + "% N = ", + Somatic_Mutation_Denominator[1], + "\n", + "Comparison Value ", '"', Sample1, '"' @@ -282,13 +363,36 @@ pretty_lollipop_plot <- function( label = paste0( "Somatic Mutation Rate ", Somatic_Mutation_Rate[2], - "%", + "% N = ", + Somatic_Mutation_Denominator[2], "\n", + "Comparison Value ", '"', Sample2, '"' ), hjust = 0 + ) + + geom_text( + data = domain_data, + aes( + x = text.position, + y = -0.2, + label = 
paste0( + "p = ", + round(p_value, 3) + ) # Display domain-specific p-value + ) + ) + + labs( + x = "AA Position", + y = "Mutation Count", + title = paste0( + plot_title, + "\n", + "p = ", + round(gene_p_value, 3) + ) ) } else { plot <- plot + @@ -302,6 +406,13 @@ pretty_lollipop_plot <- function( "%" ), hjust = 0 + ) + + labs( + x = "AA Position", + y = "Mutation Count", + title = paste0( + plot_title + ) ) } From 0b5a40741058fa35c8db49676ed7c2fdfe48e43a Mon Sep 17 00:00:00 2001 From: atlantis-real Date: Mon, 21 Oct 2024 00:35:23 -0700 Subject: [PATCH 5/8] prettyForestPlot addition to pretty_colollipop_plot --- R/prettyForestPlot.R | 12 +++++++++--- R/pretty_colollipop_plot.r | 35 ++++++++++++++++++++++++++++++++--- man/prettyForestPlot.Rd | 5 ++++- man/pretty_colollipop_plot.Rd | 3 ++- 4 files changed, 47 insertions(+), 8 deletions(-) diff --git a/R/prettyForestPlot.R b/R/prettyForestPlot.R index 8746327..854b59e 100644 --- a/R/prettyForestPlot.R +++ b/R/prettyForestPlot.R @@ -23,7 +23,8 @@ #' @param custom_colours Optional: Specify a named vector of colours that match the values in the comparison column. #' @param custom_labels Optional: Specify custom labels for the legend categories. Must be in the same order as comparison_values. #' @param max_q cut off for q values to be filtered in fish test -#' +#' @param mirrorarg Logical paramter for when mirroring lollipop data in prety_co_lollipop plot. Default is FALSE. +#' #' @return A convenient list containing all the data frames that were created in making the plot, including the mutation matrix. It also produces (and returns) ggplot object with a side-by-side forest plot and bar plot showing mutation incidences across two groups. 
#' #' @rawNamespace import(data.table, except = c("last", "first", "between", "transpose", "melt", "dcast")) @@ -64,7 +65,8 @@ prettyForestPlot = function(maf, comparison_name = FALSE, custom_colours = FALSE, custom_labels = FALSE, - max_q = 1){ + max_q = 1, + mirrorarg = FALSE){ #If no comparison_values are specified, derive the comparison_values from the specified comparison_column if(comparison_values[1] == FALSE){ @@ -298,5 +300,9 @@ prettyForestPlot = function(maf, align = "h" ) - return(list(fisher = fish_test, forest = forest, bar = bar, arranged = arranged_plot, mutmat = mutmat)) + if (mirrorarg == FALSE) { + return(list(fisher = fish_test, forest = forest, bar = bar, arranged = arranged_plot, mutmat = mutmat)) + } else { + return(list(forest = forest, bar = bar)) + } } diff --git a/R/pretty_colollipop_plot.r b/R/pretty_colollipop_plot.r index ef09048..038657e 100644 --- a/R/pretty_colollipop_plot.r +++ b/R/pretty_colollipop_plot.r @@ -1,6 +1,7 @@ #' @title Pretty CoLollipop Plot. #' -#' @description Generates a ggplot-compatible figure of 2 [GAMBLR.viz::pretty_lollipop_plot] mirrored. +#' @description Generates a ggplot-compatible figure of 2 [GAMBLR.viz::pretty_lollipop_plot] mirrored, and a +#' [GAMBLR.viz::prettyForestPlot] displayed below. #' #' @details Retrieve maf data of a specific sample or a set of samples for comparison. A gene of interest #' can then be visualized with the given maf data files, and comparison commands. 
Silent mutations can be @@ -137,7 +138,7 @@ pretty_colollipop_plot <- function( meta2_counter <- length(unique(meta2$Tumor_Sample_Barcode)) # Pass combined_gene_counts to pretty_lollipop_plot for plotting - combined_plot <- pretty_lollipop_plot( + colollipop_plot <- pretty_lollipop_plot( maf_df = maf_df, gene = gene, plot_title = plot_title, @@ -151,5 +152,33 @@ pretty_colollipop_plot <- function( Sample2 = comparison_values[2] ) - return(combined_plot) + forest_plot <- prettyForestPlot( + maf = maf_df, + metadata = metadata, + genes = gene, + comparison_column = comparison_column, + comparison_values = comparison_values, + separate_hotspots = FALSE, + comparison_name = paste0( + comparison_values[1], + " vs. ", + comparison_values[2] + ), + mirrorarg = TRUE + ) + + # arrange colollipop plot and forest plot together one over the other + plot <- ggarrange( + colollipop_plot, + ggarrange( + forest_plot$forest, + forest_plot$bar, + widths = c(1, 0.6), + common.legend = TRUE, + align = "h"), + ncol = 1, + nrow = 2, + heights = c(2, 1) + ) + return(plot) } diff --git a/man/prettyForestPlot.Rd b/man/prettyForestPlot.Rd index 8d748a5..3f16f10 100644 --- a/man/prettyForestPlot.Rd +++ b/man/prettyForestPlot.Rd @@ -18,7 +18,8 @@ prettyForestPlot( comparison_name = FALSE, custom_colours = FALSE, custom_labels = FALSE, - max_q = 1 + max_q = 1, + mirrorarg = FALSE ) } \arguments{ @@ -49,6 +50,8 @@ prettyForestPlot( \item{custom_labels}{Optional: Specify custom labels for the legend categories. Must be in the same order as comparison_values.} \item{max_q}{cut off for q values to be filtered in fish test} + +\item{mirrorarg}{Logical paramter for when mirroring lollipop data in prety_co_lollipop plot. Default is FALSE.} } \value{ A convenient list containing all the data frames that were created in making the plot, including the mutation matrix. It also produces (and returns) ggplot object with a side-by-side forest plot and bar plot showing mutation incidences across two groups. 
diff --git a/man/pretty_colollipop_plot.Rd b/man/pretty_colollipop_plot.Rd index 1954e65..5af7682 100644 --- a/man/pretty_colollipop_plot.Rd +++ b/man/pretty_colollipop_plot.Rd @@ -33,7 +33,8 @@ pretty_colollipop_plot( A mirrored lollipop plot. } \description{ -Generates a ggplot-compatible figure of 2 [GAMBLR.viz::pretty_lollipop_plot] mirrored. +Generates a ggplot-compatible figure of 2 [GAMBLR.viz::pretty_lollipop_plot] mirrored, and a +[GAMBLR.viz::prettyForestPlot] displayed below. } \details{ Retrieve maf data of a specific sample or a set of samples for comparison. A gene of interest From eaa0d26b5075f8903633d24d4f2445b574426042 Mon Sep 17 00:00:00 2001 From: atlantis-real Date: Mon, 21 Oct 2024 00:40:34 -0700 Subject: [PATCH 6/8] cleaning forest implementation --- R/pretty_colollipop_plot.r | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/pretty_colollipop_plot.r b/R/pretty_colollipop_plot.r index 038657e..c46363b 100644 --- a/R/pretty_colollipop_plot.r +++ b/R/pretty_colollipop_plot.r @@ -176,9 +176,9 @@ pretty_colollipop_plot <- function( widths = c(1, 0.6), common.legend = TRUE, align = "h"), - ncol = 1, - nrow = 2, - heights = c(2, 1) + ncol = 1, + nrow = 2, + heights = c(2, 1) ) return(plot) } From d5bc35499c9eae81140358a8427aaa5a4aa3d0b7 Mon Sep 17 00:00:00 2001 From: atlantis-real Date: Tue, 22 Oct 2024 20:44:56 -0700 Subject: [PATCH 7/8] update --- R/prettyForestPlot.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R/prettyForestPlot.R b/R/prettyForestPlot.R index 854b59e..cc5bddd 100644 --- a/R/prettyForestPlot.R +++ b/R/prettyForestPlot.R @@ -23,7 +23,6 @@ #' @param custom_colours Optional: Specify a named vector of colours that match the values in the comparison column. #' @param custom_labels Optional: Specify custom labels for the legend categories. Must be in the same order as comparison_values. 
#' @param max_q cut off for q values to be filtered in fish test -#' @param mirrorarg Logical paramter for when mirroring lollipop data in prety_co_lollipop plot. Default is FALSE. #' #' @return A convenient list containing all the data frames that were created in making the plot, including the mutation matrix. It also produces (and returns) ggplot object with a side-by-side forest plot and bar plot showing mutation incidences across two groups. #' @@ -300,9 +299,5 @@ prettyForestPlot = function(maf, align = "h" ) - if (mirrorarg == FALSE) { - return(list(fisher = fish_test, forest = forest, bar = bar, arranged = arranged_plot, mutmat = mutmat)) - } else { - return(list(forest = forest, bar = bar)) - } + return(list(fisher = fish_test, forest = forest, bar = bar, arranged = arranged_plot, mutmat = mutmat)) } From 95ce0bd3dbf77f3eeb92fb060594cbdde173ecac Mon Sep 17 00:00:00 2001 From: atlantis-real Date: Tue, 22 Oct 2024 23:23:01 -0700 Subject: [PATCH 8/8] finalized plot --- R/prettyForestPlot.R | 3 +-- R/pretty_colollipop_plot.r | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/prettyForestPlot.R b/R/prettyForestPlot.R index cc5bddd..4155007 100644 --- a/R/prettyForestPlot.R +++ b/R/prettyForestPlot.R @@ -64,8 +64,7 @@ prettyForestPlot = function(maf, comparison_name = FALSE, custom_colours = FALSE, custom_labels = FALSE, - max_q = 1, - mirrorarg = FALSE){ + max_q = 1){ #If no comparison_values are specified, derive the comparison_values from the specified comparison_column if(comparison_values[1] == FALSE){ diff --git a/R/pretty_colollipop_plot.r b/R/pretty_colollipop_plot.r index c46363b..b5da1e4 100644 --- a/R/pretty_colollipop_plot.r +++ b/R/pretty_colollipop_plot.r @@ -163,8 +163,7 @@ pretty_colollipop_plot <- function( comparison_values[1], " vs. ", comparison_values[2] - ), - mirrorarg = TRUE + ) ) # arrange colollipop plot and forest plot together one over the other