forked from AlexsLemonade/OpenPBTA-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfig1-sample-distribution.R
133 lines (113 loc) · 4.02 KB
/
fig1-sample-distribution.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Sample Distribution Figure
#
# 2020
# Chante Bethell for ALSF - CCDL
#
# This script is intended to run steps needed to create Figure 1.
# Load in libraries
library(dplyr)
library(ggplot2)
library(colorspace)
library(scales)
library(treemapify)
library(patchwork)
# Magrittr pipe
`%>%` <- dplyr::`%>%`
# Detect the ".git" folder -- this will be in the project root directory.
# Use this as the root directory to ensure proper execution, no matter where
# it is called from.
root_dir <- rprojroot::find_root(
  rprojroot::has_dir(".git")
)

# Figure PNGs are written beneath <root>/figures/pngs
output_dir <- file.path(root_dir, "figures", "pngs")

#### Locations of the analysis modules this figure draws on -------------------
sample_distribution_dir <-
  file.path(root_dir, "analyses", "sample-distribution-analysis")
## TODO: Define the file path to the directory containing the data for the
## contributions plot
#### Read in the associated results -------------------------------------------

# Disease expression table written by `01-filter-across-types.R`
disease_expression <- sample_distribution_dir %>%
  file.path("results", "disease_expression.tsv") %>%
  readr::read_tsv()

# Turn `harmonized_diagnosis` into a factor whose levels are ordered by
# descending `count`, which is the order the diagnoses are displayed in on a
# plot
disease_expression[["harmonized_diagnosis"]] <- reorder(
  disease_expression[["harmonized_diagnosis"]],
  -disease_expression[["count"]]
)

# Plots data.frame written by `02-multilayer-plots.R`; the `hex_codes` column
# stored in that file is dropped here
plots_df <- sample_distribution_dir %>%
  file.path("results", "plots_df.tsv") %>%
  readr::read_tsv() %>%
  dplyr::select(-hex_codes)

# Histology color palette, reduced to the unique combinations of the columns
# needed for making sure the hex_codes are up to date
histology_label_mapping <- root_dir %>%
  file.path("figures", "palettes", "histology_label_color_table.tsv") %>%
  readr::read_tsv() %>%
  dplyr::select(display_group, display_order, hex_codes) %>%
  dplyr::distinct()
#### Re-run the individual plots ----------------------------------------------

# Create a treemap of broad histology, short histology, and harmonized
# diagnosis.
#
# Join the color palette onto the treemap data: `level2` is matched against
# `display_group` in the color table (the palette is generated in
# `figures/scripts/color_palettes.R`).
plots_df2 <- plots_df %>%
  dplyr::left_join(
    histology_label_mapping,
    by = c("level2" = "display_group")
  ) %>%
  # Remove the redundant rows from prep for the `treemap` function
  dplyr::distinct()

# Plot the treemap where level1 is `broad_histology`,
# level2 is `display_group`, and level3 is `harmonized_diagnosis`
treemap <-
  ggplot(
    plots_df2,
    aes(
      area = size,
      fill = hex_codes,
      label = level3,
      subgroup = level1
    )
  ) +
  geom_treemap() +
  geom_treemap_subgroup_border(colour = "white") +
  # Tile labels (level3), drawn small and faint in the top right of each tile
  geom_treemap_text(
    fontface = "italic",
    colour = "white",
    place = "topright",
    alpha = 0.3,
    grow = FALSE,
    reflow = TRUE,
    min.size = 0,
    size = 6
  ) +
  # Subgroup labels (level1), drawn larger in the bottom left of each subgroup
  geom_treemap_subgroup_text(
    place = "bottomleft",
    grow = FALSE,
    reflow = TRUE,
    alpha = 0.6,
    colour = "#FAFAFA",
    size = 10
  ) +
  theme(legend.position = "none") +
  # `fill` is mapped to the hex codes themselves, so use them as literal colors
  scale_fill_identity()
## TODO: Re-run or load in plots of the project features and assays -- until
## that is done, both plots are NULL placeholders
project_features_plot <- NULL
project_assays_plot <- NULL

## TODO: Re-run Github Contributions plot/table here -- until that is done,
## this plot is a NULL placeholder
github_contributions_plot <- NULL
#### Assemble multipanel plot -------------------------------------------------

# Combine plots with patchwork.
# Layout of the four plots will be two over the other two
# (2 columns and 2 rows), with panels tagged "A", "B", ...
# The trailing `& theme(...)` gives the panels uniform axis text sizes.
combined_plot <- treemap + project_features_plot +
  project_assays_plot + github_contributions_plot +
  plot_layout(ncol = 2, nrow = 2) +
  plot_annotation(tag_levels = "A") &
  theme(
    axis.text.x = element_text(size = 9),
    axis.text.y = element_text(size = 9)
  )

# Make sure the output directory exists before trying to write into it
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# Save to PNG -- pass the assembled plot explicitly instead of relying on
# whatever `last_plot()` happens to hold
ggplot2::ggsave(
  filename = file.path(output_dir, "fig1-openpbta-distribution.png"),
  plot = combined_plot,
  width = 12,
  height = 8,
  units = "in"
)