-
Notifications
You must be signed in to change notification settings - Fork 0
/
plotVirusFunction.R
executable file
·162 lines (133 loc) · 5.99 KB
/
plotVirusFunction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# ******** function plotVirus ********
# input: *tableToPlot - which of the results do you want plotted (e.g. FinalBestHit or FinalGuess)
# *criteriaTable - which table is used to filter the plotted genomes (e.g. with finalBestHitReads and the
# default threshold minReadThreshold = 100, Organisms where at least one had 100 or more
# reads mapped will be kept, the rest is tossed)
# *metadata - table with metadata
# *minReadThreshold - used for the filtering of organisms with at least some abundance
# *lazOrdered - should the IDs in the final plot be organized by laz score? T/F
# *savePlots - should the resulting plots be saved as pdf? T/F (will create a folder "figures")
plotVirus = function(tableToPlot,
criteriaTable=finalBestHitReads,
metadata=metadata,
minReadThreshold=100,
maxPerPlot = 20,
lazOrdered=F,
savePlots=T)
{
if (savePlots & !file.exists(file.path(getwd(), "figures"))){
dir.create(file.path(getwd(), "figures"))
}
# 1) == filtering and sorting ==
# select only the viruses which pass given criteria - default is to take the finalBestHitReadsTable and filter
# out ony the rows where at least one virus has at least 100 mapped reads
filteredGenomes = criteriaTable[apply(criteriaTable[,-c(1,2)], 1, function(x) !all(x < minReadThreshold)),1]$Genome
# take the selected table to plot - extract the viruses selected in previous step and sort according to the
# mean abundance across samples
plotTable = tableToPlot %>%
filter(Genome %in% filteredGenomes) %>%
group_by(Genome, Organism) %>%
ungroup() %>%
mutate(meanVal = rowMeans(.[,3:ncol(finalBestHit)])) %>%
gather(key = "ID", value = "abundance", 3:ncol(tableToPlot)) %>%
left_join(metadata, by = "ID") %>%
mutate_if(is.character, as.factor)
# 2) == plot abundance across samples in facets ==
# get IDs sorted by the mean abundance - to plot the facets from the most important to the least important
sortedID = plotTable %>%
select(Genome, Organism, meanVal) %>%
distinct() %>%
arrange(desc(meanVal)) %>%
mutate(Organism = as.character(Organism)) %>%
mutate(Organism = factor(Organism, Organism))
# relevel the plo table according to the sorted IDs
plotTable = plotTable %>%
mutate(Organism = factor(Organism, levels(sortedID$Organism)))
i = 0
while(!is.null(sortedID)){
i = i+1
if (nrow(sortedID) > maxPerPlot){
# was the function passed and argument lazOrdered?
if(lazOrdered==T){
# LAZ ordered
p = plotTable %>%
filter(plotTable$Genome %in% sortedID$Genome[1:maxPerPlot]) %>%
ggplot(aes(x= reorder(ID, laz), y = abundance, color = site))
}else{
# just groups
p = plotTable %>%
filter(plotTable$Genome %in% sortedID$Genome[1:maxPerPlot]) %>%
ggplot(aes(x= ID, y = abundance, color = site))
}
p = p + geom_point() + theme_bw() + facet_wrap(~Organism, scales = "free") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(p)
#if save plot mode is on- save the plot
if(savePlots){
ggsave(paste0("figures/AbundanceFacetPlot_", i, ".pdf"), width = 40, height = 20, units = "cm")
}
sortedID = sortedID[-seq(1,20),]
}else{
# was the function passed and argument lazOrdered?
if(lazOrdered==T){
# LAZ ordered
p = plotTable %>%
filter(plotTable$Genome %in% sortedID$Genome) %>%
ggplot(aes(x= reorder(ID, laz), y = abundance, color = site))
}else{
# just groups
p = plotTable %>%
filter(plotTable$Genome %in% sortedID$Genome) %>%
ggplot(aes(x= ID, y = abundance, color = site))
}
p = p + geom_point() + theme_bw() + facet_wrap(~Organism, scales = "free") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(p)
#if save plot mode is on- save the plot
if(savePlots){
ggsave(paste0("figures/AbundanceFacetPlot_", i, ".pdf"), width = 40, height = 20, units = "cm")
}
sortedID = NULL
}
}
# 3) == make circle plots ==
# plot as is
p= ggplot(plotTable,aes(x= Organism, y = ID, size = abundance, fill = site))
p = p + geom_point(shape = 21) + theme_bw() +
scale_size_continuous(range=c(0.2,15)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
ggtitle("Circle plot - unscaled")
print(p)
#if save plot mode is on- save the plot
if(savePlots){
ggsave("figures/DotsOnGridPlot_unscaled.pdf", width = 40, height = 20, units = "cm")
}
# Filter out the most abundant
p = plotTable %>%
filter(Organism != levels(plotTable$Organism)[1]) %>%
ggplot(aes(x= Organism, y = ID, size = abundance, fill = site))
p = p + geom_point(shape = 21) + theme_bw() +
scale_size_continuous(range=c(0.2,15)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
ggtitle("Circle plot - unscaled / Human alphherpesvirus 1 fltered out")
print(p)
#if save plot mode is on- save the plot
if(savePlots){
ggsave("figures/DotsOnGridPlot_unscaled_topOrganismFilteredOut.pdf", width = 40, height = 20, units = "cm")
}
# do z-score normalization
p = plotTable %>%
group_by(Organism) %>%
mutate(z_score = scale(abundance)) %>%
ggplot(aes(x= Organism, y = ID, size = z_score, fill = site, color= site))
p = p + geom_point(shape = 21,alpha = 0.8) + theme_bw() +
scale_size_continuous(range=c(0.1,7)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
ggtitle("Circle plot - z-scaled")
print(p)
#if save plot mode is on- save the plot
if(savePlots){
ggsave("figures/DotsOnGridPlot_zScaled.pdf", width = 40, height = 20, units = "cm")
}
}
plotVirus(finalBestHit, finalBestHitReads, metadata = metadata)