-
Notifications
You must be signed in to change notification settings - Fork 0
/
scRNA - tag aneuploid cells.R
158 lines (135 loc) · 5.7 KB
/
scRNA - tag aneuploid cells.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#this script will tag cells as aneuploid or not based on the average expression of chromosomes that are expected to be aneuploid
#inputs
seuratObj <- seuratObj
#parameters
subsample = FALSE #If true, will subsample 10% of the cells to perform the analysis. Usefull to speed up the script.
aneu_chromo <- list("B12" = c("01", "05", "10", "13", "20", "23"),
"E4" = c("13", "20", "23", "26"),
"G3" = c("20", "23", "26", "29"),
"G6" = c("01", "09", "13", "16", "20", "22", "23", "24", "26", "35"))
#aneu_chromo <- c("02")
#libraries
library(tidyverse)
library(ggplot2)
library(pheatmap)
library(ggridges)
library(Seurat)
library(viridis)
#custom_functions
scale_vec <- function(x){
x <- (x - mean(x, na.rm=TRUE)) / sd(x, na.rm=TRUE)
return(x)
}
#code
rm(genes_meta)
rm(expr_table)
rm(sum_table)
genes_meta <- seuratObj@[email protected] #get the metadata for the genes
expr_table <- as.matrix(seuratObj@assays$RNA@data) #get the expression matrix
if(subsample){
cells <- c(1:ncol(expr_table))
cells <- sample(cells, size = round(length(cells)*0.1), replace = FALSE)
expr_table <- expr_table[,cells]
rm(cells)
}
#add chromosome information in case it is missing
if(!"chromo" %in% colnames(seuratObj@[email protected])){
seuratObj@[email protected] <- seuratObj@[email protected] %>%
mutate(chromo = substr(id, 7, 8))
}
#set 0s to NAs
#expr_table[expr_table == 0] <- NA
#convert to dataframe and add a column with the genes names
expr_table <- expr_table %>%
as.data.frame()%>%
rownames_to_column("gene")
#check in the metadata to which chromosome each gene belongs
expr_table$chromo <- genes_meta$chromo[match(expr_table$gene, genes_meta$name)]
#calculating average expression of each chromosome####
sum_table <- expr_table %>%
#convert to tidy dataframe
pivot_longer(cols = c(2:(ncol(expr_table)-1)), names_to = "cell", values_to = "expression") %>%
#remove kDNA
filter(chromo != 37) %>%
#for each cell and each chromosome
group_by(cell, chromo)%>%
#calculate the average expression of each chromosome in each cell
summarise(mean_expr = mean(expression, na.rm = T)) %>%
#summarise(mean_expr = sum(expression, na.rm = T)/n())%>%
ungroup()
#ploting heatmap with calculated average expression of each chromosome####
# to_plot <- sum_table %>%
# pivot_wider(names_from = cell, values_from = mean_expr)%>%
# as.data.frame()
#
# rownames(to_plot) <- to_plot$chromo
# to_plot$chromo <- NULL
# to_plot <- as.matrix(to_plot)
#
# pheatmap(to_plot,
# show_colnames = F,
# scale = "column",
# clustering_method = "ward.D2",
# cutree_rows = 2)
#Calculatiing for each cell the ration between the average expression of aneuploid chromosomes vs normal chromosomes####
sum_table$sample <- [email protected]$sample[match(sum_table$cell, rownames([email protected]))]
cell_aneu_ratio <- sum_table %>%
group_by(sample)%>%
mutate(aneu_chromo = chromo %in% unlist(aneu_chromo[which(names(aneu_chromo) == sample)]))%>%
mutate(chromo_group = ifelse(aneu_chromo, "aneuploid", "normal"))%>%
group_by(cell, chromo_group)%>%
summarise(mean_expr = mean(mean_expr))%>%
pivot_wider(names_from = chromo_group, values_from = mean_expr)%>%
mutate(aneu_ratio = aneuploid/normal)%>%
ungroup()
#assigning the aneu_ratio to the cells metadata in the seurat seuratObj
[email protected]$aneu_ratio <- NA
[email protected]$aneu_ratio <- cell_aneu_ratio$aneu_ratio[match(cell_aneu_ratio$cell, rownames([email protected]))]
#plotting the aneu_ratio distribution
ggplot(aes(x = aneu_ratio, y = sample, fill = sample))+
geom_density_ridges(scale = 3, alpha= 0.5)+
guides(fill = "none")+
labs(x = "Aneuploid/disomic ratio", y = "Sample")
col_vec <- c("#f7f7f7", "#fcffa6", "yellow", "orange", "red", "dark red", "#730000", "#330000")
col_vec <- rev(viridis::magma(8))
col_vec <- c("#f7f7f7", "#f7fabb", "#e6eb94", "#638a85", "#485463", "#2a3440", "#182029")
#plotting the aneu_ratio in a UMAP
patchwork::wrap_plots(FeaturePlot(seuratObj,
feature = "aneu_ratio",
reduction = "umap",
split.by = "sample",
keep.scale = NULL,
combine = FALSE))&
theme_minimal()&
theme(panel.grid = element_blank())&
scale_color_gradientn(colors = col_vec, limits = c(0.5, 2.1))
#scale_color_gradientn(colors = rev(viridis::magma(8)))
#Now it will use kmeans to determine if cells are aneuploid or normal
#NTS: for now it assumes that both normal and aneuploid cells co-exist in the data. Fix it later.
clusters <- data.frame(value = [email protected]$aneu_ratio)
clusters$group <- kmeans(clusters$value, centers = 2)$cluster
group_name <- unique(clusters$group)
group_name[which(group_name == max(group_name))] <- "aneuploid"
group_name[which(group_name == min(group_name))] <- "normal"
clusters$group_name <- group_name[clusters$group]
clusters %>%
ggplot(aes(x = value, fill = group_name))+
geom_density(alpha = 0.5)
#adding the group name
[email protected]$aneu_group <- clusters$group_name
#adding the group name as an identity class
seuratObj <- SetIdent(seuratObj, value = "aneu_group")
#plotting the result
DimPlot(seuratObj, reduction = "umap", group.by = "aneu_group", split.by = "sample")&
scale_color_manual(values = c("#226185", "#e6d99a"))
#finding marker genes between aneuploid and diploid cells
marker_genes <- seuratObj %>%
SetIdent(value = "aneu_group")%>%
FindAllMarkers(slot = "scale.data", only.pos = FALSE)
genes_to_plot <- marker_genes %>%
group_by(cluster) %>%
arrange(p_val_adj)%>%
mutate(position = 1:n()) %>%
filter(position <= 20)
DoHeatmap(seuratObj, features = genes_to_plot$gene, group.by = "aneu_group", slot = "scale.data")