-
Notifications
You must be signed in to change notification settings - Fork 0
/
3_markers_annotation.Rmd
124 lines (109 loc) · 5.48 KB
/
3_markers_annotation.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
---
title: "scRNA-seq Rat Metrial Glands"
author: "Ha T. H. Vu"
output: html_document
---
```{r setup, include=FALSE}
options(max.print = "75")
knitr::opts_chunk$set(
echo = TRUE,
collapse = TRUE,
comment = "#>",
fig.path = "Files/",
fig.width = 15,
fig.height = 15,
prompt = FALSE,
tidy = FALSE,
message = FALSE,
warning = TRUE
)
knitr::opts_knit$set(width = 75)
```
This is a documentation for analyses of scRNA-seq data, generated from rat metrial gland tissues on gestational day (GD) 15.5 and 19.5. <br>
We will use differential expression analysis within each timepoint to find the genes enriched in each cluster, then annotate the clusters using known markers of each cell type. First, save the known markers into vectors:
```{r}
tbmarkers <- c("Prl5a1", "Prl7b1", "Krt7", "Krt8", "Krt18")
nkmarkers <- c("Nkg7", "Prf1", "Gzmm", "Gzmbl2")
endomarkers <- c("Cdh5", "Plvap", "Adgrl4", "Egfl7")
macromarkers <- c("C1qa", "Lyz2", "Aif1", "Cybb")
vascularmarkers <- c("Acta2", "Myl9", "Tagln", "Myh11")
features <- c(tbmarkers, nkmarkers, endomarkers, macromarkers, vascularmarkers)
```
## 1. GD15.5 clusters
```{r}
library(dplyr)
library(Seurat)
library(patchwork)
library(ggplot2)
library(knitr)
set.seed(1234)
load("/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/3_cluster_DEG/rmChrMTgenes/GD15.5/gd15.5_res0.8.rda")
```
```{r, eval = F}
gd15.5.markers.res0.8.wilcoxin <- FindAllMarkers(data, only.pos = TRUE, logfc.threshold=0)
#save(gd15.5.markers.res0.8.wilcoxin, file = "/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/3_cluster_DEG/rmChrMTgenes/GD15.5/gd15.5.markers.res0.8.wilcoxin.rda")
```
```{r}
load("/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/3_cluster_DEG/rmChrMTgenes/GD15.5/gd15.5.markers.res0.8.wilcoxin.rda")
gd15.5.markers.res0.8.wilcoxin <- subset(gd15.5.markers.res0.8.wilcoxin, gd15.5.markers.res0.8.wilcoxin$p_val_adj <= 0.05)
for (i in levels(gd15.5.markers.res0.8.wilcoxin$cluster)) {
subcluster <- subset(gd15.5.markers.res0.8.wilcoxin, gd15.5.markers.res0.8.wilcoxin$cluster == i)
subcluster <- subset(subcluster, subcluster$avg_log2FC >= log2(1.5))
file <- paste0("/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/3_cluster_DEG/rmChrMTgenes/GD15.5/res0.8/gd15.5-wilcoxin-cluster", i, ".txt")
#write.table(subcluster, file, sep = '\t', quote = F)
print(paste("Cluster", i, "at GD15.5 has:", length(unique(subcluster$gene)), "genes."))
print(paste("Cluster", i, "at GD15.5 has the following known markers:",
toString(intersect(unique(subcluster$gene), tbmarkers)), "(invasive trophoblast cells);",
toString(intersect(unique(subcluster$gene), nkmarkers)), "(natural killer cells);",
toString(intersect(unique(subcluster$gene), endomarkers)), "(endothelial cells);",
toString(intersect(unique(subcluster$gene), macromarkers)), "(macrophage cells);",
toString(intersect(unique(subcluster$gene), vascularmarkers)), "(smooth muscle cells)."))
}
```
We can also plot the expression level of the markers in an UMAP for visualization of the cell clusters.
```{r}
FeaturePlot(data, features = features)
```
We conclude the cell clusters as the following identities:
```{r}
sum <- read.table("/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/MetrialGland-scRNA-seq/Files/gd15.5-cellIdentities.txt", header = T, sep = "\t")
kable(sum, caption = "GD15.5 summary")
```
## 2. GD19.5 clusters
```{r}
set.seed(1234)
load("/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/3_cluster_DEG/rmChrMTgenes/GD19.5/gd19.5-4-5-6-7_res0.8.rda")
data <- data2
DefaultAssay(data) <- "RNA"
```
```{r, eval = F}
gd19.5.markers.res0.8.wilcoxin <- FindAllMarkers(data, only.pos = TRUE, logfc.threshold=0)
#save(gd19.5.markers.res0.8.wilcoxin, file = "/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/3_cluster_DEG/rmChrMTgenes/GD19.5/gd19.5.markers.res0.8.wilcoxin.rda")
```
```{r}
load("/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/3_cluster_DEG/rmChrMTgenes/GD19.5/gd19.5.markers.res0.8.wilcoxin.rda")
gd19.5.markers.res0.8.wilcoxin <- subset(gd19.5.markers.res0.8.wilcoxin, gd19.5.markers.res0.8.wilcoxin$p_val_adj <= 0.05)
for (i in levels(gd19.5.markers.res0.8.wilcoxin$cluster)) {
subcluster <- subset(gd19.5.markers.res0.8.wilcoxin, gd19.5.markers.res0.8.wilcoxin$cluster == i)
subcluster <- subset(subcluster, subcluster$avg_log2FC >= log2(1.5))
file <- paste0("/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/3_cluster_DEG/rmChrMTgenes/GD19.5/res0.8/gd19.5-wilcoxin-cluster", i, ".txt")
#write.table(subcluster, file, sep = '\t', quote = F)
print(paste("Cluster", i, "at GD19.5 has the following known markers:",
toString(intersect(unique(subcluster$gene), tbmarkers)), "(invasive trophoblast cells);",
toString(intersect(unique(subcluster$gene), nkmarkers)), "(natural killer cells);",
toString(intersect(unique(subcluster$gene), endomarkers)), "(endothelial cells);",
toString(intersect(unique(subcluster$gene), macromarkers)), "(macrophage cells);",
toString(intersect(unique(subcluster$gene), vascularmarkers)), "(smooth muscle cells)."))
}
```
```{r}
FeaturePlot(data, features = features)
```
We conclude the cell clusters as the following identities:
```{r}
sum <- read.table("/work/LAS/geetu-lab/hhvu/project3_scATAC/scRNA-seq-analysis/MetrialGland-scRNA-seq/Files/gd19.5-cellIdentities.txt", header = T, sep = "\t")
kable(sum, caption = "GD19.5 summary")
```
```{r}
sessionInfo()
```