forked from AlexSchulz98/CircularCodeMotifs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CDS_Analysis.Rmd
78 lines (58 loc) · 1.53 KB
/
CDS_Analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
---
title: "CDS_Analysis"
author: "Alexander Schulz"
date: "21 2 2020"
output: html_document
---
Analysis of the coding sequences (CDS) of each organism: number of CDS, number of codons, and average CDS length in codons.
```{r include=FALSE}
library("seqinr")
library("ccmotif") # Version 0.6.6
library("Biostrings")
source("Scripts/Sequences.R")
library("xlsx")
```
Change the input directory containing the FASTA files here:
```{r}
# Directory holding the coding-sequence FASTA files (keep the trailing "/",
# the path is concatenated with each file name further down).
pathSeq <- "../BA Circular Code/cds/"
# `pattern` is a regular expression, not a shell glob: anchor on the literal
# ".fasta" extension so names such as "x.fasta.fai" or "myfasta.txt" are
# not picked up by accident.
filenames <- list.files(pathSeq, pattern = "\\.fasta$")
```
```{r}
# Build one summary row per FASTA file listed in `filenames`:
#   Name      - file name
#   CDS       - "<sampled>/<total>" number of sequences in the file
#   Codons    - total number of codons counted over the analysed sequences
#   AvgCodons - mean number of codons per analysed sequence (1 decimal)
# The rows are collected in a list and bound once at the end instead of
# growing the data frame inside the loop.
max_cds <- 1000 # at most this many sequences are analysed per file

rows <- vector("list", length(filenames))
for (i in seq_along(filenames)) {
  file <- filenames[i]
  dnaf <- readDNAStringSet(paste0(pathSeq, file))
  cds <- length(dnaf)
  n_sampled <- min(cds, max_cds)

  # seq_len() keeps an empty file empty; 1:0 would index c(1, 0) and fail.
  dnaf <- dnaf[seq_len(n_sampled)]
  # deleting sequences with IUPAC Codes (reinhardtii, celegans)
  dnaf <- deleteIUPACSequences(dnaf)
  n_analysed <- length(dnaf)

  codons_in_seq <- 0
  for (z in seq_len(n_analysed)) {
    codons_in_seq <- codons_in_seq + length(codons(dnaf[[z]]))
  }

  # Divide by the number of sequences that were actually counted, not by the
  # sample size, so files that lose sequences to the IUPAC filter are not
  # under-reported. No sequences left -> NA rather than a division by zero.
  avg_codons <- if (n_analysed > 0) codons_in_seq / n_analysed else NA_real_

  # Fill data frame
  rows[[i]] <- data.frame(
    Name = file,
    CDS = paste0(n_sampled, "/", cds),
    Codons = codons_in_seq,
    AvgCodons = round(avg_codons, 1)
  )
}

df_cds <- if (length(rows) > 0) {
  do.call(rbind, rows)
} else {
  # No input files: keep a well-formed, empty result with the same columns.
  data.frame(
    Name = character(),
    CDS = character(),
    Codons = numeric(),
    AvgCodons = numeric()
  )
}
```
```{r}
# Persist the summary table as RDS for later analysis steps.
# Create the output directory first so the write does not fail after the
# whole analysis has already run; this is a no-op when it already exists.
dir.create("analysis_results", showWarnings = FALSE, recursive = TRUE)
saveRDS(
  object = df_cds,
  file = "analysis_results/CDS_Analysis.RDS"
)
```
Export the complete data frame to an Excel file:
```{r}
# Write the summary table to an Excel workbook, replacing any existing file.
xlsx_path <- "analysis_results/CDS_Analysis.xlsx"
write.xlsx(
  x = df_cds,
  file = xlsx_path,
  sheetName = "Sheet 1",
  append = FALSE
)
```