-
Notifications
You must be signed in to change notification settings - Fork 0
/
1-BasicStructures.txt
167 lines (113 loc) · 3.71 KB
/
1-BasicStructures.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
screen #Get into a screen
ctrl+A D #Detach from the screen
screen -ls #See the ids of the screens you have
screen -r <ID> #reattach to that screen
exit #kill the screen
#ALIASES
alias ll='ls -la'
#LOOPS
# Process inputFile.txt one line at a time; everything the loop body writes to
# stdout is collected in outputFile.txt (cmmnds is a placeholder).
# IFS= keeps leading/trailing whitespace and -r keeps backslashes literal, so
# each line arrives in $line unmodified.
while IFS= read -r line; do
  cmmnds
  cmmnds
  cmmnds
done < inputFile.txt > outputFile.txt
#TEXT EDITOR
nano test.sh
# Iterate i over the range produced by seq from the first to the second
# positional argument. Whatever the loop body writes to stdout ends up in
# outputFile.txt (cmmnd is a placeholder).
# The positional parameters are quoted so they reach seq as single arguments;
# the $(...) itself stays unquoted on purpose so its output splits into words.
for i in $(seq "${1}" "${2}"); do
  cmmnd > tempFile    # truncates tempFile on every iteration
  cmmnd >> tempFile   # appends to tempFile
  cmmnd               # stdout goes to outputFile.txt via the loop redirection
done > outputFile.txt
#So, if I want to append an empty line at the end of a file called "Lyrics_[1-100].txt", how do I do it?
#Sum the first column of count.txt with awk (awk opens the file itself, no cat needed)
#sum+0 forces a numeric result, so an empty file prints 0 instead of a blank line
awk '{sum+=$1} END {print sum+0}' count.txt
#Lovely sed
sed 's/,//g'
#Replace every occurrence of a pattern, in place, in all files matched by the glob, with a single sed command
sed -i 's/old/new/g' *filesufix #Can you start to see the beauty of this? There are a million ways to do the exact same thing
#Remove every line that contains the match, together with the line right after it
sed '/WorkToSearch/,+1 d' file
#Delete any empty line
sed '/^$/d' file
#Get the length of a seq in a perl one-liner
#"$a" is quoted and sent with printf so whitespace, glob characters or a
#leading dash in the value reach perl unchanged
pep=$(printf '%s\n' "$a" | perl -nle 'print length')
#Read from 2 files! or more!!
# Read file1 (on stdin) and file2 (on fd 3) in lockstep and print each pair of
# lines one after the other. The loop stops as soon as either read fails,
# i.e. when the shorter file runs out of lines.
# printf '%s\n' prints the data verbatim; echo -e would expand backslash
# sequences that read -r deliberately preserved.
while IFS= read -r a && IFS= read -r b <&3; do
  printf '%s\n%s\n' "$a" "$b"
done < file1 3< file2
# Same lockstep pattern with four inputs: PS1.txt on stdin, PS2.txt-PS4.txt on
# fds 3-5. One line from each file is printed per iteration, in file order;
# the loop stops when any of the reads fails.
# printf reuses its '%s\n' format for every argument and does not interpret
# backslashes in the data (echo -e would).
while IFS= read -r a && IFS= read -r b <&3 && IFS= read -r c <&4 && IFS= read -r d <&5; do
  printf '%s\n' "$a" "$b" "$c" "$d"
done < PS1.txt 3< PS2.txt 4< PS3.txt 5< PS4.txt
#Have more than one field separator
awk -F"[,=]" '{print $6}' file
#Print the last field with separator ";"
awk -F";" '{print $NF}' 16S.txt
# And for seqs of letters
# Brace expansion already yields one word per letter, so no $(echo ...) subshell is needed
for i in {a..z}; do echo "$i"; done
## Truncating names and more #<<<<---- VERY useful
${#file} => length of the path/number of letters (with dots and any symbol)
${file%.*} => truncate right of the last dot
${file%%.*} => truncate right of the first dot
${file#*.} => truncate left of the first dot
${file##*.} => truncate left of the last dot
# Pass a shell variable to awk
# -v is the POSIX spelling of gawk's --assign; quoting "${line}" keeps a value
# containing spaces as a single argument instead of splitting it into file names.
# Input is read from stdin, as no file operand is given.
awk -v var="${line}" '{sum+=$1} END {print var"\n"sum}'
##########Exercise:
#I have a sequence file that looks like this in the header
>cc786992bf3d628b862dfc7e18f86b531c4ef6bf;barcodelabel=Bl1ITS
AACGCACATTGCGCCCCTTGGTATTCCATGGGGCATGCCTGTTCGAGCGTCATTTGTACCTTCAAGCTCTGCTTGGTGTT
GGGTGTTTGTCTCGCCTCTGCGTGTAGACTCGCCTTAAAACAATTGGCAGCCGGCGTATTGATTTCGGAGCGCAGTACAT
CTCGCGCTTTGCACTCACAACGACGACGTCCAAAAGTACATTTTTACACTCTTGACCTCGGATCAGGTAGGGATACCCGC
TGAACTTAA
>cc786992bf3d628b862dfc7e18f86b531c4ef6bf;barcodelabel=Bl1ITS
AACGCACATTGCGCCCCTTGGTATTCCATGGGGCATGCCTGTTCGAGCGTCATTTGTACCTTCAAGCTCTGCTTGGTGTT
GGGTGTTTGTCTCGCCTCTGCGTGTAGACTCGCCTTAAAACAATTGGCAGCCGGCGTATTGATTTCGGAGCGCAGTACAT
CTCGCGCTTTGCACTCACAACGACGACGTCCAAAAGTACATTTTTACACTCTTGACCTCGGATCAGGTAGGGATACCCGC
TGAACTTAA
But I need the header to read so that it only includes the information after the = sign
>Bl1ITS
sequencehere
>Bl1ITS
sequencehere
Is there a sed command to get rid of all of the junk that comes before the = sign above?
####### If you want to get fancy
# functions in shell
# Map: template for a shell function that takes two positional arguments.
# Arguments:
#   $1 - stored in the local variable refGenome
#   $2 - stored in the local variable file
# Both values are handed to cmmnd (a placeholder for the real command) as
# single, quoted arguments so that spaces or glob characters in them survive.
Map(){
  local refGenome=$1
  local file=$2
  cmmnd Using "$refGenome" ran with "$file"
}
########################## R
#Use R in a one liner
cut -f 3 all.blast | Rscript -e 'data=scan(file="stdin"); pdf("hist.pdf"); hist(data)'
##R oneliner
cat datafile | Rscript -e 'print(summary(scan("stdin")))'
#To pass arguments to R
args<- commandArgs(trailingOnly = TRUE)
#With R write/append to an existing file
write(line, file="myfile", append=TRUE)
#Cat with R
cat("line to add", file=FileName, append=TRUE, sep = "\n")
#Input from pipe to Rscript
nano index.R
#! /usr/bin/env Rscript
d<-scan("stdin", quiet=TRUE)
cat(max(d)/sum(d))
#nano getIndex.sh
# For every file matching *genesHits_L5.txt, pipe its first column into
# ./index.R and collect the results in ConsistencyIndexes.txt.
# The glob is expanded by the shell directly instead of parsing ls output, so
# file names containing spaces stay intact.
for file in *genesHits_L5.txt; do
  [ -e "$file" ] || continue   # no match: the glob stays literal, skip it
  awk '{print $1}' "$file" | ./index.R
  echo                         # line break after each result
done > ConsistencyIndexes.txt
# PDFs in R
pdf("FileName.pdf")
plot(density(log(as.numeric(RATIOS_nuc[,3]))))
dev.off()
#Loops in R
for(i in 1:14){
cmmnds
}