-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalc-convRate-from-hairpin-adapters.pl
42 lines (36 loc) · 1.27 KB
/
calc-convRate-from-hairpin-adapters.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env perl
# This script is used for Hammer-seq analysis, calculate BS conversion rate from unmethylated hairpin adapters.
# last update: 2020.07.03;
use strict;
use v5.10;
my $reads_num = 0;
my $numC = 0;
while(<>){
# get seq from the 2nd of every 4 lines;
my $seq = <>;
<>;<>; # skip 3rd, 4th lines.
chomp $seq;
if ($seq =~ /([CT]GGATGAG[CT]AAGTGAAG[CT]T[CT]AT[CT][CT]GAT[CT])/){
my $m = $1;
$reads_num += 1;
my $C = $m =~ tr/C/C/;
$numC += $C;
}
print "$. lines.\n" if ($. % 10000000 == 0);
}
# output to STDOUT
print "\n";
say "Total hairpins found:\t $reads_num";
say "Total C in hairpins:\t", $reads_num*7;
say "Non converted C:\t $numC";
say "Non converted C (%):\t", sprintf("%.2f", $numC / $reads_num / 7 * 100), '%';
say "Convertion rate (%):\t", sprintf("%.2f", (1-$numC / $reads_num / 7)*100), '%';
# output to file named 'conv_rate'
open my $FH, ">>conv_rate";
say $FH "Total hairpins found:\t $reads_num";
say $FH "Total C in hairpins:\t", $reads_num*7;
say $FH "Non converted C:\t $numC";
say $FH "Non converted C (%):\t", sprintf("%.2f", $numC / $reads_num / 7 * 100), '%';
say $FH "Convertion rate (%):\t", sprintf(("%.2f", 1-$numC / $reads_num / 7)*100), '%';
# End of the file.