forked from gringer/bioinfscripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsam2mapped.pl
executable file
·69 lines (62 loc) · 1.33 KB
/
sam2mapped.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/perl
## sam2mapped -- created mapped sequences (as fasta files) for aligned regions
my $pos = -1;
my $seqName = "";
my $bestFlags = "";
my $bestID = "";
my $bestSeq = "";
my $bestQual = "";
my $bestLine = "";
my $seenCount = 0;
my $addSeq = 40; # amount of sequence to add to the start and end
my $output = "fa"; # can be "fa"
sub printFQ {
my ($id, $seq, $qual) = @_;
if($id){
printf("@%s\n%s\n+\n%s\n", $id, $seq, $qual);
}
}
sub printFA {
my ($id, $seq, $qual) = @_;
if($id){
printf(">%s\n%s\n", $id, $seq);
}
}
while(<>){
if(/^@/){
next;
}
my $line = $_;
chomp;
my @F = split(/\t/);
my $refPos = $F[3];
my $cigar = $F[5];
$cigar =~ s/[0-9]S$//;
my $seq = $F[9];
my $qual = $F[10];
my $startTrim = 0;
my $matchLen = 0;
while($cigar =~ s/^([0-9]+)([MIDNSHP=X])//){
my $subLen = $1;
my $op = $2;
if($op eq "S"){
if($subLen > $addSeq){
$seq = substr($seq, $subLen - $addSeq);
$qual = substr($qual, $subLen - $addSeq);
}
}
if($op =~ /[M=XI]/){
$matchLen += $subLen;
}
}
$seq = substr($seq, 0, $matchLen + $addSeq);
$qual = substr($qual, 0, $matchLen + $addSeq);
if($seq){
printFA($F[0], $seq, $qual);
}
}
if($output eq "fastq"){
printSeq($bestID, $bestSeq, $bestQual);
} elsif($output eq "sam"){
print($bestLine);
}