-
Notifications
You must be signed in to change notification settings - Fork 0
/
fconcatvcf
executable file
·112 lines (86 loc) · 1.86 KB
/
fconcatvcf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/perl
use warnings;
use strict;
use fralib;
use Getopt::Long;
use Pod::Usage;
=head1 NAME
fconcatvcf
=head1 SYNOPSIS
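fconcatvcf [options] <vcf_file> [<vcf_file> ...]
-h help
-v verbose (accepted; currently has no effect)
-o output file, written gzip-compressed (default: out.vcf.gz)
vcf_file one or more VCF files (gzip-compressed input is read through gunzip)
example: fconcatvcf -o merged.vcf.gz 1.vcf 2.vcf 10.vcf
Concatenates VCF files in numeric order of their names. Assumes every file has the same columns in the same order: the #CHROM header lines must be identical.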
=head1 DESCRIPTION
=cut
#option variables
my $help;                     # -h: show the full manual and exit
my $verbose;                  # -v: accepted on the command line; not read anywhere in the visible script
my $outFile = "out.vcf.gz";   # -o: output path; this default applies when -o is not given

#variables
# Only the state the rest of the script actually uses is declared here;
# declarations that nothing referenced have been removed.
my $headerProcessed;          # per-input-file flag, true once that file's #CHROM line has been seen

#initialize options
Getopt::Long::Configure ('bundling');

# Usage is printed (and pod2usage ends the program) in three situations:
# option parsing failed, -h was given, or no input files were named.
my $optionsParsed = GetOptions ('h'=>\$help, 'v'=>\$verbose, 'o=s'=>\$outFile);

if (!$optionsParsed || $help || scalar(@ARGV)==0)
{
    if ($help)
    {
        # Explicit request for help: print the whole POD manual.
        pod2usage(-verbose => 2);
    }
    else
    {
        # Bad or missing arguments: brief usage, passing 1 as the exit value.
        pod2usage(1);
    }
}
# Concatenate the input VCF files, in numeric order of their names, into one
# gzip-compressed output file.
#
# For every input, lines up to and including the "#CHROM" column-header line
# are consumed without being copied; the "#CHROM" line of the first file is
# written once, and every later file must carry an identical "#CHROM" line.
# All lines after the header are copied verbatim.
#
# NOTE(review): meta-information lines ("##...") that precede "#CHROM" are not
# copied to the output, not even from the first file -- confirm this is intended.

my @orderedFiles;
{
    # File names are rarely pure numbers. <=> still orders them by their
    # leading digits (0 when there are none), which is the ordering this
    # script has always used; only the "isn't numeric" warning is silenced.
    no warnings 'numeric';
    @orderedFiles = sort {$a<=>$b} @ARGV;
}

#prepare output file
# The output path is handed to the shell as the positional parameter "$1"
# instead of being pasted into the command text, so spaces or shell
# metacharacters in the name cannot change the command that is run.
open(my $out, '|-', 'sh', '-c', 'exec gzip > "$1"', 'sh', $outFile)
    || die "Cannot open $outFile\n";

my $file1Header;        # "#CHROM" line of the first file; the reference for all others
my $isFirstFile = 1;    # a flag rather than a name comparison, so a repeated
                        # file name cannot cause the header to be written twice

#iterates through each file and concatenates it to OUT
foreach my $file (@orderedFiles)
{
    print "processing $file\n";

    # Choose the reader from the file's own magic bytes. Capturing the output
    # of `gzip -t` cannot work: it reports problems on stderr and through its
    # exit status, so the captured text is empty for every file.
    my $in;
    my $compressed = isGzipped($file);
    if ($compressed)
    {
        # List form: no shell is involved, the name is passed as-is.
        open($in, '-|', 'gunzip', '-c', $file) || die "can't open pipe to $file";
    }
    else
    {
        open($in, '<', $file) || die "can't open $file";
    }

    $headerProcessed = 0;
    while (my $line = <$in>)
    {
        if ($headerProcessed)
        {
            # Past the header: everything is data and is copied verbatim.
            print {$out} $line;
            next;
        }

        # Still in the header section: skip everything except "#CHROM".
        next unless $line =~ /^#CHROM/;

        if ($isFirstFile)
        {
            $file1Header = $line;
            print {$out} $line;
        }
        elsif (!defined($file1Header) || $file1Header ne $line)
        {
            # Also reached when the first file had no "#CHROM" line at all.
            die "Header for $file not the same as the original file";
        }
        $headerProcessed = 1;
    }

    # A failed close on the gunzip pipe means gunzip exited non-zero
    # (for example on a truncated or corrupt archive).
    close($in)
        || die($compressed
               ? "gunzip failed on $file (exit status " . ($? >> 8) . ")\n"
               : "Error closing $file: $!\n");

    if (!$headerProcessed)
    {
        warn "No #CHROM header line found in $file; nothing was copied from it\n";
    }

    $isFirstFile = 0;
}

# Closing the pipe waits for gzip; a false result means gzip (or the shell
# redirect that creates the output file) failed.
close($out)
    || die "gzip failed while writing $outFile (exit status " . ($? >> 8) . ")\n";

# Returns true when the file at $path starts with the two gzip magic bytes
# (0x1f 0x8b). Dies if the file cannot be opened for reading.
sub isGzipped
{
    my ($path) = @_;

    open(my $fh, '<:raw', $path) || die "can't open $path";
    my $magic = '';
    read($fh, $magic, 2);   # a short or failed read leaves $magic too short to match
    close($fh);

    return $magic eq "\x1f\x8b";
}