Skip to content

Commit

Permalink
Merge branch 'release/v4.3.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
David Jones committed Jul 13, 2021
2 parents 6fa17c8 + 0c17a2b commit 64c5f3f
Show file tree
Hide file tree
Showing 9 changed files with 308 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# CHANGES

## v4.3.0

* Add script to convert allelecount output to JSON

## v4.2.1

* Update so docker and native install use same install scripts behind the scenes
Expand Down
3 changes: 2 additions & 1 deletion perl/Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ WriteMakefile(
NAME => 'alleleCount',
LICENSE => 'agpl_3', # http://search.cpan.org/~dagolden/CPAN-Meta-2.142690/lib/CPAN/Meta/Spec.pm#license
VERSION_FROM => 'lib/Sanger/CGP/AlleleCount.pm',
EXE_FILES => [qw( bin/alleleCounter.pl )],
EXE_FILES => [qw( bin/alleleCounter.pl bin/alleleCounterToJson.pl )],
PREREQ_PM => {
'Const::Fast' => 0.014,
'Try::Tiny' => 0.19,
Expand All @@ -38,5 +38,6 @@ WriteMakefile(
'Devel::Cover' => 1.09,
'Pod::Coverage' => 0.23,
'IPC::System::Simple' => 1.25,
'JSON' => 2.90,
}
);
99 changes: 99 additions & 0 deletions perl/bin/alleleCounterToJson.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/perl

##########LICENCE##########
# Copyright (c) 2014-2021 Genome Research Ltd.
#
# Author: CASM/Cancer IT <[email protected]>
#
# This file is part of alleleCount.
#
# alleleCount is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##########LICENCE##########

use strict;
use Carp;
use English qw( -no_match_vars );
use warnings FATAL => 'all';

use Getopt::Long 'GetOptions';
use Pod::Usage;

use Sanger::CGP::AlleleCount;
use Sanger::CGP::AlleleCount::ToJson;

{
my $options = option_builder();
$options->{'o'} = '/dev/stdout' unless(defined $options->{'o'});
run($options);
}

sub run {
my ($options) = @_;
my $json_string = Sanger::CGP::AlleleCount::ToJson::alleleCountToJson($options->{'a'}, $options->{'l'});
my $OUT;
if($options->{'o'}){
open($OUT, '>', $options->{'o'}) or croak("Error opening file for output: $!");
}
print $OUT "$json_string";
if($options->{'o'}){
close($OUT) or croak("Error closing output file for JSON conversion: $!");
}
}


sub option_builder {
my ($factory) = @_;

my %opts;

&GetOptions (
'h|help' => \$opts{'h'},
'l|locus-file=s' => \$opts{'l'},
'a|allelecount-file=s' => \$opts{'a'},
'o|output-file:s' => \$opts{'o'},
'v|version' => \$opts{'v'},
);

pod2usage(0) if($opts{'h'});
if($opts{'v'}){
print Sanger::CGP::AlleleCount->VERSION."\n";
exit;
}
pod2usage(1) if(!$opts{'l'} || !$opts{'a'});
croak("Locus file ".$opts{'l'}." does not exist.") if(! -e $opts{'l'});
croak("Allele count output file ".$opts{'a'}." does not exist.") if(! -e $opts{'a'});
return \%opts;
}

__END__
=head1 NAME
alleleCounterToJson.pl - Generate JSON format file from the tab seperated format
=head1 SYNOPSIS
alleleCounterToJson.pl
Required:
-locus-file -l File containing SNP positions used for allelecounter
-allelecount-file -a Allelecounter output file
Optional:
-output-file -o Output file (default: stdout)
-help -h This message
-version -v Version number
=cut
2 changes: 1 addition & 1 deletion perl/lib/Sanger/CGP/AlleleCount.pm
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ package Sanger::CGP::AlleleCount;
use strict;

use base 'Exporter';
our $VERSION = '4.2.1';
our $VERSION = '4.3.0';
our @EXPORT = qw($VERSION);

1;
130 changes: 130 additions & 0 deletions perl/lib/Sanger/CGP/AlleleCount/ToJson.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
package Sanger::CGP::AlleleCount::ToJson;

##########LICENCE##########
# Copyright (c) 2014-2021 Genome Research Ltd.
#
# Author: CASM/Cancer IT <[email protected]>
#
# This file is part of alleleCount.
#
# alleleCount is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##########LICENCE##########


use strict;

use Carp;
use English qw( -no_match_vars );
use warnings FATAL => 'all';

use JSON;
use IO::Zlib;

use Sanger::CGP::AlleleCount;

use Const::Fast qw(const);

const my %ALLELECOUNT_CONST => (
MIN_PROP => 0.21,
MIN_READS => 5,
);

sub allelecount_val {
my ($item) = @_;
return $ALLELECOUNT_CONST{$item};
}

=item new
Null constructor
=cut

sub new {
my ($class) = @_;
my $self = { };
bless $self, $class;
return $self;
}

=item alleleCountToJson
Convert allele count file result format to JSON
=cut

sub alleleCountToJson{
my ($countsfile, $snpsfile) = @_;
my $tmp;
my $SNPS;
my $snp_list;

#TODO open gzipped file....
open($SNPS, '<', $snpsfile) or croak("Error opening allele count locus file '$snpsfile' for JSON conversion: $!");
while(<$SNPS>){
my $line = $_;
next if($line =~ m/^\s*#/);
chomp($line);
my ($chr,$pos,$name,undef) = split(/\s+/,$line);
$snp_list->{$chr}->{$pos} = $name;
}
close($SNPS) or croak("Error closing allele count locus file '$snpsfile' for JSON conversion: $!");

my $fh = new IO::Zlib;
if($fh->open($countsfile, "rb")){
while(<$fh>){
my $line = $_;
next if($line =~ m/^\s*#/);
chomp($line);
my ($chr,$pos,$a,$c,$g,$t,$good) = split(/\s+/,$line);
my $nom = $snp_list->{$chr}->{$pos};
my $genotype = _calculate_genotype_from_allele_count($a,$c,$g,$t,$good);
$tmp->{$nom} = $genotype;
}
$fh->close;
}else{
croak("Error trying to open file for SNP locus loading '$countsfile': $!\n");
}
my $jsonstr = encode_json($tmp);
return $jsonstr;
}

sub _calculate_genotype_from_allele_count{
my ($a_a,$a_c,$a_g,$a_t,$good) = @_;
my $geno;
return q{.} if($good < allelecount_val('MIN_READS'));

my @counts;
push @counts, ['A', $a_a] if($a_a/$good >= allelecount_val('MIN_PROP'));
push @counts, ['C', $a_c] if($a_c/$good >= allelecount_val('MIN_PROP'));
push @counts, ['G', $a_g] if($a_g/$good >= allelecount_val('MIN_PROP'));
push @counts, ['T', $a_t] if($a_t/$good >= allelecount_val('MIN_PROP'));

my $entries = scalar @counts;
if($entries == 0) {
$geno = q{.};
}
elsif($entries == 1) {
$geno = $counts[0][0].$counts[0][0];
}
else {
@counts = sort {$b->[1]<=>$a->[1]} @counts; # reverse sorts by the counts
$geno = join(q{}, sort {$a cmp $b} $counts[0][0], $counts[1][0]); # then sort the alleles into the string
}
croak("Error calculating genotype from allele counts $a_a,$a_c,$a_g,$a_t,$good.\n") if((length $geno)>2 || (length $geno) == 0);
return $geno;

}

1;
5 changes: 3 additions & 2 deletions perl/t/2_pl_compile.t
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ for(@scripts) {
next;
}
my $message = "Compilation check: $script";
my $output = "";
my $command = "$perl -c $script";
my ($pid, $process);
try {
$pid = open $process, $command.' 2>&1 |';
while(<$process>){};
while(<$process>){ $output.=$_;};
close $process;
pass($message);
}
catch {
fail($message);
fail($message."\n".$output);
};
}

Expand Down
67 changes: 67 additions & 0 deletions perl/t/tojson.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
##########LICENCE##########
# Copyright (c) 2014-2020 Genome Research Ltd.
#
# Author: CASM/Cancer IT <[email protected]>
#
# This file is part of alleleCount.
#
# alleleCount is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##########LICENCE##########

use strict;
use Test::More;
use Test::Fatal;
use Const::Fast qw(const);
use File::Temp qw(tempdir);
use File::Slurp;

use Sanger::CGP::AlleleCount::ToJson;


const my $MOD => 'Sanger::CGP::AlleleCount::ToJson';
const my $EXP_JSON => '{"rs2369898":"CT"}';

const my @DEFAULT_RESULT => ( ['#CHR',qw(POS Count_A Count_C Count_G Count_T Good_depth)],
[qw(22 16165776 1 12 0 0 13)]
);
const my @PBQ20_RESULT => ( ['#CHR',qw(POS Count_A Count_C Count_G Count_T Good_depth)],
[qw(22 16165776 2 17 0 0 19)]
);

const my @MAPQ0_RESULT => ( ['#CHR',qw(POS Count_A Count_C Count_G Count_T Good_depth)],
[qw(22 16165776 1 17 0 0 18)]
);

const my @DEFAULT_RESULT_SNP6 => (['#CHR',qw(POS Count_Allele_A Count_Allele_B Good_depth)],
[qw(22 16165776 12 1 13)]
);
const my @PBQ20_RESULT_SNP6 => (['#CHR',qw(POS Count_Allele_A Count_Allele_B Good_depth)],
[qw(22 16165776 17 2 19)]
);
const my @MAPQ0_RESULT_SNP6 => (['#CHR',qw(POS Count_Allele_A Count_Allele_B Good_depth)],
[qw(22 16165776 17 1 18)]
);

use FindBin qw($Bin);
my $data_root = "$Bin/../../testData";

my $loci = "$data_root/test_loci.txt";
my $ac_output = "$data_root/test_ac_out.txt";

my $obj = new_ok($MOD); # no options

is($EXP_JSON, Sanger::CGP::AlleleCount::ToJson::alleleCountToJson($ac_output, $loci), "Check conversion to JSON");

done_testing();

1 change: 1 addition & 0 deletions testData/test_ac_out.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chr14 96772879 0 31 0 31 62
1 change: 1 addition & 0 deletions testData/test_loci.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chr14 96772879 rs2369898

0 comments on commit 64c5f3f

Please sign in to comment.