Skip to content

Commit

Permalink
Merge branch 'devel' of https://github.com/wtsi-npg/npg_qc into maste…
Browse files Browse the repository at this point in the history
…r to create release 69.8.0
  • Loading branch information
dkj committed Aug 11, 2021
2 parents 1eda6c2 + 4b595b2 commit 682c155
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 158 deletions.
11 changes: 10 additions & 1 deletion Changes
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
LIST OF CHANGES FOR NPG-QC PACKAGE


release 69.8.0
- mqc skipper - a more generic way to disregard spiked-in controls when
counting the number of studies the samples in the run belong to. The change
is driven by the fact that spiked-in controls can now belong to a number of
different studies.
- replace fastx_toolkit use with seqtk
- npg_substitution_metrics.pl script clean-up

release 69.7.0
- added new npg_subtitution_metrics.pl script
- added new npg_substitution_metrics.pl script
- move from Travis CI to GitHub Actions
- switch to node 14 for CI, using npm from default conda pkg
- remove dependency on npg_common::sequence::reference::base_count, which
Expand Down
10 changes: 5 additions & 5 deletions bin/npg_mqc_skipper
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use npg_qc::mqc::skipper;

our $VERSION = '0';

Readonly::Scalar my $EXCLUDE_STUDY_NAME => 'Illumina Controls';
Readonly::Scalar my $EXCLUDE_ENTITY_TYPE => 'library_indexed_spike';
Readonly::Scalar my $NUM_DISTINCT_STUDIES => 1;
Readonly::Scalar my $RUN_STATUS_FROM => 'qc review pending';
Readonly::Scalar my $RUN_STATUS_TO => 'archival pending';
Expand All @@ -39,7 +39,7 @@ GetOptions('dry_run|dry-run!' => \$dry_run,
'study_name=s' => \$study_name,
'deplexing_percent_threshold=i' => \$deplexing_threshold,
'qc_fails_percent_threshold=i' => \$qc_fails_threshold,
'artic_qc_fails_percent_threshold=i' => \$artic_qc_fails_threshold,
'artic_qc_fails_percent_threshold=i' => \$artic_qc_fails_threshold,
'help' => sub {
pod2usage(-verbose => 2,
-exitval => 0)
Expand Down Expand Up @@ -71,7 +71,7 @@ my @rows = $tracking_schema->resultset('Run')->search(
'run_status_dict.description' => $RUN_STATUS_FROM,
'instrument_format.model' => $INSTRUMENT_MODEL},
{prefetch => [{'run_statuses' => 'run_status_dict'}, 'instrument_format']}
)->all();
)->all();
@rows = grep { my $row = $_; none { $row->is_tag_set($_) } @NO_SKIPPING_TAGS }
@rows;

Expand Down Expand Up @@ -107,13 +107,13 @@ my $query =
q[from iseq_product_metrics p ] .
q[join iseq_flowcell f on p.id_iseq_flowcell_tmp=f.id_iseq_flowcell_tmp ] .
q[join study s on s.id_study_tmp=f.id_study_tmp ] .
qq[where s.name != ? and p.id_run in (${placeholders}) ] .
qq[where f.entity_type != ? and p.id_run in (${placeholders}) ] .
q[group by p.id_run having study_count = ?];
my $sth = $dbh->prepare($query) or
($logger->fatal("Failed to prepare statement: $DBI::errstr") and exit 1);
# Run time database errors are thrown by the execute method, no need to
# do anything special.
$sth->execute($EXCLUDE_STUDY_NAME, @run_ids, $NUM_DISTINCT_STUDIES);
$sth->execute($EXCLUDE_ENTITY_TYPE, @run_ids, $NUM_DISTINCT_STUDIES);
@run_ids = $get_run_ids->($sth);

if (@run_ids) {
Expand Down
114 changes: 5 additions & 109 deletions bin/npg_substitution_metrics.pl
Original file line number Diff line number Diff line change
Expand Up @@ -190,83 +190,6 @@ ()
return \%win;
}

#%======================================================to process winRi structures
#function [new_win]=stripped_convert_new(win_R1)
#%coverts win_R1 (as in error quality) into converient format, new_win,
#%where each of 12 columns are counts of windows X.Y for each of 12 substitutions
#
#%INPUT input win_R1 (16x12) from error file see below:
#
#%# Effect of previous base and next base high predictor. Use `grep ^PNCRH | cut -f 2-` to extract this part
#%# 16 rows: one row per pair= previous/next base, columns (12 for each subs) is previous base+substitution+next base and count for 12 substitutions
#
#%PCRNH 1 caA bw AX AACA 6 AACC 10 AACG 6 AACT 2 AAGA 159 AAGC 125 AAGG 133 AAGT 102 AATA 45 AATC 51 AATG 52 AATT 65
#%PCRNH 1 caC ACAA 11 ACAC 23 ACAG 10 ACAT 12 ACGA 2 ACGC 12 ACGG 8 ACGT 4 ACTA 129 ACTC 156 ACTG 276 ACTT 74
#%PCRNH 1 caG AGAA 51 AGAC 179 AGAG 77 AGAT 53 AGCA 45 AGCC 32 AGCG 20 AGCT 13 AGTA 646 AGTC 701 AGTG 398 AGTT 209
#%PCRNH 1 caT ATAA 49 ATAC 64 ATAG 49 ATAT 53 ATCA 116 ATCC 92 ATCG 137 ATCT 119 ATGA 2 ATGC 7 ATGG 8 ATGT 5
#
#%PCRNH 1 caA bw CX CACA 6 CACC 4 CACG 14 CACT 12 CAGA 146 CAGC 65 CAGG 155 CAGT 124 CATA 60 CATC 39 CATG 60 CATT 72
#%PCRNH 1 CCAA 56 CCAC 25 CCAG 22 CCAT 30 CCGA 13 CCGC 3 CCGG 7 CCGT 7 CCTA 175 CCTC 34 CCTG 138 CCTT 66
#%PCRNH 1 CGAA 129 CGAC 363 CGAG 97 CGAT 244 CGCA 33 CGCC 37 CGCG 36 CGCT 22 CGTA 463 CGTC 509 CGTG 306 CGTT 251
#%PCRNH 1 CTAA 23 CTAC 46 CTAG 37 CTAT 45 CTCA 178 CTCC 118 CTCG 204 CTCT 202 CTGA 3 CTGC 2 CTGG 9 CTGT 10
#
#%PCRNH 1 caA bw GX GACA 3 GACC 7 GACG 6 GACT 4 GAGA 99 GAGC 78 GAGG 94 GAGT 83 GATA 60 GATC 58 GATG 46 GATT 72
#%PCRNH 1 GCAA 97 GCAC 20 GCAG 20 GCAT 29 GCGA 11 GCGC 11 GCGG 10 GCGT 14 GCTA 201 GCTC 192 GCTG 307 GCTT 162
#%PCRNH 1 GGAA 53 GGAC 214 GGAG 36 GGAT 94 GGCA 33 GGCC 21 GGCG 7 GGCT 13 GGTA 348 GGTC 388 GGTG 145 GGTT 184
#%PCRNH 1 GTAA 26 GTAC 47 GTAG 33 GTAT 51 GTCA 3520 GTCC 96 GTCG 71 GTCT 104 GTGA 4 GTGC 5 GTGG 11 GTGT 22
#
#%PCRNH 1 caA bw TX TACA 4 TACC 3 TACG 2 TACT 3 TAGA 96 TAGC 4020 TAGG 209 TAGT 90 TATA 63 TATC 46 TATG 16 TATT 49
#%PCRNH 1 TCAA 28 TCAC 11 TCAG 19 TCAT 23 TCGA 15 TCGC 5 TCGG 7 TCGT 5 TCTA 186 TCTC 65 TCTG 141 TCTT 84
#%PCRNH 1 TGAA 134 TGAC 272 TGAG 112 TGAT 86 TGCA 74 TGCC 47 TGCG 46 TGCT 14 TGTA 774 TGTC 822 TGTG 512 TGTT 211
#%PCRNH 1 TTAA 45 TTAC 63 TTAG 31 TTAT 43
#
#new_win=[];
#N=4;
#
#a=1;
#for f=1:3,
# for k=1:4,
# j=a+(k-1)*N;
# for t1=1:4,
# new_win(j+t1-a,f)=win_R1(j,t1+(f-1)*N);
# end
# end
#end
#
#
#c=2;
#for f=1:3,
# for k=1:4,
# j=c+(k-1)*N;
# for t1=1:4,
# new_win(j+t1-c,f+2*c-1)=win_R1(j,t1+(f-1)*N);
# end
# end
#end
#
#g=3;
#for f=1:3,
# for k=1:4,
# j=g+(k-1)*N;
# for t=1:4,
# new_win(j+t-g,f+2*g)=win_R1(j,t+(f-1)*N);
# end
# end
#end
#
#
#t=4;
#for f=1:3,
# for k=1:4,
# j=t+(k-1)*N;% caG 3,
# for t1=1:4,
# new_win(j+t1-t,f+2*t+1)=win_R1(j,t1+(f-1)*N);
# end
# end
#end
#endfunction
#

# generate metrics
sub metrics_gen_assym_prediction () {
my ($err_data, $verbose) = @_;
Expand Down Expand Up @@ -318,33 +241,13 @@ ()
my $as_ag_tc_R2 = ($mis2 > 0 ? (abs($coH2{AG} - $coH2{TC}) / $mis2) : 0);
my $symm_ag_tc = max(($as_ag_tc_R1, $as_ag_tc_R2));

#1.1.1.1--------remove strong assymetry if needed (re-define %separated pair
if ($as_ag_tc_R1 > $thr_ass) {
$coH1{AG} = $mis1;
$coH1{TC} = $mis1;
}
if ($as_ag_tc_R2 > $thr_ass) {
$coH2{AG} = $mis2;
$coH2{TC} = $mis2;
}

#1.1.2 ---- strand assymetry (sh be symmetry) mic=min 'close' Ti: CT GA
my $mic1 = min(($coH1{CT}, $coH1{GA}));
my $mic2 = min(($coH2{CT}, $coH2{GA}));
my $as_ct_ga_R1 = ($mic1 > 0 ? (abs($coH1{CT} - $coH1{GA}) / $mic1) : 0);
my $as_ct_ga_R2 = ($mic2 > 0 ? (abs($coH2{CT} - $coH2{GA}) / $mic2) : 0);
my $symm_ct_ga = max(($as_ct_ga_R1, $as_ct_ga_R2));

#1.1.2.1-remove close Ti starnd assymetry if needed
if ($as_ct_ga_R1 > $thr_ass) {
$coH1{CT} = $mic1;
$coH1{GA} = $mic1;
}
if ($as_ct_ga_R2 > $thr_ass) {
$coH2{CT} = $mic2;
$coH2{GA} = $mic2;
}

my @ti1 = ($coH1{AG}, $coH1{TC}, $coH1{CT}, $coH1{GA});
my $mean_ti1 = round(mean(@ti1));
my $sd_ti1 = round(stddev(@ti1));
Expand Down Expand Up @@ -394,18 +297,15 @@ ()
my $TiTv_meGT = max(($si_sv1, $si_sv2));

#3========metrics on oxiG and GT/CA artefact
#artefact C2A=art_ox=proportion of gt/ca to mean Ti, depends
#on Ti variability
my $thr_ti1 = $mean_ti1 - ($cvTi1 > $thr_cv ? 1 : 3) * $sd_ti1;
my $thr_ti2 = $mean_ti2 - ($cvTi2 > $thr_cv ? 1 : 3) * $sd_ti2;
#artefact C2A=art_ox=proportion of gt/ca to mean Ti, depends on Ti variability

#===============art_ox=proportion of gt or ca to mean Ti
my $ma1 = max(($coH1{CA}, $coH1{GT}));
my $ma2 = max(($coH2{CA}, $coH2{GT}));

# prop max(gt,ca) in Ti: the higher the more likely artC2A
my $artR1 = ($thr_ti1 > 0 ? ($ma1 / $thr_ti1) : 0);
my $artR2 = ($thr_ti2 > 0 ? ($ma2 / $thr_ti2) : 0);
my $artR1 = ($mean_ti1 > 0 ? ($ma1 / $mean_ti1) : 0);
my $artR2 = ($mean_ti2 > 0 ? ($ma2 / $mean_ti2) : 0);
my $art_ox = max(($artR1, $artR2));

#3.2 ----------------------oxidation biasas BROAD
Expand Down Expand Up @@ -476,12 +376,8 @@ ()
}

#5.2--------compute probability/likelihood of C2A based on art_ox
my $likely = 0.25 * ($artR1 + $artR2);
my $new_likely = $likely;
if ($likely > 1) {
$new_likely = 1;
}
my $likelihood = $new_likely;
my $likely = $gt_nearTi;
my $likelihood = $likely;

# update results
$results{likelihood} = $likelihood;
Expand Down
28 changes: 7 additions & 21 deletions lib/npg_qc/autoqc/checks/insert_size.pm
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,8 @@ use npg_common::Alignment;
use npg_common::extractor::fastq qw(read_count);

extends qw(npg_qc::autoqc::checks::check);
with qw(
npg_tracking::data::reference::find
npg_common::roles::software_location
);
with qw(npg_tracking::data::reference::find),
qw(npg_common::roles::software_location) => { tools => [qw/bwa0_6 seqtk/] };
has '+aligner' => (default => 'bwa0_6', is => 'ro');

our $VERSION = '0';
Expand Down Expand Up @@ -433,10 +431,10 @@ sub _fastq_reverse_complement {
my @reversed = ();
foreach my $i (0 .. 1) {
push @reversed, catfile($self->tmp_path, ($i+1) . q[r.fastq]);
my @args = (q[fastx_reverse_complement], q[-Q 33], q[-i], $sample_reads->[$i], q[-o], $reversed[$i]);
print 'Producing reverse complemented file: ' . (join q[ ], @args) . qq[\n] || carp 'Producing reverse complemented file: ' . (join q[ ], @args) . qq[\n];
if (system(@args)) {
croak printf "Child %s exited with value %d\n", join(q[ ], @args), $CHILD_ERROR >> $CHILD_ERROR_SHIFT;
my $cmd = $self->seqtk_cmd . q[ seq -r -Q 33 ] . $sample_reads->[$i] . q[ > ] . $reversed[$i];
print 'Producing reverse complemented file: ' . $cmd . qq[\n] || carp 'Producing reverse complemented file: ' . $cmd . qq[\n];
if (system($cmd)) {
croak printf "Child %s exited with value %d\n", $cmd, $CHILD_ERROR >> $CHILD_ERROR_SHIFT;
}
}
return \@reversed;
Expand All @@ -455,25 +453,13 @@ sub _set_additional_modules_info {
use strict;
## use critic
if ($self->use_reverse_complemented) {
push @packages_info, q[FASTX Toolkit fastx_reverse_complement ] . $self->_fastx_version;
push @packages_info, q[seqtk ] . $self->current_version( $self->seqtk_cmd() )
}
push @packages_info, join q[ ], $self->norm_fit_cmd, $VERSION;
$self->result->set_info('Additional_Modules', ( join q[;], @packages_info ) );
return;
}

sub _fastx_version {
my $self = shift;
my @lines = slurp 'fastx_reverse_complement -h |';
foreach my $line (@lines) {
if ($line =~ /FASTX\ Toolkit/xms) {
my ($version) = $line =~ /\ (\d+\.\d+\.\d+)\ /xms;
if ($version) { return $version; }
}
}
return q[];
}

sub _align {
my ($self, $sample_reads, $prefix) = @_;

Expand Down
14 changes: 7 additions & 7 deletions t/60-autoqc-checks-insert_size.t
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use_ok('npg_qc::autoqc::results::insert_size');
use_ok('npg_qc::autoqc::checks::insert_size');

sub _additional_modules {
my $use_fastx = shift;
my $use_seqtk = shift;
my @expected = ();
require npg_qc::autoqc::parse::alignment;
push @expected, join(q[ ],
Expand All @@ -41,8 +41,8 @@ sub _additional_modules {
require npg_common::Alignment;
push @expected, join(q[ ],
q[npg_common::Alignment], $npg_common::Alignment::VERSION);
if ($use_fastx) {
push @expected, q[FASTX Toolkit fastx_reverse_complement 0.0.12];
if ($use_seqtk) {
push @expected, q[seqtk 1.3];
}
push @expected, join(q[ ], $norm_fit, $npg_qc::autoqc::results::insert_size::VERSION);
return @expected;
Expand Down Expand Up @@ -572,7 +572,7 @@ sub _additional_modules {

{
my $dir = tempdir( CLEANUP => 1 );
t::autoqc_util::write_fastx_script(catfile($dir, 'fastx_reverse_complement'));
t::autoqc_util::write_seqtk_script(catfile($dir, 'seqtk'));
local $ENV{PATH} = join ':', $dir, $ENV{PATH};

my $qc = npg_qc::autoqc::checks::insert_size->new(
Expand All @@ -584,15 +584,15 @@ sub _additional_modules {
expected_size => [340,350],
format => $format,
);
is ($qc->_fastx_version, '0.0.12', 'fastx version');
is ($qc->current_version( $qc->seqtk_cmd() ), '1.3', 'seqtk version');
$qc->_set_additional_modules_info;
is ( $qc->result->get_info('Additional_Modules'),
join(q[;], _additional_modules(1)), 'additional info with fastx');
join(q[;], _additional_modules(1)), 'additional info with seqtk');
}

{
my $dir = tempdir( CLEANUP => 1 );
t::autoqc_util::write_fastx_script(catfile($dir, 'fastx_reverse_complement'), 1);
t::autoqc_util::write_seqtk_script(catfile($dir, 'seqtk'), 1);
my $s1 = catfile($current_dir, q[t/data/autoqc/alignment_few.sam]);
t::autoqc_util::write_bwa_script(catfile($dir, 'bwa0_6'), $s1);
local $ENV{PATH} = join ':', $dir, $ENV{PATH};
Expand Down
27 changes: 12 additions & 15 deletions t/autoqc_util.pm
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ sub find_or_save_composition {

sub write_samtools_script {
my ($script_path, $b1) = @_;

open my $fh, '>', $script_path or croak "Cannot open $script_path for writing";

print $fh '#!/usr/local/bin/bash';
print $fh '#!/usr/bin/env bash';
print $fh "\n";
print $fh '# Script to fake samtools in testing. Automatically generated by npg at ' . time;
print $fh "\n";
Expand All @@ -65,34 +65,31 @@ sub write_samtools_script {
}

close $fh or carp "Cannot close $script_path";

chmod 0775, $script_path;
return 1;
}

sub write_fastx_script {
sub write_seqtk_script {

my ($script_path, $b1) = @_;

my $fh;
open($fh, '>', $script_path) or croak "Cannot open $script_path for writing";

print $fh '#!/usr/local/bin/bash';
print $fh "\n";
print $fh '# Script to fake fastx in testing. Automatically generated by npg at ' . time;
print $fh '# Script to fake seqtk in testing. Automatically generated by npg at ' . time;
print $fh "\n";

# echo the first argument $1;
# echo number of arguments: $#;
if (!defined $b1) {
my $out = q[usage: fastx_reverse_complement [-h] [-r] [-z] [-v] [-i INFILE] [-o OUTFILE]
Part of FASTX Toolkit 0.0.12 by A. Gordon ([email protected])
[-h] = This helpful help screen.
[-z] = Compress output with GZIP.
[-i INFILE] = FASTA/Q input file. default is STDIN.
[-o OUTFILE] = FASTA/Q output file. default is STDOUT.
my $out = q[
Usage: seqtk <command> <arguments>
Version: 1.3
Command: seq common transformation of FASTA/Q
];

print $fh "echo '$out'";
Expand All @@ -107,7 +104,7 @@ Part of FASTX Toolkit 0.0.12 by A. Gordon ([email protected])
sub write_bwa_script {

my ($script_path, $s1, $bwa_version) = @_;

my $fh;
open($fh, '>', $script_path) or croak "Cannot open $script_path for writing";

Expand All @@ -133,7 +130,7 @@ sub write_bwa_script {
}

close $fh;

chmod 0775, $script_path;
return 1;
}
Expand Down

0 comments on commit 682c155

Please sign in to comment.