From 5f76b57812b7c59bb30c9ca66c60eca1d2d5181c Mon Sep 17 00:00:00 2001 From: Carol Scott Date: Mon, 7 Oct 2019 14:58:32 +0100 Subject: [PATCH 1/5] change default file type to cram for genotype check --- Changes | 2 ++ lib/npg_qc/autoqc/checks/genotype.pm | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Changes b/Changes index 0ad4febc6..8e5624ca4 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,7 @@ LIST OF CHANGES FOR NPG-QC PACKAGE + - change default file type to cram for genotype check + release 67.2.0 - script to move NovaSeq runs for a particular study - change default file type to cram for bam_flagstats diff --git a/lib/npg_qc/autoqc/checks/genotype.pm b/lib/npg_qc/autoqc/checks/genotype.pm index beca0e0e8..3efb01e82 100644 --- a/lib/npg_qc/autoqc/checks/genotype.pm +++ b/lib/npg_qc/autoqc/checks/genotype.pm @@ -29,7 +29,7 @@ Readonly::Scalar my $SAMTOOLS_EXTRACT_REGIONS_NAME => q[samtools]; Readonly::Scalar my $SAMTOOLS_MERGE_NAME => q[samtools]; Readonly::Scalar my $SAMTOOLS_MPILEUP_NAME => q[samtools]; Readonly::Scalar my $BCFTOOLS_NAME => q[bcftools]; -Readonly::Scalar our $EXT => q[bam]; +Readonly::Scalar our $EXT => q[cram]; Readonly::Scalar my $SEQUENOM_QC_PLEX => q[W30467]; Readonly::Scalar my $DEFAULT_QC_PLEX => q[sequenom_fluidigm_combo]; Readonly::Scalar my $DEFAULT_SNP_CALL_SET => q[W30467]; From 5d4afca3ab5b2354907360709dba00aa254619a9 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Wed, 9 Oct 2019 16:10:49 +0100 Subject: [PATCH 2/5] block skipping mqc if deplexing percent is low --- bin/npg_mqc_skipper | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/bin/npg_mqc_skipper b/bin/npg_mqc_skipper index 9de631de2..27d5b36ef 100755 --- a/bin/npg_mqc_skipper +++ b/bin/npg_mqc_skipper @@ -22,6 +22,7 @@ Readonly::Scalar my $NUM_DISTINCT_STUDIES => 1; Readonly::Scalar my $RUN_STATUS_FROM => 'qc review pending'; Readonly::Scalar my $RUN_STATUS_TO => 'archival pending'; Readonly::Scalar my 
$INSTRUMENT_MODEL => 'NovaSeq'; +Readonly::Scalar my $DEPLEXING_PERCENT_THRESHOLD => 90; my $dry_run = 1; my $study_name; @@ -57,8 +58,13 @@ if (!@rows) { exit 0; } +my $run_list = sub { + my @ids = @_; + return join q[,], map { q[?] } @ids; +}; + my @run_ids = map { $_->id_run } @rows; -my $placeholders = join q[,], map { q[?] } @run_ids; +my $placeholders = $run_list->(@run_ids); my $dbh = WTSI::DNAP::Warehouse::Schema->connect->storage->dbh; my $query = @@ -78,6 +84,21 @@ while (my @data = $sth->fetchrow_array()) { push @run_ids, $data[0]; } +if (@run_ids) { + $placeholders = $run_list->(@run_ids); + $query = q[select distinct(id_run) from iseq_run_lane_metrics ] . + qq[where tags_decode_percent < ? and id_run in (${placeholders})]; + $sth = $dbh->prepare($query) or croak "Failed to prepare statement: $DBI::errstr"; + $sth->execute($DEPLEXING_PERCENT_THRESHOLD, @run_ids); + my $temp = {}; + while (my @data = $sth->fetchrow_array()) { + $temp->{$data[0]} = 1; + } + if (keys %{$temp}) { + @run_ids = grep { not $temp->{$_} } @run_ids; + } +} + if (@run_ids) { $logger->info(join qq[\n\t], q[], qq[Study '$study_name'], @@ -179,7 +200,8 @@ npg_mqc_skipper --study_name 'SOME STUDY' --no-dry_run In dry run mode this script identifies and reports runs having 'qc review pending' status where all samples apart from controls -belong to the study given as the --study_name argument. +belong to the study given as the --study_name argument and +deplexing percent for all lanes of any such run is 90% or above. 
In non-dry run mode the status of this runs is changed to 'archival pending', ie the manual QC stage of the run life cycle From 27d3916d62f4fcd9fd26caf4cc27b8ce9650f6e6 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Wed, 9 Oct 2019 16:23:28 +0100 Subject: [PATCH 3/5] handle undefined values --- bin/npg_mqc_skipper | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/npg_mqc_skipper b/bin/npg_mqc_skipper index 27d5b36ef..294e01733 100755 --- a/bin/npg_mqc_skipper +++ b/bin/npg_mqc_skipper @@ -87,7 +87,8 @@ while (my @data = $sth->fetchrow_array()) { if (@run_ids) { $placeholders = $run_list->(@run_ids); $query = q[select distinct(id_run) from iseq_run_lane_metrics ] . - qq[where tags_decode_percent < ? and id_run in (${placeholders})]; + q[where (tags_decode_percent is null or tags_decode_percent < ?) ] . + qq[and id_run in (${placeholders})]; $sth = $dbh->prepare($query) or croak "Failed to prepare statement: $DBI::errstr"; $sth->execute($DEPLEXING_PERCENT_THRESHOLD, @run_ids); my $temp = {}; From febc997b707f193447976532ef199041e2b6e159 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Wed, 9 Oct 2019 18:50:18 +0100 Subject: [PATCH 4/5] increase threshold to 93% --- bin/npg_mqc_skipper | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/npg_mqc_skipper b/bin/npg_mqc_skipper index 294e01733..f345c642c 100755 --- a/bin/npg_mqc_skipper +++ b/bin/npg_mqc_skipper @@ -22,7 +22,7 @@ Readonly::Scalar my $NUM_DISTINCT_STUDIES => 1; Readonly::Scalar my $RUN_STATUS_FROM => 'qc review pending'; Readonly::Scalar my $RUN_STATUS_TO => 'archival pending'; Readonly::Scalar my $INSTRUMENT_MODEL => 'NovaSeq'; -Readonly::Scalar my $DEPLEXING_PERCENT_THRESHOLD => 90; +Readonly::Scalar my $DEPLEXING_PERCENT_THRESHOLD => 93; my $dry_run = 1; my $study_name; @@ -202,7 +202,7 @@ npg_mqc_skipper --study_name 'SOME STUDY' --no-dry_run In dry run mode this script identifies and reports runs having 'qc review pending' status where all 
samples apart from controls belong to the study given as the --study_name argument and -deplexing percent for all lanes of any such run is 90% or above. +deplexing percent for all lanes of any such run is 93% or above. In non-dry run mode the status of this runs is changed to 'archival pending', ie the manual QC stage of the run life cycle From 0badd9d0289f3b2b3d3c58189de09a49879c16bc Mon Sep 17 00:00:00 2001 From: mgcam Date: Thu, 10 Oct 2019 13:34:47 +0100 Subject: [PATCH 5/5] Changes file update for release 67.3.0 --- Changes | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Changes b/Changes index 8e5624ca4..a561d3781 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,10 @@ LIST OF CHANGES FOR NPG-QC PACKAGE +release 67.3.0 - change default file type to cram for genotype check + - script for skipping manual QC step: an additional filter + to ensure that all lanes for a run that bypasses + manual QC deplexed properly release 67.2.0 - script to move NovaSeq runs for a particular study