diff --git a/Changes b/Changes index 0ad4febc6..a561d3781 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,11 @@ LIST OF CHANGES FOR NPG-QC PACKAGE +release 67.3.0 + - change default file type to cram for genotype check + - script for skipping manual QC step: an additional filter + to ensure that all lanes for a run that bypasses + manual QC deplexed properly + release 67.2.0 - script to move NovaSeq runs for a particular study - change default file type to cram for bam_flagstats diff --git a/bin/npg_mqc_skipper b/bin/npg_mqc_skipper index 9de631de2..f345c642c 100755 --- a/bin/npg_mqc_skipper +++ b/bin/npg_mqc_skipper @@ -22,6 +22,7 @@ Readonly::Scalar my $NUM_DISTINCT_STUDIES => 1; Readonly::Scalar my $RUN_STATUS_FROM => 'qc review pending'; Readonly::Scalar my $RUN_STATUS_TO => 'archival pending'; Readonly::Scalar my $INSTRUMENT_MODEL => 'NovaSeq'; +Readonly::Scalar my $DEPLEXING_PERCENT_THRESHOLD => 93; my $dry_run = 1; my $study_name; @@ -57,8 +58,13 @@ if (!@rows) { exit 0; } +my $run_list = sub { + my @ids = @_; + return join q[,], map { q[?] } @ids; +}; + my @run_ids = map { $_->id_run } @rows; -my $placeholders = join q[,], map { q[?] } @run_ids; +my $placeholders = $run_list->(@run_ids); my $dbh = WTSI::DNAP::Warehouse::Schema->connect->storage->dbh; my $query = @@ -78,6 +84,22 @@ while (my @data = $sth->fetchrow_array()) { push @run_ids, $data[0]; } +if (@run_ids) { + $placeholders = $run_list->(@run_ids); + $query = q[select distinct(id_run) from iseq_run_lane_metrics ] . + q[where (tags_decode_percent is null or tags_decode_percent < ?) ] . 
+ qq[and id_run in (${placeholders})]; + $sth = $dbh->prepare($query) or croak "Failed to prepare statement: $DBI::errstr"; + $sth->execute($DEPLEXING_PERCENT_THRESHOLD, @run_ids); + my $temp = {}; + while (my @data = $sth->fetchrow_array()) { + $temp->{$data[0]} = 1; + } + if (keys %{$temp}) { + @run_ids = grep { not $temp->{$_} } @run_ids; + } +} + if (@run_ids) { $logger->info(join qq[\n\t], q[], qq[Study '$study_name'], @@ -179,7 +201,8 @@ npg_mqc_skipper --study_name 'SOME STUDY' --no-dry_run In dry run mode this script identifies and reports runs having 'qc review pending' status where all samples apart from controls -belong to the study given as the --study_name argument. +belong to the study given as the --study_name argument and +deplexing percent for all lanes of any such run is 93% or above. In non-dry run mode the status of this runs is changed to 'archival pending', ie the manual QC stage of the run life cycle diff --git a/lib/npg_qc/autoqc/checks/genotype.pm b/lib/npg_qc/autoqc/checks/genotype.pm index beca0e0e8..3efb01e82 100644 --- a/lib/npg_qc/autoqc/checks/genotype.pm +++ b/lib/npg_qc/autoqc/checks/genotype.pm @@ -29,7 +29,7 @@ Readonly::Scalar my $SAMTOOLS_EXTRACT_REGIONS_NAME => q[samtools]; Readonly::Scalar my $SAMTOOLS_MERGE_NAME => q[samtools]; Readonly::Scalar my $SAMTOOLS_MPILEUP_NAME => q[samtools]; Readonly::Scalar my $BCFTOOLS_NAME => q[bcftools]; -Readonly::Scalar our $EXT => q[bam]; +Readonly::Scalar our $EXT => q[cram]; Readonly::Scalar my $SEQUENOM_QC_PLEX => q[W30467]; Readonly::Scalar my $DEFAULT_QC_PLEX => q[sequenom_fluidigm_combo]; Readonly::Scalar my $DEFAULT_SNP_CALL_SET => q[W30467];