From a35ee96fdd9ff29bb0a522100e024573fec29a80 Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 9 Aug 2024 15:01:15 +0100 Subject: [PATCH 1/6] Added the runfolder_path attribute to the review check. (#869) --- Changes | 4 +++ lib/npg_qc/autoqc/checks/review.pm | 39 +++++++++++++++++++++++++++++- t/60-autoqc-autoqc.t | 10 +++++++- t/60-autoqc-checks-review.t | 19 +++++++++++++-- 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/Changes b/Changes index 96d219c49..24b850b9c 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,9 @@ LIST OF CHANGES FOR NPG-QC PACKAGE + - To enable access to information about a sequencing run (from RunInfo.xml, + {r,R}unParameters.xml), added an optional 'runfolder_path' attribute to + the review check. The attribute will have to be set by the caller. + release 72.1.2 (2024-08-05) - handle cases where the same tag sequence is seen in both index reads diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm index 0c8e7d2ab..7cfbb9152 100644 --- a/lib/npg_qc/autoqc/checks/review.pm +++ b/lib/npg_qc/autoqc/checks/review.pm @@ -219,6 +219,43 @@ npg_tracking::util::pipeline_config A method. Returns the path of the product configuration file. Inherited from npg_tracking::util::pipeline_config +=head2 runfolder_path + +The runfolder path, an optional attribute. In case of complex products +(multi-component compositions) is only relevant if all components belong +to the same sequencing run. This attribute is used to retrieve information +from RunInfo.xml and {r,R}unParameters.xml files. Some 'robo' configuration +might not require information of this nature, thus the attribute is optional. +If the information from the above-mentioned files is required, but the access +to the staging run folder is not available, the check cannot be run. + +=cut + +has 'runfolder_path' => ( + isa => 'Str', + is => 'ro', + required => 0, + predicate => 'has_runfolder_path', +); + + +=head2 BUILD + +A method that is run before returning the new object instance to the caller. +Errors if any attributes of the object are are in conflict. + +=cut + +sub BUILD { + my $self = shift; + if ($self->has_runfolder_path && !$self->get_id_run) { + my $m = sprintf + 'Product defined by rpt list %s does not belong to a single run.', + $self->rpt_list; + croak "$m 'runfolder_path' attribute should not be set."; + } +} + =head2 can_run Returns true if the check can be run, meaning a robo configuration @@ -764,7 +801,7 @@ Marina Gourtovaia =head1 LICENSE AND COPYRIGHT -Copyright (C) 2019,2020 Genome Research Ltd. +Copyright (C) 2019,2020,2024 Genome Research Ltd. This file is part of NPG. diff --git a/t/60-autoqc-autoqc.t b/t/60-autoqc-autoqc.t index e3876860f..7af743ca1 100644 --- a/t/60-autoqc-autoqc.t +++ b/t/60-autoqc-autoqc.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 24; +use Test::More tests => 26; use Test::Exception; use File::Temp qw/tempdir/; @@ -88,4 +88,12 @@ my $dir = tempdir( CLEANUP => 1 ); 'check is an instance of the foo1 autoqc class'); } +{ + local @ARGV = qw(--check review --runfolder_path t --rpt_list 4:1 --qc_out); + push @ARGV, $dir; + my $check = npg_qc::autoqc::autoqc->new_with_options()->create_check_object(); + isa_ok($check, 'npg_qc::autoqc::checks::review'); + is ($check->runfolder_path, 't', '--runfolder_path option is passed through'); +} + 1; diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t index b6fd53f2d..e8595507a 100644 --- a/t/60-autoqc-checks-review.t +++ b/t/60-autoqc-checks-review.t @@ -31,8 +31,8 @@ my $criteria_list = [ 'verify_bam_id.freemix < 0.01' ]; -subtest 'construction object, deciding whether to run' => sub { - plan tests => 27; +subtest 'constructing object, deciding whether to run' => sub { + plan tests => 29; my $check = npg_qc::autoqc::checks::review->new( conf_path => $test_data_dir, @@ -40,6 +40,21 @@ subtest 'construction object, deciding whether to run' => sub { rpt_list => '27483:1:2'); isa_ok ($check, 'npg_qc::autoqc::checks::review'); isa_ok ($check->result, 'npg_qc::autoqc::results::review'); + + lives_ok { npg_qc::autoqc::checks::review->new( + conf_path => $test_data_dir, + qc_in => $test_data_dir, + runfolder_path => $test_data_dir, + rpt_list => '27483:1:2;27483:2:2') + } 'object created OK for components from the same run'; + throws_ok { npg_qc::autoqc::checks::review->new( + conf_path => $test_data_dir, + qc_in => $test_data_dir, + runfolder_path => $test_data_dir, + rpt_list => '27483:1:2;27484:2:2') + } qr/'runfolder_path' attribute should not be set/, + 'error creating an object for components from different runs'; + my $can_run; warnings_like { $can_run = $check->can_run } [qr/Study config not found for/], From 7a9af15d3f7cdeae091ae2d43267e0a5bc1da7e6 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 21 Aug 2024 14:50:33 +0100 Subject: [PATCH 2/6] Add robo sequencing_run applicability criteria type (#870) * Eligibility criteria from seq run meta. * Added ROBO criteria validation to the can_run method * Consistent logging of error messages in can_run --- Changes | 10 +- MANIFEST | 1 + lib/npg_qc/autoqc/checks/review.pm | 176 ++++++++++++------ t/60-autoqc-checks-review.t | 46 +++-- .../RunParameters.xml | 89 +++++++++ .../review/mqc_type/product_release.yml | 3 +- .../product_release.yml | 3 +- t/data/autoqc/review/product_release.yml | 5 + 8 files changed, 253 insertions(+), 80 deletions(-) create mode 100644 t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml diff --git a/Changes b/Changes index 24b850b9c..c6cee83e7 100644 --- a/Changes +++ b/Changes @@ -1,8 +1,12 @@ LIST OF CHANGES FOR NPG-QC PACKAGE - - To enable access to information about a sequencing run (from RunInfo.xml, - {r,R}unParameters.xml), added an optional 'runfolder_path' attribute to - the review check. The attribute will have to be set by the caller. + - npg_qc::autoqc::check::review: + 1. To enable access to information about a sequencing run (from RunInfo.xml, + {r,R}unParameters.xml), added an optional 'runfolder_path' attribute. + The attribute will have to be set by the caller. + 2. Added a new applicability criteria type - 'sequencing_run' + 3. To avoid running the review check unnecessarily in the pipeline context, + moved ROBO criteria validation to the can_run method. release 72.1.2 (2024-08-05) - handle cases where the same tag sequence is seen in both index reads diff --git a/MANIFEST b/MANIFEST index 6a01731d1..32cf09d1e 100644 --- a/MANIFEST +++ b/MANIFEST @@ -697,6 +697,7 @@ t/data/autoqc/200114_HS40_32710_A_H72VGBCX3/RunInfo.xml t/data/autoqc/200114_HS40_32710_A_H72VGBCX3/runParameters.xml t/data/autoqc/200114_HS40_32710_A_H72VGBCX3/InterOp/TileMetricsOut.bin t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/InterOp/TileMetricsOut.bin +t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunInfo.xml t/data/autoqc/200117_A00715_0080_BHY7T3DSXX/InterOp/TileMetricsOut.bin t/data/autoqc/200117_A00715_0080_BHY7T3DSXX/InterOp/ExtendedTileMetricsOut.bin diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm index 7cfbb9152..aadab26ff 100644 --- a/lib/npg_qc/autoqc/checks/review.pm +++ b/lib/npg_qc/autoqc/checks/review.pm @@ -11,6 +11,7 @@ use Try::Tiny; use WTSI::DNAP::Utilities::Timestamp qw/create_current_timestamp/; use st::api::lims; +use npg_tracking::illumina::runfolder; use npg_qc::autoqc::qc_store; use npg_qc::Schema::Mqc::OutcomeDict; @@ -26,6 +27,8 @@ Readonly::Scalar my $ROBO_KEY => q[robo_qc]; Readonly::Scalar my $CRITERIA_KEY => q[criteria]; Readonly::Scalar my $QC_TYPE_KEY => q[qc_type]; Readonly::Scalar my $APPLICABILITY_CRITERIA_KEY => q[applicability_criteria]; +Readonly::Scalar my $LIMS_APPLICABILITY_CRITERIA_KEY => q[lims]; +Readonly::Scalar my $SEQ_APPLICABILITY_CRITERIA_KEY => q[sequencing_run]; Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria]; Readonly::Scalar my $QC_TYPE_DEFAULT => q[mqc]; @@ -71,18 +74,18 @@ samples in the same study might vary depending on the sequencing instrument type, library type, sample type, etc. There might be a need to exclude some samples from RoboQC. The criteria key of the robo configuration points to an array of criteria objects each of -which could contain two further keys, one for acceptance and one -for applicability criteria. The acceptance criteria are evaluated +which contains two further keys, one for acceptance and one for +applicability criteria. The acceptance criteria are evaluated if either the applicability criteria have been satisfied or no applicability criteria are defined. The applicability criteria for each criteria object should be set in such a way that the order of evaluation of the criteria array does not matter. If applicability criteria in all of the -criteria objects fail are not satisfied, no QC outcome is assigned +criteria objects are not satisfied, no QC outcome is assigned and the pass attribute of the review result object remains unset. -The product cannot satisfy applicability criteria in more than one -criteria object, this is considered an error. +Within a study the product can satisfy applicability criteria +in at most one criteria object. =head2 QC outcomes @@ -238,7 +241,6 @@ has 'runfolder_path' => ( predicate => 'has_runfolder_path', ); - =head2 BUILD A method that is run before returning the new object instance to the caller. @@ -258,22 +260,47 @@ sub BUILD { =head2 can_run -Returns true if the check can be run, meaning a robo configuration -for this product exists. +Returns true if the check can be run, ie a valid RoboQC configuration exists +and one of the applicability criteria is satisfied for this product. =cut sub can_run { my $self = shift; - (keys %{$self->_robo_config()}) and return 1; - $self->result->add_comment( - 'Product configuration for RoboQC is absent'); - return 0; + + my $can_run = 1; + my $message; + + if (!keys %{$self->_robo_config}) { + $message = 'RoboQC configuration is absent'; + $can_run = 0; + } else { + my $num_criteria; + try { + $num_criteria = @{$self->_applicable_criteria}; + } catch { + $message = "Error validating RoboQC criteria: $_"; + $can_run = 0; + }; + if ($can_run && !$num_criteria) { + $message = 'None of the RoboQC applicability criteria is satisfied'; + $can_run = 0; + } + } + + if (!$can_run && $message) { + $self->result->add_comment($message); + carp sprintf 'Review check cannot be run for %s . Reason: %s', + $self->_entity_desc, $message; + } + + return $can_run; } =head2 execute -Returns early without an error if the can_run method returns a false value. +Returns early if the can_run method returns a false value. + An assessment of applicability of running RoboQC on this entity is performed next, an early return is possible after that. If RoboQC is applicable, a full evaluation of autoqc results for this product is performed. If autoqc results @@ -385,6 +412,29 @@ sub _build_lims { return st::api::lims->new(rpt_list => $self->rpt_list); } +=head2 runfolder + +npg_tracking::illumina::runfolder object + +=cut + +has 'runfolder' => ( + isa => 'npg_tracking::illumina::runfolder', + is => 'ro', + lazy_build => 1, +); +sub _build_runfolder { + my $self = shift; + if ($self->has_runfolder_path) { + return npg_tracking::illumina::runfolder->new( + npg_tracking_schema => undef, + runfolder_path => $self->runfolder_path, + id_run => $self->get_id_run() + ); + } + croak 'runfolder_path argument is not set'; +} + has '_entity_desc' => ( isa => 'Str', is => 'ro', @@ -438,62 +488,70 @@ sub _validate_criteria { (ref $criteria_array eq q[ARRAY]) or croak 'Criteria is not a list in a robo config for ' . $self->_entity_desc; - my $num_cobj = @{$criteria_array}; - if ($num_cobj > 1) { - my $test = grep { $_->{$APPLICABILITY_CRITERIA_KEY} } @{$criteria_array}; - ($test == $num_cobj) or croak 'Each criteria object should have the ' . - "$APPLICABILITY_CRITERIA_KEY key defined in a robo config for " . - $self->_entity_desc; - } + @{$criteria_array} or croak 'Criteria list is empty'; - my $test = grep { $_->{$ACCEPTANCE_CRITERIA_KEY} } @{$criteria_array}; - ($test == $num_cobj) or croak 'Each criteria object should have the ' . - "$ACCEPTANCE_CRITERIA_KEY key defined in a robo config for ". - $self->_entity_desc; + foreach my $c ( @{$criteria_array} ) { + (exists $c->{$APPLICABILITY_CRITERIA_KEY} && + exists $c->{$ACCEPTANCE_CRITERIA_KEY}) || croak sprintf + 'Each criteria should have both the %s and %s key present', + $APPLICABILITY_CRITERIA_KEY, $ACCEPTANCE_CRITERIA_KEY; + } return; } -sub _applicability_lims_all { - my ($self, $criteria_objs) = @_; +has '_applicable_criteria' => ( + isa => 'ArrayRef', + is => 'ro', + lazy_build => 1, +); +sub _build__applicable_criteria { + my $self = shift; + my $criteria_objs = $self->_robo_config->{$CRITERIA_KEY}; my @applicable = (); foreach my $co ( @{$criteria_objs} ) { - my $lims_c = $co->{$APPLICABILITY_CRITERIA_KEY}->{lims}; - if (!$lims_c || $self->_applicability_lims($lims_c)) { - push @applicable, $co; + my $c_applicable = 1; + for my $c_type ($LIMS_APPLICABILITY_CRITERIA_KEY, $SEQ_APPLICABILITY_CRITERIA_KEY) { + my $c = $co->{$APPLICABILITY_CRITERIA_KEY}->{$c_type}; + if ($c && !$self->_applicability($c, $c_type)) { + $c_applicable = 0; + last; + } } + $c_applicable or next; + push @applicable, $co; } return \@applicable; } -sub _applicability_lims { - my ($self, $lcriteria) = @_; +sub _applicability { + my ($self, $acriteria, $criteria_type) = @_; - $lcriteria or return 1; + ($acriteria && $criteria_type) or croak + 'The criterium and its type type should be defined'; + (ref $acriteria eq 'HASH') or croak sprintf + '%s section should be a hash in a robo config for %', $criteria_type, $self->_entity_desc; - (ref $lcriteria eq 'HASH') or croak q('lims' section should be a hash ) . - 'in a robo config for ' . $self->_entity_desc; - my $lims_test = {}; - foreach my $lims_prop ( keys %{$lcriteria} ) { - my $ref_test = ref $lcriteria->{$lims_prop}; + my $test = {}; + foreach my $prop ( keys %{$acriteria} ) { + my $ref_test = ref $acriteria->{$prop}; not $ref_test or ($ref_test eq 'ARRAY') or croak - qq(Values for 'lims' property '$lims_prop' are neither a scalar nor ) . + qq(Values for '$criteria_type' property '$prop' are neither a scalar nor ) . 'an array in a robo config for ' . $self->_entity_desc; - my @expected_values = $ref_test ? @{$lcriteria->{$lims_prop}} : ($lcriteria->{$lims_prop}); - my $value = $self->lims->$lims_prop; - $value ||= q[]; - #defined $value or croak - # qq('$lims_prop' - cannot compare to an undefined value); - # comparing as strings in lower case - $value = lc q[] . $value; - $lims_test->{$lims_prop} = any { $value eq lc q[] . $_ } - @expected_values; + my @expected_values = $ref_test ? @{$acriteria->{$prop}} : ($acriteria->{$prop}); + my $value = $criteria_type eq $LIMS_APPLICABILITY_CRITERIA_KEY ? + $self->lims->$prop : $self->runfolder->$prop; + if (!defined $value) { # for example, boolean false values + $value = q[]; + } + $value = lc q[] . $value; # comparing as strings in lower case + $test->{$prop} = any { $value eq lc q[] . $_ } @expected_values; } - # assuming 'AND' for lims properties - return all { $_ } values %{$lims_test}; + # assuming 'AND' for properties + return all { $_ } values %{$test}; } has '_criteria' => ( @@ -510,23 +568,19 @@ sub _build__criteria { $lib_type or croak 'Library type is not defined for ' . $self->_entity_desc; $self->result->library_type($lib_type); - my $criteria_objs = $self->_robo_config->{$CRITERIA_KEY}; - $criteria_objs = $self->_applicability_lims_all($criteria_objs); - # Criteria objects with no lims criteria defined should be returned - # in the above array. An empty array means that all objects had lims - # criteria and none of them was satisfied. - @{$criteria_objs} or return {}; - - (@{$criteria_objs} == 1) or carp 'Multiple criteria sets are satisfied ' . - 'in a robo config for ' . $self->_entity_desc; - + my $num_criteria = scalar @{$self->_applicable_criteria}; + if ($num_criteria == 0) { + return {}; + } elsif ($num_criteria > 1) { + croak 'Multiple criteria sets are satisfied in a robo config'; + } ##### # A very simple criteria format - a list of strings - is used for now. # Each string represents a math expression. It is assumed that the # conjunction operator should be used to form the boolean expression # that should give the result of the evaluation. # - my @c = uniq sort @{$criteria_objs->[0]->{$ACCEPTANCE_CRITERIA_KEY}}; + my @c = uniq sort @{$self->_applicable_criteria->[0]->{$ACCEPTANCE_CRITERIA_KEY}}; return @c ? {$CONJUNCTION_OP => \@c} : {}; } @@ -787,6 +841,8 @@ __END__ =item st::api::lims +=item npg_tracking::illumina::runfolder + =item npg_tracking::util::pipeline_config =back diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t index e8595507a..772792199 100644 --- a/t/60-autoqc-checks-review.t +++ b/t/60-autoqc-checks-review.t @@ -19,6 +19,7 @@ use_ok('npg_qc::autoqc::checks::review'); my $dir = tempdir( CLEANUP => 1 ); my $test_data_dir = 't/data/autoqc/review'; my $conf_file_path = "$test_data_dir/product_release.yml"; +my $rf_path = 't/data/autoqc/200117_A00715_0080_BHY7T3DSXX'; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/autoqc/review/samplesheet_27483.csv'; @@ -57,11 +58,10 @@ subtest 'constructing object, deciding whether to run' => sub { my $can_run; warnings_like { $can_run = $check->can_run } - [qr/Study config not found for/], + [qr/Study config not found/, qr/RoboQC configuration is absent/], 'can_run is accompanied by warnings'; ok (!$can_run, 'can_run returns false - no study config'); - is ($check->result->comments, - 'Product configuration for RoboQC is absent', + like ($check->result->comments, qr/RoboQC configuration is absent/, 'reason logged'); lives_ok { $check->execute() } 'cannot run, but execute method runs OK'; is ($check->result->pass, undef, @@ -74,11 +74,10 @@ subtest 'constructing object, deciding whether to run' => sub { qc_in => $test_data_dir, rpt_list => '27483:1:2'); warnings_like { $can_run = $check->can_run } - [qr/robo_qc section is not present for/], + [qr/robo_qc section is not present for/, qr/Review check cannot be run/], 'can_run is accompanied by warnings'; ok (!$can_run, 'can_run returns false - no robo config'); - is ($check->result->comments, - 'Product configuration for RoboQC is absent', + is ($check->result->comments, 'RoboQC configuration is absent', 'reason logged'); $check = npg_qc::autoqc::checks::review->new( @@ -108,10 +107,10 @@ subtest 'constructing object, deciding whether to run' => sub { qc_in => $test_data_dir, rpt_list => '27483:1:2'); throws_ok { $check->can_run } - qr/should have the acceptance_criteria key defined/, + qr/should have both the applicability_criteria and acceptance_criteria key/, 'error in can_run when acceptance criteria not defined'; throws_ok { $check->execute } - qr/should have the acceptance_criteria key defined/, + qr/should have both the applicability_criteria and acceptance_criteria key/, 'error in execute when acceptance criteria not defined'; $check = npg_qc::autoqc::checks::review->new( @@ -119,11 +118,11 @@ subtest 'constructing object, deciding whether to run' => sub { qc_in => $test_data_dir, rpt_list => '27483:1:2'); throws_ok { $check->can_run } - qr/should have the applicability_criteria key defined/, + qr/should have both the applicability_criteria and acceptance_criteria key/, 'error in can_run when applicability criteria not defined ' . 'in one of multiple criterium objects'; throws_ok { $check->execute } - qr/should have the applicability_criteria key defined/, + qr/should have both the applicability_criteria and acceptance_criteria key/, 'error in execute when applicability criteria not defined ' . 'in one of multiple criterium objects'; @@ -155,8 +154,10 @@ subtest 'constructing object, deciding whether to run' => sub { conf_path => "$test_data_dir/with_na_criteria", qc_in => $test_data_dir, rpt_list => '27483:1:2'); - ok ($check->can_run, 'can_run returns true'); - ok (!$check->result->comments, 'No comments logged'); + ok (!$check->can_run, 'can_run returns false'); + is ($check->result->comments, + 'None of the RoboQC applicability criteria is satisfied', + 'Comment logged'); }; subtest 'caching appropriate criteria object' => sub { @@ -194,7 +195,8 @@ subtest 'execute when no criteria apply' => sub { lives_ok { $check->execute } 'no error in execute when no criteria apply'; my $result = $check->result; - is ($result->comments, 'RoboQC is not applicable', + is ($result->comments, + 'None of the RoboQC applicability criteria is satisfied', 'correct comment logged'); is_deeply ($result->criteria, {}, 'empty criteria hash'); is_deeply ($result->qc_outcome, {}, 'empty qc_outcome hash'); @@ -399,15 +401,28 @@ subtest 'single expression evaluation' => sub { }; subtest 'evaluation within the execute method' => sub { - plan tests => 38; + plan tests => 42; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/autoqc/review/samplesheet_29524.csv'; my $rpt_list = '29524:1:2;29524:2:2;29524:3:2;29524:4:2'; + # NovaSeq run is required, MiSeq is given + my $o = npg_qc::autoqc::checks::review->new( + runfolder_path => 't/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2', + conf_path => $test_data_dir, + qc_in => $dir, + rpt_list => $rpt_list + ); + ok (!$o->can_run, 'the check cannot be run'); + is_deeply($o->_criteria, {}, 'no criteria to evaluate'); + lives_ok { $o->execute } 'execute method runs OK'; + is ($o->result->pass, undef, 'result pass attribute is unset'); + my @check_objects = (); push @check_objects, npg_qc::autoqc::checks::review->new( + runfolder_path => $rf_path, conf_path => $test_data_dir, qc_in => $dir, rpt_list => $rpt_list); @@ -465,6 +480,7 @@ subtest 'evaluation within the execute method' => sub { $check = npg_qc::autoqc::checks::review->new( final_qc_outcome => 1, + runfolder_path => $rf_path, conf_path => $test_data_dir, qc_in => $dir, rpt_list => $rpt_list); @@ -501,6 +517,7 @@ subtest 'error in evaluation' => sub { my $rpt_list = '29524:1:2;29524:2:2;29524:3:2;29524:4:2'; my $check = npg_qc::autoqc::checks::review->new( + runfolder_path => $rf_path, conf_path => $test_data_dir, qc_in => $dir, rpt_list => $rpt_list, @@ -510,6 +527,7 @@ subtest 'error in evaluation' => sub { 'final outcome - not capturing the error'; $check = npg_qc::autoqc::checks::review->new( + runfolder_path => $rf_path, conf_path => $test_data_dir, qc_in => $dir, rpt_list => $rpt_list, diff --git a/t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml b/t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml new file mode 100644 index 000000000..a94c6a10f --- /dev/null +++ b/t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml @@ -0,0 +1,89 @@ + + + MiSeq_1_3 + + 000000000-LMK9T + 15028382 + 2025-05-02T00:00:00 + 20852102 + + + MS1227233-00PR2 + 15041807 + 2025-05-17T00:00:00 + 20855811 + + + MS2060060-050V2 + 15033571 + 2024-11-16T00:00:00 + 20829667 + + true + 0 + + Post-Run Wash + + true + 4.1.0.656 + 14 + 1 + 1 + MiSeq Control Software + + 240806_MS7_49308_A_MS2060060-050V2 + M02088 + 19 + 9.5.12 + 4.1.0.656 + 1.18.54 + 000000000-LMK9T + MS1227233-00PR2 + 15033571 + Version2 + MS2060060-050V2 + + + 49308 + + Custom + Custom + Default + sbsUser + false + + + + + D:\Illumina\MiSeqTemp\240806_MS7_49308_A_MS2060060-050V2 + D:\Illumina\RTATemp\240806_MS7_49308_A_MS2060060-050V2 + D:\Illumina\MiSeqAnalysis\240806_MS7_49308_A_MS2060060-050V2 + 240806 + MissedPostRunWash + C:\ProgramData\Illumina\MiSeq Control Software\Recipe + C:\Program Files\Illumina\MiSeq Control Software\Recipe + MS2060060-50V2 + \\ms7-esa.dnapipelines.sanger.ac.uk\staging\IL_seq_data\samplesheets + \\ms7-esa.dnapipelines.sanger.ac.uk\staging\IL_seq_data\samplesheets\MS2060060-50V2.csv + D:\Illumina\MiSeq Control Software\Manifests + \\ms7-esa.dnapipelines.sanger.ac.uk\staging\IL_seq_data\incoming\240806_MS7_49308_A_MS2060060-050V2 + AutoFocus + Both + false + true + SampleSheet + 283641369 + 283641369 + Ruo + 49308 + GenerateFastQWorkflow + 3.0.1.49 + + + IpdRequested + false + false + false + 1B94042F8DE72858 + 18019 + \ No newline at end of file diff --git a/t/data/autoqc/review/mqc_type/product_release.yml b/t/data/autoqc/review/mqc_type/product_release.yml index 3f7bd1c35..b0db0bd61 100644 --- a/t/data/autoqc/review/mqc_type/product_release.yml +++ b/t/data/autoqc/review/mqc_type/product_release.yml @@ -26,8 +26,7 @@ study: robo_qc: qc_type: "mqc" criteria: - - library_type: - - "HiSeqX PCR free" + - applicability_criteria: acceptance_criteria : - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" - "bam_flagstats.target_mapped_bases > 85_000_000_000" diff --git a/t/data/autoqc/review/no_applicability4single/product_release.yml b/t/data/autoqc/review/no_applicability4single/product_release.yml index 3403d0f22..b52bbb379 100644 --- a/t/data/autoqc/review/no_applicability4single/product_release.yml +++ b/t/data/autoqc/review/no_applicability4single/product_release.yml @@ -25,7 +25,8 @@ study: component_cache_dir: "/merge_component_cache/5392/" robo_qc: criteria: - - acceptance_criteria : + - applicability_criteria: + acceptance_criteria: - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" - "bam_flagstats.target_mapped_bases > 85_000_000_000" - "bam_flagstats.target_percent_gt_coverage_threshold > 95" diff --git a/t/data/autoqc/review/product_release.yml b/t/data/autoqc/review/product_release.yml index 47562ef18..2203a4ef6 100644 --- a/t/data/autoqc/review/product_release.yml +++ b/t/data/autoqc/review/product_release.yml @@ -29,6 +29,11 @@ study: lims: library_type: - "HiSeqX PCR free" + sequencing_run: + platform_NovaSeq: + - 1 + workflow_type: + - "NovaSeqStandard" acceptance_criteria : - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" - "bam_flagstats.target_mapped_bases > 85_000_000_000" From 1a807394a2477ef7bcdf63c8e76e62c620876f9b Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 21 Aug 2024 16:20:58 +0100 Subject: [PATCH 3/6] Robo in default section (#871) * Eligibility criteria from seq run meta. * Added ROBO criteria validation to the can_run method * Consistent logging of error messages in can_run * Added robo_qc consideration within the default section --- Changes | 2 + MANIFEST | 3 + lib/npg_qc/autoqc/checks/review.pm | 64 ++++++++++--------- t/60-autoqc-checks-review.t | 54 +++++++++++++++- .../product_release.yml | 43 +++++++++++++ .../default_section/product_release.yml | 25 ++++++++ .../product_release.yml | 45 +++++++++++++ 7 files changed, 204 insertions(+), 32 deletions(-) create mode 100644 t/data/autoqc/review/default_and_study_section/product_release.yml create mode 100644 t/data/autoqc/review/default_section/product_release.yml create mode 100644 t/data/autoqc/review/wrong_default_and_study_section/product_release.yml diff --git a/Changes b/Changes index c6cee83e7..392ee1ded 100644 --- a/Changes +++ b/Changes @@ -7,6 +7,8 @@ LIST OF CHANGES FOR NPG-QC PACKAGE 2. Added a new applicability criteria type - 'sequencing_run' 3. To avoid running the review check unnecessarily in the pipeline context, moved ROBO criteria validation to the can_run method. + 4. Introduced an option of defining the ROBO section within the default + section of the product configuration file. release 72.1.2 (2024-08-05) - handle cases where the same tag sequence is seen in both index reads diff --git a/MANIFEST b/MANIFEST index 32cf09d1e..f41f24c7f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -522,6 +522,9 @@ t/data/autoqc/review/not_hash/product_release.yml t/data/autoqc/review/mqc_type/product_release.yml t/data/autoqc/review/unknown_qc_type/product_release.yml t/data/autoqc/review/uqc_type/product_release.yml +t/data/autoqc/review/default_and_study_section/product_release.yml +t/data/autoqc/review/default_section/product_release.yml +t/data/autoqc/review/wrong_default_and_study_section/product_release.yml t/data/autoqc/review/samplesheet_27483.csv t/data/autoqc/review/samplesheet_29524.csv t/data/autoqc/review/29524#2.composition.json diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm index aadab26ff..08a242676 100644 --- a/lib/npg_qc/autoqc/checks/review.pm +++ b/lib/npg_qc/autoqc/checks/review.pm @@ -58,24 +58,26 @@ npg_qc::autoqc::checks::review This checks evaluates the results of other autoqc checks against a predefined set of criteria. -If data product acceptance criteria for a project are defined, it -is possible to introduce a degree of automation into the manual -QC process. To provide interoperability with the API supporting -the manual QC process, the outcome of the evaluation, which is -performed by this check, is recorded not only as a simple undefined, -pass or fail as in other autoqc checks, but also as one of valid -manual or user QC outcomes. +If data product acceptance criteria are defined, it is possible to +introduce a degree of automation into the manual QC process. To +provide interoperability with the API supporting the manual QC process, +the outcome of the evaluation, which is performed by this check, is +recorded not only as a simple undefined, pass or fail as in other autoqc +checks, but also as one of valid manual or user QC outcomes. =head2 Types of criteria -The robo section of the product configuration file sits within -the configuration for a particular study. Evaluation criteria for -samples in the same study might vary depending on the sequencing +The robo section of the product configuration file sits either +within the configuration for a particular study or in the default +section, or in both locations. A study-specific RoboQC definition +takes precedence over the default one. + +Evaluation criteria for samples vary depending on the sequencing instrument type, library type, sample type, etc. There might be a need to exclude some samples from RoboQC. The criteria key of the -robo configuration points to an array of criteria objects each of -which contains two further keys, one for acceptance and one for -applicability criteria. The acceptance criteria are evaluated +robo configuration points to an array of criteria objects. Each of +the criteria contains two further keys, one for acceptance and one +for applicability criteria. The acceptance criteria are evaluated if either the applicability criteria have been satisfied or no applicability criteria are defined. @@ -84,8 +86,11 @@ set in such a way that the order of evaluation of the criteria array does not matter. If applicability criteria in all of the criteria objects are not satisfied, no QC outcome is assigned and the pass attribute of the review result object remains unset. -Within a study the product can satisfy applicability criteria -in at most one criteria object. + +The product can satisfy applicability criteria in at most one +criteria object. If none of the study-specific applicability +criteria are satisfied, the review check does not proceed even if +the product might satisfy one of the default applicability criteria. =head2 QC outcomes @@ -455,22 +460,23 @@ sub _build__robo_config { my $strict = 1; # Parse study section only, ignore the default section. my $config = $self->study_config($self->lims(), $strict); - if ($config and keys %{$config}) { + if ($config) { $config = $config->{$ROBO_KEY}; - $config or carp - "$ROBO_KEY section is not present for " . $self->_entity_desc; - if ($config) { - (ref $config eq 'HASH') or croak - 'Robo config should be a hash in a config for ' . $self->_entity_desc; - if (keys %{$config}) { - $self->_validate_criteria($config); - } else { - carp 'Robo section of the product config is empty for ' . - $self->_entity_desc; - } + } + + if (!$config) { + carp 'Study-specific RoboQC config not found for ' . $self->_entity_desc; + $config = $self->default_study_config()->{$ROBO_KEY}; + } + + if ($config) { + (ref $config eq 'HASH') or croak + 'Robo config should be a hash in a config for ' . $self->_entity_desc; + if (keys %{$config}) { + $self->_validate_criteria($config); + } else { + carp 'RoboQC section of the product config file is empty'; } - } else { - carp 'Study config not found for ' . $self->_entity_desc; } $config ||= {}; diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t index 772792199..36b769a3c 100644 --- a/t/60-autoqc-checks-review.t +++ b/t/60-autoqc-checks-review.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 11; +use Test::More tests => 12; use Test::Exception; use Test::Warn; use File::Temp qw/tempdir/; @@ -58,7 +58,7 @@ subtest 'constructing object, deciding whether to run' => sub { my $can_run; warnings_like { $can_run = $check->can_run } - [qr/Study config not found/, qr/RoboQC configuration is absent/], + [qr/Study-specific RoboQC config not found/, qr/RoboQC configuration is absent/], 'can_run is accompanied by warnings'; ok (!$can_run, 'can_run returns false - no study config'); like ($check->result->comments, qr/RoboQC configuration is absent/, @@ -74,7 +74,7 @@ subtest 'constructing object, deciding whether to run' => sub { qc_in => $test_data_dir, rpt_list => '27483:1:2'); warnings_like { $can_run = $check->can_run } - [qr/robo_qc section is not present for/, qr/Review check cannot be run/], + [qr/Study-specific RoboQC config not found/, qr/Review check cannot be run/], 'can_run is accompanied by warnings'; ok (!$can_run, 'can_run returns false - no robo config'); is ($check->result->comments, 'RoboQC configuration is absent', @@ -500,6 +500,54 @@ subtest 'evaluation within the execute method' => sub { } }; +subtest 'study-specific vs default robo definition' => sub { + plan tests => 11; + + local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = + 't/data/autoqc/review/samplesheet_29524.csv'; + my $rpt_list = '29524:1:2;29524:2:2;29524:3:2;29524:4:2'; + + # robo config in the default section only + my $check = npg_qc::autoqc::checks::review->new( + runfolder_path => $rf_path, + conf_path => "$test_data_dir/default_section", + qc_in => $test_data_dir, + rpt_list => $rpt_list + ); + ok ($check->can_run, 'the check can run'); + lives_ok { $check->execute } 'execute method runs OK'; + is ($check->result->pass, 1, 'result pass attribute is set to 1'); + my %expected = map { $_ => 1 } @{$criteria_list}; + is_deeply ($check->result->evaluation_results(), \%expected, + 'evaluation results are saved'); + is ($check->result->qc_outcome->{'mqc_outcome'} , 'Accepted preliminary', + 'correct outcome string'); + + # robo config in the default section and in the study section (empty) + $check = npg_qc::autoqc::checks::review->new( + runfolder_path => $rf_path, + conf_path => "$test_data_dir/default_and_study_section", + qc_in => $test_data_dir, + rpt_list => $rpt_list + ); + ok ($check->can_run, 'the check cannot run'); + + # invalid robo config in the default section, valid in the study section + $check = npg_qc::autoqc::checks::review->new( + runfolder_path => $rf_path, + conf_path => "$test_data_dir/wrong_default_and_study_section", + qc_in => $test_data_dir, + rpt_list => $rpt_list + ); + ok ($check->can_run, 'the check can run'); + lives_ok { $check->execute } 'execute method runs OK'; + is ($check->result->pass, 1, 'result pass attribute is set to 1'); + is_deeply ($check->result->evaluation_results(), \%expected, + 'evaluation results are saved'); + is ($check->result->qc_outcome->{'mqc_outcome'} , 'Accepted preliminary', + 'correct outcome string'); +}; + subtest 'error in evaluation' => sub { plan tests => 5; diff --git a/t/data/autoqc/review/default_and_study_section/product_release.yml b/t/data/autoqc/review/default_and_study_section/product_release.yml new file mode 100644 index 000000000..2b465394c --- /dev/null +++ b/t/data/autoqc/review/default_and_study_section/product_release.yml @@ -0,0 +1,43 @@ +--- +default: + s3: + enable: false + url: null + notify: false + irods: + enable: true + notify: false + robo_qc: + criteria: + - applicability_criteria: + lims: + library_type: + - "HiSeqX PCR free" + sequencing_run: + platform_NovaSeq: + - 1 + workflow_type: + - "NovaSeqStandard" + acceptance_criteria : + - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" + - "bam_flagstats.target_mapped_bases > 85_000_000_000" + - "bam_flagstats.target_percent_gt_coverage_threshold > 95" + - "verify_bam_id.freemix < 0.01" + - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02" + +study: + - study_id: "5392" + s3: + enable: true + url: "gs://profile_one-europe-west2" + date_binning: true + customer_name: "UK Organisation" + profile: "profile_one" + notify: true + receipts: "/data_product_receipts/5392/" + irods: + enable: false + notify: true + merge: + component_cache_dir: "/merge_component_cache/5392/" + robo_qc: diff --git a/t/data/autoqc/review/default_section/product_release.yml b/t/data/autoqc/review/default_section/product_release.yml new file mode 100644 index 000000000..ca3d9efab --- /dev/null +++ b/t/data/autoqc/review/default_section/product_release.yml @@ -0,0 +1,25 @@ +--- +default: + irods: + enable: true + robo_qc: + criteria: + - applicability_criteria: + lims: + library_type: + - "HiSeqX PCR free" + sequencing_run: + platform_NovaSeq: + - 1 + workflow_type: + - "NovaSeqStandard" + acceptance_criteria : + - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" + - "bam_flagstats.target_mapped_bases > 85_000_000_000" + - "bam_flagstats.target_percent_gt_coverage_threshold > 95" + - "verify_bam_id.freemix < 0.01" + - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02" +study: + - study_id: "5392" + irods: + enable: false diff --git a/t/data/autoqc/review/wrong_default_and_study_section/product_release.yml b/t/data/autoqc/review/wrong_default_and_study_section/product_release.yml new file mode 100644 index 000000000..ecc244942 --- /dev/null +++ b/t/data/autoqc/review/wrong_default_and_study_section/product_release.yml @@ -0,0 +1,45 @@ +--- +default: + s3: + enable: false + url: null + notify: false + irods: + enable: true + notify: false + robo_qc: + - acceptance_criteria: + - "verify_bam_id.freemix < 0.001" + +study: + - study_id: "5392" + s3: + enable: true + url: "gs://profile_one-europe-west2" + date_binning: true + customer_name: "UK Organisation" + profile: "profile_one" + notify: true + receipts: "/data_product_receipts/5392/" + irods: + enable: false + notify: true + merge: + component_cache_dir: "/merge_component_cache/5392/" + robo_qc: + criteria: + - applicability_criteria: + lims: + library_type: + - "HiSeqX PCR free" + sequencing_run: + platform_NovaSeq: + - 1 + workflow_type: + - "NovaSeqStandard" + acceptance_criteria: + - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" + - "bam_flagstats.target_mapped_bases > 85_000_000_000" + - "bam_flagstats.target_percent_gt_coverage_threshold > 95" + - "verify_bam_id.freemix < 0.01" + - "( bcfstats.genotypes_nrd_dividend / bcfstats.genotypes_nrd_divisor ) < 0.02" From 5d421a4a3ad65e8ff7598d79ae34854b378611b4 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Wed, 28 Aug 2024 16:21:07 +0100 Subject: [PATCH 4/6] Explained QC outcome change for complex products --- docs/qc_outcomes_change_howto.md | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/docs/qc_outcomes_change_howto.md b/docs/qc_outcomes_change_howto.md index b7a8b2459..ed1421986 100644 --- a/docs/qc_outcomes_change_howto.md +++ b/docs/qc_outcomes_change_howto.md @@ -5,6 +5,41 @@ - Wrap database changes into a transaction, which initially should have a clause to fail it so that it can be tried out (see some examples below). +All listed below procedures were originally written for products containing +one component. Procedures for changing the already existing QC outcomes should +also work for products containing multiple components as long as these +components are from the same run and are for the same tag index. Use any lane +number (position) that went into the merged product. For example, for a product +described by JSON below + +``` +{ + "__CLASS__": "npg_tracking::glossary::composition-101.3.0", + "components": [ + { + "__CLASS__": "npg_tracking::glossary::composition::component::illumina-101.3.0", + "id_run": 49404, + "position": 7, + "tag_index": 1 + }, + { + "__CLASS__": "npg_tracking::glossary::composition::component::illumina-101.3.0", + "id_run": 49404, + "position": 8, + "tag_index": 1 + } + ] +} +``` + +use the following expression for a search when toggling the QC outcome + +``` +my $rs=npg_qc::Schema->connect()->resultset("MqcLibraryOutcomeEnt") + ->search_autoqc({id_run=>49494,position=>7,tag_index=>1}); +``` + + ## Toggle the outcome of a single library QC outcome will change from `pass` to `fail` or `fail` to `pass`. From 37330d1d3dc4fef644c749826daf47737c2f81dd Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Thu, 29 Aug 2024 10:52:27 +0100 Subject: [PATCH 5/6] Post-review text improvements --- docs/qc_outcomes_change_howto.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/qc_outcomes_change_howto.md b/docs/qc_outcomes_change_howto.md index ed1421986..8c474ddab 100644 --- a/docs/qc_outcomes_change_howto.md +++ b/docs/qc_outcomes_change_howto.md @@ -6,13 +6,13 @@ a clause to fail it so that it can be tried out (see some examples below). All listed below procedures were originally written for products containing -one component. Procedures for changing the already existing QC outcomes should -also work for products containing multiple components as long as these +one component. Existing procedures for changing the already existing QC outcomes +should also work for products containing multiple components as long as these components are from the same run and are for the same tag index. Use any lane number (position) that went into the merged product. For example, for a product described by JSON below -``` +```json { "__CLASS__": "npg_tracking::glossary::composition-101.3.0", "components": [ @@ -39,6 +39,13 @@ my $rs=npg_qc::Schema->connect()->resultset("MqcLibraryOutcomeEnt") ->search_autoqc({id_run=>49494,position=>7,tag_index=>1}); ``` +The custom `search_autoqc` method returns a resultset containing QC outcomes for +products which contain a component with position 7 and tag_index 1, which will +be the merged product described by the above JSON. If the QC database records +are correct, the resultset should contain one row. The same resultset is +returned if 8 is used as the value of the position. There should not be any need +to repeat the process for every position that went into the merge and in a case +of a toggle this would be wrong. ## Toggle the outcome of a single library From dc79857587d7b0e0806beae1cf82ca58f42579fd Mon Sep 17 00:00:00 2001 From: jmtcsngr Date: Fri, 30 Aug 2024 12:35:42 +0100 Subject: [PATCH 6/6] prep release 72.2.0 --- Changes | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Changes b/Changes index 392ee1ded..a6e5b955b 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,6 @@ LIST OF CHANGES FOR NPG-QC PACKAGE +release 72.2.0 (2024-08-30) - npg_qc::autoqc::check::review: 1. To enable access to information about a sequencing run (from RunInfo.xml, {r,R}unParameters.xml), added an optional 'runfolder_path' attribute. @@ -9,6 +10,7 @@ LIST OF CHANGES FOR NPG-QC PACKAGE moved ROBO criteria validation to the can_run method. 4. Introduced an option of defining the ROBO section within the default section of the product configuration file. + - Explained QC outcome change for complex products (merged) release 72.1.2 (2024-08-05) - handle cases where the same tag sequence is seen in both index reads