From 0e5819c612b971400dfdaddb42e2dd8596c90f4c Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 21 Aug 2024 14:50:33 +0100 Subject: [PATCH] Add robo sequencing_run applicability criteria type (#870) * Eligibility criteria from seq run meta. * Added ROBO criteria validation to the can_run method * Consistent logging of error messages in can_run --- Changes | 7 + MANIFEST | 1 + lib/npg_qc/autoqc/checks/review.pm | 176 ++++++++++++------ t/60-autoqc-checks-review.t | 46 +++-- .../RunParameters.xml | 89 +++++++++ .../review/mqc_type/product_release.yml | 3 +- .../product_release.yml | 3 +- t/data/autoqc/review/product_release.yml | 5 + 8 files changed, 253 insertions(+), 77 deletions(-) create mode 100644 t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml diff --git a/Changes b/Changes index 3989322f5..9dfffea0f 100644 --- a/Changes +++ b/Changes @@ -5,6 +5,13 @@ LIST OF CHANGES FOR NPG-QC PACKAGE - To enable access to information about a sequencing run (from RunInfo.xml, {r,R}unParameters.xml), added an optional 'runfolder_path' attribute to the review check. The attribute will have to be set by the caller. + - npg_qc::autoqc::check::review: + 1. To enable access to information about a sequencing run (from RunInfo.xml, + {r,R}unParameters.xml), added an optional 'runfolder_path' attribute. + The attribute will have to be set by the caller. + 2. Added a new applicability criteria type - 'sequencing_run' + 3. To avoid running the review check unnecessarily in the pipeline context, + moved ROBO criteria validation to the can_run method. release 72.1.2 (2024-08-05) - handle cases where the same tag sequence is seen in both index reads diff --git a/MANIFEST b/MANIFEST index 6a01731d1..32cf09d1e 100644 --- a/MANIFEST +++ b/MANIFEST @@ -697,6 +697,7 @@ t/data/autoqc/200114_HS40_32710_A_H72VGBCX3/RunInfo.xml t/data/autoqc/200114_HS40_32710_A_H72VGBCX3/runParameters.xml t/data/autoqc/200114_HS40_32710_A_H72VGBCX3/InterOp/TileMetricsOut.bin t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/InterOp/TileMetricsOut.bin +t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunInfo.xml t/data/autoqc/200117_A00715_0080_BHY7T3DSXX/InterOp/TileMetricsOut.bin t/data/autoqc/200117_A00715_0080_BHY7T3DSXX/InterOp/ExtendedTileMetricsOut.bin diff --git a/lib/npg_qc/autoqc/checks/review.pm b/lib/npg_qc/autoqc/checks/review.pm index 7cfbb9152..aadab26ff 100644 --- a/lib/npg_qc/autoqc/checks/review.pm +++ b/lib/npg_qc/autoqc/checks/review.pm @@ -11,6 +11,7 @@ use Try::Tiny; use WTSI::DNAP::Utilities::Timestamp qw/create_current_timestamp/; use st::api::lims; +use npg_tracking::illumina::runfolder; use npg_qc::autoqc::qc_store; use npg_qc::Schema::Mqc::OutcomeDict; @@ -26,6 +27,8 @@ Readonly::Scalar my $ROBO_KEY => q[robo_qc]; Readonly::Scalar my $CRITERIA_KEY => q[criteria]; Readonly::Scalar my $QC_TYPE_KEY => q[qc_type]; Readonly::Scalar my $APPLICABILITY_CRITERIA_KEY => q[applicability_criteria]; +Readonly::Scalar my $LIMS_APPLICABILITY_CRITERIA_KEY => q[lims]; +Readonly::Scalar my $SEQ_APPLICABILITY_CRITERIA_KEY => q[sequencing_run]; Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria]; Readonly::Scalar my $QC_TYPE_DEFAULT => q[mqc]; @@ -71,18 +74,18 @@ samples in the same study might vary depending on the sequencing instrument type, library type, sample type, etc. There might be a need to exclude some samples from RoboQC. The criteria key of the robo configuration points to an array of criteria objects each of -which could contain two further keys, one for acceptance and one -for applicability criteria. The acceptance criteria are evaluated +which contains two further keys, one for acceptance and one for +applicability criteria. The acceptance criteria are evaluated if either the applicability criteria have been satisfied or no applicability criteria are defined. The applicability criteria for each criteria object should be set in such a way that the order of evaluation of the criteria array does not matter. If applicability criteria in all of the -criteria objects fail are not satisfied, no QC outcome is assigned +criteria objects are not satisfied, no QC outcome is assigned and the pass attribute of the review result object remains unset. -The product cannot satisfy applicability criteria in more than one -criteria object, this is considered an error. +Within a study the product can satisfy applicability criteria +in at most one criteria object. =head2 QC outcomes @@ -238,7 +241,6 @@ has 'runfolder_path' => ( predicate => 'has_runfolder_path', ); - =head2 BUILD A method that is run before returning the new object instance to the caller. @@ -258,22 +260,47 @@ sub BUILD { =head2 can_run -Returns true if the check can be run, meaning a robo configuration -for this product exists. +Returns true if the check can be run, ie a valid RoboQC configuration exists +and one of the applicability criteria is satisfied for this product. =cut sub can_run { my $self = shift; - (keys %{$self->_robo_config()}) and return 1; - $self->result->add_comment( - 'Product configuration for RoboQC is absent'); - return 0; + + my $can_run = 1; + my $message; + + if (!keys %{$self->_robo_config}) { + $message = 'RoboQC configuration is absent'; + $can_run = 0; + } else { + my $num_criteria; + try { + $num_criteria = @{$self->_applicable_criteria}; + } catch { + $message = "Error validating RoboQC criteria: $_"; + $can_run = 0; + }; + if ($can_run && !$num_criteria) { + $message = 'None of the RoboQC applicability criteria is satisfied'; + $can_run = 0; + } + } + + if (!$can_run && $message) { + $self->result->add_comment($message); + carp sprintf 'Review check cannot be run for %s . Reason: %s', + $self->_entity_desc, $message; + } + + return $can_run; } =head2 execute -Returns early without an error if the can_run method returns a false value. +Returns early if the can_run method returns a false value. + An assessment of applicability of running RoboQC on this entity is performed next, an early return is possible after that. If RoboQC is applicable, a full evaluation of autoqc results for this product is performed. If autoqc results @@ -385,6 +412,29 @@ sub _build_lims { return st::api::lims->new(rpt_list => $self->rpt_list); } +=head2 runfolder + +npg_tracking::illumina::runfolder object + +=cut + +has 'runfolder' => ( + isa => 'npg_tracking::illumina::runfolder', + is => 'ro', + lazy_build => 1, +); +sub _build_runfolder { + my $self = shift; + if ($self->has_runfolder_path) { + return npg_tracking::illumina::runfolder->new( + npg_tracking_schema => undef, + runfolder_path => $self->runfolder_path, + id_run => $self->get_id_run() + ); + } + croak 'runfolder_path argument is not set'; +} + has '_entity_desc' => ( isa => 'Str', is => 'ro', @@ -438,62 +488,70 @@ sub _validate_criteria { (ref $criteria_array eq q[ARRAY]) or croak 'Criteria is not a list in a robo config for ' . $self->_entity_desc; - my $num_cobj = @{$criteria_array}; - if ($num_cobj > 1) { - my $test = grep { $_->{$APPLICABILITY_CRITERIA_KEY} } @{$criteria_array}; - ($test == $num_cobj) or croak 'Each criteria object should have the ' . - "$APPLICABILITY_CRITERIA_KEY key defined in a robo config for " . - $self->_entity_desc; - } + @{$criteria_array} or croak 'Criteria list is empty'; - my $test = grep { $_->{$ACCEPTANCE_CRITERIA_KEY} } @{$criteria_array}; - ($test == $num_cobj) or croak 'Each criteria object should have the ' . - "$ACCEPTANCE_CRITERIA_KEY key defined in a robo config for ". - $self->_entity_desc; + foreach my $c ( @{$criteria_array} ) { + (exists $c->{$APPLICABILITY_CRITERIA_KEY} && + exists $c->{$ACCEPTANCE_CRITERIA_KEY}) || croak sprintf + 'Each criteria should have both the %s and %s key present', + $APPLICABILITY_CRITERIA_KEY, $ACCEPTANCE_CRITERIA_KEY; + } return; } -sub _applicability_lims_all { - my ($self, $criteria_objs) = @_; +has '_applicable_criteria' => ( + isa => 'ArrayRef', + is => 'ro', + lazy_build => 1, +); +sub _build__applicable_criteria { + my $self = shift; + my $criteria_objs = $self->_robo_config->{$CRITERIA_KEY}; my @applicable = (); foreach my $co ( @{$criteria_objs} ) { - my $lims_c = $co->{$APPLICABILITY_CRITERIA_KEY}->{lims}; - if (!$lims_c || $self->_applicability_lims($lims_c)) { - push @applicable, $co; + my $c_applicable = 1; + for my $c_type ($LIMS_APPLICABILITY_CRITERIA_KEY, $SEQ_APPLICABILITY_CRITERIA_KEY) { + my $c = $co->{$APPLICABILITY_CRITERIA_KEY}->{$c_type}; + if ($c && !$self->_applicability($c, $c_type)) { + $c_applicable = 0; + last; + } } + $c_applicable or next; + push @applicable, $co; } return \@applicable; } -sub _applicability_lims { - my ($self, $lcriteria) = @_; +sub _applicability { + my ($self, $acriteria, $criteria_type) = @_; - $lcriteria or return 1; + ($acriteria && $criteria_type) or croak + 'The criterium and its type type should be defined'; + (ref $acriteria eq 'HASH') or croak sprintf + '%s section should be a hash in a robo config for %', $criteria_type, $self->_entity_desc; - (ref $lcriteria eq 'HASH') or croak q('lims' section should be a hash ) . - 'in a robo config for ' . $self->_entity_desc; - my $lims_test = {}; - foreach my $lims_prop ( keys %{$lcriteria} ) { - my $ref_test = ref $lcriteria->{$lims_prop}; + my $test = {}; + foreach my $prop ( keys %{$acriteria} ) { + my $ref_test = ref $acriteria->{$prop}; not $ref_test or ($ref_test eq 'ARRAY') or croak - qq(Values for 'lims' property '$lims_prop' are neither a scalar nor ) . + qq(Values for '$criteria_type' property '$prop' are neither a scalar nor ) . 'an array in a robo config for ' . $self->_entity_desc; - my @expected_values = $ref_test ? @{$lcriteria->{$lims_prop}} : ($lcriteria->{$lims_prop}); - my $value = $self->lims->$lims_prop; - $value ||= q[]; - #defined $value or croak - # qq('$lims_prop' - cannot compare to an undefined value); - # comparing as strings in lower case - $value = lc q[] . $value; - $lims_test->{$lims_prop} = any { $value eq lc q[] . $_ } - @expected_values; + my @expected_values = $ref_test ? @{$acriteria->{$prop}} : ($acriteria->{$prop}); + my $value = $criteria_type eq $LIMS_APPLICABILITY_CRITERIA_KEY ? + $self->lims->$prop : $self->runfolder->$prop; + if (!defined $value) { # for example, boolean false values + $value = q[]; + } + $value = lc q[] . $value; # comparing as strings in lower case + $test->{$prop} = any { $value eq lc q[] . $_ } @expected_values; } - # assuming 'AND' for lims properties - return all { $_ } values %{$lims_test}; + # assuming 'AND' for properties + return all { $_ } values %{$test}; } has '_criteria' => ( @@ -510,23 +568,19 @@ sub _build__criteria { $lib_type or croak 'Library type is not defined for ' . $self->_entity_desc; $self->result->library_type($lib_type); - my $criteria_objs = $self->_robo_config->{$CRITERIA_KEY}; - $criteria_objs = $self->_applicability_lims_all($criteria_objs); - # Criteria objects with no lims criteria defined should be returned - # in the above array. An empty array means that all objects had lims - # criteria and none of them was satisfied. - @{$criteria_objs} or return {}; - - (@{$criteria_objs} == 1) or carp 'Multiple criteria sets are satisfied ' . - 'in a robo config for ' . $self->_entity_desc; - + my $num_criteria = scalar @{$self->_applicable_criteria}; + if ($num_criteria == 0) { + return {}; + } elsif ($num_criteria > 1) { + croak 'Multiple criteria sets are satisfied in a robo config'; + } ##### # A very simple criteria format - a list of strings - is used for now. # Each string represents a math expression. It is assumed that the # conjunction operator should be used to form the boolean expression # that should give the result of the evaluation. # - my @c = uniq sort @{$criteria_objs->[0]->{$ACCEPTANCE_CRITERIA_KEY}}; + my @c = uniq sort @{$self->_applicable_criteria->[0]->{$ACCEPTANCE_CRITERIA_KEY}}; return @c ? {$CONJUNCTION_OP => \@c} : {}; } @@ -787,6 +841,8 @@ __END__ =item st::api::lims +=item npg_tracking::illumina::runfolder + =item npg_tracking::util::pipeline_config =back diff --git a/t/60-autoqc-checks-review.t b/t/60-autoqc-checks-review.t index e8595507a..772792199 100644 --- a/t/60-autoqc-checks-review.t +++ b/t/60-autoqc-checks-review.t @@ -19,6 +19,7 @@ use_ok('npg_qc::autoqc::checks::review'); my $dir = tempdir( CLEANUP => 1 ); my $test_data_dir = 't/data/autoqc/review'; my $conf_file_path = "$test_data_dir/product_release.yml"; +my $rf_path = 't/data/autoqc/200117_A00715_0080_BHY7T3DSXX'; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/autoqc/review/samplesheet_27483.csv'; @@ -57,11 +58,10 @@ subtest 'constructing object, deciding whether to run' => sub { my $can_run; warnings_like { $can_run = $check->can_run } - [qr/Study config not found for/], + [qr/Study config not found/, qr/RoboQC configuration is absent/], 'can_run is accompanied by warnings'; ok (!$can_run, 'can_run returns false - no study config'); - is ($check->result->comments, - 'Product configuration for RoboQC is absent', + like ($check->result->comments, qr/RoboQC configuration is absent/, 'reason logged'); lives_ok { $check->execute() } 'cannot run, but execute method runs OK'; is ($check->result->pass, undef, @@ -74,11 +74,10 @@ subtest 'constructing object, deciding whether to run' => sub { qc_in => $test_data_dir, rpt_list => '27483:1:2'); warnings_like { $can_run = $check->can_run } - [qr/robo_qc section is not present for/], + [qr/robo_qc section is not present for/, qr/Review check cannot be run/], 'can_run is accompanied by warnings'; ok (!$can_run, 'can_run returns false - no robo config'); - is ($check->result->comments, - 'Product configuration for RoboQC is absent', + is ($check->result->comments, 'RoboQC configuration is absent', 'reason logged'); $check = npg_qc::autoqc::checks::review->new( @@ -108,10 +107,10 @@ subtest 'constructing object, deciding whether to run' => sub { qc_in => $test_data_dir, rpt_list => '27483:1:2'); throws_ok { $check->can_run } - qr/should have the acceptance_criteria key defined/, + qr/should have both the applicability_criteria and acceptance_criteria key/, 'error in can_run when acceptance criteria not defined'; throws_ok { $check->execute } - qr/should have the acceptance_criteria key defined/, + qr/should have both the applicability_criteria and acceptance_criteria key/, 'error in execute when acceptance criteria not defined'; $check = npg_qc::autoqc::checks::review->new( @@ -119,11 +118,11 @@ subtest 'constructing object, deciding whether to run' => sub { qc_in => $test_data_dir, rpt_list => '27483:1:2'); throws_ok { $check->can_run } - qr/should have the applicability_criteria key defined/, + qr/should have both the applicability_criteria and acceptance_criteria key/, 'error in can_run when applicability criteria not defined ' . 'in one of multiple criterium objects'; throws_ok { $check->execute } - qr/should have the applicability_criteria key defined/, + qr/should have both the applicability_criteria and acceptance_criteria key/, 'error in execute when applicability criteria not defined ' . 'in one of multiple criterium objects'; @@ -155,8 +154,10 @@ subtest 'constructing object, deciding whether to run' => sub { conf_path => "$test_data_dir/with_na_criteria", qc_in => $test_data_dir, rpt_list => '27483:1:2'); - ok ($check->can_run, 'can_run returns true'); - ok (!$check->result->comments, 'No comments logged'); + ok (!$check->can_run, 'can_run returns false'); + is ($check->result->comments, + 'None of the RoboQC applicability criteria is satisfied', + 'Comment logged'); }; subtest 'caching appropriate criteria object' => sub { @@ -194,7 +195,8 @@ subtest 'execute when no criteria apply' => sub { lives_ok { $check->execute } 'no error in execute when no criteria apply'; my $result = $check->result; - is ($result->comments, 'RoboQC is not applicable', + is ($result->comments, + 'None of the RoboQC applicability criteria is satisfied', 'correct comment logged'); is_deeply ($result->criteria, {}, 'empty criteria hash'); is_deeply ($result->qc_outcome, {}, 'empty qc_outcome hash'); @@ -399,15 +401,28 @@ subtest 'single expression evaluation' => sub { }; subtest 'evaluation within the execute method' => sub { - plan tests => 38; + plan tests => 42; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/autoqc/review/samplesheet_29524.csv'; my $rpt_list = '29524:1:2;29524:2:2;29524:3:2;29524:4:2'; + # NovaSeq run is required, MiSeq is given + my $o = npg_qc::autoqc::checks::review->new( + runfolder_path => 't/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2', + conf_path => $test_data_dir, + qc_in => $dir, + rpt_list => $rpt_list + ); + ok (!$o->can_run, 'the check cannot be run'); + is_deeply($o->_criteria, {}, 'no criteria to evaluate'); + lives_ok { $o->execute } 'execute method runs OK'; + is ($o->result->pass, undef, 'result pass attribute is unset'); + my @check_objects = (); push @check_objects, npg_qc::autoqc::checks::review->new( + runfolder_path => $rf_path, conf_path => $test_data_dir, qc_in => $dir, rpt_list => $rpt_list); @@ -465,6 +480,7 @@ subtest 'evaluation within the execute method' => sub { $check = npg_qc::autoqc::checks::review->new( final_qc_outcome => 1, + runfolder_path => $rf_path, conf_path => $test_data_dir, qc_in => $dir, rpt_list => $rpt_list); @@ -501,6 +517,7 @@ subtest 'error in evaluation' => sub { my $rpt_list = '29524:1:2;29524:2:2;29524:3:2;29524:4:2'; my $check = npg_qc::autoqc::checks::review->new( + runfolder_path => $rf_path, conf_path => $test_data_dir, qc_in => $dir, rpt_list => $rpt_list, @@ -510,6 +527,7 @@ subtest 'error in evaluation' => sub { 'final outcome - not capturing the error'; $check = npg_qc::autoqc::checks::review->new( + runfolder_path => $rf_path, conf_path => $test_data_dir, qc_in => $dir, rpt_list => $rpt_list, diff --git a/t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml b/t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml new file mode 100644 index 000000000..a94c6a10f --- /dev/null +++ b/t/data/autoqc/191210_MS2_MiSeq_walk-up_246_A_MS8539685-050V2/RunParameters.xml @@ -0,0 +1,89 @@ + + + MiSeq_1_3 + + 000000000-LMK9T + 15028382 + 2025-05-02T00:00:00 + 20852102 + + + MS1227233-00PR2 + 15041807 + 2025-05-17T00:00:00 + 20855811 + + + MS2060060-050V2 + 15033571 + 2024-11-16T00:00:00 + 20829667 + + true + 0 + + Post-Run Wash + + true + 4.1.0.656 + 14 + 1 + 1 + MiSeq Control Software + + 240806_MS7_49308_A_MS2060060-050V2 + M02088 + 19 + 9.5.12 + 4.1.0.656 + 1.18.54 + 000000000-LMK9T + MS1227233-00PR2 + 15033571 + Version2 + MS2060060-050V2 + + + 49308 + + Custom + Custom + Default + sbsUser + false + + + + + D:\Illumina\MiSeqTemp\240806_MS7_49308_A_MS2060060-050V2 + D:\Illumina\RTATemp\240806_MS7_49308_A_MS2060060-050V2 + D:\Illumina\MiSeqAnalysis\240806_MS7_49308_A_MS2060060-050V2 + 240806 + MissedPostRunWash + C:\ProgramData\Illumina\MiSeq Control Software\Recipe + C:\Program Files\Illumina\MiSeq Control Software\Recipe + MS2060060-50V2 + \\ms7-esa.dnapipelines.sanger.ac.uk\staging\IL_seq_data\samplesheets + \\ms7-esa.dnapipelines.sanger.ac.uk\staging\IL_seq_data\samplesheets\MS2060060-50V2.csv + D:\Illumina\MiSeq Control Software\Manifests + \\ms7-esa.dnapipelines.sanger.ac.uk\staging\IL_seq_data\incoming\240806_MS7_49308_A_MS2060060-050V2 + AutoFocus + Both + false + true + SampleSheet + 283641369 + 283641369 + Ruo + 49308 + GenerateFastQWorkflow + 3.0.1.49 + + + IpdRequested + false + false + false + 1B94042F8DE72858 + 18019 + \ No newline at end of file diff --git a/t/data/autoqc/review/mqc_type/product_release.yml b/t/data/autoqc/review/mqc_type/product_release.yml index 3f7bd1c35..b0db0bd61 100644 --- a/t/data/autoqc/review/mqc_type/product_release.yml +++ b/t/data/autoqc/review/mqc_type/product_release.yml @@ -26,8 +26,7 @@ study: robo_qc: qc_type: "mqc" criteria: - - library_type: - - "HiSeqX PCR free" + - applicability_criteria: acceptance_criteria : - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" - "bam_flagstats.target_mapped_bases > 85_000_000_000" diff --git a/t/data/autoqc/review/no_applicability4single/product_release.yml b/t/data/autoqc/review/no_applicability4single/product_release.yml index 3403d0f22..b52bbb379 100644 --- a/t/data/autoqc/review/no_applicability4single/product_release.yml +++ b/t/data/autoqc/review/no_applicability4single/product_release.yml @@ -25,7 +25,8 @@ study: component_cache_dir: "/merge_component_cache/5392/" robo_qc: criteria: - - acceptance_criteria : + - applicability_criteria: + acceptance_criteria: - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" - "bam_flagstats.target_mapped_bases > 85_000_000_000" - "bam_flagstats.target_percent_gt_coverage_threshold > 95" diff --git a/t/data/autoqc/review/product_release.yml b/t/data/autoqc/review/product_release.yml index 47562ef18..2203a4ef6 100644 --- a/t/data/autoqc/review/product_release.yml +++ b/t/data/autoqc/review/product_release.yml @@ -29,6 +29,11 @@ study: lims: library_type: - "HiSeqX PCR free" + sequencing_run: + platform_NovaSeq: + - 1 + workflow_type: + - "NovaSeqStandard" acceptance_criteria : - "( bam_flagstats.target_proper_pair_mapped_reads / bam_flagstats.target_mapped_reads ) > 0.95" - "bam_flagstats.target_mapped_bases > 85_000_000_000"