Skip to content

Commit

Permalink
Merge pull request #897 from mgcam/multiple_result_types_copy
Browse files Browse the repository at this point in the history
Updated the expression evaluator.
  • Loading branch information
jmtcsngr authored Dec 16, 2024
2 parents b857661 + 1c6283a commit 5f39e5b
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 66 deletions.
11 changes: 7 additions & 4 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
LIST OF CHANGES FOR NPG-QC PACKAGE

- Excluded study-specific lane-lane assessment in npg_qc::autoqc::check::review
This helps to avoid uncertainty in evaluating lanes where samples belong to
different studies at a price of not being able to perform study-specific
evaluation of one-library lanes.
- Fixes/updates for lane-level RoboQC lane-level assessment:
- Excluded study-specific lane-lane assessment in npg_qc::autoqc::check::review
This helps to avoid uncertainty in evaluating lanes where samples belong to
different studies at a price of not being able to perform study-specific
evaluation of one-library lanes.
- Updated the expression evaluator to allow for multiple types of
result objects to be used in a single expression.

release 74.0.0 (2024-12-02)
- npg_qc::autoqc::qc_store - changed loading of ORM classes into memory
Expand Down
144 changes: 88 additions & 56 deletions lib/npg_qc/autoqc/checks/review.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,36 @@ our $VERSION = '0';
Readonly::Scalar my $CONJUNCTION_OP => q[and];
Readonly::Scalar my $DISJUNCTION_OP => q[or];

Readonly::Scalar my $ROBO_KEY => q[robo_qc];
Readonly::Scalar my $CRITERIA_KEY => q[criteria];
Readonly::Scalar my $QC_TYPE_KEY => q[qc_type];
Readonly::Scalar my $ROBO_KEY => q[robo_qc];
Readonly::Scalar my $CRITERIA_KEY => q[criteria];
Readonly::Scalar my $APPLICABILITY_CRITERIA_KEY => q[applicability_criteria];
Readonly::Scalar my $LIMS_APPLICABILITY_CRITERIA_KEY => q[lims];
Readonly::Scalar my $SEQ_APPLICABILITY_CRITERIA_KEY => q[sequencing_run];
Readonly::Array my @APPLICABILITY_CRITERIA_TYPES => (
$LIMS_APPLICABILITY_CRITERIA_KEY, $SEQ_APPLICABILITY_CRITERIA_KEY
);
Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria];
Readonly::Scalar my $ACCEPTANCE_CRITERIA_KEY => q[acceptance_criteria];

Readonly::Scalar my $QC_TYPE_LIB => q[mqc];
Readonly::Scalar my $QC_TYPE_SEQ => q[mqc_seq];

Readonly::Scalar my $TIMESTAMP_FORMAT_WOFFSET => q[%Y-%m-%dT%T%z];

Readonly::Scalar my $CLASS_NAME_SPEC_DELIM => q[:];
Readonly::Scalar my $CLASS_NAME_SPEC_RE =>
qr/\A(?:\W+)? (\w+) (?: $CLASS_NAME_SPEC_DELIM (\w+))?[.]/xms;
Readonly::Scalar my $CLASS_NAME_SPEC_RE =>
qr{
# Examples:
# "generic:ncov2019_artic_nf.doc->{meta}->{'num_input_reads'} > 10000"
# "tag_metrics.all_reads && (qX_yield.yield1_q30/(tag_metrics.all_reads * 302) > 78)"
(?:\W+)? # optional non-word characters
( # start of capture group
[[:alpha:]_]+ # result class name
(?: $CLASS_NAME_SPEC_DELIM \w+)? # an optional spec for a generic result
# class prepended by a delimeter
) # end of capture group
[.] # ends with a dot
}xms;

## no critic (Documentation::RequirePodAtEnd)

Expand Down Expand Up @@ -663,7 +674,7 @@ sub _build__result_class_names {
my $self = shift;

my @class_names = uniq sort
map { (_class_name_from_expression($_))[0] }
map { _class_names_from_expression($_) }
map { @{$_} } # dereference the array
values %{$self->_criteria()};

Expand Down Expand Up @@ -732,11 +743,26 @@ sub _build__results {
return \%h;
}

sub _class_name_from_expression {
sub _class_names_with_spec_from_expression {
my $e = shift;
my ($class_name, $spec) = $e =~ $CLASS_NAME_SPEC_RE;
$class_name or croak "Failed to infer class name from $e";
return ($class_name, $spec);
# Note 'g' flag at the end of the regexp. We will get a list of matches.
# The list might contain multiple class names for the same class. Depending
# on the expression, the return value contains one or more class names.
my @extended_class_names = $e =~ /$CLASS_NAME_SPEC_RE/xmsg;
@extended_class_names = uniq @extended_class_names; # Remove repetitions.
@extended_class_names or croak "Failed to infer class names from $e";
return @extended_class_names;
}

sub _class_names_from_expression {
my $e = shift;
my @names = ();
foreach my $name (_class_names_with_spec_from_expression($e)) {
($name) = $name =~ /\A(\w+)/smx; # Remove the spec part if present.
push @names, $name;
}
@names = uniq @names;
return @names;
}

#####
Expand Down Expand Up @@ -796,62 +822,68 @@ sub _apply_operator {
return $outcome ? 1 : 0;
}

sub _pp_name2spec {
# To get a match with what would have been used in the robo config,
# replace all 'non-word' characters.
my $pp_name = shift;
$pp_name =~ s/\W/_/gsmx;
return $pp_name;
};

sub _get_evaluation_result {
my ($perl_e, $e, $results) = @_;

use warnings FATAL => 'uninitialized'; # Force an error when operations on
# undefined values are attempted.
##no critic (BuiltinFunctions::ProhibitStringyEval)
my $o = eval $perl_e; # Evaluate Perl expression
##use critic
if ($EVAL_ERROR) {
my $err = $EVAL_ERROR;
croak "Error evaluating expression '$perl_e' derived from '$e': $err";
}

return $o ? 1 : 0;
}

#####
# Evaluates a single expression in the context of available autoqc results.
# If runs successfully, returns 0 or 1, otherwise throws an error.
#
sub _evaluate_expression {
my ($self, $e) = @_;

my ($class_name, $spec) = _class_name_from_expression($e);
my $obj_a = $self->_results->{$class_name};
# We should not get this far with an error in the configuration
# file, but just in case...
$obj_a and @{$obj_a} or croak "No autoqc result for evaluation of '$e'";

if ($spec) {
my $pp_name2spec = sub { # To get a match with what would have been
my $pp_name = shift; # used in the robo config, replace all
$pp_name =~ s/\W/_/gsmx; # 'non-word' characters.
return $pp_name;
};
# Now have to choose one result. If the object is not an instance of
# the generic autoqc result class, there will be an error at this point.
# Making the code less specific is not worth the effort at this point.
my @on_spec = grep { $pp_name2spec->($_->pp_name) eq $spec } @{$obj_a};
@on_spec or croak "No autoqc $class_name result for $spec";
$obj_a = \@on_spec;
}

(@{$obj_a} == 1) or croak "Multiple autoqc results for evaluation of '$e'";
my $obj = $obj_a->[0];

$obj or croak "No autoqc result for evaluation of '$e'";

# Prepare the expression from the robo config for evaluation.
my $placeholder = $class_name;
$spec and ($placeholder .= $CLASS_NAME_SPEC_DELIM . $spec);
my $replacement = q[$] . q[result->];
my @names = _class_names_with_spec_from_expression($e);
my $results = {};
my $perl_e = $e;
$perl_e =~ s/$placeholder[.]/$replacement/xmsg;

my $evaluator = sub { # Evaluation function
my $result = shift;
# Force an error when operations on undefined values are
# are attempted.
use warnings FATAL => 'uninitialized';
##no critic (BuiltinFunctions::ProhibitStringyEval)
my $o = eval $perl_e; # Evaluate Perl string expression
##use critic
if ($EVAL_ERROR) {
my $err = $EVAL_ERROR;
croak "Error evaluating expression '$perl_e' derived from '$e': $err";
# Prepare the expression from the robo config for evaluation.
foreach my $name (@names) {
my ($class_name, $spec) = split /$CLASS_NAME_SPEC_DELIM/smx, $name;
my $obj_a = $self->_results->{$class_name};
# We should not get this far with an error in the configuration
# file, but just in case...
$obj_a and @{$obj_a} or croak "No $class_name autoqc result for evaluation of '$e'";
if ($spec) {
# Now have to choose one result. If the object is not an instance of
# the generic autoqc result class, there will be an error at this point.
# Making the code less specific is not worth the effort at this point.
my @on_spec = grep { _pp_name2spec($_->pp_name) eq $spec } @{$obj_a};
@on_spec or croak "No $name autoqc result for evaluation of '$e'";
$obj_a = \@on_spec;
}
return $o ? 1 : 0;
};
(@{$obj_a} == 1) or croak "Multiple $name autoqc results for evaluation of '$e'";
my $obj = $obj_a->[0];
$obj or croak "No $class_name autoqc result for evaluation of '$e'";
$results->{$name} = $obj;

## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars)
my $replacement = q[$results->{'] . $name . q['}->];
## use critic
$perl_e =~ s/$name[.]/$replacement/xmsg;
}

# Evaluate and return the outcome.
return $evaluator->($obj);
return _get_evaluation_result($perl_e, $e, $results);
}

__PACKAGE__->meta->make_immutable();
Expand Down
19 changes: 13 additions & 6 deletions t/60-autoqc-checks-review.t
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ subtest 'single expression evaluation' => sub {
rpt_list => $rpt_list);

throws_ok {$check->_evaluate_expression('verify_bam.freemix < 0.01')}
qr/No autoqc result for evaluation of/,
qr/No verify_bam autoqc result for evaluation of/,
'error if check name is unknown';
throws_ok {$check->_evaluate_expression('verify_bam_id.freemix_free < 0.01')}
qr/Can't locate object method \"freemix_free\"/,
Expand Down Expand Up @@ -655,7 +655,7 @@ subtest 'evaluating generic for artic results' => sub {
final_qc_outcome => 1);
is ($check->can_run, 1, 'check can run');
throws_ok { $check->execute }
qr/Not able to run evaluation: No autoqc generic result for ncov2019_artic_nf/,
qr/Not able to run evaluation: No generic:ncov2019_artic_nf autoqc result/,
'message as an error';

# qc_in contains two artic generic results for the same product
Expand All @@ -666,7 +666,7 @@ subtest 'evaluating generic for artic results' => sub {
final_qc_outcome => 1);
is ($check->can_run, 1, 'check can run');
throws_ok { $check->execute }
qr/Not able to run evaluation: Multiple autoqc results/,
qr/Not able to run evaluation: Multiple generic:ncov2019_artic_nf autoqc results/,
'message as an error';

# qc_in contains other autoqc results for this entity, including
Expand Down Expand Up @@ -821,7 +821,7 @@ subtest 'evaluating generic for artic results' => sub {
};

subtest 'evaluating for LCMB library type' => sub {
plan tests => 16;
plan tests => 19;

my $test_data_path = 't/data/runfolder_49285';
my $runfolder_name = '240802_A00537_1044_BHJKCGDSXC';
Expand All @@ -843,6 +843,7 @@ subtest 'evaluating for LCMB library type' => sub {
verbose => 0
)->load();

# Sample-level evaluation
my $check = npg_qc::autoqc::checks::review->new(
runfolder_path => $runfolder_path,
conf_path => $test_data_path,
Expand All @@ -858,6 +859,7 @@ subtest 'evaluating for LCMB library type' => sub {
'verify_bam_id.pass'
);
my $result = $check->result();
is ($result->comments(), undef, 'no comments');
is_deeply ($result->evaluation_results(), \%expected_evaluation_results,
'sample evaluation results as expected');
is ($result->pass, 1, 'the check passed');
Expand All @@ -869,6 +871,7 @@ subtest 'evaluating for LCMB library type' => sub {
'sample QC outcome is saved correctly'
);

# Lane-level evaluation
$check = npg_qc::autoqc::checks::review->new(
runfolder_path => $runfolder_path,
conf_path => $test_data_path,
Expand All @@ -878,10 +881,13 @@ subtest 'evaluating for LCMB library type' => sub {
);
lives_ok { $check->execute() } 'lane level check runs OK';
$result = $check->result();
is ($result->comments(), undef, 'no comments');
%expected_evaluation_results = (
'tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent +' .
' tag_metrics.one_mismatch_percent) > 93' => 1,
'tag_metrics.all_reads * 302 > 750000000000' => 1
' tag_metrics.one_mismatch_percent) > 93' => 1,
'tag_metrics.all_reads * 302 > 750000000000' => 1,
'tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) ' .
'* 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)' => 1,
);
is_deeply ($result->evaluation_results(), \%expected_evaluation_results,
'lane evaluation results as expected');
Expand All @@ -906,6 +912,7 @@ subtest 'evaluating for LCMB library type' => sub {
$expected_evaluation_results{$key} = 0;
}
$result = $check->result();
is ($result->comments(), undef, 'no comments');
is_deeply ($result->evaluation_results(), \%expected_evaluation_results,
'lane evaluation results as expected');
is ($result->pass, 0, 'the check failed');
Expand Down
1 change: 1 addition & 0 deletions t/data/runfolder_49285/product_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ default:
acceptance_criteria:
- "tag_metrics.matches_pf_percent && (tag_metrics.perfect_matches_percent + tag_metrics.one_mismatch_percent) > 93"
- "tag_metrics.all_reads * 302 > 750000000000"
- "tag_metrics.all_reads && (((qX_yield.yield1_q30 + qX_yield.yield2_q30) * 1000 * 100)/(tag_metrics.all_reads * 302) >= 78)"

0 comments on commit 5f39e5b

Please sign in to comment.