Skip to content

Commit

Permalink
Merge branch 'DOR-707_fix_trimmed_summary' into 'master'
Browse files Browse the repository at this point in the history
DOR-707 Fix summary output when data is trimmed with no move table

Closes DOR-707

See merge request machine-learning/dorado!1003
  • Loading branch information
kdolan1973 committed May 16, 2024
2 parents bd2f026 + 67090cb commit 70ff95d
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 12 deletions.
31 changes: 21 additions & 10 deletions dorado/demux/Trimmer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,17 +142,24 @@ BamPtr Trimmer::trim_sequence(bam1_t* input_record, std::pair<int, int> trim_int
auto trimmed_seq = utils::trim_sequence(seq, trim_interval);
auto trimmed_qual = utils::trim_quality(qual, trim_interval);
auto [positions_trimmed, trimmed_moves] = utils::trim_move_table(move_vals, trim_interval);
if (ts >= 0) {
ts += positions_trimmed * stride;
}
if (ns >= 0) {
// After sequence trimming, the number of samples corresponding to the sequence is the size of
// the new move table * stride. However, the ns tag includes the number of samples trimmed from the
// front of the read as well.
// |---------------------- ns ------------------|
// |----ts----|--------moves signal-------------|
ns = int(trimmed_moves.size() * stride) + ts;

if (move_vals.empty()) {
ns = -1;
ts = -1;
} else {
if (ts >= 0) {
ts += positions_trimmed * stride;
}
if (ns >= 0) {
// After sequence trimming, the number of samples corresponding to the sequence is the size of
// the new move table * stride. However, the ns tag includes the number of samples trimmed from
// the front of the read as well. If ts is negative, the tag is not present, so treat it as 0.
// |---------------------- ns ------------------|
// |----ts----|--------moves signal-------------|
ns = int(trimmed_moves.size() * stride) + std::max(0, ts);
}
}

auto [trimmed_modbase_str, trimmed_modbase_probs] = utils::trim_modbase_info(
is_seq_reversed ? utils::reverse_complement(seq) : seq, modbase_str, modbase_probs,
is_seq_reversed ? reverse_complement_interval(trim_interval, int(seq.length()))
Expand Down Expand Up @@ -183,9 +190,13 @@ BamPtr Trimmer::trim_sequence(bam1_t* input_record, std::pair<int, int> trim_int

if (ts >= 0) {
bam_aux_update_int(out_record, "ts", ts);
} else if (bam_aux_get(out_record, "ts")) {
bam_aux_del(out_record, bam_aux_get(out_record, "ts"));
}
if (ns >= 0) {
bam_aux_update_int(out_record, "ns", ns);
} else if (bam_aux_get(out_record, "ns")) {
bam_aux_del(out_record, bam_aux_get(out_record, "ns"));
}

return output;
Expand Down
9 changes: 7 additions & 2 deletions dorado/summary/summary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,13 @@ bool SummaryData::write_rows_from_reader(
barcode = "unclassified";
}

float sample_rate = num_samples / duration;
float template_duration = (num_samples - trim_samples) / sample_rate;
float template_duration = duration;
if (num_samples > 0 && duration > 0) {
// If either num_samples or duration are 0 (due to missing tags), then
// we can't properly compute template_duration.
float sample_rate = num_samples / duration;
template_duration = (num_samples - trim_samples) / sample_rate;
}
auto start_time = 0.0;
auto exp_start_time_iter = read_group_exp_start_time.find(rg_value);
if (exp_start_time_iter != read_group_exp_start_time.end()) {
Expand Down

0 comments on commit 70ff95d

Please sign in to comment.