From b67bac8e503c8459bcb4e9a786299d7d8359a05b Mon Sep 17 00:00:00 2001
From: Andrew Frantz <andrew.frantz@stjude.org>
Date: Sun, 11 Feb 2024 13:09:40 -0500
Subject: [PATCH] fix: lots of code cleanup

---
 src/derive/command/endedness.rs           |  34 +++--
 src/derive/command/readlen.rs             |   2 +-
 src/derive/endedness.rs                   |   1 +
 src/derive/endedness/compute.rs           | 160 +++-------------------
 src/derive/endedness/results.rs           | 119 ++++++++++++++++
 src/derive/junction_annotation/compute.rs |  57 ++++----
 src/derive/junction_annotation/results.rs |   1 +
 src/derive/readlen/compute.rs             |  25 ++--
 src/derive/strandedness/compute.rs        |  71 +++++-----
 src/utils/read_groups.rs                  |   4 +-
 10 files changed, 238 insertions(+), 236 deletions(-)
 create mode 100644 src/derive/endedness/results.rs

diff --git a/src/derive/command/endedness.rs b/src/derive/command/endedness.rs
index 7532e06..a35b390 100644
--- a/src/derive/command/endedness.rs
+++ b/src/derive/command/endedness.rs
@@ -44,7 +44,7 @@ pub struct DeriveEndednessArgs {
     calc_rpt: bool,
 
     /// Round RPT to the nearest INT before comparing to expected values.
-    /// Appropriate if using `-n` > 0.
+    /// Appropriate if using `-n` > 0. Unrounded value is reported in output.
     #[arg(long, default_value = "false")]
     round_rpt: bool,
 }
@@ -108,18 +108,26 @@ pub fn derive(args: DeriveEndednessArgs) -> anyhow::Result<()> {
             }
         }
 
-        if !record.flags().is_segmented() {
-            ordering_flags.entry(read_group).or_default().unsegmented += 1;
-        } else if record.flags().is_first_segment() && !record.flags().is_last_segment() {
-            ordering_flags.entry(read_group).or_default().first += 1;
-        } else if !record.flags().is_first_segment() && record.flags().is_last_segment() {
-            ordering_flags.entry(read_group).or_default().last += 1;
-        } else if record.flags().is_first_segment() && record.flags().is_last_segment() {
-            ordering_flags.entry(read_group).or_default().both += 1;
-        } else if !record.flags().is_first_segment() && !record.flags().is_last_segment() {
-            ordering_flags.entry(read_group).or_default().neither += 1;
-        } else {
-            unreachable!();
+        match (
+            record.flags().is_segmented(),
+            record.flags().is_first_segment(),
+            record.flags().is_last_segment(),
+        ) {
+            (false, _, _) => {
+                ordering_flags.entry(read_group).or_default().unsegmented += 1;
+            }
+            (true, true, false) => {
+                ordering_flags.entry(read_group).or_default().first += 1;
+            }
+            (true, false, true) => {
+                ordering_flags.entry(read_group).or_default().last += 1;
+            }
+            (true, true, true) => {
+                ordering_flags.entry(read_group).or_default().both += 1;
+            }
+            (true, false, false) => {
+                ordering_flags.entry(read_group).or_default().neither += 1;
+            }
         }
 
         counter.inc();
diff --git a/src/derive/command/readlen.rs b/src/derive/command/readlen.rs
index ccaa1a6..e6b0c51 100644
--- a/src/derive/command/readlen.rs
+++ b/src/derive/command/readlen.rs
@@ -55,7 +55,7 @@ pub fn derive(args: DeriveReadlenArgs) -> anyhow::Result<()> {
         let record = result?;
         let len = record.sequence().len();
 
-        read_lengths.entry(len).and_modify(|e| *e += 1).or_insert(1);
+        *read_lengths.entry(len).or_default() += 1;
 
         counter.inc();
         if counter.time_to_break(&num_records) {
diff --git a/src/derive/endedness.rs b/src/derive/endedness.rs
index bf321e0..6f8d5cc 100644
--- a/src/derive/endedness.rs
+++ b/src/derive/endedness.rs
@@ -1,3 +1,4 @@
 //! Supporting functionality for the `ngs derive endedness` subcommand.
 
 pub mod compute;
+pub mod results;
diff --git a/src/derive/endedness/compute.rs b/src/derive/endedness/compute.rs
index 2b63d26..265fc2d 100644
--- a/src/derive/endedness/compute.rs
+++ b/src/derive/endedness/compute.rs
@@ -1,16 +1,16 @@
 //! Module holding the logic for computing the endedness of a BAM.
 
-use serde::Serialize;
 use std::collections::HashMap;
 use std::collections::HashSet;
 use std::ops::{Add, AddAssign};
 use std::sync::Arc;
 use tracing::warn;
 
+use crate::derive::endedness::results;
 use crate::utils::read_groups::ReadGroupPtr;
 
 /// Struct holding the ordering flags for a single read group.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Default)]
 pub struct OrderingFlagsCounts {
     /// The number of reads without 0x1 set.
     pub unsegmented: usize,
@@ -40,12 +40,6 @@ impl OrderingFlagsCounts {
     }
 }
 
-impl Default for OrderingFlagsCounts {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 impl Add for OrderingFlagsCounts {
     type Output = Self;
 
@@ -70,120 +64,6 @@ impl AddAssign for OrderingFlagsCounts {
     }
 }
 
-/// Struct holding the per read group results for an `ngs derive endedness`
-/// subcommand call.
-#[derive(Debug, Serialize)]
-pub struct ReadGroupDerivedEndednessResult {
-    /// Name of the read group.
-    pub read_group: String,
-
-    /// Whether or not an endedness was determined for this read group.
-    pub succeeded: bool,
-
-    /// The endedness of this read group or "Unknown".
-    pub endedness: String,
-
-    /// The number of reads without 0x1 set.
-    pub unsegmented: usize,
-
-    /// The f+l- read count.
-    pub first: usize,
-
-    /// The f-l+ read count.
-    pub last: usize,
-
-    /// The f+l+ read count.
-    pub both: usize,
-
-    /// The f-l- read count.
-    pub neither: usize,
-
-    /// The reads per template (RPT).
-    /// Only available if `args.calc_rpt` is true.
-    pub rpt: Option<f64>,
-}
-
-impl ReadGroupDerivedEndednessResult {
-    /// Creates a new [`ReadGroupDerivedEndednessResult`].
-    fn new(
-        read_group: String,
-        succeeded: bool,
-        endedness: String,
-        counts: OrderingFlagsCounts,
-        rpt: Option<f64>,
-    ) -> Self {
-        ReadGroupDerivedEndednessResult {
-            read_group,
-            succeeded,
-            endedness,
-            unsegmented: counts.unsegmented,
-            first: counts.first,
-            last: counts.last,
-            both: counts.both,
-            neither: counts.neither,
-            rpt,
-        }
-    }
-}
-
-/// Struct holding the final results for an `ngs derive endedness` subcommand
-/// call.
-#[derive(Debug, Serialize)]
-pub struct DerivedEndednessResult {
-    /// Whether or not the `ngs derive endedness` subcommand succeeded.
-    pub succeeded: bool,
-
-    /// The overall endedness of the file or "Unknown".
-    pub endedness: String,
-
-    /// The number of reads without 0x1 set.
-    pub unsegmented: usize,
-
-    /// The overall f+l- read count.
-    pub first: usize,
-
-    /// The overall f-l+ read count.
-    pub last: usize,
-
-    /// The overall f+l+ read count.
-    pub both: usize,
-
-    /// The overall f-l- read count.
-    pub neither: usize,
-
-    /// The overall reads per template (RPT).
-    /// Only available if `args.calc_rpt` is true.
-    pub rpt: Option<f64>,
-
-    /// Vector of [`ReadGroupDerivedEndednessResult`]s.
-    /// One for each read group in the BAM,
-    /// and potentially one for any reads with an unknown read group.
-    pub read_groups: Vec<ReadGroupDerivedEndednessResult>,
-}
-
-impl DerivedEndednessResult {
-    /// Creates a new [`DerivedEndednessResult`].
-    pub fn new(
-        succeeded: bool,
-        endedness: String,
-        counts: OrderingFlagsCounts,
-        rpt: Option<f64>,
-        read_groups: Vec<ReadGroupDerivedEndednessResult>,
-    ) -> Self {
-        DerivedEndednessResult {
-            succeeded,
-            endedness,
-            unsegmented: counts.unsegmented,
-            first: counts.first,
-            last: counts.last,
-            both: counts.both,
-            neither: counts.neither,
-            rpt,
-            read_groups,
-        }
-    }
-}
-
 /// Calculate the reads per template overall and for each read group.
 fn calculate_reads_per_template(
     read_names: HashMap<String, Vec<ReadGroupPtr>>,
@@ -204,17 +84,16 @@ fn calculate_reads_per_template(
         let read_group_set: HashSet<ReadGroupPtr> = read_groups.iter().cloned().collect();
 
         if read_group_set.len() == 1 {
+            // All found read groups assigned to this QNAME are the same.
+            // We assume this means all the reads came from the same template.
             let read_group = Arc::clone(read_group_set.iter().next().unwrap());
 
-            read_group_reads
-                .entry(Arc::clone(&read_group))
-                .and_modify(|e| *e += num_reads)
-                .or_insert(num_reads);
-            read_group_templates
-                .entry(read_group)
-                .and_modify(|e| *e += 1)
-                .or_insert(1);
+            *read_group_reads.entry(Arc::clone(&read_group)).or_default() += num_reads;
+            *read_group_templates.entry(read_group).or_default() += 1;
         } else {
+            // The QNAME is in multiple read groups.
+            // We assume this means the reads came from multiple templates.
+            // More specifically, we assume that exactly one template will originate from each read group.
             warning_count += 1;
             match warning_count {
                 1..=100 => {
@@ -230,16 +109,10 @@ fn calculate_reads_per_template(
             }
 
             for read_group in read_groups {
-                read_group_reads
-                    .entry(Arc::clone(read_group))
-                    .and_modify(|e| *e += 1)
-                    .or_insert(1);
+                *read_group_reads.entry(Arc::clone(read_group)).or_default() += 1;
             }
             for read_group in read_group_set {
-                read_group_templates
-                    .entry(read_group)
-                    .and_modify(|e| *e += 1)
-                    .or_insert(1);
+                *read_group_templates.entry(read_group).or_default() += 1;
             }
         }
     }
@@ -268,7 +141,7 @@ fn predict_endedness(
     paired_deviance: f64,
     reads_per_template: Option<f64>,
     round_rpt: bool,
-) -> ReadGroupDerivedEndednessResult {
+) -> results::ReadGroupDerivedEndednessResult {
     let unsegmented = rg_ordering_flags.unsegmented;
     let first = rg_ordering_flags.first;
     let last = rg_ordering_flags.last;
@@ -282,7 +155,7 @@ fn predict_endedness(
             "No reads were detected in this read group: {}",
             read_group_name
         );
-        return ReadGroupDerivedEndednessResult::new(
+        return results::ReadGroupDerivedEndednessResult::new(
             read_group_name,
             false,
             "Unknown".to_string(),
@@ -291,7 +164,7 @@ fn predict_endedness(
         );
     }
 
-    let mut result = ReadGroupDerivedEndednessResult::new(
+    let mut result = results::ReadGroupDerivedEndednessResult::new(
         read_group_name,
         false,
         "Unknown".to_string(),
@@ -331,6 +204,7 @@ fn predict_endedness(
     }
     // only both present
     if first == 0 && last == 0 && both > 0 && neither == 0 {
+        // Prior logic (before addition of unsegmented checks) left as comment for posterity
         // match reads_per_template {
         //     Some(rpt) => {
         //         if rpt == 1.0 || (round_rpt && rpt.round() as usize == 1) {
@@ -389,7 +263,7 @@ pub fn predict(
     read_names: HashMap<String, Vec<ReadGroupPtr>>,
     paired_deviance: f64,
     round_rpt: bool,
-) -> DerivedEndednessResult {
+) -> results::DerivedEndednessResult {
     let mut rg_rpts: HashMap<ReadGroupPtr, f64> = HashMap::new();
     let mut overall_rpt: Option<f64> = None;
     if !read_names.is_empty() {
@@ -420,7 +294,7 @@ pub fn predict(
         round_rpt,
     );
 
-    DerivedEndednessResult::new(
+    results::DerivedEndednessResult::new(
         overall_result.succeeded,
         overall_result.endedness,
         overall_flags,
diff --git a/src/derive/endedness/results.rs b/src/derive/endedness/results.rs
new file mode 100644
index 0000000..aef11c9
--- /dev/null
+++ b/src/derive/endedness/results.rs
@@ -0,0 +1,119 @@
+//! Module holding the results structs for the `ngs derive endedness` subcommand.
+
+use serde::Serialize;
+
+use crate::derive::endedness::compute::OrderingFlagsCounts;
+
+/// Struct holding the per read group results for an `ngs derive endedness`
+/// subcommand call.
+#[derive(Debug, Serialize)]
+pub struct ReadGroupDerivedEndednessResult {
+    /// Name of the read group.
+    pub read_group: String,
+
+    /// Whether or not an endedness was determined for this read group.
+    pub succeeded: bool,
+
+    /// The endedness of this read group or "Unknown".
+    pub endedness: String,
+
+    /// The number of reads without 0x1 set.
+    pub unsegmented: usize,
+
+    /// The f+l- read count.
+    pub first: usize,
+
+    /// The f-l+ read count.
+    pub last: usize,
+
+    /// The f+l+ read count.
+    pub both: usize,
+
+    /// The f-l- read count.
+    pub neither: usize,
+
+    /// The reads per template (RPT).
+    /// Only available if `args.calc_rpt` is true.
+    pub rpt: Option<f64>,
+}
+
+impl ReadGroupDerivedEndednessResult {
+    /// Creates a new [`ReadGroupDerivedEndednessResult`].
+    pub fn new(
+        read_group: String,
+        succeeded: bool,
+        endedness: String,
+        counts: OrderingFlagsCounts,
+        rpt: Option<f64>,
+    ) -> Self {
+        ReadGroupDerivedEndednessResult {
+            read_group,
+            succeeded,
+            endedness,
+            unsegmented: counts.unsegmented,
+            first: counts.first,
+            last: counts.last,
+            both: counts.both,
+            neither: counts.neither,
+            rpt,
+        }
+    }
+}
+
+/// Struct holding the final results for an `ngs derive endedness` subcommand
+/// call.
+#[derive(Debug, Serialize)]
+pub struct DerivedEndednessResult {
+    /// Whether or not the `ngs derive endedness` subcommand succeeded.
+    pub succeeded: bool,
+
+    /// The overall endedness of the file or "Unknown".
+    pub endedness: String,
+
+    /// The number of reads without 0x1 set.
+    pub unsegmented: usize,
+
+    /// The overall f+l- read count.
+    pub first: usize,
+
+    /// The overall f-l+ read count.
+    pub last: usize,
+
+    /// The overall f+l+ read count.
+    pub both: usize,
+
+    /// The overall f-l- read count.
+    pub neither: usize,
+
+    /// The overall reads per template (RPT).
+    /// Only available if `args.calc_rpt` is true.
+    pub rpt: Option<f64>,
+
+    /// Vector of [`ReadGroupDerivedEndednessResult`]s.
+    /// One for each read group in the BAM,
+    /// and potentially one for any reads with an unknown read group.
+    pub read_groups: Vec<ReadGroupDerivedEndednessResult>,
+}
+
+impl DerivedEndednessResult {
+    /// Creates a new [`DerivedEndednessResult`].
+    pub fn new(
+        succeeded: bool,
+        endedness: String,
+        counts: OrderingFlagsCounts,
+        rpt: Option<f64>,
+        read_groups: Vec<ReadGroupDerivedEndednessResult>,
+    ) -> Self {
+        DerivedEndednessResult {
+            succeeded,
+            endedness,
+            unsegmented: counts.unsegmented,
+            first: counts.first,
+            last: counts.last,
+            both: counts.both,
+            neither: counts.neither,
+            rpt,
+            read_groups,
+        }
+    }
+}
diff --git a/src/derive/junction_annotation/compute.rs b/src/derive/junction_annotation/compute.rs
index 3a67b9b..069a5dc 100644
--- a/src/derive/junction_annotation/compute.rs
+++ b/src/derive/junction_annotation/compute.rs
@@ -142,7 +142,7 @@ pub fn process(
     }
 
     // (7) Calculate the start position of this read. This will
-    // later be used to find the position of any introns.
+    // be used to find the position of any introns.
     let start = match record.alignment_start() {
         Some(s) => s,
         _ => bail!("Could not parse record's start position."),
@@ -155,6 +155,7 @@ pub fn process(
             // This is an intron.
             Kind::Skip => {
                 // Check that `op.len() >= params.min_intron_length` later,
+                // once all reads supporting short junctions have been collected
                 // for better metric reporting.
 
                 let intron_start = cur_pos;
@@ -175,14 +176,10 @@ pub fn process(
                     continue;
                 }
 
-                let exon_starts = match exons.starts.get(seq_name) {
-                    Some(starts) => starts,
-                    _ => bail!("Could not find exon starts for contig: {}", seq_name),
-                };
-                let exon_ends = match exons.ends.get(seq_name) {
-                    Some(ends) => ends,
-                    _ => bail!("Could not find exon ends for contig: {}", seq_name),
-                };
+                // The following unwraps are safe because we checked that the reference
+                // sequence is annotated above.
+                let exon_starts = exons.starts.get(seq_name).unwrap();
+                let exon_ends = exons.ends.get(seq_name).unwrap();
 
                 let mut intron_start_known = false;
                 let mut intron_end_known = false;
@@ -299,27 +296,29 @@ pub fn summarize(
     );
 
     // Tally up observed junctions and spliced reads.
-    let mut juncs;
-    let mut support;
-    (juncs, support) = tally_junctions_and_support(&results.junction_annotations.known);
-    results.summary.known_junctions = juncs;
-    results.summary.known_junctions_read_support = support;
-    (juncs, support) = tally_junctions_and_support(&results.junction_annotations.partial_novel);
-    results.summary.partial_novel_junctions = juncs;
-    results.summary.partial_novel_junctions_read_support = support;
-    (juncs, support) = tally_junctions_and_support(&results.junction_annotations.complete_novel);
-    results.summary.complete_novel_junctions = juncs;
-    results.summary.complete_novel_junctions_read_support = support;
-    (juncs, support) =
-        tally_junctions_and_support(&results.junction_annotations.unannotated_reference);
-    results.summary.unannotated_reference_junctions = juncs;
-    results.summary.unannotated_reference_junctions_read_support = support;
-
-    // Tally up total junctions and spliced reads.
+    (
+        results.summary.known_junctions,
+        results.summary.known_junctions_read_support,
+    ) = tally_junctions_and_support(&results.junction_annotations.known);
+    (
+        results.summary.partial_novel_junctions,
+        results.summary.partial_novel_junctions_read_support,
+    ) = tally_junctions_and_support(&results.junction_annotations.partial_novel);
+    (
+        results.summary.complete_novel_junctions,
+        results.summary.complete_novel_junctions_read_support,
+    ) = tally_junctions_and_support(&results.junction_annotations.complete_novel);
+    (
+        results.summary.unannotated_reference_junctions,
+        results.summary.unannotated_reference_junctions_read_support,
+    ) = tally_junctions_and_support(&results.junction_annotations.unannotated_reference);
+
+    // Tally up total junctions.
     results.summary.total_junctions = results.summary.known_junctions
         + results.summary.partial_novel_junctions
         + results.summary.complete_novel_junctions
         + results.summary.unannotated_reference_junctions;
+    // Tally up total read support.
     results.summary.total_junctions_read_support = results.summary.known_junctions_read_support
         + results.summary.partial_novel_junctions_read_support
         + results.summary.complete_novel_junctions_read_support
@@ -327,7 +326,7 @@ pub fn summarize(
 
     // Calculate percentages.
     let total_junctions = results.summary.total_junctions as f64
-        - results.summary.unannotated_reference_junctions as f64;
+        - results.summary.unannotated_reference_junctions as f64; // exclude unannotated junctions from percentages
     results.summary.known_junctions_percent =
         results.summary.known_junctions as f64 / total_junctions * 100.0;
     results.summary.partial_novel_junctions_percent =
@@ -336,15 +335,19 @@ pub fn summarize(
         results.summary.complete_novel_junctions as f64 / total_junctions * 100.0;
 
     // Calculate average read support.
+    // Total
     results.summary.average_junction_read_support = results.summary.total_junctions_read_support
         as f64
         / results.summary.total_junctions as f64;
+    // Known
     results.summary.average_known_junction_read_support =
         results.summary.known_junctions_read_support as f64
             / results.summary.known_junctions as f64;
+    // Partial Novel
     results.summary.average_partial_novel_junction_read_support =
         results.summary.partial_novel_junctions_read_support as f64
             / results.summary.partial_novel_junctions as f64;
+    // Complete Novel
     results.summary.average_complete_novel_junction_read_support =
         results.summary.complete_novel_junctions_read_support as f64
             / results.summary.complete_novel_junctions as f64;
diff --git a/src/derive/junction_annotation/results.rs b/src/derive/junction_annotation/results.rs
index ced00da..29daf20 100644
--- a/src/derive/junction_annotation/results.rs
+++ b/src/derive/junction_annotation/results.rs
@@ -41,6 +41,7 @@ pub struct JunctionAnnotations {
     pub unannotated_reference: JunctionsMap,
 }
 
+// TODO should contigs be sorted?
 impl Serialize for JunctionAnnotations {
     fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
         let mut known = Vec::new();
diff --git a/src/derive/readlen/compute.rs b/src/derive/readlen/compute.rs
index 28b6f97..2867dbb 100644
--- a/src/derive/readlen/compute.rs
+++ b/src/derive/readlen/compute.rs
@@ -17,8 +17,8 @@ pub struct DerivedReadlenResult {
     /// The majority vote percentage of the consensus read length.
     pub majority_pct_detected: f64,
 
-    /// Status of the evidence that supports (or does not support) this
-    /// read length.
+    /// Status of the evidence that supports (or does not support) the
+    /// consensus read length.
     pub evidence: Vec<(usize, usize)>,
 }
 
@@ -61,15 +61,20 @@ pub fn predict(
     let consensus_read_length = max_read_length;
     let majority_detected = max_count as f64 / num_samples as f64;
 
-    let mut result =
-        DerivedReadlenResult::new(false, None, majority_detected * 100.0, read_lengths);
-
-    if majority_detected >= majority_vote_cutoff {
-        result.succeeded = true;
-        result.consensus_read_length = Some(consensus_read_length);
+    match majority_detected >= majority_vote_cutoff {
+        true => anyhow::Ok(DerivedReadlenResult::new(
+            true,
+            Some(consensus_read_length),
+            majority_detected * 100.0,
+            read_lengths,
+        )),
+        false => anyhow::Ok(DerivedReadlenResult::new(
+            false,
+            None,
+            majority_detected * 100.0,
+            read_lengths,
+        )),
     }
-
-    anyhow::Ok(result)
 }
 
 #[cfg(test)]
diff --git a/src/derive/strandedness/compute.rs b/src/derive/strandedness/compute.rs
index a880467..f2cb477 100644
--- a/src/derive/strandedness/compute.rs
+++ b/src/derive/strandedness/compute.rs
@@ -1,22 +1,16 @@
 //! Module holding the logic for computing the strandedness.
 
-use noodles::bam;
-use noodles::core::Region;
-use noodles::gff;
-use noodles::sam;
-use noodles::sam::record::data::field::Tag;
 use noodles::sam::record::MappingQuality;
+use noodles::{bam, core::Region, gff, sam};
 use rand::Rng;
 use rust_lapper::Lapper;
-use std::collections::HashMap;
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::ops::{Add, AddAssign};
 use std::sync::Arc;
 
 use crate::derive::strandedness::results;
-use crate::utils::alignment::filter_by_mapq;
-use crate::utils::display::RecordCounter;
-use crate::utils::read_groups::UNKNOWN_READ_GROUP;
+use crate::utils::read_groups;
+use crate::utils::{alignment::filter_by_mapq, display::RecordCounter};
 
 const STRANDED_THRESHOLD: f64 = 80.0;
 const UNSTRANDED_THRESHOLD: f64 = 40.0;
@@ -49,7 +43,7 @@ impl AddAssign for Counts {
     }
 }
 
-/// Struct for possible (valid) strand orientations.
+/// Struct for valid strand orientations.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum Strand {
     /// Forward strand.
@@ -84,7 +78,10 @@ impl TryFrom<gff::record::Strand> for Strand {
 /// Struct for tracking the order of segments in a record.
 #[derive(Clone, Copy, Debug)]
 enum SegmentOrder {
+    /// The first segment in a record.
     First,
+
+    /// The last segment in a record.
     Last,
 }
 
@@ -105,6 +102,8 @@ impl TryFrom<sam::record::Flags> for SegmentOrder {
 }
 
 /// Struct holding the parsed BAM file and its index.
+/// TODO this code is repeated. Should be in a common module.
+/// Will be moved to utils::formats in a future PR.
 pub struct ParsedBAMFile {
     /// The BAM reader.
     pub reader: bam::Reader<noodles::bgzf::Reader<std::fs::File>>,
@@ -131,7 +130,7 @@ pub struct StrandednessParams {
     /// The number of genes to test for strandedness.
     pub num_genes: usize,
 
-    /// The maximum number of iterations to try before giving up.
+    /// The maximum number of genes to try before giving up.
     pub max_genes_per_try: usize,
 
     /// Minimum number of reads mapped to a gene to be considered
@@ -139,7 +138,7 @@ pub struct StrandednessParams {
     pub min_reads_per_gene: usize,
 
     /// Minumum mapping quality for a record to be considered.
-    /// `None` means no filtering by MAPQ. This also allows
+    /// `None` means no filtering by MAPQ. This allows
     /// for records _without_ a MAPQ to be counted.
     pub min_mapq: Option<MappingQuality>,
 
@@ -156,22 +155,23 @@ pub struct StrandednessParams {
     pub count_duplicates: bool,
 }
 
-/// Function to disqualify a gene based on its strand and exons.
+/// Function to disqualify a gene based on its strand and its exons' strand.
 fn disqualify_gene(gene: &gff::Record, exons: &HashMap<&str, Lapper<usize, Strand>>) -> bool {
     // gene_strand guaranteed to be Forward or Reverse by initialization code.
     let gene_strand = Strand::try_from(gene.strand()).unwrap();
     let mut all_on_same_strand = true;
-    let mut at_least_one_exon = false;
-
-    if let Some(intervals) = exons.get(gene.reference_sequence_name()) {
-        for exon in intervals.find(gene.start().into(), gene.end().into()) {
-            at_least_one_exon = true;
-            if exon.val != gene_strand {
-                all_on_same_strand = false;
-                break;
-            }
-        }
-    }
+
+    let at_least_one_exon = match exons.get(gene.reference_sequence_name()) {
+        Some(intervals) => intervals
+            .find(gene.start().into(), gene.end().into())
+            .any(|exon| {
+                if exon.val != gene_strand {
+                    all_on_same_strand = false;
+                }
+                true
+            }),
+        None => false,
+    };
 
     if all_on_same_strand && at_least_one_exon {
         return false;
@@ -242,16 +242,7 @@ fn classify_read(
     all_counts: &mut AllReadGroupsCounts,
     read_metrics: &mut results::ReadRecordMetrics,
 ) {
-    let read_group = match read.data().get(Tag::ReadGroup) {
-        Some(rg) => {
-            let rg = rg.to_string();
-            if !all_counts.found_rgs.contains(&rg) {
-                all_counts.found_rgs.insert(Arc::new(rg.clone()));
-            }
-            Arc::clone(all_counts.found_rgs.get(&rg).unwrap())
-        }
-        None => Arc::clone(&UNKNOWN_READ_GROUP),
-    };
+    let read_group = read_groups::get_read_group(read, Some(&mut all_counts.found_rgs));
 
     let rg_counts = all_counts.counts.entry(read_group).or_default();
 
@@ -324,7 +315,7 @@ pub fn predict_strandedness(
     {
         result.succeeded = true;
         result.strandedness = "Unstranded".to_string();
-    }
+    } // else Inconclusive
 
     result
 }
@@ -342,7 +333,7 @@ pub fn predict(
 ) -> Result<results::DerivedStrandednessResult, anyhow::Error> {
     let mut rng = rand::thread_rng();
     let mut num_genes_considered = 0; // Local to this attempt
-    let mut counter = RecordCounter::new(Some(1_000));
+    let mut counter = RecordCounter::new(Some(1_000)); // Also local to this attempt
     let genes_remaining = gene_records.len();
 
     let max_iters = if params.max_genes_per_try > genes_remaining {
@@ -366,12 +357,13 @@ pub fn predict(
         }
 
         let cur_gene = gene_records.swap_remove(rng.gen_range(0..gene_records.len()));
-        counter.inc();
+        counter.inc(); // Technically this is off-by-one, as the gene hasn't been processed yet.
 
         if disqualify_gene(&cur_gene, exons) {
             metrics.genes.mixed_strands += 1; // Tracked across attempts
             continue;
         }
+        // gene_strand guaranteed to be Forward or Reverse by initialization code.
         let cur_gene_strand = Strand::try_from(cur_gene.strand()).unwrap();
 
         let mut enough_reads = false;
@@ -388,7 +380,7 @@ pub fn predict(
     }
     if num_genes_considered < params.num_genes {
         tracing::warn!(
-            "Evaluated the maximum number of genes ({}) before considering the requested amount of genes ({}) for this try. Only considering {} genes.",
+            "Evaluated the maximum number of genes ({}) before considering the requested amount of genes ({}) for this try. Only considering an additional {} genes this try.",
             max_iters,
             params.num_genes,
             num_genes_considered,
@@ -423,6 +415,7 @@ pub fn predict(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use noodles::sam::record::data::field::Tag;
     use rust_lapper::Interval;
 
     #[test]
diff --git a/src/utils/read_groups.rs b/src/utils/read_groups.rs
index 7cd0b57..86caf5d 100644
--- a/src/utils/read_groups.rs
+++ b/src/utils/read_groups.rs
@@ -11,10 +11,8 @@ use tracing::warn;
 /// Type alias for a read group pointer.
 pub type ReadGroupPtr = Arc<String>;
 
-// Strings used to index into the HashMaps used to store the Read Group ordering flags.
-// Lazy statics are used to save memory.
 lazy_static! {
-    /// String used to index into th HashMaps used to store the "unknown_read_group" ordering flags.
+    /// String used to represent an unknown read group. Wrapped in an Arc to prevent redundant memory usage.
     pub static ref UNKNOWN_READ_GROUP: ReadGroupPtr = Arc::new(String::from("unknown_read_group"));
 }