Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changing adapter matching so that self-overlapping patterns are only matched once per read #71

Merged
merged 2 commits on Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/StreamReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,10 @@ StreamReader::process_sequence_base_from_buffer(FastqStats &stats) {
  if (do_adapter_optimized && (num_bases_after_n == adapter_size)) {
  cur_kmer &= adapter_mask;
  for (size_t i = 0; i != num_adapters; ++i) {
- if (cur_kmer == adapters[i]) {
+ if (cur_kmer == adapters[i] && !adapters_found[i]) {
  ++stats.pos_adapter_count[
  (read_pos << Constants::bit_shift_adapter) | i];
+ adapters_found[i] = true;
  }
  }
  }
Expand Down Expand Up @@ -436,6 +437,7 @@ StreamReader::read_sequence_line(FastqStats &stats) {
  still_in_buffer = true;
  next_truncation = 100;
  do_kmer_read = (stats.num_reads == next_kmer_read);
+ adapters_found.reset();

  if (do_adapters_slow) {
  const string seq_line_str = cur_char;
Expand Down
4 changes: 3 additions & 1 deletion src/StreamReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

  #include <string>
  #include <cmath>
+ #include <bitset>

  // Optional zlib usage
  #include <zlib.h>
Expand Down Expand Up @@ -81,6 +82,7 @@ class StreamReader{
  const size_t adapter_size;
  const size_t adapter_mask;
  const std::array<size_t, Constants::max_adapters> adapters;
+ std::bitset<Constants::max_adapters> adapters_found{};

  const std::string filename;

Expand Down Expand Up @@ -250,7 +252,7 @@ class BamReader : public StreamReader {
  bool read_entry(FastqStats &stats, size_t &num_bytes_read);

  // Specially made for BamReader to work directly with bam1_t
- inline void read_sequence_line(FastqStats &stats);
+ inline void read_sequence_line(FastqStats &stats);
  inline void read_quality_line(FastqStats &stats); // parse quality
  inline void put_base_in_buffer(const size_t pos); // puts base in buffer or leftover
  ~BamReader();
Expand Down
38 changes: 19 additions & 19 deletions test/md5sum.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
- 9641ae08f984bde102d4292bcad56484 test_output/SRR1853178_1/fastqc_data.txt
+ 5c4e6118d438b1f01cf620120f26c622 test_output/SRR1853178_1/fastqc_data.txt
  36df1dcab539ba4ef885239fc8524636 test_output/SRR1853178_1/summary.txt
- c5b1d892705ee40353a5b88df5fc3a74 test_output/SRR3897196_2/fastqc_data.txt
+ 8dfecfc49e5b450152be12c63177054f test_output/SRR3897196_2/fastqc_data.txt
  80cd130958bcb2219f1e5a07d06a3b6e test_output/SRR3897196_2/summary.txt
- d60663e26511968865b3ed92a864fda8 test_output/SRR9624732_1/fastqc_data.txt
+ 5df2ae98f4389616df1fa90fe46ce463 test_output/SRR9624732_1/fastqc_data.txt
  a525d455b17eb6ba84cd2a60d281a2b0 test_output/SRR9624732_1/summary.txt
- 053bb7a28541ac110116086ac6ea0cac test_output/SRR10143153_2/fastqc_data.txt
+ 6e5eaadf209c3f77ab61ed61a034f49e test_output/SRR10143153_2/fastqc_data.txt
  19f1811f324e4c44154f659bb6e22806 test_output/SRR10143153_2/summary.txt
- 71e3964a9a610ebcb3207ea2a391b4be test_output/SRR1772703.lite.1/fastqc_data.txt
- 6403659cb0295ec05db9891e3e5e4f76 test_output/SRR1772703.lite.1/summary.txt
- 8d28c215efc4f2930a002c240fb14448 test_output/SRR9624732_2/fastqc_data.txt
+ 427099afa91d877f078f1e7989fe4c39 test_output/SRR1772703.lite.1/fastqc_data.txt
+ ad5727295e7c8de6eb6874837bf1518f test_output/SRR1772703.lite.1/summary.txt
+ 04f7bb98120971c0ba1d648fda893a7c test_output/SRR9624732_2/fastqc_data.txt
  fefc5d746f853c14b5e00421ad1ec260 test_output/SRR9624732_2/summary.txt
- 3942d27ab09f2db7f58a2ed4c904ac8a test_output/SRR10124060/fastqc_data.txt
- 1e228b1bb498eec2ca81ec2cc657a02d test_output/SRR10124060/summary.txt
- 7c5f40be6a37ac4d99c39400b1f7ddb6 test_output/SRR891268_2/fastqc_data.txt
+ a7827fe2115ef6ac48cd61f2065e58e5 test_output/SRR10124060/fastqc_data.txt
+ 776f7d1b53bbed8683de9ca1d2529f1e test_output/SRR10124060/summary.txt
+ 7c05da833c8806ea8d5093ddab337f1c test_output/SRR891268_2/fastqc_data.txt
  20a8e50baace4c672622793874a3d7de test_output/SRR891268_2/summary.txt
- e02e6043667560aacb39b4e956edc146 test_output/SRR9878537.lite.1/fastqc_data.txt
+ 9bae57f4fa64d9fca4b11f0e0c107327 test_output/SRR9878537.lite.1/fastqc_data.txt
  e5c40997d4993c07e164ee5598c39cf9 test_output/SRR9878537.lite.1/summary.txt
- 7807c0aeca3bf4fdb18309ce993f6e35 test_output/SRR891268_1/fastqc_data.txt
+ 6778518c16860c4300ed575d1dfdd43e test_output/SRR891268_1/fastqc_data.txt
  69e7d0c53cd2e67117637c408b65333a test_output/SRR891268_1/summary.txt
- 54a383a9dd615f4130cbdab202829173 test_output/SRR6059706.lite.1/fastqc_data.txt
- a5545633e81fc57fd03ff6ff7ba4fb8f test_output/SRR6059706.lite.1/summary.txt
- 21b11f43971f424267f6e32a880aaea9 test_output/SRR6387347/fastqc_data.txt
+ f05047b2a21949570ca2ae45b73b8da0 test_output/SRR6059706.lite.1/fastqc_data.txt
+ e348e4bcc7fc6f05e989ac7858d2b287 test_output/SRR6059706.lite.1/summary.txt
+ db82cb634e7e9b2f30472a069ba082b0 test_output/SRR6387347/fastqc_data.txt
  a61f65047e76f93300967cf399d044de test_output/SRR6387347/summary.txt
- 65519f6bcfd726b3219d7f646ad1d3b5 test_output/SRR3897196_1/fastqc_data.txt
+ 905dcc1d5d135e18df379cc1edf778b6 test_output/SRR3897196_1/fastqc_data.txt
  b736ee95d5c450ef5c0dda31957b6818 test_output/SRR3897196_1/summary.txt
- 109cf880b376fcfba44b9637dcc045a6 test_output/SRR10143153_1/fastqc_data.txt
+ f7eb7940ad9836764b695d2abb0426da test_output/SRR10143153_1/fastqc_data.txt
  9ad191925d47a57d4f8b12f21ba0a7c3 test_output/SRR10143153_1/summary.txt
- dfebd8cc85b17f954fb14287a2dc8169 test_output/SRR1853178_2/fastqc_data.txt
- 7645f17be12d149347dab6033aabdeb3 test_output/SRR1853178_2/summary.txt
+ 508cdda67a1ea7ddc756aa3656d1079b test_output/SRR1853178_2/fastqc_data.txt
+ c331d0f7a6aa9d72be41ac531f9ba269 test_output/SRR1853178_2/summary.txt