Skip to content

Commit

Permalink
text output shows only num_bases - adapter_size bases
Browse files Browse the repository at this point in the history
  • Loading branch information
guilhermesena1 committed Nov 26, 2019
1 parent 7cd0cd3 commit e1271c9
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 75 deletions.
8 changes: 8 additions & 0 deletions src/FalcoConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ FalcoConfig::read_adapters() {

// The adapters file has a space separated name, and the last instance is
// the biological sequence

adapter_size = 0;
while (getline(in, line)) {
if (is_content_line(line)) {
if (adapter_names.size() > FastqStats::max_adapters)
Expand Down Expand Up @@ -284,6 +286,12 @@ FalcoConfig::read_adapters() {
adapter_names.push_back(adapter_name);
adapter_seqs.push_back(adapter_seq);
adapter_hashes.push_back(adapter_hash);

if (adapter_size == 0)
adapter_size = adapter_seq.size();
else if (adapter_seq.size() != adapter_size)
throw runtime_error("adapters in config are not all of same size");

line_by_space.clear();
}
}
Expand Down
1 change: 1 addition & 0 deletions src/FalcoConfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ struct FalcoConfig {
// two-bit hash of the sequence above
std::vector<size_t> adapter_hashes;

size_t adapter_size;
/************************************************************
******* ADDITIONAL INFORMATION ABOUT THE SAMPLE ************
************************************************************/
Expand Down
7 changes: 0 additions & 7 deletions src/FastqStats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,19 +156,12 @@ struct FastqStats {
static const size_t kmer_mask = (1ll << (2*kmer_size)) - 1;

/************ ADAPTER CONSTANTS **********/
// Kmer size given as input
static const size_t adapter_size = 12;

// Maximum number of adapters
static const size_t max_adapters = 128;

// bit shift for adapters, log(100) = 7
static const size_t kBitShiftAdapter = log2exact(max_adapters);

// mask to get only the first 2*k bits of the sliding window
static const size_t adapter_mask = (1ll << (2*adapter_size)) - 1;



public:
/*********** SINGLE NUMBERS FROM THE ENTIRE FASTQ ****************/
Expand Down
74 changes: 12 additions & 62 deletions src/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1680,60 +1680,8 @@ Module(ModuleAdapterContent::module_name) {
auto grade_adapter = config.limits.find("adapter")->second;
grade_warn = grade_adapter.find("warn")->second;
grade_error = grade_adapter.find("error")->second;
}

// Function to estimate number of adapter counts in a position, basically takes
// the minimum of all adapter kmers in adjacent positions to the "pos" paramter
// and return the minimum of them.
double
ModuleAdapterContent::count_adapter(
const vector<size_t> &kmer_count,
const size_t pos,
const size_t adapter_hash,
const size_t adapter_size,
const size_t kmer_size) {

// mask to just get the k bases from the adapter
size_t kmer_mask = (1ll << (2*kmer_size)) - 1;

size_t bit_shift_kmer = 2*kmer_size;

// minimum of all adapter kmers counted
size_t adapter_count = 0;

// temp variable to get the position in the flattened matrix of each kmer
size_t kmer_count_index;

// number of sliding window slides we need to do
size_t num_slides = adapter_size - kmer_size + 1;

// temp variable to get the sliding kmers in the adapter
size_t adapter_kmer;

size_t adapter_slide = adapter_hash;

// temp variable
size_t cur_pos;

// temp variable to get the adapter count
size_t cnt;
for (size_t i = 0; i < num_slides; ++i) {
cur_pos = pos + (kmer_size - 1) + (num_slides - i - 1);
adapter_kmer = adapter_slide & kmer_mask;

// debug
kmer_count_index = (cur_pos << bit_shift_kmer) | adapter_kmer;
cnt = kmer_count[kmer_count_index];
if (adapter_count == 0)
adapter_count = cnt;
else if (cnt < adapter_count)
adapter_count = cnt;

// slide the adapter hash by 2 bits
adapter_slide >>= 2;
}

return static_cast<double>(adapter_count);
adapter_size = config.adapter_size;
}

void
Expand All @@ -1744,11 +1692,13 @@ ModuleAdapterContent::summarize_module(const FastqStats &stats) {
num_bases = FastqStats::kNumBases;

for (size_t i = 0; i < num_adapters; ++i)
adapter_pos_pct.push_back(vector<double>(num_bases, 0.0));
adapter_pos_pct.push_back(
vector<double>(num_bases - adapter_size + 1, 0.0)
);

size_t cnt;
for (size_t i = 0; i < num_adapters; ++i) {
for (size_t j = 0; j < num_bases; ++j) {
for (size_t i = 0; i < adapter_pos_pct.size(); ++i) {
for (size_t j = 0; j < adapter_pos_pct[0].size(); ++j) {
cnt = 0;
// check if position even makes sense
if (j + adapter_seqs[i].size() - 1 < num_bases)
Expand All @@ -1764,8 +1714,8 @@ ModuleAdapterContent::summarize_module(const FastqStats &stats) {
}

// now convert the counts we got before to percentage
for (size_t i = 0; i < num_adapters; ++i) {
for (size_t j = 0; j < num_bases; ++j) {
for (size_t i = 0; i < adapter_pos_pct.size(); ++i) {
for (size_t j = 0; j < adapter_pos_pct[0].size(); ++j) {
adapter_pos_pct[i][j] *= 100.0;
adapter_pos_pct[i][j] /= static_cast<double>(stats.num_reads);
}
Expand All @@ -1774,8 +1724,8 @@ ModuleAdapterContent::summarize_module(const FastqStats &stats) {

void
ModuleAdapterContent::make_grade() {
for (size_t i = 0; i < num_adapters; ++i) {
for (size_t j = 0; j < num_bases; ++j) {
for (size_t i = 0; i < adapter_pos_pct.size(); ++i) {
for (size_t j = 0; j < adapter_pos_pct[0].size(); ++j) {
if (grade != "fail") {
if (adapter_pos_pct[i][j] > grade_error) {
grade = "fail";
Expand All @@ -1797,9 +1747,9 @@ ModuleAdapterContent::write_module(ostream &os) {
os << "\n";

// matrix of percentages
for (size_t i = 0; i < num_bases; ++i) {
for (size_t i = 0; i < adapter_pos_pct[0].size(); ++i) {
os << i + 1;
for (size_t j = 0; j < num_adapters; ++j)
for (size_t j = 0; j < adapter_pos_pct.size(); ++j)
os << "\t" << adapter_pos_pct[j][i];
os << "\n";
}
Expand Down
3 changes: 3 additions & 0 deletions src/Module.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,9 @@ class ModuleAdapterContent : public Module {
// number of bases to report
size_t num_bases;

// adapter size to know how many bases to report
size_t adapter_size;

// Information from config
std::vector<std::string> adapter_names;
std::vector<std::string> adapter_seqs;
Expand Down
12 changes: 7 additions & 5 deletions src/StreamReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ StreamReader::StreamReader(FalcoConfig &config,

// Here are the const adapters
num_adapters(config.adapter_hashes.size()),
adapters(make_adapters(config.adapter_hashes))
adapters(make_adapters(config.adapter_hashes)),
adapter_size(config.adapter_size),
adapter_mask((1ll << (2*adapter_size)) - 1)
{

// Allocates buffer to temporarily store reads
Expand Down Expand Up @@ -261,16 +263,16 @@ StreamReader::process_sequence_base_from_buffer(FastqStats &stats) {
}

// GS: slow, need to use fsm
if (do_adapter && (num_bases_after_n == stats.adapter_size)) {
cur_kmer &= stats.adapter_mask;
for (i = 0; i < num_adapters; ++i) {
if (do_adapter && (num_bases_after_n == adapter_size)) {
cur_kmer &= adapter_mask;
for (i = 0; i != num_adapters; ++i) {
if (cur_kmer == adapters[i]) {
stats.pos_adapter_count[(read_pos << stats.kBitShiftAdapter) | i]++;
}
}
}

num_bases_after_n += (num_bases_after_n != stats.adapter_size);
num_bases_after_n += (num_bases_after_n != adapter_size);
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/StreamReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ class StreamReader{

/************ ADAPTER SEARCH ***********/
const size_t num_adapters;
std::array<size_t, FastqStats::max_adapters> adapters;
const size_t adapter_size;
const size_t adapter_mask;
const std::array<size_t, FastqStats::max_adapters> adapters;

/************ FUNCTIONS TO PROCESS READS AND BASES ***********/
// gets and puts bases from and to buffer
Expand Down

0 comments on commit e1271c9

Please sign in to comment.