Skip to content

Commit

Permalink
[FEAT] option to not ignore blanks before the id in fasta files
Browse files Browse the repository at this point in the history
  • Loading branch information
SGSSGene committed Feb 9, 2022
1 parent 23d2bca commit 0345900
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 12 deletions.
60 changes: 48 additions & 12 deletions include/seqan3/io/sequence_file/format_fasta.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,16 @@ class format_fasta
#if SEQAN3_WORKAROUND_VIEW_PERFORMANCE
auto it = stream_view.begin();
auto e = stream_view.end();
for (; (it != e) && (is_id || is_blank)(*it); ++it)
{}
if (options.fasta_ignore_blanks_before_id)
{
for (; (it != e) && (is_id || is_blank)(*it); ++it)
{}
}
else
{
for (; (it != e) && (is_id)(*it); ++it)
{}
}

bool at_delimiter = false;
for (; it != e; ++it)
Expand All @@ -215,11 +223,21 @@ class format_fasta
{}

#else // ↑↑↑ WORKAROUND | ORIGINAL ↓↓↓
if (options.fasta_ignore_blanks_before_id)
{
std::ranges::copy(stream_view | std::views::drop_while(is_id || is_blank) // skip leading >
| detail::take_until_or_throw(is_cntrl || is_blank) // read ID until delimiter…
| views::char_to<std::ranges::range_value_t<id_type>>,
std::cpp20::back_inserter(id)); // … ^A is old delimiter
}
else
{
std::ranges::copy(stream_view | std::views::drop_while(is_id)
| detail::take_until_or_throw(is_cntrl || is_blank) // read ID until delimiter…
| views::char_to<std::ranges::range_value_t<id_type>>,
std::cpp20::back_inserter(id)); // … ^A is old delimiter

std::ranges::copy(stream_view | std::views::drop_while(is_id || is_blank) // skip leading >
| detail::take_until_or_throw(is_cntrl || is_blank) // read ID until delimiter…
| views::char_to<std::ranges::range_value_t<id_type>>,
std::cpp20::back_inserter(id)); // … ^A is old delimiter
}

// consume rest of line
detail::consume(stream_view | detail::take_line_or_throw);
Expand All @@ -231,8 +249,16 @@ class format_fasta
#if SEQAN3_WORKAROUND_VIEW_PERFORMANCE
auto it = stream_view.begin();
auto e = stream_view.end();
for (; (it != e) && (is_id || is_blank)(*it); ++it)
{}
if (options.fasta_ignore_blanks_before_id)
{
for (; (it != e) && (is_id || is_blank)(*it); ++it)
{}
}
else
{
for (; (it != e) && (is_id)(*it); ++it)
{}
}

bool at_delimiter = false;
for (; it != e; ++it)
Expand All @@ -249,11 +275,21 @@ class format_fasta
throw unexpected_end_of_input{"FASTA ID line did not end in newline."};

#else // ↑↑↑ WORKAROUND | ORIGINAL ↓↓↓
if (options.fasta_ignore_blanks_before_id)
{
std::ranges::copy(stream_view | detail::take_line_or_throw // read line
| std::views::drop_while(is_id || is_blank) // skip leading >
| views::char_to<std::ranges::range_value_t<id_type>>,
std::cpp20::back_inserter(id));
}
else
{
std::ranges::copy(stream_view | detail::take_line_or_throw // read line
| std::views::drop_while(is_id) // skip leading >
| views::char_to<std::ranges::range_value_t<id_type>>,
std::cpp20::back_inserter(id));

std::ranges::copy(stream_view | detail::take_line_or_throw // read line
| std::views::drop_while(is_id || is_blank) // skip leading >
| views::char_to<std::ranges::range_value_t<id_type>>,
std::cpp20::back_inserter(id));
}
#endif // SEQAN3_WORKAROUND_VIEW_PERFORMANCE
}
}
Expand Down
2 changes: 2 additions & 0 deletions include/seqan3/io/sequence_file/input_options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ struct sequence_file_input_options
bool truncate_ids = false;
//!\brief Read the complete_header into the seqan3::field::id for embl or genbank format.
bool embl_genbank_complete_header = false;
//!\brief Ignore blanks between the leading ">" (or ";") and the actual ID when reading FASTA files.
bool fasta_ignore_blanks_before_id = true;
};

} // namespace seqan3

0 comments on commit 0345900

Please sign in to comment.