From edaac4e24f77449df09421c9d1815d85fbc98033 Mon Sep 17 00:00:00 2001 From: hosseinem Date: Mon, 10 Jan 2022 14:27:00 +0100 Subject: [PATCH 01/16] [FEATURE] Adding minstrobe and syncmer preliminary implementations --- include/minstrobe.hpp | 525 ++++++++++++++++++++++++++++++++++++++++ include/syncmer.hpp | 541 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1066 insertions(+) create mode 100644 include/minstrobe.hpp create mode 100644 include/syncmer.hpp diff --git a/include/minstrobe.hpp b/include/minstrobe.hpp new file mode 100644 index 0000000..d7746b2 --- /dev/null +++ b/include/minstrobe.hpp @@ -0,0 +1,525 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +/*!\file + * \author Hossein Eizadi Moghadam + * \brief Provides minstrobe. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace seqan3::detail +{ +// --------------------------------------------------------------------------------------------------------------------- +// minstrobe_view class +// --------------------------------------------------------------------------------------------------------------------- + +/*!\brief The type returned by minstrobe. + * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must + * model std::totally_ordered. The typical use case is that the reference type is the result of + * seqan3::kmer_hash. 
+ * \tparam measure_distance If true, then not the actual minstrobes are returned, but the distances of the minstrobes. + * \implements std::ranges::view + * \ingroup search_views + * + * + * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. + + */ +template +class minstrobe_view : public std::ranges::view_interface> +{ +private: + static_assert(std::ranges::forward_range, "The minstrobe_view only works on forward_ranges."); + static_assert(std::totally_ordered>, + "The reference type of the underlying range must model std::totally_ordered."); + + //!\brief Whether the given ranges are const_iterable + static constexpr bool const_iterable = seqan3::const_iterable_range; + + //!\brief The first underlying range. + urng1_t urange1{}; + size_t window_min{}; + size_t window_max{}; + size_t n{}; + //!\brief The number of values in one window. + + + template + class basic_iterator; + + //!\brief The sentinel type of the minstrobe_view. + using sentinel = std::default_sentinel_t; + +public: + /*!\name Constructors, destructor and assignment + * \{ + */ + /// \cond Workaround_Doxygen + minstrobe_view() requires std::default_initializable = default; //!< Defaulted. + /// \endcond + minstrobe_view(minstrobe_view const & rhs) = default; //!< Defaulted. + minstrobe_view(minstrobe_view && rhs) = default; //!< Defaulted. + minstrobe_view & operator=(minstrobe_view const & rhs) = default; //!< Defaulted. + minstrobe_view & operator=(minstrobe_view && rhs) = default; //!< Defaulted. + ~minstrobe_view() = default; //!< Defaulted. + + /*!\brief Construct from a view and a given number of values in one window. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. 
+ */ + minstrobe_view(urng1_t urange1, size_t const window_min, size_t const window_max, size_t const n) : + urange1{std::move(urange1)}, + window_min{window_min}, + window_max{window_max}, + n{n} + {} + + /*!\brief Construct from a non-view that can be view-wrapped and a given number of values in one window. + * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible + from urng1_t. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + */ + template + //!\cond + requires (std::ranges::viewable_range && + std::constructible_from>>) + //!\endcond + minstrobe_view(other_urng1_t && urange1, size_t const window_min, size_t const window_max, size_t const n) : + urange1{std::views::all(std::forward(urange1))}, + window_min{window_min}, + window_max{window_max}, + n{n} + {} + + /*!\name Iterators + * \{ + */ + /*!\brief Returns an iterator to the first element of the range. + * \returns Iterator to the first element. + * + * \details + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * Strong exception guarantee. + */ + basic_iterator begin() + { + return {std::ranges::begin(urange1), + std::ranges::end(urange1), + window_min, + window_max, + n}; + } + + //!\copydoc begin() + basic_iterator begin() const + //!\cond + requires const_iterable + //!\endcond + { + return {std::ranges::cbegin(urange1), + std::ranges::cend(urange1), + window_min, + window_max, + n}; + } + + /*!\brief Returns an iterator to the element following the last element of the range. + * \returns Iterator to the end. + * + * \details + * + * This element acts as a placeholder; attempting to dereference it results in undefined behaviour. + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * No-throw guarantee. 
+ */ + sentinel end() const + { + return {}; + } + //!\} +}; + +//!\brief Iterator for calculating minstrobes. +template +template +class minstrobe_view::basic_iterator +{ +private: + //!\brief The sentinel type of the first underlying range. + using urng1_sentinel_t = maybe_const_sentinel_t; + //!\brief The iterator type of the first underlying range. + using urng1_iterator_t = maybe_const_iterator_t; + + template + friend class basic_iterator; + +public: + /*!\name Associated types + * \{ + */ + //!\brief Type for distances between iterators. + using difference_type = std::ranges::range_difference_t; + //!\brief Value type of this iterator. + using value_t = std::ranges::range_value_t; + + using value_type = std::tuple; + //!\brief The pointer type. + using pointer = void; + //!\brief Reference to `value_type`. + using reference = value_type; + //!\brief Tag this class as a forward iterator. + using iterator_category = std::forward_iterator_tag; + //!\brief Tag this class as a forward iterator. + using iterator_concept = iterator_category; + //!\} + + /*!\name Constructors, destructor and assignment + * \{ + */ + basic_iterator() = default; //!< Defaulted. + basic_iterator(basic_iterator const &) = default; //!< Defaulted. + basic_iterator(basic_iterator &&) = default; //!< Defaulted. + basic_iterator & operator=(basic_iterator const &) = default; //!< Defaulted. + basic_iterator & operator=(basic_iterator &&) = default; //!< Defaulted. + ~basic_iterator() = default; //!< Defaulted. + + //!\brief Allow iterator on a const range to be constructible from an iterator over a non-const range. 
+    basic_iterator(basic_iterator<!const_range> const & it)
+    //!\cond
+        requires const_range
+    //!\endcond
+        : minstrobe_value{it.minstrobe_value},
+          minstrobe_position_offset{it.minstrobe_position_offset},
+          urng1_iterator{it.urng1_iterator},
+          urng1_sentinel{it.urng1_sentinel},
+          second_iterator{it.second_iterator},
+          window_values{it.window_values},
+          window_size{it.window_size},
+          end{it.end}
+    {
+        // Every data member is copied, in declaration order. next_minstrobe() reads second_iterator,
+        // window_values, end and minstrobe_position_offset, so leaving them value-initialised would give an
+        // iterator that can be dereferenced but not advanced. `it` is const, hence copies rather than std::move.
+    }
+
+    /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number
+              of values per window.
+     * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range.
+     * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range.
+     * \param[in] mod_used The number of values in one window.
+     *
+     * \details
+     *
+     * Looks at the number of values per window in two ranges, returns the smallest between both as minstrobe and
+     * shifts then by one to repeat this action. If a minstrobe in consecutive windows is the same, it is returned only
+     * once.
+     */
+    basic_iterator(urng1_iterator_t urng1_iterator,
+                   urng1_sentinel_t urng1_sentinel,
+                   size_t window_min,
+                   size_t window_max,
+                   [[maybe_unused]] size_t n) :
+        urng1_iterator{std::move(urng1_iterator)},
+        urng1_sentinel{std::move(urng1_sentinel)}
+    {
+        // Only the two window bounds drive the iteration. `n` is accepted so that the signature matches the
+        // arguments handed over by minstrobe_view::begin(); no member of this iterator depends on it.
+        window_first(window_min, window_max);
+    }
+    //!\}
+
+    //!\anchor basic_iterator_comparison_minstrobe
+    //!\name Comparison operators
+    //!\{
+
+    //!\brief Compare to another basic_iterator; two iterators are equal if their first-strobe positions are equal.
+    friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs)
+    {
+        return lhs.urng1_iterator == rhs.urng1_iterator;
+    }
+
+    //!\brief Compare to another basic_iterator; negation of operator==.
+    friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    //!\brief Compare to the sentinel of the minstrobe_view.
+ friend bool operator==(basic_iterator const & lhs, sentinel const &) + { + return lhs.urng1_iterator == lhs.urng1_sentinel; + } + + //!\brief Compare to the sentinel of the minstrobe_view. + friend bool operator==(sentinel const & lhs, basic_iterator const & rhs) + { + return rhs == lhs; + } + + //!\brief Compare to the sentinel of the minstrobe_view. + friend bool operator!=(sentinel const & lhs, basic_iterator const & rhs) + { + return !(lhs == rhs); + } + + //!\brief Compare to the sentinel of the minstrobe_view. + friend bool operator!=(basic_iterator const & lhs, sentinel const & rhs) + { + return !(lhs == rhs); + } + //!\} + + //!\brief Pre-increment. + basic_iterator & operator++() noexcept + { + next_unique_minstrobe(); + return *this; + } + + //!\brief Post-increment. + basic_iterator operator++(int) noexcept + { + basic_iterator tmp{*this}; + next_unique_minstrobe(); + return tmp; + } + + //!\brief Return the minstrobe. + value_type operator*() const noexcept + { + return minstrobe_value; + } + +private: + //!\brief The minstrobe value. + value_type minstrobe_value{}; + + //!\brief The offset relative to the beginning of the window where the minimizer value is found. + size_t minstrobe_position_offset{}; + + //!\brief Iterator to the rightmost value of one window. + urng1_iterator_t urng1_iterator{}; + //!brief Iterator to last element in range. + urng1_sentinel_t urng1_sentinel{}; + //!\brief Iterator to the rightmost value of one window of the second range. + + + //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current minstrobe. + urng1_iterator_t second_iterator{}; + std::deque window_values{}; + size_t window_size{}; + bool end{}; + //!\brief Increments iterator by 1. + void next_unique_minstrobe() + { + while (!next_minstrobe()) {} + } + + //!\brief Returns new window value. 
+    auto window_value() const
+    {
+        return *urng1_iterator;
+    }
+
+    //!\brief Returns the value the second iterator currently points to.
+    auto second_window_value() const
+    {
+        return *second_iterator;
+    }
+
+    //!\brief Advances the window to the next position.
+    void advance_window()
+    {
+        ++urng1_iterator;
+    }
+
+    //!\brief Advances the second iterator by one position.
+    void advance_second_window()
+    {
+        ++second_iterator;
+    }
+
+    /*!\brief Calculates minstrobes for the first window.
+     * \param[in] window_min Distance from the first value to the first value of the window.
+     * \param[in] window_max Distance from the first value to the last value of the window.
+     *
+     * \details
+     *
+     * Fills window_values with the `window_max - window_min + 1` values found at the distances
+     * [`window_min`, `window_max`] from the current position and stores the tuple (current value, smallest window
+     * value) in minstrobe_value. Afterwards second_iterator points to the last value of the window.
+     * If the underlying range ends before the window is complete, the iterator is moved to the end of the range so
+     * that it compares equal to the sentinel, i.e. the view is empty.
+     */
+    void window_first(size_t const window_min, size_t const window_max)
+    {
+        window_size = window_max - window_min + 1;
+        end = false;
+
+        if (window_size == 0u)
+            return;
+
+        // The bounded std::ranges::next only needs a forward iterator and never steps past the sentinel.
+        second_iterator = std::ranges::next(urng1_iterator, static_cast<difference_type>(window_min), urng1_sentinel);
+
+        for (size_t filled = 0u; filled < window_size; ++filled)
+        {
+            if (second_iterator == urng1_sentinel)
+            {
+                // Not enough values for one complete window: nothing can be reported.
+                window_values.clear();
+                urng1_iterator = std::ranges::next(urng1_iterator, urng1_sentinel);
+                end = true;
+                return;
+            }
+
+            window_values.push_back(second_window_value());
+
+            // Stay on the last value of the window; next_minstrobe() advances before it reads.
+            if (filled + 1u < window_size)
+                advance_second_window();
+        }
+
+        // std::less_equal makes min_element return the rightmost of several equal minima.
+        auto minstrobe_it = std::ranges::min_element(window_values, std::less_equal{});
+        minstrobe_value = std::make_tuple(window_value(), *minstrobe_it);
+        minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it);
+    }
+
+    /*!\brief Calculates the next minstrobe value.
+     * \returns True, if new minstrobe is found or end is reached. Otherwise returns false.
+     * \details
+     * For the following windows, we remove the first window value (is now not in window_values) and add the new
+     * value that results from the window shifting.
+ */ + bool next_minstrobe() + { + //for (std::vector vec : minstrobes){ + // for (value_type i : vec){ std::cout << i << std::endl;}; + //}; + advance_second_window(); + advance_window(); + if (second_iterator == urng1_sentinel){ + end = true; + return false; + }; + + if (urng1_iterator == urng1_sentinel) + return true; + + if (end) return false; + + value_t const new_value = window_value(); + value_t const sw_new_value = second_window_value(); + + + std::get<0>(minstrobe_value) = new_value; + + window_values.pop_front(); + window_values.push_back(sw_new_value); + + if (minstrobe_position_offset == 0) + { + auto minstrobe_it = std::ranges::min_element(window_values, std::less_equal{}); + std::get<1>(minstrobe_value) = *minstrobe_it; + minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it); + return true; + + } + + if (sw_new_value < std::get<1>(minstrobe_value)) + { + std::get<1>(minstrobe_value) = sw_new_value; + minstrobe_position_offset = window_values.size() - 1; + return true; + } + + --minstrobe_position_offset; + return true; + } +}; + + + +//!\brief A deduction guide for the view class template. +template +minstrobe_view(rng1_t &&, size_t const window_min, size_t const window_max, size_t const n) -> minstrobe_view>; + + + +// --------------------------------------------------------------------------------------------------------------------- +// minstrobe_fn (adaptor definition) +// --------------------------------------------------------------------------------------------------------------------- + +//![adaptor_def] +//!\brief minstrobe's range adaptor object type (non-closure). +//!\ingroup search_views +struct minstrobe_fn +{ + //!\brief Store the number of values in one window and return a range adaptor closure object. 
+    constexpr auto operator()(size_t const window_min, size_t const window_max, size_t const n) const
+    {
+        // Only the parameters are bound here; the overload below is the one that receives the range.
+        return adaptor_from_functor{*this, window_min, window_max, n};
+    }
+
+    /*!\brief Call the view's constructor with the underlying range and the minstrobe parameters.
+     * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range.
+     * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and
+     *                    std::ranges::forward_range.
+     * \param[in] window_min The minimum value to start the next window.
+     * \param[in] window_max The maximum value to start the next window. Must be greater than `window_min`.
+     * \param[in] n The order of minstrobes.
+     * \returns A seqan3::detail::minstrobe_view over `urange1`.
+     * \throws std::invalid_argument if `window_max` is not greater than `window_min`.
+     */
+    template <std::ranges::range urng1_t>
+    constexpr auto operator()(urng1_t && urange1, size_t const window_min, size_t const window_max, size_t const n) const
+    {
+        static_assert(std::ranges::viewable_range<urng1_t>,
+                      "The range parameter to views::minstrobe cannot be a temporary of a non-view range.");
+        static_assert(std::ranges::forward_range<urng1_t>,
+                      "The range parameter to views::minstrobe must model std::ranges::forward_range.");
+
+        // Compare instead of subtracting: both bounds are unsigned, so `window_max - window_min` would wrap around
+        // for window_max < window_min and the view would be asked for an absurdly large window.
+        if (window_max <= window_min)
+            throw std::invalid_argument{"The chosen min and max window are not valid. "
+                                        "Please choose window_max greater than window_min."};
+
+        // Forward so that a view passed as rvalue is moved into minstrobe_view instead of being copied.
+        return minstrobe_view{std::forward<urng1_t>(urange1), window_min, window_max, n};
+    }
+};
+//![adaptor_def]
+
+} // namespace seqan3::detail
+
+namespace seqan3::views
+{
+/*!\brief Computes minstrobes for a range of comparable values. A minstrobe is a value that fullfills the
+          condition value % mod_used.
+ * \tparam urng_t The type of the first range being processed. See below for requirements. [template
+ *                parameter is omitted in pipe notation]
+ * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation]
+ * \param[in] mod_used The mod value used.
+ * \returns A range of std::totally_ordered where each value is ... See below for the
+ *          properties of the returned range.
+ * \ingroup search_views + * + * + * ### View properties + * + * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | + * |----------------------------------|:----------------------------------:|:--------------------------------:| + * | std::ranges::input_range | *required* | *preserved* | + * | std::ranges::forward_range | *required* | *preserved* | + * | std::ranges::bidirectional_range | | *lost* | + * | std::ranges::random_access_range | | *lost* | + * | std::ranges::contiguous_range | | *lost* | + * | | | | + * | std::ranges::viewable_range | *required* | *guaranteed* | + * | std::ranges::view | | *guaranteed* | + * | std::ranges::sized_range | | *lost* | + * | std::ranges::common_range | | *lost* | + * | std::ranges::output_range | | *lost* | + * | seqan3::const_iterable_range | | *preserved* | + * | | | | + * | std::ranges::range_reference_t | std::totally_ordered | std::totally_ordered | + * + * See the views views submodule documentation for detailed descriptions of the view properties. + */ +inline constexpr auto minstrobe = detail::minstrobe_fn{}; + +} // namespace seqan3::views diff --git a/include/syncmer.hpp b/include/syncmer.hpp new file mode 100644 index 0000000..485bdf1 --- /dev/null +++ b/include/syncmer.hpp @@ -0,0 +1,541 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +/*!\file + * \author Hossein Eizadi Moghadam + * \brief Provides syncmer. 
+ */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace seqan3::detail +{ +// --------------------------------------------------------------------------------------------------------------------- +// syncmer_view class +// --------------------------------------------------------------------------------------------------------------------- + +/*!\brief The type returned by syncmer. + * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must + * model std::totally_ordered. The typical use case is that the reference type is the result of + * seqan3::kmer_hash. + * \tparam measure_distance If true, then not the actual syncmers are returned, but the distances of the syncmers. + * \implements std::ranges::view + * \ingroup search_views + * + * + * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. + + */ +template +class syncmer_view : public std::ranges::view_interface> +{ +private: + static_assert(std::ranges::forward_range, "The syncmer_view only works on forward_ranges."); + static_assert(std::totally_ordered>, + "The reference type of the underlying range must model std::totally_ordered."); + + //!\brief Whether the given ranges are const_iterable + static constexpr bool const_iterable = seqan3::const_iterable_range; + + //!\brief The first underlying range. + urng1_t urange1{}; + size_t K{}; + size_t window_size{}; + size_t S{}; + //!\brief The number of values in one window. + + + template + class basic_iterator; + + //!\brief The sentinel type of the syncmer_view. + using sentinel = std::default_sentinel_t; + +public: + /*!\name Constructors, destructor and assignment + * \{ + */ + /// \cond Workaround_Doxygen + syncmer_view() requires std::default_initializable = default; //!< Defaulted. + /// \endcond + syncmer_view(syncmer_view const & rhs) = default; //!< Defaulted. 
+ syncmer_view(syncmer_view && rhs) = default; //!< Defaulted. + syncmer_view & operator=(syncmer_view const & rhs) = default; //!< Defaulted. + syncmer_view & operator=(syncmer_view && rhs) = default; //!< Defaulted. + ~syncmer_view() = default; //!< Defaulted. + + /*!\brief Construct from a view and a given number of values in one window. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + */ + syncmer_view(urng1_t urange1, size_t const K, size_t const window_size, size_t const S) : + urange1{std::move(urange1)}, + K{K}, + window_size{window_size}, + S{S} + {} + + /*!\brief Construct from a non-view that can be view-wrapped and a given number of values in one window. + * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible + from urng1_t. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + */ + template + //!\cond + requires (std::ranges::viewable_range && + std::constructible_from>>) + //!\endcond + syncmer_view(other_urng1_t && urange1, size_t const K, size_t const window_size, size_t const S) : + urange1{std::views::all(std::forward(urange1))}, + K{K}, + window_size{window_size}, + S{S} + {} + + /*!\name Iterators + * \{ + */ + /*!\brief Returns an iterator to the first element of the range. + * \returns Iterator to the first element. + * + * \details + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * Strong exception guarantee. 
+ */ + basic_iterator begin() + { + return {std::ranges::begin(urange1), + std::ranges::end(urange1), + K, + window_size, + S}; + } + + //!\copydoc begin() + basic_iterator begin() const + //!\cond + requires const_iterable + //!\endcond + { + return {std::ranges::cbegin(urange1), + std::ranges::cend(urange1), + K, + window_size, + S}; + } + + /*!\brief Returns an iterator to the element following the last element of the range. + * \returns Iterator to the end. + * + * \details + * + * This element acts as a placeholder; attempting to dereference it results in undefined behaviour. + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * No-throw guarantee. + */ + sentinel end() const + { + return {}; + } + //!\} +}; + +//!\brief Iterator for calculating syncmers. +template +template +class syncmer_view::basic_iterator +{ +private: + //!\brief The sentinel type of the first underlying range. + using urng1_sentinel_t = maybe_const_sentinel_t; + //!\brief The iterator type of the first underlying range. + using urng1_iterator_t = maybe_const_iterator_t; + + template + friend class basic_iterator; + +public: + /*!\name Associated types + * \{ + */ + //!\brief Type for distances between iterators. + using difference_type = std::ranges::range_difference_t; + //!\brief Value type of this iterator. + using value_type = std::ranges::range_value_t; + //!\brief The pointer type. + using pointer = void; + //!\brief Reference to `value_type`. + using reference = value_type; + //!\brief Tag this class as a forward iterator. + using iterator_category = std::forward_iterator_tag; + //!\brief Tag this class as a forward iterator. + using iterator_concept = iterator_category; + //!\} + + /*!\name Constructors, destructor and assignment + * \{ + */ + basic_iterator() = default; //!< Defaulted. + basic_iterator(basic_iterator const &) = default; //!< Defaulted. + basic_iterator(basic_iterator &&) = default; //!< Defaulted. 
+    basic_iterator & operator=(basic_iterator const &) = default; //!< Defaulted.
+    basic_iterator & operator=(basic_iterator &&) = default; //!< Defaulted.
+    ~basic_iterator() = default; //!< Defaulted.
+
+    //!\brief Allow iterator on a const range to be constructible from an iterator over a non-const range.
+    basic_iterator(basic_iterator<!const_range> const & it)
+    //!\cond
+        requires const_range
+    //!\endcond
+        : syncmer_value{it.syncmer_value},
+          syncmer_position_offset{it.syncmer_position_offset},
+          urng1_iterator{it.urng1_iterator},
+          smallest_s_it{it.smallest_s_it},
+          urng1_sentinel{it.urng1_sentinel},
+          window_values{it.window_values},
+          first{it.first},
+          POS{it.POS},
+          k{it.k},
+          s{it.s},
+          w_size{it.w_size}
+    {
+        // Every data member is copied, in declaration order. next_syncmer() reads window_values, first, k, s,
+        // w_size and syncmer_position_offset, so a converted iterator that left them value-initialised could not be
+        // advanced. `it` is const, hence copies rather than std::move.
+    }
+
+    /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values and the three
+     *        syncmer parameters.
+     * \param[in] urng1_iterator Iterator pointing to the first position of the std::totally_ordered range.
+     * \param[in] urng1_sentinel Sentinel marking the end of the std::totally_ordered range.
+     * \param[in] K Handed to window_first(), which stores it in `k`.
+     * \param[in] window_size Handed to window_first(), which stores it in `w_size`.
+     * \param[in] S Handed to window_first(), which stores it in `s`.
+     *
+     * \details
+     *
+     * Stores the bounds of the range and lets window_first() evaluate the first window.
+     */
+    basic_iterator(urng1_iterator_t urng1_iterator,
+                   urng1_sentinel_t urng1_sentinel,
+                   size_t K,
+                   size_t window_size,
+                   size_t S) :
+        urng1_iterator{std::move(urng1_iterator)},
+        urng1_sentinel{std::move(urng1_sentinel)}
+    {
+        window_first(K, window_size, S);
+    }
+    //!\}
+
+    //!\anchor basic_iterator_comparison_syncmer
+    //!\name Comparison operators
+    //!\{
+
+    //!\brief Compare to another basic_iterator; two iterators are equal if their underlying positions are equal.
+    friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs)
+    {
+        return lhs.urng1_iterator == rhs.urng1_iterator;
+    }
+
+    //!\brief Compare to another basic_iterator.
+    friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    //!\brief Compare to the sentinel of the syncmer_view; true once the underlying iterator reached its end.
+    friend bool operator==(basic_iterator const & lhs, sentinel const &)
+    {
+        return lhs.urng1_iterator == lhs.urng1_sentinel;
+    }
+
+    //!\brief Compare to the sentinel of the syncmer_view.
+    friend bool operator==(sentinel const & lhs, basic_iterator const & rhs)
+    {
+        return rhs == lhs;
+    }
+
+    //!\brief Compare to the sentinel of the syncmer_view.
+    friend bool operator!=(sentinel const & lhs, basic_iterator const & rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    //!\brief Compare to the sentinel of the syncmer_view.
+    friend bool operator!=(basic_iterator const & lhs, sentinel const & rhs)
+    {
+        return !(lhs == rhs);
+    }
+    //!\}
+
+    //!\brief Pre-increment.
+    basic_iterator & operator++() noexcept
+    {
+        next_unique_syncmer();
+        return *this;
+    }
+
+    //!\brief Post-increment.
+    basic_iterator operator++(int) noexcept
+    {
+        basic_iterator tmp{*this};
+        next_unique_syncmer();
+        return tmp;
+    }
+
+    //!\brief Return the syncmer.
+    value_type operator*() const noexcept
+    {
+        return syncmer_value;
+    }
+
+private:
+    //!\brief The syncmer value.
+    value_type syncmer_value{};
+
+    //!\brief The offset relative to the beginning of the window where the minimizer value is found.
+    size_t syncmer_position_offset{};
+
+    //!\brief Iterator to the rightmost value of one window.
+    urng1_iterator_t urng1_iterator{};
+    //!\brief NOTE(review): window_first() and next_syncmer() declare locals of the same name, so this member is never
+    //!       assigned by them - confirm whether it is still needed.
+    urng1_iterator_t smallest_s_it{};
+    //!\brief Iterator to last element in range.
+    urng1_sentinel_t urng1_sentinel{};
+
+    //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current syncmer.
+    std::deque<value_type> window_values{};
+    //!\brief Flag written by window_first() and next_syncmer() and read by next_syncmer(). It is value-initialised
+    //!       because window_first() assigns it on one branch only, so it could otherwise be read indeterminate.
+    bool first{};
+    //!\brief NOTE(review): not read or written by any member function of this class - confirm whether it is needed.
+    size_t POS{};
+    //!\brief Copy of the `K` argument, stored by window_first().
+    size_t k{};
+    //!\brief Copy of the `S` argument, stored by window_first().
+    size_t s{};
+    //!\brief Copy of the `window_size` argument, stored by window_first().
+    size_t w_size{};
+    //!\brief Increments iterator by 1.
+ void next_unique_syncmer() + { + while (!next_syncmer()) {} + } + + //!\brief Returns new window value. + auto window_value() const + { + return *urng1_iterator; + } + + + //!\brief Advances the window to the next position. + void advance_window() + { + ++urng1_iterator; + } + + + //!\brief Calculates syncmers for the first window. + void window_first(size_t const K, size_t const window_size, size_t const S) + { + s=S; + k=K; + w_size = window_size; + + if (window_size == 0u) + return; + + for (int i = 1u; i < ( window_size - (S - 1) ); ++i) + { + window_values.push_back(window_value()); + advance_window(); + } + window_values.push_back(window_value()); + + auto smallest_s_it = std::ranges::min_element(window_values, std::less_equal{}); + syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + + if (K-(S-1)-1 <= syncmer_position_offset) { + auto syncmer_it = smallest_s_it - (K - 1) + (S-1); + syncmer_value = *syncmer_it; + first = true; + }; + + if ( syncmer_position_offset + (S + k - 3) <= window_size ) { + auto syncmer_it = smallest_s_it; + syncmer_value = *syncmer_it; + }; + + + + + } + + /*!\brief Calculates the next syncmer value. + * \returns True, if new syncmer is found or end is reached. Otherwise returns false. + * \details + * For the following windows, we remove the first window value (is now not in window_values) and add the new + * value that results from the window shifting. 
+ */ + bool next_syncmer() + { + //for (std::vector vec : syncmers){ + // for (value_type i : vec){ std::cout << i << std::endl;}; + //}; + advance_window(); + + + if (urng1_iterator == urng1_sentinel) + return true; + + value_type const new_value = window_value(); + + window_values.pop_front(); + window_values.push_back(new_value); + + if (syncmer_position_offset == 0) + { + auto smallest_s_it = std::ranges::min_element(window_values, std::less_equal{}); + syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + + if (k-(s-1)-1 <= syncmer_position_offset) { + + auto syncmer_it = smallest_s_it - (k - 1) + (s-1); + syncmer_value = *syncmer_it; + first = true; + }; + + if ( syncmer_position_offset + (s + k - 3) <= w_size ) { + auto syncmer_it = smallest_s_it; + syncmer_value = *syncmer_it; + }; + + return true; + } + + if (new_value < *(window_values.begin()+syncmer_position_offset-1) && syncmer_position_offset != window_values.size() - k + s ) + { + + syncmer_position_offset = window_values.size() - 1; + auto syncmer_it = urng1_iterator - (k - 1) + (s-1); + syncmer_value = *syncmer_it; + return true; + } + + if ( syncmer_position_offset - 1 + (s + k - 3) <= w_size && first) { + + auto syncmer_it = (window_values.begin()+syncmer_position_offset - 1); + syncmer_value = *syncmer_it; + --syncmer_position_offset; + first = false; + return true; + }; + + + + --syncmer_position_offset; + return false; + } +}; + + + +//!\brief A deduction guide for the view class template. +template +syncmer_view(rng1_t &&, size_t const K, size_t const window_size, size_t const S) -> syncmer_view>; + + + +// --------------------------------------------------------------------------------------------------------------------- +// syncmer_fn (adaptor definition) +// --------------------------------------------------------------------------------------------------------------------- + +//![adaptor_def] +//!\brief syncmer's range adaptor object type (non-closure). 
+//!\ingroup search_views
+struct syncmer_fn
+{
+    //!\brief Store the three syncmer parameters and return a range adaptor closure object.
+    constexpr auto operator()(size_t const K, size_t const window_size, size_t const S) const
+    {
+        return adaptor_from_functor{*this, K, window_size, S};
+    }
+
+    /*!\brief Call the view's constructor with the underlying range and the three syncmer parameters.
+     * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range.
+     * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and
+     *                    std::ranges::forward_range.
+     * \param[in] K Passed on to the view. Must not be equal to `window_size + 1`.
+     * \param[in] window_size Passed on to the view.
+     * \param[in] S Passed on to the view.
+     * \returns A seqan3::detail::syncmer_view over `urange1`.
+     * \throws std::invalid_argument if `window_size + 1 == K`.
+     */
+    template <std::ranges::range urng1_t>
+    constexpr auto operator()(urng1_t && urange1, size_t const K, size_t const window_size, size_t const S) const
+    {
+        static_assert(std::ranges::viewable_range<urng1_t>,
+                      "The range parameter to views::syncmer cannot be a temporary of a non-view range.");
+        static_assert(std::ranges::forward_range<urng1_t>,
+                      "The range parameter to views::syncmer must model std::ranges::forward_range.");
+
+        // Written as an addition because both operands are unsigned: `window_size - K == -1` is only true through
+        // wrap-around combined with an implicit signed-to-unsigned conversion. The rejected inputs are the same.
+        // NOTE(review): only this single relation is rejected - confirm whether every window_size < K should be.
+        if (window_size + 1 == K)
+            throw std::invalid_argument{"The chosen K-mer and window size are not valid. "
+                                        "Please choose a value that satisfize the given condition."};
+
+        // Forward so that a view passed as rvalue is moved into syncmer_view instead of being copied.
+        return syncmer_view{std::forward<urng1_t>(urange1), K, window_size, S};
+    }
+};
+//![adaptor_def]
+
+} // namespace seqan3::detail
+
+namespace seqan3::views
+{
+/*!\brief Computes syncmers for a range of comparable values. A syncmer is a value that fullfills the
+          condition value % mod_used.
+ * \tparam urng_t The type of the first range being processed. See below for requirements. [template
+ *                parameter is omitted in pipe notation]
+ * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation]
+ * \param[in] mod_used The mod value used.
+ * \returns A range of std::totally_ordered where each value is ... See below for the + * properties of the returned range. + * \ingroup search_views + * + * + * ### View properties + * + * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | + * |----------------------------------|:----------------------------------:|:--------------------------------:| + * | std::ranges::input_range | *required* | *preserved* | + * | std::ranges::forward_range | *required* | *preserved* | + * | std::ranges::bidirectional_range | | *lost* | + * | std::ranges::random_access_range | | *lost* | + * | std::ranges::contiguous_range | | *lost* | + * | | | | + * | std::ranges::viewable_range | *required* | *guaranteed* | + * | std::ranges::view | | *guaranteed* | + * | std::ranges::sized_range | | *lost* | + * | std::ranges::common_range | | *lost* | + * | std::ranges::output_range | | *lost* | + * | seqan3::const_iterable_range | | *preserved* | + * | | | | + * | std::ranges::range_reference_t | std::totally_ordered | std::totally_ordered | + * + * See the views views submodule documentation for detailed descriptions of the view properties. 
+ */ +inline constexpr auto syncmer = detail::syncmer_fn{}; + +} // namespace seqan3::views From 6ef4bf6bbdd4bbda8d4a4216d1f900c5c27d5a1f Mon Sep 17 00:00:00 2001 From: Hossein Eizadi Moghadam Date: Wed, 12 Jan 2022 18:49:13 +0100 Subject: [PATCH 02/16] [FIX] fixed some typos in minstrobe.hpp file --- include/minstrobe.hpp | 48 ++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/include/minstrobe.hpp b/include/minstrobe.hpp index d7746b2..ac2bf83 100644 --- a/include/minstrobe.hpp +++ b/include/minstrobe.hpp @@ -53,12 +53,12 @@ class minstrobe_view : public std::ranges::view_interface class basic_iterator; @@ -81,7 +81,9 @@ class minstrobe_view : public std::ranges::view_interface //!\cond @@ -191,7 +195,7 @@ class minstrobe_view::basic_iterator using difference_type = std::ranges::range_difference_t; //!\brief Value type of this iterator. using value_t = std::ranges::range_value_t; - + //!\brief Value type of the output. using value_type = std::tuple; //!\brief The pointer type. using pointer = void; @@ -227,13 +231,15 @@ class minstrobe_view::basic_iterator of values per window. * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] mod_used The number of values in one window. + * \param[in] window_min The minimum value to start the next window + * \param[in] window_max The maximum value to start the next window + * \param[in] n The order of minstrobes * * \details * - * Looks at the number of values per window in two ranges, returns the smallest between both as minstrobe and - * shifts then by one to repeat this action. If a minstrobe in consecutive windows is the same, it is returned only - * once. + * Looks at the number of values per window with two iterators. First iterator adds the next value in the tuple as + * the first strobe. 
The second iterator adds the minimum value of the window to the second position of the tuple. + * */ basic_iterator(urng1_iterator_t urng1_iterator, urng1_sentinel_t urng1_sentinel, @@ -243,9 +249,6 @@ class minstrobe_view::basic_iterator urng1_iterator{std::move(urng1_iterator)}, urng1_sentinel{std::move(urng1_sentinel)} { - size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); - size_t p_req = ((n-1)*window_max) + 1; - n = size - p_req + 1; window_first(window_min, window_max); } //!\} @@ -319,15 +322,14 @@ class minstrobe_view::basic_iterator //!\brief The offset relative to the beginning of the window where the minimizer value is found. size_t minstrobe_position_offset{}; - //!\brief Iterator to the rightmost value of one window. + //!\brief Iterator to the first value of minstrobe. urng1_iterator_t urng1_iterator{}; - //!brief Iterator to last element in range. + //!\brief Iterator to the right most value of the window. + urng1_iterator_t second_iterator{}; + //!\brief Iterator to last element in range. urng1_sentinel_t urng1_sentinel{}; - //!\brief Iterator to the rightmost value of one window of the second range. - //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current minstrobe. - urng1_iterator_t second_iterator{}; std::deque window_values{}; size_t window_size{}; bool end{}; @@ -337,23 +339,25 @@ class minstrobe_view::basic_iterator while (!next_minstrobe()) {} } - //!\brief Returns new window value. + //!\brief Returns new window value of the first iterator. auto window_value() const { return *urng1_iterator; } + //!\brief Returns new window value of the second iterator. auto second_window_value() const { return *second_iterator; } - //!\brief Advances the window to the next position. + //!\brief Advances the window of the first iterator to the next position. void advance_window() { ++urng1_iterator; } + //!\brief Advances the window of the second iterator to the next position. 
void advance_second_window() { ++second_iterator; @@ -464,8 +468,10 @@ struct minstrobe_fn * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] mod_used The number of values in one window. - * \returns A range of converted values. + * \param[in] window_min The minimum value to start the next window + * \param[in] window_max The maximum value to start the next window + * \param[in] n The order of minstrobes + * \returns A range of converted values in tuples. */ template constexpr auto operator()(urng1_t && urange1, size_t const window_min, size_t const window_max, size_t const n) const From e4039bda4dc9a4bcd7ccbb4ada2c44021d80cffe Mon Sep 17 00:00:00 2001 From: hosseinem Date: Fri, 14 Jan 2022 20:35:54 +0100 Subject: [PATCH 03/16] [FIX] Syncmer implementation with two ranges --- include/syncmer.hpp | 163 +++++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 78 deletions(-) diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 485bdf1..1e5c7a4 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -40,24 +40,28 @@ namespace seqan3::detail * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. 
*/ -template -class syncmer_view : public std::ranges::view_interface> +template +class syncmer_view : public std::ranges::view_interface> { private: static_assert(std::ranges::forward_range, "The syncmer_view only works on forward_ranges."); + static_assert(std::ranges::forward_range, "The syncmer_view only works on forward_ranges."); static_assert(std::totally_ordered>, "The reference type of the underlying range must model std::totally_ordered."); + static_assert(std::totally_ordered>, + "The reference type of the underlying range must model std::totally_ordered."); //!\brief Whether the given ranges are const_iterable static constexpr bool const_iterable = seqan3::const_iterable_range; + static constexpr bool const_iterable2 = seqan3::const_iterable_range; //!\brief The first underlying range. urng1_t urange1{}; + urng2_t urange2{}; size_t K{}; - size_t window_size{}; size_t S{}; //!\brief The number of values in one window. - + template class basic_iterator; @@ -66,11 +70,11 @@ class syncmer_view : public std::ranges::view_interface> using sentinel = std::default_sentinel_t; public: - /*!\name Constructors, destructor and assignment + /*!\name Constructors, destructor andt assignment * \{ */ /// \cond Workaround_Doxygen - syncmer_view() requires std::default_initializable = default; //!< Defaulted. + syncmer_view() requires std::default_initializable && std::default_initializable = default; //!< Defaulted. /// \endcond syncmer_view(syncmer_view const & rhs) = default; //!< Defaulted. syncmer_view(syncmer_view && rhs) = default; //!< Defaulted. @@ -83,10 +87,10 @@ class syncmer_view : public std::ranges::view_interface> * std::ranges::forward_range. * \param[in] mod_used The number of values in one window. 
*/ - syncmer_view(urng1_t urange1, size_t const K, size_t const window_size, size_t const S) : + syncmer_view(urng1_t urange1, urng2_t urange2, size_t const K, size_t const S) : urange1{std::move(urange1)}, + urange2{std::move(urange2)}, K{K}, - window_size{window_size}, S{S} {} @@ -97,15 +101,17 @@ class syncmer_view : public std::ranges::view_interface> * std::ranges::forward_range. * \param[in] mod_used The number of values in one window. */ - template + template //!\cond requires (std::ranges::viewable_range && - std::constructible_from>>) + std::constructible_from>> && + std::ranges::viewable_range && + std::constructible_from>>) //!\endcond - syncmer_view(other_urng1_t && urange1, size_t const K, size_t const window_size, size_t const S) : + syncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const K, size_t const S) : urange1{std::views::all(std::forward(urange1))}, + urange2{std::views::all(std::forward(urange2))}, K{K}, - window_size{window_size}, S{S} {} @@ -128,22 +134,22 @@ class syncmer_view : public std::ranges::view_interface> basic_iterator begin() { return {std::ranges::begin(urange1), + std::ranges::begin(urange2), std::ranges::end(urange1), K, - window_size, S}; } //!\copydoc begin() basic_iterator begin() const //!\cond - requires const_iterable + requires const_iterable && const_iterable2 //!\endcond { return {std::ranges::cbegin(urange1), + std::ranges::cbegin(urange2), std::ranges::cend(urange1), K, - window_size, S}; } @@ -170,15 +176,16 @@ class syncmer_view : public std::ranges::view_interface> }; //!\brief Iterator for calculating syncmers. -template +template template -class syncmer_view::basic_iterator +class syncmer_view::basic_iterator { private: //!\brief The sentinel type of the first underlying range. using urng1_sentinel_t = maybe_const_sentinel_t; //!\brief The iterator type of the first underlying range. 
using urng1_iterator_t = maybe_const_iterator_t; + using urng2_iterator_t = maybe_const_iterator_t; template friend class basic_iterator; @@ -190,7 +197,7 @@ class syncmer_view::basic_iterator //!\brief Type for distances between iterators. using difference_type = std::ranges::range_difference_t; //!\brief Value type of this iterator. - using value_type = std::ranges::range_value_t; + using value_type = std::ranges::range_value_t; //!\brief The pointer type. using pointer = void; //!\brief Reference to `value_type`. @@ -218,6 +225,7 @@ class syncmer_view::basic_iterator //!\endcond : syncmer_value{std::move(it.syncmer_value)}, urng1_iterator{std::move(it.urng1_iterator)}, + urng2_iterator{std::move(it.urng2_iterator)}, urng1_sentinel{std::move(it.urng1_sentinel)} {} @@ -234,14 +242,15 @@ class syncmer_view::basic_iterator * once. */ basic_iterator(urng1_iterator_t urng1_iterator, + urng2_iterator_t urng2_iterator, urng1_sentinel_t urng1_sentinel, size_t K, - size_t window_size, size_t S) : urng1_iterator{std::move(urng1_iterator)}, + urng2_iterator{std::move(urng2_iterator)}, urng1_sentinel{std::move(urng1_sentinel)} { - window_first(K, window_size, S); + window_first(K, S); } //!\} @@ -316,19 +325,16 @@ class syncmer_view::basic_iterator //!\brief Iterator to the rightmost value of one window. urng1_iterator_t urng1_iterator{}; - urng1_iterator_t smallest_s_it{}; + urng2_iterator_t urng2_iterator{}; + //!brief Iterator to last element in range. urng1_sentinel_t urng1_sentinel{}; //!\brief Iterator to the rightmost value of one window of the second range. - + size_t w_size{}; //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current syncmer. std::deque window_values{}; - bool first; - size_t POS{}; - size_t k{}; - size_t s{}; - size_t w_size{}; + //!\brief Increments iterator by 1. 
void next_unique_syncmer() { @@ -340,49 +346,53 @@ class syncmer_view::basic_iterator { return *urng1_iterator; } - + //!\brief Advances the window to the next position. void advance_window() { ++urng1_iterator; + ++urng2_iterator; } - + void advance_it_1() + { + ++urng1_iterator; + } + + //!\brief Calculates syncmers for the first window. - void window_first(size_t const K, size_t const window_size, size_t const S) + void window_first(const size_t K, const size_t S) { - s=S; - k=K; - w_size = window_size; + w_size = K - S + 1; - if (window_size == 0u) + if (w_size == 0u) return; - - for (int i = 1u; i < ( window_size - (S - 1) ); ++i) + + for (int i = 1u; i < K - 1 ; ++i) { window_values.push_back(window_value()); - advance_window(); + advance_it_1(); } window_values.push_back(window_value()); - + + auto smallest_s_it = std::ranges::min_element(window_values, std::less_equal{}); - syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - if (K-(S-1)-1 <= syncmer_position_offset) { - auto syncmer_it = smallest_s_it - (K - 1) + (S-1); + if (syncmer_position_offset == 0) { + auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; - first = true; - }; + } - if ( syncmer_position_offset + (S + k - 3) <= window_size ) { - auto syncmer_it = smallest_s_it; + else if ( syncmer_position_offset == w_size - 1 ) { + auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; - }; + }; + + - - } /*!\brief Calculates the next syncmer value. 
@@ -398,12 +408,12 @@ class syncmer_view::basic_iterator //}; advance_window(); - + if (urng1_iterator == urng1_sentinel) return true; value_type const new_value = window_value(); - + window_values.pop_front(); window_values.push_back(new_value); @@ -412,41 +422,37 @@ class syncmer_view::basic_iterator auto smallest_s_it = std::ranges::min_element(window_values, std::less_equal{}); syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - if (k-(s-1)-1 <= syncmer_position_offset) { + if (syncmer_position_offset == 0) { - auto syncmer_it = smallest_s_it - (k - 1) + (s-1); + auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; - first = true; + return true; }; - if ( syncmer_position_offset + (s + k - 3) <= w_size ) { - auto syncmer_it = smallest_s_it; + if ( syncmer_position_offset == w_size - 1 ) { + auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; + return true; }; - return true; } - - if (new_value < *(window_values.begin()+syncmer_position_offset-1) && syncmer_position_offset != window_values.size() - k + s ) + + else if (new_value < *(window_values.begin()+(syncmer_position_offset-1))) { - - syncmer_position_offset = window_values.size() - 1; - auto syncmer_it = urng1_iterator - (k - 1) + (s-1); + syncmer_position_offset = w_size - 1; + auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; return true; } + else if (syncmer_position_offset == 1){ + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + --syncmer_position_offset; + return true; + }; - if ( syncmer_position_offset - 1 + (s + k - 3) <= w_size && first) { - auto syncmer_it = (window_values.begin()+syncmer_position_offset - 1); - syncmer_value = *syncmer_it; - --syncmer_position_offset; - first = false; - return true; - }; - - --syncmer_position_offset; return false; } @@ -455,8 +461,8 @@ class syncmer_view::basic_iterator //!\brief A deduction guide for the view class template. 
-template -syncmer_view(rng1_t &&, size_t const K, size_t const window_size, size_t const S) -> syncmer_view>; +template +syncmer_view(rng1_t &&, rng2_t &&, size_t const K, size_t const S) -> syncmer_view, std::views::all_t>; @@ -470,9 +476,10 @@ syncmer_view(rng1_t &&, size_t const K, size_t const window_size, size_t const S struct syncmer_fn { //!\brief Store the number of values in one window and return a range adaptor closure object. - constexpr auto operator()(const size_t K, const size_t window_size, const size_t S) const + template + constexpr auto operator()(urng2_t urange2, const size_t K, const size_t S) const { - return adaptor_from_functor{*this, K, window_size, S}; + return adaptor_from_functor{*this, urange2, K, S}; } /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many @@ -483,19 +490,19 @@ struct syncmer_fn * \param[in] mod_used The number of values in one window. * \returns A range of converted values. */ - template - constexpr auto operator()(urng1_t && urange1, size_t const K, size_t const window_size, size_t const S) const + template + constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const K, size_t const S) const { static_assert(std::ranges::viewable_range, "The range parameter to views::syncmer cannot be a temporary of a non-view range."); static_assert(std::ranges::forward_range, "The range parameter to views::syncmer must model std::ranges::forward_range."); - if (window_size - K == -1) // Would just return urange1 without any changes + if (K == 1) // Would just return urange1 without any changes throw std::invalid_argument{"The chosen K-mer and window size are not valid. 
" "Please choose a value that satisfize the given condition."}; - return syncmer_view{urange1, K, window_size, S}; + return syncmer_view{urange1, urange2, K, S}; } }; //![adaptor_def] From 2623d8755b16b7c7aba0fbe792631dd140cf56c6 Mon Sep 17 00:00:00 2001 From: hosseinem Date: Mon, 17 Jan 2022 18:02:23 +0100 Subject: [PATCH 04/16] [FEATURE,FIX] Adding opensyncmer.hpp file and fixing case of tie in syncmer.hpp --- include/opensyncmer.hpp | 533 ++++++++++++++++++++++++++++++++++++++++ include/syncmer.hpp | 9 +- 2 files changed, 538 insertions(+), 4 deletions(-) create mode 100644 include/opensyncmer.hpp diff --git a/include/opensyncmer.hpp b/include/opensyncmer.hpp new file mode 100644 index 0000000..a928726 --- /dev/null +++ b/include/opensyncmer.hpp @@ -0,0 +1,533 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +/*!\file + * \author Hossein Eizadi Moghadam + * \brief Provides opensyncmer. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace seqan3::detail +{ +// --------------------------------------------------------------------------------------------------------------------- +// opensyncmer_view class +// --------------------------------------------------------------------------------------------------------------------- + +/*!\brief The type returned by opensyncmer. 
+ * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must + * model std::totally_ordered. The typical use case is that the reference type is the result of + * seqan3::kmer_hash. + * \tparam measure_distance If true, then not the actual opensyncmers are returned, but the distances of the opensyncmers. + * \implements std::ranges::view + * \ingroup search_views + * + * + * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. + + */ +template +class opensyncmer_view : public std::ranges::view_interface> +{ +private: + static_assert(std::ranges::forward_range, "The opensyncmer_view only works on forward_ranges."); + static_assert(std::ranges::forward_range, "The opensyncmer_view only works on forward_ranges."); + static_assert(std::totally_ordered>, + "The reference type of the underlying range must model std::totally_ordered."); + static_assert(std::totally_ordered>, + "The reference type of the underlying range must model std::totally_ordered."); + + //!\brief Whether the given ranges are const_iterable + static constexpr bool const_iterable = seqan3::const_iterable_range; + static constexpr bool const_iterable2 = seqan3::const_iterable_range; + + //!\brief The first underlying range. + urng1_t urange1{}; + urng2_t urange2{}; + size_t K{}; + size_t S{}; + //!\brief The number of values in one window. + + + template + class basic_iterator; + + //!\brief The sentinel type of the opensyncmer_view. + using sentinel = std::default_sentinel_t; + +public: + /*!\name Constructors, destructor andt assignment + * \{ + */ + /// \cond Workaround_Doxygen + opensyncmer_view() requires std::default_initializable && std::default_initializable = default; //!< Defaulted. + /// \endcond + opensyncmer_view(opensyncmer_view const & rhs) = default; //!< Defaulted. + opensyncmer_view(opensyncmer_view && rhs) = default; //!< Defaulted. 
+ opensyncmer_view & operator=(opensyncmer_view const & rhs) = default; //!< Defaulted. + opensyncmer_view & operator=(opensyncmer_view && rhs) = default; //!< Defaulted. + ~opensyncmer_view() = default; //!< Defaulted. + + /*!\brief Construct from a view and a given number of values in one window. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + */ + opensyncmer_view(urng1_t urange1, urng2_t urange2, size_t const K, size_t const S) : + urange1{std::move(urange1)}, + urange2{std::move(urange2)}, + K{K}, + S{S} + {} + + /*!\brief Construct from a non-view that can be view-wrapped and a given number of values in one window. + * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible + from urng1_t. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + */ + template + //!\cond + requires (std::ranges::viewable_range && + std::constructible_from>> && + std::ranges::viewable_range && + std::constructible_from>>) + //!\endcond + opensyncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const K, size_t const S) : + urange1{std::views::all(std::forward(urange1))}, + urange2{std::views::all(std::forward(urange2))}, + K{K}, + S{S} + {} + + /*!\name Iterators + * \{ + */ + /*!\brief Returns an iterator to the first element of the range. + * \returns Iterator to the first element. + * + * \details + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * Strong exception guarantee. 
+ */ + basic_iterator begin() + { + return {std::ranges::begin(urange1), + std::ranges::begin(urange2), + std::ranges::end(urange1), + K, + S}; + } + + //!\copydoc begin() + basic_iterator begin() const + //!\cond + requires const_iterable && const_iterable2 + //!\endcond + { + return {std::ranges::cbegin(urange1), + std::ranges::cbegin(urange2), + std::ranges::cend(urange1), + K, + S}; + } + + /*!\brief Returns an iterator to the element following the last element of the range. + * \returns Iterator to the end. + * + * \details + * + * This element acts as a placeholder; attempting to dereference it results in undefined behaviour. + * + * ### Complexity + * + * Constant. + * + * ### Exceptions + * + * No-throw guarantee. + */ + sentinel end() const + { + return {}; + } + //!\} +}; + +//!\brief Iterator for calculating opensyncmers. +template +template +class opensyncmer_view::basic_iterator +{ +private: + //!\brief The sentinel type of the first underlying range. + using urng1_sentinel_t = maybe_const_sentinel_t; + //!\brief The iterator type of the first underlying range. + using urng1_iterator_t = maybe_const_iterator_t; + using urng2_iterator_t = maybe_const_iterator_t; + + template + friend class basic_iterator; + +public: + /*!\name Associated types + * \{ + */ + //!\brief Type for distances between iterators. + using difference_type = std::ranges::range_difference_t; + //!\brief Value type of this iterator. + using value_type = std::ranges::range_value_t; + //!\brief The pointer type. + using pointer = void; + //!\brief Reference to `value_type`. + using reference = value_type; + //!\brief Tag this class as a forward iterator. + using iterator_category = std::forward_iterator_tag; + //!\brief Tag this class as a forward iterator. + using iterator_concept = iterator_category; + //!\} + + /*!\name Constructors, destructor and assignment + * \{ + */ + basic_iterator() = default; //!< Defaulted. + basic_iterator(basic_iterator const &) = default; //!< Defaulted. 
+ basic_iterator(basic_iterator &&) = default; //!< Defaulted. + basic_iterator & operator=(basic_iterator const &) = default; //!< Defaulted. + basic_iterator & operator=(basic_iterator &&) = default; //!< Defaulted. + ~basic_iterator() = default; //!< Defaulted. + + //!\brief Allow iterator on a const range to be constructible from an iterator over a non-const range. + basic_iterator(basic_iterator const & it) + //!\cond + requires const_range + //!\endcond + : opensyncmer_value{std::move(it.opensyncmer_value)}, + urng1_iterator{std::move(it.urng1_iterator)}, + urng2_iterator{std::move(it.urng2_iterator)}, + urng1_sentinel{std::move(it.urng1_sentinel)} + {} + + /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number + of values per window. + * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. + * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. + * \param[in] mod_used The number of values in one window. + * + * \details + * + * Looks at the number of values per window in two ranges, returns the smallest between both as opensyncmer and + * shifts then by one to repeat this action. If a opensyncmer in consecutive windows is the same, it is returned only + * once. + */ + basic_iterator(urng1_iterator_t urng1_iterator, + urng2_iterator_t urng2_iterator, + urng1_sentinel_t urng1_sentinel, + size_t K, + size_t S) : + urng1_iterator{std::move(urng1_iterator)}, + urng2_iterator{std::move(urng2_iterator)}, + urng1_sentinel{std::move(urng1_sentinel)} + { + window_first(K, S); + } + //!\} + + //!\anchor basic_iterator_comparison_opensyncmer + //!\name Comparison operators + //!\{ + + //!\brief Compare to another basic_iterator. 
+ friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) + { + return (lhs.urng1_iterator == rhs.urng1_iterator); + } + + //!\brief Compare to another basic_iterator. + friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) + { + return !(lhs == rhs); + } + + //!\brief Compare to the sentinel of the opensyncmer_view. + friend bool operator==(basic_iterator const & lhs, sentinel const &) + { + return lhs.urng1_iterator == lhs.urng1_sentinel; + } + + //!\brief Compare to the sentinel of the opensyncmer_view. + friend bool operator==(sentinel const & lhs, basic_iterator const & rhs) + { + return rhs == lhs; + } + + //!\brief Compare to the sentinel of the opensyncmer_view. + friend bool operator!=(sentinel const & lhs, basic_iterator const & rhs) + { + return !(lhs == rhs); + } + + //!\brief Compare to the sentinel of the opensyncmer_view. + friend bool operator!=(basic_iterator const & lhs, sentinel const & rhs) + { + return !(lhs == rhs); + } + //!\} + + //!\brief Pre-increment. + basic_iterator & operator++() noexcept + { + next_unique_opensyncmer(); + return *this; + } + + //!\brief Post-increment. + basic_iterator operator++(int) noexcept + { + basic_iterator tmp{*this}; + next_unique_opensyncmer(); + return tmp; + } + + //!\brief Return the opensyncmer. + value_type operator*() const noexcept + { + return opensyncmer_value; + } + +private: + //!\brief The opensyncmer value. + value_type opensyncmer_value{}; + + //!\brief The offset relative to the beginning of the window where the minimizer value is found. + size_t opensyncmer_position_offset{}; + + //!\brief Iterator to the rightmost value of one window. + urng1_iterator_t urng1_iterator{}; + urng2_iterator_t urng2_iterator{}; + + //!brief Iterator to last element in range. + urng1_sentinel_t urng1_sentinel{}; + //!\brief Iterator to the rightmost value of one window of the second range. + size_t w_size{}; + + //!\brief Stored values per window. 
It is necessary to store them, because a shift can remove the current opensyncmer. + std::deque window_values{}; + + //!\brief Increments iterator by 1. + void next_unique_opensyncmer() + { + while (!next_opensyncmer()) {} + } + + //!\brief Returns new window value. + auto window_value() const + { + return *urng1_iterator; + } + + + //!\brief Advances the window to the next position. + void advance_window() + { + ++urng1_iterator; + ++urng2_iterator; + } + + void advance_it_1() + { + ++urng1_iterator; + } + + + //!\brief Calculates opensyncmers for the first window. + void window_first(const size_t K, const size_t S) + { + w_size = K - S + 1; + + if (w_size == 0u) + return; + + for (int i = 1u; i < K - 1 ; ++i) + { + window_values.push_back(window_value()); + advance_it_1(); + } + window_values.push_back(window_value()); + + + auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); + opensyncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + + if (opensyncmer_position_offset == 0) { + auto opensyncmer_it = urng2_iterator; + opensyncmer_value = *opensyncmer_it; + } + + } + + /*!\brief Calculates the next opensyncmer value. + * \returns True, if new opensyncmer is found or end is reached. Otherwise returns false. + * \details + * For the following windows, we remove the first window value (is now not in window_values) and add the new + * value that results from the window shifting. 
+ */ + bool next_opensyncmer() + { + //for (std::vector vec : opensyncmers){ + // for (value_type i : vec){ std::cout << i << std::endl;}; + //}; + advance_window(); + + + if (urng1_iterator == urng1_sentinel) + return true; + + value_type const new_value = window_value(); + + window_values.pop_front(); + window_values.push_back(new_value); + + if (opensyncmer_position_offset == 0) + { + + auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); + + opensyncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + + if (opensyncmer_position_offset == 0) { + + auto opensyncmer_it = urng2_iterator; + opensyncmer_value = *opensyncmer_it; + return true; + }; + } + + else if (new_value < *(window_values.begin()+(opensyncmer_position_offset-1))) + { + opensyncmer_position_offset = w_size - 1; + return false; + } + else if (opensyncmer_position_offset == 1){ + auto opensyncmer_it = urng2_iterator; + opensyncmer_value = *opensyncmer_it; + --opensyncmer_position_offset; + return true; + }; + + + + --opensyncmer_position_offset; + return false; + } +}; + + + +//!\brief A deduction guide for the view class template. +template +opensyncmer_view(rng1_t &&, rng2_t &&, size_t const K, size_t const S) -> opensyncmer_view, std::views::all_t>; + + + +// --------------------------------------------------------------------------------------------------------------------- +// opensyncmer_fn (adaptor definition) +// --------------------------------------------------------------------------------------------------------------------- + +//![adaptor_def] +//!\brief opensyncmer's range adaptor object type (non-closure). +//!\ingroup search_views +struct opensyncmer_fn +{ + //!\brief Store the number of values in one window and return a range adaptor closure object. 
+ template + constexpr auto operator()(urng2_t urange2, const size_t K, const size_t S) const + { + return adaptor_from_functor{*this, urange2, K, S}; + } + + /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many + * values one window contains. + * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] mod_used The number of values in one window. + * \returns A range of converted values. + */ + template + constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const K, size_t const S) const + { + static_assert(std::ranges::viewable_range, + "The range parameter to views::opensyncmer cannot be a temporary of a non-view range."); + static_assert(std::ranges::forward_range, + "The range parameter to views::opensyncmer must model std::ranges::forward_range."); + + if (K == 1) // Would just return urange1 without any changes + throw std::invalid_argument{"The chosen K-mer and window size are not valid. " + "Please choose a value that satisfize the given condition."}; + + return opensyncmer_view{urange1, urange2, K, S}; + } +}; +//![adaptor_def] + +} // namespace seqan3::detail + +namespace seqan3::views +{ +/*!\brief Computes opensyncmers for a range of comparable values. A opensyncmer is a value that fullfills the + condition value % mod_used. + * \tparam urng_t The type of the first range being processed. See below for requirements. [template + * parameter is omitted in pipe notation] + * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] + * \param[in] mod_used The mod value used. + * \returns A range of std::totally_ordered where each value is ... See below for the + * properties of the returned range. 
+ * \ingroup search_views + * + * + * ### View properties + * + * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | + * |----------------------------------|:----------------------------------:|:--------------------------------:| + * | std::ranges::input_range | *required* | *preserved* | + * | std::ranges::forward_range | *required* | *preserved* | + * | std::ranges::bidirectional_range | | *lost* | + * | std::ranges::random_access_range | | *lost* | + * | std::ranges::contiguous_range | | *lost* | + * | | | | + * | std::ranges::viewable_range | *required* | *guaranteed* | + * | std::ranges::view | | *guaranteed* | + * | std::ranges::sized_range | | *lost* | + * | std::ranges::common_range | | *lost* | + * | std::ranges::output_range | | *lost* | + * | seqan3::const_iterable_range | | *preserved* | + * | | | | + * | std::ranges::range_reference_t | std::totally_ordered | std::totally_ordered | + * + * See the views views submodule documentation for detailed descriptions of the view properties. + */ +inline constexpr auto opensyncmer = detail::opensyncmer_fn{}; + +} // namespace seqan3::views diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 1e5c7a4..eadca9d 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -377,7 +377,7 @@ class syncmer_view::basic_iterator window_values.push_back(window_value()); - auto smallest_s_it = std::ranges::min_element(window_values, std::less_equal{}); + auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); if (syncmer_position_offset == 0) { @@ -392,7 +392,6 @@ class syncmer_view::basic_iterator - } /*!\brief Calculates the next syncmer value. 
@@ -419,9 +418,11 @@ class syncmer_view::basic_iterator if (syncmer_position_offset == 0) { - auto smallest_s_it = std::ranges::min_element(window_values, std::less_equal{}); - syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); + + syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + if (syncmer_position_offset == 0) { auto syncmer_it = urng2_iterator; From 8502f495e54fabdae6c0546f9b523077ddd2baa5 Mon Sep 17 00:00:00 2001 From: Hossein Eizadi Moghadam Date: Thu, 20 Jan 2022 18:40:29 +0100 Subject: [PATCH 05/16] [FIX] Some typos and comments fixed. --- include/minstrobe.hpp | 140 ++++++++++++++++++---------------------- include/opensyncmer.hpp | 72 ++++++++++++--------- include/syncmer.hpp | 92 ++++++++++++-------------- 3 files changed, 148 insertions(+), 156 deletions(-) diff --git a/include/minstrobe.hpp b/include/minstrobe.hpp index ac2bf83..dabb3da 100644 --- a/include/minstrobe.hpp +++ b/include/minstrobe.hpp @@ -32,7 +32,6 @@ namespace seqan3::detail * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must * model std::totally_ordered. The typical use case is that the reference type is the result of * seqan3::kmer_hash. - * \tparam measure_distance If true, then not the actual minstrobes are returned, but the distances of the minstrobes. 
* \implements std::ranges::view * \ingroup search_views * @@ -54,11 +53,12 @@ class minstrobe_view : public std::ranges::view_interface class basic_iterator; @@ -81,15 +81,13 @@ class minstrobe_view : public std::ranges::view_interface //!\cond requires (std::ranges::viewable_range && std::constructible_from>>) //!\endcond - minstrobe_view(other_urng1_t && urange1, size_t const window_min, size_t const window_max, size_t const n) : + minstrobe_view(other_urng1_t && urange1, size_t const window_min, size_t const window_max) : urange1{std::views::all(std::forward(urange1))}, window_min{window_min}, - window_max{window_max}, - n{n} + window_max{window_max} {} /*!\name Iterators @@ -134,8 +130,7 @@ class minstrobe_view : public std::ranges::view_interface::basic_iterator : minstrobe_value{std::move(it.minstrobe_value)}, urng1_iterator{std::move(it.urng1_iterator)}, urng1_sentinel{std::move(it.urng1_sentinel)} + {} /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number of values per window. * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] window_min The minimum value to start the next window - * \param[in] window_max The maximum value to start the next window - * \param[in] n The order of minstrobes + * \param[in] window_min The lower offset for the position of the next window from the previous one. + * \param[in] window_max The upper offset for the position of the next window from the previous one. * * \details * * Looks at the number of values per window with two iterators. First iterator adds the next value in the tuple as * the first strobe. The second iterator adds the minimum value of the window to the second position of the tuple. 
- * + * */ basic_iterator(urng1_iterator_t urng1_iterator, urng1_sentinel_t urng1_sentinel, size_t window_min, - size_t window_max, - size_t n) : + size_t window_max) : urng1_iterator{std::move(urng1_iterator)}, urng1_sentinel{std::move(urng1_sentinel)} { @@ -319,20 +312,22 @@ class minstrobe_view::basic_iterator //!\brief The minstrobe value. value_type minstrobe_value{}; - //!\brief The offset relative to the beginning of the window where the minimizer value is found. + //!\brief The offset relative to the beginning of the window where the minstrobe value is found. size_t minstrobe_position_offset{}; - //!\brief Iterator to the first value of minstrobe. - urng1_iterator_t urng1_iterator{}; //!\brief Iterator to the right most value of the window. - urng1_iterator_t second_iterator{}; + urng1_iterator_t urng1_iterator{}; + //!\brief Iterator to the first value of minstrobe. + urng1_iterator_t first_iterator{}; //!\brief Iterator to last element in range. urng1_sentinel_t urng1_sentinel{}; //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current minstrobe. std::deque window_values{}; + + //!\brief The number of values in one window. size_t window_size{}; - bool end{}; + //!\brief Increments iterator by 1. void next_unique_minstrobe() { @@ -344,11 +339,11 @@ class minstrobe_view::basic_iterator { return *urng1_iterator; } - - //!\brief Returns new window value of the second iterator. - auto second_window_value() const + + //!\brief Returns new window value of the first iterator. + auto first_window_value() const { - return *second_iterator; + return *first_iterator; } //!\brief Advances the window of the first iterator to the next position. @@ -356,35 +351,35 @@ class minstrobe_view::basic_iterator { ++urng1_iterator; } - - //!\brief Advances the window of the second iterator to the next position. - void advance_second_window() + + //!\brief Advances the window of the first iterator to the next position. 
+ void advance_first_window() { - ++second_iterator; + ++first_iterator; } - + //!\brief Calculates minstrobes for the first window. - void window_first(size_t const window_min, size_t const window_max) + void window_first(const size_t window_min, const size_t window_max) { - - window_size = ( window_max - window_min + 1 ); - end = false; - + window_size = (window_max - window_min + 1); + if (window_size == 0u) return; - second_iterator = urng1_iterator + window_min; + + first_iterator = urng1_iterator; + std::advance(urng1_iterator, window_min); for (int i = 1u; i < window_size; ++i) { - window_values.push_back(second_window_value()); - advance_second_window(); + window_values.push_back(window_value()); + advance_window(); } - window_values.push_back(second_window_value()); + window_values.push_back(window_value()); auto minstrobe_it = std::ranges::min_element(window_values, std::less_equal{}); - minstrobe_value = std::make_tuple(window_value(), *minstrobe_it); + minstrobe_value = std::make_tuple(first_window_value(), *minstrobe_it); minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it); - + } /*!\brief Calculates the next minstrobe value. 
@@ -395,27 +390,18 @@ class minstrobe_view::basic_iterator */ bool next_minstrobe() { - //for (std::vector vec : minstrobes){ - // for (value_type i : vec){ std::cout << i << std::endl;}; - //}; - advance_second_window(); + advance_first_window(); advance_window(); - if (second_iterator == urng1_sentinel){ - end = true; - return false; - }; - + if (urng1_iterator == urng1_sentinel) return true; - - if (end) return false; - value_t const new_value = window_value(); - value_t const sw_new_value = second_window_value(); + value_t const new_value = first_window_value(); + value_t const sw_new_value = window_value(); std::get<0>(minstrobe_value) = new_value; - + window_values.pop_front(); window_values.push_back(sw_new_value); @@ -425,16 +411,16 @@ class minstrobe_view::basic_iterator std::get<1>(minstrobe_value) = *minstrobe_it; minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it); return true; - + } - + if (sw_new_value < std::get<1>(minstrobe_value)) { std::get<1>(minstrobe_value) = sw_new_value; minstrobe_position_offset = window_values.size() - 1; return true; } - + --minstrobe_position_offset; return true; } @@ -444,7 +430,7 @@ class minstrobe_view::basic_iterator //!\brief A deduction guide for the view class template. template -minstrobe_view(rng1_t &&, size_t const window_min, size_t const window_max, size_t const n) -> minstrobe_view>; +minstrobe_view(rng1_t &&, size_t const window_min, size_t const window_max) -> minstrobe_view>; @@ -458,9 +444,9 @@ minstrobe_view(rng1_t &&, size_t const window_min, size_t const window_max, size struct minstrobe_fn { //!\brief Store the number of values in one window and return a range adaptor closure object. 
- constexpr auto operator()(const size_t window_min, const size_t window_max, const size_t n) const + constexpr auto operator()(const size_t window_min, const size_t window_max) const { - return adaptor_from_functor{*this, window_min, window_max, n}; + return adaptor_from_functor{*this, window_min, window_max}; } /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many @@ -468,13 +454,12 @@ struct minstrobe_fn * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] window_min The minimum value to start the next window - * \param[in] window_max The maximum value to start the next window - * \param[in] n The order of minstrobes + * \param[in] window_min The lower offset for the position of the next window from the previous one. + * \param[in] window_max The upper offset for the position of the next window from the previous one. * \returns A range of converted values in tuples. */ template - constexpr auto operator()(urng1_t && urange1, size_t const window_min, size_t const window_max, size_t const n) const + constexpr auto operator()(urng1_t && urange1, size_t const window_min, size_t const window_max) const { static_assert(std::ranges::viewable_range, "The range parameter to views::minstrobe cannot be a temporary of a non-view range."); @@ -485,7 +470,7 @@ struct minstrobe_fn throw std::invalid_argument{"The chosen min and max window are not valid. " "Please choose a value greater than 1 or use two ranges."}; - return minstrobe_view{urange1, window_min, window_max, n}; + return minstrobe_view{urange1, window_min, window_max}; } }; //![adaptor_def] @@ -499,7 +484,8 @@ namespace seqan3::views * \tparam urng_t The type of the first range being processed. See below for requirements. 
[template * parameter is omitted in pipe notation] * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] - * \param[in] mod_used The mod value used. + * \param[in] window_min The lower offset for the position of the next window from the previous one. + * \param[in] window_max The upper offset for the position of the next window from the previous one. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. * \ingroup search_views diff --git a/include/opensyncmer.hpp b/include/opensyncmer.hpp index a928726..58f1f2c 100644 --- a/include/opensyncmer.hpp +++ b/include/opensyncmer.hpp @@ -14,7 +14,6 @@ #include #include -#include #include #include @@ -32,7 +31,6 @@ namespace seqan3::detail * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must * model std::totally_ordered. The typical use case is that the reference type is the result of * seqan3::kmer_hash. - * \tparam measure_distance If true, then not the actual opensyncmers are returned, but the distances of the opensyncmers. * \implements std::ranges::view * \ingroup search_views * @@ -57,11 +55,12 @@ class opensyncmer_view : public std::ranges::view_interface class basic_iterator; @@ -85,7 +84,10 @@ class opensyncmer_view : public std::ranges::view_interface //!\cond @@ -185,6 +186,7 @@ class opensyncmer_view::basic_iterator using urng1_sentinel_t = maybe_const_sentinel_t; //!\brief The iterator type of the first underlying range. using urng1_iterator_t = maybe_const_iterator_t; + //!\brief The iterator type of the second underlying range. using urng2_iterator_t = maybe_const_iterator_t; template @@ -232,8 +234,10 @@ class opensyncmer_view::basic_iterator /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number of values per window. 
* \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. + * \param[in] urng2_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] mod_used The number of values in one window. + * \param[in] K The k-mer size used. + * \param[in] S The s-mer size used. * * \details * @@ -320,16 +324,19 @@ class opensyncmer_view::basic_iterator //!\brief The opensyncmer value. value_type opensyncmer_value{}; - //!\brief The offset relative to the beginning of the window where the minimizer value is found. + //!\brief The offset relative to the beginning of the window where the opensyncmer value is found. size_t opensyncmer_position_offset{}; - //!\brief Iterator to the rightmost value of one window. + //!\brief Iterator to the rightmost value of one kmer. urng1_iterator_t urng1_iterator{}; + + //!\brief Iterator to the rightmost value of one kmer in the second range. urng2_iterator_t urng2_iterator{}; //!brief Iterator to last element in range. urng1_sentinel_t urng1_sentinel{}; - //!\brief Iterator to the rightmost value of one window of the second range. + + //!\brief The number of values in one window. size_t w_size{}; //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current opensyncmer. @@ -354,8 +361,9 @@ class opensyncmer_view::basic_iterator ++urng1_iterator; ++urng2_iterator; } - - void advance_it_1() + + //!\brief Advances the first window to the next position. 
+ void advance_first_window() { ++urng1_iterator; } @@ -368,11 +376,11 @@ class opensyncmer_view::basic_iterator if (w_size == 0u) return; - + for (int i = 1u; i < K - 1 ; ++i) { window_values.push_back(window_value()); - advance_it_1(); + advance_first_window(); } window_values.push_back(window_value()); @@ -395,9 +403,6 @@ class opensyncmer_view::basic_iterator */ bool next_opensyncmer() { - //for (std::vector vec : opensyncmers){ - // for (value_type i : vec){ std::cout << i << std::endl;}; - //}; advance_window(); @@ -413,9 +418,9 @@ class opensyncmer_view::basic_iterator { auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); - + opensyncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - + if (opensyncmer_position_offset == 0) { auto opensyncmer_it = urng2_iterator; @@ -470,9 +475,13 @@ struct opensyncmer_fn /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many * values one window contains. * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. + * \tparam urng2_t The type of the input range to process. Must model std::ranges::viewable_range. * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] mod_used The number of values in one window. + * \param[in] urange2 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] K The k-mer size used. + * \param[in] S The s-mer size used. * \returns A range of converted values. */ template @@ -483,8 +492,8 @@ struct opensyncmer_fn static_assert(std::ranges::forward_range, "The range parameter to views::opensyncmer must model std::ranges::forward_range."); - if (K == 1) // Would just return urange1 without any changes - throw std::invalid_argument{"The chosen K-mer and window size are not valid. 
" + if (K < 1 || S < 0) // Would just return urange1 without any changes + throw std::invalid_argument{"The chosen K-mer or S-mer are not valid. " "Please choose a value that satisfize the given condition."}; return opensyncmer_view{urange1, urange2, K, S}; @@ -496,12 +505,15 @@ struct opensyncmer_fn namespace seqan3::views { -/*!\brief Computes opensyncmers for a range of comparable values. A opensyncmer is a value that fullfills the - condition value % mod_used. +/*!\brief Computes opensyncmers for a range of comparable values. A opensyncmer is a kmer that has the its smallest smer at its start or end. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] - * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] - * \param[in] mod_used The mod value used. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] urange2 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] K The k-mer size used. + * \param[in] S The s-mer size used. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. * \ingroup search_views diff --git a/include/syncmer.hpp b/include/syncmer.hpp index eadca9d..5d95083 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -14,7 +14,6 @@ #include #include -#include #include #include @@ -32,7 +31,6 @@ namespace seqan3::detail * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must * model std::totally_ordered. The typical use case is that the reference type is the result of * seqan3::kmer_hash. - * \tparam measure_distance If true, then not the actual syncmers are returned, but the distances of the syncmers. 
* \implements std::ranges::view * \ingroup search_views * @@ -57,11 +55,12 @@ class syncmer_view : public std::ranges::view_interface class basic_iterator; @@ -85,7 +84,10 @@ class syncmer_view : public std::ranges::view_interface //!\cond @@ -185,6 +186,7 @@ class syncmer_view::basic_iterator using urng1_sentinel_t = maybe_const_sentinel_t; //!\brief The iterator type of the first underlying range. using urng1_iterator_t = maybe_const_iterator_t; + //!\brief The iterator type of the second underlying range. using urng2_iterator_t = maybe_const_iterator_t; template @@ -232,8 +234,10 @@ class syncmer_view::basic_iterator /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number of values per window. * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. + * \param[in] urng2_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] mod_used The number of values in one window. + * \param[in] K The k-mer size used. + * \param[in] S The s-mer size used. * * \details * @@ -320,16 +324,19 @@ class syncmer_view::basic_iterator //!\brief The syncmer value. value_type syncmer_value{}; - //!\brief The offset relative to the beginning of the window where the minimizer value is found. + //!\brief The offset relative to the beginning of the window where the syncmer value is found. size_t syncmer_position_offset{}; - //!\brief Iterator to the rightmost value of one window. + //!\brief Iterator to the rightmost value of one kmer. urng1_iterator_t urng1_iterator{}; + + //!\brief Iterator to the rightmost value of one kmer in the second range. urng2_iterator_t urng2_iterator{}; //!brief Iterator to last element in range. 
urng1_sentinel_t urng1_sentinel{}; - //!\brief Iterator to the rightmost value of one window of the second range. + + //!\brief The number of values in one window. size_t w_size{}; //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current syncmer. @@ -348,14 +355,15 @@ class syncmer_view::basic_iterator } - //!\brief Advances the window to the next position. + //!\brief Advances both windows to the next position. void advance_window() { ++urng1_iterator; ++urng2_iterator; } - - void advance_it_1() + + //!\brief Advances the first window to the next position. + void advance_first_window() { ++urng1_iterator; } @@ -368,11 +376,11 @@ class syncmer_view::basic_iterator if (w_size == 0u) return; - + for (int i = 1u; i < K - 1 ; ++i) { window_values.push_back(window_value()); - advance_it_1(); + advance_first_window(); } window_values.push_back(window_value()); @@ -380,18 +388,11 @@ class syncmer_view::basic_iterator auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - if (syncmer_position_offset == 0) { + if (syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1 ) { auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; } - else if ( syncmer_position_offset == w_size - 1 ) { - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; - }; - - - } /*!\brief Calculates the next syncmer value. 
@@ -402,9 +403,6 @@ class syncmer_view::basic_iterator */ bool next_syncmer() { - //for (std::vector vec : syncmers){ - // for (value_type i : vec){ std::cout << i << std::endl;}; - //}; advance_window(); @@ -418,24 +416,15 @@ class syncmer_view::basic_iterator if (syncmer_position_offset == 0) { - auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); - syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - - if (syncmer_position_offset == 0) { - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; - return true; - }; + if (syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1) { - if ( syncmer_position_offset == w_size - 1 ) { auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; return true; }; - } else if (new_value < *(window_values.begin()+(syncmer_position_offset-1))) @@ -452,8 +441,6 @@ class syncmer_view::basic_iterator return true; }; - - --syncmer_position_offset; return false; } @@ -486,9 +473,13 @@ struct syncmer_fn /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many * values one window contains. * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. + * \tparam urng2_t The type of the input range to process. Must model std::ranges::viewable_range. * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] mod_used The number of values in one window. + * \param[in] urange2 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] K The k-mer size used. + * \param[in] S The s-mer size used. * \returns A range of converted values. 
*/ template @@ -499,8 +490,8 @@ struct syncmer_fn static_assert(std::ranges::forward_range, "The range parameter to views::syncmer must model std::ranges::forward_range."); - if (K == 1) // Would just return urange1 without any changes - throw std::invalid_argument{"The chosen K-mer and window size are not valid. " + if (K < 1 || S < 0) // Would just return urange1 without any changes + throw std::invalid_argument{"The chosen K-mer or S-mer are not valid. " "Please choose a value that satisfize the given condition."}; return syncmer_view{urange1, urange2, K, S}; @@ -512,12 +503,15 @@ struct syncmer_fn namespace seqan3::views { -/*!\brief Computes syncmers for a range of comparable values. A syncmer is a value that fullfills the - condition value % mod_used. +/*!\brief Computes syncmers for a range of comparable values. A syncmer is a kmer that has the its smallest smer at its start or end. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] - * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] - * \param[in] mod_used The mod value used. + * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] urange2 The input range to process. Must model std::ranges::viewable_range and + * std::ranges::forward_range. + * \param[in] K The k-mer size used. + * \param[in] S The s-mer size used. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. 
* \ingroup search_views From 27b73fa99024be38210c2a08e10e24914b7c73a2 Mon Sep 17 00:00:00 2001 From: Hossein Eizadi Moghadam Date: Mon, 24 Jan 2022 16:27:29 +0100 Subject: [PATCH 06/16] [FIX] Some typos fixed --- include/minstrobe.hpp | 19 +++++++++---------- include/opensyncmer.hpp | 4 ++-- include/syncmer.hpp | 2 +- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/minstrobe.hpp b/include/minstrobe.hpp index dabb3da..c9c264c 100644 --- a/include/minstrobe.hpp +++ b/include/minstrobe.hpp @@ -47,16 +47,16 @@ class minstrobe_view : public std::ranges::view_interface>, "The reference type of the underlying range must model std::totally_ordered."); - //!\brief Whether the given ranges are const_iterable + //!\brief Whether the given ranges are const_iterable. static constexpr bool const_iterable = seqan3::const_iterable_range; //!\brief The first underlying range. urng1_t urange1{}; - //!\brief lower offset for the position of the next window + //!\brief lower offset for the position of the next window. size_t window_min{}; - //!\brief upper offset for the position of the next window + //!\brief upper offset for the position of the next window. size_t window_max{}; template @@ -353,9 +353,10 @@ class minstrobe_view::basic_iterator } //!\brief Advances the window of the first iterator to the next position. - void advance_first_window() + void advance_all_windows() { ++first_iterator; + ++urng1_iterator; } //!\brief Calculates minstrobes for the first window. @@ -390,8 +391,7 @@ class minstrobe_view::basic_iterator */ bool next_minstrobe() { - advance_first_window(); - advance_window(); + advance_all_windows(); if (urng1_iterator == urng1_sentinel) return true; @@ -443,7 +443,7 @@ minstrobe_view(rng1_t &&, size_t const window_min, size_t const window_max) -> m //!\ingroup search_views struct minstrobe_fn { - //!\brief Store the number of values in one window and return a range adaptor closure object. 
+ //!\brief Store the number of values in two windows and return a range adaptor closure object. constexpr auto operator()(const size_t window_min, const size_t window_max) const { return adaptor_from_functor{*this, window_min, window_max}; @@ -467,7 +467,7 @@ struct minstrobe_fn "The range parameter to views::minstrobe must model std::ranges::forward_range."); if (window_max - window_min == 0) // Would just return urange1 without any changes - throw std::invalid_argument{"The chosen min and max window are not valid. " + throw std::invalid_argument{"The chosen min and max windows are not valid. " "Please choose a value greater than 1 or use two ranges."}; return minstrobe_view{urange1, window_min, window_max}; @@ -479,8 +479,7 @@ struct minstrobe_fn namespace seqan3::views { -/*!\brief Computes minstrobes for a range of comparable values. A minstrobe is a value that fullfills the - condition value % mod_used. +/*!\brief Computes minstrobes for a range of comparable values. A minstrobe is a value that is composed of a few single strobes concatenated together, chosen window_min elements apart based on their minimum value in a window. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] diff --git a/include/opensyncmer.hpp b/include/opensyncmer.hpp index 58f1f2c..8f70b36 100644 --- a/include/opensyncmer.hpp +++ b/include/opensyncmer.hpp @@ -49,7 +49,7 @@ class opensyncmer_view : public std::ranges::view_interface>, "The reference type of the underlying range must model std::totally_ordered."); - //!\brief Whether the given ranges are const_iterable + //!\brief Whether the given ranges are const_iterable. 
static constexpr bool const_iterable = seqan3::const_iterable_range; static constexpr bool const_iterable2 = seqan3::const_iterable_range; @@ -505,7 +505,7 @@ struct opensyncmer_fn namespace seqan3::views { -/*!\brief Computes opensyncmers for a range of comparable values. A opensyncmer is a kmer that has the its smallest smer at its start or end. +/*!\brief Computes opensyncmers for a range of comparable values. An opensyncmer is a kmer that has its smallest smer at its start. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 5d95083..0c84dab 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -49,7 +49,7 @@ class syncmer_view : public std::ranges::view_interface>, "The reference type of the underlying range must model std::totally_ordered."); - //!\brief Whether the given ranges are const_iterable + //!\brief Whether the given ranges are const_iterable. static constexpr bool const_iterable = seqan3::const_iterable_range; static constexpr bool const_iterable2 = seqan3::const_iterable_range; From 264ae8bb98c934abd2694ba15aea1d327ee932ef Mon Sep 17 00:00:00 2001 From: Hossein Eizadi Moghadam Date: Mon, 24 Jan 2022 22:20:48 +0100 Subject: [PATCH 07/16] [FIX] problems resolved. --- include/minstrobe.hpp | 67 +++++++------- include/opensyncmer.hpp | 144 +++++++++++++++--------------- include/syncmer.hpp | 191 ++++++++++++++++++++++------------------ 3 files changed, 210 insertions(+), 192 deletions(-) diff --git a/include/minstrobe.hpp b/include/minstrobe.hpp index c9c264c..f58f0ec 100644 --- a/include/minstrobe.hpp +++ b/include/minstrobe.hpp @@ -347,33 +347,27 @@ class minstrobe_view::basic_iterator } //!\brief Advances the window of the first iterator to the next position.
- void advance_window() - { - ++urng1_iterator; - } - - //!\brief Advances the window of the first iterator to the next position. - void advance_all_windows() + void advance_windows() { ++first_iterator; - ++urng1_iterator; + ++urng1_iterator; } //!\brief Calculates minstrobes for the first window. void window_first(const size_t window_min, const size_t window_max) { - window_size = (window_max - window_min + 1); + window_size = (window_max - window_min + 1); - if (window_size == 0u) + if (window_size == 0u) return; - first_iterator = urng1_iterator; - std::advance(urng1_iterator, window_min); + first_iterator = urng1_iterator; + std::advance(urng1_iterator, window_min); for (int i = 1u; i < window_size; ++i) { window_values.push_back(window_value()); - advance_window(); + ++urng1_iterator; } window_values.push_back(window_value()); @@ -391,7 +385,7 @@ class minstrobe_view::basic_iterator */ bool next_minstrobe() { - advance_all_windows(); + advance_windows(); if (urng1_iterator == urng1_sentinel) return true; @@ -399,30 +393,28 @@ class minstrobe_view::basic_iterator value_t const new_value = first_window_value(); value_t const sw_new_value = window_value(); - std::get<0>(minstrobe_value) = new_value; window_values.pop_front(); window_values.push_back(sw_new_value); - if (minstrobe_position_offset == 0) - { - auto minstrobe_it = std::ranges::min_element(window_values, std::less_equal{}); - std::get<1>(minstrobe_value) = *minstrobe_it; - minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it); - return true; - - } + if (minstrobe_position_offset == 0) + { + auto minstrobe_it = std::ranges::min_element(window_values, std::less_equal{}); + std::get<1>(minstrobe_value) = *minstrobe_it; + minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it); + return true; + } - if (sw_new_value < std::get<1>(minstrobe_value)) - { - std::get<1>(minstrobe_value) = sw_new_value; - minstrobe_position_offset = window_values.size() - 
1; - return true; - } + if (sw_new_value < std::get<1>(minstrobe_value)) + { + std::get<1>(minstrobe_value) = sw_new_value; + minstrobe_position_offset = window_values.size() - 1; + return true; + } - --minstrobe_position_offset; - return true; + --minstrobe_position_offset; + return true; } }; @@ -466,9 +458,9 @@ struct minstrobe_fn static_assert(std::ranges::forward_range, "The range parameter to views::minstrobe must model std::ranges::forward_range."); - if (window_max - window_min == 0) // Would just return urange1 without any changes + if (window_max <= window_min) // Would just return urange1 without any changes throw std::invalid_argument{"The chosen min and max windows are not valid. " - "Please choose a value greater than 1 or use two ranges."}; + "Window_max should be greater than window_min."}; return minstrobe_view{urange1, window_min, window_max}; } @@ -479,7 +471,8 @@ struct minstrobe_fn namespace seqan3::views { -/*!\brief Computes minstrobes for a range of comparable values. A minstrobe is a value that is composed of a few single strobes concatenated together, chosen window_min elements apart based on their minimum value in a window. +/*!\brief Computes minstrobes for a range of comparable values. A minstrobe consists of a starting strobe + * concatenated with n−1 consecutively concatenated minimizers. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] @@ -489,6 +482,12 @@ namespace seqan3::views * properties of the returned range. * \ingroup search_views * + * \details + * + * A minstrobe defined by [Sahlin K.](https://genome.cshlp.org/content/31/11/2080.full.pdf) consists of + * a starting strobe concatenated with n−1 consecutively concatenated minimizers in their respective windows. 
+ * For example for the following list of hash values `[6, 26, 41, 38, 24, 33, 6, 27, 47]` and 3 as `window_min`, + * 5 as `window_max`, the minstrobe values are `[(6,24),(26,6),(41,6),(38,6)]`. * * ### View properties * diff --git a/include/opensyncmer.hpp b/include/opensyncmer.hpp index 8f70b36..c02ab4b 100644 --- a/include/opensyncmer.hpp +++ b/include/opensyncmer.hpp @@ -45,9 +45,9 @@ class opensyncmer_view : public std::ranges::view_interface, "The opensyncmer_view only works on forward_ranges."); static_assert(std::ranges::forward_range, "The opensyncmer_view only works on forward_ranges."); static_assert(std::totally_ordered>, - "The reference type of the underlying range must model std::totally_ordered."); + "The reference type of the first underlying range must model std::totally_ordered."); static_assert(std::totally_ordered>, - "The reference type of the underlying range must model std::totally_ordered."); + "The reference type of the second underlying range must model std::totally_ordered."); //!\brief Whether the given ranges are const_iterable. 
static constexpr bool const_iterable = seqan3::const_iterable_range; @@ -58,9 +58,9 @@ class opensyncmer_view : public std::ranges::view_interface class basic_iterator; @@ -84,23 +84,23 @@ class opensyncmer_view : public std::ranges::view_interface //!\cond @@ -109,11 +109,11 @@ class opensyncmer_view : public std::ranges::view_interface && std::constructible_from>>) //!\endcond - opensyncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const K, size_t const S) : + opensyncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const kmer_size, size_t const smer_size) : urange1{std::views::all(std::forward(urange1))}, urange2{std::views::all(std::forward(urange2))}, - K{K}, - S{S} + kmer_size{kmer_size}, + smer_size{smer_size} {} /*!\name Iterators @@ -137,8 +137,8 @@ class opensyncmer_view : public std::ranges::view_interface::basic_iterator /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number of values per window. * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. - * \param[in] urng2_iterator Iterator pointing to the first position of the first std::totally_ordered range. + * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] K The k-mer size used. - * \param[in] S The s-mer size used. + * \param[in] kmer_size The k-mer size used. + * \param[in] smer_size The s-mer size used. 
* * \details * @@ -248,13 +248,13 @@ class opensyncmer_view::basic_iterator basic_iterator(urng1_iterator_t urng1_iterator, urng2_iterator_t urng2_iterator, urng1_sentinel_t urng1_sentinel, - size_t K, - size_t S) : + size_t kmer_size, + size_t smer_size) : urng1_iterator{std::move(urng1_iterator)}, urng2_iterator{std::move(urng2_iterator)}, urng1_sentinel{std::move(urng1_sentinel)} { - window_first(K, S); + window_first(kmer_size, smer_size); } //!\} @@ -370,14 +370,14 @@ class opensyncmer_view::basic_iterator //!\brief Calculates opensyncmers for the first window. - void window_first(const size_t K, const size_t S) + void window_first(const size_t kmer_size, const size_t smer_size) { - w_size = K - S + 1; + w_size = kmer_size - smer_size + 1; - if (w_size == 0u) + if (w_size == 0u) return; - for (int i = 1u; i < K - 1 ; ++i) + for (int i = 1u; i < kmer_size - 1 ; ++i) { window_values.push_back(window_value()); advance_first_window(); @@ -386,12 +386,12 @@ class opensyncmer_view::basic_iterator auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); - opensyncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + opensyncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - if (opensyncmer_position_offset == 0) { - auto opensyncmer_it = urng2_iterator; - opensyncmer_value = *opensyncmer_it; - } + if (opensyncmer_position_offset == 0) { + auto opensyncmer_it = urng2_iterator; + opensyncmer_value = *opensyncmer_it; + } } @@ -403,7 +403,7 @@ class opensyncmer_view::basic_iterator */ bool next_opensyncmer() { - advance_window(); + advance_window(); if (urng1_iterator == urng1_sentinel) @@ -414,37 +414,37 @@ class opensyncmer_view::basic_iterator window_values.pop_front(); window_values.push_back(new_value); - if (opensyncmer_position_offset == 0) - { + if (opensyncmer_position_offset == 0) + { - auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); + auto smallest_s_it = 
std::ranges::min_element(window_values, std::less{}); - opensyncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + opensyncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - if (opensyncmer_position_offset == 0) { + if (opensyncmer_position_offset == 0) { - auto opensyncmer_it = urng2_iterator; - opensyncmer_value = *opensyncmer_it; - return true; - }; - } + auto opensyncmer_it = urng2_iterator; + opensyncmer_value = *opensyncmer_it; + return true; + }; + } - else if (new_value < *(window_values.begin()+(opensyncmer_position_offset-1))) - { - opensyncmer_position_offset = w_size - 1; - return false; - } - else if (opensyncmer_position_offset == 1){ - auto opensyncmer_it = urng2_iterator; - opensyncmer_value = *opensyncmer_it; - --opensyncmer_position_offset; - return true; - }; + else if (new_value < *(window_values.begin()+(opensyncmer_position_offset-1))) + { + opensyncmer_position_offset = w_size - 1; + return false; + } + else if (opensyncmer_position_offset == 1){ + auto opensyncmer_it = urng2_iterator; + opensyncmer_value = *opensyncmer_it; + --opensyncmer_position_offset; + return true; + }; - --opensyncmer_position_offset; - return false; + --opensyncmer_position_offset; + return false; } }; @@ -452,7 +452,7 @@ class opensyncmer_view::basic_iterator //!\brief A deduction guide for the view class template. template -opensyncmer_view(rng1_t &&, rng2_t &&, size_t const K, size_t const S) -> opensyncmer_view, std::views::all_t>; +opensyncmer_view(rng1_t &&, rng2_t &&, size_t const kmer_size, size_t const smer_size) -> opensyncmer_view, std::views::all_t>; @@ -467,36 +467,36 @@ struct opensyncmer_fn { //!\brief Store the number of values in one window and return a range adaptor closure object. 
template - constexpr auto operator()(urng2_t urange2, const size_t K, const size_t S) const + constexpr auto operator()(urng2_t urange2, const size_t kmer_size, const size_t smer_size) const { - return adaptor_from_functor{*this, urange2, K, S}; + return adaptor_from_functor{*this, urange2, kmer_size, smer_size}; } /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many * values one window contains. - * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. - * \tparam urng2_t The type of the input range to process. Must model std::ranges::viewable_range. + * \tparam urng1_t The type of the first input range to process. Must model std::ranges::viewable_range. + * \tparam urng2_t The type of the second input range to process. Must model std::ranges::viewable_range. * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] urange2 The input range to process. Must model std::ranges::viewable_range and + * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] K The k-mer size used. - * \param[in] S The s-mer size used. + * \param[in] kmer_size The k-mer size used. + * \param[in] smer_size The s-mer size used. * \returns A range of converted values. 
*/ template - constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const K, size_t const S) const + constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const kmer_size, size_t const smer_size) const { static_assert(std::ranges::viewable_range, "The range parameter to views::opensyncmer cannot be a temporary of a non-view range."); static_assert(std::ranges::forward_range, "The range parameter to views::opensyncmer must model std::ranges::forward_range."); - if (K < 1 || S < 0) // Would just return urange1 without any changes + if (kmer_size < 1 || smer_size < 0) // Would just return urange1 without any changes throw std::invalid_argument{"The chosen K-mer or S-mer are not valid. " "Please choose a value that satisfize the given condition."}; - return opensyncmer_view{urange1, urange2, K, S}; + return opensyncmer_view{urange1, urange2, kmer_size, smer_size}; } }; //![adaptor_def] @@ -510,10 +510,10 @@ namespace seqan3::views * parameter is omitted in pipe notation] * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] urange2 The input range to process. Must model std::ranges::viewable_range and + * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] K The k-mer size used. - * \param[in] S The s-mer size used. + * \param[in] kmer_size The k-mer size used. + * \param[in] smer_size The s-mer size used. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. 
* \ingroup search_views diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 0c84dab..ce2c4f4 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -31,6 +31,7 @@ namespace seqan3::detail * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must * model std::totally_ordered. The typical use case is that the reference type is the result of * seqan3::kmer_hash. + * \tparam opensyncmer If false, syncmers are used, but if true, open-syncmers are used. * \implements std::ranges::view * \ingroup search_views * @@ -38,16 +39,16 @@ namespace seqan3::detail * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. */ -template +template class syncmer_view : public std::ranges::view_interface> { private: static_assert(std::ranges::forward_range, "The syncmer_view only works on forward_ranges."); static_assert(std::ranges::forward_range, "The syncmer_view only works on forward_ranges."); static_assert(std::totally_ordered>, - "The reference type of the underlying range must model std::totally_ordered."); + "The reference type of the first underlying range must model std::totally_ordered."); static_assert(std::totally_ordered>, - "The reference type of the underlying range must model std::totally_ordered."); + "The reference type of the second underlying range must model std::totally_ordered."); //!\brief Whether the given ranges are const_iterable.
static constexpr bool const_iterable = seqan3::const_iterable_range; @@ -58,9 +59,9 @@ class syncmer_view : public std::ranges::view_interface class basic_iterator; @@ -82,25 +83,25 @@ class syncmer_view : public std::ranges::view_interface //!\cond @@ -109,11 +110,11 @@ class syncmer_view : public std::ranges::view_interface && std::constructible_from>>) //!\endcond - syncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const K, size_t const S) : + syncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const kmer_size, size_t const smer_size) : urange1{std::views::all(std::forward(urange1))}, urange2{std::views::all(std::forward(urange2))}, - K{K}, - S{S} + kmer_size{kmer_size}, + smer_size{smer_size} {} /*!\name Iterators @@ -137,8 +138,8 @@ class syncmer_view : public std::ranges::view_interface +template template -class syncmer_view::basic_iterator +class syncmer_view::basic_iterator { private: //!\brief The sentinel type of the first underlying range. @@ -234,10 +235,10 @@ class syncmer_view::basic_iterator /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number of values per window. * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. - * \param[in] urng2_iterator Iterator pointing to the first position of the first std::totally_ordered range. + * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] K The k-mer size used. - * \param[in] S The s-mer size used. + * \param[in] kmer_size The k-mer size used. + * \param[in] smer_size The s-mer size used. 
* * \details * @@ -248,13 +249,13 @@ class syncmer_view::basic_iterator basic_iterator(urng1_iterator_t urng1_iterator, urng2_iterator_t urng2_iterator, urng1_sentinel_t urng1_sentinel, - size_t K, - size_t S) : + size_t kmer_size, + size_t smer_size) : urng1_iterator{std::move(urng1_iterator)}, urng2_iterator{std::move(urng2_iterator)}, urng1_sentinel{std::move(urng1_sentinel)} { - window_first(K, S); + window_first(kmer_size, smer_size); } //!\} @@ -370,14 +371,14 @@ class syncmer_view::basic_iterator //!\brief Calculates syncmers for the first window. - void window_first(const size_t K, const size_t S) + void window_first(const size_t kmer_size, const size_t smer_size) { - w_size = K - S + 1; + w_size = kmer_size - smer_size + 1; - if (w_size == 0u) + if (w_size == 0u) return; - for (int i = 1u; i < K - 1 ; ++i) + for (int i = 1u; i < kmer_size - 1 ; ++i) { window_values.push_back(window_value()); advance_first_window(); @@ -386,12 +387,20 @@ class syncmer_view::basic_iterator auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); - syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - if (syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1 ) { - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; - } + + if (opensyncmer && syncmer_position_offset == 0) + { + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + } + + else if (!opensyncmer && syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1 ) + { + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + } } @@ -403,7 +412,7 @@ class syncmer_view::basic_iterator */ bool next_syncmer() { - advance_window(); + advance_window(); if (urng1_iterator == urng1_sentinel) @@ -414,35 +423,43 @@ class syncmer_view::basic_iterator window_values.pop_front(); window_values.push_back(new_value); - if 
(syncmer_position_offset == 0) - { - auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); - syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - - if (syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1) { - - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; - return true; - }; - } - - else if (new_value < *(window_values.begin()+(syncmer_position_offset-1))) - { - syncmer_position_offset = w_size - 1; - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; - return true; - } - else if (syncmer_position_offset == 1){ - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; - --syncmer_position_offset; - return true; - }; - - --syncmer_position_offset; - return false; + if (syncmer_position_offset == 0) + { + auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); + syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); + + if (opensyncmer && syncmer_position_offset == 0) + { + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + return true; + } + + else if (!opensyncmer && syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1 ) + { + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + return true; + } + } + + else if (!opensyncmer && new_value < *(window_values.begin()+(syncmer_position_offset-1))) + { + syncmer_position_offset = w_size - 1; + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + return true; + } + else if (syncmer_position_offset == 1) + { + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + --syncmer_position_offset; + return true; + }; + + --syncmer_position_offset; + return false; } }; @@ -450,9 +467,10 @@ class syncmer_view::basic_iterator //!\brief A deduction guide for the view class template. 
template -syncmer_view(rng1_t &&, rng2_t &&, size_t const K, size_t const S) -> syncmer_view, std::views::all_t>; - +syncmer_view(rng1_t &&, rng2_t &&, size_t const kmer_size, size_t const smer_size) -> syncmer_view, std::views::all_t>; +template +syncmer_view(rng1_t &&, rng2_t &&, size_t const kmer_size, size_t const smer_size) -> syncmer_view, std::views::all_t, opensyncmer>; // --------------------------------------------------------------------------------------------------------------------- // syncmer_fn (adaptor definition) @@ -465,36 +483,36 @@ struct syncmer_fn { //!\brief Store the number of values in one window and return a range adaptor closure object. template - constexpr auto operator()(urng2_t urange2, const size_t K, const size_t S) const + constexpr auto operator()(urng2_t urange2, const size_t kmer_size, const size_t smer_size) const { - return adaptor_from_functor{*this, urange2, K, S}; + return adaptor_from_functor{*this, urange2, kmer_size, smer_size}; } /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many * values one window contains. - * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. - * \tparam urng2_t The type of the input range to process. Must model std::ranges::viewable_range. + * \tparam urng1_t The type of the first input range to process. Must model std::ranges::viewable_range. + * \tparam urng2_t The type of the second input range to process. Must model std::ranges::viewable_range. * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] urange2 The input range to process. Must model std::ranges::viewable_range and + * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] K The k-mer size used. - * \param[in] S The s-mer size used. 
+ * \param[in] kmer_size The k-mer size used. + * \param[in] smer_size The s-mer size used. * \returns A range of converted values. */ template - constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const K, size_t const S) const + constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const kmer_size, size_t const smer_size) const { static_assert(std::ranges::viewable_range, "The range parameter to views::syncmer cannot be a temporary of a non-view range."); static_assert(std::ranges::forward_range, "The range parameter to views::syncmer must model std::ranges::forward_range."); - if (K < 1 || S < 0) // Would just return urange1 without any changes + if (kmer_size < 1 || smer_size < 0 || kmer_size < smer_size) // throw std::invalid_argument{"The chosen K-mer or S-mer are not valid. " - "Please choose a value that satisfize the given condition."}; + "Please choose a K-mer size greater than 1 and an S-mer size greater than 0 and smaller than k-mer size."}; - return syncmer_view{urange1, urange2, K, S}; + return syncmer_view{urange1, urange2, kmer_size, smer_size}; } }; //![adaptor_def] @@ -503,15 +521,16 @@ struct syncmer_fn namespace seqan3::views { -/*!\brief Computes syncmers for a range of comparable values. A syncmer is a kmer that has the its smallest smer at its start or end. +/*!\brief Computes syncmers for a range of comparable values. A syncmer is a kmer that has its smallest smer + * (s < k) at its start or end. An open-syncmer has its smer at its start. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] - * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and + * \param[in] urange1 The first input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] urange2 The input range to process. 
Must model std::ranges::viewable_range and + * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] K The k-mer size used. - * \param[in] S The s-mer size used. + * \param[in] kmer_size The k-mer size used. + * \param[in] smer_size The s-mer size used. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. * \ingroup search_views From 03af103b9c9f6fdb000935e022ddc6e66178984a Mon Sep 17 00:00:00 2001 From: Hossein Eizadi Moghadam Date: Mon, 24 Jan 2022 22:24:17 +0100 Subject: [PATCH 08/16] [FIX] opensyncmer.hpp removed. --- include/opensyncmer.hpp | 545 ---------------------------------------- 1 file changed, 545 deletions(-) delete mode 100644 include/opensyncmer.hpp diff --git a/include/opensyncmer.hpp b/include/opensyncmer.hpp deleted file mode 100644 index c02ab4b..0000000 --- a/include/opensyncmer.hpp +++ /dev/null @@ -1,545 +0,0 @@ -// ----------------------------------------------------------------------------------------------------- -// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin -// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik -// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License -// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md -// ----------------------------------------------------------------------------------------------------- - -/*!\file - * \author Hossein Eizadi Moghadam - * \brief Provides opensyncmer. 
- */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include - -namespace seqan3::detail -{ -// --------------------------------------------------------------------------------------------------------------------- -// opensyncmer_view class -// --------------------------------------------------------------------------------------------------------------------- - -/*!\brief The type returned by opensyncmer. - * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must - * model std::totally_ordered. The typical use case is that the reference type is the result of - * seqan3::kmer_hash. - * \implements std::ranges::view - * \ingroup search_views - * - * - * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. - - */ -template -class opensyncmer_view : public std::ranges::view_interface> -{ -private: - static_assert(std::ranges::forward_range, "The opensyncmer_view only works on forward_ranges."); - static_assert(std::ranges::forward_range, "The opensyncmer_view only works on forward_ranges."); - static_assert(std::totally_ordered>, - "The reference type of the first underlying range must model std::totally_ordered."); - static_assert(std::totally_ordered>, - "The reference type of the second underlying range must model std::totally_ordered."); - - //!\brief Whether the given ranges are const_iterable. - static constexpr bool const_iterable = seqan3::const_iterable_range; - static constexpr bool const_iterable2 = seqan3::const_iterable_range; - - //!\brief The first underlying range. - urng1_t urange1{}; - //!\brief The second underlying range. - urng2_t urange2{}; - //!\brief The size of k-mer. - size_t kmer_size{}; - //!\brief The size of s-mer. - size_t smer_size{}; - - template - class basic_iterator; - - //!\brief The sentinel type of the opensyncmer_view. 
- using sentinel = std::default_sentinel_t; - -public: - /*!\name Constructors, destructor andt assignment - * \{ - */ - /// \cond Workaround_Doxygen - opensyncmer_view() requires std::default_initializable && std::default_initializable = default; //!< Defaulted. - /// \endcond - opensyncmer_view(opensyncmer_view const & rhs) = default; //!< Defaulted. - opensyncmer_view(opensyncmer_view && rhs) = default; //!< Defaulted. - opensyncmer_view & operator=(opensyncmer_view const & rhs) = default; //!< Defaulted. - opensyncmer_view & operator=(opensyncmer_view && rhs) = default; //!< Defaulted. - ~opensyncmer_view() = default; //!< Defaulted. - - /*!\brief Construct from a view and a given number of values in one window. - * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] kmer_size The k-mer size used. - * \param[in] smer_size The s-mer size used. - */ - opensyncmer_view(urng1_t urange1, urng2_t urange2, size_t const kmer_size, size_t const smer_size) : - urange1{std::move(urange1)}, - urange2{std::move(urange2)}, - kmer_size{kmer_size}, - smer_size{smer_size} - {} - - /*!\brief Construct from a non-view that can be view-wrapped and a given number of values in one window. - * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible from urng1_t. - * \tparam other_urng2_t The type of another urange. Must model std::ranges::viewable_range and be constructible from urng2_t. - * \param[in] kmer_size The k-mer size used. - * \param[in] smer_size The s-mer size used. 
- */ - template - //!\cond - requires (std::ranges::viewable_range && - std::constructible_from>> && - std::ranges::viewable_range && - std::constructible_from>>) - //!\endcond - opensyncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const kmer_size, size_t const smer_size) : - urange1{std::views::all(std::forward(urange1))}, - urange2{std::views::all(std::forward(urange2))}, - kmer_size{kmer_size}, - smer_size{smer_size} - {} - - /*!\name Iterators - * \{ - */ - /*!\brief Returns an iterator to the first element of the range. - * \returns Iterator to the first element. - * - * \details - * - * ### Complexity - * - * Constant. - * - * ### Exceptions - * - * Strong exception guarantee. - */ - basic_iterator begin() - { - return {std::ranges::begin(urange1), - std::ranges::begin(urange2), - std::ranges::end(urange1), - kmer_size, - smer_size}; - } - - //!\copydoc begin() - basic_iterator begin() const - //!\cond - requires const_iterable && const_iterable2 - //!\endcond - { - return {std::ranges::cbegin(urange1), - std::ranges::cbegin(urange2), - std::ranges::cend(urange1), - kmer_size, - smer_size}; - } - - /*!\brief Returns an iterator to the element following the last element of the range. - * \returns Iterator to the end. - * - * \details - * - * This element acts as a placeholder; attempting to dereference it results in undefined behaviour. - * - * ### Complexity - * - * Constant. - * - * ### Exceptions - * - * No-throw guarantee. - */ - sentinel end() const - { - return {}; - } - //!\} -}; - -//!\brief Iterator for calculating opensyncmers. -template -template -class opensyncmer_view::basic_iterator -{ -private: - //!\brief The sentinel type of the first underlying range. - using urng1_sentinel_t = maybe_const_sentinel_t; - //!\brief The iterator type of the first underlying range. - using urng1_iterator_t = maybe_const_iterator_t; - //!\brief The iterator type of the second underlying range. 
- using urng2_iterator_t = maybe_const_iterator_t; - - template - friend class basic_iterator; - -public: - /*!\name Associated types - * \{ - */ - //!\brief Type for distances between iterators. - using difference_type = std::ranges::range_difference_t; - //!\brief Value type of this iterator. - using value_type = std::ranges::range_value_t; - //!\brief The pointer type. - using pointer = void; - //!\brief Reference to `value_type`. - using reference = value_type; - //!\brief Tag this class as a forward iterator. - using iterator_category = std::forward_iterator_tag; - //!\brief Tag this class as a forward iterator. - using iterator_concept = iterator_category; - //!\} - - /*!\name Constructors, destructor and assignment - * \{ - */ - basic_iterator() = default; //!< Defaulted. - basic_iterator(basic_iterator const &) = default; //!< Defaulted. - basic_iterator(basic_iterator &&) = default; //!< Defaulted. - basic_iterator & operator=(basic_iterator const &) = default; //!< Defaulted. - basic_iterator & operator=(basic_iterator &&) = default; //!< Defaulted. - ~basic_iterator() = default; //!< Defaulted. - - //!\brief Allow iterator on a const range to be constructible from an iterator over a non-const range. - basic_iterator(basic_iterator const & it) - //!\cond - requires const_range - //!\endcond - : opensyncmer_value{std::move(it.opensyncmer_value)}, - urng1_iterator{std::move(it.urng1_iterator)}, - urng2_iterator{std::move(it.urng2_iterator)}, - urng1_sentinel{std::move(it.urng1_sentinel)} - {} - - /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number - of values per window. - * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. - * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. 
- * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] kmer_size The k-mer size used. - * \param[in] smer_size The s-mer size used. - * - * \details - * - * Looks at the number of values per window in two ranges, returns the smallest between both as opensyncmer and - * shifts then by one to repeat this action. If a opensyncmer in consecutive windows is the same, it is returned only - * once. - */ - basic_iterator(urng1_iterator_t urng1_iterator, - urng2_iterator_t urng2_iterator, - urng1_sentinel_t urng1_sentinel, - size_t kmer_size, - size_t smer_size) : - urng1_iterator{std::move(urng1_iterator)}, - urng2_iterator{std::move(urng2_iterator)}, - urng1_sentinel{std::move(urng1_sentinel)} - { - window_first(kmer_size, smer_size); - } - //!\} - - //!\anchor basic_iterator_comparison_opensyncmer - //!\name Comparison operators - //!\{ - - //!\brief Compare to another basic_iterator. - friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) - { - return (lhs.urng1_iterator == rhs.urng1_iterator); - } - - //!\brief Compare to another basic_iterator. - friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) - { - return !(lhs == rhs); - } - - //!\brief Compare to the sentinel of the opensyncmer_view. - friend bool operator==(basic_iterator const & lhs, sentinel const &) - { - return lhs.urng1_iterator == lhs.urng1_sentinel; - } - - //!\brief Compare to the sentinel of the opensyncmer_view. - friend bool operator==(sentinel const & lhs, basic_iterator const & rhs) - { - return rhs == lhs; - } - - //!\brief Compare to the sentinel of the opensyncmer_view. - friend bool operator!=(sentinel const & lhs, basic_iterator const & rhs) - { - return !(lhs == rhs); - } - - //!\brief Compare to the sentinel of the opensyncmer_view. 
- friend bool operator!=(basic_iterator const & lhs, sentinel const & rhs) - { - return !(lhs == rhs); - } - //!\} - - //!\brief Pre-increment. - basic_iterator & operator++() noexcept - { - next_unique_opensyncmer(); - return *this; - } - - //!\brief Post-increment. - basic_iterator operator++(int) noexcept - { - basic_iterator tmp{*this}; - next_unique_opensyncmer(); - return tmp; - } - - //!\brief Return the opensyncmer. - value_type operator*() const noexcept - { - return opensyncmer_value; - } - -private: - //!\brief The opensyncmer value. - value_type opensyncmer_value{}; - - //!\brief The offset relative to the beginning of the window where the opensyncmer value is found. - size_t opensyncmer_position_offset{}; - - //!\brief Iterator to the rightmost value of one kmer. - urng1_iterator_t urng1_iterator{}; - - //!\brief Iterator to the rightmost value of one kmer in the second range. - urng2_iterator_t urng2_iterator{}; - - //!brief Iterator to last element in range. - urng1_sentinel_t urng1_sentinel{}; - - //!\brief The number of values in one window. - size_t w_size{}; - - //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current opensyncmer. - std::deque window_values{}; - - //!\brief Increments iterator by 1. - void next_unique_opensyncmer() - { - while (!next_opensyncmer()) {} - } - - //!\brief Returns new window value. - auto window_value() const - { - return *urng1_iterator; - } - - - //!\brief Advances the window to the next position. - void advance_window() - { - ++urng1_iterator; - ++urng2_iterator; - } - - //!\brief Advances the first window to the next position. - void advance_first_window() - { - ++urng1_iterator; - } - - - //!\brief Calculates opensyncmers for the first window. 
- void window_first(const size_t kmer_size, const size_t smer_size) - { - w_size = kmer_size - smer_size + 1; - - if (w_size == 0u) - return; - - for (int i = 1u; i < kmer_size - 1 ; ++i) - { - window_values.push_back(window_value()); - advance_first_window(); - } - window_values.push_back(window_value()); - - - auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); - opensyncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - - if (opensyncmer_position_offset == 0) { - auto opensyncmer_it = urng2_iterator; - opensyncmer_value = *opensyncmer_it; - } - - } - - /*!\brief Calculates the next opensyncmer value. - * \returns True, if new opensyncmer is found or end is reached. Otherwise returns false. - * \details - * For the following windows, we remove the first window value (is now not in window_values) and add the new - * value that results from the window shifting. - */ - bool next_opensyncmer() - { - advance_window(); - - - if (urng1_iterator == urng1_sentinel) - return true; - - value_type const new_value = window_value(); - - window_values.pop_front(); - window_values.push_back(new_value); - - if (opensyncmer_position_offset == 0) - { - - auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); - - opensyncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - - if (opensyncmer_position_offset == 0) { - - auto opensyncmer_it = urng2_iterator; - opensyncmer_value = *opensyncmer_it; - return true; - }; - } - - else if (new_value < *(window_values.begin()+(opensyncmer_position_offset-1))) - { - opensyncmer_position_offset = w_size - 1; - return false; - } - else if (opensyncmer_position_offset == 1){ - auto opensyncmer_it = urng2_iterator; - opensyncmer_value = *opensyncmer_it; - --opensyncmer_position_offset; - return true; - }; - - - - --opensyncmer_position_offset; - return false; - } -}; - - - -//!\brief A deduction guide for the view class template. 
-template -opensyncmer_view(rng1_t &&, rng2_t &&, size_t const kmer_size, size_t const smer_size) -> opensyncmer_view, std::views::all_t>; - - - -// --------------------------------------------------------------------------------------------------------------------- -// opensyncmer_fn (adaptor definition) -// --------------------------------------------------------------------------------------------------------------------- - -//![adaptor_def] -//!\brief opensyncmer's range adaptor object type (non-closure). -//!\ingroup search_views -struct opensyncmer_fn -{ - //!\brief Store the number of values in one window and return a range adaptor closure object. - template - constexpr auto operator()(urng2_t urange2, const size_t kmer_size, const size_t smer_size) const - { - return adaptor_from_functor{*this, urange2, kmer_size, smer_size}; - } - - /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many - * values one window contains. - * \tparam urng1_t The type of the first input range to process. Must model std::ranges::viewable_range. - * \tparam urng2_t The type of the second input range to process. Must model std::ranges::viewable_range. - * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] kmer_size The k-mer size used. - * \param[in] smer_size The s-mer size used. - * \returns A range of converted values. 
- */ - template - constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const kmer_size, size_t const smer_size) const - { - static_assert(std::ranges::viewable_range, - "The range parameter to views::opensyncmer cannot be a temporary of a non-view range."); - static_assert(std::ranges::forward_range, - "The range parameter to views::opensyncmer must model std::ranges::forward_range."); - - if (kmer_size < 1 || smer_size < 0) // Would just return urange1 without any changes - throw std::invalid_argument{"The chosen K-mer or S-mer are not valid. " - "Please choose a value that satisfize the given condition."}; - - return opensyncmer_view{urange1, urange2, kmer_size, smer_size}; - } -}; -//![adaptor_def] - -} // namespace seqan3::detail - -namespace seqan3::views -{ -/*!\brief Computes opensyncmers for a range of comparable values. A opensyncmer is a kmer that has the its smallest smer at its start. - * \tparam urng_t The type of the first range being processed. See below for requirements. [template - * parameter is omitted in pipe notation] - * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] kmer_size The k-mer size used. - * \param[in] smer_size The s-mer size used. - * \returns A range of std::totally_ordered where each value is ... See below for the - * properties of the returned range. 
- * \ingroup search_views - * - * - * ### View properties - * - * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | - * |----------------------------------|:----------------------------------:|:--------------------------------:| - * | std::ranges::input_range | *required* | *preserved* | - * | std::ranges::forward_range | *required* | *preserved* | - * | std::ranges::bidirectional_range | | *lost* | - * | std::ranges::random_access_range | | *lost* | - * | std::ranges::contiguous_range | | *lost* | - * | | | | - * | std::ranges::viewable_range | *required* | *guaranteed* | - * | std::ranges::view | | *guaranteed* | - * | std::ranges::sized_range | | *lost* | - * | std::ranges::common_range | | *lost* | - * | std::ranges::output_range | | *lost* | - * | seqan3::const_iterable_range | | *preserved* | - * | | | | - * | std::ranges::range_reference_t | std::totally_ordered | std::totally_ordered | - * - * See the views views submodule documentation for detailed descriptions of the view properties. - */ -inline constexpr auto opensyncmer = detail::opensyncmer_fn{}; - -} // namespace seqan3::views From dd43b6d6a6f4c7fafaad5e60438f6ce9f7c1d000 Mon Sep 17 00:00:00 2001 From: Hossein Eizadi Moghadam Date: Tue, 25 Jan 2022 00:17:35 +0100 Subject: [PATCH 09/16] [FIX] Problems resolved. --- include/minstrobe.hpp | 19 +++++-------------- include/syncmer.hpp | 5 +++++ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/include/minstrobe.hpp b/include/minstrobe.hpp index f58f0ec..bde3c41 100644 --- a/include/minstrobe.hpp +++ b/include/minstrobe.hpp @@ -290,7 +290,7 @@ class minstrobe_view::basic_iterator //!\brief Pre-increment. 
basic_iterator & operator++() noexcept { - next_unique_minstrobe(); + next_minstrobe(); return *this; } @@ -298,7 +298,7 @@ class minstrobe_view::basic_iterator basic_iterator operator++(int) noexcept { basic_iterator tmp{*this}; - next_unique_minstrobe(); + next_minstrobe(); return tmp; } @@ -328,12 +328,6 @@ class minstrobe_view::basic_iterator //!\brief The number of values in one window. size_t window_size{}; - //!\brief Increments iterator by 1. - void next_unique_minstrobe() - { - while (!next_minstrobe()) {} - } - //!\brief Returns new window value of the first iterator. auto window_value() const { @@ -383,12 +377,10 @@ class minstrobe_view::basic_iterator * For the following windows, we remove the first window value (is now not in window_values) and add the new * value that results from the window shifting. */ - bool next_minstrobe() + void next_minstrobe() { advance_windows(); - if (urng1_iterator == urng1_sentinel) - return true; value_t const new_value = first_window_value(); value_t const sw_new_value = window_value(); @@ -403,18 +395,17 @@ class minstrobe_view::basic_iterator auto minstrobe_it = std::ranges::min_element(window_values, std::less_equal{}); std::get<1>(minstrobe_value) = *minstrobe_it; minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it); - return true; + return; } if (sw_new_value < std::get<1>(minstrobe_value)) { std::get<1>(minstrobe_value) = sw_new_value; minstrobe_position_offset = window_values.size() - 1; - return true; + return; } --minstrobe_position_offset; - return true; } }; diff --git a/include/syncmer.hpp b/include/syncmer.hpp index ce2c4f4..5ad7d54 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -535,6 +535,11 @@ namespace seqan3::views * properties of the returned range. * \ingroup search_views * + * A Syncmer as defined by [Edgar R.](https://peerj.com/articles/10805.pdf) is a kmer that has its smallest smer + * (s < k) at its start or end. 
An open-syncmer has its smer at its start. For example for the following sequence + * `ACGGCGACGTT` and 5 as `kmer_size`, 2 as `smer_size`, the closed-syncmer values are `ACGGC,CGGCG,GCGAC,ACGTT` + * and the open-syncmer values are `ACGGC,CGGCG,ACGTT`. + * * * ### View properties * From 5054df3153446e05a08890aaefb3f5c428f05393 Mon Sep 17 00:00:00 2001 From: Hossein Eizadi Moghadam Date: Tue, 25 Jan 2022 19:09:57 +0100 Subject: [PATCH 10/16] [FIX] constexpr added to syncmer and minstrobe iterator renamed --- include/minstrobe.hpp | 53 +++++++++++++++--------------------- include/syncmer.hpp | 62 ++++++++++++++++++++++++++----------------- 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/include/minstrobe.hpp b/include/minstrobe.hpp index bde3c41..412290f 100644 --- a/include/minstrobe.hpp +++ b/include/minstrobe.hpp @@ -217,14 +217,14 @@ class minstrobe_view::basic_iterator requires const_range //!\endcond : minstrobe_value{std::move(it.minstrobe_value)}, - urng1_iterator{std::move(it.urng1_iterator)}, + second_iterator{std::move(it.second_iterator)}, urng1_sentinel{std::move(it.urng1_sentinel)} {} /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number of values per window. - * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. + * \param[in] second_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. * \param[in] window_min The lower offset for the position of the next window from the previous one. * \param[in] window_max The upper offset for the position of the next window from the previous one. @@ -235,11 +235,11 @@ class minstrobe_view::basic_iterator * the first strobe. The second iterator adds the minimum value of the window to the second position of the tuple. 
* */ - basic_iterator(urng1_iterator_t urng1_iterator, + basic_iterator(urng1_iterator_t second_iterator, urng1_sentinel_t urng1_sentinel, size_t window_min, size_t window_max) : - urng1_iterator{std::move(urng1_iterator)}, + second_iterator{std::move(second_iterator)}, urng1_sentinel{std::move(urng1_sentinel)} { window_first(window_min, window_max); @@ -253,7 +253,7 @@ class minstrobe_view::basic_iterator //!\brief Compare to another basic_iterator. friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) { - return (lhs.urng1_iterator == rhs.urng1_iterator); + return (lhs.second_iterator == rhs.second_iterator); } //!\brief Compare to another basic_iterator. @@ -265,7 +265,7 @@ class minstrobe_view::basic_iterator //!\brief Compare to the sentinel of the minstrobe_view. friend bool operator==(basic_iterator const & lhs, sentinel const &) { - return lhs.urng1_iterator == lhs.urng1_sentinel; + return lhs.second_iterator == lhs.urng1_sentinel; } //!\brief Compare to the sentinel of the minstrobe_view. @@ -315,10 +315,12 @@ class minstrobe_view::basic_iterator //!\brief The offset relative to the beginning of the window where the minstrobe value is found. size_t minstrobe_position_offset{}; - //!\brief Iterator to the right most value of the window. - urng1_iterator_t urng1_iterator{}; - //!\brief Iterator to the first value of minstrobe. + //!\brief Iterator to the first strobe of minstrobe. urng1_iterator_t first_iterator{}; + + //!\brief Iterator to the right most value of the window and hence the second strobe of minstrobe. + urng1_iterator_t second_iterator{}; + //!\brief Iterator to last element in range. urng1_sentinel_t urng1_sentinel{}; @@ -328,23 +330,11 @@ class minstrobe_view::basic_iterator //!\brief The number of values in one window. size_t window_size{}; - //!\brief Returns new window value of the first iterator. 
- auto window_value() const - { - return *urng1_iterator; - } - - //!\brief Returns new window value of the first iterator. - auto first_window_value() const - { - return *first_iterator; - } - //!\brief Advances the window of the first iterator to the next position. void advance_windows() { ++first_iterator; - ++urng1_iterator; + ++second_iterator; } //!\brief Calculates minstrobes for the first window. @@ -355,18 +345,18 @@ class minstrobe_view::basic_iterator if (window_size == 0u) return; - first_iterator = urng1_iterator; - std::advance(urng1_iterator, window_min); + first_iterator = second_iterator; + std::advance(second_iterator, window_min); for (int i = 1u; i < window_size; ++i) { - window_values.push_back(window_value()); - ++urng1_iterator; + window_values.push_back(*second_iterator); + ++second_iterator; } - window_values.push_back(window_value()); + window_values.push_back(*second_iterator); auto minstrobe_it = std::ranges::min_element(window_values, std::less_equal{}); - minstrobe_value = std::make_tuple(first_window_value(), *minstrobe_it); + minstrobe_value = std::make_tuple(*first_iterator, *minstrobe_it); minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it); } @@ -381,9 +371,8 @@ class minstrobe_view::basic_iterator { advance_windows(); - - value_t const new_value = first_window_value(); - value_t const sw_new_value = window_value(); + value_t const new_value = *first_iterator; + value_t const sw_new_value = *second_iterator; std::get<0>(minstrobe_value) = new_value; @@ -475,7 +464,7 @@ namespace seqan3::views * * \details * - * A minstrobe defined by [Sahlin K.](https://genome.cshlp.org/content/31/11/2080.full.pdf) consists of + * A minstrobe defined by [Sahlin K.](https://genome.cshlp.org/content/31/11/2080.full.pdf) consists of * a starting strobe concatenated with n−1 consecutively concatenated minimizers in their respective windows. 
* For example for the following list of hash values `[6, 26, 41, 38, 24, 33, 6, 27, 47]` and 3 as `window_min`, * 5 as `window_max`, the minstrobe values are `[(6,24),(26,6),(41,6),(38,6)]`. diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 5ad7d54..4608f89 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -390,18 +390,22 @@ class syncmer_view::basic_iterator syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - if (opensyncmer && syncmer_position_offset == 0) + if constexpr (opensyncmer) { - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; + if (syncmer_position_offset == 0) + { + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + }; } - - else if (!opensyncmer && syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1 ) + else { - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; + if (syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1 ) + { + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + }; } - } /*!\brief Calculates the next syncmer value. 
@@ -428,30 +432,38 @@ class syncmer_view::basic_iterator auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - if (opensyncmer && syncmer_position_offset == 0) + if constexpr (opensyncmer) { - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; - return true; + if (syncmer_position_offset == 0) + { + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + return true; + }; } - - else if (!opensyncmer && syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1 ) + else { - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; - return true; - } + if (syncmer_position_offset == 0 || syncmer_position_offset == w_size - 1 ) + { + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + return true; + }; + }; } - else if (!opensyncmer && new_value < *(window_values.begin()+(syncmer_position_offset-1))) + else if (new_value < *(window_values.begin()+(syncmer_position_offset-1))) { syncmer_position_offset = w_size - 1; + if constexpr (!opensyncmer) + { auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; return true; + }; } else if (syncmer_position_offset == 1) - { + { auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; --syncmer_position_offset; @@ -469,8 +481,8 @@ class syncmer_view::basic_iterator template syncmer_view(rng1_t &&, rng2_t &&, size_t const kmer_size, size_t const smer_size) -> syncmer_view, std::views::all_t>; -template -syncmer_view(rng1_t &&, rng2_t &&, size_t const kmer_size, size_t const smer_size) -> syncmer_view, std::views::all_t, opensyncmer>; +template +syncmer_view(rng1_t &&, rng2_t &&, size_t const kmer_size, size_t const smer_size) -> syncmer_view, std::views::all_t, opn>; // --------------------------------------------------------------------------------------------------------------------- // syncmer_fn (adaptor definition) @@ -508,7 +520,7 @@ struct 
syncmer_fn static_assert(std::ranges::forward_range, "The range parameter to views::syncmer must model std::ranges::forward_range."); - if (kmer_size < 1 || smer_size < 0 || kmer_size < smer_size) // + if (kmer_size < 1 || smer_size < 0 || kmer_size < smer_size) throw std::invalid_argument{"The chosen K-mer or S-mer are not valid. " "Please choose a K-mer size greater than 1 and an S-mer size greater than 0 and smaller than k-mer size."}; @@ -521,7 +533,7 @@ struct syncmer_fn namespace seqan3::views { -/*!\brief Computes syncmers for a range of comparable values. A syncmer is a kmer that has its smallest smer +/*!\brief Computes syncmers for a range of comparable values. A syncmer is a kmer that has its smallest smer * (s < k) at its start or end. An open-syncmer has its smer at its start. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] @@ -535,7 +547,7 @@ namespace seqan3::views * properties of the returned range. * \ingroup search_views * - * A Syncmer as defined by [Edgar R.](https://peerj.com/articles/10805.pdf) is a kmer that has its smallest smer + * A Syncmer as defined by [Edgar R.](https://peerj.com/articles/10805.pdf) is a kmer that has its smallest smer * (s < k) at its start or end. An open-syncmer has its smer at its start. For example for the following sequence * `ACGGCGACGTT` and 5 as `kmer_size`, 2 as `smer_size`, the closed-syncmer values are `ACGGC,CGGCG,GCGAC,ACGTT` * and the open-syncmer values are `ACGGC,CGGCG,ACGTT`. From 38fedc296e4ec666b447021a7d7bf1545dbc187f Mon Sep 17 00:00:00 2001 From: Hossein Eizadi Moghadam Date: Mon, 31 Jan 2022 18:56:39 +0100 Subject: [PATCH 11/16] [FIX] Syncmer documentation fixed. 
--- include/syncmer.hpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 4608f89..cefb73b 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -28,9 +28,13 @@ namespace seqan3::detail // --------------------------------------------------------------------------------------------------------------------- /*!\brief The type returned by syncmer. - * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must - * model std::totally_ordered. The typical use case is that the reference type is the result of + * \tparam urng1_t The type of the first underlying range, must model std::ranges::forward_range, the reference type + * must model std::totally_ordered. The typical use case is that the reference type is the result of * seqan3::kmer_hash. + * \tparam urng2_t The type of the second underlying range, must model std::ranges::forward_range, the reference + * type must model std::totally_ordered. The typical use case is that the reference type is the + * result of seqan3::kmer_hash. + * * \tparam opensyncmer If false, syncmers are used but if ture, open-syncmers are used. * \implements std::ranges::view * \ingroup search_views @@ -521,7 +525,7 @@ struct syncmer_fn "The range parameter to views::syncmer must model std::ranges::forward_range."); if (kmer_size < 1 || smer_size < 0 || kmer_size < smer_size) - throw std::invalid_argument{"The chosen K-mer or S-mer are not valid. " + throw std::invalid_argument{"The chosen K-mer or S-mer are not valid." "Please choose a K-mer size greater than 1 and an S-mer size greater than 0 and smaller than k-mer size."}; return syncmer_view{urange1, urange2, kmer_size, smer_size}; From 44752d11a7b2e517bbd14175dbc4dfcba0dfe17a Mon Sep 17 00:00:00 2001 From: hosseinem Date: Wed, 9 Feb 2022 16:35:10 +0100 Subject: [PATCH 12/16] [FEATURE,FIX] syncmer_test added and syncmer.hpp edited. 
--- include/syncmer.hpp | 95 ++++++++++++------------- test/api/CMakeLists.txt | 2 + test/api/syncmer_test.cpp | 145 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 193 insertions(+), 49 deletions(-) create mode 100644 test/api/syncmer_test.cpp diff --git a/include/syncmer.hpp b/include/syncmer.hpp index cefb73b..92a847f 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -28,13 +28,13 @@ namespace seqan3::detail // --------------------------------------------------------------------------------------------------------------------- /*!\brief The type returned by syncmer. - * \tparam urng1_t The type of the first underlying range, must model std::ranges::forward_range, the reference type + * \tparam urng1_t The type of the first underlying range, must model std::ranges::forward_range, the reference type * must model std::totally_ordered. The typical use case is that the reference type is the result of * seqan3::kmer_hash. - * \tparam urng2_t The type of the second underlying range, must model std::ranges::forward_range, the reference - * type must model std::totally_ordered. The typical use case is that the reference type is the + * \tparam urng2_t The type of the second underlying range, must model std::ranges::forward_range, the reference + * type must model std::totally_ordered. The typical use case is that the reference type is the * result of seqan3::kmer_hash. - * + * * \tparam opensyncmer If false, syncmers are used but if ture, open-syncmers are used. 
* \implements std::ranges::view * \ingroup search_views @@ -91,21 +91,27 @@ class syncmer_view : public std::ranges::view_interface //!\cond @@ -114,11 +120,11 @@ class syncmer_view : public std::ranges::view_interface && std::constructible_from>>) //!\endcond - syncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const kmer_size, size_t const smer_size) : + syncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const smer_size, size_t const kmer_size) : urange1{std::views::all(std::forward(urange1))}, urange2{std::views::all(std::forward(urange2))}, - kmer_size{kmer_size}, - smer_size{smer_size} + smer_size{smer_size}, + kmer_size{kmer_size} {} /*!\name Iterators @@ -142,8 +148,8 @@ class syncmer_view : public std::ranges::view_interface::basic_iterator * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] kmer_size The k-mer size used. * \param[in] smer_size The s-mer size used. + * \param[in] kmer_size The k-mer size used. * * \details * - * Looks at the number of values per window in two ranges, returns the smallest between both as syncmer and - * shifts then by one to repeat this action. If a syncmer in consecutive windows is the same, it is returned only - * once. + * Looks at the number of values per window in two ranges, returns the smallest in smer and returns the corresponding + * kmer from the other range as syncmer and shifts then by one to repeat this action. 
*/ basic_iterator(urng1_iterator_t urng1_iterator, urng2_iterator_t urng2_iterator, urng1_sentinel_t urng1_sentinel, - size_t kmer_size, - size_t smer_size) : + size_t smer_size, + size_t kmer_size) : urng1_iterator{std::move(urng1_iterator)}, urng2_iterator{std::move(urng2_iterator)}, urng1_sentinel{std::move(urng1_sentinel)} { - window_first(kmer_size, smer_size); + size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); + window_first(smer_size, kmer_size, size); } //!\} @@ -367,29 +373,21 @@ class syncmer_view::basic_iterator ++urng2_iterator; } - //!\brief Advances the first window to the next position. - void advance_first_window() - { - ++urng1_iterator; - } - - //!\brief Calculates syncmers for the first window. - void window_first(const size_t kmer_size, const size_t smer_size) + void window_first(const size_t smer_size, const size_t kmer_size, const size_t size) { w_size = kmer_size - smer_size + 1; - if (w_size == 0u) + if (w_size == 0u || w_size > size) return; for (int i = 1u; i < kmer_size - 1 ; ++i) { window_values.push_back(window_value()); - advance_first_window(); + ++urng1_iterator; } window_values.push_back(window_value()); - auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); @@ -421,11 +419,10 @@ class syncmer_view::basic_iterator bool next_syncmer() { advance_window(); - - + if (urng1_iterator == urng1_sentinel) return true; - + value_type const new_value = window_value(); window_values.pop_front(); @@ -483,10 +480,10 @@ class syncmer_view::basic_iterator //!\brief A deduction guide for the view class template. 
template -syncmer_view(rng1_t &&, rng2_t &&, size_t const kmer_size, size_t const smer_size) -> syncmer_view, std::views::all_t>; +syncmer_view(rng1_t &&, rng2_t &&, size_t const smer_size, size_t const kmer_size) -> syncmer_view, std::views::all_t>; template -syncmer_view(rng1_t &&, rng2_t &&, size_t const kmer_size, size_t const smer_size) -> syncmer_view, std::views::all_t, opn>; +syncmer_view(rng1_t &&, rng2_t &&, size_t const smer_size, size_t const kmer_size) -> syncmer_view, std::views::all_t, opn>; // --------------------------------------------------------------------------------------------------------------------- // syncmer_fn (adaptor definition) @@ -499,9 +496,9 @@ struct syncmer_fn { //!\brief Store the number of values in one window and return a range adaptor closure object. template - constexpr auto operator()(urng2_t urange2, const size_t kmer_size, const size_t smer_size) const + constexpr auto operator()(urng2_t urange2, const size_t smer_size, const size_t kmer_size) const { - return adaptor_from_functor{*this, urange2, kmer_size, smer_size}; + return adaptor_from_functor{*this, urange2, smer_size, kmer_size}; } /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many @@ -512,23 +509,23 @@ struct syncmer_fn * std::ranges::forward_range. * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] kmer_size The k-mer size used. * \param[in] smer_size The s-mer size used. + * \param[in] kmer_size The k-mer size used. * \returns A range of converted values. 
*/ template - constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const kmer_size, size_t const smer_size) const + constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const smer_size, size_t const kmer_size) const { static_assert(std::ranges::viewable_range, "The range parameter to views::syncmer cannot be a temporary of a non-view range."); static_assert(std::ranges::forward_range, "The range parameter to views::syncmer must model std::ranges::forward_range."); - if (kmer_size < 1 || smer_size < 0 || kmer_size < smer_size) + if (smer_size < 1 || kmer_size <= smer_size) throw std::invalid_argument{"The chosen K-mer or S-mer are not valid." - "Please choose a K-mer size greater than 1 and an S-mer size greater than 0 and smaller than k-mer size."}; + "Please choose an S-mer size greater than 0 and a K-mer size greater than the S-mer size."}; - return syncmer_view{urange1, urange2, kmer_size, smer_size}; + return syncmer_view{urange1, urange2, smer_size, kmer_size}; } }; //![adaptor_def] @@ -545,8 +542,8 @@ namespace seqan3::views * std::ranges::forward_range. * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] kmer_size The k-mer size used. * \param[in] smer_size The s-mer size used. + * \param[in] kmer_size The k-mer size used. * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. 
* \ingroup search_views diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index 73ac436..6d97390 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -8,3 +8,5 @@ add_api_test (minimiser_distance_test.cpp) add_api_test (modmer_test.cpp) add_api_test (modmer_hash_test.cpp) add_api_test (modmer_hash_distance_test.cpp) + +add_api_test (syncmer_test.cpp) diff --git a/test/api/syncmer_test.cpp b/test/api/syncmer_test.cpp new file mode 100644 index 0000000..630c449 --- /dev/null +++ b/test/api/syncmer_test.cpp @@ -0,0 +1,145 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "../../lib/seqan3/test/unit/range/iterator_test_template.hpp" + +#include "syncmer.hpp" + +using seqan3::operator""_dna4; +using seqan3::operator""_shape; +using result_t = std::vector; + +inline static constexpr auto smer_view = seqan3::views::kmer_hash(seqan3::ungapped{2}); +inline static constexpr auto kmer_view = seqan3::views::kmer_hash(seqan3::ungapped{5}); + +using iterator_type = std::ranges::iterator_t< decltype(seqan3::detail::syncmer_view(std::declval() + | smer_view, std::declval() + | kmer_view, 2, 5))>; + +template <> +struct iterator_fixture : public ::testing::Test +{ + using iterator_tag = std::forward_iterator_tag; + static constexpr bool const_iterable = true; + + seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4}; + decltype(seqan3::views::kmer_hash(text, seqan3::ungapped{2})) vec = text | smer_view; + result_t expected_range{105,422,609,111,447,764,1010}; + + decltype(seqan3::views::syncmer(seqan3::views::kmer_hash(text, seqan3::ungapped{2}), text | kmer_view, 2, 5)) test_range = + seqan3::views::syncmer(vec, text | kmer_view, 2, 5); +}; + +using test_types = ::testing::Types; +INSTANTIATE_TYPED_TEST_SUITE_P(iterator_fixture, iterator_fixture, test_types, ); + +template +class syncmer_view_properties_test: public ::testing::Test { }; + +using underlying_range_types = 
::testing::Types, + std::vector const, + seqan3::bitpacked_sequence, + seqan3::bitpacked_sequence const, + std::list, + std::list const, + std::forward_list, + std::forward_list const>; +TYPED_TEST_SUITE(syncmer_view_properties_test, underlying_range_types, ); + +class syncmer_test : public ::testing::Test +{ +protected: + std::vector text1{"AAAAAA"_dna4}; + result_t result1{0, 0}; + result_t result1_open{0, 0}; + + std::vector too_short_text{"AC"_dna4}; + + // ACGG CGGC, GGCG, GCGA, CGAC, GACG, ACGT, CGTT, GTTT, TTTA, TTAG + // CCGT GCCG CGCC TCGC GTCG CGTC ACGT AACG AAAC TAAA CTAA + // ACGG CGGC cgcc GCGA CGAC cgtc ACGT aacg aaac taaa ctaa + std::vector text3{"ACGGCGACGTTTAG"_dna4}; + result_t result3_ungapped{105,422,609,111,447,764,1010}; // ACGG, GGCG, GCGA, GACG, TTTA, TTAG + result_t result3_open{105,422,111,447,764}; + result_t result3_ungapped_stop{105,422,609}; // ACGG, GGCG, GCGA, GACG + result_t result3_open_stop{105,422}; + result_t result3_ungapped_start{111,447,764,1010}; // For start at second A, TTTA, TTAG + result_t result3_open_start{111,447,764}; +}; + +template +void compare_types(adaptor_t v) +{ + EXPECT_TRUE(std::ranges::input_range); + EXPECT_TRUE(std::ranges::forward_range); + EXPECT_FALSE(std::ranges::bidirectional_range); + EXPECT_FALSE(std::ranges::random_access_range); + EXPECT_TRUE(std::ranges::view); + EXPECT_FALSE(std::ranges::sized_range); + EXPECT_FALSE(std::ranges::common_range); + EXPECT_TRUE(seqan3::const_iterable_range); + EXPECT_FALSE((std::ranges::output_range)); +} + +TYPED_TEST(syncmer_view_properties_test, concepts) +{ + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, + 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG + + auto v = seqan3::detail::syncmer_view(text | smer_view, text | kmer_view, 2, 5); + compare_types(v); +} + +TYPED_TEST(syncmer_view_properties_test, different_inputs_kmer_hash) +{ + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 
'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, + 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG + result_t ungapped{109,438,865,111,447,764,1010}; // GTCG, TCGA, GACG, TTTA, TTAG + EXPECT_RANGE_EQ(ungapped, seqan3::detail::syncmer_view(text | smer_view, text | kmer_view, 2, 5)); +} + +TEST_F(syncmer_test, ungapped_kmer_hash) +{ + EXPECT_RANGE_EQ(result1, seqan3::detail::syncmer_view(text1 | smer_view, text1 | kmer_view, 2, 5)); + auto empty_view = seqan3::detail::syncmer_view(too_short_text | smer_view, too_short_text | kmer_view, 2, 5); + EXPECT_TRUE(std::ranges::empty(empty_view)); + EXPECT_RANGE_EQ(result3_ungapped, seqan3::detail::syncmer_view(text3 | smer_view, text3 | kmer_view, 2, 5)); + + auto v1 = text1 | smer_view; + auto v1_2 = text1 | kmer_view; + EXPECT_RANGE_EQ(result1_open, (seqan3::detail::syncmer_view(v1, v1_2, 2, 5))); + auto v2 = text3 | smer_view; + auto v2_2 = text3 | kmer_view; + EXPECT_RANGE_EQ(result3_open, (seqan3::detail::syncmer_view(v2, v2_2, 2, 5))); +} + +TEST_F(syncmer_test, combinability) +{ + auto stop_at_t = std::views::take_while([] (seqan3::dna4 const x) { return x != 'T'_dna4; }); + EXPECT_RANGE_EQ(result3_ungapped_stop, seqan3::detail::syncmer_view(text3 | stop_at_t | smer_view, text3 | stop_at_t | kmer_view, 2, 5)); + + auto v1 = text3 | stop_at_t | smer_view; + auto v2 = text3 | stop_at_t | kmer_view; + + EXPECT_RANGE_EQ(result3_open_stop, (seqan3::detail::syncmer_view(v1, v2, 2, 5))); + + auto start_at_a = std::views::drop(6); + EXPECT_RANGE_EQ(result3_ungapped_start, (seqan3::detail::syncmer_view{text3 | start_at_a | smer_view, text3 | start_at_a | kmer_view, 2, 5})); + + auto v3 = text3 | start_at_a | smer_view; + auto v3_2 = text3 | start_at_a | kmer_view; + + EXPECT_RANGE_EQ(result3_open_start, (seqan3::detail::syncmer_view(v3, v3_2, 2, 5))); +} From 730d04bd410c9f8ae2996702b80d4352b134d6e6 Mon Sep 17 00:00:00 2001 From: hosseinem Date: Wed, 9 Feb 2022 18:27:28 +0100 Subject: [PATCH 
13/16] [FIX] syncmer_test files fixed and minstrobe removed. --- include/minstrobe.hpp | 495 -------------------------------------- include/syncmer.hpp | 12 +- test/api/syncmer_test.cpp | 24 +- 3 files changed, 16 insertions(+), 515 deletions(-) delete mode 100644 include/minstrobe.hpp diff --git a/include/minstrobe.hpp b/include/minstrobe.hpp deleted file mode 100644 index 412290f..0000000 --- a/include/minstrobe.hpp +++ /dev/null @@ -1,495 +0,0 @@ -// ----------------------------------------------------------------------------------------------------- -// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin -// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik -// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License -// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md -// ----------------------------------------------------------------------------------------------------- - -/*!\file - * \author Hossein Eizadi Moghadam - * \brief Provides minstrobe. - */ - -#pragma once - -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace seqan3::detail -{ -// --------------------------------------------------------------------------------------------------------------------- -// minstrobe_view class -// --------------------------------------------------------------------------------------------------------------------- - -/*!\brief The type returned by minstrobe. - * \tparam urng1_t The type of the underlying range, must model std::ranges::forward_range, the reference type must - * model std::totally_ordered. The typical use case is that the reference type is the result of - * seqan3::kmer_hash. - * \implements std::ranges::view - * \ingroup search_views - * - * - * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. 
- - */ -template -class minstrobe_view : public std::ranges::view_interface> -{ -private: - static_assert(std::ranges::forward_range, "The minstrobe_view only works on forward_ranges."); - static_assert(std::totally_ordered>, - "The reference type of the underlying range must model std::totally_ordered."); - - //!\brief Whether the given ranges are const_iterable. - static constexpr bool const_iterable = seqan3::const_iterable_range; - - //!\brief The first underlying range. - urng1_t urange1{}; - - //!\brief lower offset for the position of the next window. - size_t window_min{}; - - //!\brief upper offset for the position of the next window. - size_t window_max{}; - - template - class basic_iterator; - - //!\brief The sentinel type of the minstrobe_view. - using sentinel = std::default_sentinel_t; - -public: - /*!\name Constructors, destructor and assignment - * \{ - */ - /// \cond Workaround_Doxygen - minstrobe_view() requires std::default_initializable = default; //!< Defaulted. - /// \endcond - minstrobe_view(minstrobe_view const & rhs) = default; //!< Defaulted. - minstrobe_view(minstrobe_view && rhs) = default; //!< Defaulted. - minstrobe_view & operator=(minstrobe_view const & rhs) = default; //!< Defaulted. - minstrobe_view & operator=(minstrobe_view && rhs) = default; //!< Defaulted. - ~minstrobe_view() = default; //!< Defaulted. - - /*!\brief Construct from a view and a given number of values in one window. - * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] window_min The lower offset for the position of the next window from the previous one. - * \param[in] window_max The upper offset for the position of the next window from the previous one. 
- */ - minstrobe_view(urng1_t urange1, size_t const window_min, size_t const window_max) : - urange1{std::move(urange1)}, - window_min{window_min}, - window_max{window_max} - {} - - /*!\brief Construct from a non-view that can be view-wrapped and a given number of values in one window. - * \tparam other_urng1_t The type of another urange. Must model std::ranges::viewable_range and be constructible - from urng1_t. - * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] window_min The lower offset for the position of the next window from the previous one. - * \param[in] window_max The upper offset for the position of the next window from the previous one. - */ - template - //!\cond - requires (std::ranges::viewable_range && - std::constructible_from>>) - //!\endcond - minstrobe_view(other_urng1_t && urange1, size_t const window_min, size_t const window_max) : - urange1{std::views::all(std::forward(urange1))}, - window_min{window_min}, - window_max{window_max} - {} - - /*!\name Iterators - * \{ - */ - /*!\brief Returns an iterator to the first element of the range. - * \returns Iterator to the first element. - * - * \details - * - * ### Complexity - * - * Constant. - * - * ### Exceptions - * - * Strong exception guarantee. - */ - basic_iterator begin() - { - return {std::ranges::begin(urange1), - std::ranges::end(urange1), - window_min, - window_max}; - } - - //!\copydoc begin() - basic_iterator begin() const - //!\cond - requires const_iterable - //!\endcond - { - return {std::ranges::cbegin(urange1), - std::ranges::cend(urange1), - window_min, - window_max}; - } - - /*!\brief Returns an iterator to the element following the last element of the range. - * \returns Iterator to the end. - * - * \details - * - * This element acts as a placeholder; attempting to dereference it results in undefined behaviour. - * - * ### Complexity - * - * Constant. 
- * - * ### Exceptions - * - * No-throw guarantee. - */ - sentinel end() const - { - return {}; - } - //!\} -}; - -//!\brief Iterator for calculating minstrobes. -template -template -class minstrobe_view::basic_iterator -{ -private: - //!\brief The sentinel type of the first underlying range. - using urng1_sentinel_t = maybe_const_sentinel_t; - //!\brief The iterator type of the first underlying range. - using urng1_iterator_t = maybe_const_iterator_t; - - template - friend class basic_iterator; - -public: - /*!\name Associated types - * \{ - */ - //!\brief Type for distances between iterators. - using difference_type = std::ranges::range_difference_t; - //!\brief Value type of this iterator. - using value_t = std::ranges::range_value_t; - //!\brief Value type of the output. - using value_type = std::tuple; - //!\brief The pointer type. - using pointer = void; - //!\brief Reference to `value_type`. - using reference = value_type; - //!\brief Tag this class as a forward iterator. - using iterator_category = std::forward_iterator_tag; - //!\brief Tag this class as a forward iterator. - using iterator_concept = iterator_category; - //!\} - - /*!\name Constructors, destructor and assignment - * \{ - */ - basic_iterator() = default; //!< Defaulted. - basic_iterator(basic_iterator const &) = default; //!< Defaulted. - basic_iterator(basic_iterator &&) = default; //!< Defaulted. - basic_iterator & operator=(basic_iterator const &) = default; //!< Defaulted. - basic_iterator & operator=(basic_iterator &&) = default; //!< Defaulted. - ~basic_iterator() = default; //!< Defaulted. - - //!\brief Allow iterator on a const range to be constructible from an iterator over a non-const range. 
- basic_iterator(basic_iterator const & it) - //!\cond - requires const_range - //!\endcond - : minstrobe_value{std::move(it.minstrobe_value)}, - second_iterator{std::move(it.second_iterator)}, - urng1_sentinel{std::move(it.urng1_sentinel)} - - {} - - /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number - of values per window. - * \param[in] second_iterator Iterator pointing to the first position of the first std::totally_ordered range. - * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] window_min The lower offset for the position of the next window from the previous one. - * \param[in] window_max The upper offset for the position of the next window from the previous one. - * - * \details - * - * Looks at the number of values per window with two iterators. First iterator adds the next value in the tuple as - * the first strobe. The second iterator adds the minimum value of the window to the second position of the tuple. - * - */ - basic_iterator(urng1_iterator_t second_iterator, - urng1_sentinel_t urng1_sentinel, - size_t window_min, - size_t window_max) : - second_iterator{std::move(second_iterator)}, - urng1_sentinel{std::move(urng1_sentinel)} - { - window_first(window_min, window_max); - } - //!\} - - //!\anchor basic_iterator_comparison_minstrobe - //!\name Comparison operators - //!\{ - - //!\brief Compare to another basic_iterator. - friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) - { - return (lhs.second_iterator == rhs.second_iterator); - } - - //!\brief Compare to another basic_iterator. - friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) - { - return !(lhs == rhs); - } - - //!\brief Compare to the sentinel of the minstrobe_view. 
- friend bool operator==(basic_iterator const & lhs, sentinel const &) - { - return lhs.second_iterator == lhs.urng1_sentinel; - } - - //!\brief Compare to the sentinel of the minstrobe_view. - friend bool operator==(sentinel const & lhs, basic_iterator const & rhs) - { - return rhs == lhs; - } - - //!\brief Compare to the sentinel of the minstrobe_view. - friend bool operator!=(sentinel const & lhs, basic_iterator const & rhs) - { - return !(lhs == rhs); - } - - //!\brief Compare to the sentinel of the minstrobe_view. - friend bool operator!=(basic_iterator const & lhs, sentinel const & rhs) - { - return !(lhs == rhs); - } - //!\} - - //!\brief Pre-increment. - basic_iterator & operator++() noexcept - { - next_minstrobe(); - return *this; - } - - //!\brief Post-increment. - basic_iterator operator++(int) noexcept - { - basic_iterator tmp{*this}; - next_minstrobe(); - return tmp; - } - - //!\brief Return the minstrobe. - value_type operator*() const noexcept - { - return minstrobe_value; - } - -private: - //!\brief The minstrobe value. - value_type minstrobe_value{}; - - //!\brief The offset relative to the beginning of the window where the minstrobe value is found. - size_t minstrobe_position_offset{}; - - //!\brief Iterator to the first strobe of minstrobe. - urng1_iterator_t first_iterator{}; - - //!\brief Iterator to the right most value of the window and hence the second strobe of minstrobe. - urng1_iterator_t second_iterator{}; - - //!\brief Iterator to last element in range. - urng1_sentinel_t urng1_sentinel{}; - - //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current minstrobe. - std::deque window_values{}; - - //!\brief The number of values in one window. - size_t window_size{}; - - //!\brief Advances the window of the first iterator to the next position. - void advance_windows() - { - ++first_iterator; - ++second_iterator; - } - - //!\brief Calculates minstrobes for the first window. 
- void window_first(const size_t window_min, const size_t window_max) - { - window_size = (window_max - window_min + 1); - - if (window_size == 0u) - return; - - first_iterator = second_iterator; - std::advance(second_iterator, window_min); - - for (int i = 1u; i < window_size; ++i) - { - window_values.push_back(*second_iterator); - ++second_iterator; - } - window_values.push_back(*second_iterator); - - auto minstrobe_it = std::ranges::min_element(window_values, std::less_equal{}); - minstrobe_value = std::make_tuple(*first_iterator, *minstrobe_it); - minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it); - - } - - /*!\brief Calculates the next minstrobe value. - * \returns True, if new minstrobe is found or end is reached. Otherwise returns false. - * \details - * For the following windows, we remove the first window value (is now not in window_values) and add the new - * value that results from the window shifting. - */ - void next_minstrobe() - { - advance_windows(); - - value_t const new_value = *first_iterator; - value_t const sw_new_value = *second_iterator; - - std::get<0>(minstrobe_value) = new_value; - - window_values.pop_front(); - window_values.push_back(sw_new_value); - - if (minstrobe_position_offset == 0) - { - auto minstrobe_it = std::ranges::min_element(window_values, std::less_equal{}); - std::get<1>(minstrobe_value) = *minstrobe_it; - minstrobe_position_offset = std::distance(std::begin(window_values), minstrobe_it); - return; - } - - if (sw_new_value < std::get<1>(minstrobe_value)) - { - std::get<1>(minstrobe_value) = sw_new_value; - minstrobe_position_offset = window_values.size() - 1; - return; - } - - --minstrobe_position_offset; - } -}; - - - -//!\brief A deduction guide for the view class template. 
-template -minstrobe_view(rng1_t &&, size_t const window_min, size_t const window_max) -> minstrobe_view>; - - - -// --------------------------------------------------------------------------------------------------------------------- -// minstrobe_fn (adaptor definition) -// --------------------------------------------------------------------------------------------------------------------- - -//![adaptor_def] -//!\brief minstrobe's range adaptor object type (non-closure). -//!\ingroup search_views -struct minstrobe_fn -{ - //!\brief Store the number of values in two windows and return a range adaptor closure object. - constexpr auto operator()(const size_t window_min, const size_t window_max) const - { - return adaptor_from_functor{*this, window_min, window_max}; - } - - /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many - * values one window contains. - * \tparam urng1_t The type of the input range to process. Must model std::ranges::viewable_range. - * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and - * std::ranges::forward_range. - * \param[in] window_min The lower offset for the position of the next window from the previous one. - * \param[in] window_max The upper offset for the position of the next window from the previous one. - * \returns A range of converted values in tuples. - */ - template - constexpr auto operator()(urng1_t && urange1, size_t const window_min, size_t const window_max) const - { - static_assert(std::ranges::viewable_range, - "The range parameter to views::minstrobe cannot be a temporary of a non-view range."); - static_assert(std::ranges::forward_range, - "The range parameter to views::minstrobe must model std::ranges::forward_range."); - - if (window_max <= window_min) // Would just return urange1 without any changes - throw std::invalid_argument{"The chosen min and max windows are not valid. 
" - "Window_max should be greater than window_min."}; - - return minstrobe_view{urange1, window_min, window_max}; - } -}; -//![adaptor_def] - -} // namespace seqan3::detail - -namespace seqan3::views -{ -/*!\brief Computes minstrobes for a range of comparable values. A minstrobe consists of a starting strobe - * concatenated with n−1 consecutively concatenated minimizers. - * \tparam urng_t The type of the first range being processed. See below for requirements. [template - * parameter is omitted in pipe notation] - * \param[in] urange1 The range being processed. [parameter is omitted in pipe notation] - * \param[in] window_min The lower offset for the position of the next window from the previous one. - * \param[in] window_max The upper offset for the position of the next window from the previous one. - * \returns A range of std::totally_ordered where each value is ... See below for the - * properties of the returned range. - * \ingroup search_views - * - * \details - * - * A minstrobe defined by [Sahlin K.](https://genome.cshlp.org/content/31/11/2080.full.pdf) consists of - * a starting strobe concatenated with n−1 consecutively concatenated minimizers in their respective windows. - * For example for the following list of hash values `[6, 26, 41, 38, 24, 33, 6, 27, 47]` and 3 as `window_min`, - * 5 as `window_max`, the minstrobe values are `[(6,24),(26,6),(41,6),(38,6)]`. 
- * - * ### View properties - * - * | Concepts and traits | `urng_t` (underlying range type) | `rrng_t` (returned range type) | - * |----------------------------------|:----------------------------------:|:--------------------------------:| - * | std::ranges::input_range | *required* | *preserved* | - * | std::ranges::forward_range | *required* | *preserved* | - * | std::ranges::bidirectional_range | | *lost* | - * | std::ranges::random_access_range | | *lost* | - * | std::ranges::contiguous_range | | *lost* | - * | | | | - * | std::ranges::viewable_range | *required* | *guaranteed* | - * | std::ranges::view | | *guaranteed* | - * | std::ranges::sized_range | | *lost* | - * | std::ranges::common_range | | *lost* | - * | std::ranges::output_range | | *lost* | - * | seqan3::const_iterable_range | | *preserved* | - * | | | | - * | std::ranges::range_reference_t | std::totally_ordered | std::totally_ordered | - * - * See the views views submodule documentation for detailed descriptions of the view properties. - */ -inline constexpr auto minstrobe = detail::minstrobe_fn{}; - -} // namespace seqan3::views diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 92a847f..33ef7a8 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -265,7 +265,11 @@ class syncmer_view::basic_iterator urng1_sentinel{std::move(urng1_sentinel)} { size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); - window_first(smer_size, kmer_size, size); + w_size = kmer_size - smer_size + 1; + if (w_size > size) + throw std::invalid_argument{"The given sequence is too short to satisfy the given parameters.\n" + "Please choose smaller parameters."}; + window_first(kmer_size, w_size); } //!\} @@ -374,11 +378,9 @@ class syncmer_view::basic_iterator } //!\brief Calculates syncmers for the first window. 
- void window_first(const size_t smer_size, const size_t kmer_size, const size_t size) + void window_first(const size_t kmer_size, const size_t w_size) { - w_size = kmer_size - smer_size + 1; - - if (w_size == 0u || w_size > size) + if (w_size == 0u) return; for (int i = 1u; i < kmer_size - 1 ; ++i) diff --git a/test/api/syncmer_test.cpp b/test/api/syncmer_test.cpp index 630c449..4ca4f5f 100644 --- a/test/api/syncmer_test.cpp +++ b/test/api/syncmer_test.cpp @@ -65,18 +65,14 @@ class syncmer_test : public ::testing::Test result_t result1{0, 0}; result_t result1_open{0, 0}; - std::vector too_short_text{"AC"_dna4}; - - // ACGG CGGC, GGCG, GCGA, CGAC, GACG, ACGT, CGTT, GTTT, TTTA, TTAG - // CCGT GCCG CGCC TCGC GTCG CGTC ACGT AACG AAAC TAAA CTAA - // ACGG CGGC cgcc GCGA CGAC cgtc ACGT aacg aaac taaa ctaa - std::vector text3{"ACGGCGACGTTTAG"_dna4}; - result_t result3_ungapped{105,422,609,111,447,764,1010}; // ACGG, GGCG, GCGA, GACG, TTTA, TTAG - result_t result3_open{105,422,111,447,764}; - result_t result3_ungapped_stop{105,422,609}; // ACGG, GGCG, GCGA, GACG - result_t result3_open_stop{105,422}; - result_t result3_ungapped_start{111,447,764,1010}; // For start at second A, TTTA, TTAG - result_t result3_open_start{111,447,764}; + std::vector text3{"ACGGCGACGTTTAG"_dna4}; // Kmers: ACGGC CGGCG GGCGA GCGAC CGACG GACGT ACGTT CGTTT GTTTA TTTAG + // Hashed: 105, 422, 664, 609, 390, 539, 111, 447, 764, 1010 + result_t result3_ungapped{105,422,609,111,447,764,1010}; // Syncmers: ACGGC CGGCG GCGAC ACGTT CGTTT GTTTA TTTAG + result_t result3_open{105,422,111,447,764}; // Opensyncmers: ACGGC CGGCG ACGTT CGTTT GTTTA + result_t result3_ungapped_stop{105,422,609}; // Syncmer stop: ACGGC CGGCG GCGAC + result_t result3_open_stop{105,422}; // Opensyncmer stop: ACGGC CGGCG + result_t result3_ungapped_start{111,447,764,1010}; // Syncmer start: ACGTT CGTTT GTTTA TTTAG + result_t result3_open_start{111,447,764}; // Opensyncmer start: ACGTT CGTTT GTTTA }; template @@ -106,15 +102,13 @@
TYPED_TEST(syncmer_view_properties_test, different_inputs_kmer_hash) { TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG - result_t ungapped{109,438,865,111,447,764,1010}; // GTCG, TCGA, GACG, TTTA, TTAG + result_t ungapped{109,438,865,111,447,764,1010}; EXPECT_RANGE_EQ(ungapped, seqan3::detail::syncmer_view(text | smer_view, text | kmer_view, 2, 5)); } TEST_F(syncmer_test, ungapped_kmer_hash) { EXPECT_RANGE_EQ(result1, seqan3::detail::syncmer_view(text1 | smer_view, text1 | kmer_view, 2, 5)); - auto empty_view = seqan3::detail::syncmer_view(too_short_text | smer_view, too_short_text | kmer_view, 2, 5); - EXPECT_TRUE(std::ranges::empty(empty_view)); EXPECT_RANGE_EQ(result3_ungapped, seqan3::detail::syncmer_view(text3 | smer_view, text3 | kmer_view, 2, 5)); auto v1 = text1 | smer_view; From b7c791d457bb832b0a1ae2e9c8bcda4db562686d Mon Sep 17 00:00:00 2001 From: hosseinem Date: Thu, 10 Feb 2022 16:19:07 +0100 Subject: [PATCH 14/16] [FIX] Smer and Kmer removed. --- include/syncmer.hpp | 117 +++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 67 deletions(-) diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 33ef7a8..0209b68 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -35,7 +35,7 @@ namespace seqan3::detail * type must model std::totally_ordered. The typical use case is that the reference type is the * result of seqan3::kmer_hash. * - * \tparam opensyncmer If false, syncmers are used but if ture, open-syncmers are used. + * \tparam opensyncmer If false, syncmers are used but if true, open-syncmers are used. Default: False.
* \implements std::ranges::view * \ingroup search_views * @@ -62,10 +62,8 @@ class syncmer_view : public std::ranges::view_interface class basic_iterator; @@ -91,14 +89,12 @@ class syncmer_view : public std::ranges::view_interface //!\cond @@ -120,11 +115,10 @@ class syncmer_view : public std::ranges::view_interface && std::constructible_from>>) //!\endcond - syncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const smer_size, size_t const kmer_size) : + syncmer_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const window_size) : urange1{std::views::all(std::forward(urange1))}, urange2{std::views::all(std::forward(urange2))}, - smer_size{smer_size}, - kmer_size{kmer_size} + window_size{window_size} {} /*!\name Iterators @@ -148,8 +142,7 @@ class syncmer_view : public std::ranges::view_interface::basic_iterator * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. * \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] smer_size The s-mer size used. - * \param[in] kmer_size The k-mer size used. + * \param[in] window_size The window size used (should be window size - subwindow size + 1). * * \details * - * Looks at the number of values per window in two ranges, returns the smallest in smer and returns the corresponding - * kmer from the other range as syncmer and shifts then by one to repeat this action. + * Looks at the number of values per window in two ranges, if the smallest subwindow in a window is at its beginning + * or its end, it returns the window as a syncmer and shifts then by one to repeat this action. 
*/ basic_iterator(urng1_iterator_t urng1_iterator, urng2_iterator_t urng2_iterator, urng1_sentinel_t urng1_sentinel, - size_t smer_size, - size_t kmer_size) : + size_t window_size) : urng1_iterator{std::move(urng1_iterator)}, urng2_iterator{std::move(urng2_iterator)}, urng1_sentinel{std::move(urng1_sentinel)} { size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel); - w_size = kmer_size - smer_size + 1; - if (w_size > size) - throw std::invalid_argument{"The given sequence is too short to satisfy the given parameters.\n" - "Please choose smaller parameters."}; - window_first(kmer_size, w_size); + + if (window_size > size) + throw std::invalid_argument{"The given sequence is too short to satisfy the given window_size.\n" + "Please choose a smaller window_size."}; + window_first(window_size); } //!\} @@ -280,7 +270,9 @@ class syncmer_view::basic_iterator //!\brief Compare to another basic_iterator. friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) { - return (lhs.urng1_iterator == rhs.urng1_iterator); + return (lhs.urng1_iterator == rhs.urng1_iterator) && + (rhs.urng2_iterator == rhs.urng2_iterator) && + (lhs.window_size() == rhs.window_size()); } //!\brief Compare to another basic_iterator. @@ -351,25 +343,18 @@ class syncmer_view::basic_iterator //!brief Iterator to last element in range. urng1_sentinel_t urng1_sentinel{}; - //!\brief The number of values in one window. - size_t w_size{}; - //!\brief Stored values per window. It is necessary to store them, because a shift can remove the current syncmer. std::deque window_values{}; + //!\brief The number of elements in one window. + size_t w_size{}; + //!\brief Increments iterator by 1. void next_unique_syncmer() { while (!next_syncmer()) {} } - //!\brief Returns new window value. - auto window_value() const - { - return *urng1_iterator; - } - - //!\brief Advances both windows to the next position.
void advance_window() { @@ -378,22 +363,22 @@ class syncmer_view::basic_iterator } //!\brief Calculates syncmers for the first window. - void window_first(const size_t kmer_size, const size_t w_size) + void window_first(const size_t window_size) { - if (w_size == 0u) + w_size = window_size; + if (window_size == 0u) return; - for (int i = 1u; i < kmer_size - 1 ; ++i) + for (int i = 0u; i < w_size - 1 ; ++i) { - window_values.push_back(window_value()); + window_values.push_back(*urng1_iterator); ++urng1_iterator; } - window_values.push_back(window_value()); + window_values.push_back(*urng1_iterator); auto smallest_s_it = std::ranges::min_element(window_values, std::less{}); syncmer_position_offset = std::distance(std::begin(window_values), smallest_s_it); - if constexpr (opensyncmer) { if (syncmer_position_offset == 0) @@ -421,11 +406,11 @@ class syncmer_view::basic_iterator bool next_syncmer() { advance_window(); - + if (urng1_iterator == urng1_sentinel) return true; - - value_type const new_value = window_value(); + + value_type const new_value = *urng1_iterator; window_values.pop_front(); window_values.push_back(new_value); @@ -460,9 +445,9 @@ class syncmer_view::basic_iterator syncmer_position_offset = w_size - 1; if constexpr (!opensyncmer) { - auto syncmer_it = urng2_iterator; - syncmer_value = *syncmer_it; - return true; + auto syncmer_it = urng2_iterator; + syncmer_value = *syncmer_it; + return true; }; } else if (syncmer_position_offset == 1) @@ -482,10 +467,10 @@ class syncmer_view::basic_iterator //!\brief A deduction guide for the view class template. 
template -syncmer_view(rng1_t &&, rng2_t &&, size_t const smer_size, size_t const kmer_size) -> syncmer_view, std::views::all_t>; +syncmer_view(rng1_t &&, rng2_t &&, size_t const window_size) -> syncmer_view, std::views::all_t>; template -syncmer_view(rng1_t &&, rng2_t &&, size_t const smer_size, size_t const kmer_size) -> syncmer_view, std::views::all_t, opn>; +syncmer_view(rng1_t &&, rng2_t &&, size_t const window_size) -> syncmer_view, std::views::all_t, opn>; // --------------------------------------------------------------------------------------------------------------------- // syncmer_fn (adaptor definition) @@ -498,9 +483,9 @@ struct syncmer_fn { //!\brief Store the number of values in one window and return a range adaptor closure object. template - constexpr auto operator()(urng2_t urange2, const size_t smer_size, const size_t kmer_size) const + constexpr auto operator()(urng2_t urange2, const size_t window_size) const { - return adaptor_from_functor{*this, urange2, smer_size, kmer_size}; + return adaptor_from_functor{*this, urange2, window_size}; } /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many @@ -511,23 +496,22 @@ struct syncmer_fn * std::ranges::forward_range. * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] smer_size The s-mer size used. - * \param[in] kmer_size The k-mer size used. + * \param[in] window_size The window size used (should be window size - subwindow size + 1). * \returns A range of converted values. 
*/ template - constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const smer_size, size_t const kmer_size) const + constexpr auto operator()(urng1_t && urange1, urng2_t && urange2, size_t const window_size) const { static_assert(std::ranges::viewable_range, "The range parameter to views::syncmer cannot be a temporary of a non-view range."); static_assert(std::ranges::forward_range, "The range parameter to views::syncmer must model std::ranges::forward_range."); - if (smer_size < 1 || kmer_size <= smer_size) + if (window_size < 1) throw std::invalid_argument{"The chosen K-mer or S-mer are not valid." "Please choose an S-mer size greater than 0 and a K-mer size greater than the S-mer size."}; - return syncmer_view{urange1, urange2, smer_size, kmer_size}; + return syncmer_view{urange1, urange2, window_size}; } }; //![adaptor_def] @@ -536,23 +520,22 @@ struct syncmer_fn namespace seqan3::views { -/*!\brief Computes syncmers for a range of comparable values. A syncmer is a kmer that has its smallest smer - * (s < k) at its start or end. An open-syncmer has its smer at its start. +/*!\brief Computes syncmers for a range of comparable values. A syncmer is a window with size k that has + * its smallest subwindow of size s, (s < k) at its start or end. An open-syncmer has its smallest subwindow at its start. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] * \param[in] urange1 The first input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] smer_size The s-mer size used. - * \param[in] kmer_size The k-mer size used. + * \param[in] window_size The window size used (should be window size - subwindow size + 1). * \returns A range of std::totally_ordered where each value is ... 
See below for the * properties of the returned range. * \ingroup search_views * * A Syncmer as defined by [Edgar R.](https://peerj.com/articles/10805.pdf) is a kmer that has its smallest smer * (s < k) at its start or end. An open-syncmer has its smer at its start. For example for the following sequence - * `ACGGCGACGTT` and 5 as `kmer_size`, 2 as `smer_size`, the closed-syncmer values are `ACGGC,CGGCG,GCGAC,ACGTT` + * `ACGGCGACGTT` and 5 as `window size`, 2 as `subwindow size`, the closed-syncmer values are `ACGGC,CGGCG,GCGAC,ACGTT` * and the open-syncmer values are `ACGGC,CGGCG,ACGTT`. * * From 47a419d78c7c4b6550f3fb86be40d0998c148cf8 Mon Sep 17 00:00:00 2001 From: hosseinem Date: Thu, 10 Feb 2022 17:12:21 +0100 Subject: [PATCH 15/16] [FIX] Syncmer_test fixed according to syncmer.hpp --- include/syncmer.hpp | 25 +++++++++++++------------ test/api/syncmer_test.cpp | 26 +++++++++++++------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 0209b68..44e55da 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -62,7 +62,7 @@ class syncmer_view : public std::ranges::view_interface @@ -89,7 +89,7 @@ class syncmer_view : public std::ranges::view_interface //!\cond @@ -231,7 +231,8 @@ class syncmer_view::basic_iterator : syncmer_value{std::move(it.syncmer_value)}, urng1_iterator{std::move(it.urng1_iterator)}, urng2_iterator{std::move(it.urng2_iterator)}, - urng1_sentinel{std::move(it.urng1_sentinel)} + urng1_sentinel{std::move(it.urng1_sentinel)}, + w_size{std::move(it.w_size)} {} /*!\brief Construct from begin and end iterators of a given range over std::totally_ordered values, and the number @@ -239,12 +240,12 @@ class syncmer_view::basic_iterator * \param[in] urng1_iterator Iterator pointing to the first position of the first std::totally_ordered range. * \param[in] urng2_iterator Iterator pointing to the first position of the second std::totally_ordered range. 
* \param[in] urng1_sentinel Iterator pointing to the last position of the first std::totally_ordered range. - * \param[in] window_size The window size used (should be window size - subwindow size + 1). + * \param[in] window_size The number of elements in one window (should be window size - subwindow size + 1). * * \details * - * Looks at the number of values per window in two ranges, if the smallest subwindow in a window is at its beginning - * or its end, it returns the window as a syncmer and shifts then by one to repeat this action. + * Looks at the number of values per window in two ranges, if the smallest subwindow in a window is at its start + * or end, it returns the window as a syncmer and shifts then by one to repeat this action. */ basic_iterator(urng1_iterator_t urng1_iterator, urng2_iterator_t urng2_iterator, @@ -272,7 +273,7 @@ class syncmer_view::basic_iterator { return (lhs.urng1_iterator == rhs.urng1_iterator) && (rhs.urng2_iterator == rhs.urng2_iterator) && - (lhs.window_size() == rhs.window_size()); + (lhs.w_size == rhs.w_size); } //!\brief Compare to another basic_iterator. @@ -496,7 +497,7 @@ struct syncmer_fn * std::ranges::forward_range. * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] window_size The window size used (should be window size - subwindow size + 1). + * \param[in] window_size The number of elements in one window (should be window size - subwindow size + 1). * \returns A range of converted values. */ template @@ -508,8 +509,8 @@ struct syncmer_fn "The range parameter to views::syncmer must model std::ranges::forward_range."); if (window_size < 1) - throw std::invalid_argument{"The chosen K-mer or S-mer are not valid." - "Please choose an S-mer size greater than 0 and a K-mer size greater than the S-mer size."}; + throw std::invalid_argument{"The chosen window_size is not valid." 
+ "Please choose a subwindow size greater than 0 and a window size greater than the subwindow size."}; return syncmer_view{urange1, urange2, window_size}; } @@ -528,7 +529,7 @@ namespace seqan3::views * std::ranges::forward_range. * \param[in] urange2 The second input range to process. Must model std::ranges::viewable_range and * std::ranges::forward_range. - * \param[in] window_size The window size used (should be window size - subwindow size + 1). + * \param[in] window_size The number of elements in one window (should be window size - subwindow size + 1). * \returns A range of std::totally_ordered where each value is ... See below for the * properties of the returned range. * \ingroup search_views diff --git a/test/api/syncmer_test.cpp b/test/api/syncmer_test.cpp index 4ca4f5f..686205d 100644 --- a/test/api/syncmer_test.cpp +++ b/test/api/syncmer_test.cpp @@ -26,7 +26,7 @@ inline static constexpr auto kmer_view = seqan3::views::kmer_hash(seqan3::ungapp using iterator_type = std::ranges::iterator_t< decltype(seqan3::detail::syncmer_view(std::declval() | smer_view, std::declval() - | kmer_view, 2, 5))>; + | kmer_view, 4))>; template <> struct iterator_fixture : public ::testing::Test @@ -38,8 +38,8 @@ struct iterator_fixture : public ::testing::Test decltype(seqan3::views::kmer_hash(text, seqan3::ungapped{2})) vec = text | smer_view; result_t expected_range{105,422,609,111,447,764,1010}; - decltype(seqan3::views::syncmer(seqan3::views::kmer_hash(text, seqan3::ungapped{2}), text | kmer_view, 2, 5)) test_range = - seqan3::views::syncmer(vec, text | kmer_view, 2, 5); + decltype(seqan3::views::syncmer(seqan3::views::kmer_hash(text, seqan3::ungapped{2}), text | kmer_view, 4)) test_range = + seqan3::views::syncmer(vec, text | kmer_view, 4); }; using test_types = ::testing::Types; @@ -94,7 +94,7 @@ TYPED_TEST(syncmer_view_properties_test, concepts) TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'T'_dna4, 
'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG - auto v = seqan3::detail::syncmer_view(text | smer_view, text | kmer_view, 2, 5); + auto v = seqan3::detail::syncmer_view(text | smer_view, text | kmer_view, 4); compare_types(v); } @@ -103,37 +103,37 @@ TYPED_TEST(syncmer_view_properties_test, different_inputs_kmer_hash) TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4}; // ACGTCGACGTTTAG result_t ungapped{109,438,865,111,447,764,1010}; - EXPECT_RANGE_EQ(ungapped, seqan3::detail::syncmer_view(text | smer_view, text | kmer_view, 2, 5)); + EXPECT_RANGE_EQ(ungapped, seqan3::detail::syncmer_view(text | smer_view, text | kmer_view, 4)); } TEST_F(syncmer_test, ungapped_kmer_hash) { - EXPECT_RANGE_EQ(result1, seqan3::detail::syncmer_view(text1 | smer_view, text1 | kmer_view, 2, 5)); - EXPECT_RANGE_EQ(result3_ungapped, seqan3::detail::syncmer_view(text3 | smer_view, text3 | kmer_view, 2, 5)); + EXPECT_RANGE_EQ(result1, seqan3::detail::syncmer_view(text1 | smer_view, text1 | kmer_view, 4)); + EXPECT_RANGE_EQ(result3_ungapped, seqan3::detail::syncmer_view(text3 | smer_view, text3 | kmer_view, 4)); auto v1 = text1 | smer_view; auto v1_2 = text1 | kmer_view; - EXPECT_RANGE_EQ(result1_open, (seqan3::detail::syncmer_view(v1, v1_2, 2, 5))); + EXPECT_RANGE_EQ(result1_open, (seqan3::detail::syncmer_view(v1, v1_2, 4))); auto v2 = text3 | smer_view; auto v2_2 = text3 | kmer_view; - EXPECT_RANGE_EQ(result3_open, (seqan3::detail::syncmer_view(v2, v2_2, 2, 5))); + EXPECT_RANGE_EQ(result3_open, (seqan3::detail::syncmer_view(v2, v2_2, 4))); } TEST_F(syncmer_test, combinability) { auto stop_at_t = std::views::take_while([] (seqan3::dna4 const x) { return x != 'T'_dna4; }); - EXPECT_RANGE_EQ(result3_ungapped_stop, seqan3::detail::syncmer_view(text3 | stop_at_t | smer_view, text3 | stop_at_t | kmer_view, 2, 5)); + EXPECT_RANGE_EQ(result3_ungapped_stop, seqan3::detail::syncmer_view(text3 | 
stop_at_t | smer_view, text3 | stop_at_t | kmer_view, 4)); auto v1 = text3 | stop_at_t | smer_view; auto v2 = text3 | stop_at_t | kmer_view; - EXPECT_RANGE_EQ(result3_open_stop, (seqan3::detail::syncmer_view(v1, v2, 2, 5))); + EXPECT_RANGE_EQ(result3_open_stop, (seqan3::detail::syncmer_view(v1, v2, 4))); auto start_at_a = std::views::drop(6); - EXPECT_RANGE_EQ(result3_ungapped_start, (seqan3::detail::syncmer_view{text3 | start_at_a | smer_view, text3 | start_at_a | kmer_view, 2, 5})); + EXPECT_RANGE_EQ(result3_ungapped_start, (seqan3::detail::syncmer_view{text3 | start_at_a | smer_view, text3 | start_at_a | kmer_view, 4})); auto v3 = text3 | start_at_a | smer_view; auto v3_2 = text3 | start_at_a | kmer_view; - EXPECT_RANGE_EQ(result3_open_start, (seqan3::detail::syncmer_view(v3, v3_2, 2, 5))); + EXPECT_RANGE_EQ(result3_open_start, (seqan3::detail::syncmer_view(v3, v3_2, 4))); } From ad5ddce43358718a4f670523ac6880be5eaca167 Mon Sep 17 00:00:00 2001 From: Hossein Eizadi Moghadam Date: Fri, 11 Feb 2022 18:29:44 +0100 Subject: [PATCH 16/16] [FIX] Some errors fixed. --- include/syncmer.hpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/include/syncmer.hpp b/include/syncmer.hpp index 44e55da..90dcca9 100644 --- a/include/syncmer.hpp +++ b/include/syncmer.hpp @@ -39,7 +39,6 @@ namespace seqan3::detail * \implements std::ranges::view * \ingroup search_views * - * * \note Most members of this class are generated by std::ranges::view_interface which is not yet documented here. 
*/ @@ -386,7 +385,7 @@ class syncmer_view::basic_iterator { auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; - }; + } } else { @@ -394,7 +393,7 @@ class syncmer_view::basic_iterator { auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; - }; + } } } @@ -428,7 +427,7 @@ class syncmer_view::basic_iterator auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; return true; - }; + } } else { @@ -437,8 +436,8 @@ class syncmer_view::basic_iterator auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; return true; - }; - }; + } + } } else if (new_value < *(window_values.begin()+(syncmer_position_offset-1))) @@ -449,7 +448,7 @@ class syncmer_view::basic_iterator auto syncmer_it = urng2_iterator; syncmer_value = *syncmer_it; return true; - }; + } } else if (syncmer_position_offset == 1) { @@ -457,7 +456,7 @@ class syncmer_view::basic_iterator syncmer_value = *syncmer_it; --syncmer_position_offset; return true; - }; + } --syncmer_position_offset; return false; @@ -489,8 +488,8 @@ struct syncmer_fn return adaptor_from_functor{*this, urange2, window_size}; } - /*!\brief Call the view's constructor with two arguments: the underlying view and an integer indicating how many - * values one window contains. + /*!\brief Call the view's constructor with three arguments: the two underlying views and an integer indicating + * how many values one window contains. * \tparam urng1_t The type of the first input range to process. Must model std::ranges::viewable_range. * \tparam urng2_t The type of the second input range to process. Must model std::ranges::viewable_range. * \param[in] urange1 The input range to process. Must model std::ranges::viewable_range and @@ -522,7 +521,8 @@ struct syncmer_fn namespace seqan3::views { /*!\brief Computes syncmers for a range of comparable values. A syncmer is a window with size k that has - * its smallest subwindow of size s, (s < k) at its start or end. An open-syncmer has its smallest subwindow at its start. 
+ * its smallest subwindow of size s, (s < k) at its start or end. An open-syncmer has its smallest subwindow + * at its start. * \tparam urng_t The type of the first range being processed. See below for requirements. [template * parameter is omitted in pipe notation] * \param[in] urange1 The first input range to process. Must model std::ranges::viewable_range and @@ -536,8 +536,9 @@ namespace seqan3::views * * A Syncmer as defined by [Edgar R.](https://peerj.com/articles/10805.pdf) is a kmer that has its smallest smer * (s < k) at its start or end. An open-syncmer has its smer at its start. For example for the following sequence - * `ACGGCGACGTT` and 5 as `window size`, 2 as `subwindow size`, the closed-syncmer values are `ACGGC,CGGCG,GCGAC,ACGTT` - * and the open-syncmer values are `ACGGC,CGGCG,ACGTT`. + * `ACGGCGACGTT` and 5 as `window size`, 2 as `subwindow size`, the closed-syncmer values are + * `ACGGC,CGGCG,GCGAC,ACGTT` and the open-syncmer values are `ACGGC,CGGCG,ACGTT`. + * * * * ### View properties