From 8082c9e2b814ed9cc8bce5b5b836d0b27fd4a362 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 27 Oct 2024 20:37:39 +0200 Subject: [PATCH 01/11] benchmark --- benchmarks/src/find_and_count.cpp | 20 ++++++++++++++-- benchmarks/src/search.cpp | 39 +++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/benchmarks/src/find_and_count.cpp b/benchmarks/src/find_and_count.cpp index 9c608bfe35..6cfa482590 100644 --- a/benchmarks/src/find_and_count.cpp +++ b/benchmarks/src/find_and_count.cpp @@ -7,12 +7,15 @@ #include #include #include +#include #include enum class Op { FindSized, FindUnsized, Count, + StrFind, + StrRFind, }; using namespace std; @@ -22,7 +25,9 @@ void bm(benchmark::State& state) { const auto size = static_cast(state.range(0)); const auto pos = static_cast(state.range(1)); - vector a(size, T{'0'}); + constexpr bool is_string_op = Operation == Op::StrFind || Operation == Op::StrRFind; + + conditional_t, vector> a(size, T{'0'}); if (pos < size) { a[pos] = T{'1'}; @@ -33,12 +38,18 @@ void bm(benchmark::State& state) { } for (auto _ : state) { + benchmark::DoNotOptimize(a); + if constexpr (Operation == Op::FindSized) { benchmark::DoNotOptimize(ranges::find(a.begin(), a.end(), T{'1'})); } else if constexpr (Operation == Op::FindUnsized) { benchmark::DoNotOptimize(ranges::find(a.begin(), unreachable_sentinel, T{'1'})); } else if constexpr (Operation == Op::Count) { benchmark::DoNotOptimize(ranges::count(a.begin(), a.end(), T{'1'})); + } else if constexpr (Operation == Op::StrFind) { + benchmark::DoNotOptimize(a.find(T{'1'})); + } else if constexpr (Operation == Op::StrRFind) { + benchmark::DoNotOptimize(a.rfind(T{'1'})); } } } @@ -49,7 +60,6 @@ void common_args(auto bm) { bm->Args({63, 62})->Args({31, 30})->Args({15, 14})->Args({7, 6}); } - BENCHMARK(bm)->Apply(common_args); BENCHMARK(bm)->Apply(common_args); BENCHMARK(bm)->Apply(common_args); @@ -63,4 +73,10 @@ BENCHMARK(bm)->Apply(common_args); 
BENCHMARK(bm)->Apply(common_args); BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); + +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); + BENCHMARK_MAIN(); diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index 3bbf45f70e..15e6569de4 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -111,6 +111,22 @@ void search_default_searcher(benchmark::State& state) { } } +template +void member_find(benchmark::State& state) { + const auto& src_haystack = patterns[static_cast(state.range())].data; + const auto& src_needle = patterns[static_cast(state.range())].pattern; + + const T haystack(src_haystack.begin(), src_haystack.end()); + const T needle(src_needle.begin(), src_needle.end()); + + for (auto _ : state) { + benchmark::DoNotOptimize(haystack); + benchmark::DoNotOptimize(needle); + auto res = haystack.find(needle); + benchmark::DoNotOptimize(res); + } +} + template void classic_find_end(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; @@ -143,6 +159,23 @@ void ranges_find_end(benchmark::State& state) { } } +template +void member_rfind(benchmark::State& state) { + const auto& src_haystack = patterns[static_cast(state.range())].data; + const auto& src_needle = patterns[static_cast(state.range())].pattern; + + const T haystack(src_haystack.begin(), src_haystack.end()); + const T needle(src_needle.begin(), src_needle.end()); + + for (auto _ : state) { + benchmark::DoNotOptimize(haystack); + benchmark::DoNotOptimize(needle); + auto res = haystack.rfind(needle); + benchmark::DoNotOptimize(res); + } +} + + void common_args(auto bm) { bm->DenseRange(0, std::size(patterns) - 1, 1); } @@ -158,10 +191,16 @@ BENCHMARK(ranges_search)->Apply(common_args); BENCHMARK(search_default_searcher)->Apply(common_args); BENCHMARK(search_default_searcher)->Apply(common_args); +BENCHMARK(member_find)->Apply(common_args); 
+BENCHMARK(member_find)->Apply(common_args); + BENCHMARK(classic_find_end)->Apply(common_args); BENCHMARK(classic_find_end)->Apply(common_args); BENCHMARK(ranges_find_end)->Apply(common_args); BENCHMARK(ranges_find_end)->Apply(common_args); +BENCHMARK(member_rfind)->Apply(common_args); +BENCHMARK(member_rfind)->Apply(common_args); + BENCHMARK_MAIN(); From 87509352353e012104be0bcbe77e3d50c32b2e12 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 27 Oct 2024 20:50:48 +0200 Subject: [PATCH 02/11] single character coverage --- .../VSO_0000000_vector_algorithms/test.cpp | 33 ++++++++++++++++--- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 8498602479..9625a14cb4 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -1218,12 +1218,31 @@ void test_bitset(mt19937_64& gen) { test_randomized_bitset_base_count<512 - 5, 32 + 10>(gen); } +template +void test_case_string_find(const basic_string& input_haystack, const T ch) { + const auto expected_iter = last_known_good_find(input_haystack.begin(), input_haystack.end(), ch); + const auto expected = + (expected_iter != input_haystack.end()) ? expected_iter - input_haystack.begin() : ptrdiff_t{-1}; + const auto actual = static_cast(input_haystack.find(ch)); + assert(expected == actual); +} + +template +void test_case_string_rfind(const basic_string& input_haystack, const T ch) { + const auto expected_iter = last_known_good_find_last(input_haystack.begin(), input_haystack.end(), ch); + const auto expected = + (expected_iter != input_haystack.end()) ? 
expected_iter - input_haystack.begin() : ptrdiff_t{-1}; + const auto actual = static_cast(input_haystack.rfind(ch)); + assert(expected == actual); +} + template void test_case_string_find_first_of(const basic_string& input_haystack, const basic_string& input_needle) { - auto expected_iter = last_known_good_find_first_of( + const auto expected_iter = last_known_good_find_first_of( input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); - auto expected = (expected_iter != input_haystack.end()) ? expected_iter - input_haystack.begin() : ptrdiff_t{-1}; - auto actual = static_cast(input_haystack.find_first_of(input_needle)); + const auto expected = + (expected_iter != input_haystack.end()) ? expected_iter - input_haystack.begin() : ptrdiff_t{-1}; + const auto actual = static_cast(input_haystack.find_first_of(input_needle)); assert(expected == actual); } @@ -1242,8 +1261,8 @@ size_t last_known_good_find_last_of(const basic_string& h, const basic_string template void test_case_string_find_last_of(const basic_string& input_haystack, const basic_string& input_needle) { - size_t expected = last_known_good_find_last_of(input_haystack, input_needle); - size_t actual = input_haystack.find_last_of(input_needle); + const size_t expected = last_known_good_find_last_of(input_haystack, input_needle); + const size_t actual = input_haystack.find_last_of(input_needle); assert(expected == actual); } @@ -1255,6 +1274,10 @@ void test_basic_string_dis(mt19937_64& gen, D& dis) { input_needle.reserve(needleDataCount); for (;;) { + const T ch = static_cast(dis(gen)); + test_case_string_find(input_haystack, ch); + test_case_string_rfind(input_haystack, ch); + input_needle.clear(); test_case_string_find_first_of(input_haystack, input_needle); From f62d78a67dbfc7a5cd7af4b136fb07eabe6f4543 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 27 Oct 2024 21:14:26 +0200 Subject: [PATCH 03/11] string in string coverage --- 
.../VSO_0000000_vector_algorithms/test.cpp | 55 +++++++++++++++++-- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 9625a14cb4..8a3471e7a8 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -1219,7 +1219,7 @@ void test_bitset(mt19937_64& gen) { } template -void test_case_string_find(const basic_string& input_haystack, const T ch) { +void test_case_string_find_ch(const basic_string& input_haystack, const T ch) { const auto expected_iter = last_known_good_find(input_haystack.begin(), input_haystack.end(), ch); const auto expected = (expected_iter != input_haystack.end()) ? expected_iter - input_haystack.begin() : ptrdiff_t{-1}; @@ -1228,7 +1228,7 @@ void test_case_string_find(const basic_string& input_haystack, const T ch) { } template -void test_case_string_rfind(const basic_string& input_haystack, const T ch) { +void test_case_string_rfind_ch(const basic_string& input_haystack, const T ch) { const auto expected_iter = last_known_good_find_last(input_haystack.begin(), input_haystack.end(), ch); const auto expected = (expected_iter != input_haystack.end()) ? expected_iter - input_haystack.begin() : ptrdiff_t{-1}; @@ -1266,26 +1266,73 @@ void test_case_string_find_last_of(const basic_string& input_haystack, const assert(expected == actual); } +template +void test_case_string_find_str(const basic_string& input_haystack, const basic_string& input_needle) { + ptrdiff_t expected; + if (input_needle.empty()) { + expected = 0; + } else { + const auto expected_iter = last_known_good_search( + input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); + expected = (expected_iter != input_haystack.end()) ? 
expected_iter - input_haystack.begin() : ptrdiff_t{-1}; + } + const auto actual = static_cast(input_haystack.find(input_needle)); + assert(expected == actual); +} + +template +void test_case_string_rfind_str(const basic_string& input_haystack, const basic_string& input_needle) { + ptrdiff_t expected; + if (input_needle.empty()) { + expected = static_cast(input_haystack.size()); + } else { + const auto expected_iter = last_known_good_find_end( + input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); + expected = (expected_iter != input_haystack.end()) ? expected_iter - input_haystack.begin() : ptrdiff_t{-1}; + } + const auto actual = static_cast(input_haystack.rfind(input_needle)); + assert(expected == actual); +} + template void test_basic_string_dis(mt19937_64& gen, D& dis) { basic_string input_haystack; basic_string input_needle; + basic_string temp; input_haystack.reserve(haystackDataCount); input_needle.reserve(needleDataCount); + temp.reserve(needleDataCount); for (;;) { const T ch = static_cast(dis(gen)); - test_case_string_find(input_haystack, ch); - test_case_string_rfind(input_haystack, ch); + test_case_string_find_ch(input_haystack, ch); + test_case_string_rfind_ch(input_haystack, ch); input_needle.clear(); test_case_string_find_first_of(input_haystack, input_needle); test_case_string_find_last_of(input_haystack, input_needle); + test_case_string_find_str(input_haystack, input_needle); + test_case_string_rfind_str(input_haystack, input_needle); + for (size_t attempts = 0; attempts < needleDataCount; ++attempts) { input_needle.push_back(static_cast(dis(gen))); test_case_string_find_first_of(input_haystack, input_needle); test_case_string_find_last_of(input_haystack, input_needle); + test_case_string_find_str(input_haystack, input_needle); + test_case_string_rfind_str(input_haystack, input_needle); + + // For large needles the chance of a match is low, so test a guaranteed match + if (input_haystack.size() > 
input_needle.size() * 2) { + uniform_int_distribution pos_dis(0, input_haystack.size() - input_needle.size()); + const size_t pos = pos_dis(gen); + const auto overwritten_first = input_haystack.begin() + static_cast(pos); + temp.assign(overwritten_first, overwritten_first + static_cast(input_needle.size())); + copy(input_needle.begin(), input_needle.end(), overwritten_first); + test_case_string_find_str(input_haystack, input_needle); + test_case_string_rfind_str(input_haystack, input_needle); + copy(temp.begin(), temp.end(), overwritten_first); + } } if (input_haystack.size() == haystackDataCount) { From 7c5d6f8a0d67aedb885a1a7e618fc4989a1b38d7 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 27 Oct 2024 22:01:59 +0200 Subject: [PATCH 04/11] vectorize basic_string::rfind (character) and basic_string::find(string) --- stl/inc/__msvc_string_view.hpp | 30 ++++++++++++++++++++++++++++++ stl/inc/algorithm | 32 -------------------------------- stl/inc/xutility | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 32 deletions(-) diff --git a/stl/inc/__msvc_string_view.hpp b/stl/inc/__msvc_string_view.hpp index 676fedf943..aa618ad4e4 100644 --- a/stl/inc/__msvc_string_view.hpp +++ b/stl/inc/__msvc_string_view.hpp @@ -628,6 +628,21 @@ constexpr size_t _Traits_find(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits> return _Start_at; } +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Is_implementation_handled_char_traits<_Traits> && sizeof(_Traits::char_type) <= 2) { + if (!_STD _Is_constant_evaluated()) { + const auto _End = _Haystack + _Hay_size; + const auto _Ptr = _STD _Search_vectorized(_Haystack + _Start_at, _End, _Needle, _Needle_size); + + if (_Ptr != _End) { + return static_cast(_Ptr - _Haystack); + } else { + return static_cast(-1); + } + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS + const auto _Possible_matches_end = _Haystack + (_Hay_size - _Needle_size) + 1; for (auto _Match_try = _Haystack + _Start_at;; ++_Match_try) { _Match_try = 
_Traits::find(_Match_try, static_cast(_Possible_matches_end - _Match_try), *_Needle); @@ -688,6 +703,21 @@ constexpr size_t _Traits_rfind_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<_Tra return static_cast(-1); } +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Is_implementation_handled_char_traits<_Traits> && sizeof(_Traits::char_type) <= 2) { + if (!_STD _Is_constant_evaluated()) { + const auto _End = _Haystack + _Hay_size; + const auto _Ptr = _STD _Find_last_vectorized(_Haystack + _Start_at, _End, _Ch); + + if (_Ptr != _End) { + return static_cast(_Ptr - _Haystack); + } else { + return static_cast(-1); + } + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS + for (auto _Match_try = _Haystack + (_STD min)(_Start_at, _Hay_size - 1);; --_Match_try) { if (_Traits::eq(*_Match_try, _Ch)) { return static_cast(_Match_try - _Haystack); // found a match diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 09f0f12fa6..dc4532f05f 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -54,11 +54,6 @@ _Min_max_element_t __stdcall __std_minmax_element_8(const void* _First, const vo _Min_max_element_t __stdcall __std_minmax_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; _Min_max_element_t __stdcall __std_minmax_element_d(const void* _First, const void* _Last, bool _Unused) noexcept; -const void* __stdcall __std_find_last_trivial_1(const void* _First, const void* _Last, uint8_t _Val) noexcept; -const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* _Last, uint16_t _Val) noexcept; -const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; -const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; - __declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept; __declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept; 
__declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* _First, const void* _Last) noexcept; @@ -162,33 +157,6 @@ auto _Minmax_vectorized(_Ty* const _First, _Ty* const _Last) noexcept { } } -template -_Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noexcept { - if constexpr (is_pointer_v<_TVal> || is_null_pointer_v<_TVal>) { -#ifdef _WIN64 - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_8(_First, _Last, reinterpret_cast(_Val)))); -#else - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_4(_First, _Last, reinterpret_cast(_Val)))); -#endif - } else if constexpr (sizeof(_Ty) == 1) { - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_1(_First, _Last, static_cast(_Val)))); - } else if constexpr (sizeof(_Ty) == 2) { - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_2(_First, _Last, static_cast(_Val)))); - } else if constexpr (sizeof(_Ty) == 4) { - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_4(_First, _Last, static_cast(_Val)))); - } else if constexpr (sizeof(_Ty) == 8) { - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_8(_First, _Last, static_cast(_Val)))); - } else { - _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size - } -} - template __declspec(noalias) void _Replace_vectorized( _Ty* const _First, _Ty* const _Last, const _TVal1 _Old_val, const _TVal2 _New_val) noexcept { diff --git a/stl/inc/xutility b/stl/inc/xutility index e67be17a51..89ef416efa 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -93,6 +93,11 @@ const void* __stdcall __std_find_trivial_2(const void* _First, const void* _Last const void* __stdcall __std_find_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; const void* __stdcall __std_find_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; +const void* __stdcall __std_find_last_trivial_1(const void* _First, const void* _Last, uint8_t _Val) 
noexcept; +const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* _Last, uint16_t _Val) noexcept; +const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; +const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; + const void* __stdcall __std_find_first_of_trivial_1( const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; const void* __stdcall __std_find_first_of_trivial_2( @@ -212,6 +217,33 @@ _Ty* _Find_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noe } } +template +_Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noexcept { + if constexpr (is_pointer_v<_TVal> || is_null_pointer_v<_TVal>) { +#ifdef _WIN64 + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_8(_First, _Last, reinterpret_cast(_Val)))); +#else + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_4(_First, _Last, reinterpret_cast(_Val)))); +#endif + } else if constexpr (sizeof(_Ty) == 1) { + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_1(_First, _Last, static_cast(_Val)))); + } else if constexpr (sizeof(_Ty) == 2) { + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_2(_First, _Last, static_cast(_Val)))); + } else if constexpr (sizeof(_Ty) == 4) { + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_4(_First, _Last, static_cast(_Val)))); + } else if constexpr (sizeof(_Ty) == 8) { + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_8(_First, _Last, static_cast(_Val)))); + } else { + _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size + } +} + // find_first_of vectorization is likely to be a win after this size (in elements) _INLINE_VAR constexpr ptrdiff_t _Threshold_find_first_of = 16; From 2d6538ae094e8261dc20b782e564ce13bf6c591d Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 
27 Oct 2024 22:26:59 +0200 Subject: [PATCH 05/11] Don't vectorize rfind for now --- stl/inc/__msvc_string_view.hpp | 15 --------------- stl/inc/algorithm | 32 ++++++++++++++++++++++++++++++++ stl/inc/xutility | 32 -------------------------------- 3 files changed, 32 insertions(+), 47 deletions(-) diff --git a/stl/inc/__msvc_string_view.hpp b/stl/inc/__msvc_string_view.hpp index aa618ad4e4..6617aa829d 100644 --- a/stl/inc/__msvc_string_view.hpp +++ b/stl/inc/__msvc_string_view.hpp @@ -703,21 +703,6 @@ constexpr size_t _Traits_rfind_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<_Tra return static_cast(-1); } -#if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Is_implementation_handled_char_traits<_Traits> && sizeof(_Traits::char_type) <= 2) { - if (!_STD _Is_constant_evaluated()) { - const auto _End = _Haystack + _Hay_size; - const auto _Ptr = _STD _Find_last_vectorized(_Haystack + _Start_at, _End, _Ch); - - if (_Ptr != _End) { - return static_cast(_Ptr - _Haystack); - } else { - return static_cast(-1); - } - } - } -#endif // _USE_STD_VECTOR_ALGORITHMS - for (auto _Match_try = _Haystack + (_STD min)(_Start_at, _Hay_size - 1);; --_Match_try) { if (_Traits::eq(*_Match_try, _Ch)) { return static_cast(_Match_try - _Haystack); // found a match diff --git a/stl/inc/algorithm b/stl/inc/algorithm index dc4532f05f..09f0f12fa6 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -54,6 +54,11 @@ _Min_max_element_t __stdcall __std_minmax_element_8(const void* _First, const vo _Min_max_element_t __stdcall __std_minmax_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; _Min_max_element_t __stdcall __std_minmax_element_d(const void* _First, const void* _Last, bool _Unused) noexcept; +const void* __stdcall __std_find_last_trivial_1(const void* _First, const void* _Last, uint8_t _Val) noexcept; +const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* _Last, uint16_t _Val) noexcept; +const void* __stdcall __std_find_last_trivial_4(const 
void* _First, const void* _Last, uint32_t _Val) noexcept; +const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; + __declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept; __declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept; __declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* _First, const void* _Last) noexcept; @@ -157,6 +162,33 @@ auto _Minmax_vectorized(_Ty* const _First, _Ty* const _Last) noexcept { } } +template +_Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noexcept { + if constexpr (is_pointer_v<_TVal> || is_null_pointer_v<_TVal>) { +#ifdef _WIN64 + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_8(_First, _Last, reinterpret_cast(_Val)))); +#else + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_4(_First, _Last, reinterpret_cast(_Val)))); +#endif + } else if constexpr (sizeof(_Ty) == 1) { + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_1(_First, _Last, static_cast(_Val)))); + } else if constexpr (sizeof(_Ty) == 2) { + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_2(_First, _Last, static_cast(_Val)))); + } else if constexpr (sizeof(_Ty) == 4) { + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_4(_First, _Last, static_cast(_Val)))); + } else if constexpr (sizeof(_Ty) == 8) { + return const_cast<_Ty*>( + static_cast(::__std_find_last_trivial_8(_First, _Last, static_cast(_Val)))); + } else { + _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size + } +} + template __declspec(noalias) void _Replace_vectorized( _Ty* const _First, _Ty* const _Last, const _TVal1 _Old_val, const _TVal2 _New_val) noexcept { diff --git a/stl/inc/xutility b/stl/inc/xutility index 89ef416efa..e67be17a51 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -93,11 +93,6 @@ const void* 
__stdcall __std_find_trivial_2(const void* _First, const void* _Last const void* __stdcall __std_find_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; const void* __stdcall __std_find_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; -const void* __stdcall __std_find_last_trivial_1(const void* _First, const void* _Last, uint8_t _Val) noexcept; -const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* _Last, uint16_t _Val) noexcept; -const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; -const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; - const void* __stdcall __std_find_first_of_trivial_1( const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; const void* __stdcall __std_find_first_of_trivial_2( @@ -217,33 +212,6 @@ _Ty* _Find_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noe } } -template -_Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noexcept { - if constexpr (is_pointer_v<_TVal> || is_null_pointer_v<_TVal>) { -#ifdef _WIN64 - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_8(_First, _Last, reinterpret_cast(_Val)))); -#else - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_4(_First, _Last, reinterpret_cast(_Val)))); -#endif - } else if constexpr (sizeof(_Ty) == 1) { - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_1(_First, _Last, static_cast(_Val)))); - } else if constexpr (sizeof(_Ty) == 2) { - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_2(_First, _Last, static_cast(_Val)))); - } else if constexpr (sizeof(_Ty) == 4) { - return const_cast<_Ty*>( - static_cast(::__std_find_last_trivial_4(_First, _Last, static_cast(_Val)))); - } else if constexpr (sizeof(_Ty) == 8) { - return const_cast<_Ty*>( - 
static_cast(::__std_find_last_trivial_8(_First, _Last, static_cast(_Val)))); - } else { - _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size - } -} - // find_first_of vectorization is likely to be a win after this size (in elements) _INLINE_VAR constexpr ptrdiff_t _Threshold_find_first_of = 16; From ca1f963c977cbf3935a6a1e199c1274dcb3ad29f Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 27 Oct 2024 22:28:56 +0200 Subject: [PATCH 06/11] Don't benchmark character search for now --- benchmarks/src/find_and_count.cpp | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/benchmarks/src/find_and_count.cpp b/benchmarks/src/find_and_count.cpp index 6cfa482590..9c608bfe35 100644 --- a/benchmarks/src/find_and_count.cpp +++ b/benchmarks/src/find_and_count.cpp @@ -7,15 +7,12 @@ #include #include #include -#include #include enum class Op { FindSized, FindUnsized, Count, - StrFind, - StrRFind, }; using namespace std; @@ -25,9 +22,7 @@ void bm(benchmark::State& state) { const auto size = static_cast(state.range(0)); const auto pos = static_cast(state.range(1)); - constexpr bool is_string_op = Operation == Op::StrFind || Operation == Op::StrRFind; - - conditional_t, vector> a(size, T{'0'}); + vector a(size, T{'0'}); if (pos < size) { a[pos] = T{'1'}; @@ -38,18 +33,12 @@ void bm(benchmark::State& state) { } for (auto _ : state) { - benchmark::DoNotOptimize(a); - if constexpr (Operation == Op::FindSized) { benchmark::DoNotOptimize(ranges::find(a.begin(), a.end(), T{'1'})); } else if constexpr (Operation == Op::FindUnsized) { benchmark::DoNotOptimize(ranges::find(a.begin(), unreachable_sentinel, T{'1'})); } else if constexpr (Operation == Op::Count) { benchmark::DoNotOptimize(ranges::count(a.begin(), a.end(), T{'1'})); - } else if constexpr (Operation == Op::StrFind) { - benchmark::DoNotOptimize(a.find(T{'1'})); - } else if constexpr (Operation == Op::StrRFind) { - benchmark::DoNotOptimize(a.rfind(T{'1'})); } } } @@ -60,6 +49,7 @@ void
common_args(auto bm) { bm->Args({63, 62})->Args({31, 30})->Args({15, 14})->Args({7, 6}); } + BENCHMARK(bm)->Apply(common_args); BENCHMARK(bm)->Apply(common_args); BENCHMARK(bm)->Apply(common_args); @@ -73,10 +63,4 @@ BENCHMARK(bm)->Apply(common_args); BENCHMARK(bm)->Apply(common_args); BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); - -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); - BENCHMARK_MAIN(); From 4d782ce531b4526043347e0a4349dba3e60eeeb6 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 27 Oct 2024 22:31:04 +0200 Subject: [PATCH 07/11] Revert "single character coverage" This reverts commit 87509352353e012104be0bcbe77e3d50c32b2e12. # Conflicts: # tests/std/tests/VSO_0000000_vector_algorithms/test.cpp --- .../VSO_0000000_vector_algorithms/test.cpp | 33 +++---------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 8a3471e7a8..7468cddf2f 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -1218,31 +1218,12 @@ void test_bitset(mt19937_64& gen) { test_randomized_bitset_base_count<512 - 5, 32 + 10>(gen); } -template -void test_case_string_find_ch(const basic_string& input_haystack, const T ch) { - const auto expected_iter = last_known_good_find(input_haystack.begin(), input_haystack.end(), ch); - const auto expected = - (expected_iter != input_haystack.end()) ? 
expected_iter - input_haystack.begin() : ptrdiff_t{-1}; - const auto actual = static_cast(input_haystack.find(ch)); - assert(expected == actual); -} - -template -void test_case_string_rfind_ch(const basic_string& input_haystack, const T ch) { - const auto expected_iter = last_known_good_find_last(input_haystack.begin(), input_haystack.end(), ch); - const auto expected = - (expected_iter != input_haystack.end()) ? expected_iter - input_haystack.begin() : ptrdiff_t{-1}; - const auto actual = static_cast(input_haystack.rfind(ch)); - assert(expected == actual); -} - template void test_case_string_find_first_of(const basic_string& input_haystack, const basic_string& input_needle) { - const auto expected_iter = last_known_good_find_first_of( + auto expected_iter = last_known_good_find_first_of( input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); - const auto expected = - (expected_iter != input_haystack.end()) ? expected_iter - input_haystack.begin() : ptrdiff_t{-1}; - const auto actual = static_cast(input_haystack.find_first_of(input_needle)); + auto expected = (expected_iter != input_haystack.end()) ? 
expected_iter - input_haystack.begin() : ptrdiff_t{-1}; + auto actual = static_cast(input_haystack.find_first_of(input_needle)); assert(expected == actual); } @@ -1261,8 +1242,8 @@ size_t last_known_good_find_last_of(const basic_string& h, const basic_string template void test_case_string_find_last_of(const basic_string& input_haystack, const basic_string& input_needle) { - const size_t expected = last_known_good_find_last_of(input_haystack, input_needle); - const size_t actual = input_haystack.find_last_of(input_needle); + size_t expected = last_known_good_find_last_of(input_haystack, input_needle); + size_t actual = input_haystack.find_last_of(input_needle); assert(expected == actual); } @@ -1304,10 +1285,6 @@ void test_basic_string_dis(mt19937_64& gen, D& dis) { temp.reserve(needleDataCount); for (;;) { - const T ch = static_cast(dis(gen)); - test_case_string_find_ch(input_haystack, ch); - test_case_string_rfind_ch(input_haystack, ch); - input_needle.clear(); test_case_string_find_first_of(input_haystack, input_needle); From 22f19b47f92d382969612488d31e4b644bd009e1 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 27 Oct 2024 22:33:16 +0200 Subject: [PATCH 08/11] Don't test rfind string for now --- .../VSO_0000000_vector_algorithms/test.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 7468cddf2f..f351a10173 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -1261,20 +1261,6 @@ void test_case_string_find_str(const basic_string& input_haystack, const basi assert(expected == actual); } -template -void test_case_string_rfind_str(const basic_string& input_haystack, const basic_string& input_needle) { - ptrdiff_t expected; - if (input_needle.empty()) { - expected = static_cast(input_haystack.size()); - } else { - const auto expected_iter = 
last_known_good_find_end( - input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); - expected = (expected_iter != input_haystack.end()) ? expected_iter - input_haystack.begin() : ptrdiff_t{-1}; - } - const auto actual = static_cast(input_haystack.rfind(input_needle)); - assert(expected == actual); -} - template void test_basic_string_dis(mt19937_64& gen, D& dis) { basic_string input_haystack; @@ -1290,14 +1276,12 @@ void test_basic_string_dis(mt19937_64& gen, D& dis) { test_case_string_find_first_of(input_haystack, input_needle); test_case_string_find_last_of(input_haystack, input_needle); test_case_string_find_str(input_haystack, input_needle); - test_case_string_rfind_str(input_haystack, input_needle); for (size_t attempts = 0; attempts < needleDataCount; ++attempts) { input_needle.push_back(static_cast(dis(gen))); test_case_string_find_first_of(input_haystack, input_needle); test_case_string_find_last_of(input_haystack, input_needle); test_case_string_find_str(input_haystack, input_needle); - test_case_string_rfind_str(input_haystack, input_needle); // For large needles the chance of a match is low, so test a guaranteed match if (input_haystack.size() > input_needle.size() * 2) { @@ -1307,7 +1291,6 @@ void test_basic_string_dis(mt19937_64& gen, D& dis) { temp.assign(overwritten_first, overwritten_first + static_cast(input_needle.size())); copy(input_needle.begin(), input_needle.end(), overwritten_first); test_case_string_find_str(input_haystack, input_needle); - test_case_string_rfind_str(input_haystack, input_needle); copy(temp.begin(), temp.end(), overwritten_first); } } From 602e8d7aadaa37148eb6a44678626309355c6f01 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 27 Oct 2024 22:36:31 +0200 Subject: [PATCH 09/11] Don't benchmark rfind string for now --- benchmarks/src/search.cpp | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index 
15e6569de4..ad026b994f 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -159,23 +159,6 @@ void ranges_find_end(benchmark::State& state) { } } -template -void member_rfind(benchmark::State& state) { - const auto& src_haystack = patterns[static_cast(state.range())].data; - const auto& src_needle = patterns[static_cast(state.range())].pattern; - - const T haystack(src_haystack.begin(), src_haystack.end()); - const T needle(src_needle.begin(), src_needle.end()); - - for (auto _ : state) { - benchmark::DoNotOptimize(haystack); - benchmark::DoNotOptimize(needle); - auto res = haystack.rfind(needle); - benchmark::DoNotOptimize(res); - } -} - - void common_args(auto bm) { bm->DenseRange(0, std::size(patterns) - 1, 1); } @@ -200,7 +183,4 @@ BENCHMARK(classic_find_end)->Apply(common_args); BENCHMARK(ranges_find_end)->Apply(common_args); BENCHMARK(ranges_find_end)->Apply(common_args); -BENCHMARK(member_rfind)->Apply(common_args); -BENCHMARK(member_rfind)->Apply(common_args); - BENCHMARK_MAIN(); From dbff7f8593cb5e679af6572f1ba2a58ae592b1c0 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 29 Oct 2024 10:23:11 -0700 Subject: [PATCH 10/11] Use if-else instead of verbose conditional. --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index f351a10173..0f23b5bfe0 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -1255,7 +1255,12 @@ void test_case_string_find_str(const basic_string& input_haystack, const basi } else { const auto expected_iter = last_known_good_search( input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); - expected = (expected_iter != input_haystack.end()) ? 
expected_iter - input_haystack.begin() : ptrdiff_t{-1}; + + if (expected_iter != input_haystack.end()) { + expected = expected_iter - input_haystack.begin(); + } else { + expected = -1; + } } const auto actual = static_cast(input_haystack.find(input_needle)); assert(expected == actual); From a6a41dbe6e720dd67d1a98af3a4a088811143222 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 29 Oct 2024 10:52:28 -0700 Subject: [PATCH 11/11] Missing `typename`. --- stl/inc/__msvc_string_view.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/__msvc_string_view.hpp b/stl/inc/__msvc_string_view.hpp index 6617aa829d..56d9d449ae 100644 --- a/stl/inc/__msvc_string_view.hpp +++ b/stl/inc/__msvc_string_view.hpp @@ -629,7 +629,7 @@ constexpr size_t _Traits_find(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits> } #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Is_implementation_handled_char_traits<_Traits> && sizeof(_Traits::char_type) <= 2) { + if constexpr (_Is_implementation_handled_char_traits<_Traits> && sizeof(typename _Traits::char_type) <= 2) { if (!_STD _Is_constant_evaluated()) { const auto _End = _Haystack + _Hay_size; const auto _Ptr = _STD _Search_vectorized(_Haystack + _Start_at, _End, _Needle, _Needle_size);