Skip to content

Commit

Permalink
Use different search algorithm in Strings::contains_any (#1255)
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas1664 authored Oct 31, 2023
1 parent d88738a commit fe0ad6c
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 20 deletions.
16 changes: 13 additions & 3 deletions include/vcpkg/base/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
#include <algorithm>
#include <vector>

#ifdef __APPLE__
#include <experimental/functional>
#endif

namespace vcpkg::Strings::details
{
void append_internal(std::string& into, char c);
Expand Down Expand Up @@ -43,6 +47,12 @@ namespace vcpkg::Strings::details

namespace vcpkg::Strings
{
// Alias for the Boyer-Moore-Horspool searcher type used by this namespace, instantiated for
// std::string::const_iterator (so a pattern is handed over as a pair of std::string iterators).
// When __APPLE__ is defined the alias names the std::experimental implementation; on every other
// platform it names the std:: one.
// NOTE(review): presumably the Apple toolchain's standard library did not provide
// std::boyer_moore_horspool_searcher at the time -- confirm before removing the split.
// NOTE(review): the searcher is expected to refer to the pattern range rather than copy it, so the
// pattern string should outlive every searcher built from it -- confirm against the library docs.
#ifdef __APPLE__
using boyer_moore_horspool_searcher = std::experimental::boyer_moore_horspool_searcher<std::string::const_iterator>;
#else
using boyer_moore_horspool_searcher = std::boyer_moore_horspool_searcher<std::string::const_iterator>;
#endif

template<class... Args>
std::string& append(std::string& into, const Args&... args)
{
Expand Down Expand Up @@ -169,11 +179,11 @@ namespace vcpkg::Strings
StringView left_tag,
StringView right_tag);

bool contains_any_ignoring_c_comments(const std::string& source, View<StringView> to_find);
bool contains_any_ignoring_c_comments(const std::string& source, View<boyer_moore_horspool_searcher> to_find);

bool contains_any_ignoring_hash_comments(StringView source, View<StringView> to_find);
bool contains_any_ignoring_hash_comments(StringView source, View<boyer_moore_horspool_searcher> to_find);

bool contains_any(StringView source, View<StringView> to_find);
bool long_string_contains_any(StringView source, View<boyer_moore_horspool_searcher> to_find);

[[nodiscard]] bool equals(StringView a, StringView b);

Expand Down
14 changes: 12 additions & 2 deletions src/vcpkg-test/strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,12 @@ TEST_CASE ("find_first_of", "[strings]")
TEST_CASE ("contains_any_ignoring_c_comments", "[strings]")
{
using vcpkg::Strings::contains_any_ignoring_c_comments;
vcpkg::StringView to_find[] = {"abc", "wer"};
std::string a = "abc";
std::string b = "wer";

vcpkg::Strings::boyer_moore_horspool_searcher to_find[] = {
vcpkg::Strings::boyer_moore_horspool_searcher(a.begin(), a.end()),
vcpkg::Strings::boyer_moore_horspool_searcher(b.begin(), b.end())};
REQUIRE(contains_any_ignoring_c_comments(R"(abc)", to_find));
REQUIRE(contains_any_ignoring_c_comments(R"("abc")", to_find));
REQUIRE_FALSE(contains_any_ignoring_c_comments(R"("" //abc)", to_find));
Expand Down Expand Up @@ -127,7 +132,12 @@ TEST_CASE ("contains_any_ignoring_c_comments", "[strings]")
TEST_CASE ("contains_any_ignoring_hash_comments", "[strings]")
{
using vcpkg::Strings::contains_any_ignoring_hash_comments;
vcpkg::StringView to_find[] = {"abc", "wer"};
std::string a = "abc";
std::string b = "wer";

vcpkg::Strings::boyer_moore_horspool_searcher to_find[] = {
vcpkg::Strings::boyer_moore_horspool_searcher(a.begin(), a.end()),
vcpkg::Strings::boyer_moore_horspool_searcher(b.begin(), b.end())};
REQUIRE(contains_any_ignoring_hash_comments("abc", to_find));
REQUIRE(contains_any_ignoring_hash_comments("wer", to_find));
REQUIRE(contains_any_ignoring_hash_comments("wer # test", to_find));
Expand Down
25 changes: 17 additions & 8 deletions src/vcpkg/base/strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,8 @@ Optional<StringView> Strings::find_at_most_one_enclosed(StringView input, String
return result.front();
}

bool vcpkg::Strings::contains_any_ignoring_c_comments(const std::string& source, View<StringView> to_find)
bool vcpkg::Strings::contains_any_ignoring_c_comments(const std::string& source,
View<boyer_moore_horspool_searcher> to_find)
{
std::string::size_type offset = 0;
std::string::size_type no_comment_offset = 0;
Expand All @@ -390,14 +391,14 @@ bool vcpkg::Strings::contains_any_ignoring_c_comments(const std::string& source,
auto start = source.find_first_of("/\"", no_comment_offset);
if (start == std::string::npos || start + 1 == source.size() || no_comment_offset == std::string::npos)
{
return Strings::contains_any(StringView(source).substr(offset), to_find);
return Strings::long_string_contains_any(StringView(source).substr(offset), to_find);
}

if (source[start] == '/')
{
if (source[start + 1] == '/' || source[start + 1] == '*')
{
if (Strings::contains_any(StringView(source).substr(offset, start - offset), to_find))
if (Strings::long_string_contains_any(StringView(source).substr(offset, start - offset), to_find))
{
return true;
}
Expand Down Expand Up @@ -444,7 +445,7 @@ bool vcpkg::Strings::contains_any_ignoring_c_comments(const std::string& source,
return false;
}

bool Strings::contains_any_ignoring_hash_comments(StringView source, View<StringView> to_find)
bool Strings::contains_any_ignoring_hash_comments(StringView source, View<boyer_moore_horspool_searcher> to_find)
{
auto first = source.data();
auto block_start = first;
Expand All @@ -453,7 +454,7 @@ bool Strings::contains_any_ignoring_hash_comments(StringView source, View<String
{
if (*first == '#')
{
if (Strings::contains_any(StringView{block_start, first}, to_find))
if (Strings::long_string_contains_any(StringView{block_start, first}, to_find))
{
return true;
}
Expand All @@ -468,12 +469,20 @@ bool Strings::contains_any_ignoring_hash_comments(StringView source, View<String
}
}

return Strings::contains_any(StringView{block_start, last}, to_find);
return Strings::long_string_contains_any(StringView{block_start, last}, to_find);
}

bool Strings::contains_any(StringView source, View<StringView> to_find)
// Reports whether `source` contains the pattern of at least one searcher in `to_find`.
// The loop below tries the searchers in order and returns true on the first match; it returns
// false when no searcher matches, which includes the case where `to_find` is empty.
bool Strings::long_string_contains_any(StringView source, View<boyer_moore_horspool_searcher> to_find)
{
// NOTE(review): the following line appears to be the pre-change body (it takes StringView
// elements, not searchers) left in by the diff rendering -- confirm it is not part of the
// current function.
return Util::any_of(to_find, [=](StringView s) { return Strings::contains(source, s); });
for (const auto& subject : to_find)
{
// The searcher overload of std::search yields the end iterator when the pattern is absent.
auto found = std::search(source.begin(), source.end(), subject);
if (found != source.end())
{
return true;
}
}
return false;
}

bool Strings::equals(StringView a, StringView b)
Expand Down
15 changes: 8 additions & 7 deletions src/vcpkg/postbuildlint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1222,25 +1222,25 @@ namespace vcpkg

static bool file_contains_absolute_paths(const ReadOnlyFilesystem& fs,
const Path& file,
const std::vector<StringView> stringview_paths)
View<Strings::boyer_moore_horspool_searcher> searcher_paths)
{
const auto extension = file.extension();
if (extension == ".h" || extension == ".hpp" || extension == ".hxx")
{
return Strings::contains_any_ignoring_c_comments(fs.read_contents(file, IgnoreErrors{}), stringview_paths);
return Strings::contains_any_ignoring_c_comments(fs.read_contents(file, IgnoreErrors{}), searcher_paths);
}

if (extension == ".cfg" || extension == ".ini" || file.filename() == "usage")
{
const auto contents = fs.read_contents(file, IgnoreErrors{});
return Strings::contains_any(contents, stringview_paths);
return Strings::long_string_contains_any(contents, searcher_paths);
}

if (extension == ".py" || extension == ".sh" || extension == ".cmake" || extension == ".pc" ||
extension == ".conf")
{
const auto contents = fs.read_contents(file, IgnoreErrors{});
return Strings::contains_any_ignoring_hash_comments(contents, stringview_paths);
return Strings::contains_any_ignoring_hash_comments(contents, searcher_paths);
}

if (extension.empty())
Expand All @@ -1254,7 +1254,7 @@ namespace vcpkg
Strings::starts_with(StringView(buffer, sizeof(buffer)), "\xEF\xBB\xBF#!") /* ignore byte-order mark */)
{
const auto contents = fs.read_contents(file, IgnoreErrors{});
return Strings::contains_any_ignoring_hash_comments(contents, stringview_paths);
return Strings::contains_any_ignoring_hash_comments(contents, searcher_paths);
}
return false;
}
Expand Down Expand Up @@ -1283,14 +1283,15 @@ namespace vcpkg

Util::sort_unique_erase(string_paths);

const auto stringview_paths = Util::fmap(string_paths, [](std::string& s) { return StringView(s); });
const auto searcher_paths = Util::fmap(
string_paths, [](std::string& s) { return Strings::boyer_moore_horspool_searcher(s.begin(), s.end()); });

std::vector<Path> failing_files;
std::mutex mtx;
auto files = fs.get_regular_files_recursive(dir, IgnoreErrors{});

parallel_for_each_n(files.begin(), files.size(), [&](const Path& file) {
if (file_contains_absolute_paths(fs, file, stringview_paths))
if (file_contains_absolute_paths(fs, file, searcher_paths))
{
std::lock_guard lock{mtx};
failing_files.push_back(file);
Expand Down

0 comments on commit fe0ad6c

Please sign in to comment.