diff --git a/browser/net/BUILD.gn b/browser/net/BUILD.gn
index 2951a658ca3b..ba256fcd2f6e 100644
--- a/browser/net/BUILD.gn
+++ b/browser/net/BUILD.gn
@@ -52,6 +52,7 @@ source_set("net") {
     "//net",
     "//services/network/public/cpp",
     "//services/network/public/mojom",
+    "//third_party/re2",
     "//url",
   ]
 
diff --git a/browser/net/brave_site_hacks_network_delegate_helper.cc b/browser/net/brave_site_hacks_network_delegate_helper.cc
index bbefb3587f64..67ee8bf3d6bd 100644
--- a/browser/net/brave_site_hacks_network_delegate_helper.cc
+++ b/browser/net/brave_site_hacks_network_delegate_helper.cc
@@ -7,7 +7,11 @@
 
 #include <memory>
 #include <string>
+#include <vector>
 
+#include "base/lazy_instance.h"
+#include "base/metrics/histogram_macros.h"
+#include "base/no_destructor.h"
 #include "base/sequenced_task_runner.h"
 #include "base/strings/string_util.h"
 #include "brave/common/network_constants.h"
@@ -21,6 +25,7 @@
 #include "content/public/common/referrer.h"
 #include "extensions/common/url_pattern.h"
 #include "net/url_request/url_request.h"
+#include "third_party/re2/src/re2/re2.h"
 
 using content::BrowserThread;
 using content::Referrer;
@@ -29,6 +34,48 @@ namespace brave {
 
 namespace {
 
+// Returns the alternation ("fbclid|gclid|msclkid|mc_eid") of the query-string
+// parameter names that are treated as trackers. The string is built on the
+// first call and is embedded into the regular expressions declared below.
+const std::string& GetQueryStringTrackers() {
+  static const base::NoDestructor<std::string> trackers(base::JoinString(
+      std::vector<std::string>({"fbclid", "gclid", "msclkid", "mc_eid"}), "|"));
+  return *trackers;
+}
+
+// Declares |NAME|, a lazily constructed, case-insensitive RE2 matcher for
+// PATTERN. PATTERN is evaluated inside New(), so it may be a runtime
+// expression such as the std::string concatenations used below.
+// From src/components/autofill/content/renderer/page_passwords_analyser.cc
+// and password_form_conversion_utils.cc:
+#define DECLARE_LAZY_MATCHER(NAME, PATTERN) \
+  struct LabelPatternLazyInstanceTraits_##NAME \
+      : public base::internal::DestructorAtExitLazyInstanceTraits<re2::RE2> { \
+    static re2::RE2* New(void* instance) { \
+      re2::RE2::Options options; \
+      options.set_case_sensitive(false); \
+      re2::RE2* matcher = new (instance) re2::RE2(PATTERN, options); \
+      DCHECK(matcher->ok()); \
+      return matcher; \
+    } \
+  }; \
+  base::LazyInstance<re2::RE2, LabelPatternLazyInstanceTraits_##NAME> NAME = \
+      LAZY_INSTANCE_INITIALIZER
+
+// e.g. "?fbclid=1234"
+DECLARE_LAZY_MATCHER(tracker_only_matcher,
+                     "^(" + GetQueryStringTrackers() + ")=[^&]+$");
+
+// e.g. "?fbclid=1234&foo=1"
+DECLARE_LAZY_MATCHER(tracker_first_matcher,
+                     "^(" + GetQueryStringTrackers() + ")=[^&]+&");
+
+// e.g. "?foo=1&fbclid=1234" or "?foo=1&fbclid=1234&bar=2"
+DECLARE_LAZY_MATCHER(tracker_appended_matcher,
+                     "&(" + GetQueryStringTrackers() + ")=[^&]+");
+
+#undef DECLARE_LAZY_MATCHER
+
 bool ApplyPotentialReferrerBlock(std::shared_ptr<BraveRequestInfo> ctx) {
   DCHECK_CURRENTLY_ON(BrowserThread::UI);
   GURL target_origin = ctx->request_url.GetOrigin();
@@ -50,12 +97,47 @@ bool ApplyPotentialReferrerBlock(std::shared_ptr<BraveRequestInfo> ctx) {
   return false;
 }
 
+// Strips known tracking parameters (see GetQueryStringTrackers()) from the
+// query of |request_url|. If at least one parameter was removed, the rewritten
+// URL is written to |*new_url_spec|; otherwise |*new_url_spec| is left
+// untouched. |new_url_spec| must not be null.
+void ApplyPotentialQueryStringFilter(const GURL& request_url,
+                                     std::string* new_url_spec) {
+  DCHECK(new_url_spec);
+  SCOPED_UMA_HISTOGRAM_TIMER("Brave.SiteHacks.QueryFilter");
+  std::string new_query = request_url.query();
+  // Note: the ordering of these replacements is important, so each one is a
+  // separate statement. The operands of a single `a + b + c` expression may be
+  // evaluated in any order.
+  int replacement_count =
+      re2::RE2::GlobalReplace(&new_query, tracker_appended_matcher.Get(), "");
+  replacement_count +=
+      re2::RE2::GlobalReplace(&new_query, tracker_first_matcher.Get(), "");
+  replacement_count +=
+      re2::RE2::GlobalReplace(&new_query, tracker_only_matcher.Get(), "");
+
+  if (replacement_count > 0) {
+    url::Replacements<char> replacements;
+    if (new_query.empty()) {
+      replacements.ClearQuery();
+    } else {
+      replacements.SetQuery(new_query.c_str(),
+                            url::Component(0, new_query.size()));
+    }
+    *new_url_spec = request_url.ReplaceComponents(replacements).spec();
+  }
+}
+
 }  // namespace
 
 int OnBeforeURLRequest_SiteHacksWork(
     const ResponseCallback& next_callback,
     std::shared_ptr<BraveRequestInfo> ctx) {
   ApplyPotentialReferrerBlock(ctx);
+
+  if (ctx->request_url.has_query()) {
+    ApplyPotentialQueryStringFilter(ctx->request_url, &ctx->new_url_spec);
+  }
   return net::OK;
 }
 
diff --git a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc
index ca44eec6f9d5..ed67d06112cd 100644
--- 
a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc
+++ b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc
@@ -7,6 +7,7 @@
 
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "brave/browser/net/url_context.h"
@@ -203,4 +204,91 @@ TEST_F(BraveSiteHacksNetworkDelegateHelperTest,
   });
 }
 
+// URLs whose query holds no tracker parameter with a non-empty value must
+// pass through without a rewritten URL being produced.
+TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringUntouched) {
+  const std::vector<std::string> urls({
+      "https://example.com/",
+      "https://example.com/?",
+      "https://example.com/?+%20",
+      "https://user:pass@example.com/path/file.html?foo=1#fragment",
+      "http://user:pass@example.com/path/file.html?foo=1&bar=2#fragment",
+      "https://example.com/?file=https%3A%2F%2Fexample.com%2Ftest.pdf",
+      "https://example.com/?title=1+2&caption=1%202",
+      "https://example.com/?foo=1&&bar=2#fragment",
+      "https://example.com/?foo&bar=&#fragment",
+      "https://example.com/?foo=1&fbcid=no&gcid=no&mc_cid=no&bar=&#frag",
+      "https://example.com/?fbclid=&gclid&=mc_eid&msclkid=",
+      "https://example.com/?value=fbclid=1&not-gclid=2&foo+mc_eid=3",
+      "https://example.com/?+fbclid=1",
+      "https://example.com/?%20fbclid=1",
+      "https://example.com/#fbclid=1",
+  });
+  for (const auto& url : urls) {
+    SCOPED_TRACE(url);
+    net::TestDelegate test_delegate;
+    std::unique_ptr<net::URLRequest> request = context()->CreateRequest(
+        GURL(url), net::IDLE, &test_delegate, TRAFFIC_ANNOTATION_FOR_TESTS);
+
+    std::shared_ptr<brave::BraveRequestInfo> brave_request_info(
+        new brave::BraveRequestInfo());
+    brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
+                                                brave_request_info);
+    brave::ResponseCallback callback;
+    int ret =
+        brave::OnBeforeURLRequest_SiteHacksWork(callback, brave_request_info);
+    EXPECT_EQ(ret, net::OK);
+    // new_url should not be set
+    EXPECT_TRUE(brave_request_info->new_url_spec.empty());
+    EXPECT_EQ(request->url(), GURL(url));
+  }
+}
+
+// Tracker parameters with a non-empty value are removed from the query; all
+// other parameters, their order and the fragment are preserved.
+TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) {
+  const std::vector<std::pair<std::string, std::string>> urls(
+      {
+          // { original url, expected url after filtering }
+          {"https://example.com/?fbclid=1234", "https://example.com/"},
+          {"https://example.com/?fbclid=1234&", "https://example.com/"},
+          {"https://example.com/?&fbclid=1234", "https://example.com/"},
+          {"https://example.com/?gclid=1234", "https://example.com/"},
+          {"https://example.com/?fbclid=0&gclid=1&msclkid=a&mc_eid=a1",
+           "https://example.com/"},
+          {"https://example.com/?fbclid=&foo=1&bar=2&gclid=abc",
+           "https://example.com/?fbclid=&foo=1&bar=2"},
+          {"https://example.com/?fbclid=&foo=1&gclid=1234&bar=2",
+           "https://example.com/?fbclid=&foo=1&bar=2"},
+          {"http://u:p@example.com/path/file.html?foo=1&fbclid=abcd#fragment",
+           "http://u:p@example.com/path/file.html?foo=1#fragment"},
+          // Obscure edge cases that break most parsers:
+          {"https://example.com/?fbclid&foo&&gclid=2&bar=&%20",
+           "https://example.com/?fbclid&foo&&bar=&%20"},
+          {"https://example.com/?fbclid=1&1==2&=msclkid&foo=bar&&a=b=c&",
+           "https://example.com/?1==2&=msclkid&foo=bar&&a=b=c&"},
+          {"https://example.com/?fbclid=1&=2&?foo=yes&bar=2+",
+           "https://example.com/?=2&?foo=yes&bar=2+"},
+          {"https://example.com/?fbclid=1&a+b+c=some%20thing&1%202=3+4",
+           "https://example.com/?a+b+c=some%20thing&1%202=3+4"},
+      });
+  for (const auto& pair : urls) {
+    SCOPED_TRACE(pair.first);
+    net::TestDelegate test_delegate;
+    std::unique_ptr<net::URLRequest> request =
+        context()->CreateRequest(GURL(pair.first), net::IDLE, &test_delegate,
+                                 TRAFFIC_ANNOTATION_FOR_TESTS);
+
+    std::shared_ptr<brave::BraveRequestInfo> brave_request_info(
+        new brave::BraveRequestInfo());
+    brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
+                                                brave_request_info);
+    brave::ResponseCallback callback;
+    int ret =
+        brave::OnBeforeURLRequest_SiteHacksWork(callback, brave_request_info);
+    EXPECT_EQ(ret, net::OK);
+    EXPECT_EQ(brave_request_info->new_url_spec, pair.second);
+  }
+}
+
 }  // namespace