From 19feaac85734d519df5d6543ae92b1dead2f5974 Mon Sep 17 00:00:00 2001 From: Gary Illyes Date: Tue, 14 Nov 2023 13:19:28 +0000 Subject: [PATCH] Normalize the checked key to lowercase when looking for unsupported rules. PiperOrigin-RevId: 582285639 --- reporting_robots.cc | 4 +++- reporting_robots_test.cc | 21 +++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/reporting_robots.cc b/reporting_robots.cc index 33d7864..7c39518 100644 --- a/reporting_robots.cc +++ b/reporting_robots.cc @@ -4,6 +4,7 @@ #include #include +#include "absl/strings/ascii.h" #include "absl/strings/string_view.h" namespace googlebot { @@ -69,7 +70,8 @@ void RobotsParsingReporter::HandleUnknownAction(int line_num, absl::string_view action, absl::string_view line_value) { RobotsParsedLine::RobotsTagName rtn = - std::count(kUnsupportedTags.begin(), kUnsupportedTags.end(), action) > 0 + std::count(kUnsupportedTags.begin(), kUnsupportedTags.end(), + absl::AsciiStrToLower(action)) > 0 ? RobotsParsedLine::kUnused : RobotsParsedLine::kUnknown; unused_directives_++; diff --git a/reporting_robots_test.cc b/reporting_robots_test.cc index f508c2a..554496c 100644 --- a/reporting_robots_test.cc +++ b/reporting_robots_test.cc @@ -111,11 +111,12 @@ TEST(RobotsUnittest, LinesNumbersAreCountedCorrectly) { "useragent: baz\n" // 11 "disallaw: /some\n" // 12 "site-map: https://e/s.xml #comment\n" // 13 - "sitemap: https://e/t.xml\n"; // 14 - // 15 (from \n) + "sitemap: https://e/t.xml\n" // 14 + "Noarchive: /someCapital\n"; // 15 + // 16 (from \n) googlebot::ParseRobotsTxt(kSimpleFile, &report); EXPECT_EQ(8, report.valid_directives()); - EXPECT_EQ(15, report.last_line_seen()); + EXPECT_EQ(16, report.last_line_seen()); EXPECT_EQ(report.parse_results().size(), report.last_line_seen()); std::vector lines = absl::StrSplit(kSimpleFile, '\n'); @@ -295,10 +296,22 @@ TEST(RobotsUnittest, LinesNumbersAreCountedCorrectly) { .has_directive = true, .is_acceptable_typo = false, }}); - // For line 15 (which is empty and comes from the last \n) + // For line "Noarchive: /someCapital\n" // 15 expectLineToParseTo( lines, report.parse_results(), RobotsParsedLine{.line_num = 15, + .tag_name = RobotsParsedLine::RobotsTagName::kUnused, + .is_typo = false, + .metadata = RobotsParseHandler::LineMetadata{ + .is_empty = false, + .has_comment = false, + .is_comment = false, + .has_directive = true, + }}); + // For line 16 (which is empty and comes from the last \n) + expectLineToParseTo( + lines, report.parse_results(), + RobotsParsedLine{.line_num = 16, .tag_name = RobotsParsedLine::RobotsTagName::kUnknown, .is_typo = false, .metadata = RobotsParseHandler::LineMetadata{