From 39d18830ba44b15fed550ab5b948c0acbe1f678a Mon Sep 17 00:00:00 2001 From: Oren Leiman Date: Thu, 26 Sep 2024 15:08:29 -0700 Subject: [PATCH] pp/httpd: Remove extra decode on path param get seastar::http::request::get_path_param performs URL decoding automatically. When this path was updated to use the new method, the pre-existing URL decode behavior was left in place. This is usually not noticeable because decoding an un-encoded string is a no-op. However, that's not the case if the decoded string itself contains URL escape sequences. For example, given the path parameter "foo%252Fbar": - decoded once: "foo%2Fbar" - decoded twice: "foo/bar" So this commit removes the "extra" decode step from the PP httpd module and adds a ducktape test to confirm that URL escape sequences are preserved in a client-provided subject name, the specific case that led to this fix. Unfortunately, get_path_param destroys some information such that distinguishing between an absent parameter and an ill-formed parameter is no longer possible. So this commit includes a bit of extra logic for the nominally unlikely case where get_path_param returns an empty string. CORE-7738 Signed-off-by: Oren Leiman (cherry picked from commit bcab3ff93038c059650cd5198a22676f5ba6718a) --- src/v/pandaproxy/parsing/httpd.h | 21 +++++++++++++++------ tests/rptest/tests/schema_registry_test.py | 13 +++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/v/pandaproxy/parsing/httpd.h b/src/v/pandaproxy/parsing/httpd.h index 87fdfcffb7ba..a094538a2ca3 100644 --- a/src/v/pandaproxy/parsing/httpd.h +++ b/src/v/pandaproxy/parsing/httpd.h @@ -94,13 +94,22 @@ T header(const ss::http::request& req, const ss::sstring& name) { template T request_param(const ss::http::request& req, const ss::sstring& name) { const auto& param{req.get_path_param(name)}; - ss::sstring value; - if (!ss::http::internal::url_decode(param, value)) { - throw error( - error_code::invalid_param, - fmt::format("Invalid parameter '{}' got '{}'", name, param)); + if (param.empty()) [[unlikely]] { + // either the param wasn't present or URL decoding failed + try { + const auto& p = req.param.at(name); + if (p.size() > 1) { + // we got something more than the leading '/', so URL decoding + // must have failed + throw error( + error_code::invalid_param, + fmt::format("Invalid parameter '{}' got '{}'", name, p)); + } + } catch (const std::out_of_range& e) { + std::ignore = e; + } } - return detail::parse_param("parameter", name, value); + return detail::parse_param("parameter", name, param); } template diff --git a/tests/rptest/tests/schema_registry_test.py b/tests/rptest/tests/schema_registry_test.py index af3cd299aec8..c9f5d2d9e5f5 100644 --- a/tests/rptest/tests/schema_registry_test.py +++ b/tests/rptest/tests/schema_registry_test.py @@ -1073,6 +1073,19 @@ def test_post_subjects_subject_versions(self): result = result_raw.json() # assert result["schema"] == json.dumps(schema_def) + self.logger.debug( + "Post schema 1 with escape chars in the subject name") + name = f"{topic}%25252fkey" + name_quoted = urllib.parse.quote(name) + result_raw = self._post_subjects_subject_versions(subject=name_quoted, + data=schema_1_data) + self.logger.warn(result_raw) + assert result_raw.status_code == requests.codes.ok + result_raw = self._get_subjects_subject_versions(subject=name_quoted) + assert result_raw.status_code == requests.codes.ok + subjs = self._get_subjects().json() + assert name in subjs, f"Expected '{name}' in subjects response, got {json.dumps(subjs, indent=1)}" + @cluster(num_nodes=3) def test_post_subjects_subject_versions_version_many(self): """