From 9c8d35f899035fa06021ab3fe6919f892c2f0c6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Uhliarik?= Date: Fri, 11 Oct 2024 02:06:31 +0200 Subject: [PATCH] Added new argument to Http::One::ParseBws() Depending on the new wsp_only argument of ParseBws(), it is decided which set of whitespace characters will be parsed. If wsp_only is set to true, only SP and HTAB chars will be parsed. Also optimized the number of ParseBws() calls. --- src/http/one/Parser.cc | 4 ++-- src/http/one/Parser.h | 3 ++- src/http/one/TeChunkedParser.cc | 13 +++++++++---- src/http/one/TeChunkedParser.h | 2 +- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc index b1908316a0b..01d7e3bc0e8 100644 --- a/src/http/one/Parser.cc +++ b/src/http/one/Parser.cc @@ -273,9 +273,9 @@ Http::One::ErrorLevel() // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule void -Http::One::ParseBws(Parser::Tokenizer &tok) +Http::One::ParseBws(Parser::Tokenizer &tok, const bool wsp_only) { - const auto count = tok.skipAll(Parser::WhitespaceCharacters()); + const auto count = tok.skipAll(wsp_only ? 
CharacterSet::WSP : Parser::WhitespaceCharacters()); if (tok.atEnd()) throw InsufficientInput(); // even if count is positive diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h index d9a0ac8c273..08200371cd6 100644 --- a/src/http/one/Parser.h +++ b/src/http/one/Parser.h @@ -163,8 +163,9 @@ class Parser : public RefCountable }; /// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace) +/// \param wsp_only force skipping of whitespace only, don't consider skipping relaxed delimiter chars /// \throws InsufficientInput when the end of BWS cannot be confirmed -void ParseBws(Parser::Tokenizer &); +void ParseBws(Parser::Tokenizer &, const bool wsp_only = false); /// the right debugs() level for logging HTTP violation messages int ErrorLevel(); diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc index 04753395e16..41e1e5ddaea 100644 --- a/src/http/one/TeChunkedParser.cc +++ b/src/http/one/TeChunkedParser.cc @@ -125,8 +125,11 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok) // Code becomes much simpler when incremental parsing functions throw on // bad or insufficient input, like in the code below. TODO: Expand up. try { - tok.skipAll(CharacterSet::WSP); // Some servers send SP/TAB after chunk-size - parseChunkExtensions(tok); // a possibly empty chunk-ext list + // A possibly empty chunk-ext list. If no chunk-ext has been found, + // try to skip trailing BWS, because some servers send "chunk-size BWS CRLF". + if (!parseChunkExtensions(tok)) + ParseBws(tok, true); + tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf()); buf_ = tok.remaining(); parsingStage_ = theChunkSize ? 
Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME; @@ -140,20 +143,22 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok) /// Parses the chunk-ext list (RFC 9112 section 7.1.1: /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) -void +bool Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok) { + bool foundChunkExt = false; do { auto tok = callerTok; ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size if (!tok.skip(';')) - return; // reached the end of extensions (if any) + return foundChunkExt; // reached the end of extensions (if any) parseOneChunkExtension(tok); buf_ = tok.remaining(); // got one extension callerTok = tok; + foundChunkExt = true; } while (true); } diff --git a/src/http/one/TeChunkedParser.h b/src/http/one/TeChunkedParser.h index 02eacd1bb89..8c5d4bb4cba 100644 --- a/src/http/one/TeChunkedParser.h +++ b/src/http/one/TeChunkedParser.h @@ -71,7 +71,7 @@ class TeChunkedParser : public Http1::Parser private: bool parseChunkSize(Tokenizer &tok); bool parseChunkMetadataSuffix(Tokenizer &); - void parseChunkExtensions(Tokenizer &); + bool parseChunkExtensions(Tokenizer &); void parseOneChunkExtension(Tokenizer &); bool parseChunkBody(Tokenizer &tok); bool parseChunkEnd(Tokenizer &tok);