From 9c8d35f899035fa06021ab3fe6919f892c2f0c6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lubo=C5=A1=20Uhliarik?= <luhliari@redhat.com>
Date: Fri, 11 Oct 2024 02:06:31 +0200
Subject: [PATCH] Added new argument to Http::One::ParseBws()

The new wsp_only argument of ParseBws() selects which set of whitespace
characters is skipped. If wsp_only is true, only SP and HTAB characters
are skipped; otherwise, Parser::WhitespaceCharacters() is used as before.

Also optimized the number of ParseBws() calls.
---
 src/http/one/Parser.cc          |  4 ++--
 src/http/one/Parser.h           |  3 ++-
 src/http/one/TeChunkedParser.cc | 13 +++++++++----
 src/http/one/TeChunkedParser.h  |  2 +-
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc
index b1908316a0b..01d7e3bc0e8 100644
--- a/src/http/one/Parser.cc
+++ b/src/http/one/Parser.cc
@@ -273,9 +273,9 @@ Http::One::ErrorLevel()
 
 // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
 void
-Http::One::ParseBws(Parser::Tokenizer &tok)
+Http::One::ParseBws(Parser::Tokenizer &tok, const bool wsp_only)
 {
-    const auto count = tok.skipAll(Parser::WhitespaceCharacters());
+    const auto count = tok.skipAll(wsp_only ? CharacterSet::WSP : Parser::WhitespaceCharacters());
 
     if (tok.atEnd())
         throw InsufficientInput(); // even if count is positive
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h
index d9a0ac8c273..08200371cd6 100644
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -163,8 +163,9 @@ class Parser : public RefCountable
 };
 
 /// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
+/// \param wsp_only when true, skip only SP and HTAB; do not skip the relaxed delimiter characters
 /// \throws InsufficientInput when the end of BWS cannot be confirmed
-void ParseBws(Parser::Tokenizer &);
+void ParseBws(Parser::Tokenizer &, const bool wsp_only = false);
 
 /// the right debugs() level for logging HTTP violation messages
 int ErrorLevel();
diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc
index 04753395e16..41e1e5ddaea 100644
--- a/src/http/one/TeChunkedParser.cc
+++ b/src/http/one/TeChunkedParser.cc
@@ -125,8 +125,11 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
     // Code becomes much simpler when incremental parsing functions throw on
     // bad or insufficient input, like in the code below. TODO: Expand up.
     try {
-        tok.skipAll(CharacterSet::WSP); // Some servers send SP/TAB after chunk-size
-        parseChunkExtensions(tok); // a possibly empty chunk-ext list
+        // A possibly empty chunk-ext list. If no chunk-ext has been found,
+        // try to skip trailing BWS, because some servers send "chunk-size BWS CRLF".
+        if (!parseChunkExtensions(tok))
+            ParseBws(tok, true);
+
         tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf());
         buf_ = tok.remaining();
         parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
@@ -140,20 +143,22 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
 
 /// Parses the chunk-ext list (RFC 9112 section 7.1.1:
 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
-void
+bool
 Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok)
 {
+    bool foundChunkExt = false;
     do {
         auto tok = callerTok;
 
         ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
 
         if (!tok.skip(';'))
-            return; // reached the end of extensions (if any)
+            return foundChunkExt; // reached the end of extensions (if any)
 
         parseOneChunkExtension(tok);
         buf_ = tok.remaining(); // got one extension
         callerTok = tok;
+        foundChunkExt = true;
     } while (true);
 }
 
diff --git a/src/http/one/TeChunkedParser.h b/src/http/one/TeChunkedParser.h
index 02eacd1bb89..8c5d4bb4cba 100644
--- a/src/http/one/TeChunkedParser.h
+++ b/src/http/one/TeChunkedParser.h
@@ -71,7 +71,7 @@ class TeChunkedParser : public Http1::Parser
 private:
     bool parseChunkSize(Tokenizer &tok);
     bool parseChunkMetadataSuffix(Tokenizer &);
-    void parseChunkExtensions(Tokenizer &);
+    bool parseChunkExtensions(Tokenizer &);
     void parseOneChunkExtension(Tokenizer &);
     bool parseChunkBody(Tokenizer &tok);
     bool parseChunkEnd(Tokenizer &tok);