From a7de066b05bbf23974430a43b5550caf73b4c48a Mon Sep 17 00:00:00 2001 From: Mildred Ki'Lya Date: Sat, 9 Jan 2021 01:24:44 +0100 Subject: [PATCH] Improve uri.decodeQuery to never raise an error In case of a malformed query string where there is an `=` in the value, handle this character as part of the value instead of throwing an error. The following query string should no longer crash a program: key=value&key2=x=1 It will be interpreted as [("key", "value"), ("key2", "x=1")] This is correct according to the latest WhatWG's HTML5 specification regarding the urlencoded parser: https://url.spec.whatwg.org/#concept-urlencoded-parser Older behavior can be restored using the -d:nimLegacyParseQueryStrict flag. --- changelog.md | 6 ++++++ lib/pure/uri.nim | 36 ++++++++++++++++++++++-------------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/changelog.md b/changelog.md index 0705d00909f40..02374a1dfdfe8 100644 --- a/changelog.md +++ b/changelog.md @@ -96,6 +96,12 @@ with other backends. see #9125. Use `-d:nimLegacyJsRound` for previous behavior. - Added `socketstream` module that wraps sockets in the stream interface +- Changed the behavior of `uri.decodeQuery` when there are unencoded `=` + characters in the decoded values. Prior versions would raise an error. This is + no longer the case to comply with the HTML spec and other languages' + implementations. Old behavior can be obtained with + `-d:nimLegacyParseQueryStrict` + diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim index 7f553be1abc97..fa2e74bdbfd12 100644 --- a/lib/pure/uri.nim +++ b/lib/pure/uri.nim @@ -162,21 +162,25 @@ func encodeQuery*(query: openArray[(string, string)], usePlus = true, iterator decodeQuery*(data: string): tuple[key, value: TaintedString] = ## Reads and decodes query string ``data`` and yields the (key, value) pairs the - ## data consists of. 
If compiled with -d:nimLegacyParseQueryStrict, an error + ## is raised when there is an unencoded ``=`` character in a decoded value, + ## which was the behavior in Nim < 1.6 runnableExamples: - import std/sugar - let s = collect(newSeq): - for k, v in decodeQuery("foo=1&bar=2"): (k, v) - doAssert s == @[("foo", "1"), ("bar", "2")] + import std/sequtils + doAssert toSeq(decodeQuery("foo=1&bar=2=3")) == @[("foo", "1"), ("bar", "2=3")] + doAssert toSeq(decodeQuery("&a&=b&=&&")) == @[("", ""), ("a", ""), ("", "b"), ("", ""), ("", "")] - proc parseData(data: string, i: int, field: var string): int = + proc parseData(data: string, i: int, field: var string, sep: char): int = result = i while result < data.len: - case data[result] + let c = data[result] + case c of '%': add(field, decodePercent(data, result)) of '+': add(field, ' ') - of '=', '&': break - else: add(field, data[result]) + of '&': break + else: + if c == sep: break + else: add(field, data[result]) inc(result) var i = 0 @@ -185,16 +189,20 @@ iterator decodeQuery*(data: string): tuple[key, value: TaintedString] = # decode everything in one pass: while i < data.len: setLen(name, 0) # reuse memory - i = parseData(data, i, name) + i = parseData(data, i, name, '=') setLen(value, 0) # reuse memory if i < data.len and data[i] == '=': inc(i) # skip '=' - i = parseData(data, i, value) + when defined(nimLegacyParseQueryStrict): + i = parseData(data, i, value, '=') + else: + i = parseData(data, i, value, '&') yield (name.TaintedString, value.TaintedString) if i < data.len: - if data[i] == '&': inc(i) - else: - uriParseError("'&' expected at index '$#' for '$#'" % [$i, data]) + when defined(nimLegacyParseQueryStrict): + if data[i] != '&': + uriParseError("'&' expected at index '$#' for '$#'" % [$i, data]) + inc(i) func parseAuthority(authority: string, result: var Uri) = var i = 0