Skip to content

Commit

Permalink
Improve uri.parseQuery to never raise an error (nim-lang#16647)
Browse files Browse the repository at this point in the history
In case of malformed query string where there is `=` on the value, handle
this character as part of the value instead of throwing an error.

The following query string should no longer crash a program:

    key=value&key2=x=1

It will be interpreted as [("key", "value"), ("key2", "x=1")]

This is correct according to latest WhatWG's HTML5 specification
recarding the urlencoded parser:
https://url.spec.whatwg.org/#concept-urlencoded-parser

Older behavior can be restored using the -d:nimLegacyParseQueryStrict
flag.
  • Loading branch information
mildred authored and ardek66 committed Mar 26, 2021
1 parent c5d65c6 commit 0526044
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 27 deletions.
10 changes: 10 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@
with other backends. see #9125. Use `-d:nimLegacyJsRound` for previous behavior.
- Added `socketstream` module that wraps sockets in the stream interface

- Changed the behavior of `uri.decodeQuery` when there are unencoded `=`
characters in the decoded values. Prior versions would raise an error. This is
no longer the case to comply with the HTML spec and other languages
implementations. Old behavior can be obtained with
`-d:nimLegacyParseQueryStrict`. `cgi.decodeData` which uses the same
underlying code is also updated the same way.




- Added `math.signbit`.

- Removed the optional `longestMatch` parameter of the `critbits._WithPrefix` iterators (it never worked reliably)
Expand Down
14 changes: 4 additions & 10 deletions lib/pure/cgi.nim
Original file line number Diff line number Diff line change
Expand Up @@ -84,23 +84,17 @@ proc getEncodedData(allowedMethods: set[RequestMethod]): string =
iterator decodeData*(data: string): tuple[key, value: TaintedString] =
## Reads and decodes CGI data and yields the (name, value) pairs the
## data consists of.
try:
for (key, value) in uri.decodeQuery(data):
yield (key, value)
except UriParseError as e:
cgiError(e.msg)
for (key, value) in uri.decodeQuery(data):
yield (key, value)

iterator decodeData*(allowedMethods: set[RequestMethod] =
{methodNone, methodPost, methodGet}): tuple[key, value: TaintedString] =
## Reads and decodes CGI data and yields the (name, value) pairs the
## data consists of. If the client does not use a method listed in the
## `allowedMethods` set, a `CgiError` exception is raised.
let data = getEncodedData(allowedMethods)
try:
for (key, value) in uri.decodeQuery(data):
yield (key, value)
except UriParseError as e:
cgiError(e.msg)
for (key, value) in uri.decodeQuery(data):
yield (key, value)

proc readData*(allowedMethods: set[RequestMethod] =
{methodNone, methodPost, methodGet}): StringTableRef =
Expand Down
38 changes: 23 additions & 15 deletions lib/pure/uri.nim
Original file line number Diff line number Diff line change
Expand Up @@ -161,22 +161,26 @@ func encodeQuery*(query: openArray[(string, string)], usePlus = true,
result.add(encodeUrl(val, usePlus))

iterator decodeQuery*(data: string): tuple[key, value: TaintedString] =
## Reads and decodes query string ``data`` and yields the (key, value) pairs the
## data consists of.
## Reads and decodes query string `data` and yields the `(key, value)` pairs
## the data consists of. If compiled with `-d:nimLegacyParseQueryStrict`, an
## error is raised when there is an unencoded `=` character in a decoded
## value, which was the behavior in Nim < 1.5.1
runnableExamples:
import std/sugar
let s = collect(newSeq):
for k, v in decodeQuery("foo=1&bar=2"): (k, v)
doAssert s == @[("foo", "1"), ("bar", "2")]
import std/sequtils
doAssert toSeq(decodeQuery("foo=1&bar=2=3")) == @[("foo", "1"), ("bar", "2=3")]
doAssert toSeq(decodeQuery("&a&=b&=&&")) == @[("", ""), ("a", ""), ("", "b"), ("", ""), ("", "")]

proc parseData(data: string, i: int, field: var string): int =
proc parseData(data: string, i: int, field: var string, sep: char): int =
result = i
while result < data.len:
case data[result]
let c = data[result]
case c
of '%': add(field, decodePercent(data, result))
of '+': add(field, ' ')
of '=', '&': break
else: add(field, data[result])
of '&': break
else:
if c == sep: break
else: add(field, data[result])
inc(result)

var i = 0
Expand All @@ -185,16 +189,20 @@ iterator decodeQuery*(data: string): tuple[key, value: TaintedString] =
# decode everything in one pass:
while i < data.len:
setLen(name, 0) # reuse memory
i = parseData(data, i, name)
i = parseData(data, i, name, '=')
setLen(value, 0) # reuse memory
if i < data.len and data[i] == '=':
inc(i) # skip '='
i = parseData(data, i, value)
when defined(nimLegacyParseQueryStrict):
i = parseData(data, i, value, '=')
else:
i = parseData(data, i, value, '&')
yield (name.TaintedString, value.TaintedString)
if i < data.len:
if data[i] == '&': inc(i)
else:
uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])
when defined(nimLegacyParseQueryStrict):
if data[i] != '&':
uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])
inc(i)

func parseAuthority(authority: string, result: var Uri) =
var i = 0
Expand Down
3 changes: 1 addition & 2 deletions tests/stdlib/turi.nim
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,7 @@ template main() =

block: # decodeQuery
doAssert toSeq(decodeQuery("a=1&b=0")) == @[("a", "1"), ("b", "0")]
doAssertRaises(UriParseError):
discard toSeq(decodeQuery("a=1&b=2c=6"))
doAssert toSeq(decodeQuery("a=1&b=2c=6")) == @[("a", "1"), ("b", "2c=6")]

static: main()
main()

0 comments on commit 0526044

Please sign in to comment.