diff --git a/src/core/n-strings.c b/src/core/n-strings.c index 8cbc4addf3..1d28fc22d4 100644 --- a/src/core/n-strings.c +++ b/src/core/n-strings.c @@ -652,7 +652,12 @@ static struct digest { *dp++ = (REBUNI)n; up += 3; len -= 2; - } else { + } + else if (*up == space_char && as_uri) { + *dp++ = ' '; + up++; + } + else { *dp++ = *up++; } } @@ -723,13 +728,13 @@ static struct digest { *dp++ = space_char; continue; } - if (c == space_char) goto escaped; + if (c == space_char) goto escaped_ascii; } if (Check_Bit_Cased(VAL_SERIES(val_bitset), c)) { *dp++ = c; continue; } - escaped: + escaped_ascii: *dp++ = escape_char; *dp++ = Hex_Digits[(c & 0xf0) >> 4]; *dp++ = Hex_Digits[ c & 0xf]; @@ -742,7 +747,16 @@ static struct digest { while (up < ep) { REBUNI c = up[0]; up++; - + if (no_space) { + if (c == ' ') { + *dp++ = space_char; + continue; + } + if (c == space_char) { + encoded_size = Encode_UTF8_Char(encoded, c); + goto escaped_uni; + } + } if (c >= 0x80) {// all non-ASCII characters *must* be percent encoded encoded_size = Encode_UTF8_Char(encoded, c); } else { @@ -753,6 +767,7 @@ static struct digest { encoded[0] = cast(REBYTE, c); encoded_size = 1; } + escaped_uni: for (n = 0; n < encoded_size; ++n) { *dp++ = escape_char; *dp++ = Hex_Digits[(encoded[n] & 0xf0) >> 4]; diff --git a/src/tests/units/series-test.r3 b/src/tests/units/series-test.r3 index e8b128e0d4..9c5825832b 100644 --- a/src/tests/units/series-test.r3 +++ b/src/tests/units/series-test.r3 @@ -1723,15 +1723,25 @@ Rebol [ --test-- "ENHEX/uri" --assert "a%20b%2B" = enhex "a b+" --assert "a+b%2B" = enhex/uri "a b+" + --assert "a%20%C3%A1%2B" = enhex "a á+" + --assert "a+%C3%A1%2B" = enhex/uri "a á+" ; quoted-printable: --assert "a=20b_" = enhex/escape "a b_" #"=" - --assert "a_b=5F" = enhex/uri/escape "a b_" #"=" + --assert "a_b=5F" = enhex/uri/escape "a b_" #"=" + --assert "a=20=C3=A1_" = enhex/escape "a á_" #"=" + --assert "a_=C3=A1=5F" = enhex/escape/uri "a á_" #"=" + --test-- "DEHEX/uri" --assert "a+b+" 
= dehex "a+b%2B" --assert "a b+" = dehex/uri "a+b%2B" ; quoted-printable: --assert "a_b_" = dehex/escape"a_b=5F" #"=" --assert "a b_" = dehex/uri/escape"a_b=5F" #"=" + ; to get proper UTF-8 results, we must use binary input (for now?) + --assert "a á+" = to string! dehex to binary! "a%20%C3%A1%2B" + --assert "a á+" = to string! dehex/uri to binary! "a+%C3%A1%2B" + --assert "a á_" = to string! dehex/escape to binary! "a=20=C3=A1_" #"=" + --assert "a á_" = to string! dehex/escape/uri to binary! "a_=C3=A1=5F" #"=" ===end-group===