Skip to content

Commit

Permalink
FIX: enhex/uri and dehex/uri with a Unicode input
Browse files Browse the repository at this point in the history
  • Loading branch information
Oldes committed Jun 27, 2022
1 parent c6bbbae commit 66bf626
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 5 deletions.
23 changes: 19 additions & 4 deletions src/core/n-strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,12 @@ static struct digest {
*dp++ = (REBUNI)n;
up += 3;
len -= 2;
} else {
}
else if (*up == space_char && as_uri) {
*dp++ = ' ';
up++;
}
else {
*dp++ = *up++;
}
}
Expand Down Expand Up @@ -723,13 +728,13 @@ static struct digest {
*dp++ = space_char;
continue;
}
if (c == space_char) goto escaped;
if (c == space_char) goto escaped_ascii;
}
if (Check_Bit_Cased(VAL_SERIES(val_bitset), c)) {
*dp++ = c;
continue;
}
escaped:
escaped_ascii:
*dp++ = escape_char;
*dp++ = Hex_Digits[(c & 0xf0) >> 4];
*dp++ = Hex_Digits[ c & 0xf];
Expand All @@ -742,7 +747,16 @@ static struct digest {
while (up < ep) {
REBUNI c = up[0];
up++;

if (no_space) {
if (c == ' ') {
*dp++ = space_char;
continue;
}
if (c == space_char) {
encoded_size = Encode_UTF8_Char(encoded, c);
goto escaped_uni;
}
}
if (c >= 0x80) {// all non-ASCII characters *must* be percent encoded
encoded_size = Encode_UTF8_Char(encoded, c);
} else {
Expand All @@ -753,6 +767,7 @@ static struct digest {
encoded[0] = cast(REBYTE, c);
encoded_size = 1;
}
escaped_uni:
for (n = 0; n < encoded_size; ++n) {
*dp++ = escape_char;
*dp++ = Hex_Digits[(encoded[n] & 0xf0) >> 4];
Expand Down
12 changes: 11 additions & 1 deletion src/tests/units/series-test.r3
Original file line number Diff line number Diff line change
Expand Up @@ -1723,15 +1723,25 @@ Rebol [
--test-- "ENHEX/uri"
--assert "a%20b%2B" = enhex "a b+"
--assert "a+b%2B" = enhex/uri "a b+"
--assert "a%20%C3%A1%2B" = enhex "a á+"
--assert "a+%C3%A1%2B" = enhex/uri "a á+"
; quoted-printable:
--assert "a=20b_" = enhex/escape "a b_" #"="
--assert "a_b=5F" = enhex/uri/escape "a b_" #"="
--assert "a_b=5F" = enhex/uri/escape "a b_" #"="
--assert "a=20=C3=A1_" = enhex/escape "a á_" #"="
--assert "a_=C3=A1=5F" = enhex/escape/uri "a á_" #"="

--test-- "DEHEX/uri"
--assert "a+b+" = dehex "a+b%2B"
--assert "a b+" = dehex/uri "a+b%2B"
; quoted-printable:
--assert "a_b_" = dehex/escape"a_b=5F" #"="
--assert "a b_" = dehex/uri/escape"a_b=5F" #"="
; to get proper UTF-8 results, we must use binary input (for now?)
--assert "a á+" = to string! dehex to binary! "a%20%C3%A1%2B"
--assert "a á+" = to string! dehex/uri to binary! "a+%C3%A1%2B"
--assert "a á_" = to string! dehex/escape to binary! "a=20=C3=A1_" #"="
--assert "a á_" = to string! dehex/escape/uri to binary! "a_=C3=A1=5F" #"="

===end-group===

Expand Down

0 comments on commit 66bf626

Please sign in to comment.