From cf7f0f3a1579c3a9127dd013420fca41546c842c Mon Sep 17 00:00:00 2001 From: Neil Fuller Date: Wed, 16 Mar 2016 14:24:40 +0000 Subject: [PATCH] Reapply an upstream OkHttp change to Android for HttpUrl Manually reapplied change from upstream to address HttpUrl encoding issues. Upstream details: Comment: Don't percent-encode non-ASCII characters in fragments Fixes https://github.com/square/okhttp/issues/1635 SHA: f4feb0adfcd8e209f90d4fffb6facf5c07f57110 Bug: 27590872 (cherry picked from commit 469258d7cc4690e91c3950020c4aea106f216b21) Change-Id: I35e688e30d307a1c3f8b0d0b4ca18b799e28ab7d --- .../java/com/squareup/okhttp/HttpUrlTest.java | 39 +++++++++- .../okhttp/UrlComponentEncodingTester.java | 14 ++++ .../squareup/okhttp/WebPlatformUrlTest.java | 2 - .../squareup/okhttp/FormEncodingBuilder.java | 8 +- .../java/com/squareup/okhttp/HttpUrl.java | 73 ++++++++++--------- 5 files changed, 95 insertions(+), 41 deletions(-) diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java index 4f04af9..d2fc31d 100644 --- a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java +++ b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java @@ -466,8 +466,45 @@ public final class HttpUrlTest { new UrlComponentEncodingTester() .override(Encoding.IDENTITY, ' ', '"', '#', '<', '>', '?', '`') .skipForUri('%', ' ', '"', '#', '<', '>', '\\', '^', '`', '{', '|', '}') + .identityForNonAscii() .test(Component.FRAGMENT); - // TODO(jwilson): don't percent-encode non-ASCII characters. (But do encode control characters!) + } + + @Test public void fragmentNonAscii() throws Exception { + HttpUrl url = HttpUrl.parse("http://host/#Σ"); + assertEquals("http://host/#Σ", url.toString()); + assertEquals("Σ", url.fragment()); + assertEquals("Σ", url.encodedFragment()); + assertEquals("http://host/#Σ", url.uri().toString()); + } + + @Test public void fragmentNonAsciiThatOffendsJavaNetUri() throws Exception { + HttpUrl url = HttpUrl.parse("http://host/#\u0080"); + assertEquals("http://host/#\u0080", url.toString()); + assertEquals("\u0080", url.fragment()); + assertEquals("\u0080", url.encodedFragment()); + try { + url.uri(); + fail(); + } catch (IllegalStateException expected) { + // Possibly a bug in java.net.URI. Many non-ASCII code points work, this one doesn't! + } + } + + @Test public void fragmentPercentEncodedNonAscii() throws Exception { + HttpUrl url = HttpUrl.parse("http://host/#%C2%80"); + assertEquals("http://host/#%C2%80", url.toString()); + assertEquals("\u0080", url.fragment()); + assertEquals("%C2%80", url.encodedFragment()); + assertEquals("http://host/#%C2%80", url.uri().toString()); + } + + @Test public void fragmentPercentEncodedPartialCodePoint() throws Exception { + HttpUrl url = HttpUrl.parse("http://host/#%80"); + assertEquals("http://host/#%80", url.toString()); + assertEquals("\ufffd", url.fragment()); // Unicode replacement character. + assertEquals("%80", url.encodedFragment()); + assertEquals("http://host/#%80", url.uri().toString()); } @Test public void relativePath() throws Exception { diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java b/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java index 99e9791..22502eb 100644 --- a/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java +++ b/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java @@ -27,6 +27,10 @@ /** Tests how each code point is encoded and decoded in the context of each URL component. */ class UrlComponentEncodingTester { + private static final int UNICODE_2 = 0x07ff; // Arbitrary code point that's 2 bytes in UTF-8. + private static final int UNICODE_3 = 0xffff; // Arbitrary code point that's 3 bytes in UTF-8. + private static final int UNICODE_4 = 0x10ffff; // Arbitrary code point that's 4 bytes in UTF-8. + /** * The default encode set for the ASCII range. The specific rules vary per-component: for example, * '?' may be identity-encoded in a fragment, but must be percent-encoded in a path. @@ -164,6 +168,9 @@ class UrlComponentEncodingTester { map.put((int) '}', Encoding.IDENTITY); map.put((int) '~', Encoding.IDENTITY); map.put( 0x7f, Encoding.PERCENT); // Delete + map.put( UNICODE_2, Encoding.PERCENT); + map.put( UNICODE_3, Encoding.PERCENT); + map.put( UNICODE_4, Encoding.PERCENT); defaultEncodings = Collections.unmodifiableMap(map); } @@ -181,6 +188,13 @@ public UrlComponentEncodingTester override(Encoding encoding, int... codePoints) return this; } + public UrlComponentEncodingTester identityForNonAscii() { + encodings.put(UNICODE_2, Encoding.IDENTITY); + encodings.put(UNICODE_3, Encoding.IDENTITY); + encodings.put(UNICODE_4, Encoding.IDENTITY); + return this; + } + /** * Configure a character to be skipped but only for conversion to and from {@code java.net.URI}. * That class is more strict than the others. diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java b/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java index e45761c..2c619c2 100644 --- a/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java +++ b/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java @@ -57,8 +57,6 @@ public static List parameters() { "Parsing: against ", "Parsing: against ", "Parsing: against ", - "Parsing: <#β> against ", - "Parsing: against ", "Parsing: against ", // This test fails on Java 7 but passes on Java 8. See HttpUrlTest.hostWithTrailingDot(). "Parsing: against ", diff --git a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java index 6f4b93c..f5134d9 100644 --- a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java +++ b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java @@ -33,10 +33,10 @@ public FormEncodingBuilder add(String name, String value) { content.writeByte('&'); } HttpUrl.canonicalize(content, name, 0, name.length(), - HttpUrl.FORM_ENCODE_SET, false, true); + HttpUrl.FORM_ENCODE_SET, false, true, true); content.writeByte('='); HttpUrl.canonicalize(content, value, 0, value.length(), - HttpUrl.FORM_ENCODE_SET, false, true); + HttpUrl.FORM_ENCODE_SET, false, true, true); return this; } @@ -46,10 +46,10 @@ public FormEncodingBuilder addEncoded(String name, String value) { content.writeByte('&'); } HttpUrl.canonicalize(content, name, 0, name.length(), - HttpUrl.FORM_ENCODE_SET, true, true); + HttpUrl.FORM_ENCODE_SET, true, true, true); content.writeByte('='); HttpUrl.canonicalize(content, value, 0, value.length(), - HttpUrl.FORM_ENCODE_SET, true, true); + HttpUrl.FORM_ENCODE_SET, true, true, true); return this; } diff --git a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java index 76764c2..68b09ef 100644 --- a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java +++ b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java @@ -686,25 +686,25 @@ public Builder scheme(String scheme) { public Builder username(String username) { if (username == null) throw new IllegalArgumentException("username == null"); - this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false); + this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, true); return this; } public Builder encodedUsername(String encodedUsername) { if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null"); - this.encodedUsername = canonicalize(encodedUsername, USERNAME_ENCODE_SET, true, false); + this.encodedUsername = canonicalize(encodedUsername, USERNAME_ENCODE_SET, true, false, true); return this; } public Builder password(String password) { if (password == null) throw new IllegalArgumentException("password == null"); - this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false); + this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, true); return this; } public Builder encodedPassword(String encodedPassword) { if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null"); - this.encodedPassword = canonicalize(encodedPassword, PASSWORD_ENCODE_SET, true, false); + this.encodedPassword = canonicalize(encodedPassword, PASSWORD_ENCODE_SET, true, false, true); return this; } @@ -747,7 +747,7 @@ public Builder addEncodedPathSegment(String encodedPathSegment) { public Builder setPathSegment(int index, String pathSegment) { if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null"); String canonicalPathSegment = canonicalize( - pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false); + pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, true); if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { throw new IllegalArgumentException("unexpected path segment: " + pathSegment); } @@ -760,7 +760,7 @@ public Builder setEncodedPathSegment(int index, String encodedPathSegment) { throw new IllegalArgumentException("encodedPathSegment == null"); } String canonicalPathSegment = canonicalize(encodedPathSegment, - 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false); + 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, true); encodedPathSegments.set(index, canonicalPathSegment); if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment); @@ -787,14 +787,15 @@ public Builder encodedPath(String encodedPath) { public Builder query(String query) { this.encodedQueryNamesAndValues = query != null - ? queryStringToNamesAndValues(canonicalize(query, QUERY_ENCODE_SET, false, true)) + ? queryStringToNamesAndValues(canonicalize(query, QUERY_ENCODE_SET, false, true, true)) : null; return this; } public Builder encodedQuery(String encodedQuery) { this.encodedQueryNamesAndValues = encodedQuery != null - ? queryStringToNamesAndValues(canonicalize(encodedQuery, QUERY_ENCODE_SET, true, true)) + ? queryStringToNamesAndValues( + canonicalize(encodedQuery, QUERY_ENCODE_SET, true, true, true)) : null; return this; } @@ -803,9 +804,10 @@ public Builder encodedQuery(String encodedQuery) { public Builder addQueryParameter(String name, String value) { if (name == null) throw new IllegalArgumentException("name == null"); if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); - encodedQueryNamesAndValues.add(canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true)); + encodedQueryNamesAndValues.add( + canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true, true)); encodedQueryNamesAndValues.add(value != null - ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, true) + ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, true, true) : null); return this; } @@ -815,9 +817,9 @@ public Builder addEncodedQueryParameter(String encodedName, String encodedValue) if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); encodedQueryNamesAndValues.add( - canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true)); + canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true, true)); encodedQueryNamesAndValues.add(encodedValue != null - ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, true) + ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, true, true) : null); return this; } @@ -837,7 +839,7 @@ public Builder setEncodedQueryParameter(String encodedName, String encodedValue) public Builder removeAllQueryParameters(String name) { if (name == null) throw new IllegalArgumentException("name == null"); if (encodedQueryNamesAndValues == null) return this; - String nameToRemove = canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true); + String nameToRemove = canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true, true); removeAllCanonicalQueryParameters(nameToRemove); return this; } @@ -846,7 +848,7 @@ public Builder removeAllEncodedQueryParameters(String encodedName) { if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); if (encodedQueryNamesAndValues == null) return this; removeAllCanonicalQueryParameters( - canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true)); + canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true, true)); return this; } @@ -865,14 +867,14 @@ private void removeAllCanonicalQueryParameters(String canonicalName) { public Builder fragment(String fragment) { this.encodedFragment = fragment != null - ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false) + ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false) : null; return this; } public Builder encodedFragment(String encodedFragment) { this.encodedFragment = encodedFragment != null - ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false) + ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false) : null; return this; } @@ -885,19 +887,20 @@ Builder reencodeForUri() { for (int i = 0, size = encodedPathSegments.size(); i < size; i++) { String pathSegment = encodedPathSegments.get(i); encodedPathSegments.set(i, - canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, false)); + canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, false, true)); } if (encodedQueryNamesAndValues != null) { for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) { String component = encodedQueryNamesAndValues.get(i); if (component != null) { encodedQueryNamesAndValues.set(i, - canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true)); + canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true)); } } } if (encodedFragment != null) { - encodedFragment = canonicalize(encodedFragment, FRAGMENT_ENCODE_SET_URI, true, false); + encodedFragment = canonicalize( + encodedFragment, FRAGMENT_ENCODE_SET_URI, true, false, false); } return this; } @@ -1010,19 +1013,19 @@ ParseResult parse(HttpUrl base, String input) { int passwordColonOffset = delimiterOffset( input, pos, componentDelimiterOffset, ":"); String canonicalUsername = canonicalize( - input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false); + input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, true); this.encodedUsername = hasUsername ? this.encodedUsername + "%40" + canonicalUsername : canonicalUsername; if (passwordColonOffset != componentDelimiterOffset) { hasPassword = true; this.encodedPassword = canonicalize(input, passwordColonOffset + 1, - componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false); + componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, true); } hasUsername = true; } else { this.encodedPassword = this.encodedPassword + "%40" + canonicalize( - input, pos, componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false); + input, pos, componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, true); } pos = componentDelimiterOffset + 1; break; @@ -1069,14 +1072,14 @@ ParseResult parse(HttpUrl base, String input) { if (pos < limit && input.charAt(pos) == '?') { int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#"); this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize( - input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, true)); + input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, true, true)); pos = queryDelimiterOffset; } // Fragment. if (pos < limit && input.charAt(pos) == '#') { this.encodedFragment = canonicalize( - input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false); + input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false); } return ParseResult.SUCCESS; @@ -1113,7 +1116,7 @@ private void resolvePath(String input, int pos, int limit) { private void push(String input, int pos, int limit, boolean addTrailingSlash, boolean alreadyEncoded) { String segment = canonicalize( - input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false); + input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, true); if (isDot(segment)) { return; // Skip '.' path segments. } @@ -1464,7 +1467,7 @@ private static String inet6AddressToAscii(byte[] address) { private static int parsePort(String input, int pos, int limit) { try { // Canonicalize the port string to skip '\n' etc. - String portString = canonicalize(input, pos, limit, "", false, false); + String portString = canonicalize(input, pos, limit, "", false, false, true); int i = Integer.parseInt(portString); if (i > 0 && i <= 65535) return i; return -1; @@ -1553,21 +1556,23 @@ static int decodeHexDigit(char c) { * * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'. * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded + * @param asciiOnly true to encode all non-ASCII codepoints. */ static String canonicalize(String input, int pos, int limit, String encodeSet, - boolean alreadyEncoded, boolean plusIsSpace) { + boolean alreadyEncoded, boolean plusIsSpace, boolean asciiOnly) { int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { codePoint = input.codePointAt(i); if (codePoint < 0x20 - || codePoint >= 0x7f + || codePoint == 0x7f + || (codePoint >= 0x80 && asciiOnly) || encodeSet.indexOf(codePoint) != -1 || (codePoint == '%' && !alreadyEncoded) || (codePoint == '+' && plusIsSpace)) { // Slow path: the character at i requires encoding! Buffer out = new Buffer(); out.writeUtf8(input, pos, i); - canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, plusIsSpace); + canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, plusIsSpace, asciiOnly); return out.readUtf8(); } } @@ -1577,7 +1582,7 @@ static String canonicalize(String input, int pos, int limit, String encodeSet, } static void canonicalize(Buffer out, String input, int pos, int limit, - String encodeSet, boolean alreadyEncoded, boolean plusIsSpace) { + String encodeSet, boolean alreadyEncoded, boolean plusIsSpace, boolean asciiOnly) { Buffer utf8Buffer = null; // Lazily allocated. int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { @@ -1589,7 +1594,8 @@ static void canonicalize(Buffer out, String input, int pos, int limit, // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'. out.writeUtf8(alreadyEncoded ? "+" : "%2B"); } else if (codePoint < 0x20 - || codePoint >= 0x7f + || codePoint == 0x7f + || (codePoint >= 0x80 && asciiOnly) || encodeSet.indexOf(codePoint) != -1 || (codePoint == '%' && !alreadyEncoded)) { // Percent encode this character. @@ -1611,8 +1617,7 @@ static void canonicalize(Buffer out, String input, int pos, int limit, } static String canonicalize( - String input, String encodeSet, boolean alreadyEncoded, boolean plusIsSpace) { - return canonicalize( - input, 0, input.length(), encodeSet, alreadyEncoded, plusIsSpace); + String input, String encodeSet, boolean alreadyEncoded, boolean query, boolean asciiOnly) { + return canonicalize(input, 0, input.length(), encodeSet, alreadyEncoded, query, asciiOnly); } }