From 3281dee547ef61c7c792a6e2b94b4b66b0486376 Mon Sep 17 00:00:00 2001 From: Neil Fuller Date: Wed, 16 Mar 2016 14:48:38 +0000 Subject: [PATCH] Reapply an upstream OkHttp change to Android for HttpUrl Manually reapplied change from upstream to address HttpUrl encoding issues. This change did not apply cleanly. Upstream FormEncodingBuilder has been refactored. Equivalent changes have been made. Upstream details: Comment: Never throw converting an HttpUrl to a java.net.URI. Just do arbitrary amounts of transformation. Sigh. Closes https://github.com/square/okhttp/issues/2116 SHA: d77edcc8905148f18a691be180c4f8f366a5ee1b Bug: 27590872 (cherry picked from commit 4c521731e582bbd36d1ef7276b25a347f91d9bbb) Change-Id: I15317abbfcd6d7d4af3ce9793f61c912d9b66991 --- .../java/com/squareup/okhttp/HttpUrlTest.java | 60 ++++++--- .../squareup/okhttp/FormEncodingBuilder.java | 8 +- .../java/com/squareup/okhttp/HttpUrl.java | 116 +++++++++++------- 3 files changed, 115 insertions(+), 69 deletions(-) diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java index d2fc31d..71deb6c 100644 --- a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java +++ b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java @@ -483,12 +483,7 @@ public final class HttpUrlTest { assertEquals("http://host/#\u0080", url.toString()); assertEquals("\u0080", url.fragment()); assertEquals("\u0080", url.encodedFragment()); - try { - url.uri(); - fail(); - } catch (IllegalStateException expected) { - // Possibly a bug in java.net.URI. Many non-ASCII code points work, this one doesn't! - } + assertEquals(new URI("http://host/#"), url.uri()); // Control characters may be stripped! } @Test public void fragmentPercentEncodedNonAscii() throws Exception { @@ -1056,19 +1051,46 @@ public final class HttpUrlTest { assertEquals("http://host/#=[]:;%22~%7C?%23@%5E/$%25*", url.uri().toString()); } - @Test public void toUriWithMalformedPercentEscape() throws Exception { - HttpUrl url = new HttpUrl.Builder() - .scheme("http") - .host("host") - .encodedPath("/%xx") - .build(); - assertEquals("http://host/%xx", url.toString()); - try { - url.uri(); - fail(); - } catch (IllegalStateException expected) { - assertEquals("not valid as a java.net.URI: http://host/%xx", expected.getMessage()); - } + @Test public void toUriWithControlCharacters() throws Exception { + // Percent-encoded in the path. + assertEquals(new URI("http://host/a%00b"), HttpUrl.parse("http://host/a\u0000b").uri()); + assertEquals(new URI("http://host/a%C2%80b"), HttpUrl.parse("http://host/a\u0080b").uri()); + assertEquals(new URI("http://host/a%C2%9Fb"), HttpUrl.parse("http://host/a\u009fb").uri()); + // Percent-encoded in the query. + assertEquals(new URI("http://host/?a%00b"), HttpUrl.parse("http://host/?a\u0000b").uri()); + assertEquals(new URI("http://host/?a%C2%80b"), HttpUrl.parse("http://host/?a\u0080b").uri()); + assertEquals(new URI("http://host/?a%C2%9Fb"), HttpUrl.parse("http://host/?a\u009fb").uri()); + // Stripped from the fragment. + assertEquals(new URI("http://host/#a%00b"), HttpUrl.parse("http://host/#a\u0000b").uri()); + assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u0080b").uri()); + assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u009fb").uri()); + } + + @Test public void toUriWithSpaceCharacters() throws Exception { + // Percent-encoded in the path. + assertEquals(new URI("http://host/a%0Bb"), HttpUrl.parse("http://host/a\u000bb").uri()); + assertEquals(new URI("http://host/a%20b"), HttpUrl.parse("http://host/a b").uri()); + assertEquals(new URI("http://host/a%E2%80%89b"), HttpUrl.parse("http://host/a\u2009b").uri()); + assertEquals(new URI("http://host/a%E3%80%80b"), HttpUrl.parse("http://host/a\u3000b").uri()); + // Percent-encoded in the query. + assertEquals(new URI("http://host/?a%0Bb"), HttpUrl.parse("http://host/?a\u000bb").uri()); + assertEquals(new URI("http://host/?a%20b"), HttpUrl.parse("http://host/?a b").uri()); + assertEquals(new URI("http://host/?a%E2%80%89b"), HttpUrl.parse("http://host/?a\u2009b").uri()); + assertEquals(new URI("http://host/?a%E3%80%80b"), HttpUrl.parse("http://host/?a\u3000b").uri()); + // Stripped from the fragment. + assertEquals(new URI("http://host/#a%0Bb"), HttpUrl.parse("http://host/#a\u000bb").uri()); + assertEquals(new URI("http://host/#a%20b"), HttpUrl.parse("http://host/#a b").uri()); + assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u2009b").uri()); + assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u3000b").uri()); + } + + @Test public void toUriWithNonHexPercentEscape() throws Exception { + assertEquals(new URI("http://host/%25xx"), HttpUrl.parse("http://host/%xx").uri()); + } + + @Test public void toUriWithTruncatedPercentEscape() throws Exception { + assertEquals(new URI("http://host/%25a"), HttpUrl.parse("http://host/%a").uri()); + assertEquals(new URI("http://host/%25"), HttpUrl.parse("http://host/%").uri()); } @Test public void fromJavaNetUrl() throws Exception { diff --git a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java index f5134d9..96f6917 100644 --- a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java +++ b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java @@ -33,10 +33,10 @@ public FormEncodingBuilder add(String name, String value) { content.writeByte('&'); } HttpUrl.canonicalize(content, name, 0, name.length(), - HttpUrl.FORM_ENCODE_SET, false, true, true); + HttpUrl.FORM_ENCODE_SET, false, false, true, true); content.writeByte('='); HttpUrl.canonicalize(content, value, 0, value.length(), - HttpUrl.FORM_ENCODE_SET, false, true, true); + HttpUrl.FORM_ENCODE_SET, false, false, true, true); return this; } @@ -46,10 +46,10 @@ public FormEncodingBuilder addEncoded(String name, String value) { content.writeByte('&'); } HttpUrl.canonicalize(content, name, 0, name.length(), - HttpUrl.FORM_ENCODE_SET, true, true, true); + HttpUrl.FORM_ENCODE_SET, true, false, true, true); content.writeByte('='); HttpUrl.canonicalize(content, value, 0, value.length(), - HttpUrl.FORM_ENCODE_SET, true, true, true); + HttpUrl.FORM_ENCODE_SET, true, false, true, true); return this; } diff --git a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java index 68b09ef..beabeca 100644 --- a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java +++ b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java @@ -327,19 +327,29 @@ public URL url() { } /** - * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} forbids certain characters - * like {@code [} and {@code |}, the returned URI may escape more characters than this URL. + * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this + * class, the returned URI may be semantically different from this URL: + * * - *

This method throws an unchecked {@link IllegalStateException} if it cannot be converted to a - * URI even after escaping forbidden characters. In particular, URLs that contain malformed - * percent escapes like {@code http://host/%xx} will trigger this exception. + *

These differences may have a significant consequence when the URI is interpretted by a + * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided. */ public URI uri() { + String uri = newBuilder().reencodeForUri().toString(); try { - String uri = newBuilder().reencodeForUri().toString(); return new URI(uri); } catch (URISyntaxException e) { - throw new IllegalStateException("not valid as a java.net.URI: " + url); + // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry. + try { + String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", ""); + return URI.create(stripped); + } catch (Exception e1) { + throw new RuntimeException(e); // Unexpected! + } } } @@ -686,25 +696,27 @@ public Builder scheme(String scheme) { public Builder username(String username) { if (username == null) throw new IllegalArgumentException("username == null"); - this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, true); + this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true); return this; } public Builder encodedUsername(String encodedUsername) { if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null"); - this.encodedUsername = canonicalize(encodedUsername, USERNAME_ENCODE_SET, true, false, true); + this.encodedUsername = canonicalize( + encodedUsername, USERNAME_ENCODE_SET, true, false, false, true); return this; } public Builder password(String password) { if (password == null) throw new IllegalArgumentException("password == null"); - this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, true); + this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true); return this; } public Builder encodedPassword(String encodedPassword) { if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null"); - this.encodedPassword = canonicalize(encodedPassword, PASSWORD_ENCODE_SET, true, false, true); + this.encodedPassword = canonicalize( + encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true); return this; } @@ -747,7 +759,7 @@ public Builder addEncodedPathSegment(String encodedPathSegment) { public Builder setPathSegment(int index, String pathSegment) { if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null"); String canonicalPathSegment = canonicalize( - pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, true); + pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, false, true); if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { throw new IllegalArgumentException("unexpected path segment: " + pathSegment); } @@ -760,7 +772,7 @@ public Builder setEncodedPathSegment(int index, String encodedPathSegment) { throw new IllegalArgumentException("encodedPathSegment == null"); } String canonicalPathSegment = canonicalize(encodedPathSegment, - 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, true); + 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, false, true); encodedPathSegments.set(index, canonicalPathSegment); if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment); @@ -787,7 +799,8 @@ public Builder encodedPath(String encodedPath) { public Builder query(String query) { this.encodedQueryNamesAndValues = query != null - ? queryStringToNamesAndValues(canonicalize(query, QUERY_ENCODE_SET, false, true, true)) + ? queryStringToNamesAndValues(canonicalize( + query, QUERY_ENCODE_SET, false, false, true, true)) : null; return this; } @@ -795,7 +808,7 @@ public Builder query(String query) { public Builder encodedQuery(String encodedQuery) { this.encodedQueryNamesAndValues = encodedQuery != null ? queryStringToNamesAndValues( - canonicalize(encodedQuery, QUERY_ENCODE_SET, true, true, true)) + canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true)) : null; return this; } @@ -805,9 +818,9 @@ public Builder addQueryParameter(String name, String value) { if (name == null) throw new IllegalArgumentException("name == null"); if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); encodedQueryNamesAndValues.add( - canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true, true)); + canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)); encodedQueryNamesAndValues.add(value != null - ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, true, true) + ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true) : null); return this; } @@ -817,9 +830,9 @@ public Builder addEncodedQueryParameter(String encodedName, String encodedValue) if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); encodedQueryNamesAndValues.add( - canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true, true)); + canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)); encodedQueryNamesAndValues.add(encodedValue != null - ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, true, true) + ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, false, true, true) : null); return this; } @@ -839,7 +852,8 @@ public Builder setEncodedQueryParameter(String encodedName, String encodedValue) public Builder removeAllQueryParameters(String name) { if (name == null) throw new IllegalArgumentException("name == null"); if (encodedQueryNamesAndValues == null) return this; - String nameToRemove = canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true, true); + String nameToRemove = canonicalize( + name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true); removeAllCanonicalQueryParameters(nameToRemove); return this; } @@ -848,7 +862,7 @@ public Builder removeAllEncodedQueryParameters(String encodedName) { if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); if (encodedQueryNamesAndValues == null) return this; removeAllCanonicalQueryParameters( - canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true, true)); + canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)); return this; } @@ -867,14 +881,14 @@ private void removeAllCanonicalQueryParameters(String canonicalName) { public Builder fragment(String fragment) { this.encodedFragment = fragment != null - ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false) + ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false) : null; return this; } public Builder encodedFragment(String encodedFragment) { this.encodedFragment = encodedFragment != null - ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false) + ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false) : null; return this; } @@ -887,20 +901,20 @@ Builder reencodeForUri() { for (int i = 0, size = encodedPathSegments.size(); i < size; i++) { String pathSegment = encodedPathSegments.get(i); encodedPathSegments.set(i, - canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, false, true)); + canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true)); } if (encodedQueryNamesAndValues != null) { for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) { String component = encodedQueryNamesAndValues.get(i); if (component != null) { encodedQueryNamesAndValues.set(i, - canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true)); + canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true)); } } } if (encodedFragment != null) { encodedFragment = canonicalize( - encodedFragment, FRAGMENT_ENCODE_SET_URI, true, false, false); + encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false); } return this; } @@ -1013,19 +1027,19 @@ ParseResult parse(HttpUrl base, String input) { int passwordColonOffset = delimiterOffset( input, pos, componentDelimiterOffset, ":"); String canonicalUsername = canonicalize( - input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, true); + input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true); this.encodedUsername = hasUsername ? this.encodedUsername + "%40" + canonicalUsername : canonicalUsername; if (passwordColonOffset != componentDelimiterOffset) { hasPassword = true; this.encodedPassword = canonicalize(input, passwordColonOffset + 1, - componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, true); + componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true); } hasUsername = true; } else { - this.encodedPassword = this.encodedPassword + "%40" + canonicalize( - input, pos, componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, true); + this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos, + componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true); } pos = componentDelimiterOffset + 1; break; @@ -1072,14 +1086,14 @@ ParseResult parse(HttpUrl base, String input) { if (pos < limit && input.charAt(pos) == '?') { int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#"); this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize( - input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, true, true)); + input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true)); pos = queryDelimiterOffset; } // Fragment. if (pos < limit && input.charAt(pos) == '#') { this.encodedFragment = canonicalize( - input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false); + input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false); } return ParseResult.SUCCESS; @@ -1116,7 +1130,7 @@ private void resolvePath(String input, int pos, int limit) { private void push(String input, int pos, int limit, boolean addTrailingSlash, boolean alreadyEncoded) { String segment = canonicalize( - input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, true); + input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true); if (isDot(segment)) { return; // Skip '.' path segments. } @@ -1467,7 +1481,7 @@ private static String inet6AddressToAscii(byte[] address) { private static int parsePort(String input, int pos, int limit) { try { // Canonicalize the port string to skip '\n' etc. - String portString = canonicalize(input, pos, limit, "", false, false, true); + String portString = canonicalize(input, pos, limit, "", false, false, false, true); int i = Integer.parseInt(portString); if (i > 0 && i <= 65535) return i; return -1; @@ -1536,6 +1550,13 @@ static void percentDecode(Buffer out, String encoded, int pos, int limit, boolea } } + static boolean percentEncoded(String encoded, int pos, int limit) { + return pos + 2 < limit + && encoded.charAt(pos) == '%' + && decodeHexDigit(encoded.charAt(pos + 1)) != -1 + && decodeHexDigit(encoded.charAt(pos + 2)) != -1; + } + static int decodeHexDigit(char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; @@ -1555,24 +1576,26 @@ static int decodeHexDigit(char c) { * * * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'. + * @param strict true to encode '%' if it is not the prefix of a valid percent encoding. * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded * @param asciiOnly true to encode all non-ASCII codepoints. */ static String canonicalize(String input, int pos, int limit, String encodeSet, - boolean alreadyEncoded, boolean plusIsSpace, boolean asciiOnly) { + boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) { int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { codePoint = input.codePointAt(i); if (codePoint < 0x20 || codePoint == 0x7f - || (codePoint >= 0x80 && asciiOnly) + || codePoint >= 0x80 && asciiOnly || encodeSet.indexOf(codePoint) != -1 - || (codePoint == '%' && !alreadyEncoded) - || (codePoint == '+' && plusIsSpace)) { + || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit)) + || codePoint == '+' && plusIsSpace) { // Slow path: the character at i requires encoding! Buffer out = new Buffer(); out.writeUtf8(input, pos, i); - canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, plusIsSpace, asciiOnly); + canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace, + asciiOnly); return out.readUtf8(); } } @@ -1581,8 +1604,8 @@ static String canonicalize(String input, int pos, int limit, String encodeSet, return input.substring(pos, limit); } - static void canonicalize(Buffer out, String input, int pos, int limit, - String encodeSet, boolean alreadyEncoded, boolean plusIsSpace, boolean asciiOnly) { + static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet, + boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) { Buffer utf8Buffer = null; // Lazily allocated. int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { @@ -1595,9 +1618,9 @@ static void canonicalize(Buffer out, String input, int pos, int limit, out.writeUtf8(alreadyEncoded ? "+" : "%2B"); } else if (codePoint < 0x20 || codePoint == 0x7f - || (codePoint >= 0x80 && asciiOnly) + || codePoint >= 0x80 && asciiOnly || encodeSet.indexOf(codePoint) != -1 - || (codePoint == '%' && !alreadyEncoded)) { + || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) { // Percent encode this character. if (utf8Buffer == null) { utf8Buffer = new Buffer(); @@ -1616,8 +1639,9 @@ static void canonicalize(Buffer out, String input, int pos, int limit, } } - static String canonicalize( - String input, String encodeSet, boolean alreadyEncoded, boolean query, boolean asciiOnly) { - return canonicalize(input, 0, input.length(), encodeSet, alreadyEncoded, query, asciiOnly); + static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict, + boolean plusIsSpace, boolean asciiOnly) { + return canonicalize( + input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly); } }