diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java index d2fc31d..71deb6c 100644 --- a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java +++ b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java @@ -483,12 +483,7 @@ public final class HttpUrlTest { assertEquals("http://host/#\u0080", url.toString()); assertEquals("\u0080", url.fragment()); assertEquals("\u0080", url.encodedFragment()); - try { - url.uri(); - fail(); - } catch (IllegalStateException expected) { - // Possibly a bug in java.net.URI. Many non-ASCII code points work, this one doesn't! - } + assertEquals(new URI("http://host/#"), url.uri()); // Control characters may be stripped! } @Test public void fragmentPercentEncodedNonAscii() throws Exception { @@ -1056,19 +1051,46 @@ public final class HttpUrlTest { assertEquals("http://host/#=[]:;%22~%7C?%23@%5E/$%25*", url.uri().toString()); } - @Test public void toUriWithMalformedPercentEscape() throws Exception { - HttpUrl url = new HttpUrl.Builder() - .scheme("http") - .host("host") - .encodedPath("/%xx") - .build(); - assertEquals("http://host/%xx", url.toString()); - try { - url.uri(); - fail(); - } catch (IllegalStateException expected) { - assertEquals("not valid as a java.net.URI: http://host/%xx", expected.getMessage()); - } + @Test public void toUriWithControlCharacters() throws Exception { + // Percent-encoded in the path. + assertEquals(new URI("http://host/a%00b"), HttpUrl.parse("http://host/a\u0000b").uri()); + assertEquals(new URI("http://host/a%C2%80b"), HttpUrl.parse("http://host/a\u0080b").uri()); + assertEquals(new URI("http://host/a%C2%9Fb"), HttpUrl.parse("http://host/a\u009fb").uri()); + // Percent-encoded in the query. + assertEquals(new URI("http://host/?a%00b"), HttpUrl.parse("http://host/?a\u0000b").uri()); + assertEquals(new URI("http://host/?a%C2%80b"), HttpUrl.parse("http://host/?a\u0080b").uri()); + assertEquals(new URI("http://host/?a%C2%9Fb"), HttpUrl.parse("http://host/?a\u009fb").uri()); + // Stripped from the fragment. + assertEquals(new URI("http://host/#a%00b"), HttpUrl.parse("http://host/#a\u0000b").uri()); + assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u0080b").uri()); + assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u009fb").uri()); + } + + @Test public void toUriWithSpaceCharacters() throws Exception { + // Percent-encoded in the path. + assertEquals(new URI("http://host/a%0Bb"), HttpUrl.parse("http://host/a\u000bb").uri()); + assertEquals(new URI("http://host/a%20b"), HttpUrl.parse("http://host/a b").uri()); + assertEquals(new URI("http://host/a%E2%80%89b"), HttpUrl.parse("http://host/a\u2009b").uri()); + assertEquals(new URI("http://host/a%E3%80%80b"), HttpUrl.parse("http://host/a\u3000b").uri()); + // Percent-encoded in the query. + assertEquals(new URI("http://host/?a%0Bb"), HttpUrl.parse("http://host/?a\u000bb").uri()); + assertEquals(new URI("http://host/?a%20b"), HttpUrl.parse("http://host/?a b").uri()); + assertEquals(new URI("http://host/?a%E2%80%89b"), HttpUrl.parse("http://host/?a\u2009b").uri()); + assertEquals(new URI("http://host/?a%E3%80%80b"), HttpUrl.parse("http://host/?a\u3000b").uri()); + // Stripped from the fragment. + assertEquals(new URI("http://host/#a%0Bb"), HttpUrl.parse("http://host/#a\u000bb").uri()); + assertEquals(new URI("http://host/#a%20b"), HttpUrl.parse("http://host/#a b").uri()); + assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u2009b").uri()); + assertEquals(new URI("http://host/#ab"), HttpUrl.parse("http://host/#a\u3000b").uri()); + } + + @Test public void toUriWithNonHexPercentEscape() throws Exception { + assertEquals(new URI("http://host/%25xx"), HttpUrl.parse("http://host/%xx").uri()); + } + + @Test public void toUriWithTruncatedPercentEscape() throws Exception { + assertEquals(new URI("http://host/%25a"), HttpUrl.parse("http://host/%a").uri()); + assertEquals(new URI("http://host/%25"), HttpUrl.parse("http://host/%").uri()); } @Test public void fromJavaNetUrl() throws Exception { diff --git a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java index f5134d9..96f6917 100644 --- a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java +++ b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java @@ -33,10 +33,10 @@ public FormEncodingBuilder add(String name, String value) { content.writeByte('&'); } HttpUrl.canonicalize(content, name, 0, name.length(), - HttpUrl.FORM_ENCODE_SET, false, true, true); + HttpUrl.FORM_ENCODE_SET, false, false, true, true); content.writeByte('='); HttpUrl.canonicalize(content, value, 0, value.length(), - HttpUrl.FORM_ENCODE_SET, false, true, true); + HttpUrl.FORM_ENCODE_SET, false, false, true, true); return this; } @@ -46,10 +46,10 @@ public FormEncodingBuilder addEncoded(String name, String value) { content.writeByte('&'); } HttpUrl.canonicalize(content, name, 0, name.length(), - HttpUrl.FORM_ENCODE_SET, true, true, true); + HttpUrl.FORM_ENCODE_SET, true, false, true, true); content.writeByte('='); HttpUrl.canonicalize(content, value, 0, value.length(), - HttpUrl.FORM_ENCODE_SET, true, true, true); + HttpUrl.FORM_ENCODE_SET, true, false, true, true); return this; } diff --git a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java index 68b09ef..beabeca 100644 --- a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java +++ b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java @@ -327,19 +327,29 @@ public URL url() { } /** - * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} forbids certain characters - * like {@code [} and {@code |}, the returned URI may escape more characters than this URL. + * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this + * class, the returned URI may be semantically different from this URL: + *
This method throws an unchecked {@link IllegalStateException} if it cannot be converted to a - * URI even after escaping forbidden characters. In particular, URLs that contain malformed - * percent escapes like {@code http://host/%xx} will trigger this exception. + *
These differences may have a significant consequence when the URI is interpretted by a + * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided. */ public URI uri() { + String uri = newBuilder().reencodeForUri().toString(); try { - String uri = newBuilder().reencodeForUri().toString(); return new URI(uri); } catch (URISyntaxException e) { - throw new IllegalStateException("not valid as a java.net.URI: " + url); + // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry. + try { + String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", ""); + return URI.create(stripped); + } catch (Exception e1) { + throw new RuntimeException(e); // Unexpected! + } } } @@ -686,25 +696,27 @@ public Builder scheme(String scheme) { public Builder username(String username) { if (username == null) throw new IllegalArgumentException("username == null"); - this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, true); + this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true); return this; } public Builder encodedUsername(String encodedUsername) { if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null"); - this.encodedUsername = canonicalize(encodedUsername, USERNAME_ENCODE_SET, true, false, true); + this.encodedUsername = canonicalize( + encodedUsername, USERNAME_ENCODE_SET, true, false, false, true); return this; } public Builder password(String password) { if (password == null) throw new IllegalArgumentException("password == null"); - this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, true); + this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true); return this; } public Builder encodedPassword(String encodedPassword) { if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null"); - this.encodedPassword = canonicalize(encodedPassword, PASSWORD_ENCODE_SET, true, false, true); + this.encodedPassword = canonicalize( + encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true); return this; } @@ -747,7 +759,7 @@ public Builder addEncodedPathSegment(String encodedPathSegment) { public Builder setPathSegment(int index, String pathSegment) { if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null"); String canonicalPathSegment = canonicalize( - pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, true); + pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, false, true); if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { throw new IllegalArgumentException("unexpected path segment: " + pathSegment); } @@ -760,7 +772,7 @@ public Builder setEncodedPathSegment(int index, String encodedPathSegment) { throw new IllegalArgumentException("encodedPathSegment == null"); } String canonicalPathSegment = canonicalize(encodedPathSegment, - 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, true); + 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, false, true); encodedPathSegments.set(index, canonicalPathSegment); if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment); @@ -787,7 +799,8 @@ public Builder encodedPath(String encodedPath) { public Builder query(String query) { this.encodedQueryNamesAndValues = query != null - ? queryStringToNamesAndValues(canonicalize(query, QUERY_ENCODE_SET, false, true, true)) + ? queryStringToNamesAndValues(canonicalize( + query, QUERY_ENCODE_SET, false, false, true, true)) : null; return this; } @@ -795,7 +808,7 @@ public Builder query(String query) { public Builder encodedQuery(String encodedQuery) { this.encodedQueryNamesAndValues = encodedQuery != null ? queryStringToNamesAndValues( - canonicalize(encodedQuery, QUERY_ENCODE_SET, true, true, true)) + canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true)) : null; return this; } @@ -805,9 +818,9 @@ public Builder addQueryParameter(String name, String value) { if (name == null) throw new IllegalArgumentException("name == null"); if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); encodedQueryNamesAndValues.add( - canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true, true)); + canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)); encodedQueryNamesAndValues.add(value != null - ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, true, true) + ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true) : null); return this; } @@ -817,9 +830,9 @@ public Builder addEncodedQueryParameter(String encodedName, String encodedValue) if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); encodedQueryNamesAndValues.add( - canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true, true)); + canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)); encodedQueryNamesAndValues.add(encodedValue != null - ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, true, true) + ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, false, true, true) : null); return this; } @@ -839,7 +852,8 @@ public Builder setEncodedQueryParameter(String encodedName, String encodedValue) public Builder removeAllQueryParameters(String name) { if (name == null) throw new IllegalArgumentException("name == null"); if (encodedQueryNamesAndValues == null) return this; - String nameToRemove = canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true, true); + String nameToRemove = canonicalize( + name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true); removeAllCanonicalQueryParameters(nameToRemove); return this; } @@ -848,7 +862,7 @@ public Builder removeAllEncodedQueryParameters(String encodedName) { if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); if (encodedQueryNamesAndValues == null) return this; removeAllCanonicalQueryParameters( - canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true, true)); + canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)); return this; } @@ -867,14 +881,14 @@ private void removeAllCanonicalQueryParameters(String canonicalName) { public Builder fragment(String fragment) { this.encodedFragment = fragment != null - ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false) + ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false) : null; return this; } public Builder encodedFragment(String encodedFragment) { this.encodedFragment = encodedFragment != null - ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false) + ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false) : null; return this; } @@ -887,20 +901,20 @@ Builder reencodeForUri() { for (int i = 0, size = encodedPathSegments.size(); i < size; i++) { String pathSegment = encodedPathSegments.get(i); encodedPathSegments.set(i, - canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, false, true)); + canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true)); } if (encodedQueryNamesAndValues != null) { for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) { String component = encodedQueryNamesAndValues.get(i); if (component != null) { encodedQueryNamesAndValues.set(i, - canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true)); + canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true)); } } } if (encodedFragment != null) { encodedFragment = canonicalize( - encodedFragment, FRAGMENT_ENCODE_SET_URI, true, false, false); + encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false); } return this; } @@ -1013,19 +1027,19 @@ ParseResult parse(HttpUrl base, String input) { int passwordColonOffset = delimiterOffset( input, pos, componentDelimiterOffset, ":"); String canonicalUsername = canonicalize( - input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, true); + input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true); this.encodedUsername = hasUsername ? this.encodedUsername + "%40" + canonicalUsername : canonicalUsername; if (passwordColonOffset != componentDelimiterOffset) { hasPassword = true; this.encodedPassword = canonicalize(input, passwordColonOffset + 1, - componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, true); + componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true); } hasUsername = true; } else { - this.encodedPassword = this.encodedPassword + "%40" + canonicalize( - input, pos, componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, true); + this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos, + componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true); } pos = componentDelimiterOffset + 1; break; @@ -1072,14 +1086,14 @@ ParseResult parse(HttpUrl base, String input) { if (pos < limit && input.charAt(pos) == '?') { int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#"); this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize( - input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, true, true)); + input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true)); pos = queryDelimiterOffset; } // Fragment. if (pos < limit && input.charAt(pos) == '#') { this.encodedFragment = canonicalize( - input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false); + input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false); } return ParseResult.SUCCESS; @@ -1116,7 +1130,7 @@ private void resolvePath(String input, int pos, int limit) { private void push(String input, int pos, int limit, boolean addTrailingSlash, boolean alreadyEncoded) { String segment = canonicalize( - input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, true); + input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true); if (isDot(segment)) { return; // Skip '.' path segments. } @@ -1467,7 +1481,7 @@ private static String inet6AddressToAscii(byte[] address) { private static int parsePort(String input, int pos, int limit) { try { // Canonicalize the port string to skip '\n' etc. - String portString = canonicalize(input, pos, limit, "", false, false, true); + String portString = canonicalize(input, pos, limit, "", false, false, false, true); int i = Integer.parseInt(portString); if (i > 0 && i <= 65535) return i; return -1; @@ -1536,6 +1550,13 @@ static void percentDecode(Buffer out, String encoded, int pos, int limit, boolea } } + static boolean percentEncoded(String encoded, int pos, int limit) { + return pos + 2 < limit + && encoded.charAt(pos) == '%' + && decodeHexDigit(encoded.charAt(pos + 1)) != -1 + && decodeHexDigit(encoded.charAt(pos + 2)) != -1; + } + static int decodeHexDigit(char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; @@ -1555,24 +1576,26 @@ static int decodeHexDigit(char c) { * * * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'. + * @param strict true to encode '%' if it is not the prefix of a valid percent encoding. * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded * @param asciiOnly true to encode all non-ASCII codepoints. */ static String canonicalize(String input, int pos, int limit, String encodeSet, - boolean alreadyEncoded, boolean plusIsSpace, boolean asciiOnly) { + boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) { int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { codePoint = input.codePointAt(i); if (codePoint < 0x20 || codePoint == 0x7f - || (codePoint >= 0x80 && asciiOnly) + || codePoint >= 0x80 && asciiOnly || encodeSet.indexOf(codePoint) != -1 - || (codePoint == '%' && !alreadyEncoded) - || (codePoint == '+' && plusIsSpace)) { + || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit)) + || codePoint == '+' && plusIsSpace) { // Slow path: the character at i requires encoding! Buffer out = new Buffer(); out.writeUtf8(input, pos, i); - canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, plusIsSpace, asciiOnly); + canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace, + asciiOnly); return out.readUtf8(); } } @@ -1581,8 +1604,8 @@ static String canonicalize(String input, int pos, int limit, String encodeSet, return input.substring(pos, limit); } - static void canonicalize(Buffer out, String input, int pos, int limit, - String encodeSet, boolean alreadyEncoded, boolean plusIsSpace, boolean asciiOnly) { + static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet, + boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) { Buffer utf8Buffer = null; // Lazily allocated. int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { @@ -1595,9 +1618,9 @@ static void canonicalize(Buffer out, String input, int pos, int limit, out.writeUtf8(alreadyEncoded ? "+" : "%2B"); } else if (codePoint < 0x20 || codePoint == 0x7f - || (codePoint >= 0x80 && asciiOnly) + || codePoint >= 0x80 && asciiOnly || encodeSet.indexOf(codePoint) != -1 - || (codePoint == '%' && !alreadyEncoded)) { + || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) { // Percent encode this character. if (utf8Buffer == null) { utf8Buffer = new Buffer(); @@ -1616,8 +1639,9 @@ static void canonicalize(Buffer out, String input, int pos, int limit, } } - static String canonicalize( - String input, String encodeSet, boolean alreadyEncoded, boolean query, boolean asciiOnly) { - return canonicalize(input, 0, input.length(), encodeSet, alreadyEncoded, query, asciiOnly); + static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict, + boolean plusIsSpace, boolean asciiOnly) { + return canonicalize( + input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly); } }