From 1123dd210a4b06b56986029386e5562aad63aa0d Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Fri, 10 Nov 2023 10:48:01 +1100 Subject: [PATCH] Added support for per-request authentication to Jsoup.connect (#2046) Added support for per-request authentication Uses the multi-version support so that in Java versions that support it (9+), an authenticator is set via `java.net.HttpURLConnection.setAuthenticator()`. On Java 8, we set the system-wide default authenticator, and use ThreadLocals to enable per-request authenticators. Also adds tests for HTTP and HTTPS server and proxy basic authentication. --- CHANGES | 3 + pom.xml | 1 + src/main/java/org/jsoup/Connection.java | 103 +++++++++++++++++ .../jsoup/helper/AuthenticationHandler.java | 90 +++++++++++++++ .../java/org/jsoup/helper/HttpConnection.java | 21 ++++ .../jsoup/helper/RequestAuthenticator.java | 92 +++++++++++++++ .../org/jsoup/helper/RequestAuthHandler.java | 24 ++++ .../helper/AuthenticationHandlerTest.java | 7 ++ .../org/jsoup/helper/HttpConnectionTest.java | 36 ++++++ .../org/jsoup/integration/ConnectTest.java | 78 ++++++++++++- .../java/org/jsoup/integration/ProxyTest.java | 92 ++++++++++++++- .../org/jsoup/integration/TestServer.java | 31 ++++- .../integration/servlets/AuthFilter.java | 107 ++++++++++++++++++ .../integration/servlets/ProxyServlet.java | 36 +++++- 14 files changed, 708 insertions(+), 13 deletions(-) create mode 100644 src/main/java/org/jsoup/helper/AuthenticationHandler.java create mode 100644 src/main/java/org/jsoup/helper/RequestAuthenticator.java create mode 100644 src/main/java9/org/jsoup/helper/RequestAuthHandler.java create mode 100644 src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java create mode 100644 src/test/java/org/jsoup/integration/servlets/AuthFilter.java diff --git a/CHANGES b/CHANGES index ab2de95a32..2bbe999171 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,9 @@ jsoup changelog Release 1.17.1 [PENDING] + * Improvement: in Jsoup.connect(), added support 
for request-level authentication, supporting authentication to + proxies and to servers. + * Improvement: in the Elements list, added direct support for `#set(index, element)`, `#remove(index)`, `#remove(object)`, `#clear()`, `#removeAll(collection)`, `#retainAll(collection)`, `#removeIf(filter)`, `#replaceAll(operator)`. These methods update the original DOM, as well as the Elements list. diff --git a/pom.xml b/pom.xml index 5a0e1dd006..4facf9e4ee 100644 --- a/pom.xml +++ b/pom.xml @@ -94,6 +94,7 @@ java.io.UncheckedIOException java.util.function.Predicate java.util.function.UnaryOperator + java.net.HttpURLConnection diff --git a/src/main/java/org/jsoup/Connection.java b/src/main/java/org/jsoup/Connection.java index 04d55c30dd..43d302cc7a 100644 --- a/src/main/java/org/jsoup/Connection.java +++ b/src/main/java/org/jsoup/Connection.java @@ -1,5 +1,6 @@ package org.jsoup; +import org.jsoup.helper.RequestAuthenticator; import org.jsoup.nodes.Document; import org.jsoup.parser.Parser; @@ -9,6 +10,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; +import java.net.Authenticator; import java.net.CookieStore; import java.net.Proxy; import java.net.URL; @@ -69,6 +71,28 @@ public final boolean hasBody() { */ Connection newRequest(); + /** + Creates a new request, using this Connection as the session-state and to initialize the connection settings (which + may then be independently changed on the returned {@link Connection.Request} object). + @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request + @param url URL for the new request + @since 1.17.1 + */ + default Connection newRequest(String url) { + return newRequest().url(url); + } + + /** + Creates a new request, using this Connection as the session-state and to initialize the connection settings (which + may then be independently changed on the returned {@link Connection.Request} object). 
+ @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request + @param url URL for the new request + @since 1.17.1 + */ + default Connection newRequest(URL url) { + return newRequest().url(url); + } + /** * Set the request URL to fetch. The protocol must be HTTP or HTTPS. * @param url URL to connect to @@ -322,6 +346,64 @@

For GET requests, data parameters will be sent on the request query string. F */ Connection postDataCharset(String charset); + /** + Set the authenticator to use for this connection, enabling requests to URLs, and via proxies, that require + authentication credentials. +

The authentication scheme used is automatically detected during the request execution. + Supported schemes (subject to the platform) are {@code basic}, {@code digest}, {@code NTLM}, + and {@code Kerberos}.

+ +

To use, supply a {@link RequestAuthenticator} function that: +

    +
  1. validates the URL that is requesting authentication, and
  2. +
  3. returns the appropriate credentials (username and password)
  4. +
+

+ +

For example, to authenticate both to a proxy and a downstream web server: +

+     Connection session = Jsoup.newSession()
+         .proxy("proxy.example.com", 8080)
+         .auth(auth -> {
+             if (auth.isServer()) { // provide credentials for the request url
+                 Validate.isTrue(auth.url().getHost().equals("example.com"));
+                 // check that we're sending credentials where we expect, and not redirected out
+                 return auth.credentials("username", "password");
+             } else { // auth.isProxy()
+                 return auth.credentials("proxy-user", "proxy-password");
+             }
+         });
+
+     Connection.Response response = session.newRequest("https://example.com/adminzone/").execute();
+     
+

+ +

The system may cache the authentication and use it for subsequent requests to the same resource.

+ +

Implementation notes

+

For compatibility, on a Java 8 platform, authentication is set up via the system-wide default + {@link java.net.Authenticator#setDefault(Authenticator)} method via a ThreadLocal delegator. Whilst the + authenticator used is request specific and thread-safe, if you have other calls to {@code setDefault}, they will be + incompatible with this implementation.

+

On Java 9 and above, the preceding note does not apply; authenticators are directly set on the request.

+

If you are attempting to authenticate to a proxy that uses the {@code basic} scheme and will be fetching HTTPS + URLs, you need to configure your Java platform to enable that, by setting the + {@code jdk.http.auth.tunneling.disabledSchemes} system property to {@code ""}. + This must be executed prior to any authorization attempts. E.g.: +

+     static {
+        System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");
+        // removes Basic, which is otherwise excluded from auth for CONNECT tunnels
+     }
+

+ * @param authenticator the authenticator to use in this connection + * @return this Connection, for chaining + * @since 1.17.1 + */ + default Connection auth(@Nullable RequestAuthenticator authenticator) { + throw new UnsupportedOperationException(); + } + /** * Execute the request as a GET, and parse the result. * @return parsed Document @@ -699,6 +781,27 @@ interface Request extends Base { */ String postDataCharset(); + /** + Set the authenticator to use for this request. + See {@link Connection#auth(RequestAuthenticator) Connection.auth(authenticator)} for examples and + implementation notes. + * @param authenticator the authenticator + * @return this Request, for chaining. + * @since 1.17.1 + */ + default Request auth(@Nullable RequestAuthenticator authenticator) { + throw new UnsupportedOperationException(); + } + + /** + Get the RequestAuthenticator, if any, that will be used on this request. + * @return the RequestAuthenticator, or {@code null} if not set + * @since 1.17.1 + */ + @Nullable + default RequestAuthenticator auth() { + throw new UnsupportedOperationException(); + } } /** diff --git a/src/main/java/org/jsoup/helper/AuthenticationHandler.java b/src/main/java/org/jsoup/helper/AuthenticationHandler.java new file mode 100644 index 0000000000..0dade52bce --- /dev/null +++ b/src/main/java/org/jsoup/helper/AuthenticationHandler.java @@ -0,0 +1,90 @@ +package org.jsoup.helper; + +import javax.annotation.Nullable; +import java.lang.reflect.Constructor; +import java.net.Authenticator; +import java.net.HttpURLConnection; +import java.net.PasswordAuthentication; + +/** + Handles per request Authenticator-based authentication. Loads the class `org.jsoup.helper.RequestAuthHandler` if + per-request Authenticators are supported (Java 9+), or installs a system-wide Authenticator that delegates to a request + ThreadLocal. + */ +class AuthenticationHandler extends Authenticator { + static final int MaxAttempts = 5; // max authentication attempts per request. 
allows for multiple auths (e.g. proxy and server) in one request, but saves otherwise 20 requests if credentials are incorrect. + static AuthShim handler; + + static { + try { + //noinspection unchecked + Class perRequestClass = (Class) Class.forName("org.jsoup.helper.RequestAuthHandler"); + Constructor constructor = perRequestClass.getConstructor(); + handler = constructor.newInstance(); + } catch (ClassNotFoundException e) { + handler = new GlobalHandler(); + } catch (Exception e) { + throw new IllegalStateException(e); + } + } + + @Nullable RequestAuthenticator auth; + int attemptCount = 0; + + AuthenticationHandler() {} + + AuthenticationHandler(RequestAuthenticator auth) { + this.auth = auth; + } + + /** + Authentication callback, called by HttpURLConnection - either as system-wide default (Java 8) or per HttpURLConnection (Java 9+) + * @return credentials, or null if not attempting to auth. + */ + @Nullable @Override public final PasswordAuthentication getPasswordAuthentication() { + AuthenticationHandler delegate = handler.get(this); + if (delegate == null) return null; // this request has no auth handler + delegate.attemptCount++; + // if the password returned fails, Java will repeatedly retry the request with a new password auth hit (because + // it may be an interactive prompt, and the user could eventually get it right). 
But in Jsoup's context, the + // auth will either be correct or not, so just abandon + if (delegate.attemptCount > MaxAttempts) + return null; + if (delegate.auth == null) + return null; // detached - would have been the Global Authenticator (not a delegate) + + RequestAuthenticator.Context ctx = new RequestAuthenticator.Context( + this.getRequestingURL(), this.getRequestorType(), this.getRequestingPrompt()); + return delegate.auth.authenticate(ctx); + } + + interface AuthShim { + void enable(RequestAuthenticator auth, HttpURLConnection con); + + void remove(); + + @Nullable AuthenticationHandler get(AuthenticationHandler helper); + } + + /** + On Java 8 we install a system-wide Authenticator, which pulls the delegating Auth from a ThreadLocal pool. + */ + static class GlobalHandler implements AuthShim { + static ThreadLocal authenticators = new ThreadLocal<>(); + static { + Authenticator.setDefault(new AuthenticationHandler()); + } + + @Override public void enable(RequestAuthenticator auth, HttpURLConnection con) { + authenticators.set(new AuthenticationHandler(auth)); + } + + @Override public void remove() { + authenticators.remove(); + } + + @Override public AuthenticationHandler get(AuthenticationHandler helper) { + return authenticators.get(); + } + } +} diff --git a/src/main/java/org/jsoup/helper/HttpConnection.java b/src/main/java/org/jsoup/helper/HttpConnection.java index 6bc52e1c7a..03cbf358fa 100644 --- a/src/main/java/org/jsoup/helper/HttpConnection.java +++ b/src/main/java/org/jsoup/helper/HttpConnection.java @@ -377,6 +377,10 @@ public Connection postDataCharset(String charset) { return this; } + @Override public Connection auth(RequestAuthenticator authenticator) { + req.auth(authenticator); + return this; + } @SuppressWarnings("unchecked") private static abstract class Base> implements Connection.Base { @@ -596,6 +600,7 @@ public static class Request extends HttpConnection.Base impl private String postDataCharset = DataUtil.defaultCharsetName; 
private @Nullable SSLSocketFactory sslSocketFactory; private CookieManager cookieManager; + private @Nullable RequestAuthenticator authenticator; private volatile boolean executing = false; Request() { @@ -626,6 +631,7 @@ public static class Request extends HttpConnection.Base impl parserDefined = copy.parserDefined; sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share cookieManager = copy.cookieManager; + authenticator = copy.authenticator; executing = false; } @@ -764,6 +770,15 @@ public String postDataCharset() { CookieManager cookieManager() { return cookieManager; } + + @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) { + this.authenticator = authenticator; + return this; + } + + @Override @Nullable public RequestAuthenticator auth() { + return authenticator; + } } public static class Response extends HttpConnection.Base implements Connection.Response { @@ -898,6 +913,10 @@ else if (methodHasBody) throw e; } finally { req.executing = false; + + // detach any thread local auth delegate + if (req.authenticator != null) + AuthenticationHandler.handler.remove(); } res.executed = true; @@ -1008,6 +1027,8 @@ private static HttpURLConnection createConnection(HttpConnection.Request req) th if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection) ((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory()); + if (req.authenticator != null) + AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally if (req.method().hasBody()) conn.setDoOutput(true); CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store diff --git a/src/main/java/org/jsoup/helper/RequestAuthenticator.java b/src/main/java/org/jsoup/helper/RequestAuthenticator.java new file mode 100644 index 0000000000..3284f8dc7a --- /dev/null +++ b/src/main/java/org/jsoup/helper/RequestAuthenticator.java @@ -0,0 +1,92 @@ +package 
org.jsoup.helper; + +import org.jsoup.Connection; + +import javax.annotation.Nullable; +import java.net.Authenticator; +import java.net.PasswordAuthentication; +import java.net.URL; + +/** + A {@code RequestAuthenticator} is used in {@link Connection} to authenticate if required to proxies and web + servers. See {@link Connection#auth(RequestAuthenticator)}. + */ +@FunctionalInterface +public interface RequestAuthenticator { + + /** + Provide authentication credentials for the provided Request Context. + * @param auth the request context including URL, type (Server or Proxy), and realm. + * @return credentials for the request. May return {@code null} if they are not applicable -- but the request will + * likely fail, as this method is only called if the request asked for authentication. + */ + @Nullable + PasswordAuthentication authenticate(Context auth); + + /** + Provides details for the request, to determine the appropriate credentials to return. + */ + class Context { + private final URL url; + private final Authenticator.RequestorType type; + private final String realm; + + Context(URL url, Authenticator.RequestorType type, String realm) { + this.url = url; + this.type = type; + this.realm = realm; + } + + /** + Get the URL that is being requested. + * @return URL + */ + public URL url() { + return url; + } + + /** + Get the requestor type: {@link Authenticator.RequestorType#PROXY PROXY} if a proxy is requesting + authentication, or {@link Authenticator.RequestorType#SERVER SERVER} if the URL's server is requesting. + * @return requestor type + */ + public Authenticator.RequestorType type() { + return type; + } + + /** + Get the realm of the authentication request. + * @return realm of the authentication request + */ + public String realm() { + return realm; + } + + /** + Gets if the authentication request is for a proxy. + * @return true if type==proxy.
+ */ + public boolean isProxy() { + return type == Authenticator.RequestorType.PROXY; + } + + /** + Gets if the authentication request is for a server. + * @return true if type==server. + */ + public boolean isServer() { + return type == Authenticator.RequestorType.SERVER; + } + + /** + Helper method to return a PasswordAuthentication object. + * @param username username credential + * @param password password credential + * @return a constructed PasswordAuthentication + */ + public PasswordAuthentication credentials(String username, String password) { + return new PasswordAuthentication(username, password.toCharArray()); + } + } + +} diff --git a/src/main/java9/org/jsoup/helper/RequestAuthHandler.java b/src/main/java9/org/jsoup/helper/RequestAuthHandler.java new file mode 100644 index 0000000000..0df80de209 --- /dev/null +++ b/src/main/java9/org/jsoup/helper/RequestAuthHandler.java @@ -0,0 +1,24 @@ +package org.jsoup.helper; + +import java.net.HttpURLConnection; + +/** + A per-request authentication shim, used in Java 9+. 
+ */ +class RequestAuthHandler implements AuthenticationHandler.AuthShim { + public RequestAuthHandler() {} + + @Override public void enable(RequestAuthenticator auth, HttpURLConnection con) { + AuthenticationHandler authenticator = new AuthenticationHandler(auth); + con.setAuthenticator(authenticator); + } + + @Override public void remove() { + // noop; would remove thread-local in Global Handler + } + + @Override public AuthenticationHandler get(AuthenticationHandler helper) { + // would get thread-local in Global Handler + return helper; + } +} diff --git a/src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java b/src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java new file mode 100644 index 0000000000..c0dd692d37 --- /dev/null +++ b/src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java @@ -0,0 +1,7 @@ +package org.jsoup.helper; + +public class AuthenticationHandlerTest { + public static final int MaxAttempts = AuthenticationHandler.MaxAttempts; + + // tests are in ConnectTest, ProxyTest. This class just makes the MaxAttempts visible for test.
+} diff --git a/src/test/java/org/jsoup/helper/HttpConnectionTest.java b/src/test/java/org/jsoup/helper/HttpConnectionTest.java index 8df0f80397..8ee6a16600 100644 --- a/src/test/java/org/jsoup/helper/HttpConnectionTest.java +++ b/src/test/java/org/jsoup/helper/HttpConnectionTest.java @@ -6,8 +6,11 @@ import org.jsoup.integration.ParseTest; import org.junit.jupiter.api.Test; +import javax.annotation.Nullable; import java.io.IOException; +import java.net.Authenticator; import java.net.MalformedURLException; +import java.net.PasswordAuthentication; import java.net.URL; import java.util.ArrayList; import java.util.Collection; @@ -373,4 +376,37 @@ public void caseInsensitiveHeaders(Locale locale) { String actual = connect.request().header("Key"); assertEquals(value, actual); } + + @Test void setAuth() throws MalformedURLException { + Connection con = Jsoup.newSession(); + + assertNull(con.request().auth()); + + RequestAuthenticator auth1 = new RequestAuthenticator() { + @Override public PasswordAuthentication authenticate(Context auth) { + return auth.credentials("foo", "bar"); + } + }; + + RequestAuthenticator auth2 = new RequestAuthenticator() { + @Override public PasswordAuthentication authenticate(Context auth) { + return auth.credentials("qux", "baz"); + } + }; + + con.auth(auth1); + assertSame(con.request().auth(), auth1); + + con.auth(auth2); + assertSame(con.request().auth(), auth2); + + con.request().auth(auth1); + assertSame(con.request().auth(), auth1); + + PasswordAuthentication creds = auth1.authenticate( + new RequestAuthenticator.Context(new URL("http://example.com"), Authenticator.RequestorType.SERVER, "Realm")); + assertNotNull(creds); + assertEquals("foo", creds.getUserName()); + assertEquals("bar", new String(creds.getPassword())); + } } diff --git a/src/test/java/org/jsoup/integration/ConnectTest.java b/src/test/java/org/jsoup/integration/ConnectTest.java index 424ba4877d..612ea600e4 100644 --- a/src/test/java/org/jsoup/integration/ConnectTest.java 
+++ b/src/test/java/org/jsoup/integration/ConnectTest.java @@ -19,17 +19,21 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; +import javax.servlet.http.HttpServletResponse; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.net.Authenticator; import java.net.MalformedURLException; import java.net.URL; import java.net.URLDecoder; import java.nio.file.Files; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; +import static org.jsoup.helper.AuthenticationHandlerTest.MaxAttempts; import static org.jsoup.helper.HttpConnection.CONTENT_TYPE; import static org.jsoup.helper.HttpConnection.MULTIPART_FORM_DATA; import static org.jsoup.integration.UrlConnectTest.browserUa; @@ -745,11 +749,11 @@ public void maxBodySizeInReadToByteBuffer() throws IOException { assertEquals("", ihVal("Query String", resultDoc)); // new request to echo, should not have form data, but should have cookies from implicit session - Document newEcho = submit.newRequest().url(echoUrl).get(); + Document newEcho = submit.newRequest(echoUrl).get(); assertEquals("One=EchoServlet; One=Root", ihVal("Cookie", newEcho)); assertEquals("", ihVal("Query String", newEcho)); - Document cookieDoc = submit.newRequest().url(cookieUrl).get(); + Document cookieDoc = submit.newRequest(cookieUrl).get(); assertEquals("CookieServlet", ihVal("One", cookieDoc)); // different cookie path } @@ -760,7 +764,7 @@ public void maxBodySizeInReadToByteBuffer() throws IOException { String startUrl = FileServlet.urlTo("/htmltests/form-tests.html"); Connection session = Jsoup.newSession(); - Document loginDoc = session.newRequest().url(startUrl).get(); + Document loginDoc = session.newRequest(startUrl).get(); FormElement form = loginDoc.expectForm("#login2"); assertNotNull(form); String username = "admin"; @@ -777,7 +781,7 @@ public void 
maxBodySizeInReadToByteBuffer() throws IOException { assertEquals(Connection.Method.POST, postRes.method()); Document resultDoc = postRes.parse(); - Document echo2 = resultDoc.connection().newRequest().url(echoUrl).get(); + Document echo2 = resultDoc.connection().newRequest(echoUrl).get(); assertEquals("", ihVal("Query String", echo2)); // should not re-send the data assertEquals("One=EchoServlet; One=Root", ihVal("Cookie", echo2)); } @@ -804,4 +808,70 @@ public void maxBodySizeInReadToByteBuffer() throws IOException { private static Stream echoUrls() { return Stream.of(EchoServlet.Url, EchoServlet.TlsUrl); } + + @ParameterizedTest @MethodSource("echoUrls") + void failsIfNotAuthenticated(String url) throws IOException { + String password = AuthFilter.newServerPassword(); // we don't send it, but ensures cache won't hit + Connection.Response res = Jsoup.connect(url) + .header(AuthFilter.WantsServerAuthentication, "1") + .ignoreHttpErrors(true) + .execute(); + + assertEquals(401, res.statusCode()); + } + + @ParameterizedTest @MethodSource("echoUrls") + void canAuthenticate(String url) throws IOException { + AtomicInteger count = new AtomicInteger(0); + String password = AuthFilter.newServerPassword(); + Connection.Response res = Jsoup.connect(url) + .header(AuthFilter.WantsServerAuthentication, "1") + .auth(ctx -> { + count.incrementAndGet(); + assertEquals(Authenticator.RequestorType.SERVER, ctx.type()); + assertEquals("localhost", ctx.url().getHost()); + assertEquals(AuthFilter.ServerRealm, ctx.realm()); + + return ctx.credentials(AuthFilter.ServerUser, password); + }) + .execute(); + + assertEquals(1, count.get()); + + Document doc = res.parse(); + assertTrue(ihVal("Authorization", doc).startsWith("Basic ")); // tests we set the auth header + } + + @ParameterizedTest @MethodSource("echoUrls") + void incorrectAuth(String url) throws IOException { + Connection session = Jsoup.newSession() + .header(AuthFilter.WantsServerAuthentication, "1") + 
.ignoreHttpErrors(true); + + String password = AuthFilter.newServerPassword(); + int code = session.newRequest(url).execute().statusCode(); // no auth sent + assertEquals(HttpServletResponse.SC_UNAUTHORIZED, code); + + AtomicInteger count = new AtomicInteger(0); + Connection.Response res = session.newRequest(url) + .auth(ctx -> { + count.incrementAndGet(); + return ctx.credentials(AuthFilter.ServerUser, password + "wrong"); // incorrect + }) + .execute(); + assertEquals(MaxAttempts, count.get()); + assertEquals(HttpServletResponse.SC_UNAUTHORIZED, res.statusCode()); + + AtomicInteger successCount = new AtomicInteger(0); + Connection.Response successRes = session.newRequest(url) + .auth(ctx -> { + successCount.incrementAndGet(); + return ctx.credentials(AuthFilter.ServerUser, password); // correct + }) + .execute(); + assertEquals(1, successCount.get()); + assertEquals(HttpServletResponse.SC_OK, successRes.statusCode()); + } + + // proxy connection tests are in ProxyTest } diff --git a/src/test/java/org/jsoup/integration/ProxyTest.java b/src/test/java/org/jsoup/integration/ProxyTest.java index a02bb18ef5..18c1a60b49 100644 --- a/src/test/java/org/jsoup/integration/ProxyTest.java +++ b/src/test/java/org/jsoup/integration/ProxyTest.java @@ -2,6 +2,7 @@ import org.jsoup.Connection; import org.jsoup.Jsoup; +import org.jsoup.integration.servlets.AuthFilter; import org.jsoup.integration.servlets.EchoServlet; import org.jsoup.integration.servlets.FileServlet; import org.jsoup.integration.servlets.HelloServlet; @@ -14,11 +15,14 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; +import javax.servlet.http.HttpServletResponse; import java.io.IOException; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; +import static org.jsoup.helper.AuthenticationHandlerTest.MaxAttempts; import static org.jsoup.integration.ConnectTest.ihVal; -import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** Tests Jsoup.connect proxy support */ @@ -49,6 +53,10 @@ private static Stream helloUrls() { return Stream.of(HelloServlet.Url, HelloServlet.TlsUrl); } + private static Stream echoUrls() { + return Stream.of(EchoServlet.Url, EchoServlet.TlsUrl); + } + private static void assertVia(Connection.Response res) { assertEquals(res.header("Via"), ProxyServlet.Via); } @@ -71,18 +79,92 @@ private static void assertVia(Connection.Response res) { @Test void proxyForSession() throws IOException { Connection session = Jsoup.newSession().proxy(proxy.hostname, proxy.port); - Connection.Response medRes = session.newRequest().url(FileServlet.urlTo("/htmltests/medium.html")).execute(); - Connection.Response largeRes = session.newRequest().url(FileServlet.urlTo("/htmltests/large.html")).execute(); + Connection.Response medRes = session.newRequest(FileServlet.urlTo("/htmltests/medium.html")).execute(); + Connection.Response largeRes = session.newRequest(FileServlet.urlTo("/htmltests/large.html")).execute(); assertVia(medRes); assertVia(largeRes); assertEquals("Medium HTML", medRes.parse().title()); assertEquals("Large HTML", largeRes.parse().title()); - Connection.Response smedRes = session.newRequest().url(FileServlet.tlsUrlTo("/htmltests/medium.html")).execute(); - Connection.Response slargeRes = session.newRequest().url(FileServlet.tlsUrlTo("/htmltests/large.html")).execute(); + Connection.Response smedRes = session.newRequest(FileServlet.tlsUrlTo("/htmltests/medium.html")).execute(); + Connection.Response slargeRes = session.newRequest(FileServlet.tlsUrlTo("/htmltests/large.html")).execute(); assertEquals("Medium HTML", smedRes.parse().title()); assertEquals("Large HTML", slargeRes.parse().title()); } + + @ParameterizedTest @MethodSource("echoUrls") + void canAuthenticateToProxy(String url) throws IOException { + int closed = TestServer.closeAuthedProxyConnections(); // reset any 
existing authed connections from previous tests, so we can test the auth flow + + // the proxy wants auth, but not the server. HTTP and HTTPS, so tests direct proxy and CONNECT + Connection session = Jsoup.newSession() + .proxy(proxy.hostname, proxy.authedPort).ignoreHttpErrors(true); + String password = AuthFilter.newProxyPassword(); + + // fail first + try { + Connection.Response execute = session.newRequest(url) + .execute(); + int code = execute.statusCode(); // no auth sent + assertEquals(HttpServletResponse.SC_PROXY_AUTHENTICATION_REQUIRED, code); + } catch (IOException e) { + // in CONNECT (for the HTTPS url), URLConnection will throw the proxy connect as a Stringly typed IO exception - "Unable to tunnel through proxy. Proxy returns "HTTP/1.1 407 Proxy Authentication Required"". (Not a response code) + assertTrue(e.getMessage().contains("407")); + } + + try { + AtomicInteger count = new AtomicInteger(0); + Connection.Response res = session.newRequest(url) + .auth(ctx -> { + count.incrementAndGet(); + return ctx.credentials(AuthFilter.ProxyUser, password + "wrong"); // incorrect + }) + .execute(); + assertEquals(MaxAttempts, count.get()); + assertEquals(HttpServletResponse.SC_PROXY_AUTHENTICATION_REQUIRED, res.statusCode()); + } catch (IOException e) { + assertTrue(e.getMessage().contains("407")); + } + + AtomicInteger successCount = new AtomicInteger(0); + Connection.Response successRes = session.newRequest(url) + .auth(ctx -> { + successCount.incrementAndGet(); + return ctx.credentials(AuthFilter.ProxyUser, password); // correct + }) + .execute(); + assertEquals(1, successCount.get()); + assertEquals(HttpServletResponse.SC_OK, successRes.statusCode()); + } + + @ParameterizedTest @MethodSource("echoUrls") + void canAuthToProxyAndServer(String url) throws IOException { + String serverPassword = AuthFilter.newServerPassword(); + String proxyPassword = AuthFilter.newProxyPassword(); + AtomicInteger count = new AtomicInteger(0); + + Connection session = 
Jsoup.newSession() // both proxy and server will want auth + .proxy(proxy.hostname, proxy.authedPort) + .header(AuthFilter.WantsServerAuthentication, "1") + .auth(auth -> { + count.incrementAndGet(); + + if (auth.isServer()) { + assertEquals(url, auth.url().toString()); + assertEquals(AuthFilter.ServerRealm, auth.realm()); + return auth.credentials(AuthFilter.ServerUser, serverPassword); + } else { + assertTrue(auth.isProxy()); + return auth.credentials(AuthFilter.ProxyUser, proxyPassword); + } + }); + + + Connection.Response res = session.newRequest(url).execute(); + assertEquals(200, res.statusCode()); + assertEquals(2, count.get()); // hit server and proxy auth stages + assertEquals("Webserver Environment Variables", res.parse().title()); + } } diff --git a/src/test/java/org/jsoup/integration/TestServer.java b/src/test/java/org/jsoup/integration/TestServer.java index 67b7b84769..777ccd3050 100644 --- a/src/test/java/org/jsoup/integration/TestServer.java +++ b/src/test/java/org/jsoup/integration/TestServer.java @@ -9,8 +9,11 @@ import org.eclipse.jetty.server.ServerConnector; import org.eclipse.jetty.server.SslConnectionFactory; import org.eclipse.jetty.server.handler.HandlerWrapper; +import org.eclipse.jetty.servlet.FilterHolder; +import org.eclipse.jetty.servlet.FilterMapping; import org.eclipse.jetty.servlet.ServletHandler; import org.eclipse.jetty.util.ssl.SslContextFactory; +import org.jsoup.integration.servlets.AuthFilter; import org.jsoup.integration.servlets.BaseServlet; import org.jsoup.integration.servlets.ProxyServlet; @@ -29,6 +32,7 @@ import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; import java.security.cert.CertificateException; +import java.util.concurrent.atomic.AtomicInteger; public class TestServer { static int Port; @@ -40,10 +44,11 @@ public class TestServer { private static final Server Jetty = newServer(); private static final ServletHandler JettyHandler = new ServletHandler(); private static final 
Server Proxy = newServer(); + private static final Server AuthedProxy = newServer(); private static final HandlerWrapper ProxyHandler = new HandlerWrapper(); + private static final HandlerWrapper AuthedProxyHandler = new HandlerWrapper(); private static final ProxySettings ProxySettings = new ProxySettings(); - private static Server newServer() { return new Server(new InetSocketAddress(Localhost, 0)); } @@ -51,6 +56,7 @@ private static Server newServer() { static { Jetty.setHandler(JettyHandler); Proxy.setHandler(ProxyHandler); + AuthedProxy.setHandler(AuthedProxyHandler); // TLS setup: try { @@ -72,19 +78,39 @@ public static void start() { try { Jetty.start(); + JettyHandler.addFilterWithMapping(new FilterHolder(new AuthFilter(false, false)), "/*", FilterMapping.ALL); Connector[] jcons = Jetty.getConnectors(); Port = ((ServerConnector) jcons[0]).getLocalPort(); TlsPort = ((ServerConnector) jcons[1]).getLocalPort(); - ProxyHandler.setHandler(ProxyServlet.createHandler()); + ProxyHandler.setHandler(ProxyServlet.createHandler(false)); // includes proxy, CONNECT proxy, and Auth filters Proxy.start(); ProxySettings.port = ((ServerConnector) Proxy.getConnectors()[0]).getLocalPort(); + + AuthedProxyHandler.setHandler(ProxyServlet.createHandler(true)); + AuthedProxy.start(); + ProxySettings.authedPort = ((ServerConnector) AuthedProxy.getConnectors()[0]).getLocalPort(); } catch (Exception e) { throw new IllegalStateException(e); } } } + /** + Close any current connections to the authed proxy. Tunneled connections only authenticate in their first + CONNECT, and may be kept alive and reused. So when we want to test unauthed - authed flows, we need to disconnect + them first. 
+ */ + static int closeAuthedProxyConnections() { + ServerConnector connector = (ServerConnector) AuthedProxy.getConnectors()[0]; + AtomicInteger count = new AtomicInteger(); + connector.getConnectedEndPoints().forEach(endPoint -> { + endPoint.close(); + count.getAndIncrement(); + }); + return count.get(); + } + public static ServletUrls map(Class servletClass) { synchronized (Jetty) { if (!Jetty.isStarted()) @@ -122,6 +148,7 @@ public static ProxySettings proxySettings() { public static class ProxySettings { final String hostname = Localhost; int port; + int authedPort; } private static void addHttpsConnector(File keystoreFile, Server server) { diff --git a/src/test/java/org/jsoup/integration/servlets/AuthFilter.java b/src/test/java/org/jsoup/integration/servlets/AuthFilter.java new file mode 100644 index 0000000000..7ad700924f --- /dev/null +++ b/src/test/java/org/jsoup/integration/servlets/AuthFilter.java @@ -0,0 +1,107 @@ +package org.jsoup.integration.servlets; + +import javax.servlet.Filter; +import javax.servlet.FilterChain; +import javax.servlet.FilterConfig; +import javax.servlet.ServletException; +import javax.servlet.ServletRequest; +import javax.servlet.ServletResponse; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Base64; + +/** + A filter to test basic authenticated requests. If the request header "X-Wants-ServerAuthentication" (or "X-Wants-ProxyAuthentication" when filtering for a proxy) is set, or if + alwaysWantsAuth is enabled, the filter is invoked, and requests must send the correct user authentication details.
+ */ +public class AuthFilter implements Filter { + public static final String WantsServerAuthentication = "X-Wants-ServerAuthentication"; + public static final String ServerUser = "admin"; + public static final String ServerRealm = "jsoup test server authentication realm"; + private static volatile String ServerPassword = newServerPassword(); + + public static final String WantsProxyAuthentication = "X-Wants-ProxyAuthentication"; + public static final String ProxyUser = "foxyproxy"; + public static final String ProxyRealm = "jsoup test proxy authentication realm"; + private static volatile String ProxyPassword = newProxyPassword(); + + private final boolean alwaysWantsAuth; // we run a particular port that always wants auth - so the CONNECT tunnels can be authed. (The Java proxy tunnel CONNECT request strips the wants-auth headers) + private final boolean forProxy; + private final String wantsHeader; + private final String authorizationHeader; + + /** + Creates an Authentication Filter with hardcoded credential expectations. + * @param alwaysWantsAuth true if this filter should always check for authentication, regardless of the Wants Auth header + * @param forProxy true if this wraps a Proxy and should use Proxy-Authenticate headers, credentials etc. False + * if wrapping the web server. + */ + public AuthFilter(boolean alwaysWantsAuth, boolean forProxy) { + this.alwaysWantsAuth = alwaysWantsAuth; + this.forProxy = forProxy; + + wantsHeader = forProxy ? WantsProxyAuthentication : WantsServerAuthentication; + authorizationHeader = forProxy ? "Proxy-Authorization" : "Authorization"; + } + + private static String newPassword() { + return "pass-" + Math.random(); + } + + // passwords get rotated in tests so that Java's auth cache is invalidated and a new auth callback occurs. + // requires tests hitting these are called serially. 
+ public static String newServerPassword() { + return ServerPassword = newPassword() + "-server"; + } + + public static String newProxyPassword() { + return ProxyPassword = newPassword() + "-proxy"; + } + + @Override public void init(FilterConfig filterConfig) throws ServletException {} + + @Override + public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException { + HttpServletRequest req = (HttpServletRequest) request; + HttpServletResponse res = (HttpServletResponse) response; + + boolean accessGranted = checkAuth(req); + if (accessGranted) { + chain.doFilter(request, response); + return; + } + + // Wants but failed auth - send appropriate header: + if (forProxy) { + res.setHeader("Proxy-Authenticate", "Basic realm=\"" + ProxyRealm + "\""); + // ^^ Duped in ProxyServlet for CONNECT + res.sendError(HttpServletResponse.SC_PROXY_AUTHENTICATION_REQUIRED); + } else { + res.setHeader("WWW-Authenticate", "Basic realm=\"" + ServerRealm + "\""); + res.sendError(HttpServletResponse.SC_UNAUTHORIZED); + } + } + + @Override public void destroy() {} + + public boolean checkAuth(HttpServletRequest req) { + if (alwaysWantsAuth || req.getHeader(wantsHeader) != null) { + String authHeader = req.getHeader(authorizationHeader); + if (authHeader != null) { + int space = authHeader.indexOf(' '); + if (space > 0) { + String value = authHeader.substring(space + 1); + String expected = forProxy ? 
+ (ProxyUser + ":" + ProxyPassword) : + (ServerUser + ":" + ServerPassword); + String base64 = Base64.getEncoder().encodeToString(expected.getBytes(StandardCharsets.UTF_8)); + return base64.equals(value); // if passed auth + } + } + return false; // unexpected header value + } + return true; // auth not required + } +} diff --git a/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java b/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java index 5fda42c428..bdd360bcf2 100644 --- a/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java +++ b/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java @@ -4,6 +4,8 @@ import org.eclipse.jetty.proxy.AsyncProxyServlet; import org.eclipse.jetty.proxy.ConnectHandler; import org.eclipse.jetty.server.Handler; +import org.eclipse.jetty.servlet.FilterHolder; +import org.eclipse.jetty.servlet.FilterMapping; import org.eclipse.jetty.servlet.ServletHandler; import org.eclipse.jetty.servlet.ServletHolder; import org.jsoup.integration.TestServer; @@ -11,15 +13,24 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import static org.jsoup.integration.servlets.AuthFilter.ProxyRealm; + public class ProxyServlet extends AsyncProxyServlet { public static TestServer.ProxySettings ProxySettings = TestServer.proxySettings(); public static String Via = "1.1 jsoup test proxy"; - public static Handler createHandler() { + static { + System.setProperty("jdk.http.auth.tunneling.disabledSchemes", ""); + // removes Basic, which is otherwise excluded from auth for CONNECT tunnels + } + + public static Handler createHandler(boolean alwaysAuth) { // ConnectHandler wraps this ProxyServlet and handles CONNECT, which sets up a tunnel for HTTPS requests and is // opaque to the proxy. The ProxyServlet handles simple HTTP requests. 
- ConnectHandler connectHandler = new ConnectHandler(); + AuthFilter authFilter = new AuthFilter(alwaysAuth, true); + ConnectHandler connectHandler = new ConnectProxy(authFilter); ServletHandler proxyHandler = new ServletHandler(); + proxyHandler.addFilterWithMapping(new FilterHolder(authFilter), "/*", FilterMapping.ALL); // auth for HTTP proxy ServletHolder proxyServletHolder = new ServletHolder(ProxyServlet.class); // Holder wraps as it requires maxThreads initialization proxyServletHolder.setAsyncSupported(true); proxyServletHolder.setInitParameter("maxThreads", "8"); @@ -34,4 +45,25 @@ protected void onServerResponseHeaders(HttpServletRequest clientRequest, HttpSer super.onServerResponseHeaders(clientRequest, proxyResponse, serverResponse); proxyResponse.addHeader("Via", Via); } + + /** Supports CONNECT tunnels */ + static class ConnectProxy extends ConnectHandler { + final AuthFilter authFilter; + + public ConnectProxy(AuthFilter authFilter) { + this.authFilter = authFilter; + } + + @Override + protected boolean handleAuthentication(HttpServletRequest req, HttpServletResponse res, String address) { + boolean accessGranted = authFilter.checkAuth(req); + //System.err.println("CONNECT AUTH: " + accessGranted); + + // need to add the desired auth header if not granted. Returning false here will also send 407 header + if (!accessGranted) { + res.setHeader("Proxy-Authenticate", "Basic realm=\"" + ProxyRealm + "\""); + } + return accessGranted; + } + } }