From 1123dd210a4b06b56986029386e5562aad63aa0d Mon Sep 17 00:00:00 2001
From: Jonathan Hedley
Date: Fri, 10 Nov 2023 10:48:01 +1100
Subject: [PATCH] Added support for per-request authentication to Jsoup.connect
(#2046)
Added support for per-request authentication
Uses the multi-version support so that in Java versions that support it (9+), an authenticator is set via `java.net.HttpURLConnection.setAuthenticator()`.
On Java 8, we set the system-wide default authenticator, and use ThreadLocals to enable per-request authenticators.
Also adds tests for HTTP and HTTPS server and proxy basic authentication.
---
CHANGES | 3 +
pom.xml | 1 +
src/main/java/org/jsoup/Connection.java | 103 +++++++++++++++++
.../jsoup/helper/AuthenticationHandler.java | 90 +++++++++++++++
.../java/org/jsoup/helper/HttpConnection.java | 21 ++++
.../jsoup/helper/RequestAuthenticator.java | 92 +++++++++++++++
.../org/jsoup/helper/RequestAuthHandler.java | 24 ++++
.../helper/AuthenticationHandlerTest.java | 7 ++
.../org/jsoup/helper/HttpConnectionTest.java | 36 ++++++
.../org/jsoup/integration/ConnectTest.java | 78 ++++++++++++-
.../java/org/jsoup/integration/ProxyTest.java | 92 ++++++++++++++-
.../org/jsoup/integration/TestServer.java | 31 ++++-
.../integration/servlets/AuthFilter.java | 107 ++++++++++++++++++
.../integration/servlets/ProxyServlet.java | 36 +++++-
14 files changed, 708 insertions(+), 13 deletions(-)
create mode 100644 src/main/java/org/jsoup/helper/AuthenticationHandler.java
create mode 100644 src/main/java/org/jsoup/helper/RequestAuthenticator.java
create mode 100644 src/main/java9/org/jsoup/helper/RequestAuthHandler.java
create mode 100644 src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java
create mode 100644 src/test/java/org/jsoup/integration/servlets/AuthFilter.java
diff --git a/CHANGES b/CHANGES
index ab2de95a32..2bbe999171 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,9 @@
jsoup changelog
Release 1.17.1 [PENDING]
+ * Improvement: in Jsoup.connect(), added support for request-level authentication, supporting authentication to
+ proxies and to servers.
+
* Improvement: in the Elements list, added direct support for `#set(index, element)`, `#remove(index)`,
`#remove(object)`, `#clear()`, `#removeAll(collection)`, `#retainAll(collection)`, `#removeIf(filter)`,
`#replaceAll(operator)`. These methods update the original DOM, as well as the Elements list.
diff --git a/pom.xml b/pom.xml
index 5a0e1dd006..4facf9e4ee 100644
--- a/pom.xml
+++ b/pom.xml
@@ -94,6 +94,7 @@
java.io.UncheckedIOException
java.util.function.Predicate
java.util.function.UnaryOperator
+ java.net.HttpURLConnection
diff --git a/src/main/java/org/jsoup/Connection.java b/src/main/java/org/jsoup/Connection.java
index 04d55c30dd..43d302cc7a 100644
--- a/src/main/java/org/jsoup/Connection.java
+++ b/src/main/java/org/jsoup/Connection.java
@@ -1,5 +1,6 @@
package org.jsoup;
+import org.jsoup.helper.RequestAuthenticator;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
@@ -9,6 +10,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
+import java.net.Authenticator;
import java.net.CookieStore;
import java.net.Proxy;
import java.net.URL;
@@ -69,6 +71,28 @@ public final boolean hasBody() {
*/
Connection newRequest();
+ /**
+ Creates a new request for the supplied URL, using this Connection as the session-state and to initialize the connection
+ settings (which may then be independently changed on the returned {@link Connection.Request} object).
+ @param url URL for the new request
+ @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request
+ @since 1.17.1
+ */
+ default Connection newRequest(String url) {
+ return newRequest().url(url);
+ }
+
+ /**
+ Creates a new request for the supplied URL, using this Connection as the session-state and to initialize the connection
+ settings (which may then be independently changed on the returned {@link Connection.Request} object).
+ @param url URL for the new request
+ @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request
+ @since 1.17.1
+ */
+ default Connection newRequest(URL url) {
+ return newRequest().url(url);
+ }
+
/**
* Set the request URL to fetch. The protocol must be HTTP or HTTPS.
* @param url URL to connect to
@@ -322,6 +346,64 @@ For GET requests, data parameters will be sent on the request query string. F
*/
Connection postDataCharset(String charset);
+ /**
+ Set the authenticator to use for this connection, enabling requests to URLs, and via proxies, that require
+ authentication credentials.
+ <p>The authentication scheme used is automatically detected during the request execution.
+ Supported schemes (subject to the platform) are {@code basic}, {@code digest}, {@code NTLM},
+ and {@code Kerberos}.
+
+ To use, supply a {@link RequestAuthenticator} function that:
+
+ - validates the URL that is requesting authentication, and
+ - returns the appropriate credentials (username and password)
+
+
+
+ For example, to authenticate both to a proxy and a downstream web server:
+
+ Connection session = Jsoup.newSession()
+ .proxy("proxy.example.com", 8080)
+ .auth(auth -> {
+ if (auth.isServer()) { // provide credentials for the request url
+ Validate.isTrue(auth.url().getHost().equals("example.com"));
+ // check that we're sending credentials where we expect, and not redirected out
+ return auth.credentials("username", "password");
+ } else { // auth.isProxy()
+ return auth.credentials("proxy-user", "proxy-password");
+ }
+ });
+
+ Connection.Response response = session.newRequest("https://example.com/adminzone/").execute();
+
+
+
+ The system may cache the authentication and use it for subsequent requests to the same resource.
+
+ Implementation notes
+ For compatibility, on a Java 8 platform, authentication is set up via the system-wide default
+ {@link java.net.Authenticator#setDefault(Authenticator)} method via a ThreadLocal delegator. Whilst the
+ authenticator used is request specific and thread-safe, if you have other calls to {@code setDefault}, they will be
+ incompatible with this implementation.
+ On Java 9 and above, the preceding note does not apply; authenticators are directly set on the request.
+ If you are attempting to authenticate to a proxy that uses the {@code basic} scheme and will be fetching HTTPS
+ URLs, you need to configure your Java platform to enable that, by setting the
+ {@code jdk.http.auth.tunneling.disabledSchemes} system property to {@code ""}.
+ This must be executed prior to any authorization attempts. E.g.:
+
+ static {
+ System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");
+ // removes Basic, which is otherwise excluded from auth for CONNECT tunnels
+ }
+
+ * @param authenticator the authenticator to use in this connection
+ * @return this Connection, for chaining
+ * @since 1.17.1
+ */
+ default Connection auth(@Nullable RequestAuthenticator authenticator) {
+ throw new UnsupportedOperationException();
+ }
+
/**
* Execute the request as a GET, and parse the result.
* @return parsed Document
@@ -699,6 +781,27 @@ interface Request extends Base {
*/
String postDataCharset();
+ /**
+ Set the authenticator to use for this request.
+ See {@link Connection#auth(RequestAuthenticator) Connection.auth(authenticator)} for examples and
+ implementation notes.
+ * @param authenticator the authenticator
+ * @return this Request, for chaining.
+ * @since 1.17.1
+ */
+ default Request auth(@Nullable RequestAuthenticator authenticator) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ Get the RequestAuthenticator, if any, that will be used on this request.
+ * @return the RequestAuthenticator, or {@code null} if not set
+ * @since 1.17.1
+ */
+ @Nullable
+ default RequestAuthenticator auth() {
+ throw new UnsupportedOperationException();
+ }
}
/**
diff --git a/src/main/java/org/jsoup/helper/AuthenticationHandler.java b/src/main/java/org/jsoup/helper/AuthenticationHandler.java
new file mode 100644
index 0000000000..0dade52bce
--- /dev/null
+++ b/src/main/java/org/jsoup/helper/AuthenticationHandler.java
@@ -0,0 +1,90 @@
+package org.jsoup.helper;
+
+import javax.annotation.Nullable;
+import java.lang.reflect.Constructor;
+import java.net.Authenticator;
+import java.net.HttpURLConnection;
+import java.net.PasswordAuthentication;
+
+/**
+ Handles per request Authenticator-based authentication. Loads the class {@code org.jsoup.helper.RequestAuthHandler} if
+ per-request Authenticators are supported (Java 9+), or installs a system-wide Authenticator that delegates to a request
+ ThreadLocal.
+ */
+class AuthenticationHandler extends Authenticator {
+ static final int MaxAttempts = 5; // max authentication attempts per request. allows for multiple auths (e.g. proxy and server) in one request, but saves otherwise 20 requests if credentials are incorrect.
+ static AuthShim handler;
+
+ static {
+ try {
+ //noinspection unchecked
+ Class<AuthShim> perRequestClass = (Class<AuthShim>) Class.forName("org.jsoup.helper.RequestAuthHandler");
+ Constructor<AuthShim> constructor = perRequestClass.getConstructor();
+ handler = constructor.newInstance();
+ } catch (ClassNotFoundException e) { // per-request shim is not available: fall back to the system-wide handler
+ handler = new GlobalHandler();
+ } catch (Exception e) { // the shim class was found but could not be instantiated
+ throw new IllegalStateException(e);
+ }
+ }
+
+ @Nullable RequestAuthenticator auth;
+ int attemptCount = 0;
+
+ AuthenticationHandler() {}
+
+ AuthenticationHandler(RequestAuthenticator auth) {
+ this.auth = auth;
+ }
+
+ /**
+ Authentication callback, called by HttpURLConnection - either as system-wide default (Java 8) or per HttpURLConnection (Java 9+)
+ * @return credentials, or null if not attempting to auth.
+ */
+ @Nullable @Override public final PasswordAuthentication getPasswordAuthentication() {
+ AuthenticationHandler delegate = handler.get(this);
+ if (delegate == null) return null; // this request has no auth handler
+ delegate.attemptCount++;
+ // if the password returned fails, Java will repeatedly retry the request with a new password auth hit (because
+ // it may be an interactive prompt, and the user could eventually get it right). But in Jsoup's context, the
+ // auth will either be correct or not, so just abandon
+ if (delegate.attemptCount > MaxAttempts)
+ return null;
+ if (delegate.auth == null)
+ return null; // detached - would have been the Global Authenticator (not a delegate)
+
+ RequestAuthenticator.Context ctx = new RequestAuthenticator.Context(
+ this.getRequestingURL(), this.getRequestorType(), this.getRequestingPrompt());
+ return delegate.auth.authenticate(ctx);
+ }
+
+ interface AuthShim {
+ void enable(RequestAuthenticator auth, HttpURLConnection con);
+
+ void remove();
+
+ @Nullable AuthenticationHandler get(AuthenticationHandler helper);
+ }
+
+ /**
+ On Java 8 we install a system-wide Authenticator, which pulls the delegating Auth from a ThreadLocal pool.
+ */
+ static class GlobalHandler implements AuthShim {
+ static ThreadLocal<AuthenticationHandler> authenticators = new ThreadLocal<>();
+ static {
+ Authenticator.setDefault(new AuthenticationHandler());
+ }
+
+ @Override public void enable(RequestAuthenticator auth, HttpURLConnection con) {
+ authenticators.set(new AuthenticationHandler(auth));
+ }
+
+ @Override public void remove() {
+ authenticators.remove();
+ }
+
+ @Override public AuthenticationHandler get(AuthenticationHandler helper) {
+ return authenticators.get();
+ }
+ }
+}
diff --git a/src/main/java/org/jsoup/helper/HttpConnection.java b/src/main/java/org/jsoup/helper/HttpConnection.java
index 6bc52e1c7a..03cbf358fa 100644
--- a/src/main/java/org/jsoup/helper/HttpConnection.java
+++ b/src/main/java/org/jsoup/helper/HttpConnection.java
@@ -377,6 +377,10 @@ public Connection postDataCharset(String charset) {
return this;
}
+ @Override public Connection auth(@Nullable RequestAuthenticator authenticator) {
+ req.auth(authenticator); // delegate to the underlying request, which holds the authenticator
+ return this;
+ }
@SuppressWarnings("unchecked")
private static abstract class Base> implements Connection.Base {
@@ -596,6 +600,7 @@ public static class Request extends HttpConnection.Base impl
private String postDataCharset = DataUtil.defaultCharsetName;
private @Nullable SSLSocketFactory sslSocketFactory;
private CookieManager cookieManager;
+ private @Nullable RequestAuthenticator authenticator;
private volatile boolean executing = false;
Request() {
@@ -626,6 +631,7 @@ public static class Request extends HttpConnection.Base impl
parserDefined = copy.parserDefined;
sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share
cookieManager = copy.cookieManager;
+ authenticator = copy.authenticator;
executing = false;
}
@@ -764,6 +770,15 @@ public String postDataCharset() {
CookieManager cookieManager() {
return cookieManager;
}
+
+ @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) {
+ this.authenticator = authenticator;
+ return this;
+ }
+
+ @Override @Nullable public RequestAuthenticator auth() {
+ return authenticator;
+ }
}
public static class Response extends HttpConnection.Base implements Connection.Response {
@@ -898,6 +913,10 @@ else if (methodHasBody)
throw e;
} finally {
req.executing = false;
+
+ // detach any thread local auth delegate
+ if (req.authenticator != null)
+ AuthenticationHandler.handler.remove();
}
res.executed = true;
@@ -1008,6 +1027,8 @@ private static HttpURLConnection createConnection(HttpConnection.Request req) th
if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection)
((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory());
+ if (req.authenticator != null)
+ AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally
if (req.method().hasBody())
conn.setDoOutput(true);
CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store
diff --git a/src/main/java/org/jsoup/helper/RequestAuthenticator.java b/src/main/java/org/jsoup/helper/RequestAuthenticator.java
new file mode 100644
index 0000000000..3284f8dc7a
--- /dev/null
+++ b/src/main/java/org/jsoup/helper/RequestAuthenticator.java
@@ -0,0 +1,92 @@
+package org.jsoup.helper;
+
+import org.jsoup.Connection;
+
+import javax.annotation.Nullable;
+import java.net.Authenticator;
+import java.net.PasswordAuthentication;
+import java.net.URL;
+
+/**
+ A {@code RequestAuthenticator} is used in {@link Connection} to authenticate if required to proxies and web
+ servers. See {@link Connection#auth(RequestAuthenticator)}.
+ */
+@FunctionalInterface
+public interface RequestAuthenticator {
+
+ /**
+ Provide authentication credentials for the provided Request Context.
+ * @param auth the request context including URL, type (Server or Proxy), and realm.
+ * @return credentials for the request. May return {@code null} if they are not applicable -- but the request will
+ * likely fail, as this method is only called if the request asked for authentication.
+ */
+ @Nullable
+ PasswordAuthentication authenticate(Context auth);
+
+ /**
+ Provides details for the request, to determine the appropriate credentials to return.
+ */
+ class Context {
+ private final URL url;
+ private final Authenticator.RequestorType type;
+ private final String realm;
+
+ Context(URL url, Authenticator.RequestorType type, String realm) {
+ this.url = url;
+ this.type = type;
+ this.realm = realm;
+ }
+
+ /**
+ Get the URL that is being requested.
+ * @return URL
+ */
+ public URL url() {
+ return url;
+ }
+
+ /**
+ Get the requestor type: {@link Authenticator.RequestorType#PROXY PROXY} if a proxy is requesting
+ authentication, or {@link Authenticator.RequestorType#SERVER SERVER} if the URL's server is requesting.
+ * @return requestor type
+ */
+ public Authenticator.RequestorType type() {
+ return type;
+ }
+
+ /**
+ Get the realm of the authentication request.
+ * @return realm of the authentication request
+ */
+ public String realm() {
+ return realm;
+ }
+
+ /**
+ Gets if the authentication request is for a proxy.
+ * @return true if type==proxy.
+ */
+ public boolean isProxy() {
+ return type == Authenticator.RequestorType.PROXY;
+ }
+
+ /**
+ Gets if the authentication request is for a server.
+ * @return true if type==server.
+ */
+ public boolean isServer() {
+ return type == Authenticator.RequestorType.SERVER;
+ }
+
+ /**
+ Helper method to return a PasswordAuthentication object.
+ * @param username username credential
+ * @param password password credential; must not be {@code null}, as it is converted to a char array
+ * @return a constructed PasswordAuthentication
+ */
+ public PasswordAuthentication credentials(String username, String password) {
+ return new PasswordAuthentication(username, password.toCharArray());
+ }
+ }
+
+}
diff --git a/src/main/java9/org/jsoup/helper/RequestAuthHandler.java b/src/main/java9/org/jsoup/helper/RequestAuthHandler.java
new file mode 100644
index 0000000000..0df80de209
--- /dev/null
+++ b/src/main/java9/org/jsoup/helper/RequestAuthHandler.java
@@ -0,0 +1,24 @@
+package org.jsoup.helper;
+
+import java.net.HttpURLConnection;
+
+/**
+ A per-request authentication shim, used in Java 9+. Attaches a new AuthenticationHandler directly to each connection.
+ */
+class RequestAuthHandler implements AuthenticationHandler.AuthShim {
+ public RequestAuthHandler() {}
+
+ @Override public void enable(RequestAuthenticator auth, HttpURLConnection con) {
+ AuthenticationHandler authenticator = new AuthenticationHandler(auth);
+ con.setAuthenticator(authenticator);
+ }
+
+ @Override public void remove() {
+ // noop: nothing to detach here; the Global Handler would remove its thread-local
+ }
+
+ @Override public AuthenticationHandler get(AuthenticationHandler helper) {
+ // the supplied handler is itself the delegate; the Global Handler would get its thread-local instead
+ return helper;
+ }
+}
diff --git a/src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java b/src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java
new file mode 100644
index 0000000000..c0dd692d37
--- /dev/null
+++ b/src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java
@@ -0,0 +1,7 @@
+package org.jsoup.helper;
+
+public class AuthenticationHandlerTest {
+ public static final int MaxAttempts = AuthenticationHandler.MaxAttempts;
+
+ // tests are in ConnectTest, ProxyTest. This class just makes the MaxAttempts visible for test.
+}
diff --git a/src/test/java/org/jsoup/helper/HttpConnectionTest.java b/src/test/java/org/jsoup/helper/HttpConnectionTest.java
index 8df0f80397..8ee6a16600 100644
--- a/src/test/java/org/jsoup/helper/HttpConnectionTest.java
+++ b/src/test/java/org/jsoup/helper/HttpConnectionTest.java
@@ -6,8 +6,11 @@
import org.jsoup.integration.ParseTest;
import org.junit.jupiter.api.Test;
+import javax.annotation.Nullable;
import java.io.IOException;
+import java.net.Authenticator;
import java.net.MalformedURLException;
+import java.net.PasswordAuthentication;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
@@ -373,4 +376,37 @@ public void caseInsensitiveHeaders(Locale locale) {
String actual = connect.request().header("Key");
assertEquals(value, actual);
}
+
+ @Test void setAuth() throws MalformedURLException {
+ Connection con = Jsoup.newSession();
+
+ assertNull(con.request().auth()); // no authenticator until one is set
+
+ RequestAuthenticator auth1 = new RequestAuthenticator() {
+ @Override public PasswordAuthentication authenticate(Context auth) {
+ return auth.credentials("foo", "bar");
+ }
+ };
+
+ RequestAuthenticator auth2 = new RequestAuthenticator() {
+ @Override public PasswordAuthentication authenticate(Context auth) {
+ return auth.credentials("qux", "baz");
+ }
+ };
+
+ con.auth(auth1); // set via the Connection
+ assertSame(auth1, con.request().auth());
+
+ con.auth(auth2); // a later call replaces the earlier authenticator
+ assertSame(auth2, con.request().auth());
+
+ con.request().auth(auth1); // set directly on the Request
+ assertSame(auth1, con.request().auth());
+
+ PasswordAuthentication creds = auth1.authenticate(
+ new RequestAuthenticator.Context(new URL("http://example.com"), Authenticator.RequestorType.SERVER, "Realm"));
+ assertNotNull(creds);
+ assertEquals("foo", creds.getUserName());
+ assertEquals("bar", new String(creds.getPassword()));
+ }
}
diff --git a/src/test/java/org/jsoup/integration/ConnectTest.java b/src/test/java/org/jsoup/integration/ConnectTest.java
index 424ba4877d..612ea600e4 100644
--- a/src/test/java/org/jsoup/integration/ConnectTest.java
+++ b/src/test/java/org/jsoup/integration/ConnectTest.java
@@ -19,17 +19,21 @@
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
+import javax.servlet.http.HttpServletResponse;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.net.Authenticator;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.file.Files;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
+import static org.jsoup.helper.AuthenticationHandlerTest.MaxAttempts;
import static org.jsoup.helper.HttpConnection.CONTENT_TYPE;
import static org.jsoup.helper.HttpConnection.MULTIPART_FORM_DATA;
import static org.jsoup.integration.UrlConnectTest.browserUa;
@@ -745,11 +749,11 @@ public void maxBodySizeInReadToByteBuffer() throws IOException {
assertEquals("", ihVal("Query String", resultDoc));
// new request to echo, should not have form data, but should have cookies from implicit session
- Document newEcho = submit.newRequest().url(echoUrl).get();
+ Document newEcho = submit.newRequest(echoUrl).get();
assertEquals("One=EchoServlet; One=Root", ihVal("Cookie", newEcho));
assertEquals("", ihVal("Query String", newEcho));
- Document cookieDoc = submit.newRequest().url(cookieUrl).get();
+ Document cookieDoc = submit.newRequest(cookieUrl).get();
assertEquals("CookieServlet", ihVal("One", cookieDoc)); // different cookie path
}
@@ -760,7 +764,7 @@ public void maxBodySizeInReadToByteBuffer() throws IOException {
String startUrl = FileServlet.urlTo("/htmltests/form-tests.html");
Connection session = Jsoup.newSession();
- Document loginDoc = session.newRequest().url(startUrl).get();
+ Document loginDoc = session.newRequest(startUrl).get();
FormElement form = loginDoc.expectForm("#login2");
assertNotNull(form);
String username = "admin";
@@ -777,7 +781,7 @@ public void maxBodySizeInReadToByteBuffer() throws IOException {
assertEquals(Connection.Method.POST, postRes.method());
Document resultDoc = postRes.parse();
- Document echo2 = resultDoc.connection().newRequest().url(echoUrl).get();
+ Document echo2 = resultDoc.connection().newRequest(echoUrl).get();
assertEquals("", ihVal("Query String", echo2)); // should not re-send the data
assertEquals("One=EchoServlet; One=Root", ihVal("Cookie", echo2));
}
@@ -804,4 +808,70 @@ public void maxBodySizeInReadToByteBuffer() throws IOException {
private static Stream echoUrls() {
return Stream.of(EchoServlet.Url, EchoServlet.TlsUrl);
}
+
+ @ParameterizedTest @MethodSource("echoUrls")
+ void failsIfNotAuthenticated(String url) throws IOException {
+ String password = AuthFilter.newServerPassword(); // we don't send it, but ensures cache won't hit
+ Connection.Response res = Jsoup.connect(url)
+ .header(AuthFilter.WantsServerAuthentication, "1")
+ .ignoreHttpErrors(true)
+ .execute();
+
+ assertEquals(401, res.statusCode());
+ }
+
+ @ParameterizedTest @MethodSource("echoUrls")
+ void canAuthenticate(String url) throws IOException {
+ AtomicInteger count = new AtomicInteger(0);
+ String password = AuthFilter.newServerPassword();
+ Connection.Response res = Jsoup.connect(url)
+ .header(AuthFilter.WantsServerAuthentication, "1")
+ .auth(ctx -> {
+ count.incrementAndGet();
+ assertEquals(Authenticator.RequestorType.SERVER, ctx.type());
+ assertEquals("localhost", ctx.url().getHost());
+ assertEquals(AuthFilter.ServerRealm, ctx.realm());
+
+ return ctx.credentials(AuthFilter.ServerUser, password);
+ })
+ .execute();
+
+ assertEquals(1, count.get());
+
+ Document doc = res.parse();
+ assertTrue(ihVal("Authorization", doc).startsWith("Basic ")); // tests we set the auth header
+ }
+
+ @ParameterizedTest @MethodSource("echoUrls")
+ void incorrectAuth(String url) throws IOException {
+ Connection session = Jsoup.newSession()
+ .header(AuthFilter.WantsServerAuthentication, "1")
+ .ignoreHttpErrors(true);
+
+ String password = AuthFilter.newServerPassword();
+ int code = session.newRequest(url).execute().statusCode(); // no auth sent
+ assertEquals(HttpServletResponse.SC_UNAUTHORIZED, code);
+
+ AtomicInteger count = new AtomicInteger(0);
+ Connection.Response res = session.newRequest(url)
+ .auth(ctx -> {
+ count.incrementAndGet();
+ return ctx.credentials(AuthFilter.ServerUser, password + "wrong"); // incorrect
+ })
+ .execute();
+ assertEquals(MaxAttempts, count.get());
+ assertEquals(HttpServletResponse.SC_UNAUTHORIZED, res.statusCode());
+
+ AtomicInteger successCount = new AtomicInteger(0);
+ Connection.Response successRes = session.newRequest(url)
+ .auth(ctx -> {
+ successCount.incrementAndGet();
+ return ctx.credentials(AuthFilter.ServerUser, password); // correct
+ })
+ .execute();
+ assertEquals(1, successCount.get());
+ assertEquals(HttpServletResponse.SC_OK, successRes.statusCode());
+ }
+
+ // proxy connection tests are in ProxyTest
}
diff --git a/src/test/java/org/jsoup/integration/ProxyTest.java b/src/test/java/org/jsoup/integration/ProxyTest.java
index a02bb18ef5..18c1a60b49 100644
--- a/src/test/java/org/jsoup/integration/ProxyTest.java
+++ b/src/test/java/org/jsoup/integration/ProxyTest.java
@@ -2,6 +2,7 @@
import org.jsoup.Connection;
import org.jsoup.Jsoup;
+import org.jsoup.integration.servlets.AuthFilter;
import org.jsoup.integration.servlets.EchoServlet;
import org.jsoup.integration.servlets.FileServlet;
import org.jsoup.integration.servlets.HelloServlet;
@@ -14,11 +15,14 @@
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
+import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
+import static org.jsoup.helper.AuthenticationHandlerTest.MaxAttempts;
import static org.jsoup.integration.ConnectTest.ihVal;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.*;
/**
Tests Jsoup.connect proxy support */
@@ -49,6 +53,10 @@ private static Stream helloUrls() {
return Stream.of(HelloServlet.Url, HelloServlet.TlsUrl);
}
+ private static Stream<String> echoUrls() { // plain HTTP and TLS echo URLs, used as the parameterized test source
+ return Stream.of(EchoServlet.Url, EchoServlet.TlsUrl);
+ }
+
private static void assertVia(Connection.Response res) {
assertEquals(res.header("Via"), ProxyServlet.Via);
}
@@ -71,18 +79,92 @@ private static void assertVia(Connection.Response res) {
@Test void proxyForSession() throws IOException {
Connection session = Jsoup.newSession().proxy(proxy.hostname, proxy.port);
- Connection.Response medRes = session.newRequest().url(FileServlet.urlTo("/htmltests/medium.html")).execute();
- Connection.Response largeRes = session.newRequest().url(FileServlet.urlTo("/htmltests/large.html")).execute();
+ Connection.Response medRes = session.newRequest(FileServlet.urlTo("/htmltests/medium.html")).execute();
+ Connection.Response largeRes = session.newRequest(FileServlet.urlTo("/htmltests/large.html")).execute();
assertVia(medRes);
assertVia(largeRes);
assertEquals("Medium HTML", medRes.parse().title());
assertEquals("Large HTML", largeRes.parse().title());
- Connection.Response smedRes = session.newRequest().url(FileServlet.tlsUrlTo("/htmltests/medium.html")).execute();
- Connection.Response slargeRes = session.newRequest().url(FileServlet.tlsUrlTo("/htmltests/large.html")).execute();
+ Connection.Response smedRes = session.newRequest(FileServlet.tlsUrlTo("/htmltests/medium.html")).execute();
+ Connection.Response slargeRes = session.newRequest(FileServlet.tlsUrlTo("/htmltests/large.html")).execute();
assertEquals("Medium HTML", smedRes.parse().title());
assertEquals("Large HTML", slargeRes.parse().title());
}
+
+ @ParameterizedTest @MethodSource("echoUrls")
+ void canAuthenticateToProxy(String url) throws IOException {
+ int closed = TestServer.closeAuthedProxyConnections(); // reset any existing authed connections from previous tests, so we can test the auth flow
+
+ // the proxy wants auth, but not the server. HTTP and HTTPS, so tests direct proxy and CONNECT
+ Connection session = Jsoup.newSession()
+ .proxy(proxy.hostname, proxy.authedPort).ignoreHttpErrors(true);
+ String password = AuthFilter.newProxyPassword();
+
+ // fail first
+ try {
+ Connection.Response execute = session.newRequest(url)
+ .execute();
+ int code = execute.statusCode(); // no auth sent
+ assertEquals(HttpServletResponse.SC_PROXY_AUTHENTICATION_REQUIRED, code);
+ } catch (IOException e) {
+ // in CONNECT (for the HTTPS url), URLConnection will throw the proxy connect as a Stringly typed IO exception - "Unable to tunnel through proxy. Proxy returns "HTTP/1.1 407 Proxy Authentication Required"". (Not a response code)
+ assertTrue(e.getMessage().contains("407"));
+ }
+
+ try {
+ AtomicInteger count = new AtomicInteger(0);
+ Connection.Response res = session.newRequest(url)
+ .auth(ctx -> {
+ count.incrementAndGet();
+ return ctx.credentials(AuthFilter.ProxyUser, password + "wrong"); // incorrect
+ })
+ .execute();
+ assertEquals(MaxAttempts, count.get());
+ assertEquals(HttpServletResponse.SC_PROXY_AUTHENTICATION_REQUIRED, res.statusCode());
+ } catch (IOException e) {
+ assertTrue(e.getMessage().contains("407"));
+ }
+
+ AtomicInteger successCount = new AtomicInteger(0);
+ Connection.Response successRes = session.newRequest(url)
+ .auth(ctx -> {
+ successCount.incrementAndGet();
+ return ctx.credentials(AuthFilter.ProxyUser, password); // correct
+ })
+ .execute();
+ assertEquals(1, successCount.get());
+ assertEquals(HttpServletResponse.SC_OK, successRes.statusCode());
+ }
+
+ @ParameterizedTest @MethodSource("echoUrls")
+ void canAuthToProxyAndServer(String url) throws IOException {
+ String serverPassword = AuthFilter.newServerPassword();
+ String proxyPassword = AuthFilter.newProxyPassword();
+ AtomicInteger count = new AtomicInteger(0);
+
+ Connection session = Jsoup.newSession() // both proxy and server will want auth
+ .proxy(proxy.hostname, proxy.authedPort)
+ .header(AuthFilter.WantsServerAuthentication, "1")
+ .auth(auth -> {
+ count.incrementAndGet();
+
+ if (auth.isServer()) {
+ assertEquals(url, auth.url().toString());
+ assertEquals(AuthFilter.ServerRealm, auth.realm());
+ return auth.credentials(AuthFilter.ServerUser, serverPassword);
+ } else {
+ assertTrue(auth.isProxy());
+ return auth.credentials(AuthFilter.ProxyUser, proxyPassword);
+ }
+ });
+
+
+ Connection.Response res = session.newRequest(url).execute();
+ assertEquals(200, res.statusCode());
+ assertEquals(2, count.get()); // hit server and proxy auth stages
+ assertEquals("Webserver Environment Variables", res.parse().title());
+ }
}
diff --git a/src/test/java/org/jsoup/integration/TestServer.java b/src/test/java/org/jsoup/integration/TestServer.java
index 67b7b84769..777ccd3050 100644
--- a/src/test/java/org/jsoup/integration/TestServer.java
+++ b/src/test/java/org/jsoup/integration/TestServer.java
@@ -9,8 +9,11 @@
import org.eclipse.jetty.server.ServerConnector;
import org.eclipse.jetty.server.SslConnectionFactory;
import org.eclipse.jetty.server.handler.HandlerWrapper;
+import org.eclipse.jetty.servlet.FilterHolder;
+import org.eclipse.jetty.servlet.FilterMapping;
import org.eclipse.jetty.servlet.ServletHandler;
import org.eclipse.jetty.util.ssl.SslContextFactory;
+import org.jsoup.integration.servlets.AuthFilter;
import org.jsoup.integration.servlets.BaseServlet;
import org.jsoup.integration.servlets.ProxyServlet;
@@ -29,6 +32,7 @@
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
+import java.util.concurrent.atomic.AtomicInteger;
public class TestServer {
static int Port;
@@ -40,10 +44,11 @@ public class TestServer {
private static final Server Jetty = newServer();
private static final ServletHandler JettyHandler = new ServletHandler();
private static final Server Proxy = newServer();
+ private static final Server AuthedProxy = newServer();
private static final HandlerWrapper ProxyHandler = new HandlerWrapper();
+ private static final HandlerWrapper AuthedProxyHandler = new HandlerWrapper();
private static final ProxySettings ProxySettings = new ProxySettings();
-
private static Server newServer() {
return new Server(new InetSocketAddress(Localhost, 0));
}
@@ -51,6 +56,7 @@ private static Server newServer() {
static {
Jetty.setHandler(JettyHandler);
Proxy.setHandler(ProxyHandler);
+ AuthedProxy.setHandler(AuthedProxyHandler);
// TLS setup:
try {
@@ -72,19 +78,39 @@ public static void start() {
try {
Jetty.start();
+ JettyHandler.addFilterWithMapping(new FilterHolder(new AuthFilter(false, false)), "/*", FilterMapping.ALL);
Connector[] jcons = Jetty.getConnectors();
Port = ((ServerConnector) jcons[0]).getLocalPort();
TlsPort = ((ServerConnector) jcons[1]).getLocalPort();
- ProxyHandler.setHandler(ProxyServlet.createHandler());
+ ProxyHandler.setHandler(ProxyServlet.createHandler(false)); // includes proxy, CONNECT proxy, and Auth filters
Proxy.start();
ProxySettings.port = ((ServerConnector) Proxy.getConnectors()[0]).getLocalPort();
+
+ AuthedProxyHandler.setHandler(ProxyServlet.createHandler(true));
+ AuthedProxy.start();
+ ProxySettings.authedPort = ((ServerConnector) AuthedProxy.getConnectors()[0]).getLocalPort();
} catch (Exception e) {
throw new IllegalStateException(e);
}
}
}
+ /**
+ Disconnects every connection currently open to the authed proxy, and returns how many end points were closed.
+ A tunneled connection authenticates only on its initial CONNECT and may then be kept alive and reused, so tests
+ that move from an unauthed flow to an authed flow need to drop the existing connections first.
+ */
+ static int closeAuthedProxyConnections() {
+ final AtomicInteger closed = new AtomicInteger();
+ final ServerConnector authedConnector = (ServerConnector) AuthedProxy.getConnectors()[0];
+ authedConnector.getConnectedEndPoints().forEach(endPoint -> {
+ closed.incrementAndGet();
+ endPoint.close();
+ });
+ return closed.get();
+ }
+
public static ServletUrls map(Class extends BaseServlet> servletClass) {
synchronized (Jetty) {
if (!Jetty.isStarted())
@@ -122,6 +148,7 @@ public static ProxySettings proxySettings() {
public static class ProxySettings {
final String hostname = Localhost;
int port;
+ int authedPort;
}
private static void addHttpsConnector(File keystoreFile, Server server) {
diff --git a/src/test/java/org/jsoup/integration/servlets/AuthFilter.java b/src/test/java/org/jsoup/integration/servlets/AuthFilter.java
new file mode 100644
index 0000000000..7ad700924f
--- /dev/null
+++ b/src/test/java/org/jsoup/integration/servlets/AuthFilter.java
@@ -0,0 +1,107 @@
+package org.jsoup.integration.servlets;
+
+import javax.servlet.Filter;
+import javax.servlet.FilterChain;
+import javax.servlet.FilterConfig;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+
+/**
+ A filter to test basic authenticated requests. If the request header "X-Wants-ServerAuthentication" (or
+ "X-Wants-ProxyAuthentication" when wrapping a proxy) is set, or if alwaysWantsAuth is enabled, the filter is invoked,
+ and requests must send the correct user authentication details using the Basic scheme.
+ */
+public class AuthFilter implements Filter {
+ public static final String WantsServerAuthentication = "X-Wants-ServerAuthentication";
+ public static final String ServerUser = "admin";
+ public static final String ServerRealm = "jsoup test server authentication realm";
+ private static volatile String ServerPassword = newServerPassword();
+
+ public static final String WantsProxyAuthentication = "X-Wants-ProxyAuthentication";
+ public static final String ProxyUser = "foxyproxy";
+ public static final String ProxyRealm = "jsoup test proxy authentication realm";
+ private static volatile String ProxyPassword = newProxyPassword();
+
+ private final boolean alwaysWantsAuth; // we run a particular port that always wants auth - so the CONNECT tunnels can be authed. (The Java proxy tunnel CONNECT request strips the wants-auth headers)
+ private final boolean forProxy;
+ private final String wantsHeader;
+ private final String authorizationHeader;
+
+ /**
+ Creates an Authentication Filter with hardcoded credential expectations.
+ * @param alwaysWantsAuth true if this filter should always check for authentication, regardless of the Wants Auth header
+ * @param forProxy true if this wraps a Proxy and should use Proxy-Authenticate headers, credentials etc. False
+ * if wrapping the web server.
+ */
+ public AuthFilter(boolean alwaysWantsAuth, boolean forProxy) {
+ this.alwaysWantsAuth = alwaysWantsAuth;
+ this.forProxy = forProxy;
+
+ wantsHeader = forProxy ? WantsProxyAuthentication : WantsServerAuthentication;
+ authorizationHeader = forProxy ? "Proxy-Authorization" : "Authorization";
+ }
+
+ private static String newPassword() {
+ return "pass-" + Math.random();
+ }
+
+ // passwords get rotated in tests so that Java's auth cache is invalidated and a new auth callback occurs.
+ // requires tests hitting these are called serially.
+ public static String newServerPassword() {
+ return ServerPassword = newPassword() + "-server";
+ }
+
+ public static String newProxyPassword() {
+ return ProxyPassword = newPassword() + "-proxy";
+ }
+
+ @Override public void init(FilterConfig filterConfig) throws ServletException {}
+
+ /** Passes the request down the chain if checkAuth grants access; otherwise answers with a 401 or 407 challenge. */
+ @Override
+ public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException {
+ HttpServletRequest req = (HttpServletRequest) request;
+ HttpServletResponse res = (HttpServletResponse) response;
+
+ if (checkAuth(req)) {
+ chain.doFilter(request, response);
+ return;
+ }
+
+ // Wants but failed auth - send appropriate header:
+ if (forProxy) {
+ res.setHeader("Proxy-Authenticate", "Basic realm=\"" + ProxyRealm + "\"");
+ // ^^ Duped in ProxyServlet for CONNECT
+ res.sendError(HttpServletResponse.SC_PROXY_AUTHENTICATION_REQUIRED);
+ } else {
+ res.setHeader("WWW-Authenticate", "Basic realm=\"" + ServerRealm + "\"");
+ res.sendError(HttpServletResponse.SC_UNAUTHORIZED);
+ }
+ }
+
+ @Override public void destroy() {}
+
+ /** Checks the request's credentials. Returns true if auth is not wanted for this request, or if it carries a
+ Basic (scheme matched case-insensitively) authorization header with the expected user and current password. */
+ public boolean checkAuth(HttpServletRequest req) {
+ if (!alwaysWantsAuth && req.getHeader(wantsHeader) == null) {
+ return true; // auth not required
+ }
+ String authHeader = req.getHeader(authorizationHeader);
+ int space = authHeader == null ? -1 : authHeader.indexOf(' ');
+ if (space <= 0 || !"Basic".equalsIgnoreCase(authHeader.substring(0, space))) {
+ return false; // missing, malformed, or non-Basic authorization header
+ }
+ String expected = forProxy ?
+ (ProxyUser + ":" + ProxyPassword) :
+ (ServerUser + ":" + ServerPassword);
+ String base64 = Base64.getEncoder().encodeToString(expected.getBytes(StandardCharsets.UTF_8));
+ return base64.equals(authHeader.substring(space + 1)); // if passed auth
+ }
+}
diff --git a/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java b/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java
index 5fda42c428..bdd360bcf2 100644
--- a/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java
+++ b/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java
@@ -4,6 +4,8 @@
import org.eclipse.jetty.proxy.AsyncProxyServlet;
import org.eclipse.jetty.proxy.ConnectHandler;
import org.eclipse.jetty.server.Handler;
+import org.eclipse.jetty.servlet.FilterHolder;
+import org.eclipse.jetty.servlet.FilterMapping;
import org.eclipse.jetty.servlet.ServletHandler;
import org.eclipse.jetty.servlet.ServletHolder;
import org.jsoup.integration.TestServer;
@@ -11,15 +13,24 @@
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
+import static org.jsoup.integration.servlets.AuthFilter.ProxyRealm;
+
public class ProxyServlet extends AsyncProxyServlet {
public static TestServer.ProxySettings ProxySettings = TestServer.proxySettings();
public static String Via = "1.1 jsoup test proxy";
- public static Handler createHandler() {
+ static {
+ // Clear the disabled tunneling auth schemes: removes Basic, which is otherwise excluded from auth for CONNECT tunnels
+ System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");
+ }
+
+ public static Handler createHandler(boolean alwaysAuth) {
// ConnectHandler wraps this ProxyServlet and handles CONNECT, which sets up a tunnel for HTTPS requests and is
// opaque to the proxy. The ProxyServlet handles simple HTTP requests.
- ConnectHandler connectHandler = new ConnectHandler();
+ AuthFilter authFilter = new AuthFilter(alwaysAuth, true);
+ ConnectHandler connectHandler = new ConnectProxy(authFilter);
ServletHandler proxyHandler = new ServletHandler();
+ proxyHandler.addFilterWithMapping(new FilterHolder(authFilter), "/*", FilterMapping.ALL); // auth for HTTP proxy
ServletHolder proxyServletHolder = new ServletHolder(ProxyServlet.class); // Holder wraps as it requires maxThreads initialization
proxyServletHolder.setAsyncSupported(true);
proxyServletHolder.setInitParameter("maxThreads", "8");
@@ -34,4 +45,25 @@ protected void onServerResponseHeaders(HttpServletRequest clientRequest, HttpSer
super.onServerResponseHeaders(clientRequest, proxyResponse, serverResponse);
proxyResponse.addHeader("Via", Via);
}
+
+ /** A ConnectHandler for CONNECT tunnels that gates each request on the supplied AuthFilter's credential check. */
+ static class ConnectProxy extends ConnectHandler {
+ final AuthFilter authFilter;
+
+ public ConnectProxy(AuthFilter authFilter) {
+ this.authFilter = authFilter;
+ }
+
+ /** Delegates the credential check to the AuthFilter. When denied, adds the Proxy-Authenticate challenge header;
+ returning false here will also send the 407 header. */
+ @Override
+ protected boolean handleAuthentication(HttpServletRequest req, HttpServletResponse res, String address) {
+ if (authFilter.checkAuth(req)) {
+ return true;
+ }
+ // not granted: the desired auth header has to be added by us before reporting the failure
+ res.setHeader("Proxy-Authenticate", "Basic realm=\"" + ProxyRealm + "\"");
+ return false;
+ }
+ }
}