Skip to content

Commit

Permalink
Upgrade 2x Apache httpcomponent libraries used from their old 4.x ver…
Browse files Browse the repository at this point in the history
…ions

to the latest 5.x versions. Required updating imports and rewriting a bit
of code in the CssScanner class.
  • Loading branch information
davewichers committed Mar 30, 2022
1 parent 1bae19f commit b09b02f
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 52 deletions.
44 changes: 8 additions & 36 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -60,48 +60,20 @@
</profiles>

<dependencies>
<!-- dependency>
<groupId>net.sourceforge.nekohtml</groupId>
<artifactId>nekohtml</artifactId>
<version>1.9.22</version>
<exclusions>
<! exclude this as nekohtml uses an older xercesImpl and we want to eliminate the convergence mismatch >
<exclusion>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
</exclusion>
</exclusions>
</dependency -->
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>neko-htmlunit</artifactId>
<version>2.60.0</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
<exclusions>
<!-- exclude these as httpclient uses older versions of these libraries that we directly import and we want to eliminate the convergence mismatch -->
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</exclusion>
<!-- exclude obsolete commons-logging in favor of jcl-over-slf4j to ensure logs all pipe through slf4j-api -->
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</exclusion>
</exclusions>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.1.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.15</version>
<groupId>org.apache.httpcomponents.core5</groupId>
<artifactId>httpcore5</artifactId>
<version>5.1.3</version>
</dependency>
<dependency>
<groupId>org.apache.xmlgraphics</groupId>
Expand Down Expand Up @@ -280,7 +252,7 @@
<maxJdkVersion>1.8</maxJdkVersion>
<ignoreOptionals>true</ignoreOptionals>
<ignoredScopes>test</ignoredScopes>
<message>Dependencies shouldn't require Java 8+.</message>
<message>Dependencies shouldn't require Java 9+.</message>
</enforceBytecodeVersion>
<requireMavenVersion>
<version>3.3.9</version>
Expand All @@ -296,7 +268,7 @@
<rules>
<requireJavaVersion>
<version>1.7</version>
<message>Antisamy is written to support Java 7+.</message>
<message>Antisamy source code is written to support Java 7+.</message>
</requireJavaVersion>
</rules>
</configuration>
Expand Down
54 changes: 38 additions & 16 deletions src/main/java/org/owasp/validator/css/CssScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,17 @@

import org.apache.batik.css.parser.ParseException;
import org.apache.batik.css.parser.Parser;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.apache.hc.core5.http.ClassicHttpResponse;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpStatus;
import org.apache.hc.client5.http.ClientProtocolException;
import org.apache.hc.client5.http.classic.HttpClient;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
import org.apache.hc.core5.http.io.HttpClientResponseHandler;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.util.Timeout;
import org.owasp.validator.html.CleanResults;
import org.owasp.validator.html.InternalPolicy;
import org.owasp.validator.html.Policy;
Expand All @@ -70,7 +75,7 @@
*/
public class CssScanner {

protected static final int DEFAULT_TIMEOUT = 1000;
protected static final Timeout DEFAULT_TIMEOUT = Timeout.ofMilliseconds(1000);

private static final String CDATA = "^\\s*<!\\[CDATA\\[(.*)\\]\\]>\\s*$";

Expand Down Expand Up @@ -263,15 +268,14 @@ private void parseImportedStylesheets(LinkedList<URI> stylesheets, List<String>

// Ensure that we have appropriate timeout values so we don't
// get DoSed waiting for returns
int timeout = DEFAULT_TIMEOUT;
Timeout timeout = DEFAULT_TIMEOUT;
try {
timeout = Integer.parseInt(policy.getDirective(Policy.CONNECTION_TIMEOUT));
timeout = Timeout.ofMilliseconds(Long.parseLong(policy.getDirective(Policy.CONNECTION_TIMEOUT)));
} catch (NumberFormatException nfe) {
}

RequestConfig requestConfig = RequestConfig.custom()
.setSocketTimeout(timeout)
.setConnectTimeout(timeout)
.setResponseTimeout(timeout)

This comment has been minimized.

Copy link
@spassarop

spassarop Mar 31, 2022

Collaborator

@davewichers only left response timeout because of the comment above? setConnectTimeout is still valid, from what I've seen in decompiled classes it is used as timeout when opening the socket to connect to target host. After it has a successful socket bind, it sets the response timeout and then performs the actual request. So in total it would be 2 seconds max if everything is that delayed. I can add setConnectTimeout back if it seems reasonable to you, next to the response timeout.

This comment has been minimized.

Copy link
@davewichers

davewichers Mar 31, 2022

Author Collaborator

If I accidentally deleted setConnectTimeout(), but it's still valid, please put it back.

.setConnectionRequestTimeout(timeout)
.build();

Expand Down Expand Up @@ -302,13 +306,33 @@ private void parseImportedStylesheets(LinkedList<URI> stylesheets, List<String>
continue;
}

HttpGet stylesheetRequest = new HttpGet(stylesheetUri);
// Pulled directly from: https://github.com/apache/httpcomponents-client/blob/5.1.x/httpclient5/src/test/java/org/apache/hc/client5/http/examples/ClientWithResponseHandler.java
// Create a custom response handler to read in the stylesheet
final HttpClientResponseHandler<String> responseHandler = new HttpClientResponseHandler<String>() {

@Override
public String handleResponse(
final ClassicHttpResponse response) throws IOException {
final int status = response.getCode();
if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) {
final HttpEntity entity = response.getEntity();
try {
return entity != null ? EntityUtils.toString(entity) : null;
} catch (final ParseException | org.apache.hc.core5.http.ParseException ex) {
throw new ClientProtocolException(ex);
}
} else {
throw new ClientProtocolException("Unexpected response status: " + status);
}
}
};

byte[] stylesheet = null;

try {
String responseBody = httpClient.execute(new HttpGet(stylesheetUri), responseHandler);
// pull down stylesheet, observing size limit
HttpResponse response = httpClient.execute(stylesheetRequest);
stylesheet = EntityUtils.toByteArray(response.getEntity());
stylesheet = responseBody.getBytes();
if (stylesheet != null && stylesheet.length > sizeLimit) {
errorMessages.add(ErrorMessageUtil.getMessage(
messages,
Expand All @@ -323,8 +347,6 @@ private void parseImportedStylesheets(LinkedList<URI> stylesheets, List<String>
messages,
ErrorMessageUtil.ERROR_CSS_IMPORT_FAILURE,
new Object[] { HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()) }));
} finally {
stylesheetRequest.releaseConnection();
}

if (stylesheet != null) {
Expand Down

0 comments on commit b09b02f

Please sign in to comment.