Skip to content

Commit

Permalink
REDIRECT FIXING: * It turns out the redirect field in the CDX is neve…
Browse files Browse the repository at this point in the history
…r used to actually redirect!

* Set the cdx generation code path (HTTPRecordAnnotator) to always set the redirect field to '-'
* Added commented out code to AccessPoint to *enable* using the redirect field, possibly for later use.
* Fixed UrlOperations.resolveUrl(String, String) to return original url with spaces AND \r escaped, even if there was an error resolving
* Added UrlOperations.resolveUrl(String, String, String) to return the default argument specified, in case
of an error
  • Loading branch information
ikreymer committed Sep 5, 2012
1 parent 915af9d commit d258e24
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,8 @@ public void annotateHTTPContent(CaptureSearchResult result,
for (Header httpHeader : headers) {
if (httpHeader.getName().toUpperCase().equals(
UPPER_LOCATION)) {

String locationStr = httpHeader.getValue();
// TODO: "Location" is supposed to be absolute:

// Old Comment: "Location" is supposed to be absolute:
// (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html)
// (section 14.30) but Content-Location can be
// relative.
Expand All @@ -104,9 +103,16 @@ public void annotateHTTPContent(CaptureSearchResult result,
// headers...
// should we prefer one over the other?
// right now, we're ignoring "Content-Location"
result.setRedirectUrl(
UrlOperations.resolveUrl(result.getOriginalUrl(),
locationStr));
//

// NOTE: FILLING THE REDIRECT FIELD IN CDX IS DISABLED!
// If we want to support redirect in cdx as long as the url is valid,
// uncomment the following lines:

// String locationStr = httpHeader.getValue();
// result.setRedirectUrl(
// UrlOperations.resolveUrl(result.getOriginalUrl(),
// locationStr, "-"));

} else if(httpHeader.getName().toLowerCase().equals("content-type")) {
mimeType = transformHTTPMime(httpHeader.getValue());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public class UrlOperations {
Pattern.compile("(?:[0-9a-z_.:-]+@)?([0-9a-z_.-]++)");
private static final Pattern USERINFO_REGEX_SIMPLE =
Pattern.compile("^([0-9a-z_.:-]+)(?:@[0-9a-z_.-]++)");

/**
* Tests if the String argument looks like it could be a legitimate
* authority fragment of a URL, that is, is it an IP address, or, are the
Expand All @@ -146,15 +146,33 @@ public static boolean isAuthority(String authString) {
return (m != null) && m.matches();
}

/**
 * Resolves a possibly relative url against a base URL, falling back to a
 * minimally escaped copy of the input when resolution does not succeed.
 *
 * Delegates to the three-argument overload with a null default value; a
 * null result from that call is treated as a resolution failure.
 *
 * @param baseUrl the base URL against which the url should be resolved
 * @param url the URL, possibly relative, to make absolute
 * @return the resolved URL, or, on failure, the original url with each
 *         space replaced by %20 and each carriage return replaced by %0D
 */
public static String resolveUrl(String baseUrl, String url) {
    final String resolved = resolveUrl(baseUrl, url, null);
    if (resolved != null) {
        return resolved;
    }
    // Resolution failed: hand back something close to what came in, but
    // never let a raw space or carriage return through.
    return url.replace(" ", "%20").replace("\r", "%0D");
}

/**
* Resolve a possibly relative url argument against a base URL.
* @param baseUrl the base URL against which the url should be resolved
* @param url the URL, possibly relative, to make absolute.
* @return url resolved against baseUrl, unless it is absolute already, and
* further transformed by whatever escaping normally takes place with a
* UURI.
* In case of error, return the defaultValue
*/
public static String resolveUrl(String baseUrl, String url) {
public static String resolveUrl(String baseUrl, String url, String defaultValue) {

for(final String scheme : ALL_SCHEMES) {
if(url.startsWith(scheme)) {
try {
Expand All @@ -163,7 +181,7 @@ public static String resolveUrl(String baseUrl, String url) {
LOGGER.warning(e.getLocalizedMessage() + ": " + url);
// can't let a space exist... send back close to whatever came
// in...
return url.replace(" ", "%20");
return defaultValue;
}
}
}
Expand All @@ -174,8 +192,9 @@ public static String resolveUrl(String baseUrl, String url) {
resolvedURI = UURIFactory.getInstance(absBaseURI, url);
} catch (URIException e) {
LOGGER.warning(e.getLocalizedMessage() + ": " + url);
return url.replace(" ", "%20");
return defaultValue;
}

return resolvedURI.getEscapedURI();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,17 @@ protected void handleReplay(WaybackRequest wbRequest,

closest.setClosest(true);
checkAnchorWindow(wbRequest,closest);

// Support for redirect from the CDX redirectUrl field
// This was the intended use of the redirect field, but has not actually been tested
// To enable this functionality, uncomment the lines below
// This is an optimization that allows for redirects to be handled without loading the original content
//
//String redir = closest.getRedirectUrl();
//if ((redir != null) && !redir.equals("-")) {
// String fullRedirect = getUriConverter().makeReplayURI(closest.getCaptureTimestamp(), redir);
// throw new BetterRequestException(fullRedirect, Integer.valueOf(closest.getHttpCode()));
//}

try {
resource =
Expand Down

0 comments on commit d258e24

Please sign in to comment.