-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sanitize URLs in file fields to handle invalid pipe characters ('|') #12156
Changes from 4 commits
63f9562
0f1ea59
e12d3d6
547300d
f392c2b
3551dc2
2f390d4
0f8c12d
387331b
49395f2
0051d0b
160d814
0afa80e
e0436f8
62440b3
c7ec658
51a8507
11ec9f4
2e6c548
650272a
5a55611
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
import java.net.MalformedURLException; | ||
import java.net.URI; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.net.URLDecoder; | ||
import java.nio.charset.StandardCharsets; | ||
|
@@ -24,8 +25,8 @@ private URLUtil() { | |
* Cleans URLs returned by Google search. | ||
* | ||
* <example> | ||
* If you copy links from search results from Google, all links will be enriched with search meta data, e.g. | ||
* https://www.google.de/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&&url=http%3A%2F%2Fwww.inrg.csie.ntu.edu.tw%2Falgorithm2014%2Fhomework%2FWagner-74.pdf&ei=DifeVYHkDYWqU5W0j6gD&usg=AFQjCNFl638rl5KVta1jIMWLyb4CPSZidg&sig2=0hSSMw9XZXL3HJWwEcJtOg | ||
* If you copy links from search results from Google, all links will be enriched with search meta data, e.g. | ||
* https://www.google.de/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&&url=http%3A%2F%2Fwww.inrg.csie.ntu.edu.tw%2Falgorithm2014%2Fhomework%2FWagner-74.pdf&ei=DifeVYHkDYWqU5W0j6gD&usg=AFQjCNFl638rl5KVta1jIMWLyb4CPSZidg&sig2=0hSSMw9XZXL3HJWwEcJtOg | ||
* </example> | ||
* | ||
* @param url the Google search URL string | ||
|
@@ -39,7 +40,7 @@ public static String cleanGoogleSearchURL(String url) { | |
} | ||
// Extract destination URL | ||
try { | ||
URL searchURL = URI.create(url).toURL(); | ||
URL searchURL = URLUtil.create(url); | ||
// URL parameters | ||
String query = searchURL.getQuery(); | ||
// no parameters | ||
|
@@ -62,7 +63,8 @@ public static String cleanGoogleSearchURL(String url) { | |
} | ||
} | ||
return url; | ||
} catch (MalformedURLException e) { | ||
} catch ( | ||
MalformedURLException e) { | ||
return url; | ||
} | ||
} | ||
|
@@ -77,9 +79,11 @@ public static String cleanGoogleSearchURL(String url) { | |
*/ | ||
public static boolean isURL(String url) { | ||
try { | ||
URI.create(url).toURL(); | ||
URLUtil.create(url); | ||
return true; | ||
} catch (MalformedURLException | IllegalArgumentException e) { | ||
} catch ( | ||
MalformedURLException | | ||
IllegalArgumentException e) { | ||
return false; | ||
} | ||
} | ||
|
@@ -96,11 +100,12 @@ public static Optional<String> getSuffix(final String link, ExternalApplications | |
String strippedLink = link; | ||
try { | ||
// Try to strip the query string, if any, to get the correct suffix: | ||
URL url = URI.create(link).toURL(); | ||
URL url = URLUtil.create(link); | ||
if ((url.getQuery() != null) && (url.getQuery().length() < (link.length() - 1))) { | ||
strippedLink = link.substring(0, link.length() - url.getQuery().length() - 1); | ||
} | ||
} catch (MalformedURLException e) { | ||
} catch ( | ||
MalformedURLException e) { | ||
// Don't report this error, since getting the suffix is a non-critical | ||
// operation, and this error will be triggered and reported elsewhere. | ||
} | ||
|
@@ -138,4 +143,38 @@ public static Optional<String> getSuffix(final String link, ExternalApplications | |
return Optional.ofNullable(suffix); | ||
} | ||
} | ||
/** | ||
* Creates a {@link URL} object from the given string URL. | ||
* | ||
* @param url the URL string to be converted into a {@link URL}. | ||
* @return the {@link URL} object created from the string URL. | ||
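* @throws MalformedURLException if the URL is malformed and cannot be converted to a {@link URL}. | ||
* @throws IllegalArgumentException if the string cannot be parsed as a {@link URI} in the first place. | ||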
*/ | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No empty line between the JavaDoc and the method; add an empty line above the JavaDoc. |
||
public static URL create(String url) throws MalformedURLException { | ||
// Go through createUri first so that '|' is percent-encoded before the resulting URI is converted to a URL. | ||
return URLUtil.createUri(url).toURL(); | ||
} | ||
|
||
/** | ||
* Creates a {@link URI} object from the given string URL. | ||
* <p> | ||
* Before parsing, every pipe character ('|') is replaced with its percent-encoded equivalent ("%7C"), because the | ||
* pipe character is not a valid character in certain parts of a URI (specifically, in the path or query components). | ||
* According to the URI specification (RFC 3986), certain characters must be percent-encoded when used in specific contexts. | ||
* No other character is rewritten by this method. | ||
* | ||
* @param url the URL string to be converted into a {@link URI}. | ||
* @return the {@link URI} object created from the string URL after the pipe replacement. | ||
* @throws IllegalArgumentException if the string still violates URI syntax after the replacement; the underlying | ||
*                                  {@link URISyntaxException} is attached as the cause. | ||
*/ | ||
public static URI createUri(String url) { | ||
try { | ||
// java.net.URI rejects an unencoded '|', so percent-encode it as '%7C' before parsing (see RFC 3986 on percent-encoding). | ||
String urlFormat = url.replace("|", "%7C"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add a comment explaining why we are doing this and the relevant references. |
||
return new URI(urlFormat); | ||
} catch (URISyntaxException e) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add |
||
// Callers of this utility handle unchecked failures, so wrap the checked exception and keep it as the cause. | ||
throw new IllegalArgumentException(e); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove empty line (we know that the IntelliJ autoformatter creates this - but no one invested significant time to fix this)