Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sanitize URLs in file fields to handle invalid pipe characters ('|') #12156

Merged
merged 21 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -193,5 +193,6 @@
requires mslinks;
requires org.antlr.antlr4.runtime;
requires org.libreoffice.uno;
requires org.apache.httpcomponents.client5.httpclient5;
// endregion
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package org.jabref.gui.entryeditor.citationrelationtab.semanticscholar;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.List;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImporterPreferences;
import org.jabref.logic.importer.fetcher.CustomizableKeyFetcher;
Expand Down Expand Up @@ -38,7 +38,7 @@ public List<BibEntry> searchCitedBy(BibEntry entry) throws FetcherException {

URL citationsUrl;
try {
citationsUrl = URI.create(getAPIUrl("citations", entry)).toURL();
citationsUrl = URLUtil.create(getAPIUrl("citations", entry));
} catch (MalformedURLException e) {
throw new FetcherException("Malformed URL", e);
}
Expand All @@ -62,7 +62,7 @@ public List<BibEntry> searchCiting(BibEntry entry) throws FetcherException {

URL referencesUrl;
try {
referencesUrl = URI.create(getAPIUrl("references", entry)).toURL();
referencesUrl = URLUtil.create(getAPIUrl("references", entry));
} catch (MalformedURLException e) {
throw new FetcherException("Malformed URL", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -242,7 +241,7 @@ private List<LinkedFileViewModel> findAssociatedNotLinkedFiles(BibEntry entry) {

public boolean downloadFile(String urlText) {
try {
URL url = URI.create(urlText).toURL();
URL url = URLUtil.create(urlText);
addFromURLAndDownload(url);
return true;
} catch (MalformedURLException exception) {
Expand Down
55 changes: 47 additions & 8 deletions src/main/java/org/jabref/gui/fieldeditors/URLUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
Expand All @@ -24,8 +25,8 @@ private URLUtil() {
* Cleans URLs returned by Google search.
*
* <example>
* If you copy links from search results from Google, all links will be enriched with search meta data, e.g.
* https://www.google.de/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&&url=http%3A%2F%2Fwww.inrg.csie.ntu.edu.tw%2Falgorithm2014%2Fhomework%2FWagner-74.pdf&ei=DifeVYHkDYWqU5W0j6gD&usg=AFQjCNFl638rl5KVta1jIMWLyb4CPSZidg&sig2=0hSSMw9XZXL3HJWwEcJtOg
* If you copy links from search results from Google, all links will be enriched with search meta data, e.g.
* https://www.google.de/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&&url=http%3A%2F%2Fwww.inrg.csie.ntu.edu.tw%2Falgorithm2014%2Fhomework%2FWagner-74.pdf&ei=DifeVYHkDYWqU5W0j6gD&usg=AFQjCNFl638rl5KVta1jIMWLyb4CPSZidg&sig2=0hSSMw9XZXL3HJWwEcJtOg
* </example>
*
* @param url the Google search URL string
Expand All @@ -39,7 +40,7 @@ public static String cleanGoogleSearchURL(String url) {
}
// Extract destination URL
try {
URL searchURL = URI.create(url).toURL();
URL searchURL = URLUtil.create(url);
// URL parameters
String query = searchURL.getQuery();
// no parameters
Expand All @@ -62,7 +63,8 @@ public static String cleanGoogleSearchURL(String url) {
}
}
return url;
} catch (MalformedURLException e) {
} catch (
MalformedURLException e) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove empty line (we know that the IntelliJ autoformatter creates this - but no one invested significant time to fix this)

return url;
}
}
Expand All @@ -77,9 +79,11 @@ public static String cleanGoogleSearchURL(String url) {
*/
public static boolean isURL(String url) {
try {
URI.create(url).toURL();
URLUtil.create(url);
return true;
} catch (MalformedURLException | IllegalArgumentException e) {
} catch (
MalformedURLException |
IllegalArgumentException e) {
return false;
}
}
Expand All @@ -96,11 +100,12 @@ public static Optional<String> getSuffix(final String link, ExternalApplications
String strippedLink = link;
try {
// Try to strip the query string, if any, to get the correct suffix:
URL url = URI.create(link).toURL();
URL url = URLUtil.create(link);
if ((url.getQuery() != null) && (url.getQuery().length() < (link.length() - 1))) {
strippedLink = link.substring(0, link.length() - url.getQuery().length() - 1);
}
} catch (MalformedURLException e) {
} catch (
MalformedURLException e) {
// Don't report this error, since this getting the suffix is a non-critical
// operation, and this error will be triggered and reported elsewhere.
}
Expand Down Expand Up @@ -138,4 +143,38 @@ public static Optional<String> getSuffix(final String link, ExternalApplications
return Optional.ofNullable(suffix);
}
}
/**
* Creates a {@link URL} object from the given string URL.
*
* @param url the URL string to be converted into a {@link URL}.
* @return the {@link URL} object created from the string URL.
* @throws MalformedURLException if the URL is malformed and cannot be converted to a {@link URL}.
*/

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no empty line between JavaDoc and method

add an empty line above JavaDoc

public static URL create(String url) throws MalformedURLException {
return URLUtil.createUri(url).toURL();
}

/**
* Creates a {@link URI} object from the given string URL.
*
* This method attempts to convert the given URL string into a {@link URI} object.
* The pipe character ('|') is replaced with its percent-encoded equivalent ("%7C") because the pipe character
* is not a valid character in certain parts of a URI (specifically, in the path or query components).
* According to the URI specification (RFC 3986), certain characters must be percent-encoded when used in specific contexts.
*
* @param url the URL string to be converted into a {@link URI}.
* @return the {@link URI} object created from the string URL.
* @throws IllegalArgumentException if the string URL is not a valid URI or if the URI format is incorrect.
* @throws URISyntaxException if the string URL has an invalid syntax and cannot be converted into a {@link URI}.
*/
public static URI createUri(String url) {
try {
// Replace '|' character with its percent-encoded representation '%7C'.
String urlFormat = url.replace("|", "%7C");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment explaining why we are doing this and the relevant references.

return new URI(urlFormat);
} catch (URISyntaxException e) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add @throws JavaDoc, too

throw new IllegalArgumentException(e);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.jabref.gui.linkedfile;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.Optional;

Expand All @@ -11,6 +10,7 @@
import org.jabref.gui.actions.ActionHelper;
import org.jabref.gui.actions.SimpleCommand;
import org.jabref.gui.fieldeditors.LinkedFileViewModel;
import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.gui.preferences.GuiPreferences;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.TaskExecutor;
Expand Down Expand Up @@ -61,7 +61,7 @@ public void execute() {
}

try {
URL url = URI.create(urlforDownload.get()).toURL();
URL url = URLUtil.create(urlforDownload.get());
LinkedFileViewModel onlineFile = new LinkedFileViewModel(
new LinkedFile(url, ""),
entry,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.jabref.gui.externalfiletype.ExternalFileType;
import org.jabref.gui.externalfiletype.ExternalFileTypes;
import org.jabref.gui.externalfiletype.UnknownExternalFileType;
import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.gui.frame.ExternalApplicationsPreferences;
import org.jabref.gui.util.FileDialogConfiguration;
import org.jabref.logic.FilePreferences;
Expand Down Expand Up @@ -139,7 +140,7 @@ public LinkedFile getNewLinkedFile() {

if (LinkedFile.isOnlineLink(link.getValue())) {
try {
return new LinkedFile(description.getValue(), URI.create(link.getValue()).toURL(), fileType, sourceUrl.getValue());
return new LinkedFile(description.getValue(), URLUtil.create(link.getValue()), fileType, sourceUrl.getValue());
} catch (MalformedURLException e) {
return new LinkedFile(description.getValue(), link.getValue(), fileType, sourceUrl.getValue());
}
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/jabref/gui/theme/StyleSheetFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
Expand All @@ -11,6 +10,8 @@
import java.util.Optional;
import java.util.concurrent.atomic.AtomicReference;

import org.jabref.gui.fieldeditors.URLUtil;

import com.google.common.base.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -54,7 +55,7 @@ final class StyleSheetFile extends StyleSheet {

StyleSheetFile(URL url) {
this.url = url;
this.path = Path.of(URI.create(url.toExternalForm()));
this.path = Path.of(URLUtil.createUri(url.toExternalForm()));
reload();
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/logic/importer/fetcher/ACS.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.Objects;
import java.util.Optional;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
Expand Down Expand Up @@ -45,7 +45,7 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {

if (link != null) {
LOGGER.info("Fulltext PDF found @ ACS.");
return Optional.of(URI.create(source.replaceFirst("/abs/", "/pdf/")).toURL());
return Optional.of(URLUtil.create(source.replaceFirst("/abs/", "/pdf/")));
}
return Optional.empty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.Objects;
import java.util.Optional;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
Expand Down Expand Up @@ -48,7 +48,7 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {
if (code == 200) {
LOGGER.info("Fulltext PDF found @ APS.");
try {
return Optional.of(URI.create(pdfRequestUrl).toURL());
return Optional.of(URLUtil.create(pdfRequestUrl));
} catch (MalformedURLException e) {
LOGGER.warn("APS returned malformed URL, cannot find PDF.");
}
Expand Down Expand Up @@ -77,7 +77,7 @@ private Optional<String> getId(String doi) {

URLConnection con;
try {
con = URI.create(doiRequest).toURL().openConnection();
con = URLUtil.create(doiRequest).openConnection();
con.connect();
con.getInputStream();
String[] urlParts = con.getURL().toString().split("abstract/");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
Expand All @@ -24,6 +23,7 @@
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.cleanup.EprintCleanup;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.FetcherException;
Expand Down Expand Up @@ -710,7 +710,7 @@ public ArXivEntry(Node item) {
if (linkTitle.equals(Optional.of("pdf"))) {
pdfUrlParsed = XMLUtil.getAttributeContent(linkNode, "href").map(url -> {
try {
return URI.create(url).toURL();
return URLUtil.create(url);
} catch (MalformedURLException e) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.Optional;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
Expand Down Expand Up @@ -37,7 +37,7 @@ public static Optional<BibEntry> getEntry(String entryUrl, ImportFormatPreferenc
String cleanURL = entryUrl.replace("%", "%25").replace(":", "%3A").replace("/", "%2F").replace("?", "%3F")
.replace("&", "%26").replace("=", "%3D");

URL url = URI.create(BibsonomyScraper.BIBSONOMY_SCRAPER + cleanURL + BibsonomyScraper.BIBSONOMY_SCRAPER_POST).toURL();
URL url = URLUtil.create(BibsonomyScraper.BIBSONOMY_SCRAPER + cleanURL + BibsonomyScraper.BIBSONOMY_SCRAPER_POST);
String bibtex = new URLDownload(url).asString();
return BibtexParser.singleFromString(bibtex, importFormatPreferences);
} catch (IOException | FetcherException ex) {
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.http.dto.SimpleHttpResponse;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.FetcherException;
Expand Down Expand Up @@ -103,13 +103,13 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherExc
Optional<String> id = entry.getField(StandardField.DOI);
if (id.isPresent()) {
String source = PDF_URL.formatted(id.get());
return Optional.of(URI.create(source).toURL());
return Optional.of(URLUtil.create(source));
}

// if using id fails, we can try the source URL
Optional<String> urlString = entry.getField(StandardField.URL);
if (urlString.isPresent()) {
return Optional.of(URI.create(urlString.get()).toURL());
return Optional.of(URLUtil.create(urlString.get()));
}

return Optional.empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.Collections;
Expand All @@ -13,6 +12,7 @@
import java.util.concurrent.CompletionException;
import java.util.regex.Pattern;

import org.jabref.gui.fieldeditors.URLUtil;
import org.jabref.logic.cleanup.FieldFormatterCleanup;
import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter;
Expand Down Expand Up @@ -122,7 +122,7 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc

URL doiURL;
try {
doiURL = URI.create(doi.get().getURIAsASCIIString()).toURL();
doiURL = URLUtil.create(doi.get().getURIAsASCIIString());
} catch (MalformedURLException e) {
throw new FetcherException("Malformed URL", e);
}
Expand Down Expand Up @@ -219,7 +219,7 @@ public List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
public Optional<String> getAgency(DOI doi) throws FetcherException, MalformedURLException {
Optional<String> agency = Optional.empty();
try {
URLDownload download = getUrlDownload(URI.create(DOI.AGENCY_RESOLVER + "/" + doi.asString()).toURL());
URLDownload download = getUrlDownload(URLUtil.create(DOI.AGENCY_RESOLVER + "/" + doi.asString()));
JSONObject response = new JSONArray(download.asString()).getJSONObject(0);
if (response != null) {
agency = Optional.ofNullable(response.optString("RA"));
Expand Down
Loading
Loading