diff --git a/CHANGELOG.md b/CHANGELOG.md index e12f8311528..d801ca0eb8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We added drag and drop events for field 'Groups' in entry editor panel. [#569](https://github.com/koppor/jabref/issues/569) - We added support for parsing MathML in the Medline importer. [#4273](https://github.com/JabRef/jabref/issues/4273) - We added the ability to search for a DOI directly from 'Web Search'. [#9674](https://github.com/JabRef/jabref/issues/9674) +- We added a cleanup activity that identifies a URL in the `note` field and moves it to the `url` field. [koppor#216](https://github.com/koppor/jabref/issues/216) ### Changed diff --git a/src/main/java/org/jabref/gui/cleanup/CleanupPresetPanel.fxml b/src/main/java/org/jabref/gui/cleanup/CleanupPresetPanel.fxml index 2ffb576b405..4ab659749cd 100644 --- a/src/main/java/org/jabref/gui/cleanup/CleanupPresetPanel.fxml +++ b/src/main/java/org/jabref/gui/cleanup/CleanupPresetPanel.fxml @@ -16,6 +16,7 @@ + diff --git a/src/main/java/org/jabref/gui/cleanup/CleanupPresetPanel.java b/src/main/java/org/jabref/gui/cleanup/CleanupPresetPanel.java index 5c83ec5dfa8..1a0cf3d6df2 100644 --- a/src/main/java/org/jabref/gui/cleanup/CleanupPresetPanel.java +++ b/src/main/java/org/jabref/gui/cleanup/CleanupPresetPanel.java @@ -27,6 +27,7 @@ public class CleanupPresetPanel extends VBox { @FXML private Label cleanupRenamePDFLabel; @FXML private CheckBox cleanUpDOI; @FXML private CheckBox cleanUpEprint; + @FXML private CheckBox cleanUpURL; @FXML private CheckBox cleanUpISSN; @FXML private CheckBox cleanUpMovePDF; @FXML private CheckBox cleanUpMakePathsRelative; @@ -100,6 +101,7 @@ private void init(CleanupPreferences cleanupPreferences, FilePreferences filePre private void updateDisplay(CleanupPreferences preset) { cleanUpDOI.setSelected(preset.isActive(CleanupPreferences.CleanupStep.CLEAN_UP_DOI)); 
cleanUpEprint.setSelected(preset.isActive(CleanupPreferences.CleanupStep.CLEANUP_EPRINT)); + cleanUpURL.setSelected(preset.isActive(CleanupPreferences.CleanupStep.CLEAN_UP_URL)); if (!cleanUpMovePDF.isDisabled()) { cleanUpMovePDF.setSelected(preset.isActive(CleanupPreferences.CleanupStep.MOVE_PDF)); } @@ -129,6 +131,9 @@ public CleanupPreferences getCleanupPreset() { if (cleanUpEprint.isSelected()) { activeJobs.add(CleanupPreferences.CleanupStep.CLEANUP_EPRINT); } + if (cleanUpURL.isSelected()) { + activeJobs.add(CleanupPreferences.CleanupStep.CLEAN_UP_URL); + } if (cleanUpISSN.isSelected()) { activeJobs.add(CleanupPreferences.CleanupStep.CLEAN_UP_ISSN); } diff --git a/src/main/java/org/jabref/logic/cleanup/CleanupWorker.java b/src/main/java/org/jabref/logic/cleanup/CleanupWorker.java index d6ead59176f..1663db75f87 100644 --- a/src/main/java/org/jabref/logic/cleanup/CleanupWorker.java +++ b/src/main/java/org/jabref/logic/cleanup/CleanupWorker.java @@ -57,6 +57,8 @@ private CleanupJob toJob(CleanupPreferences.CleanupStep action) { new DoiCleanup(); case CLEANUP_EPRINT -> new EprintCleanup(); + case CLEAN_UP_URL -> + new URLCleanup(); case MAKE_PATHS_RELATIVE -> new RelativePathsCleanup(databaseContext, filePreferences); case RENAME_PDF -> diff --git a/src/main/java/org/jabref/logic/cleanup/URLCleanup.java b/src/main/java/org/jabref/logic/cleanup/URLCleanup.java new file mode 100644 index 00000000000..d7569a120f2 --- /dev/null +++ b/src/main/java/org/jabref/logic/cleanup/URLCleanup.java @@ -0,0 +1,76 @@ +package org.jabref.logic.cleanup; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jabref.model.FieldChange; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.Field; +import org.jabref.model.entry.field.StandardField; + +/** + * Checks whether URL exists in note field, and stores it under url field. 
+ */
+public class URLCleanup implements CleanupJob {
+
+    private static final Field NOTE_FIELD = StandardField.NOTE;
+    private static final Field URL_FIELD = StandardField.URL;
+
+    // URL regex adapted to Java (mainly doubled backslashes) from a suggested solution in
+    // https://stackoverflow.com/questions/28185064/python-infinite-loop-in-regex-to-match-url.
+    // Compiled once instead of on every cleanup(...) call.
+    private static final Pattern URL_PATTERN = Pattern.compile(
+            "(?i)\\b((?:https?://|www\\d{0,3}[.]|[a-z0-9.\\-]+[.]"
+            + "[a-z]{2,4}/)(?:[^\\s()<>\\\\]+|\\(([^\\s()<>\\\\]+|(\\([^\\s()"
+            + "<>\\\\]+\\)))*\\))+(?:\\(([^\\s()<>\\\\]+|(\\([^\\s()<>\\\\]+\\"
+            + ")))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))",
+            Pattern.CASE_INSENSITIVE);
+
+    /**
+     * Moves the first URL found in the note field to the url field and returns the applied changes. A url field
+     * already holding that URL is kept (only the note is trimmed); any other url value leaves the entry as is.
+     */
+    @Override
+    public List<FieldChange> cleanup(BibEntry entry) {
+        List<FieldChange> changes = new ArrayList<>();
+
+        // A missing note field becomes "", which cannot match; passing null to matcher(...) would throw an NPE.
+        String noteFieldValue = entry.getField(NOTE_FIELD).orElse("");
+        Matcher matcher = URL_PATTERN.matcher(noteFieldValue);
+
+        if (matcher.find()) {
+            String url = matcher.group();
+            // Remove the URL from the NoteFieldValue
+            String newNoteFieldValue = noteFieldValue
+                    .replace(url, "")
+                    /*
+                     * The following regex erases unnecessary remaining
+                     * content in note field. Explanation:
+                     * <ul>
+                     * <li>"(, )?": Matches an optional comma followed by a space</li>
+                     * <li>"\\?": Matches an optional backslash</li>
+                     * <li>"url\{\}": Matches the literal string "url{}"</li>
+                     * </ul>
+ * Note that the backslashes are doubled as Java requirement + */ + .replaceAll("(, )?\\\\?url\\{\\}(, )?", ""); + + /* + * In case the url and note fields hold the same URL, then we just + * remove it from the note field, and no other action is performed. + */ + if (entry.hasField(URL_FIELD)) { + String urlFieldValue = entry.getField(URL_FIELD).orElse(null); + if (urlFieldValue.equals(url)) { + entry.setField(NOTE_FIELD, newNoteFieldValue).ifPresent(changes::add); + } + } else { + entry.setField(NOTE_FIELD, newNoteFieldValue).ifPresent(changes::add); + entry.setField(URL_FIELD, url).ifPresent(changes::add); + } + } + return changes; + } +} diff --git a/src/main/java/org/jabref/preferences/CleanupPreferences.java b/src/main/java/org/jabref/preferences/CleanupPreferences.java index 516fd09e2d6..cf3b24491fa 100644 --- a/src/main/java/org/jabref/preferences/CleanupPreferences.java +++ b/src/main/java/org/jabref/preferences/CleanupPreferences.java @@ -80,6 +80,7 @@ public enum CleanupStep { */ CLEAN_UP_DOI, CLEANUP_EPRINT, + CLEAN_UP_URL, MAKE_PATHS_RELATIVE, RENAME_PDF, RENAME_PDF_ONLY_RELATIVE_PATHS, diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties index 02dbc0da732..2b42a9f85d8 100644 --- a/src/main/resources/l10n/JabRef_en.properties +++ b/src/main/resources/l10n/JabRef_en.properties @@ -1131,6 +1131,7 @@ Cleanup\ entries=Cleanup entries Automatically\ assign\ new\ entry\ to\ selected\ groups=Automatically assign new entry to selected groups %0\ mode=%0 mode Move\ DOIs\ from\ note\ and\ URL\ field\ to\ DOI\ field\ and\ remove\ http\ prefix=Move DOIs from note and URL field to DOI field and remove http prefix +Move\ URL\ in\ note\ field\ to\ url\ field=Move URL in note field to url field Make\ paths\ of\ linked\ files\ relative\ (if\ possible)=Make paths of linked files relative (if possible) Rename\ PDFs\ to\ given\ filename\ format\ pattern=Rename PDFs to given filename format pattern Rename\ only\ PDFs\ 
having\ a\ relative\ path=Rename only PDFs having a relative path diff --git a/src/main/resources/l10n/JabRef_fr.properties b/src/main/resources/l10n/JabRef_fr.properties index de1bc257b4e..839c0aeba70 100644 --- a/src/main/resources/l10n/JabRef_fr.properties +++ b/src/main/resources/l10n/JabRef_fr.properties @@ -1055,7 +1055,7 @@ exportFormat=Format d'exportation Output\ file\ missing=Fichier de sortie manquant The\ output\ option\ depends\ on\ a\ valid\ input\ option.=L'option de sortie dépend d'une option d'entrée valide. Linked\ file\ name\ conventions=Conventions pour les noms de fichiers liés -Filename\ format\ pattern=Modèle de format de nom de fichier +Filename\ format\ pattern=Modèle de format de nom de fichier Additional\ parameters=Paramètres additionnels Cite\ selected\ entries\ between\ parenthesis=Citer les entrées sélectionnées entre parenthèses Cite\ selected\ entries\ with\ in-text\ citation=Citer les entrées sélectionnées comme incluse dans le texte diff --git a/src/main/resources/l10n/JabRef_ko.properties b/src/main/resources/l10n/JabRef_ko.properties index 64b5c09d23f..9e6e9c6097e 100644 --- a/src/main/resources/l10n/JabRef_ko.properties +++ b/src/main/resources/l10n/JabRef_ko.properties @@ -1849,7 +1849,7 @@ Add\ new\ String=문자열 추가 Must\ not\ be\ empty\!=비워둘 수 없습니다\! 
Open\ Help\ page=도움말 열기 Add\ new\ field\ name=새 필드 이름 추가 -Field\ name\:=필드 이름\: +Field\ name\:=필드 이름\: Field\ name\ "%0"\ already\ exists=필드 이름 "%0"이 이미 존재합니다 No\ field\ name\ selected\!=필드 이름을 선택하지 않았습니다 Remove\ field\ name=필드 이름 제거 diff --git a/src/main/resources/l10n/JabRef_ru.properties b/src/main/resources/l10n/JabRef_ru.properties index 121ca4f2c4c..3c9a9facd21 100644 --- a/src/main/resources/l10n/JabRef_ru.properties +++ b/src/main/resources/l10n/JabRef_ru.properties @@ -2225,7 +2225,7 @@ This\ entry\ type\ is\ intended\ for\ sources\ such\ as\ web\ sites\ which\ are\ A\ single-volume\ work\ of\ reference\ such\ as\ an\ encyclopedia\ or\ a\ dictionary.=Неделимая работа или ссылка, как энциклопедия или словарь. A\ technical\ report,\ research\ report,\ or\ white\ paper\ published\ by\ a\ university\ or\ some\ other\ institution.=Технический отчет, исследовательский отчет, или белая книга, выпущенная институтом или другим учреждением. An\ entry\ set\ is\ a\ group\ of\ entries\ which\ are\ cited\ as\ a\ single\ reference\ and\ listed\ as\ a\ single\ item\ in\ the\ bibliography.=Набор записей представляет собой группу записей, которые приводятся в виде единой ссылки и перечислены в виде одного элемента в библиографии. -Supplemental\ material\ in\ a\ "Book".\ This\ type\ is\ provided\ for\ elements\ such\ as\ prefaces,\ introductions,\ forewords,\ afterwords,\ etc.\ which\ often\ have\ a\ generic\ title\ only.=Дополнительный материал в "Книге" предназначен для таких элементов, как предисловия, введения, послесловия и т.д. +Supplemental\ material\ in\ a\ "Book".\ This\ type\ is\ provided\ for\ elements\ such\ as\ prefaces,\ introductions,\ forewords,\ afterwords,\ etc.\ which\ often\ have\ a\ generic\ title\ only.=Дополнительный материал в "Книге" предназначен для таких элементов, как предисловия, введения, послесловия и т.д. Supplemental\ material\ in\ a\ "Collection".=Дополнительные материалы в "Коллекции". 
Supplemental\ material\ in\ a\ "Periodical".\ This\ type\ may\ be\ useful\ when\ referring\ to\ items\ such\ as\ regular\ columns,\ obituaries,\ letters\ to\ the\ editor,\ etc.\ which\ only\ have\ a\ generic\ title.=Дополнительные материалы в "Периодическом издании". Этот тип может быть полезен при обращении к таким элементам, как обычные колонки, некрологи, письма к редактору и т.д., которые имеют только общее название. A\ thesis\ written\ for\ an\ educational\ institution\ to\ satisfy\ the\ requirements\ for\ a\ degree.=Тезис, написанный для учебного заведения с целью удовлетворения требований к степени. diff --git a/src/test/java/org/jabref/logic/cleanup/URLCleanupTest.java b/src/test/java/org/jabref/logic/cleanup/URLCleanupTest.java new file mode 100644 index 00000000000..7bfd2fd990b --- /dev/null +++ b/src/test/java/org/jabref/logic/cleanup/URLCleanupTest.java @@ -0,0 +1,118 @@ +package org.jabref.logic.cleanup; + +import java.util.stream.Stream; + +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.StandardField; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class URLCleanupTest { + + @ParameterizedTest + @MethodSource("provideURL") + public void testChangeURL(BibEntry expected, BibEntry urlInputField) { + URLCleanup cleanUp = new URLCleanup(); + cleanUp.cleanup(urlInputField); + + assertEquals(expected, urlInputField); + } + + private static Stream provideURL() { + return Stream.of( + + // Input Note field has two arguments stored , with the latter being a url. 
+ Arguments.of( + new BibEntry().withField(StandardField.URL, + "https://hdl.handle.net/10442/hedi/6089") + .withField(StandardField.NOTE, + "this is a note"), + new BibEntry().withField(StandardField.NOTE, + "this is a note, \\url{https://hdl.handle.net/10442/hedi/6089}")), + + // Input Note field has two arguments stored, with the former being a url. + Arguments.of( + new BibEntry().withField(StandardField.URL, + "https://hdl.handle.net/10442/hedi/6089") + .withField(StandardField.NOTE, + "this is a note"), + new BibEntry().withField(StandardField.NOTE, + "\\url{https://hdl.handle.net/10442/hedi/6089}, this is a note")), + + // Input Note field has more than one URLs stored. + Arguments.of( + new BibEntry().withField(StandardField.URL, + "https://hdl.handle.net/10442/hedi/6089") + .withField(StandardField.NOTE, + "\\url{http://142.42.1.1:8080}"), + new BibEntry().withField(StandardField.NOTE, + "\\url{https://hdl.handle.net/10442/hedi/6089}, " + + "\\url{http://142.42.1.1:8080}")), + + // Input Note field has several values stored. + Arguments.of( + new BibEntry().withField(StandardField.URL, + "https://example.org") + .withField(StandardField.NOTE, + "cited by Kramer, 2002."), + new BibEntry().withField(StandardField.NOTE, + "\\url{https://example.org}, cited by Kramer, 2002.")), + + /* + * Several input URL types (e.g, not secure protocol, password included for + * authentication, IP address, port etc.) to be correctly identified. 
+ */ + Arguments.of( + new BibEntry().withField(StandardField.URL, + "https://hdl.handle.net/10442/hedi/6089"), + new BibEntry().withField(StandardField.NOTE, + "\\url{https://hdl.handle.net/10442/hedi/6089}")), + + Arguments.of( + new BibEntry().withField(StandardField.URL, + "http://hdl.handle.net/10442/hedi/6089"), + new BibEntry().withField(StandardField.NOTE, + "\\url{http://hdl.handle.net/10442/hedi/6089}")), + + Arguments.of( + new BibEntry().withField(StandardField.URL, + "http://userid:password@example.com:8080"), + new BibEntry().withField(StandardField.NOTE, + "\\url{http://userid:password@example.com:8080}")), + + Arguments.of( + new BibEntry().withField(StandardField.URL, + "http://142.42.1.1:8080"), + new BibEntry().withField(StandardField.NOTE, + "\\url{http://142.42.1.1:8080}")), + + Arguments.of( + new BibEntry().withField(StandardField.URL, + "http://☺.damowmow.com"), + new BibEntry().withField(StandardField.NOTE, + "\\url{http://☺.damowmow.com}")), + + Arguments.of( + new BibEntry().withField(StandardField.URL, + "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com"), + new BibEntry().withField(StandardField.NOTE, + "\\url{http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com}")), + + Arguments.of( + new BibEntry().withField(StandardField.URL, + "https://www.example.com/foo/?bar=baz&inga=42&quux"), + new BibEntry().withField(StandardField.NOTE, + "\\url{https://www.example.com/foo/?bar=baz&inga=42&quux}")), + + Arguments.of( + new BibEntry().withField(StandardField.URL, + "https://www.example.com/foo/?bar=baz&inga=42&quux"), + new BibEntry().withField(StandardField.NOTE, + "https://www.example.com/foo/?bar=baz&inga=42&quux")) + ); + } +}