Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cleanup activity for URL field #9970

Merged
merged 22 commits into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a8a1251
Make the necessary adjustments to several classes
Alexandra-Stath Jun 3, 2023
07973f7
Add cleanup activity regarding URL field
Alexandra-Stath Jun 3, 2023
e251b39
Add parameterized tests for URLCleanup java class
Alexandra-Stath Jun 3, 2023
ef5ff9d
Include cleanup activity description in properties files
Alexandra-Stath Jun 3, 2023
00dd484
Update javadoc comment
Alexandra-Stath Jun 3, 2023
a586e68
Add description in CHANGELOG.md
Alexandra-Stath Jun 3, 2023
d572905
Resolve merge conflicts
Alexandra-Stath Jun 3, 2023
940c3ec
Update properties files
Alexandra-Stath Jun 4, 2023
9c83833
Update URLCleanup java class with javadoc comment about urlRegex
Alexandra-Stath Jun 4, 2023
7b3878e
Update URLCleanup java class with javadoc comment about urlRegex
Alexandra-Stath Jun 4, 2023
d69d98c
Update Latex command usage and fix checkstyle errors
Alexandra-Stath Jun 4, 2023
7db3f62
Remove comment from CleanupPreferences class
Alexandra-Stath Jun 6, 2023
07db0a6
Update cleanup activity in URLCleanup class
Alexandra-Stath Jun 6, 2023
846dda5
Update tests structure and add more cases
Alexandra-Stath Jun 6, 2023
3b8b5db
Update comment in URLCleanup class
Alexandra-Stath Jun 6, 2023
907d774
Resolve conflicts caused in CHANGELOG.md
Alexandra-Stath Jun 6, 2023
e48c418
Merge branch 'main' into fix-for-koppor-216
Alexandra-Stath Jun 6, 2023
583917b
Correction of CleanupPresetPanel and property files
Alexandra-Stath Jun 7, 2023
dbb41af
Merge branch 'fix-for-koppor-216' of https://github.com/Alexandra-Sta…
Alexandra-Stath Jun 7, 2023
ec6ff74
Update CHANGELOG.md
koppor Jun 8, 2023
0a0ce4f
Apply suggestions from code review
koppor Jun 8, 2023
c8d45b3
Update of URLCleanup class
Alexandra-Stath Jun 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We added drag and drop events for field 'Groups' in entry editor panel. [#569](https://github.com/koppor/jabref/issues/569)
- We added support for parsing MathML in the Medline importer. [#4273](https://github.com/JabRef/jabref/issues/4273)
- We added the ability to search for a DOI directly from 'Web Search'. [#9674](https://github.com/JabRef/jabref/issues/9674)
- We added a cleanup activity that identifies a URL in the `note` field and moves it to the `url` field. [koppor#216](https://github.com/koppor/jabref/issues/216)

### Changed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
<CheckBox fx:id="cleanUpDOI" text="%Move DOIs from note and URL field to DOI field and remove http prefix"/>
<CheckBox fx:id="cleanUpEprint"
text="%Move preprint information from 'URL' and 'journal' field to the 'eprint' field"/>
<CheckBox fx:id="cleanUpURL" text="%Move URL in note field to url field"/>
<CheckBox fx:id="cleanUpISSN" text="%Reformat ISSN"/>
<CheckBox fx:id="cleanUpUpgradeExternalLinks"/>
<CheckBox fx:id="cleanUpMovePDF"/>
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/org/jabref/gui/cleanup/CleanupPresetPanel.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public class CleanupPresetPanel extends VBox {
@FXML private Label cleanupRenamePDFLabel;
@FXML private CheckBox cleanUpDOI;
@FXML private CheckBox cleanUpEprint;
@FXML private CheckBox cleanUpURL;
@FXML private CheckBox cleanUpISSN;
@FXML private CheckBox cleanUpMovePDF;
@FXML private CheckBox cleanUpMakePathsRelative;
Expand Down Expand Up @@ -100,6 +101,7 @@ private void init(CleanupPreferences cleanupPreferences, FilePreferences filePre
private void updateDisplay(CleanupPreferences preset) {
cleanUpDOI.setSelected(preset.isActive(CleanupPreferences.CleanupStep.CLEAN_UP_DOI));
cleanUpEprint.setSelected(preset.isActive(CleanupPreferences.CleanupStep.CLEANUP_EPRINT));
cleanUpURL.setSelected(preset.isActive(CleanupPreferences.CleanupStep.CLEAN_UP_URL));
if (!cleanUpMovePDF.isDisabled()) {
cleanUpMovePDF.setSelected(preset.isActive(CleanupPreferences.CleanupStep.MOVE_PDF));
}
Expand Down Expand Up @@ -129,6 +131,9 @@ public CleanupPreferences getCleanupPreset() {
if (cleanUpEprint.isSelected()) {
activeJobs.add(CleanupPreferences.CleanupStep.CLEANUP_EPRINT);
}
if (cleanUpURL.isSelected()) {
activeJobs.add(CleanupPreferences.CleanupStep.CLEAN_UP_URL);
}
if (cleanUpISSN.isSelected()) {
activeJobs.add(CleanupPreferences.CleanupStep.CLEAN_UP_ISSN);
}
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/cleanup/CleanupWorker.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ private CleanupJob toJob(CleanupPreferences.CleanupStep action) {
new DoiCleanup();
case CLEANUP_EPRINT ->
new EprintCleanup();
case CLEAN_UP_URL ->
new URLCleanup();
case MAKE_PATHS_RELATIVE ->
new RelativePathsCleanup(databaseContext, filePreferences);
case RENAME_PDF ->
Expand Down
76 changes: 76 additions & 0 deletions src/main/java/org/jabref/logic/cleanup/URLCleanup.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package org.jabref.logic.cleanup;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.model.FieldChange;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;

/**
* Checks whether URL exists in note field, and stores it under url field.
*/
public class URLCleanup implements CleanupJob {

private static final Field NOTE_FIELD = StandardField.NOTE;
private static final Field URL_FIELD = StandardField.URL;

@Override
public List<FieldChange> cleanup(BibEntry entry) {
List<FieldChange> changes = new ArrayList<>();

String noteFieldValue = entry.getField(NOTE_FIELD).orElse(null);

/*
* The urlRegex was originally fetched from a suggested solution in
* https://stackoverflow.com/questions/28185064/python-infinite-loop-in-regex-to-match-url.
* In order to be functional, we made the necessary adjustments regarding Java
* features (mainly doubled backslashes).
*/
String urlRegex = "(?i)\\b((?:https?://|www\\d{0,3}[.]|[a-z0-9.\\-]+[.]"
+ "[a-z]{2,4}/)(?:[^\\s()<>\\\\]+|\\(([^\\s()<>\\\\]+|(\\([^\\s()"
+ "<>\\\\]+\\)))*\\))+(?:\\(([^\\s()<>\\\\]+|(\\([^\\s()<>\\\\]+\\"
+ ")))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))";

final Pattern pattern = Pattern.compile(urlRegex, Pattern.CASE_INSENSITIVE);
final Matcher matcher = pattern.matcher(noteFieldValue);

if (matcher.find()) {
String url = matcher.group();

// Remove the URL from the NoteFieldValue
String newNoteFieldValue = noteFieldValue
.replace(url, "")

/*
* The following regex erases unnecessary remaining
* content in note field. Explanation:
* <ul>
* <li>"(, )?": Matches an optional comma followed by a space</li>
* <li>"\\?": Matches an optional backslash</li>
* <li>"url\{\}": Matches the literal string "url{}"</li>
* </ul>
* Note that the backslashes are doubled as Java requirement
*/
.replaceAll("(, )?\\\\?url\\{\\}(, )?", "");

/*
* In case the url and note fields hold the same URL, then we just
* remove it from the note field, and no other action is performed.
*/
if (entry.hasField(URL_FIELD)) {
String urlFieldValue = entry.getField(URL_FIELD).orElse(null);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought, this could lead to issues, but I did not see it. There were no tests covering the orElse branch.

In future: In case Optionals are available, make use of them!

Fix and link to issue at #10435

if (urlFieldValue.equals(url)) {
entry.setField(NOTE_FIELD, newNoteFieldValue).ifPresent(changes::add);
}
} else {
entry.setField(NOTE_FIELD, newNoteFieldValue).ifPresent(changes::add);
entry.setField(URL_FIELD, url).ifPresent(changes::add);
}
}
return changes;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ public enum CleanupStep {
*/
CLEAN_UP_DOI,
CLEANUP_EPRINT,
CLEAN_UP_URL,
MAKE_PATHS_RELATIVE,
RENAME_PDF,
RENAME_PDF_ONLY_RELATIVE_PATHS,
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,7 @@ Cleanup\ entries=Cleanup entries
Automatically\ assign\ new\ entry\ to\ selected\ groups=Automatically assign new entry to selected groups
%0\ mode=%0 mode
Move\ DOIs\ from\ note\ and\ URL\ field\ to\ DOI\ field\ and\ remove\ http\ prefix=Move DOIs from note and URL field to DOI field and remove http prefix
Move\ URL\ in\ note\ field\ to\ url\ field=Move URL in note field to url field
Make\ paths\ of\ linked\ files\ relative\ (if\ possible)=Make paths of linked files relative (if possible)
Rename\ PDFs\ to\ given\ filename\ format\ pattern=Rename PDFs to given filename format pattern
Rename\ only\ PDFs\ having\ a\ relative\ path=Rename only PDFs having a relative path
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/l10n/JabRef_fr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@ exportFormat=Format d'exportation
Output\ file\ missing=Fichier de sortie manquant
The\ output\ option\ depends\ on\ a\ valid\ input\ option.=L'option de sortie dépend d'une option d'entrée valide.
Linked\ file\ name\ conventions=Conventions pour les noms de fichiers liés
Filename\ format\ pattern=Modèle de format de nom de fichier
Filename\ format\ pattern=Modèle de format de nom de fichier
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes will be overwritten. You need to go to Crowdin service to update issues at the translations.

Additional\ parameters=Paramètres additionnels
Cite\ selected\ entries\ between\ parenthesis=Citer les entrées sélectionnées entre parenthèses
Cite\ selected\ entries\ with\ in-text\ citation=Citer les entrées sélectionnées comme incluse dans le texte
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/l10n/JabRef_ko.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1849,7 +1849,7 @@ Add\ new\ String=문자열 추가
Must\ not\ be\ empty\!=비워둘 수 없습니다\!
Open\ Help\ page=도움말 열기
Add\ new\ field\ name=새 필드 이름 추가
Field\ name\:=필드 이름\:
Field\ name\:=필드 이름\:
Field\ name\ "%0"\ already\ exists=필드 이름 "%0"이 이미 존재합니다
No\ field\ name\ selected\!=필드 이름을 선택하지 않았습니다
Remove\ field\ name=필드 이름 제거
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/l10n/JabRef_ru.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2225,7 +2225,7 @@ This\ entry\ type\ is\ intended\ for\ sources\ such\ as\ web\ sites\ which\ are\
A\ single-volume\ work\ of\ reference\ such\ as\ an\ encyclopedia\ or\ a\ dictionary.=Неделимая работа или ссылка, как энциклопедия или словарь.
A\ technical\ report,\ research\ report,\ or\ white\ paper\ published\ by\ a\ university\ or\ some\ other\ institution.=Технический отчет, исследовательский отчет, или белая книга, выпущенная институтом или другим учреждением.
An\ entry\ set\ is\ a\ group\ of\ entries\ which\ are\ cited\ as\ a\ single\ reference\ and\ listed\ as\ a\ single\ item\ in\ the\ bibliography.=Набор записей представляет собой группу записей, которые приводятся в виде единой ссылки и перечислены в виде одного элемента в библиографии.
Supplemental\ material\ in\ a\ "Book".\ This\ type\ is\ provided\ for\ elements\ such\ as\ prefaces,\ introductions,\ forewords,\ afterwords,\ etc.\ which\ often\ have\ a\ generic\ title\ only.=Дополнительный материал в "Книге" предназначен для таких элементов, как предисловия, введения, послесловия и т.д.
Supplemental\ material\ in\ a\ "Book".\ This\ type\ is\ provided\ for\ elements\ such\ as\ prefaces,\ introductions,\ forewords,\ afterwords,\ etc.\ which\ often\ have\ a\ generic\ title\ only.=Дополнительный материал в "Книге" предназначен для таких элементов, как предисловия, введения, послесловия и т.д.
Supplemental\ material\ in\ a\ "Collection".=Дополнительные материалы в "Коллекции".
Supplemental\ material\ in\ a\ "Periodical".\ This\ type\ may\ be\ useful\ when\ referring\ to\ items\ such\ as\ regular\ columns,\ obituaries,\ letters\ to\ the\ editor,\ etc.\ which\ only\ have\ a\ generic\ title.=Дополнительные материалы в "Периодическом издании". Этот тип может быть полезен при обращении к таким элементам, как обычные колонки, некрологи, письма к редактору и т.д., которые имеют только общее название.
A\ thesis\ written\ for\ an\ educational\ institution\ to\ satisfy\ the\ requirements\ for\ a\ degree.=Тезис, написанный для учебного заведения с целью удовлетворения требований к степени.
Expand Down
118 changes: 118 additions & 0 deletions src/test/java/org/jabref/logic/cleanup/URLCleanupTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package org.jabref.logic.cleanup;

import java.util.stream.Stream;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

public class URLCleanupTest {

@ParameterizedTest
@MethodSource("provideURL")
public void testChangeURL(BibEntry expected, BibEntry urlInputField) {
URLCleanup cleanUp = new URLCleanup();
cleanUp.cleanup(urlInputField);

assertEquals(expected, urlInputField);
}

private static Stream<Arguments> provideURL() {
return Stream.of(

// Input Note field has two arguments stored , with the latter being a url.
Arguments.of(
new BibEntry().withField(StandardField.URL,
"https://hdl.handle.net/10442/hedi/6089")
.withField(StandardField.NOTE,
"this is a note"),
new BibEntry().withField(StandardField.NOTE,
"this is a note, \\url{https://hdl.handle.net/10442/hedi/6089}")),

// Input Note field has two arguments stored, with the former being a url.
Arguments.of(
new BibEntry().withField(StandardField.URL,
"https://hdl.handle.net/10442/hedi/6089")
.withField(StandardField.NOTE,
"this is a note"),
new BibEntry().withField(StandardField.NOTE,
"\\url{https://hdl.handle.net/10442/hedi/6089}, this is a note")),

// Input Note field has more than one URLs stored.
Arguments.of(
new BibEntry().withField(StandardField.URL,
"https://hdl.handle.net/10442/hedi/6089")
.withField(StandardField.NOTE,
"\\url{http://142.42.1.1:8080}"),
new BibEntry().withField(StandardField.NOTE,
"\\url{https://hdl.handle.net/10442/hedi/6089}, "
+ "\\url{http://142.42.1.1:8080}")),

// Input Note field has several values stored.
Arguments.of(
new BibEntry().withField(StandardField.URL,
"https://example.org")
.withField(StandardField.NOTE,
"cited by Kramer, 2002."),
new BibEntry().withField(StandardField.NOTE,
"\\url{https://example.org}, cited by Kramer, 2002.")),

/*
* Several input URL types (e.g, not secure protocol, password included for
* authentication, IP address, port etc.) to be correctly identified.
*/
Arguments.of(
new BibEntry().withField(StandardField.URL,
"https://hdl.handle.net/10442/hedi/6089"),
new BibEntry().withField(StandardField.NOTE,
"\\url{https://hdl.handle.net/10442/hedi/6089}")),

Arguments.of(
new BibEntry().withField(StandardField.URL,
"http://hdl.handle.net/10442/hedi/6089"),
new BibEntry().withField(StandardField.NOTE,
"\\url{http://hdl.handle.net/10442/hedi/6089}")),

Arguments.of(
new BibEntry().withField(StandardField.URL,
"http://userid:[email protected]:8080"),
new BibEntry().withField(StandardField.NOTE,
"\\url{http://userid:[email protected]:8080}")),

Arguments.of(
new BibEntry().withField(StandardField.URL,
"http://142.42.1.1:8080"),
new BibEntry().withField(StandardField.NOTE,
"\\url{http://142.42.1.1:8080}")),

Arguments.of(
new BibEntry().withField(StandardField.URL,
"http://☺.damowmow.com"),
new BibEntry().withField(StandardField.NOTE,
"\\url{http://☺.damowmow.com}")),

Arguments.of(
new BibEntry().withField(StandardField.URL,
"http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com"),
new BibEntry().withField(StandardField.NOTE,
"\\url{http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com}")),

Arguments.of(
new BibEntry().withField(StandardField.URL,
"https://www.example.com/foo/?bar=baz&inga=42&quux"),
new BibEntry().withField(StandardField.NOTE,
"\\url{https://www.example.com/foo/?bar=baz&inga=42&quux}")),

Arguments.of(
new BibEntry().withField(StandardField.URL,
"https://www.example.com/foo/?bar=baz&inga=42&quux"),
new BibEntry().withField(StandardField.NOTE,
"https://www.example.com/foo/?bar=baz&inga=42&quux"))
);
}
}