Skip to content

Commit

Permalink
Enable automated cross library search using a cross library query lan… (
Browse files Browse the repository at this point in the history
#7124)

* Enable automated cross library search using a cross library query language.

Signed-off-by: Dominik Voigt <[email protected]>

* Pull Global upward through constructor.

* Pull Globals and ImportFormatPreferences up through constructor

Signed-off-by: Dominik Voigt <[email protected]>

* Integrate requested changes and fix architecture tests by correcting test classes

Signed-off-by: Dominik Voigt <[email protected]>

* Remove unused imports

Signed-off-by: Dominik Voigt <[email protected]>
  • Loading branch information
DominikVoigt authored Nov 25, 2020
1 parent 5ca3d0d commit b19c3e4
Show file tree
Hide file tree
Showing 29 changed files with 1,636 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ to the page field for cases where the page numbers are missing. [#7019](https://
- We added a new formatter to output shorthand month format. [#6579](https://github.com/JabRef/jabref/issues/6579)
- We added support for the new Microsoft Edge browser in all platforms. [#7056](https://github.com/JabRef/jabref/pull/7056)
- We reintroduced emacs/bash-like keybindings. [#6017](https://github.com/JabRef/jabref/issues/6017)
- We added a feature to provide automated cross library search using a cross library query language. This provides support for the search step of systematic literature reviews (SLRs). [koppor#369](https://github.com/koppor/jabref/issues/369)

### Changed

Expand Down
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ dependencies {
exclude group: 'org.apache.lucene', module: 'lucene-sandbox'
}

implementation group: 'org.eclipse.jgit', name: 'org.eclipse.jgit', version: '5.9.0.202009080501-r'

implementation group: 'org.mariadb.jdbc', name: 'mariadb-java-client', version: '2.7.0'

implementation 'org.postgresql:postgresql:42.2.18'
Expand Down
1 change: 1 addition & 0 deletions src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,5 @@
requires com.h2database.mvstore;
requires lucene.queryparser;
requires lucene.core;
requires org.eclipse.jgit;
}
8 changes: 5 additions & 3 deletions src/main/java/org/jabref/gui/JabRefFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -815,7 +815,9 @@ private MenuBar createMenu() {
new SeparatorMenuItem(),

factory.createMenuItem(StandardActions.SEND_AS_EMAIL, new SendAsEMailAction(dialogService, stateManager)),
pushToApplicationMenuItem
pushToApplicationMenuItem,
new SeparatorMenuItem(),
factory.createMenuItem(StandardActions.START_SYSTEMATIC_LITERATURE_REVIEW, new StartLiteratureReviewAction(this, Globals.getFileUpdateMonitor(), Globals.prefs.getWorkingDir(), Globals.TASK_EXECUTOR))
);

SidePaneComponent webSearch = sidePaneManager.getComponent(SidePaneType.WEB_SEARCH);
Expand Down Expand Up @@ -992,7 +994,7 @@ public void addParserResult(ParserResult parserResult, boolean focusPanel) {
* This method causes all open LibraryTabs to set up their tables anew. When called from PreferencesDialogViewModel,
* this updates to the new settings.
* We need to notify all tabs about the changes to avoid problems when changing the column set.
* */
*/
public void setupAllTables() {
tabbedPane.getTabs().forEach(tab -> {
LibraryTab libraryTab = (LibraryTab) tab;
Expand All @@ -1013,7 +1015,7 @@ private ContextMenu createTabContextMenu(KeyBindingRepository keyBindingReposito
new SeparatorMenuItem(),
factory.createMenuItem(StandardActions.OPEN_DATABASE_FOLDER, new OpenDatabaseFolder()),
factory.createMenuItem(StandardActions.OPEN_CONSOLE, new OpenConsoleAction(stateManager))
);
);

return contextMenu;
}
Expand Down
81 changes: 81 additions & 0 deletions src/main/java/org/jabref/gui/StartLiteratureReviewAction.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package org.jabref.gui;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Optional;

import org.jabref.gui.actions.SimpleCommand;
import org.jabref.gui.importer.actions.OpenDatabaseAction;
import org.jabref.gui.util.BackgroundTask;
import org.jabref.gui.util.FileDialogConfiguration;
import org.jabref.gui.util.TaskExecutor;
import org.jabref.logic.crawler.Crawler;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntryTypesManager;
import org.jabref.model.util.FileUpdateMonitor;
import org.jabref.preferences.JabRefPreferences;

import org.eclipse.jgit.api.errors.GitAPIException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command behind the "Start systematic literature review" menu entry.
 *
 * When executed, it asks the user for a study definition file, builds a {@link Crawler} for it,
 * runs the crawl as a background task and, on success, opens the {@code studyResult.bib} file
 * located next to the study definition file.
 */
public class StartLiteratureReviewAction extends SimpleCommand {
    private static final Logger LOGGER = LoggerFactory.getLogger(StartLiteratureReviewAction.class);
    private static final String STUDY_RESULT_FILE_NAME = "studyResult.bib";

    private final JabRefFrame frame;
    private final DialogService dialogService;
    private final FileUpdateMonitor fileUpdateMonitor;
    private final Path standardWorkingDirectory;
    private final TaskExecutor taskExecutor;

    /**
     * @param frame                    the main frame; provides the dialog service and the currently selected library tab
     * @param fileUpdateMonitor        passed on to the {@link Crawler}
     * @param standardWorkingDirectory directory the file dialog starts in when no saved library is selected
     * @param taskExecutor             executor that runs the crawl in the background
     */
    public StartLiteratureReviewAction(JabRefFrame frame, FileUpdateMonitor fileUpdateMonitor, Path standardWorkingDirectory, TaskExecutor taskExecutor) {
        this.frame = frame;
        this.dialogService = frame.getDialogService();
        this.fileUpdateMonitor = fileUpdateMonitor;
        // Only the fallback is stored here. The directory actually shown is resolved in execute(),
        // because this constructor runs while the menu is being built and the selected library
        // can change between then and the moment the action is triggered.
        this.standardWorkingDirectory = standardWorkingDirectory;
        this.taskExecutor = taskExecutor;
    }

    /**
     * Lets the user pick a study definition file and crawls the libraries defined in it.
     * Does nothing if the file dialog is canceled. Errors while setting up the crawler or
     * while crawling are logged and shown in an error dialog.
     */
    @Override
    public void execute() {
        FileDialogConfiguration fileDialogConfiguration = new FileDialogConfiguration.Builder()
                .withInitialDirectory(getInitialDirectory(standardWorkingDirectory))
                .build();

        Optional<Path> studyDefinitionFile = dialogService.showFileOpenDialog(fileDialogConfiguration);
        if (studyDefinitionFile.isEmpty()) {
            // Do nothing if selection was canceled
            return;
        }
        final Path studyDefinitionPath = studyDefinitionFile.get();

        final Crawler crawler;
        try {
            JabRefPreferences preferences = JabRefPreferences.getInstance();
            crawler = new Crawler(studyDefinitionPath, fileUpdateMonitor, preferences.getImportFormatPreferences(), preferences.getSavePreferences(), new BibEntryTypesManager());
        } catch (IOException | ParseException | GitAPIException e) {
            LOGGER.error("Error during reading of study definition file.", e);
            dialogService.showErrorDialogAndWait(Localization.lang("Error during reading of study definition file."), e);
            return;
        }

        // The result file lives in the same directory as the study definition file.
        // resolveSibling also copes with a path that has no parent component.
        final Path studyResultFile = studyDefinitionPath.resolveSibling(STUDY_RESULT_FILE_NAME);

        BackgroundTask.wrap(() -> {
            crawler.performCrawl();
            return 0; // Return any value to make this a callable instead of a runnable. This allows throwing exceptions.
        })
                      .onFailure(e -> {
                          // Pass the exception to the logger so the cause and stack trace are not lost
                          LOGGER.error("Error during persistence of crawling results.", e);
                          dialogService.showErrorDialogAndWait(Localization.lang("Error during persistence of crawling results."), e);
                      })
                      .onSuccess(unused -> new OpenDatabaseAction(frame).openFile(studyResultFile, true))
                      .executeWith(taskExecutor);
    }

    /**
     * Determines the directory the file dialog should start in.
     *
     * @param standardWorkingDirectory fallback used when no library is open or the current library has no path
     * @return parent directory of the current library's file, or the standard working directory
     */
    private Path getInitialDirectory(Path standardWorkingDirectory) {
        if (frame.getBasePanelCount() == 0) {
            return standardWorkingDirectory;
        } else {
            Optional<Path> databasePath = frame.getCurrentLibraryTab().getBibDatabaseContext().getDatabasePath();
            return databasePath.map(Path::getParent).orElse(standardWorkingDirectory);
        }
    }
}
1 change: 1 addition & 0 deletions src/main/java/org/jabref/gui/actions/StandardActions.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public enum StandardActions implements Action {
PARSE_LATEX(Localization.lang("Search for citations in LaTeX files..."), IconTheme.JabRefIcons.LATEX_CITATIONS),
NEW_SUB_LIBRARY_FROM_AUX(Localization.lang("New sublibrary based on AUX file") + "...", Localization.lang("New BibTeX sublibrary") + Localization.lang("This feature generates a new library based on which entries are needed in an existing LaTeX document."), IconTheme.JabRefIcons.NEW),
WRITE_XMP(Localization.lang("Write XMP metadata to PDFs"), Localization.lang("Will write XMP metadata to the PDFs linked from selected entries."), KeyBinding.WRITE_XMP),
START_SYSTEMATIC_LITERATURE_REVIEW(Localization.lang("Start systematic literature review")),
OPEN_DATABASE_FOLDER(Localization.lang("Reveal in file explorer")),
OPEN_FOLDER(Localization.lang("Open folder"), Localization.lang("Open folder"), KeyBinding.OPEN_FOLDER),
OPEN_FILE(Localization.lang("Open file"), Localization.lang("Open file"), IconTheme.JabRefIcons.FILE, KeyBinding.OPEN_FILE),
Expand Down
52 changes: 52 additions & 0 deletions src/main/java/org/jabref/logic/crawler/Crawler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package org.jabref.logic.crawler;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;

import org.jabref.logic.crawler.git.GitHandler;
import org.jabref.logic.exporter.SavePreferences;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.model.entry.BibEntryTypesManager;
import org.jabref.model.study.QueryResult;
import org.jabref.model.study.Study;
import org.jabref.model.util.FileUpdateMonitor;

import org.eclipse.jgit.api.errors.GitAPIException;

/**
 * Service supporting systematic literature reviews (SLRs): it runs an automated search and
 * persists the studies found, based on the queries and E-Libraries given in a study definition file.
 *
 * It is composed of a StudyRepository, which manages the repository, and a StudyFetcher,
 * which crawls the selected E-Libraries.
 */
public class Crawler {
    private final StudyRepository studyRepository;
    private final StudyFetcher studyFetcher;

    /**
     * Sets up a crawler that retrieves studies from E-Libraries.
     *
     * The parent directory of the study definition file is used as the root of the study repository
     * and is also handed to the {@link GitHandler}.
     *
     * @param studyDefinitionFile The path to the study definition file that contains the list of targeted E-Libraries and used cross-library queries
     */
    public Crawler(Path studyDefinitionFile, FileUpdateMonitor fileUpdateMonitor, ImportFormatPreferences importFormatPreferences, SavePreferences savePreferences, BibEntryTypesManager bibEntryTypesManager) throws IllegalArgumentException, IOException, ParseException, GitAPIException {
        Path repositoryRoot = studyDefinitionFile.getParent();
        GitHandler gitHandler = new GitHandler(repositoryRoot);
        this.studyRepository = new StudyRepository(repositoryRoot, gitHandler, importFormatPreferences, fileUpdateMonitor, savePreferences, bibEntryTypesManager);

        // The study read by the repository tells us which libraries to query and with which queries
        Study definedStudy = this.studyRepository.getStudy();
        LibraryEntryToFetcherConverter fetcherConverter = new LibraryEntryToFetcherConverter(definedStudy.getActiveLibraryEntries(), importFormatPreferences);
        this.studyFetcher = new StudyFetcher(fetcherConverter.getActiveFetchers(), definedStudy.getSearchQueryStrings());
    }

    /**
     * Crawls the active libraries defined in the study definition file and hands the
     * results to the study repository for persistence.
     *
     * @throws IOException     Thrown if a problem occurred during the persistence of the result.
     * @throws GitAPIException Propagated from the study repository while persisting the result.
     */
    public void performCrawl() throws IOException, GitAPIException {
        List<QueryResult> crawlResults = studyFetcher.crawl();
        studyRepository.persist(crawlResults);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package org.jabref.logic.crawler;

import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.WebFetchers;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.UnknownField;

import static org.jabref.model.entry.types.SystematicLiteratureReviewStudyEntryType.LIBRARY_ENTRY;

/**
 * Converts library entries from the given study into their corresponding fetchers.
 */
class LibraryEntryToFetcherConverter {
    private final List<BibEntry> libraryEntries;
    private final ImportFormatPreferences importFormatPreferences;

    /**
     * @param libraryEntries          entries describing the libraries to search; this class does not check
     *                                whether an entry is enabled, so callers are expected to pass active entries only
     * @param importFormatPreferences preferences used to instantiate the fetchers
     */
    public LibraryEntryToFetcherConverter(List<BibEntry> libraryEntries, ImportFormatPreferences importFormatPreferences) {
        this.libraryEntries = libraryEntries;
        this.importFormatPreferences = importFormatPreferences;
    }

    /**
     * Returns a list of instances of all active library fetchers.
     *
     * A fetcher is returned for every provided library entry whose "name" field matches the name of a
     * search based fetcher (ignoring case). Entries without a matching fetcher are skipped.
     *
     * @return Instances of all fetchers that could be resolved from the provided library entries.
     */
    public List<SearchBasedFetcher> getActiveFetchers() {
        return getFetchersFromLibraryEntries(this.libraryEntries);
    }

    /**
     * Transforms a list of libraryEntries into a list of SearchBasedFetcher instances.
     * Entries that are not of the library entry type, and entries for which no fetcher
     * is found, do not contribute to the result.
     *
     * @param libraryEntries List of entries
     * @return List of fetcher instances
     */
    private List<SearchBasedFetcher> getFetchersFromLibraryEntries(List<BibEntry> libraryEntries) {
        return libraryEntries.parallelStream()
                             .filter(bibEntry -> bibEntry.getType().getName().equals(LIBRARY_ENTRY.getName()))
                             .map(this::createFetcherFromLibraryEntry)
                             .filter(Objects::nonNull)
                             .collect(Collectors.toList());
    }

    /**
     * Transforms a library entry into a SearchBasedFetcher instance. This only works if the library entry specifies a supported fetcher.
     *
     * @param libraryEntry the entry that will be converted
     * @return An instance of the fetcher defined by the library entry, or {@code null} if no fetcher carries that name.
     */
    private SearchBasedFetcher createFetcherFromLibraryEntry(BibEntry libraryEntry) {
        // A fresh set of fetchers is requested per entry, so each entry receives its own fetcher instance
        Set<SearchBasedFetcher> searchBasedFetchers = WebFetchers.getSearchBasedFetchers(importFormatPreferences);
        String libraryNameFromFetcher = libraryEntry.getField(new UnknownField("name")).orElse("");
        return searchBasedFetchers.stream()
                                  // equalsIgnoreCase is locale-independent; lower-casing both sides with the default
                                  // locale fails to match e.g. "IEEE" and "ieee" under a Turkish locale
                                  .filter(searchBasedFetcher -> searchBasedFetcher.getName().equalsIgnoreCase(libraryNameFromFetcher))
                                  .findAny()
                                  .orElse(null);
    }
}
80 changes: 80 additions & 0 deletions src/main/java/org/jabref/logic/crawler/StudyFetcher.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package org.jabref.logic.crawler;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.PagedSearchBasedFetcher;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.study.FetchResult;
import org.jabref.model.study.QueryResult;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Runs every provided search query against every provided E-Library fetcher and groups the
 * retrieved entries first by query and then by E-Library.
 */
class StudyFetcher {
    private static final Logger LOGGER = LoggerFactory.getLogger(StudyFetcher.class);
    private static final int MAX_AMOUNT_OF_RESULTS_PER_FETCHER = 100;

    private final List<SearchBasedFetcher> activeFetchers;
    private final List<String> searchQueries;

    StudyFetcher(List<SearchBasedFetcher> activeFetchers, List<String> searchQueries) throws IllegalArgumentException {
        this.activeFetchers = activeFetchers;
        this.searchQueries = searchQueries;
    }

    /**
     * Performs all searches.
     *
     * The returned list holds one QueryResult per search query. Each QueryResult carries one
     * FetchResult per library. If a library request fails with a FetcherException, the
     * FetchResult of that library is missing from the QueryResult.
     */
    public List<QueryResult> crawl() {
        return searchQueries.parallelStream()
                            .map(query -> new QueryResult(query, performSearchOnQuery(query)))
                            .collect(Collectors.toList());
    }

    /**
     * Sends the given searchQuery to all active fetchers.
     *
     * @param searchQuery The query the search is performed for.
     * @return One FetchResult per fetcher that answered successfully, holding the fetcher name and its retrieved publications as a BibDatabase
     */
    private List<FetchResult> performSearchOnQuery(String searchQuery) {
        return activeFetchers.parallelStream()
                             .map(activeFetcher -> performSearchOnQueryForFetcher(searchQuery, activeFetcher))
                             .filter(Objects::nonNull)
                             .collect(Collectors.toList());
    }

    /**
     * Queries a single fetcher. Paged fetchers are asked page by page, all others with a single search.
     *
     * @return the result for this fetcher, or {@code null} if the fetcher threw a FetcherException
     */
    private FetchResult performSearchOnQueryForFetcher(String searchQuery, SearchBasedFetcher fetcher) {
        try {
            List<BibEntry> retrievedEntries;
            if (fetcher instanceof PagedSearchBasedFetcher) {
                retrievedEntries = fetchAllPages(searchQuery, (PagedSearchBasedFetcher) fetcher);
            } else {
                retrievedEntries = fetcher.performSearch(searchQuery);
            }
            return new FetchResult(fetcher.getName(), new BibDatabase(retrievedEntries));
        } catch (FetcherException e) {
            LOGGER.warn(String.format("%s API request failed", fetcher.getName()), e);
            return null;
        }
    }

    /**
     * Collects the content of as many pages as are needed to cover
     * MAX_AMOUNT_OF_RESULTS_PER_FETCHER entries at the fetcher's page size (rounded up).
     */
    private List<BibEntry> fetchAllPages(String searchQuery, PagedSearchBasedFetcher pagedFetcher) throws FetcherException {
        List<BibEntry> collectedEntries = new ArrayList<>();
        int pageCount = (int) Math.ceil(((double) MAX_AMOUNT_OF_RESULTS_PER_FETCHER) / pagedFetcher.getPageSize());
        for (int pageNumber = 0; pageNumber < pageCount; pageNumber++) {
            collectedEntries.addAll(pagedFetcher.performSearchPaged(searchQuery, pageNumber).getContent());
        }
        return collectedEntries;
    }
}
Loading

0 comments on commit b19c3e4

Please sign in to comment.