Skip to content

Commit

Permalink
Search in PDF Files (#2838)
Browse files Browse the repository at this point in the history
Co-authored-by: braunch <[email protected]>
Co-authored-by: Oliver Kopp <[email protected]>
Co-authored-by: Benedikt Tutzer <[email protected]>
Co-authored-by: Carl Christian Snethlage <[email protected]>
Co-authored-by: Christoph <[email protected]>
  • Loading branch information
6 people authored Jul 14, 2021
1 parent 477118c commit ddce573
Show file tree
Hide file tree
Showing 65 changed files with 1,894 additions and 291 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Added

- We added a fulltext search feature that searches the content of linked PDF files. [#2838](https://github.com/JabRef/jabref/pull/2838)

### Changed

### Fixed
Expand Down
6 changes: 2 additions & 4 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,6 @@ dependencies {
antlr4 'org.antlr:antlr4:4.9.2'
implementation 'org.antlr:antlr4-runtime:4.9.2'

implementation (group: 'org.apache.lucene', name: 'lucene-queryparser', version: '8.9.0') {
exclude group: 'org.apache.lucene', module: 'lucene-sandbox'
}

implementation group: 'org.eclipse.jgit', name: 'org.eclipse.jgit', version: '5.12.0.202106070339-r'

implementation group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-yaml', version: '2.12.4'
Expand Down Expand Up @@ -209,6 +205,8 @@ dependencies {
implementation 'com.vladsch.flexmark:flexmark-ext-gfm-strikethrough:0.62.2'
implementation 'com.vladsch.flexmark:flexmark-ext-gfm-tasklist:0.62.2'

implementation group: 'net.harawata', name: 'appdirs', version: '1.2.1'

testImplementation 'io.github.classgraph:classgraph:4.8.110'
testImplementation 'org.junit.jupiter:junit-jupiter:5.7.2'
testRuntimeOnly 'org.junit.vintage:junit-vintage-engine:5.7.2'
Expand Down
5 changes: 4 additions & 1 deletion external-libraries.md
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ License: Apache-2.0
```
```yaml
Id: org.apache.lucene:lucene-ueryparser
Id: org.apache.lucene:lucene-queryparser
Project: Apache Lucene
URL: https://lucene.apache.org/
License: Apache-2.0
Expand Down Expand Up @@ -567,6 +567,9 @@ org.apache.logging.log4j:log4j-slf4j18-impl:3.0.0-SNAPSHOT
org.apache.lucene:lucene-core:8.9.0
org.apache.lucene:lucene-queries:8.9.0
org.apache.lucene:lucene-queryparser:8.9.0
org.apache.lucene:lucene-analyzers-common:8.9.0
org.apache.lucene:lucene-backward-codecs:8.9.0
org.apache.lucene:lucene-highlighter:8.9.0
org.apache.pdfbox:fontbox:2.0.24
org.apache.pdfbox:pdfbox:2.0.24
org.apache.pdfbox:xmpbox:2.0.24
Expand Down
Binary file added lib/lucene.jar
Binary file not shown.
23 changes: 23 additions & 0 deletions lucene-jar/lib/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Helper build that bundles the Lucene artifacts listed below into a single jar.
// NOTE(review): presumably the output of this build is what gets checked in as lib/lucene.jar
// and is required as the single module "lucene" from module-info.java — confirm.
plugins {
id 'java-library'
// The Shadow plugin provides the shadowJar task configured below.
id 'com.github.johnrengelman.shadow' version '7.0.0'
}

repositories {
mavenCentral()
}

shadowJar {
// Merge the META-INF/services descriptors of the bundled jars instead of letting one overwrite another.
mergeServiceFiles()
}

dependencies {
implementation 'org.apache.lucene:lucene-core:8.9.0'
// lucene-sandbox is excluded here just as it was for the queryparser dependency in the main build.gradle.
implementation ('org.apache.lucene:lucene-queryparser:8.9.0') {
exclude module: "lucene-sandbox"
}
implementation 'org.apache.lucene:lucene-queries:8.9.0'
implementation 'org.apache.lucene:lucene-analyzers-common:8.9.0'
implementation 'org.apache.lucene:lucene-backward-codecs:8.9.0'
implementation 'org.apache.lucene:lucene-highlighter:8.9.0'
}
11 changes: 11 additions & 0 deletions lucene-jar/settings.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
* This file was generated by the Gradle 'init' task.
*
* The settings file is used to specify which projects to include in your build.
*
* Detailed information about configuring a multi-project build in Gradle can be found
* in the user manual at https://docs.gradle.org/7.0.2/userguide/multi_project_builds.html
*/

rootProject.name = 'lucene-jar'
include('lib')
6 changes: 4 additions & 2 deletions src/jmh/java/org/jabref/benchmarks/Benchmarks.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.EnumSet;
import java.util.List;
import java.util.Random;
import java.util.stream.Collectors;
Expand All @@ -28,6 +29,7 @@
import org.jabref.model.groups.KeywordGroup;
import org.jabref.model.groups.WordKeywordGroup;
import org.jabref.model.metadata.MetaData;
import org.jabref.model.search.rules.SearchRules.SearchFlags;
import org.jabref.model.util.DummyFileUpdateMonitor;
import org.jabref.preferences.JabRefPreferences;

Expand Down Expand Up @@ -93,14 +95,14 @@ public String write() throws Exception {
/**
 * Benchmarks a sequential search: builds a query for the fixed term "Journal Title 500",
 * filters all entries of {@code database} with {@code searchQuery::isMatch} and collects the matches.
 *
 * @return the entries matched by the query
 */
@Benchmark
public List<BibEntry> search() {
// FIXME: Reuse SearchWorker here
// NOTE(review): the next two declarations look like the removed and the added side of this diff hunk;
// only the EnumSet-based variant should remain in the final file — confirm.
SearchQuery searchQuery = new SearchQuery("Journal Title 500", false, false);
SearchQuery searchQuery = new SearchQuery("Journal Title 500", EnumSet.noneOf(SearchFlags.class));
return database.getEntries().stream().filter(searchQuery::isMatch).collect(Collectors.toList());
}

/**
 * Benchmarks the same search as {@code search()}, but runs the filter over a parallel stream
 * of the entries of {@code database}.
 *
 * @return the entries matched by the query
 */
@Benchmark
public List<BibEntry> parallelSearch() {
// FIXME: Reuse SearchWorker here
// NOTE(review): the next two declarations look like the removed and the added side of this diff hunk;
// only the EnumSet-based variant should remain in the final file — confirm.
SearchQuery searchQuery = new SearchQuery("Journal Title 500", false, false);
SearchQuery searchQuery = new SearchQuery("Journal Title 500", EnumSet.noneOf(SearchFlags.class));
return database.getEntries().parallelStream().filter(searchQuery::isMatch).collect(Collectors.toList());
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,10 @@
requires flexmark.util.ast;
requires flexmark.util.data;
requires com.h2database.mvstore;
requires lucene.queryparser;
requires lucene.core;
requires lucene;
requires org.eclipse.jgit;
requires com.fasterxml.jackson.databind;
requires com.fasterxml.jackson.dataformat.yaml;
requires com.fasterxml.jackson.datatype.jsr310;
requires net.harawata.appdirs;
}
3 changes: 1 addition & 2 deletions src/main/java/org/jabref/cli/ArgumentProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,7 @@ private boolean exportMatches(List<ParserResult> loaded) {
BibDatabase dataBase = pr.getDatabase();

SearchPreferences searchPreferences = Globals.prefs.getSearchPreferences();
SearchQuery query = new SearchQuery(searchTerm, searchPreferences.isCaseSensitive(),
searchPreferences.isRegularExpression());
SearchQuery query = new SearchQuery(searchTerm, searchPreferences.getSearchFlags());
List<BibEntry> matches = new DatabaseSearcher(query, dataBase).getMatches();

// export matches
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/org/jabref/gui/JabRefFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
import org.jabref.gui.push.PushToApplicationAction;
import org.jabref.gui.push.PushToApplicationsManager;
import org.jabref.gui.search.GlobalSearchBar;
import org.jabref.gui.search.RebuildFulltextSearchIndexAction;
import org.jabref.gui.shared.ConnectToSharedDatabaseCommand;
import org.jabref.gui.shared.PullChangesFromSharedAction;
import org.jabref.gui.slr.ExistingStudySearchAction;
Expand Down Expand Up @@ -819,7 +820,11 @@ private MenuBar createMenu() {
pushToApplicationMenuItem,
new SeparatorMenuItem(),
factory.createMenuItem(StandardActions.START_NEW_STUDY, new StartNewStudyAction(this, Globals.getFileUpdateMonitor(), Globals.TASK_EXECUTOR, prefs)),
factory.createMenuItem(StandardActions.SEARCH_FOR_EXISTING_STUDY, new ExistingStudySearchAction(this, Globals.getFileUpdateMonitor(), Globals.TASK_EXECUTOR, prefs))
factory.createMenuItem(StandardActions.SEARCH_FOR_EXISTING_STUDY, new ExistingStudySearchAction(this, Globals.getFileUpdateMonitor(), Globals.TASK_EXECUTOR, prefs)),

new SeparatorMenuItem(),

factory.createMenuItem(StandardActions.REBUILD_FULLTEXT_SEARCH_INDEX, new RebuildFulltextSearchIndexAction(stateManager, this::getCurrentLibraryTab, dialogService, prefs.getFilePreferences()))
);

SidePaneComponent webSearch = sidePaneManager.getComponent(SidePaneType.WEB_SEARCH);
Expand Down
29 changes: 29 additions & 0 deletions src/main/java/org/jabref/gui/JabRefMain.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
package org.jabref.gui;

import java.io.File;
import java.io.IOException;
import java.net.Authenticator;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.stream.Stream;

import javafx.application.Application;
import javafx.application.Platform;
Expand All @@ -20,6 +26,7 @@
import org.jabref.logic.remote.client.RemoteClient;
import org.jabref.logic.util.OS;
import org.jabref.migrations.PreferencesMigrations;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.database.BibDatabaseMode;
import org.jabref.preferences.JabRefPreferences;
import org.jabref.preferences.PreferencesService;
Expand Down Expand Up @@ -59,6 +66,8 @@ public void start(Stage mainStage) {

applyPreferences(preferences);

clearOldSearchIndices();

try {
// Process arguments
ArgumentProcessor argumentProcessor = new ArgumentProcessor(arguments, ArgumentProcessor.Mode.INITIAL_START);
Expand Down Expand Up @@ -139,4 +148,24 @@ private static void configureProxy(ProxyPreferences proxyPreferences) {
Authenticator.setDefault(new ProxyAuthenticator());
}
}

/**
 * Deletes every directory that lives next to the current fulltext index base path.
 * <p>
 * The current index location is taken from {@link BibDatabaseContext#getFulltextIndexBasePath()};
 * all sibling directories in its parent are treated as out-of-date indices and removed recursively.
 * Deletion is best effort: files that cannot be deleted are skipped silently, and an I/O error while
 * listing or walking the directories is logged instead of being propagated.
 */
private static void clearOldSearchIndices() {
    Path currentIndexPath = BibDatabaseContext.getFulltextIndexBasePath();
    Path appData = currentIndexPath.getParent();

    // Nothing to clean up if the parent directory is unknown or has not been created yet;
    // without this guard newDirectoryStream would throw (NPE for null, IOException otherwise).
    if ((appData == null) || !Files.isDirectory(appData)) {
        return;
    }

    try (DirectoryStream<Path> stream = Files.newDirectoryStream(appData)) {
        for (Path path : stream) {
            if (Files.isDirectory(path) && !path.equals(currentIndexPath)) {
                LOGGER.info("Deleting out-of-date fulltext search index at {}.", path);
                // Files.walk keeps directory handles open until the stream is closed,
                // so it has to be used inside try-with-resources.
                try (Stream<Path> outdatedFiles = Files.walk(path)) {
                    // Reverse order visits children before their parent directory,
                    // so each directory is empty by the time it is deleted.
                    outdatedFiles.sorted(Comparator.reverseOrder())
                                 .map(Path::toFile)
                                 .forEach(File::delete);
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Could not access app-directory at {}", appData, e);
    }
}
}
71 changes: 71 additions & 0 deletions src/main/java/org/jabref/gui/LibraryTab.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.jabref.gui;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
Expand Down Expand Up @@ -43,8 +44,11 @@
import org.jabref.logic.autosaveandbackup.BackupManager;
import org.jabref.logic.citationstyle.CitationStyleCache;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.util.FileFieldParser;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.pdf.FileAnnotationCache;
import org.jabref.logic.pdf.search.indexing.IndexingTaskManager;
import org.jabref.logic.pdf.search.indexing.PdfIndexer;
import org.jabref.logic.search.SearchQuery;
import org.jabref.logic.shared.DatabaseLocation;
import org.jabref.logic.util.UpdateField;
Expand All @@ -56,10 +60,13 @@
import org.jabref.model.database.event.EntriesAddedEvent;
import org.jabref.model.database.event.EntriesRemovedEvent;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.LinkedFile;
import org.jabref.model.entry.event.EntriesEventSource;
import org.jabref.model.entry.event.EntryChangedEvent;
import org.jabref.model.entry.event.FieldChangedEvent;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.FieldFactory;
import org.jabref.model.entry.field.StandardField;
import org.jabref.preferences.PreferencesService;

import com.google.common.eventbus.Subscribe;
Expand Down Expand Up @@ -101,6 +108,8 @@ public class LibraryTab extends Tab {
// initializing it so we prevent NullPointerException
private BackgroundTask<ParserResult> dataLoadingTask = BackgroundTask.wrap(() -> null);

private IndexingTaskManager indexingTaskManager = new IndexingTaskManager(Globals.TASK_EXECUTOR);

public LibraryTab(JabRefFrame frame,
PreferencesService preferencesService,
BibDatabaseContext bibDatabaseContext,
Expand All @@ -125,6 +134,7 @@ public LibraryTab(JabRefFrame frame,
setupAutoCompletion();

this.getDatabase().registerListener(new SearchListener());
this.getDatabase().registerListener(new IndexUpdateListener());
this.getDatabase().registerListener(new EntriesRemovedListener());

// ensure that at each addition of a new entry, the entry is added to the groups interface
Expand Down Expand Up @@ -332,6 +342,8 @@ public void updateTabTitle(boolean isChanged) {
textProperty().setValue(tabTitle.toString());
setTooltip(new Tooltip(toolTipText.toString()));
});

indexingTaskManager.updateDatabaseName(tabTitle.toString());
}

private List<String> collectAllDatabasePaths() {
Expand Down Expand Up @@ -846,4 +858,63 @@ public void listen(EntriesRemovedEvent removedEntriesEvent) {
DefaultTaskExecutor.runInJavaFXThread(() -> frame.getGlobalSearchBar().performSearch());
}
}

/**
 * Event listener that forwards entry and file changes of this tab's database to the
 * {@code indexingTaskManager}, so that the fulltext index is updated accordingly.
 * <p>
 * Failures to obtain a {@link PdfIndexer} are logged and otherwise ignored.
 */
private class IndexUpdateListener {

    /**
     * Schedules indexing for the whole database context as soon as the listener is created.
     */
    public IndexUpdateListener() {
        try {
            indexingTaskManager.addToIndex(PdfIndexer.of(bibDatabaseContext, preferencesService.getFilePreferences()), bibDatabaseContext);
        } catch (IOException e) {
            LOGGER.error("Cannot access lucene index", e);
        }
    }

    /**
     * Schedules every newly added entry for indexing.
     */
    @Subscribe
    public void listen(EntriesAddedEvent addedEntryEvent) {
        try {
            PdfIndexer pdfIndexer = PdfIndexer.of(bibDatabaseContext, preferencesService.getFilePreferences());
            for (BibEntry addedEntry : addedEntryEvent.getBibEntries()) {
                indexingTaskManager.addToIndex(pdfIndexer, addedEntry, bibDatabaseContext);
            }
        } catch (IOException e) {
            LOGGER.error("Cannot access lucene index", e);
        }
    }

    /**
     * Schedules every removed entry for removal from the index.
     */
    @Subscribe
    public void listen(EntriesRemovedEvent removedEntriesEvent) {
        try {
            PdfIndexer pdfIndexer = PdfIndexer.of(bibDatabaseContext, preferencesService.getFilePreferences());
            for (BibEntry removedEntry : removedEntriesEvent.getBibEntries()) {
                indexingTaskManager.removeFromIndex(pdfIndexer, removedEntry);
            }
        } catch (IOException e) {
            LOGGER.error("Cannot access lucene index", e);
        }
    }

    /**
     * Reacts to changes of the file field: files that are new in the field are scheduled for indexing,
     * files that are no longer in the field are scheduled for removal. Changes of other fields are ignored.
     */
    @Subscribe
    public void listen(FieldChangedEvent fieldChangedEvent) {
        if (fieldChangedEvent.getField().equals(StandardField.FILE)) {
            List<LinkedFile> oldFileList = FileFieldParser.parse(fieldChangedEvent.getOldValue());
            List<LinkedFile> newFileList = FileFieldParser.parse(fieldChangedEvent.getNewValue());

            // removeAll computes the set difference. The former remove(Object) call looked for the
            // list itself as an element and therefore never removed anything.
            List<LinkedFile> addedFiles = new ArrayList<>(newFileList);
            addedFiles.removeAll(oldFileList);
            List<LinkedFile> removedFiles = new ArrayList<>(oldFileList);
            removedFiles.removeAll(newFileList);

            try {
                indexingTaskManager.addToIndex(PdfIndexer.of(bibDatabaseContext, preferencesService.getFilePreferences()), fieldChangedEvent.getBibEntry(), addedFiles, bibDatabaseContext);
                indexingTaskManager.removeFromIndex(PdfIndexer.of(bibDatabaseContext, preferencesService.getFilePreferences()), fieldChangedEvent.getBibEntry(), removedFiles);
            } catch (IOException e) {
                LOGGER.warn("I/O error when writing lucene index", e);
            }
        }
    }
}

/**
 * Gives access to the indexing task manager held by this tab.
 *
 * @return the {@link IndexingTaskManager} instance stored in this tab
 */
public IndexingTaskManager getIndexingTaskManager() {
    return this.indexingTaskManager;
}
}
1 change: 1 addition & 0 deletions src/main/java/org/jabref/gui/actions/StandardActions.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public enum StandardActions implements Action {
DELETE(Localization.lang("Delete"), IconTheme.JabRefIcons.DELETE_ENTRY),
DELETE_ENTRY(Localization.lang("Delete Entry"), IconTheme.JabRefIcons.DELETE_ENTRY, KeyBinding.DELETE_ENTRY),
SEND_AS_EMAIL(Localization.lang("Send as email"), IconTheme.JabRefIcons.EMAIL),
REBUILD_FULLTEXT_SEARCH_INDEX(Localization.lang("Rebuild fulltext search index"), IconTheme.JabRefIcons.FILE),
OPEN_EXTERNAL_FILE(Localization.lang("Open file"), IconTheme.JabRefIcons.FILE, KeyBinding.OPEN_FILE),
OPEN_URL(Localization.lang("Open URL or DOI"), IconTheme.JabRefIcons.WWW, KeyBinding.OPEN_URL_OR_DOI),
SEARCH_SHORTSCIENCE(Localization.lang("Search ShortScience")),
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/org/jabref/gui/entryeditor/EntryEditor.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.jabref.gui.StateManager;
import org.jabref.gui.citationkeypattern.GenerateCitationKeySingleAction;
import org.jabref.gui.entryeditor.fileannotationtab.FileAnnotationTab;
import org.jabref.gui.entryeditor.fileannotationtab.FulltextSearchResultsTab;
import org.jabref.gui.externalfiles.ExternalFilesEntryLinker;
import org.jabref.gui.externalfiletype.ExternalFileTypes;
import org.jabref.gui.help.HelpAction;
Expand Down Expand Up @@ -267,6 +268,8 @@ private List<EntryEditorTab> createTabs() {
// LaTeX citations tab
entryEditorTabs.add(new LatexCitationsTab(databaseContext, preferencesService, taskExecutor, dialogService));

entryEditorTabs.add(new FulltextSearchResultsTab(stateManager, preferencesService.getTheme(), preferencesService.getFilePreferences()));

return entryEditorTabs;
}

Expand Down
Loading

0 comments on commit ddce573

Please sign in to comment.