Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix lucene query #12100

Merged
merged 10 commits into from
Oct 27, 2024
7 changes: 5 additions & 2 deletions src/main/java/org/jabref/gui/entryeditor/SourceTab.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,16 @@ public SourceTab(BibDatabaseContext bibDatabaseContext,
}

private void highlightSearchPattern() {
if (codeArea == null || searchQueryProperty.get().isEmpty()) {
if (codeArea == null) {
return;
}

codeArea.setStyleClass(0, codeArea.getLength(), TEXT_STYLE);
Map<Optional<Field>, List<String>> searchTermsMap = Highlighter.groupTermsByField(searchQueryProperty.get().get());
if (searchQueryProperty.get().isEmpty()) {
return;
}

Map<Optional<Field>, List<String>> searchTermsMap = Highlighter.groupTermsByField(searchQueryProperty.get().get());
searchTermsMap.forEach((optionalField, terms) -> {
Optional<String> searchPattern = Highlighter.buildSearchPattern(terms);
if (searchPattern.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,6 @@ protected void bindToEntry(BibEntry entry) {
if (entry == null || !shouldShow(entry)) {
return;
}
if (documentViewerView == null) {
documentViewerView = new DocumentViewerView();
}
this.entry = entry;
content.getChildren().clear();

Expand Down Expand Up @@ -159,6 +156,9 @@ private Text createPageLink(LinkedFile linkedFile, int pageNumber) {

pageLink.setOnMouseClicked(event -> {
if (MouseButton.PRIMARY == event.getButton()) {
if (documentViewerView == null) {
documentViewerView = new DocumentViewerView();
}
documentViewerView.switchToFile(linkedFile);
documentViewerView.gotoPage(pageNumber);
documentViewerView.disableLiveMode();
Expand Down
1 change: 1 addition & 0 deletions src/main/java/org/jabref/logic/search/PostgreServer.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public PostgreServer() {
EmbeddedPostgres embeddedPostgres;
try {
embeddedPostgres = EmbeddedPostgres.builder()
.setOutputRedirector(ProcessBuilder.Redirect.DISCARD)
.start();
LOGGER.info("Postgres server started, connection port: {}", embeddedPostgres.getPort());
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.search.PostgreConstants;

import io.github.thibaultmeyer.cuid.CUID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -53,7 +54,7 @@ public BibFieldsIndexer(BibEntryPreferences bibEntryPreferences, BibDatabaseCont
this.keywordSeparator = bibEntryPreferences.getKeywordSeparator();
this.libraryName = databaseContext.getDatabasePath().map(path -> path.getFileName().toString()).orElse("unsaved");

this.mainTable = databaseContext.getUniqueName();
this.mainTable = CUID.randomCUID2(12).toString();
this.splitValuesTable = mainTable + SPLIT_TABLE_SUFFIX;

this.schemaMainTableReference = PostgreConstants.getMainTableSchemaReference(mainTable);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import org.jabref.model.search.query.SqlQueryNode;
import org.jabref.search.SearchParser;

import org.apache.lucene.search.Query;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -24,9 +23,9 @@ public static String flagsToSearchExpression(SearchQuery searchQuery) {
return new SearchFlagsToExpressionVisitor(searchQuery.getSearchFlags()).visit(searchQuery.getContext());
}

public static Query searchToLucene(SearchQuery searchQuery) {
public static String searchToLucene(SearchQuery searchQuery) {
Comment on lines -27 to +26
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We loos strong typing, but I think, the conversion is easier now.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, because using the Analyzer here removes words like "a" and "the", which can lead to empty queries with null. This also made testing a bit confusing. For example, if the search term was image, I had to know its stem (imag) for the tests to pass.

LOGGER.debug("Converting search expression to Lucene: {}", searchQuery.getSearchExpression());
return new SearchToLuceneVisitor().visit(searchQuery.getContext());
return new SearchToLuceneVisitor(searchQuery.getSearchFlags()).visit(searchQuery.getContext());
}

public static List<SearchQueryNode> extractSearchTerms(SearchQuery searchQuery) {
Expand Down
169 changes: 74 additions & 95 deletions src/main/java/org/jabref/logic/search/query/SearchToLuceneVisitor.java
Original file line number Diff line number Diff line change
@@ -1,152 +1,131 @@
package org.jabref.logic.search.query;

import java.util.EnumSet;
import java.util.List;
import java.util.Locale;

import org.jabref.model.search.LinkedFilesConstants;
import org.jabref.model.search.SearchFlags;
import org.jabref.search.SearchBaseVisitor;
import org.jabref.search.SearchParser;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.queryparser.classic.QueryParser;

/**
* Tests are located in {@link org.jabref.logic.search.query.SearchQueryLuceneConversionTest}.
*/
public class SearchToLuceneVisitor extends SearchBaseVisitor<Query> {
public class SearchToLuceneVisitor extends SearchBaseVisitor<String> {
private final EnumSet<SearchFlags> searchFlags;

private static final List<String> SEARCH_FIELDS = LinkedFilesConstants.PDF_FIELDS;

private final QueryBuilder queryBuilder;

public SearchToLuceneVisitor() {
this.queryBuilder = new QueryBuilder(LinkedFilesConstants.LINKED_FILES_ANALYZER);
public SearchToLuceneVisitor(EnumSet<SearchFlags> searchFlags) {
this.searchFlags = searchFlags;
}

@Override
public Query visitStart(SearchParser.StartContext ctx) {
public String visitStart(SearchParser.StartContext ctx) {
return visit(ctx.andExpression());
}

@Override
public Query visitImplicitAndExpression(SearchParser.ImplicitAndExpressionContext ctx) {
List<Query> children = ctx.expression().stream().map(this::visit).toList();
if (children.size() == 1) {
return children.getFirst();
}
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (Query child : children) {
builder.add(child, BooleanClause.Occur.MUST);
}
return builder.build();
public String visitImplicitAndExpression(SearchParser.ImplicitAndExpressionContext ctx) {
List<String> children = ctx.expression().stream().map(this::visit).toList();
return children.size() == 1 ? children.getFirst() : String.join(" ", children);
}

@Override
public Query visitParenExpression(SearchParser.ParenExpressionContext ctx) {
return visit(ctx.andExpression());
public String visitParenExpression(SearchParser.ParenExpressionContext ctx) {
String expr = visit(ctx.andExpression());
return expr.isEmpty() ? "" : "(" + expr + ")";
}

@Override
public Query visitNegatedExpression(SearchParser.NegatedExpressionContext ctx) {
Query innerQuery = visit(ctx.expression());
if (innerQuery instanceof MatchNoDocsQuery) {
return innerQuery;
}
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(innerQuery, BooleanClause.Occur.MUST_NOT);
return builder.build();
public String visitNegatedExpression(SearchParser.NegatedExpressionContext ctx) {
return "NOT (" + visit(ctx.expression()) + ")";
}

@Override
public Query visitBinaryExpression(SearchParser.BinaryExpressionContext ctx) {
Query left = visit(ctx.left);
Query right = visit(ctx.right);
public String visitBinaryExpression(SearchParser.BinaryExpressionContext ctx) {
String left = visit(ctx.left);
String right = visit(ctx.right);

if (left instanceof MatchNoDocsQuery) {
if (left.isEmpty() && right.isEmpty()) {
return "";
}
if (left.isEmpty()) {
return right;
}
if (right instanceof MatchNoDocsQuery) {
if (right.isEmpty()) {
return left;
}

BooleanQuery.Builder builder = new BooleanQuery.Builder();
String operator = ctx.bin_op.getType() == SearchParser.AND ? " AND " : " OR ";
return left + operator + right;
}

@Override
public String visitComparison(SearchParser.ComparisonContext ctx) {
String term = SearchQueryConversion.unescapeSearchValue(ctx.searchValue());
boolean isQuoted = ctx.searchValue().getStart().getType() == SearchParser.STRING_LITERAL;

// unfielded expression
if (ctx.FIELD() == null) {
if (searchFlags.contains(SearchFlags.REGULAR_EXPRESSION)) {
return "/" + term + "/";
}
return isQuoted ? "\"" + escapeQuotes(term) + "\"" : QueryParser.escape(term);
}

if (ctx.bin_op.getType() == SearchParser.AND) {
builder.add(left, BooleanClause.Occur.MUST);
builder.add(right, BooleanClause.Occur.MUST);
} else if (ctx.bin_op.getType() == SearchParser.OR) {
builder.add(left, BooleanClause.Occur.SHOULD);
builder.add(right, BooleanClause.Occur.SHOULD);
String field = ctx.FIELD().getText().toLowerCase(Locale.ROOT);
if (!isValidField(field)) {
return "";
}

return builder.build();
field = "any".equals(field) || "anyfield".equals(field) ? "" : field + ":";
int operator = ctx.operator().getStart().getType();
return buildFieldExpression(field, term, operator, isQuoted);
}

@Override
public Query visitComparisonExpression(SearchParser.ComparisonExpressionContext ctx) {
return visit(ctx.comparison());
private boolean isValidField(String field) {
return "any".equals(field) || "anyfield".equals(field) || LinkedFilesConstants.PDF_FIELDS.contains(field);
}

@Override
public Query visitComparison(SearchParser.ComparisonContext ctx) {
String field = ctx.FIELD() == null ? null : ctx.FIELD().getText();
String term = SearchQueryConversion.unescapeSearchValue(ctx.searchValue());
private String buildFieldExpression(String field, String term, int operator, boolean isQuoted) {
boolean isRegexOp = isRegexOperator(operator);
boolean isNegationOp = isNegationOperator(operator);

// unfielded expression
if (field == null || "anyfield".equals(field) || "any".equals(field)) {
return createMultiFieldQuery(term, ctx.operator());
} else if (SEARCH_FIELDS.contains(field)) {
return createFieldQuery(field, term, ctx.operator());
if (isRegexOp) {
String expression = field + "/" + term + "/";
return isNegationOp ? "NOT " + expression : expression;
} else {
return new MatchNoDocsQuery();
term = isQuoted ? "\"" + escapeQuotes(term) + "\"" : QueryParser.escape(term);
String expression = field + term;
return isNegationOp ? "NOT " + expression : expression;
}
}

private Query createMultiFieldQuery(String value, SearchParser.OperatorContext operator) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (String field : SEARCH_FIELDS) {
builder.add(createFieldQuery(field, value, operator), BooleanClause.Occur.SHOULD);
}
return builder.build();
private static String escapeQuotes(String term) {
return term.replace("\"", "\\\"");
}

private Query createFieldQuery(String field, String value, SearchParser.OperatorContext operator) {
if (operator == null) {
return createTermOrPhraseQuery(field, value);
}

return switch (operator.getStart().getType()) {
case SearchParser.REQUAL,
SearchParser.CREEQUAL ->
new RegexpQuery(new Term(field, value));
private static boolean isNegationOperator(int operator) {
return switch (operator) {
case SearchParser.NEQUAL,
SearchParser.NCEQUAL,
SearchParser.NEEQUAL,
SearchParser.NCEEQUAL ->
createNegatedQuery(createTermOrPhraseQuery(field, value));
case SearchParser.NREQUAL,
SearchParser.NCREEQUAL ->
createNegatedQuery(new RegexpQuery(new Term(field, value)));
default ->
createTermOrPhraseQuery(field, value);
SearchParser.NCEEQUAL,
SearchParser.NREQUAL,
SearchParser.NCREEQUAL -> true;
default -> false;
};
}

private Query createNegatedQuery(Query query) {
BooleanQuery.Builder negatedQuery = new BooleanQuery.Builder();
negatedQuery.add(query, BooleanClause.Occur.MUST_NOT);
return negatedQuery.build();
}

private Query createTermOrPhraseQuery(String field, String value) {
if (value.contains("*") || value.contains("?")) {
return new TermQuery(new Term(field, value));
}
return queryBuilder.createPhraseQuery(field, value);
private static boolean isRegexOperator(int operator) {
return switch (operator) {
case SearchParser.REQUAL,
SearchParser.CREEQUAL,
SearchParser.NREQUAL,
SearchParser.NCREEQUAL -> true;
default -> false;
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@

import org.apache.lucene.document.Document;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
Expand All @@ -39,29 +42,36 @@ public final class LinkedFilesSearcher {
private final FilePreferences filePreferences;
private final BibDatabaseContext databaseContext;
private final SearcherManager searcherManager;
private final MultiFieldQueryParser parser;

public LinkedFilesSearcher(BibDatabaseContext databaseContext, LuceneIndexer linkedFilesIndexer, FilePreferences filePreferences) {
this.searcherManager = linkedFilesIndexer.getSearcherManager();
this.databaseContext = databaseContext;
this.filePreferences = filePreferences;
this.parser = new MultiFieldQueryParser(LinkedFilesConstants.PDF_FIELDS.toArray(new String[0]), LinkedFilesConstants.LINKED_FILES_ANALYZER);
parser.setDefaultOperator(QueryParser.Operator.AND);
}

public SearchResults search(SearchQuery searchQuery) {
if (!searchQuery.isValid()) {
return new SearchResults();
}

Query luceneQuery = SearchQueryConversion.searchToLucene(searchQuery);
Optional<Query> luceneQuery = getLuceneQuery(searchQuery);
if (luceneQuery.isEmpty()) {
return new SearchResults();
}

EnumSet<SearchFlags> searchFlags = searchQuery.getSearchFlags();
boolean shouldSearchInLinkedFiles = searchFlags.contains(SearchFlags.FULLTEXT) && filePreferences.shouldFulltextIndexLinkedFiles();
if (!shouldSearchInLinkedFiles) {
return new SearchResults();
}

LOGGER.debug("Searching in linked files with query: {}", luceneQuery);
LOGGER.debug("Searching in linked files with query: {}", luceneQuery.get());
try {
IndexSearcher linkedFilesIndexSearcher = acquireIndexSearcher(searcherManager);
SearchResults searchResults = search(linkedFilesIndexSearcher, luceneQuery);
SearchResults searchResults = search(linkedFilesIndexSearcher, luceneQuery.get());
releaseIndexSearcher(searcherManager, linkedFilesIndexSearcher);
return searchResults;
} catch (IOException | IndexSearcher.TooManyClauses e) {
Expand All @@ -70,6 +80,16 @@ public SearchResults search(SearchQuery searchQuery) {
return new SearchResults();
}

private Optional<Query> getLuceneQuery(SearchQuery searchQuery) {
String query = SearchQueryConversion.searchToLucene(searchQuery);
try {
return Optional.of(parser.parse(query));
} catch (ParseException e) {
LOGGER.error("Error during query parsing", e);
return Optional.empty();
}
}

private SearchResults search(IndexSearcher indexSearcher, Query searchQuery) throws IOException {
TopDocs topDocs = indexSearcher.search(searchQuery, Integer.MAX_VALUE);
StoredFields storedFields = indexSearcher.storedFields();
Expand Down
Loading
Loading