Skip to content

Commit

Permalink
Feature/enable paginated fetchers (#7082)
Browse files Browse the repository at this point in the history
  • Loading branch information
DominikVoigt authored Nov 15, 2020
1 parent 3d5f35a commit 00e3409
Show file tree
Hide file tree
Showing 30 changed files with 381 additions and 300 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jabref.gui.importer.fetcher;

import java.util.Optional;
import java.util.SortedSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -23,10 +22,8 @@
import org.jabref.gui.util.BackgroundTask;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.QueryParser;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.WebFetchers;
import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.strings.StringUtil;
import org.jabref.preferences.JabRefPreferences;
Expand Down Expand Up @@ -109,15 +106,8 @@ public void search() {
SearchBasedFetcher activeFetcher = getSelectedFetcher();

BackgroundTask<ParserResult> task;
QueryParser queryParser = new QueryParser();
Optional<ComplexSearchQuery> generatedQuery = queryParser.parseQueryStringIntoComplexQuery(getQuery());
if (generatedQuery.isPresent()) {
task = BackgroundTask.wrap(() -> new ParserResult(activeFetcher.performComplexSearch(generatedQuery.get())))
.withInitialMessage(Localization.lang("Processing %0", getQuery()));
} else {
task = BackgroundTask.wrap(() -> new ParserResult(activeFetcher.performSearch(getQuery().trim())))
.withInitialMessage(Localization.lang("Processing %0", getQuery()));
}
task = BackgroundTask.wrap(() -> new ParserResult(activeFetcher.performSearch(getQuery().trim())))
.withInitialMessage(Localization.lang("Processing %0", getQuery().trim()));
task.onFailure(dialogService::showErrorDialogAndWait);

ImportEntriesDialog dialog = new ImportEntriesDialog(frame.getCurrentLibraryTab().getBibDatabaseContext(), task);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,52 @@
package org.jabref.logic.importer;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

public interface PagedSearchBasedFetcher extends SearchBasedFetcher {

/**
* @param query search query send to endpoint
* @param pageNumber requested site number
* @param complexSearchQuery the complex query defining all fielded search parameters
* @param pageNumber requested page number, indexed from 0
* @return Page with search results
*/
Page<BibEntry> performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException;

/**
* @param complexSearchQuery query string that can be parsed into a complex search query
* @param pageNumber requested page number, indexed from 0
* @return Page with search results
*/
Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException;
default Page<BibEntry> performSearchPaged(String complexSearchQuery, int pageNumber) throws FetcherException {
    // A blank query is answered with an empty page right away instead of being parsed and delegated.
    if (complexSearchQuery.isBlank()) {
        return new Page<>(complexSearchQuery, pageNumber, Collections.emptyList());
    }
    Optional<ComplexSearchQuery> generatedQuery = new QueryParser().parseQueryStringIntoComplexQuery(complexSearchQuery);
    // If the string could not be parsed into a complex query, use it verbatim as a default-field phrase.
    // orElseGet defers building that fallback query until it is actually needed.
    ComplexSearchQuery effectiveQuery = generatedQuery.orElseGet(
            () -> ComplexSearchQuery.builder().defaultFieldPhrase(complexSearchQuery).build());
    return this.performSearchPaged(effectiveQuery, pageNumber);
}

/**
* Returns the default page size. This default implementation always answers 20;
* implementing fetchers may override it.
*
* @return default pageSize
*/
default int getPageSize() {
return 20;
}

@Override
default List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
    // A non-paged search is served by the first page (index 0); its content is copied into a new list.
    Page<BibEntry> firstPage = performSearchPaged(complexSearchQuery, 0);
    return new ArrayList<>(firstPage.getContent());
}

@Override
default List<BibEntry> performSearch(String complexSearchQuery) throws FetcherException {
    // A non-paged search is served by the first page (index 0); its content is copied into a new list.
    Page<BibEntry> firstPage = performSearchPaged(complexSearchQuery, 0);
    return new ArrayList<>(firstPage.getContent());
}
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,72 @@
package org.jabref.logic.importer;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher {

@Override
default Page<BibEntry> performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
    // ADR-0014
    final URL searchUrl;
    try {
        searchUrl = getComplexQueryURL(complexSearchQuery, pageNumber);
    } catch (URISyntaxException | MalformedURLException e) {
        // URL construction problems are reported to callers as a FetcherException carrying the cause.
        throw new FetcherException("Search URI crafted from complex search query is malformed", e);
    }
    // The page records the query in its string form together with the entries fetched from the URL.
    String queryText = complexSearchQuery.toString();
    List<BibEntry> entries = getBibEntries(searchUrl);
    return new Page<>(queryText, pageNumber, entries);
}

// Downloads the given URL, parses the response with this fetcher's parser and runs the post-cleanup on every entry.
// Network and parser failures are both rethrown as FetcherException with the original exception as cause.
private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
    try (InputStream responseStream = getUrlDownload(urlForQuery).asInputStream()) {
        List<BibEntry> parsedEntries = getParser().parseEntries(responseStream);
        for (BibEntry entry : parsedEntries) {
            doPostCleanup(entry);
        }
        return parsedEntries;
    } catch (IOException e) {
        throw new FetcherException("A network error occurred while fetching from " + urlForQuery, e);
    } catch (ParseException e) {
        throw new FetcherException("An internal parser error occurred while fetching from " + urlForQuery, e);
    }
}

/**
* Constructs a URL based on the query and page number.
* @param query the search query
* @param size the size of the page
* @param pageNumber the number of the page
* */
URL getURLForQuery(String query, int size, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException;
*
* @param query the search query
* @param pageNumber the number of the page indexed from 0
*/
URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException;

/**
* Constructs a URL based on the complex query and the page number.
* <p>
* This default implementation converts the complex query to its string form and delegates to
* {@link #getURLForQuery(String, int)}.
*
* @param complexSearchQuery the search query
* @param pageNumber the number of the page indexed from 0
* @return the URL produced by {@link #getURLForQuery(String, int)} for the query's string form
*/
default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException {
return getURLForQuery(complexSearchQuery.toString(), pageNumber);
}

// Both parent interfaces provide a default implementation of this method, so the one to use
// is selected explicitly here: the SearchBasedParserFetcher variant.
@Override
default List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
return SearchBasedParserFetcher.super.performSearch(complexSearchQuery);
}

/**
* Constructs the URL for a plain string query without an explicit page by requesting page 0.
*
* @param query the search query
*/
@Override
default URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
return getURLForQuery(query, 0);
}

/**
* Constructs the URL for a complex query without an explicit page by requesting page 0
* via {@link #getComplexQueryURL(ComplexSearchQuery, int)}.
*
* @param query the complex search query
*/
@Override
default URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, MalformedURLException, FetcherException {
return getComplexQueryURL(query, 0);
}
}
17 changes: 7 additions & 10 deletions src/main/java/org/jabref/logic/importer/QueryParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,23 @@
import org.apache.lucene.search.QueryVisitor;

/**
* This class converts a query string written in lucene syntax into a complex search query.
* This class converts a query string written in lucene syntax into a complex query.
*
* For simplicity this is limited to fielded data and the boolean AND operator.
* For simplicity this is currently limited to fielded data and the boolean AND operator.
*/
public class QueryParser {

/**
* Parses the given query string into a complex query using lucene.
* Note: For unique fields, the alphabetically first instance in the query string is used in the complex query.
* Note: For unique fields, the alphabetically and numerically first instance in the query string is used in the complex query.
*
* @param queryString The given query string
* @param query The given query string
* @return A complex query containing all fields of the query string
* @throws QueryNodeException Error during parsing
*/
public Optional<ComplexSearchQuery> parseQueryStringIntoComplexQuery(String queryString) {
public Optional<ComplexSearchQuery> parseQueryStringIntoComplexQuery(String query) {
try {
ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder();

StandardQueryParser parser = new StandardQueryParser();
Query luceneQuery = parser.parse(queryString, "default");
Query luceneQuery = parser.parse(query, "default");
Set<Term> terms = new HashSet<>();
// This implementation collects all terms from the leaves of the query tree independent of the internal boolean structure
// If further capabilities are required in the future the visitor and ComplexSearchQuery has to be adapted accordingly.
Expand All @@ -44,7 +41,7 @@ public Optional<ComplexSearchQuery> parseQueryStringIntoComplexQuery(String quer

List<Term> sortedTerms = new ArrayList<>(terms);
sortedTerms.sort(Comparator.comparing(Term::text).reversed());
return Optional.of(ComplexSearchQuery.fromTerms(terms));
return Optional.of(ComplexSearchQuery.fromTerms(sortedTerms));
} catch (QueryNodeException | IllegalStateException | IllegalArgumentException ex) {
return Optional.empty();
}
Expand Down
23 changes: 15 additions & 8 deletions src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.jabref.logic.importer;

import java.util.Collections;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
Expand All @@ -12,21 +14,26 @@
public interface SearchBasedFetcher extends WebFetcher {

/**
* Looks for hits which are matched by the given free-text query.
* This method is used to send complex queries using fielded search.
*
* @param query search string
* @param complexSearchQuery the complex search query defining all fielded search parameters
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
List<BibEntry> performSearch(String query) throws FetcherException;
List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException;

/**
* This method is used to send complex queries using fielded search.
* Looks for hits which are matched by the given free-text query.
*
* @param complexSearchQuery the search query defining all fielded search parameters
* @param complexSearchQuery query string that can be parsed into a complex search query
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
default List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
// Default implementation behaves as perform search on all fields concatenated as query
return performSearch(complexSearchQuery.toString());
default List<BibEntry> performSearch(String complexSearchQuery) throws FetcherException {
    // A blank query is answered with an empty result right away instead of being parsed and delegated.
    if (complexSearchQuery.isBlank()) {
        return Collections.emptyList();
    }
    Optional<ComplexSearchQuery> generatedQuery = new QueryParser().parseQueryStringIntoComplexQuery(complexSearchQuery);
    // If the string could not be parsed into a complex query, use it verbatim as a default-field phrase.
    // orElseGet defers building that fallback query until it is actually needed.
    ComplexSearchQuery effectiveQuery = generatedQuery.orElseGet(
            () -> ComplexSearchQuery.builder().defaultFieldPhrase(complexSearchQuery).build());
    return this.performSearch(effectiveQuery);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,11 @@
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.strings.StringUtil;

/**
* Provides a convenient interface for search-based fetcher, which follow the usual three-step procedure:
Expand All @@ -23,34 +21,6 @@
*/
public interface SearchBasedParserFetcher extends SearchBasedFetcher {

/**
* Constructs a URL based on the query.
*
* @param query the search query
*/
URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException;

/**
* Returns the parser used to convert the response to a list of {@link BibEntry}.
*/
Parser getParser();

@Override
default List<BibEntry> performSearch(String query) throws FetcherException {
if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

// ADR-0014
URL urlForQuery;
try {
urlForQuery = getURLForQuery(query);
} catch (URISyntaxException | MalformedURLException | FetcherException e) {
throw new FetcherException(String.format("Search URI crafted from query %s is malformed", query), e);
}
return getBibEntries(urlForQuery);
}

/**
* This method is used to send queries with advanced URL parameters.
* This method is necessary as the performSearch method does not support certain URL parameters that are used for
Expand All @@ -59,11 +29,11 @@ default List<BibEntry> performSearch(String query) throws FetcherException {
* @param complexSearchQuery the search query defining all fielded search parameters
*/
@Override
default List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
default List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
// ADR-0014
URL urlForQuery;
try {
urlForQuery = getComplexQueryURL(complexSearchQuery);
urlForQuery = getURLForQuery(complexSearchQuery);
} catch (URISyntaxException | MalformedURLException | FetcherException e) {
throw new FetcherException("Search URI crafted from complex search query is malformed", e);
}
Expand All @@ -82,12 +52,23 @@ private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
}
}

default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException, FetcherException {
// Default implementation behaves as getURLForQuery using the default field phrases as query
List<String> defaultPhrases = complexSearchQuery.getDefaultFieldPhrases();
return this.getURLForQuery(String.join(" ", defaultPhrases));
default URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, MalformedURLException, FetcherException {
    // By default the complex query is reduced to its string form and handled like a plain string query
    String plainQuery = query.toString();
    return getURLForQuery(plainQuery);
}

/**
* Returns the parser used to convert the response to a list of {@link BibEntry}.
*/
Parser getParser();

/**
* Constructs a URL based on the query.
*
* @param query the search query
*/
URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException;

/**
* Performs a cleanup of the fetched entry.
* <p>
Expand Down
Loading

0 comments on commit 00e3409

Please sign in to comment.