Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/enable paginated fetchers #7082

Merged
merged 9 commits into from
Nov 15, 2020
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
package org.jabref.logic.importer;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

public interface PagedSearchBasedFetcher extends SearchBasedFetcher {

/**
* @param query search query sent to endpoint
* @param pageNumber requested site number
* @param pageNumber requested page number, indexed from 0
* @return Page with search results
*/
Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException;

/**
 * Runs a paged search for a structured query.
 * <p>
 * By default the complex query is flattened with {@code toString()} and handed to
 * {@link #performSearchPaged(String, int)}.
 *
 * @param query      structured search query sent to the endpoint
 * @param pageNumber requested page number, indexed from 0
 * @return page holding the search results
 * @throws FetcherException propagated from the string-based paged search
 */
default Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery query, int pageNumber) throws FetcherException {
    final String flattenedQuery = query.toString();
    return performSearchPaged(flattenedQuery, pageNumber);
}

/**
* @return default pageSize
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,75 @@
package org.jabref.logic.importer;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;
import org.jabref.model.strings.StringUtil;

public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher {

/**
 * Fetches one page of results for a plain-text query.
 * <p>
 * A blank query yields an empty page without building a URL. Otherwise the search URL is built
 * first, and the entries downloaded and parsed from it form the content of the returned page.
 *
 * @param query      the search query
 * @param pageNumber the number of the requested page, indexed from 0
 * @return the page carrying the query, the page number and the fetched entries
 * @throws FetcherException if the search URL cannot be built, or if fetching/parsing fails
 */
@Override
default Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException {
    final boolean nothingToSearchFor = StringUtil.isBlank(query);
    if (nothingToSearchFor) {
        return new Page<BibEntry>(query, pageNumber, Collections.emptyList());
    }

    // ADR-0014
    final URL searchUrl;
    try {
        searchUrl = getURLForQuery(query, pageNumber);
    } catch (URISyntaxException | MalformedURLException malformed) {
        final String message = String.format("Search URI crafted from query %s is malformed", query);
        throw new FetcherException(message, malformed);
    }
    return new Page<>(query, pageNumber, getBibEntries(searchUrl));
}

/**
 * Downloads the resource behind the given URL, parses it into entries and runs the
 * post-cleanup step on every parsed entry.
 *
 * @param urlForQuery the fully built search URL
 * @return the parsed entries, after post-cleanup
 * @throws FetcherException wrapping an {@link IOException} (network error) or a
 *                          {@code ParseException} (parser error); the URL is part of the message
 */
private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
    try (InputStream responseStream = getUrlDownload(urlForQuery).asInputStream()) {
        List<BibEntry> parsedEntries = getParser().parseEntries(responseStream);
        for (BibEntry parsedEntry : parsedEntries) {
            doPostCleanup(parsedEntry);
        }
        return parsedEntries;
    } catch (IOException networkFailure) {
        throw new FetcherException("A network error occurred while fetching from " + urlForQuery, networkFailure);
    } catch (ParseException parserFailure) {
        throw new FetcherException("An internal parser error occurred while fetching from " + urlForQuery, parserFailure);
    }
}

/**
 * Fetches one page of results for a structured query.
 * <p>
 * The URL comes from {@code getComplexQueryURL}; the returned page records the string form of
 * the complex query as its query.
 *
 * @param complexSearchQuery the structured search query
 * @param pageNumber         the number of the requested page, indexed from 0
 * @return the page carrying the stringified query, the page number and the fetched entries
 * @throws FetcherException if the search URL cannot be built, or if fetching/parsing fails
 */
@Override
default Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
    // ADR-0014
    final URL searchUrl;
    try {
        searchUrl = getComplexQueryURL(complexSearchQuery, pageNumber);
    } catch (URISyntaxException | MalformedURLException malformed) {
        throw new FetcherException("Search URI crafted from complex search query is malformed", malformed);
    }
    final String queryAsString = complexSearchQuery.toString();
    return new Page<>(queryAsString, pageNumber, getBibEntries(searchUrl));
}

/**
 * Constructs the URL that requests a single page of results for the given query.
 * <p>
 * NOTE(review): the page size is not a parameter of this method; the implementation visible in
 * this change reads it from {@code getPageSize()} — confirm that other implementers do the same.
 *
 * @param query the search query
 * @param pageNumber the number of the requested page, indexed from 0
 * @return the URL to query for the requested page
 * @throws URISyntaxException if the search URI crafted from the query is not a valid URI
 * @throws MalformedURLException if the crafted URI cannot be turned into a URL
 */
URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException;

/**
* Constructs a URL based on the query, size and page number.
* @param query the search query
* @param size the size of the page
* @param pageNumber the number of the page
* */
URL getURLForQuery(String query, int size, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException;
*
* @param complexSearchQuery the search query
* @param pageNumber the number of the page indexed from 0
*/
default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException {
    // Default behaviour: flatten the complex query to its string form and reuse the
    // plain-query URL builder for the same page.
    final String flattenedQuery = complexSearchQuery.toString();
    return getURLForQuery(flattenedQuery, pageNumber);
}
}
28 changes: 20 additions & 8 deletions src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import org.jabref.logic.importer.IdBasedFetcher;
import org.jabref.logic.importer.IdFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.PagedSearchBasedFetcher;
import org.jabref.logic.util.io.XMLUtil;
import org.jabref.logic.util.strings.StringSimilarity;
import org.jabref.model.entry.BibEntry;
Expand All @@ -31,6 +31,7 @@
import org.jabref.model.entry.identifier.ArXivIdentifier;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.paging.Page;
import org.jabref.model.strings.StringUtil;
import org.jabref.model.util.OptionalUtil;

Expand All @@ -52,7 +53,7 @@
* <a href="https://github.com/nathangrigg/arxiv2bib">arxiv2bib</a> which is <a href="https://arxiv2bibtex.org/">live</a>
* <a href="https://gitlab.c3sl.ufpr.br/portalmec/dspace-portalmec/blob/aa209d15082a9870f9daac42c78a35490ce77b52/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java">dspace-portalmec</a>
*/
public class ArXiv implements FulltextFetcher, SearchBasedFetcher, IdBasedFetcher, IdFetcher<ArXivIdentifier> {
public class ArXiv implements FulltextFetcher, PagedSearchBasedFetcher, IdBasedFetcher, IdFetcher<ArXivIdentifier> {

private static final Logger LOGGER = LoggerFactory.getLogger(ArXiv.class);

Expand Down Expand Up @@ -153,8 +154,8 @@ private List<ArXivEntry> searchForEntries(BibEntry entry) throws FetcherExceptio
return Collections.emptyList();
}

private List<ArXivEntry> searchForEntries(String searchQuery) throws FetcherException {
return queryApi(searchQuery, Collections.emptyList(), 0, 10);
private List<ArXivEntry> searchForEntries(String searchQuery, int pageNumber) throws FetcherException {
    // Translate the zero-based page number into the start offset and result count
    // that queryApi takes; no explicit identifiers are passed for a text search.
    final int firstResultIndex = getPageSize() * pageNumber;
    final int resultsPerPage = getPageSize();
    return queryApi(searchQuery, Collections.emptyList(), firstResultIndex, resultsPerPage);
}

private List<ArXivEntry> queryApi(String searchQuery, List<ArXivIdentifier> ids, int start, int maxResults)
Expand Down Expand Up @@ -250,9 +251,7 @@ public Optional<HelpFile> getHelpPage() {

@Override
public List<BibEntry> performSearch(String query) throws FetcherException {
return searchForEntries(query).stream().map(
(arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()))
.collect(Collectors.toList());
return new ArrayList<>(performSearchPaged(query, 0).getContent());
}

/**
Expand All @@ -263,6 +262,19 @@ public List<BibEntry> performSearch(String query) throws FetcherException {
*/
@Override
public List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
    // The unpaged search is the first page (index 0) of the paged search,
    // copied into a fresh list.
    final Page<BibEntry> firstPage = performComplexSearchPaged(complexSearchQuery, 0);
    return new ArrayList<>(firstPage.getContent());
}

/**
 * Searches arXiv for the given query and returns the requested page of results,
 * with every arXiv entry converted to a {@code BibEntry}.
 *
 * @param query      the search query
 * @param pageNumber the number of the requested page, indexed from 0
 * @return the page carrying the query, the page number and the converted entries
 * @throws FetcherException if the underlying arXiv search fails
 */
@Override
public Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException {
    final List<ArXivEntry> arXivEntries = searchForEntries(query, pageNumber);
    final List<BibEntry> searchResult = new ArrayList<>();
    for (ArXivEntry arXivEntry : arXivEntries) {
        searchResult.add(arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()));
    }
    return new Page<>(query, pageNumber, searchResult);
}

@Override
public Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
List<String> searchTerms = new ArrayList<>();
complexSearchQuery.getAuthors().forEach(author -> searchTerms.add("au:" + author));
complexSearchQuery.getTitlePhrases().forEach(title -> searchTerms.add("ti:" + title));
Expand All @@ -272,7 +284,7 @@ public List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery
complexSearchQuery.getToYear().ifPresent(year -> searchTerms.add(year.toString()));
searchTerms.addAll(complexSearchQuery.getDefaultFieldPhrases());
String complexQueryString = String.join(" AND ", searchTerms);
return performSearch(complexQueryString);
return performSearchPaged(complexQueryString, pageNumber);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,12 @@ public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLE
}

@Override
public URL getURLForQuery(String query, int size, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException {
URIBuilder builder = new URIBuilder(API_SEARCH_URL);
builder.addParameter("q", query);
builder.addParameter("fl", "bibcode");
builder.addParameter("rows", String.valueOf(size));
builder.addParameter("start", String.valueOf(size * pageNumber));
builder.addParameter("rows", String.valueOf(getPageSize()));
builder.addParameter("start", String.valueOf(getPageSize() * pageNumber));
return builder.build().toURL();
}

Expand All @@ -105,7 +105,7 @@ public URL getURLForQuery(String query, int size, int pageNumber) throws URISynt
* @return URL which points to a search request for given entry
*/
@Override
public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedURLException {
StringBuilder stringBuilder = new StringBuilder();

Optional<String> title = entry.getFieldOrAlias(StandardField.TITLE).map(t -> "title:\"" + t + "\"");
Expand Down Expand Up @@ -194,19 +194,7 @@ public List<BibEntry> performSearch(BibEntry entry) throws FetcherException {

@Override
public List<BibEntry> performSearch(String query) throws FetcherException {

if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

try {
List<String> bibcodes = fetchBibcodes(getURLForQuery(query));
return performSearchByIds(bibcodes);
} catch (URISyntaxException e) {
throw new FetcherException("Search URI is malformed", e);
} catch (IOException e) {
throw new FetcherException("A network error occurred", e);
}
return new ArrayList<>(performSearchPaged(query, 0).getContent());
}

/**
Expand Down Expand Up @@ -300,12 +288,11 @@ private List<BibEntry> performSearchByIds(Collection<String> identifiers) throws

@Override
public Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException {

if (StringUtil.isBlank(query)) {
return new Page<>(query, pageNumber);
}
try {
List<String> bibcodes = fetchBibcodes(getURLForQuery(query, getPageSize(), pageNumber));
List<String> bibcodes = fetchBibcodes(getURLForQuery(query, pageNumber));
Collection<BibEntry> results = performSearchByIds(bibcodes);
return new Page<>(query, pageNumber, results);
} catch (URISyntaxException e) {
Expand Down
Loading