Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/enable paginated fetchers #7082

Merged
merged 9 commits into from
Nov 15, 2020
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
package org.jabref.logic.importer;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

public interface PagedSearchBasedFetcher extends SearchBasedFetcher {

/**
 * Searches the endpoint and returns one page of results.
 *
 * @param query search query sent to the endpoint
 * @param pageNumber requested page number, indexed from 0
 * @return Page with search results
 * @throws FetcherException if the request to the endpoint fails
 */
Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException;

/**
 * Searches the endpoint using the string representation of the complex query and returns one page of results.
 *
 * @param query complex search query sent to the endpoint
 * @param pageNumber requested page number, indexed from 0
 * @return Page with search results
 * @throws FetcherException if the request to the endpoint fails
 */
default Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery query, int pageNumber) throws FetcherException {
return performSearchPaged(query.toString(), pageNumber);
}

/**
* @return default pageSize
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,75 @@
package org.jabref.logic.importer;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;
import org.jabref.model.strings.StringUtil;

public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher {

@Override
default Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException {
if (StringUtil.isBlank(query)) {
return new Page<BibEntry>(query, pageNumber, Collections.emptyList());
}

// ADR-0014
URL urlForQuery;
try {
urlForQuery = getURLForQuery(query, pageNumber);
} catch (URISyntaxException | MalformedURLException e) {
throw new FetcherException(String.format("Search URI crafted from query %s is malformed", query), e);
}
return new Page<>(query, pageNumber, getBibEntries(urlForQuery));
}

private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
try (InputStream stream = getUrlDownload(urlForQuery).asInputStream()) {
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);
fetchedEntries.forEach(this::doPostCleanup);
return fetchedEntries;
} catch (IOException e) {
throw new FetcherException("A network error occurred while fetching from " + urlForQuery, e);
} catch (ParseException e) {
throw new FetcherException("An internal parser error occurred while fetching from " + urlForQuery, e);
}
}

@Override
default Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
// ADR-0014
URL urlForQuery;
try {
urlForQuery = getComplexQueryURL(complexSearchQuery, pageNumber);
} catch (URISyntaxException | MalformedURLException e) {
throw new FetcherException("Search URI crafted from complex search query is malformed", e);
}
return new Page<>(complexSearchQuery.toString(), pageNumber, getBibEntries(urlForQuery));
}

/**
* Constructs a URL based on the query, size and page number.
*
* @param query the search query
* @param pageNumber the number of the page indexed from 0
*/
URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException;

/**
* Constructs a URL based on the query, size and page number.
* @param query the search query
* @param size the size of the page
* @param pageNumber the number of the page
* */
URL getURLForQuery(String query, int size, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException;
*
* @param complexSearchQuery the search query
* @param pageNumber the number of the page indexed from 0
*/
default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException {
return getURLForQuery(complexSearchQuery.toString(), pageNumber);
}
}
24 changes: 21 additions & 3 deletions src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import org.jabref.logic.importer.IdBasedFetcher;
import org.jabref.logic.importer.IdFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.PagedSearchBasedFetcher;
import org.jabref.logic.util.io.XMLUtil;
import org.jabref.logic.util.strings.StringSimilarity;
import org.jabref.model.entry.BibEntry;
Expand All @@ -31,6 +31,7 @@
import org.jabref.model.entry.identifier.ArXivIdentifier;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.paging.Page;
import org.jabref.model.strings.StringUtil;
import org.jabref.model.util.OptionalUtil;

Expand All @@ -52,7 +53,7 @@
* <a href="https://github.com/nathangrigg/arxiv2bib">arxiv2bib</a> which is <a href="https://arxiv2bibtex.org/">live</a>
<a href="https://gitlab.c3sl.ufpr.br/portalmec/dspace-portalmec/blob/aa209d15082a9870f9daac42c78a35490ce77b52/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java">dspace-portalmec</a>
*/
public class ArXiv implements FulltextFetcher, SearchBasedFetcher, IdBasedFetcher, IdFetcher<ArXivIdentifier> {
public class ArXiv implements FulltextFetcher, PagedSearchBasedFetcher, IdBasedFetcher, IdFetcher<ArXivIdentifier> {

private static final Logger LOGGER = LoggerFactory.getLogger(ArXiv.class);

Expand Down Expand Up @@ -157,6 +158,10 @@ private List<ArXivEntry> searchForEntries(String searchQuery) throws FetcherExce
return queryApi(searchQuery, Collections.emptyList(), 0, 10);
}

/**
 * Fetches one page of arXiv entries for the given query.
 *
 * @param searchQuery query string passed to the arXiv API
 * @param pageNumber requested page, indexed from 0
 */
private List<ArXivEntry> searchForEntries(String searchQuery, int pageNumber) throws FetcherException {
    int pageSize = getPageSize();
    int startIndex = pageSize * pageNumber;
    return queryApi(searchQuery, Collections.emptyList(), startIndex, pageSize);
}

private List<ArXivEntry> queryApi(String searchQuery, List<ArXivIdentifier> ids, int start, int maxResults)
throws FetcherException {
Document result = callApi(searchQuery, ids, start, maxResults);
Expand Down Expand Up @@ -263,6 +268,19 @@ public List<BibEntry> performSearch(String query) throws FetcherException {
*/
@Override
public List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
    // Delegate to the paged variant and materialize the first page as a mutable list.
    Page<BibEntry> firstPage = performComplexSearchPaged(complexSearchQuery, 0);
    return new ArrayList<>(firstPage.getContent());
}

@Override
public Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException {
    // Convert each fetched arXiv entry into a BibEntry using the configured keyword separator.
    List<BibEntry> convertedEntries = new ArrayList<>();
    for (ArXivEntry arXivEntry : searchForEntries(query, pageNumber)) {
        convertedEntries.add(arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()));
    }
    return new Page<>(query, pageNumber, convertedEntries);
}

@Override
public Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
List<String> searchTerms = new ArrayList<>();
complexSearchQuery.getAuthors().forEach(author -> searchTerms.add("au:" + author));
complexSearchQuery.getTitlePhrases().forEach(title -> searchTerms.add("ti:" + title));
Expand All @@ -272,7 +290,7 @@ public List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery
complexSearchQuery.getToYear().ifPresent(year -> searchTerms.add(year.toString()));
searchTerms.addAll(complexSearchQuery.getDefaultFieldPhrases());
String complexQueryString = String.join(" AND ", searchTerms);
return performSearch(complexQueryString);
return performSearchPaged(complexQueryString, pageNumber);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,12 @@ public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLE
}

@Override
public URL getURLForQuery(String query, int size, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException {
URIBuilder builder = new URIBuilder(API_SEARCH_URL);
builder.addParameter("q", query);
builder.addParameter("fl", "bibcode");
builder.addParameter("rows", String.valueOf(size));
builder.addParameter("start", String.valueOf(size * pageNumber));
builder.addParameter("rows", String.valueOf(getPageSize()));
builder.addParameter("start", String.valueOf(getPageSize() * pageNumber));
return builder.build().toURL();
}

Expand All @@ -105,7 +105,7 @@ public URL getURLForQuery(String query, int size, int pageNumber) throws URISynt
* @return URL which points to a search request for given entry
*/
@Override
public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedURLException, FetcherException {
public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedURLException {
StringBuilder stringBuilder = new StringBuilder();

Optional<String> title = entry.getFieldOrAlias(StandardField.TITLE).map(t -> "title:\"" + t + "\"");
Expand Down Expand Up @@ -300,12 +300,11 @@ private List<BibEntry> performSearchByIds(Collection<String> identifiers) throws

@Override
public Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException {

if (StringUtil.isBlank(query)) {
return new Page<>(query, pageNumber);
}
try {
List<String> bibcodes = fetchBibcodes(getURLForQuery(query, getPageSize(), pageNumber));
List<String> bibcodes = fetchBibcodes(getURLForQuery(query, pageNumber));
Collection<BibEntry> results = performSearchByIds(bibcodes);
return new Page<>(query, pageNumber, results);
} catch (URISyntaxException e) {
Expand Down
79 changes: 46 additions & 33 deletions src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.PagedSearchBasedFetcher;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.paging.Page;
import org.jabref.model.util.DummyFileUpdateMonitor;

import org.apache.http.client.utils.URIBuilder;
Expand All @@ -38,7 +39,7 @@
* <p>
* Search String infos: https://scholar.google.com/intl/en/scholar/help.html#searching
*/
public class GoogleScholar implements FulltextFetcher, SearchBasedFetcher {
public class GoogleScholar implements FulltextFetcher, PagedSearchBasedFetcher {
private static final Logger LOGGER = LoggerFactory.getLogger(GoogleScholar.class);

private static final Pattern LINK_TO_BIB_PATTERN = Pattern.compile("(https:\\/\\/scholar.googleusercontent.com\\/scholar.bib[^\"]*)");
Expand Down Expand Up @@ -128,37 +129,7 @@ public Optional<HelpFile> getHelpPage() {

/**
 * Performs an unpaged search by returning the content of the first result page.
 *
 * @param query search query sent to Google Scholar
 * @return the entries of the first result page as a mutable list
 * @throws FetcherException if fetching from Google Scholar fails
 */
@Override
public List<BibEntry> performSearch(String query) throws FetcherException {
    return new ArrayList<>(performSearchPaged(query, 0).getContent());
}

@Override
Expand Down Expand Up @@ -259,4 +230,46 @@ private void obtainAndModifyCookie() throws FetcherException {
throw new FetcherException("Cookie configuration for Google Scholar failed.", e);
}
}

@Override
public Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException {
LOGGER.debug("Using URL {}", query);
obtainAndModifyCookie();
List<BibEntry> foundEntries = new ArrayList<>(20);

URIBuilder uriBuilder = null;
try {
uriBuilder = new URIBuilder(BASIC_SEARCH_URL);
} catch (URISyntaxException e) {
throw new FetcherException("Error while fetching from " + getName() + " at URL " + BASIC_SEARCH_URL, e);
}

uriBuilder.addParameter("hl", "en");
uriBuilder.addParameter("start", String.valueOf(pageNumber * getPageSize()));
uriBuilder.addParameter("num", String.valueOf(getPageSize()));
uriBuilder.addParameter("btnG", "Search");
uriBuilder.addParameter("q", query);
String queryURL = uriBuilder.toString();

try {
addHitsFromQuery(foundEntries, queryURL);
} catch (IOException e) {
// if there are too much requests from the same IP address google is answering with a 503 and redirecting to a captcha challenge
// The caught IOException looks for example like this:
// java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/index?continue=https://scholar.google.com/scholar%3Fhl%3Den%26btnG%3DSearch%26q%3Dbpmn&hl=en&q=CGMSBI0NBDkYuqy9wAUiGQDxp4NLQCWbIEY1HjpH5zFJhv4ANPGdWj0
if (e.getMessage().contains("Server returned HTTP response code: 503 for URL")) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This struck me recently, isn't there a way to check if the exception is a more specific exception so that one could check for the status code number?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not quite understand what the benefits of that would be.
Could you provide me with an example to grasp your idea, please? :)

throw new FetcherException("Fetching from Google Scholar at URL " + queryURL + " failed.",
Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), e);
} else {
throw new FetcherException("Error while fetching from " + getName() + " at URL " + queryURL, e);
}
}

return new Page<>(query, pageNumber, foundEntries);
}

/**
 * Performs a paged complex search by falling back to the string representation of the query,
 * mirroring the {@code PagedSearchBasedFetcher} default behavior.
 * Previously this returned {@code null}, which would cause a NullPointerException in callers.
 *
 * @param query complex search query sent to Google Scholar
 * @param pageNumber requested page number, indexed from 0
 * @return Page with the fetched entries
 * @throws FetcherException if fetching from Google Scholar fails
 */
@Override
public Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery query, int pageNumber) throws FetcherException {
    return performSearchPaged(query.toString(), pageNumber);
}
}
35 changes: 26 additions & 9 deletions src/main/java/org/jabref/logic/importer/fetcher/IEEE.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.PagedSearchBasedParserFetcher;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.logic.net.URLDownload;
import org.jabref.logic.util.BuildInfo;
import org.jabref.logic.util.OS;
Expand All @@ -41,7 +41,7 @@
*
* @implNote <a href="https://developer.ieee.org/docs">API documentation</a>
*/
public class IEEE implements FulltextFetcher, SearchBasedParserFetcher {
public class IEEE implements FulltextFetcher, PagedSearchBasedParserFetcher {

private static final Logger LOGGER = LoggerFactory.getLogger(IEEE.class);
private static final String STAMP_BASE_STRING_DOCUMENT = "/stamp/stamp.jsp?tp=&arnumber=";
Expand Down Expand Up @@ -193,13 +193,7 @@ public TrustLevel getTrustLevel() {

/**
 * Constructs the search URL for the first result page.
 *
 * @param query the search query
 * @return URL of the IEEE Xplore search request for page 0
 */
@Override
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException {
    return getURLForQuery(query, 0);
}

@Override
Expand Down Expand Up @@ -234,8 +228,31 @@ public Optional<HelpFile> getHelpPage() {

@Override
public URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException {
return getComplexQueryURL(complexSearchQuery, 0);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be the default implementation, i.e. push to PagedSearchBasedParserFetcher

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have currently quite a lot of code duplications. The main sources are:

  1. Complex and string-based queries have to be implemented separately, leading to mostly the same code. Suggestion: remove string-based queries completely
  2. Unpaged search methods always fall back to paged search method with page number 0. Suggestion: put these fallbacks in the general PagedSearchFetcher interface.

Thanks for your feedback! :)
Regarding 1:
I think that this is a good idea but will require more work than just replacing the method.
This is because for the WebSearchPane for instance the normal string-based search is used as a fallback case when the query could not be parsed.
Therefore I do not feel comfortable just tossing the string-based version.
However, I will address this in the upcoming weeks :)!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I implemented all other suggestions

Copy link
Member

@tobiasdiez tobiasdiez Nov 11, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good!

Concerning the first point, doesn't it work to parse the queries as follows:
author=me and title=something -> ComplexQuery[author: "me", title: "something", rest: ""]
author=me something -> ComplexQuery[author: "me", title: "", rest: "something"]
something -> ComplexQuery[author: "", title: "", rest: ""something]

Then you don't need any fall-back to a purely string-based search.

In my opinion, this question should be resolved before changing the fetcher in other ways. Otherwise you have a lot of overhead/code duplication now, that will be removed later.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I now removed the normal performSearch all together :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks a lot! If you now also remove the "complex" in the names, I'm super happy ;-)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I now removed the complex from perform search :)
Regarding the query object, I believe that it is sensible to keep that name as it will be extended in the future such as structured information.

}

/**
 * Constructs the IEEE Xplore search URL for the given query and page.
 *
 * @param query the search query
 * @param pageNumber the number of the page, indexed from 0
 */
@Override
public URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException {
    URIBuilder uriBuilder = new URIBuilder("https://ieeexploreapi.ieee.org/api/v1/search/articles");
    uriBuilder.addParameter("apikey", API_KEY);
    uriBuilder.addParameter("querytext", query);
    uriBuilder.addParameter("max_records", String.valueOf(getPageSize()));
    // The API indexes records starting at 1, so the first record of page n is n * pageSize + 1.
    // BUG FIX: the previous code appended 1 to the String ("20" + 1 -> "201") instead of adding it numerically.
    uriBuilder.addParameter("start_record", String.valueOf(getPageSize() * pageNumber + 1));

    URLDownload.bypassSSLVerification();

    return uriBuilder.build().toURL();
}

@Override
public URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException {
URIBuilder uriBuilder = new URIBuilder("https://ieeexploreapi.ieee.org/api/v1/search/articles");
uriBuilder.addParameter("apikey", API_KEY);
uriBuilder.addParameter("max_records", String.valueOf(getPageSize()));
// Starts to index at 1 for the first entry
uriBuilder.addParameter("start_record", String.valueOf(getPageSize() * pageNumber) + 1);

if (!complexSearchQuery.getDefaultFieldPhrases().isEmpty()) {
uriBuilder.addParameter("querytext", String.join(" AND ", complexSearchQuery.getDefaultFieldPhrases()));
}
Expand Down
Loading