-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/enable paginated fetchers #7082
Changes from 4 commits
e5fca67
f2ca2cf
1fdbc6d
215c177
4d86265
9a87a5e
c76bf45
1896325
4e4e419
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,75 @@ | ||
package org.jabref.logic.importer; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.net.MalformedURLException; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
import org.jabref.logic.importer.fetcher.ComplexSearchQuery; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.paging.Page; | ||
import org.jabref.model.strings.StringUtil; | ||
|
||
public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher { | ||
|
||
@Override | ||
default Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException { | ||
if (StringUtil.isBlank(query)) { | ||
return new Page<BibEntry>(query, pageNumber, Collections.emptyList()); | ||
} | ||
|
||
// ADR-0014 | ||
URL urlForQuery; | ||
try { | ||
urlForQuery = getURLForQuery(query, pageNumber); | ||
} catch (URISyntaxException | MalformedURLException e) { | ||
throw new FetcherException(String.format("Search URI crafted from query %s is malformed", query), e); | ||
} | ||
return new Page<>(query, pageNumber, getBibEntries(urlForQuery)); | ||
} | ||
|
||
private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException { | ||
try (InputStream stream = getUrlDownload(urlForQuery).asInputStream()) { | ||
List<BibEntry> fetchedEntries = getParser().parseEntries(stream); | ||
fetchedEntries.forEach(this::doPostCleanup); | ||
return fetchedEntries; | ||
} catch (IOException e) { | ||
throw new FetcherException("A network error occurred while fetching from " + urlForQuery, e); | ||
} catch (ParseException e) { | ||
throw new FetcherException("An internal parser error occurred while fetching from " + urlForQuery, e); | ||
} | ||
} | ||
|
||
@Override | ||
default Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException { | ||
// ADR-0014 | ||
URL urlForQuery; | ||
try { | ||
urlForQuery = getComplexQueryURL(complexSearchQuery, pageNumber); | ||
} catch (URISyntaxException | MalformedURLException e) { | ||
throw new FetcherException("Search URI crafted from complex search query is malformed", e); | ||
} | ||
return new Page<>(complexSearchQuery.toString(), pageNumber, getBibEntries(urlForQuery)); | ||
} | ||
|
||
/** | ||
* Constructs a URL based on the query and page number. | ||
* | ||
* @param query the search query | ||
* @param pageNumber the number of the page indexed from 0 | ||
*/ | ||
URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException; | ||
|
||
/** | ||
* Constructs a URL based on the query, size and page number. | ||
* @param query the search query | ||
* @param size the size of the page | ||
* @param pageNumber the number of the page | ||
* */ | ||
URL getURLForQuery(String query, int size, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException; | ||
* | ||
* @param complexSearchQuery the search query | ||
* @param pageNumber the number of the page indexed from 0 | ||
*/ | ||
default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException { | ||
return getURLForQuery(complexSearchQuery.toString(), pageNumber); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,8 +17,8 @@ | |
import org.jabref.logic.help.HelpFile; | ||
import org.jabref.logic.importer.FulltextFetcher; | ||
import org.jabref.logic.importer.ImportFormatPreferences; | ||
import org.jabref.logic.importer.PagedSearchBasedParserFetcher; | ||
import org.jabref.logic.importer.Parser; | ||
import org.jabref.logic.importer.SearchBasedParserFetcher; | ||
import org.jabref.logic.net.URLDownload; | ||
import org.jabref.logic.util.BuildInfo; | ||
import org.jabref.logic.util.OS; | ||
|
@@ -41,7 +41,7 @@ | |
* | ||
* @implNote <a href="https://developer.ieee.org/docs">API documentation</a> | ||
*/ | ||
public class IEEE implements FulltextFetcher, SearchBasedParserFetcher { | ||
public class IEEE implements FulltextFetcher, PagedSearchBasedParserFetcher { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(IEEE.class); | ||
private static final String STAMP_BASE_STRING_DOCUMENT = "/stamp/stamp.jsp?tp=&arnumber="; | ||
|
@@ -193,13 +193,7 @@ public TrustLevel getTrustLevel() { | |
|
||
@Override | ||
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException { | ||
URIBuilder uriBuilder = new URIBuilder("https://ieeexploreapi.ieee.org/api/v1/search/articles"); | ||
uriBuilder.addParameter("apikey", API_KEY); | ||
uriBuilder.addParameter("querytext", query); | ||
|
||
URLDownload.bypassSSLVerification(); | ||
|
||
return uriBuilder.build().toURL(); | ||
return getURLForQuery(query, 0); | ||
} | ||
|
||
@Override | ||
|
@@ -234,8 +228,31 @@ public Optional<HelpFile> getHelpPage() { | |
|
||
@Override | ||
public URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException { | ||
return getComplexQueryURL(complexSearchQuery, 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be the default implementation, i.e. push to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Thanks for your feedback! :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I implemented all other suggestions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good! Concerning the first point, doesn't it work to parse the queries as follows: Then you don't need any fall-back to a purely string-based search. In my opinion, this question should be resolved before changing the fetcher in other ways. Otherwise you have a lot of overhead/code duplication now, that will be removed later. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I now removed the normal performSearch all together :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks a lot! If you now also remove the "complex" in the names, I'm super happy ;-) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I now removed the complex from perform search :) |
||
} | ||
|
||
@Override | ||
public URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException { | ||
URIBuilder uriBuilder = new URIBuilder("https://ieeexploreapi.ieee.org/api/v1/search/articles"); | ||
uriBuilder.addParameter("apikey", API_KEY); | ||
uriBuilder.addParameter("querytext", query); | ||
uriBuilder.addParameter("max_records", String.valueOf(getPageSize())); | ||
// Starts to index at 1 for the first entry | ||
uriBuilder.addParameter("start_record", String.valueOf(getPageSize() * pageNumber) + 1); | ||
|
||
URLDownload.bypassSSLVerification(); | ||
|
||
return uriBuilder.build().toURL(); | ||
} | ||
|
||
@Override | ||
public URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException { | ||
URIBuilder uriBuilder = new URIBuilder("https://ieeexploreapi.ieee.org/api/v1/search/articles"); | ||
uriBuilder.addParameter("apikey", API_KEY); | ||
uriBuilder.addParameter("max_records", String.valueOf(getPageSize())); | ||
// Starts to index at 1 for the first entry | ||
uriBuilder.addParameter("start_record", String.valueOf(getPageSize() * pageNumber) + 1); | ||
|
||
if (!complexSearchQuery.getDefaultFieldPhrases().isEmpty()) { | ||
uriBuilder.addParameter("querytext", String.join(" AND ", complexSearchQuery.getDefaultFieldPhrases())); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This struck me recently, isn't there a way to check if the exception is a more specific exception so that one could check for the status code number?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do not quite understand what the benefits of that would be.
Could you provide me with an example to grasp your idea, please? :)