Skip to content

Commit

Permalink
Adapt the way the transformer state is handled
Browse files Browse the repository at this point in the history
  • Loading branch information
DominikVoigt committed Jan 15, 2021
1 parent d09f948 commit 787f1bc
Show file tree
Hide file tree
Showing 13 changed files with 79 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public interface PagedSearchBasedFetcher extends SearchBasedFetcher {
* @param pageNumber requested page number, indexed from 0
* @return Page with search results
*/
Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber, AbstractQueryTransformer transformer) throws FetcherException;
Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber) throws FetcherException;

/**
* @param searchQuery query string that can be parsed into a complex search query
Expand All @@ -28,10 +28,11 @@ default Page<BibEntry> performSearchPaged(String searchQuery, int pageNumber) th
if (searchQuery.isBlank()) {
return new Page<>(searchQuery, pageNumber, Collections.emptyList());
}
resetTransformer();
AbstractQueryTransformer transformer = getQueryTransformer();
Optional<String> transformedQuery = transformer.parseQueryStringIntoComplexQuery(searchQuery);
// Otherwise just use query as a default term
return this.performSearchPagedForTransformedQuery(transformedQuery.orElse(""), pageNumber, transformer);
return this.performSearchPagedForTransformedQuery(transformedQuery.orElse(""), pageNumber);
}

/**
Expand All @@ -42,7 +43,7 @@ default int getPageSize() {
}

@Override
default List<BibEntry> performSearchForTransformedQuery(String transformedQuery, AbstractQueryTransformer transformer) throws FetcherException {
return new ArrayList<>(performSearchPagedForTransformedQuery(transformedQuery, 0, transformer).getContent());
default List<BibEntry> performSearchForTransformedQuery(String transformedQuery) throws FetcherException {
return new ArrayList<>(performSearchPagedForTransformedQuery(transformedQuery, 0).getContent());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,17 @@
import java.util.ArrayList;
import java.util.List;

import org.jabref.logic.importer.fetcher.transformators.AbstractQueryTransformer;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher {

@Override
default Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber, AbstractQueryTransformer transformer) throws FetcherException {
default Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber) throws FetcherException {
// ADR-0014
URL urlForQuery;
try {
urlForQuery = getURLForQuery(transformedQuery, pageNumber, transformer);
urlForQuery = getURLForQuery(transformedQuery, pageNumber);
} catch (URISyntaxException | MalformedURLException e) {
throw new FetcherException("Search URI crafted from complex search query is malformed", e);
}
Expand All @@ -44,15 +43,15 @@ private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
* @param transformedQuery the search query
* @param pageNumber the number of the page indexed from 0
*/
URL getURLForQuery(String transformedQuery, int pageNumber, AbstractQueryTransformer transformer) throws URISyntaxException, MalformedURLException;
URL getURLForQuery(String transformedQuery, int pageNumber) throws URISyntaxException, MalformedURLException;

@Override
default URL getURLForQuery(String transformedQuery, AbstractQueryTransformer transformer) throws URISyntaxException, MalformedURLException, FetcherException {
return getURLForQuery(transformedQuery, 0, transformer);
default URL getURLForQuery(String transformedQuery) throws URISyntaxException, MalformedURLException, FetcherException {
return getURLForQuery(transformedQuery, 0);
}

@Override
default List<BibEntry> performSearchForTransformedQuery(String transformedQuery, AbstractQueryTransformer transformer) throws FetcherException {
return new ArrayList<>(performSearchPagedForTransformedQuery(transformedQuery, 0, transformer).getContent());
default List<BibEntry> performSearchForTransformedQuery(String transformedQuery) throws FetcherException {
return new ArrayList<>(performSearchPagedForTransformedQuery(transformedQuery, 0).getContent());
}
}
12 changes: 8 additions & 4 deletions src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@ public interface SearchBasedFetcher extends WebFetcher {
* This method is used to send complex queries using fielded search.
*
* @param transformedQuery the search query defining all fielded search parameters
* @param transformer transformer might be required to extract some parsing information
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
List<BibEntry> performSearchForTransformedQuery(String transformedQuery, AbstractQueryTransformer transformer) throws FetcherException;
List<BibEntry> performSearchForTransformedQuery(String transformedQuery) throws FetcherException;

/**
* Looks for hits which are matched by the given free-text query.
*
* @param searchQuery query string that can be parsed into a complex search query
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
default List<BibEntry> performSearch(String searchQuery) throws FetcherException {
default List<BibEntry> performSearch(String searchQuery) throws JabRefException {
resetTransformer();
AbstractQueryTransformer transformer = getQueryTransformer();
if (searchQuery.isBlank()) {
return Collections.emptyList();
Expand All @@ -42,10 +42,14 @@ default List<BibEntry> performSearch(String searchQuery) throws FetcherException
throw new FetcherException("Error occured during query transformation", e);
}
// Otherwise just use query as a default term
return this.performSearchForTransformedQuery(transformedQuery.orElse(""), transformer);
return this.performSearchForTransformedQuery(transformedQuery.orElse(""));
}

/**
* Supplies the transformer that {@code performSearch} uses to parse the raw query string.
* <p>
* This default implementation creates a new {@link DefaultQueryTransformer} on every call and
* keeps no reference to it.
*
* @return a newly created {@link DefaultQueryTransformer}
*/
default AbstractQueryTransformer getQueryTransformer() {
return new DefaultQueryTransformer();
}

/**
* Hook that {@code performSearch} invokes before it requests the transformer via
* {@code getQueryTransformer()}.
* <p>
* This default implementation does nothing. Fetchers that keep their transformer in a field
* override it to clear that field.
*/
default void resetTransformer() {

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import org.jabref.logic.JabRefException;
import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.importer.fetcher.transformators.AbstractQueryTransformer;
import org.jabref.model.entry.BibEntry;

/**
Expand All @@ -28,14 +27,13 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher {
* fielded search, such as a title, author, or year parameter.
*
* @param transformedQuery the search query defining all fielded search parameters
* @param transformer
*/
@Override
default List<BibEntry> performSearchForTransformedQuery(String transformedQuery, AbstractQueryTransformer transformer) throws FetcherException {
default List<BibEntry> performSearchForTransformedQuery(String transformedQuery) throws FetcherException {
// ADR-0014
URL urlForQuery;
try {
urlForQuery = getURLForQuery(transformedQuery, transformer);
urlForQuery = getURLForQuery(transformedQuery);
} catch (URISyntaxException | MalformedURLException | JabRefException e) {
throw new FetcherException("Search URI crafted from complex search query is malformed", e);
}
Expand Down Expand Up @@ -64,7 +62,7 @@ private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
*
* @param transformedQuery the search query
*/
URL getURLForQuery(String transformedQuery, AbstractQueryTransformer transformer) throws URISyntaxException, MalformedURLException, FetcherException;
URL getURLForQuery(String transformedQuery) throws URISyntaxException, MalformedURLException, FetcherException;

/**
* Performs a cleanup of the fetched entry.
Expand Down
14 changes: 10 additions & 4 deletions src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public class ArXiv implements FulltextFetcher, PagedSearchBasedFetcher, IdBasedF
private static final String API_URL = "https://export.arxiv.org/api/query";

private final ImportFormatPreferences importFormatPreferences;
private ArXivQueryTransformer transformer;

public ArXiv(ImportFormatPreferences importFormatPreferences) {
this.importFormatPreferences = importFormatPreferences;
Expand Down Expand Up @@ -258,15 +259,15 @@ public Optional<HelpFile> getHelpPage() {
* @return A list of entries matching the complex query
*/
@Override
public Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber, AbstractQueryTransformer transformer) throws FetcherException {
public Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber) throws FetcherException {
List<BibEntry> searchResult = searchForEntries(transformedQuery, pageNumber).stream()
.map((arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()))
.collect(Collectors.toList());
return new Page<>(transformedQuery, pageNumber, filterYears(searchResult, transformer));
return new Page<>(transformedQuery, pageNumber, filterYears(searchResult));
}

private List<BibEntry> filterYears(List<BibEntry> searchResult, AbstractQueryTransformer transformer) {
ArXivQueryTransformer arXivQueryTransformer = ((ArXivQueryTransformer) transformer);
private List<BibEntry> filterYears(List<BibEntry> searchResult) {
ArXivQueryTransformer arXivQueryTransformer = transformer;
return searchResult.stream()
.filter(entry -> entry.getField(StandardField.DATE).isPresent())
// Filter the date field for year only
Expand Down Expand Up @@ -426,4 +427,9 @@ public BibEntry toBibEntry(Character keywordDelimiter) {
public AbstractQueryTransformer getQueryTransformer() {
// Create the transformer on first use and keep it in the field: filterYears reads the
// 'transformer' field, so handing out a fresh, unstored instance would leave that field null
// and the year filtering would fail with a NullPointerException.
if (transformer == null) {
transformer = new ArXivQueryTransformer();
}
// resetTransformer() sets the field back to null, so a new instance is created after a reset.
return transformer;
}

/**
* Clears the {@code transformer} field of this fetcher by setting it to {@code null}.
* <p>
* The search entry points of the fetcher interfaces call this before they request a transformer
* for a new query.
*/
@Override
public void resetTransformer() {
transformer = null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import org.jabref.logic.importer.PagedSearchBasedParserFetcher;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.fetcher.transformators.AbstractQueryTransformer;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.net.URLDownload;
import org.jabref.logic.util.BuildInfo;
Expand Down Expand Up @@ -84,7 +83,7 @@ public String getName() {
* @return URL which points to a search request for given query
*/
@Override
public URL getURLForQuery(String transformedQuery, int pageNumber, AbstractQueryTransformer transformer) throws URISyntaxException, MalformedURLException {
public URL getURLForQuery(String transformedQuery, int pageNumber) throws URISyntaxException, MalformedURLException {
URIBuilder builder = new URIBuilder(API_SEARCH_URL);
builder.addParameter("q", transformedQuery);
builder.addParameter("fl", "bibcode");
Expand Down Expand Up @@ -275,10 +274,10 @@ private List<BibEntry> performSearchByIds(Collection<String> identifiers) throws
}

@Override
public Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber, AbstractQueryTransformer transformer) throws FetcherException {
public Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber) throws FetcherException {
try {
// This is currently just interpreting the complex query as a default string query
List<String> bibcodes = fetchBibcodes(getURLForQuery(transformedQuery, pageNumber, transformer));
List<String> bibcodes = fetchBibcodes(getURLForQuery(transformedQuery, pageNumber));
Collection<BibEntry> results = performSearchByIds(bibcodes);
return new Page<>(transformedQuery, pageNumber, results);
} catch (URISyntaxException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.jabref.logic.JabRefException;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportCleanup;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.fetcher.transformators.AbstractQueryTransformer;
import org.jabref.model.database.BibDatabaseMode;
import org.jabref.model.entry.BibEntry;

Expand Down Expand Up @@ -47,14 +47,14 @@ public Optional<HelpFile> getHelpPage() {
}

@Override
public List<BibEntry> performSearchForTransformedQuery(String transformedQuery, AbstractQueryTransformer transformer) throws FetcherException {
public List<BibEntry> performSearchForTransformedQuery(String transformedQuery) throws FetcherException {
ImportCleanup cleanup = new ImportCleanup(BibDatabaseMode.BIBTEX);
// All entries have to be converted into one format, this is necessary for the format conversion
return fetchers.parallelStream()
.flatMap(searchBasedFetcher -> {
try {
return searchBasedFetcher.performSearch(transformedQuery).stream();
} catch (FetcherException e) {
} catch (JabRefException e) {
LOGGER.warn(String.format("%s API request failed", searchBasedFetcher.getName()), e);
return Stream.empty();
}
Expand All @@ -63,4 +63,9 @@ public List<BibEntry> performSearchForTransformedQuery(String transformedQuery,
.map(cleanup::doPostCleanup)
.collect(Collectors.toList());
}

/**
* Forwards the reset to every fetcher in {@code fetchers} by calling
* {@link SearchBasedFetcher#resetTransformer()} on each of them.
*/
@Override
public void resetTransformer() {
fetchers.forEach(SearchBasedFetcher::resetTransformer);
}
}
15 changes: 12 additions & 3 deletions src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public class GoogleScholar implements FulltextFetcher, PagedSearchBasedFetcher {
private static final int NUM_RESULTS = 10;

private final ImportFormatPreferences importFormatPreferences;
private ScholarQueryTransformer transformer;

public GoogleScholar(ImportFormatPreferences importFormatPreferences) {
Objects.requireNonNull(importFormatPreferences);
Expand Down Expand Up @@ -177,7 +178,7 @@ private void obtainAndModifyCookie() throws FetcherException {
}

@Override
public Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber, AbstractQueryTransformer transformer) throws FetcherException {
public Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQuery, int pageNumber) throws FetcherException {
try {
obtainAndModifyCookie();
List<BibEntry> foundEntries = new ArrayList<>(10);
Expand All @@ -188,7 +189,7 @@ public Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQu
uriBuilder.addParameter("q", transformedQuery);
uriBuilder.addParameter("start", String.valueOf(pageNumber * getPageSize()));
uriBuilder.addParameter("num", String.valueOf(getPageSize()));
ScholarQueryTransformer scholarQueryTransformer = ((ScholarQueryTransformer) transformer);
ScholarQueryTransformer scholarQueryTransformer = transformer;
uriBuilder.addParameter("as_ylo", String.valueOf(scholarQueryTransformer.getStartYear()));
uriBuilder.addParameter("as_yhi", String.valueOf(scholarQueryTransformer.getEndYear()));

Expand Down Expand Up @@ -219,6 +220,14 @@ public Page<BibEntry> performSearchPagedForTransformedQuery(String transformedQu

@Override
public AbstractQueryTransformer getQueryTransformer() {
return new ScholarQueryTransformer();
if (Objects.isNull(transformer)) {
transformer = new ScholarQueryTransformer();
}
return transformer;
}

/**
* Discards the stored {@link ScholarQueryTransformer} by setting the {@code transformer} field
* to {@code null}, so that {@code getQueryTransformer()} creates a new instance on its next call.
*/
@Override
public void resetTransformer() {
transformer = null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,12 @@
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.fetcher.transformators.AbstractQueryTransformer;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.importer.util.GrobidService;
import org.jabref.model.entry.BibEntry;
Expand Down Expand Up @@ -78,13 +76,13 @@ public List<BibEntry> performSearch(String searchQuery) throws FetcherException
List<BibEntry> collect;
try {
collect = Arrays.stream(searchQuery.split("\\r\\r+|\\n\\n+|\\r\\n(\\r\\n)+"))
.map(String::trim)
.filter(str -> !str.isBlank())
.map(this::parseUsingGrobid)
.flatMap(Optional::stream)
.map(this::parseBibToBibEntry)
.flatMap(Optional::stream)
.collect(Collectors.toList());
.map(String::trim)
.filter(str -> !str.isBlank())
.map(this::parseUsingGrobid)
.flatMap(Optional::stream)
.map(this::parseBibToBibEntry)
.flatMap(Optional::stream)
.collect(Collectors.toList());
} catch (RuntimeException e) {
throw new FetcherException(e.getMessage(), e.getCause());
}
Expand All @@ -95,7 +93,7 @@ public List<BibEntry> performSearch(String searchQuery) throws FetcherException
* Not used
*/
@Override
public List<BibEntry> performSearchForTransformedQuery(String transformedQuery, AbstractQueryTransformer transformer) throws FetcherException {
public List<BibEntry> performSearchForTransformedQuery(String transformedQuery) throws FetcherException {
return Collections.emptyList();
}
}
Loading

0 comments on commit 787f1bc

Please sign in to comment.