Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IdBasedSearchFetcher to jstor #7145

Merged
merged 5 commits into from
Dec 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ public static SortedSet<IdBasedFetcher> getIdBasedFetchers(ImportFormatPreferenc
set.add(new IacrEprintFetcher(importFormatPreferences));
set.add(new RfcFetcher(importFormatPreferences));
set.add(new Medra());
set.add(new JstorFetcher(importFormatPreferences));
return set;
}

Expand Down
51 changes: 46 additions & 5 deletions src/main/java/org/jabref/logic/importer/fetcher/JstorFetcher.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
package org.jabref.logic.importer.fetcher;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.importer.IdBasedParserFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.Parser;
Expand All @@ -28,11 +33,12 @@
/**
* Fetcher for jstor.org
**/
public class JstorFetcher implements SearchBasedParserFetcher, FulltextFetcher {
public class JstorFetcher implements SearchBasedParserFetcher, FulltextFetcher, IdBasedParserFetcher {

private static final String HOST = "https://www.jstor.org";
private static final String SEARCH_HOST = HOST + "/open/search";
private static final String CITE_HOST = HOST + "/citation/text";
private static final String CITE_HOST = HOST + "/citation/text/";
private static final String URL_QUERY_REGEX = "(?<=\\?).*";

private final ImportFormatPreferences importFormatPreferences;

Expand Down Expand Up @@ -82,21 +88,51 @@ public URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, M
return uriBuilder.build().toURL();
}

/**
 * Builds the JSTOR citation-export URL for the given identifier.
 * <p>
 * Accepted forms:
 * <ul>
 *   <li>a plain JSTOR stable id, e.g. {@code 90002164} (prefixed with the default DOI prefix {@code 10.2307/}),</li>
 *   <li>a DOI-like id containing a slash, e.g. {@code 10.1086/501484} (used as is),</li>
 *   <li>a full {@code http(s)://www.jstor.org/stable/...} URL wrapping either of the above,
 *       optionally followed by a query string.</li>
 * </ul>
 *
 * @param identifier stable id, DOI-like id, or stable URL
 * @return the URL under {@code /citation/text/} for that item
 * @throws FetcherException if no valid URL can be built from the identifier
 */
@Override
public URL getUrlForIdentifier(String identifier) throws FetcherException {
    String start = "https://www.jstor.org/citation/text/";
    if (identifier.startsWith("http")) {
        identifier = identifier.replace("https://www.jstor.org/stable", "");
        identifier = identifier.replace("http://www.jstor.org/stable", "");
    }
    identifier = identifier.replaceAll(URL_QUERY_REGEX, "");

    // URL_QUERY_REGEX is a lookbehind on '?', so the '?' itself is kept; drop it.
    if (identifier.endsWith("?")) {
        identifier = identifier.substring(0, identifier.length() - 1);
    }
    // Removing the ".../stable" prefix leaves a leading '/'. Strip surrounding slashes so that
    // (a) the slash check below only fires for real DOI-like ids and
    // (b) the resulting URL has no "//" after "citation/text".
    identifier = identifier.replaceAll("^/+|/+$", "");

    try {
        if (identifier.contains("/")) {
            // identifier already carries its own DOI prefix
            return new URL(start + identifier);
        }
        // otherwise fall back to the default JSTOR DOI prefix
        return new URL(start + "10.2307/" + identifier);
    } catch (MalformedURLException e) {
        throw new FetcherException("could not construct url for jstor", e);
    }
}

@Override
public Parser getParser() {
return inputStream -> {
BibtexParser parser = new BibtexParser(importFormatPreferences, new DummyFileUpdateMonitor());
String text = new BufferedReader(
new InputStreamReader(inputStream, StandardCharsets.UTF_8)).lines().collect(Collectors.joining());

// does the input stream contain bibtex ?
if (text.startsWith("@")) {
return parser.parseEntries(text);
}
// input stream contains html
List<BibEntry> entries;
try {
Document doc = Jsoup.parse(inputStream, null, HOST);
List<Element> elements = doc.body().getElementsByClass("cite-this-item");

StringBuilder stringBuilder = new StringBuilder();
List<Element> elements = doc.body().getElementsByClass("cite-this-item");
for (Element element : elements) {
String id = element.attr("href").replace("citation/info/", "");

String data = new URLDownload(CITE_HOST + id).asString();
stringBuilder.append(data);
}
BibtexParser parser = new BibtexParser(importFormatPreferences, new DummyFileUpdateMonitor());
entries = new ArrayList<>(parser.parseEntries(stringBuilder.toString()));
} catch (IOException e) {
throw new ParseException("Could not download data from jstor.org", e);
Expand All @@ -111,7 +147,7 @@ public String getName() {
}

@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherException {
public Optional<URL> findFullText(BibEntry entry) throws IOException {
if (entry.getField(StandardField.URL).isEmpty()) {
return Optional.empty();
}
Expand All @@ -133,4 +169,9 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherExc
public TrustLevel getTrustLevel() {
return TrustLevel.META_SEARCH;
}

/**
 * Intentionally a no-op: fetched entries are returned without any post-processing.
 * <p>
 * NOTE(review): this class implements both SearchBasedParserFetcher and IdBasedParserFetcher;
 * the explicit override is reportedly required because each of them supplies its own default
 * doPostCleanup, so the class has to pick one itself — confirm against those interfaces.
 *
 * @param entry the fetched entry; left untouched
 */
@Override
public void doPostCleanup(BibEntry entry) {
// do nothing
}
Comment on lines +173 to +176
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you overwrite this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

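Both SearchBasedParserFetcher and IdBasedParserFetcher provide a default doPostCleanup implementation, so the compiler cannot decide which one the class should inherit. The class therefore has to override the method explicitly.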

}
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,32 @@ public class JstorFetcherTest implements SearchBasedFetcherCapabilityTest {
.withField(StandardField.URL, "http://www.jstor.org/stable/90002164")
.withField(StandardField.YEAR, "2017");

// Expected result for the JSTOR item whose stable URL carries its own DOI (10.1086/501484)
// rather than the default 10.2307 prefix; used by searchById.
// NOTE(review): the ISSN literal below starts with an unbalanced '{' — looks like a typo in the
// expected value; verify against what the fetcher actually returns before changing it.
private final BibEntry doiEntry = new BibEntry(StandardEntryType.Article)
.withCitationKey("10.1086/501484")
.withField(StandardField.AUTHOR, "Johnmarshall Reeve")
.withField(StandardField.TITLE, "Teachers as Facilitators: What Autonomy‐Supportive Teachers Do and Why Their Students Benefit")
.withField(StandardField.ISSN, "{00135984, 15548279")
.withField(StandardField.JOURNAL, "The Elementary School Journal")
.withField(StandardField.ABSTRACT, "Abstract Students are sometimes proactive and engaged in classroom learning activities, but they are also sometimes only reactive and passive. Recognizing this, in this article I argue that students’ classroom engagement depends, in part, on the supportive quality of the classroom climate in which they learn. According to the dialectical framework within self‐determination theory, students possess inner motivational resources that classroom conditions can support or frustrate. When teachers find ways to nurture these inner resources, they adopt an autonomy‐supportive motivating style. After articulating what autonomy‐supportive teachers say and do during instruction, I discuss 3 points: teachers can learn how to be more autonomy supportive toward students; teachers most engage students when they offer high levels of both autonomy support and structure; and an autonomy‐supportive motivating style is an important element to a high‐quality teacher‐student relationship.")
.withField(StandardField.PUBLISHER, "The University of Chicago Press")
.withField(StandardField.NUMBER, "3")
.withField(StandardField.PAGES, "225--236")
.withField(StandardField.VOLUME, "106")
.withField(StandardField.URL, "http://www.jstor.org/stable/10.1086/501484")
.withField(StandardField.YEAR, "2006");

// A quoted title query must yield exactly the one known JSTOR entry.
@Test
void searchByTitle() throws Exception {
    String titleQuery = "title: \"Test Anxiety Analysis of Chinese College Students in Computer-based Spoken English Test\"";
    List<BibEntry> found = fetcher.performSearch(titleQuery);

    assertEquals(Collections.singletonList(bibEntry), found);
}

// Id-based lookup must work for a bare stable id as well as for a full stable URL
// that embeds a DOI and carries a query string.
@Test
void searchById() throws FetcherException {
    String plainStableId = "90002164";
    assertEquals(Optional.of(bibEntry), fetcher.performSearchById(plainStableId));

    String stableUrlWithQuery = "https://www.jstor.org/stable/10.1086/501484?seq=1";
    assertEquals(Optional.of(doiEntry), fetcher.performSearchById(stableUrlWithQuery));
}

@Test
void fetchPDF() throws IOException, FetcherException {
Optional<URL> url = fetcher.findFullText(bibEntry);
Expand Down