Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Fix fetcher tests #5674

Merged
merged 13 commits into from
Nov 29, 2019
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ public void doPostCleanup(BibEntry entry) {
entry.getField(StandardField.ABSTRACT)
.filter(abstractText -> abstractText.equals("Not Available <P />"))
.ifPresent(abstractText -> entry.clearField(StandardField.ABSTRACT));

entry.getField(StandardField.ABSTRACT)
.map(abstractText -> abstractText.replace("<P />", "").trim())
.ifPresent(abstractText-> entry.setField(StandardField.ABSTRACT,abstractText));
// The fetcher adds some garbage before the entry (number of found entries, etc.)
entry.setCommentsBeforeEntry("");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
Expand All @@ -28,6 +26,10 @@
import org.jabref.model.util.DummyFileUpdateMonitor;

import org.apache.http.client.utils.URIBuilder;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
* Fetches data from the INSPIRE database.
Expand Down Expand Up @@ -72,12 +74,15 @@ public Parser getParser() {
String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE));

List<BibEntry> entries = new ArrayList<>();
BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
Pattern pattern = Pattern.compile("<pre>(?s)(.*)</pre>");
Matcher matcher = pattern.matcher(response);
while (matcher.find()) {
String bibtexEntryString = matcher.group(1);
entries.addAll(bibtexParser.parseEntries(bibtexEntryString));

Document doc = Jsoup.parse(response);
Elements preElements = doc.getElementsByTag("pre");

for (Element elem : preElements) {
// We have to use a new parser instance for each <pre> element, because otherwise only the first entry gets parsed
BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
List<BibEntry> entry = bibtexParser.parseEntries(elem.text());
entries.addAll(entry);
}
return entries;
};
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/org/jabref/logic/importer/util/JsonReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ public static JSONObject toJsonObject(InputStreamReader input) throws ParseExcep
while ((inputStr = streamReader.readLine()) != null) {
responseStrBuilder.append(inputStr);
}
if (responseStrBuilder.toString().isBlank()) {
throw new ParseException("Empty input!");
}
return new JSONObject(responseStrBuilder.toString());
} catch (IOException e) {
throw new ParseException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,7 @@ void searchByQueryFindsEntry() throws Exception {

@Test
void testBibJSONConverter() {
String jsonString = "{\n\"title\": \"Design of Finite Word Length Linear-Phase FIR Filters in the Logarithmic Number System Domain\",\n"
+ "\"journal\": {\n\"publisher\": \"Hindawi Publishing Corporation\",\n\"language\": ["
+ "\"English\"],\n\"title\": \"VLSI Design\",\"country\": \"US\",\"volume\": \"2014\""
+ "},\"author\":[{\"name\": \"Syed Asad Alam\"},{\"name\": \"Oscar Gustafsson\""
+ "}\n],\n\"link\":[{\"url\": \"http://dx.doi.org/10.1155/2014/217495\","
+ "\"type\": \"fulltext\"}],\"year\":\"2014\",\"identifier\":[{"
+ "\"type\": \"pissn\",\"id\": \"1065-514X\"},\n{\"type\": \"eissn\","
+ "\"id\": \"1563-5171\"},{\"type\": \"doi\",\"id\": \"10.1155/2014/217495\""
+ "}],\"created_date\":\"2014-05-09T19:38:31Z\"}\"";
String jsonString = "{\"title\":\"Design of Finite Word Length Linear-Phase FIR Filters in the Logarithmic Number System Domain\",\"journal\":{\"publisher\":\"Hindawi Publishing Corporation\",\"language\":[\"English\"],\"title\":\"VLSI Design\",\"country\":\"US\",\"volume\":\"2014\"},\"author\":[{\"name\":\"Syed Asad Alam\"},{\"name\":\"Oscar Gustafsson\"}],\"link\":[{\"url\":\"http://dx.doi.org/10.1155/2014/217495\",\"type\":\"fulltext\"}],\"year\":\"2014\",\"identifier\":[{\"type\":\"pissn\",\"id\":\"1065-514X\"},{\"type\":\"eissn\",\"id\":\"1563-5171\"},{\"type\":\"doi\",\"id\":\"10.1155/2014/217495\"}],\"created_date\":\"2014-05-09T19:38:31Z\"}";
JSONObject jsonObject = new JSONObject(jsonString);
BibEntry bibEntry = DOAJFetcher.parseBibJSONtoBibtex(jsonObject, ',');

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public void testGetName() {

@Test
public void testGetHelpPage() {
assertEquals("DOItoBibTeX", fetcher.getHelpPage().get().getPageName());
assertEquals("import-using-publication-identifiers/doitobibtex", fetcher.getHelpPage().get().getPageName());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ void findSingleEntry() throws FetcherException {

@Test
@DisabledOnCIServer("CI server is blocked by Google")
void find20Entries() throws FetcherException {
void findManyEntries() throws FetcherException {
List<BibEntry> foundEntries = finder.performSearch("random test string");

assertEquals(20, foundEntries.size());
assertEquals(10, foundEntries.size());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public void testGetName() {

@Test
public void testGetHelpPage() {
assertEquals("GVK", fetcher.getHelpPage().get().getPageName());
assertEquals("import-using-online-bibliographic-database/gvk", fetcher.getHelpPage().get().getPageName());
}

@Test
Expand Down
3 changes: 3 additions & 0 deletions src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ void searchResultHasNoKeywordTerms() throws FetcherException {
expected.setField(StandardField.DOI, "10.1049/iet-rpg.2018.5648");
expected.setField(StandardField.FILE, ":https\\://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8636659:PDF");
expected.setField(StandardField.ISSUE, "3");
expected.setField(StandardField.ISSN, "1752-1424");
expected.setField(StandardField.JOURNALTITLE, "IET Renewable Power Generation");
expected.setField(StandardField.PAGES, "418--426");
expected.setField(StandardField.PUBLISHER, "IET");
Expand All @@ -125,6 +126,8 @@ void searchByQueryFindsEntry() throws Exception {
expected.setField(StandardField.DOI, "10.1145/2884781.2884806");
expected.setField(StandardField.JOURNALTITLE, "2016 IEEE/ACM 38th International Conference on Software Engineering (ICSE)");
expected.setField(StandardField.PAGES, "273--284");
expected.setField(StandardField.ISBN, "978-1-5090-2071-3");
expected.setField(StandardField.ISSN, "1558-1225");
expected.setField(StandardField.PUBLISHER, "IEEE");
expected.setField(StandardField.KEYWORDS, "Portals, Documentation, Computer bugs, Joining processes, Industries, Open source software, Newcomers, Newbies, Novices, Beginners, Open Source Software, Barriers, Obstacles, Onboarding, Joining Process");
expected.setField(StandardField.TITLE, "Overcoming Open Source Project Entry Barriers with a Portal for Newcomers");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.jabref.logic.importer.fetcher;

import java.util.Collections;
import java.util.Arrays;
import java.util.List;

import org.jabref.logic.bibtex.FieldContentParserPreferences;
Expand All @@ -20,6 +20,7 @@

@FetcherTest
class INSPIREFetcherTest {

private INSPIREFetcher fetcher;

@BeforeEach
Expand All @@ -31,18 +32,37 @@ void setUp() {

@Test
void searchByQueryFindsEntry() throws Exception {
BibEntry expected = new BibEntry(StandardEntryType.MastersThesis);
expected.setCiteKey("Diez:2014ppa");
expected.setField(StandardField.AUTHOR, "Diez, Tobias");
expected.setField(StandardField.TITLE, "Slice theorem for Fr\\'echet group actions and covariant symplectic field theory");
expected.setField(StandardField.SCHOOL, "Leipzig U.");
expected.setField(StandardField.YEAR, "2013");
expected.setField(StandardField.URL, "https://inspirehep.net/record/1295621/files/arXiv:1405.2249.pdf");
expected.setField(StandardField.EPRINT, "1405.2249");
expected.setField(StandardField.ARCHIVEPREFIX, "arXiv");
expected.setField(new UnknownField("primaryClass"), "math-ph");
BibEntry phd = new BibEntry(StandardEntryType.PhdThesis);
phd.setCiteKey("Diez:2019pkg");
phd.setField(StandardField.AUTHOR, "Diez, Tobias");
phd.setField(StandardField.TITLE, "Normal Form of Equivariant Maps and Singular Symplectic Reduction in Infinite Dimensions with Applications to Gauge Field Theory");
phd.setField(StandardField.YEAR, "2019");
phd.setField(StandardField.EPRINT, "1909.00744");
phd.setField(new UnknownField("reportnumber"), "urn:nbn:de:bsz:15-qucosa2-352179");
phd.setField(StandardField.ARCHIVEPREFIX, "arXiv");
phd.setField(StandardField.PRIMARYCLASS, "math.SG");

BibEntry article = new BibEntry(StandardEntryType.Article);
article.setCiteKey("Diez:2018gjz");
article.setField(StandardField.AUTHOR, "Diez, Tobias and Rudolph, Gerd");
article.setField(StandardField.TITLE, "Singular symplectic cotangent bundle reduction of gauge field theory");
article.setField(StandardField.YEAR, "2018");
article.setField(StandardField.EPRINT, "1812.04707");
article.setField(StandardField.ARCHIVEPREFIX, "arXiv");
article.setField(StandardField.PRIMARYCLASS, "math-ph");

BibEntry master = new BibEntry(StandardEntryType.MastersThesis);
master.setCiteKey("Diez:2014ppa");
master.setField(StandardField.AUTHOR, "Diez, Tobias");
master.setField(StandardField.TITLE, "Slice theorem for Fr\\'echet group actions and covariant symplectic field theory");
master.setField(StandardField.SCHOOL, "Leipzig U.");
master.setField(StandardField.YEAR, "2013");
master.setField(StandardField.EPRINT, "1405.2249");
master.setField(StandardField.ARCHIVEPREFIX, "arXiv");
master.setField(StandardField.PRIMARYCLASS, "math-ph");

List<BibEntry> fetchedEntries = fetcher.performSearch("Fr\\'echet group actions field");
assertEquals(Collections.singletonList(expected), fetchedEntries);

assertEquals(Arrays.asList(phd, article, master), fetchedEntries);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public void setUp() {

entryWijedasa = new BibEntry();
entryWijedasa.setType(StandardEntryType.Article);
entryWijedasa.setField(StandardField.AUTHOR, "Wijedasa, Lahiru S. and Jauhiainen, Jyrki and Könönen, Mari and Lampela, Maija and Vasander, Harri and Leblanc, Marie-Claire and Evers, Stephanie and Smith, Thomas E. L. and Yule, Catherine M. and Varkkey, Helena and Lupascu, Massimo and Parish, Faizal and Singleton, Ian and Clements, Gopalasamy R. and Aziz, Sheema Abdul and Harrison, Mark E. and Cheyne, Susan and Anshari, Gusti Z. and Meijaard, Erik and Goldstein, Jenny E. and Waldron, Susan and Hergoualc'h, Kristell and Dommain, Rene and Frolking, Steve and Evans, Christopher D. and Posa, Mary Rose C. and Glaser, Paul H. and Suryadiputra, Nyoman and Lubis, Reza and Santika, Truly and Padfield, Rory and Kurnianto, Sofyan and Hadisiswoyo, Panut and Lim, Teck Wyn and Page, Susan E. and Gauci, Vincent and Van Der Meer, Peter J. and Buckland, Helen and Garnier, Fabien and Samuel, Marshall K. and Choo, Liza Nuriati Lim Kim and O'Reilly, Patrick and Warren, Matthew and Suksuwan, Surin and Sumarga, Elham and Jain, Anuj and Laurance, William F. and Couwenberg, John and Joosten, Hans and Vernimmen, Ronald and Hooijer, Aljosja and Malins, Chris and Cochrane, Mark A. and Perumal, Balu and Siegert, Florian and Peh, Kelvin S.-H. and Comeau, Louis-Pierre and Verchot, Louis and Harvey, Charles F. and Cobb, Alex and Jaafar, Zeehan and Wösten, Henk and Manuri, Solichin and Müller, Moritz and Giesen, Wim and Phelps, Jacob and Yong, Ding Li and Silvius, Marcel and Wedeux, Béatrice M. M. and Hoyt, Alison and Osaki, Mitsuru and Hirano, Takashi and Takahashi, Hidenori and Kohyama, Takashi S. and Haraguchi, Akira and Nugroho, Nunung P. and Coomes, David A. and Quoi, Le Phat and Dohong, Alue and Gunawan, Haris and Gaveau, David L. A. and Langner, Andreas and Lim, Felix K. S. and Edwards, David P. and Giam, Xingli and Van Der Werf, Guido and Carmenta, Rachel and Verwer, Caspar C. and Gibson, Luke and Gandois, Laure and Graham, Laura Linda Bozena and Regalino, Jhanson and Wich, Serge A. 
and Rieley, Jack and Kettridge, Nicholas and Brown, Chloe and Pirard, Romain and Moore, Sam and Capilla, B. Ripoll and Ballhorn, Uwe and Ho, Hua Chew and Hoscilo, Agata and Lohberger, Sandra and Evans, Theodore A. and Yulianti, Nina and Blackham, Grace and Onrizal and Husson, Simon and Murdiyarso, Daniel and Pangala, Sunita and Cole, Lydia E. S. and Tacconi, Luca and Segah, Hendrik and Tonoto, Prayoto and Lee, Janice S. H. and Schmilewski, Gerald and Wulffraat, Stephan and Putra, Erianto Indra and Cattau, Megan E. and Clymo, R. S. and Morrison, Ross and Mujahid, Aazani and Miettinen, Jukka and Liew, Soo Chin and Valpola, Samu and Wilson, David and D'Arcy, Laura and Gerding, Michiel and Sundari, Siti and Thornton, Sara A. and Kalisz, Barbara and Chapman, Stephen J. and Su, Ahmad Suhaizi Mat and Basuki, Imam and Itoh, Masayuki and Traeholt, Carl and Sloan, Sean and Sayok, Alexander K. and Andersen, Roxane");
entryWijedasa.setField(StandardField.AUTHOR, "Wijedasa, Lahiru S. and Jauhiainen, Jyrki and Könönen, Mari and Lampela, Maija and Vasander, Harri and Leblanc, Marie-Claire and Evers, Stephanie and Smith, Thomas E. L. and Yule, Catherine M. and Varkkey, Helena and Lupascu, Massimo and Parish, Faizal and Singleton, Ian and Clements, Gopalasamy R. and Aziz, Sheema Abdul and Harrison, Mark E. and Cheyne, Susan and Anshari, Gusti Z. and Meijaard, Erik and Goldstein, Jenny E. and Waldron, Susan and Hergoualc'h, Kristell and Dommain, Rene and Frolking, Steve and Evans, Christopher D. and Posa, Mary Rose C. and Glaser, Paul H. and Suryadiputra, Nyoman and Lubis, Reza and Santika, Truly and Padfield, Rory and Kurnianto, Sofyan and Hadisiswoyo, Panut and Lim, Teck Wyn and Page, Susan E. and Gauci, Vincent and Van Der Meer, Peter J. and Buckland, Helen and Garnier, Fabien and Samuel, Marshall K. and Choo, Liza Nuriati Lim Kim and O'Reilly, Patrick and Warren, Matthew and Suksuwan, Surin and Sumarga, Elham and Jain, Anuj and Laurance, William F. and Couwenberg, John and Joosten, Hans and Vernimmen, Ronald and Hooijer, Aljosja and Malins, Chris and Cochrane, Mark A. and Perumal, Balu and Siegert, Florian and Peh, Kelvin S.-H. and Comeau, Louis-Pierre and Verchot, Louis and Harvey, Charles F. and Cobb, Alex and Jaafar, Zeehan and Wösten, Henk and Manuri, Solichin and Müller, Moritz and Giesen, Wim and Phelps, Jacob and Yong, Ding Li and Silvius, Marcel and Wedeux, Béatrice M. M. and Hoyt, Alison and Osaki, Mitsuru and Hirano, Takashi and Takahashi, Hidenori and Kohyama, Takashi S. and Haraguchi, Akira and Nugroho, Nunung P. and Coomes, David A. and Quoi, Le Phat and Dohong, Alue and Gunawan, Haris and Gaveau, David L. A. and Langner, Andreas and Lim, Felix K. S. and Edwards, David P. and Giam, Xingli and Van Der Werf, Guido and Carmenta, Rachel and Verwer, Caspar C. and Gibson, Luke and Gandois, Laure and Graham, Laura Linda Bozena and Regalino, Jhanson and Wich, Serge A. 
and Rieley, Jack and Kettridge, Nicholas and Brown, Chloe and Pirard, Romain and Moore, Sam and Capilla, B. Ripoll and Ballhorn, Uwe and Ho, Hua Chew and Hoscilo, Agata and Lohberger, Sandra and Evans, Theodore A. and Yulianti, Nina and Blackham, Grace and Onrizal and Husson, Simon and Murdiyarso, Daniel and Pangala, Sunita and Cole, Lydia E. S. and Tacconi, Luca and Segah, Hendrik and Tonoto, Prayoto and Lee, Janice S. H. and Schmilewski, Gerald and Wulffraat, Stephan and Putra, Erianto Indra and Cattau, Megan E. and Clymo, R. S. and Morrison, Ross and Mujahid, Aazani and Miettinen, Jukka and Liew, Soo Chin and Valpola, Samu and Wilson, David and D'Arcy, Laura and Gerding, Michiel and Sundari, Siti and Thornton, Sara A. and Kalisz, Barbara and Chapman, Stephen J. and Su, Ahmad Suhaizi Mat and Basuki, Imam and Itoh, Masayuki and Traeholt, Carl and Sloan, Sean and Sayok, Alexander K. and Andersen, Roxane");
entryWijedasa.setField(new UnknownField("country"), "England");
entryWijedasa.setField(StandardField.DOI, "10.1111/gcb.13516");
entryWijedasa.setField(StandardField.ISSN, "1365-2486");
Expand All @@ -43,7 +43,7 @@ public void setUp() {
entryWijedasa.setField(StandardField.PMID, "27670948");
entryWijedasa.setField(new UnknownField("pubmodel"), "Print-Electronic");
entryWijedasa.setField(StandardField.PUBSTATE, "ppublish");
entryWijedasa.setField(new UnknownField("revised"), "2018-01-23");
entryWijedasa.setField(new UnknownField("revised"), "2019-11-20");
entryWijedasa.setField(StandardField.TITLE, "Denial of long-term issues with agriculture on tropical peatlands will have devastating consequences.");
entryWijedasa.setField(StandardField.VOLUME, "23");
entryWijedasa.setField(StandardField.YEAR, "2017");
Expand All @@ -68,7 +68,7 @@ public void setUp() {
entryEndharti.setField(StandardField.PMID, "27670445");
entryEndharti.setField(new UnknownField("pubmodel"), "Electronic");
entryEndharti.setField(StandardField.PUBSTATE, "epublish");
entryEndharti.setField(new UnknownField("revised"), "2018-11-13");
entryEndharti.setField(new UnknownField("revised"), "2019-11-20");
entryEndharti.setField(StandardField.VOLUME, "16");
entryEndharti.setField(StandardField.YEAR, "2016");

Expand Down Expand Up @@ -130,7 +130,7 @@ public void testGetName() {

@Test
public void testGetHelpPage() {
assertEquals("Medline", fetcher.getHelpPage().get().getPageName());
assertEquals("import-using-online-bibliographic-database/medline", fetcher.getHelpPage().get().getPageName());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void setUp() {
void findByDOI() throws IOException {
entry.setField(StandardField.DOI, "10.1038/nature12373");

assertEquals(Optional.of(new URL("https://dash.harvard.edu/bitstream/1/12285462/Nanometer-Scale%20Thermometry.pdf?sequence=1")), finder.findFullText(entry));
assertEquals(Optional.of(new URL("https://dash.harvard.edu/bitstream/1/12285462/1/Nanometer-Scale%20Thermometry.pdf")), finder.findFullText(entry));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public void getNameReturnsEqualIdName() {

@Test
public void getHelpPageReturnsEqualHelpPage() {
assertEquals("RFCtoBibTeX", fetcher.getHelpPage().get().getPageName());
assertEquals("import-using-publication-identifiers/rfctobibtex", fetcher.getHelpPage().get().getPageName());
}

@Test
Expand Down
Loading