Skip to content

Commit

Permalink
Add Pubmed/Medline Query Transformer (JabRef#8818)
Browse files Browse the repository at this point in the history
* Add Pubmed/Medline Query Transformer

Supports the default search fields and boolean operators

Fixes https://discourse.jabref.org/t/native-pubmed-search/3354

* checkstyle

* Update SuffixTransformerTest.java
  • Loading branch information
Siedlerchr authored May 21, 2022
1 parent b4e08ac commit 74f33d6
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 25 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- Writing BibTeX data into a PDF (XMP) removes braces. [#8452](https://github.com/JabRef/jabref/issues/8452)
- Writing BibTeX data into a PDF (XMP) does not write the `file` field.
- Writing BibTeX data into a PDF (XMP) considers the configured keyword separator (and does not use "," as default any more)
- The Medline/Pubmed search now also supports the [default fields and operators for searching](https://docs.jabref.org/collect/import-using-online-bibliographic-database#search-syntax). [forum#3354](https://discourse.jabref.org/t/native-pubmed-search/3354)

### Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.fetcher.transformers.DefaultQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.MedlineQueryTransformer;
import org.jabref.logic.importer.fileformat.MedlineImporter;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
Expand All @@ -54,16 +54,6 @@ public class MedlineFetcher implements IdBasedParserFetcher, SearchBasedFetcher

private int numberOfResultsFound;

/**
 * Rewrites a comma-separated query so that every comma becomes the boolean operator {@code " AND "}.
 * A single space directly following a comma is consumed together with the comma, so both
 * {@code "a, b"} and {@code "a,b"} yield {@code "a AND b"}.
 *
 * @param query the query text that may contain commas
 * @return the query with each comma (and one directly trailing space, if present) replaced by {@code " AND "}
 */
private static String replaceCommaWithAND(String query) {
    // Handle "comma + space" first so the space is not left behind next to the inserted operator.
    String withoutCommaSpace = query.replace(", ", " AND ");
    return withoutCommaSpace.replace(",", " AND ");
}

/**
* When using 'esearch.fcgi?db=<database>&term=<query>' we will get a list of IDs matching the query.
* Input: Any text query (&term)
Expand Down Expand Up @@ -164,7 +154,7 @@ private URL createSearchUrl(String query) throws URISyntaxException, MalformedUR
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("sort", "relevance");
uriBuilder.addParameter("retmax", String.valueOf(NUMBER_TO_FETCH));
uriBuilder.addParameter("term", replaceCommaWithAND(query));
uriBuilder.addParameter("term", query); // already lucene query
return uriBuilder.build().toURL();
}

Expand Down Expand Up @@ -200,7 +190,7 @@ private List<BibEntry> fetchMedline(List<String> ids) throws FetcherException {
@Override
public List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException {
List<BibEntry> entryList;
DefaultQueryTransformer transformer = new DefaultQueryTransformer();
MedlineQueryTransformer transformer = new MedlineQueryTransformer();
Optional<String> transformedQuery = transformer.transformLuceneQuery(luceneQuery);

if (transformedQuery.isEmpty() || transformedQuery.get().isBlank()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.jabref.logic.importer.fetcher.transformers;

/**
 * Query transformer for Medline/PubMed.
 * <p>
 * PubMed marks the field a term belongs to with a bracketed tag appended to the term
 * (for example {@code vigmond[au]}), so every field handler here returns the term followed by its tag.
 * See <a href="https://pubmed.ncbi.nlm.nih.gov/help/#search-tags">Pubmed help</a> for the list of tags.
 */
public class MedlineQueryTransformer extends AbstractQueryTransformer {

    private static final String AUTHOR_TAG = "[au]";
    private static final String TITLE_TAG = "[ti]";
    private static final String JOURNAL_TAG = "[ta]";
    private static final String DATE_OF_PUBLICATION_TAG = "[dp]";

    /**
     * @return the operator placed between two operands of a conjunction, including surrounding spaces
     */
    @Override
    protected String getLogicalAndOperator() {
        return " AND ";
    }

    /**
     * @return the operator placed between two operands of a disjunction, including surrounding spaces
     */
    @Override
    protected String getLogicalOrOperator() {
        return " OR ";
    }

    /**
     * @return the negation operator, with a trailing space only
     */
    @Override
    protected String getLogicalNotOperator() {
        return "NOT ";
    }

    /**
     * @param author the author term
     * @return the term followed by the {@code [au]} tag
     */
    @Override
    protected String handleAuthor(String author) {
        return author + AUTHOR_TAG;
    }

    /**
     * @param title the title term
     * @return the term followed by the {@code [ti]} tag
     */
    @Override
    protected String handleTitle(String title) {
        return title + TITLE_TAG;
    }

    /**
     * @param journalTitle the journal term
     * @return the term followed by the {@code [ta]} tag
     */
    @Override
    protected String handleJournal(String journalTitle) {
        return journalTitle + JOURNAL_TAG;
    }

    /**
     * @param year the year term
     * @return the term followed by the {@code [dp]} tag
     */
    @Override
    protected String handleYear(String year) {
        return year + DATE_OF_PUBLICATION_TAG;
    }

    /**
     * Converts a year range into the {@code start:end[dp]} form.
     *
     * @param yearRange the year range as given in the query
     * @return {@code startYear:endYear[dp]}, or the unmodified input when {@code endYear} is still
     *         {@link Integer#MAX_VALUE} after parsing
     */
    @Override
    protected String handleYearRange(String yearRange) {
        // parseYearRange is inherited; it is relied on here to update startYear/endYear.
        parseYearRange(yearRange);
        return endYear == Integer.MAX_VALUE
                ? yearRange
                : startYear + ":" + endYear + DATE_OF_PUBLICATION_TAG;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,20 @@ public void testMultipleEntries() throws Exception {
assertEquals(50, entryList.size());
}

@Test
public void testWithLuceneQueryAuthorDate() throws Exception {
    // Fielded query: author combined with a single publication year.
    List<BibEntry> results = fetcher.performSearch("author:vigmond AND year:2021");
    for (BibEntry result : results) {
        // Remove abstract due to copyright
        result.clearField(StandardField.ABSTRACT);
    }
    assertEquals(18, results.size());
}

@Test
public void testWithLuceneQueryAuthorDateRange() throws Exception {
    // Fielded query: author combined with a range of publication years.
    List<BibEntry> results = fetcher.performSearch("author:vigmond AND year-range:2020-2021");
    for (BibEntry result : results) {
        // Remove abstract due to copyright
        result.clearField(StandardField.ABSTRACT);
    }
    assertEquals(28, results.size());
}

@Test
public void testInvalidSearchTerm() throws Exception {
assertEquals(Optional.empty(), fetcher.performSearchById("this.is.a.invalid.search.term.for.the.medline.fetcher"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public String getTitlePrefix() {
}

@Override
public void convertJournalField() throws Exception {
public void convertJournalFieldPrefix() throws Exception {
IEEEQueryTransformer transformer = getTransformer();

String queryString = "journal:Nature";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,24 @@ public abstract class InfixTransformerTest<T extends AbstractQueryTransformer> {
* Example in the case of ':': <code>"author:"</code>
*/

protected abstract String getAuthorPrefix();
protected String getAuthorPrefix() {
// Empty by default; a concrete test class can override this to supply its own prefix.
return "";
}

protected abstract String getUnFieldedPrefix();
/**
 * Prefix for un-fielded terms (presumably prepended in the expected query string; the assertions
 * using it are not visible here). Defaults to the empty string; overridable by concrete test classes.
 */
protected String getUnFieldedPrefix() {
return "";
}

protected abstract String getJournalPrefix();
/**
 * Prefix for journal terms (presumably prepended in the expected query string; the assertions
 * using it are not visible here). Defaults to the empty string; overridable by concrete test classes.
 */
protected String getJournalPrefix() {
return "";
}

protected abstract String getTitlePrefix();
/**
 * Prefix for title terms (presumably prepended in the expected query string; the assertions
 * using it are not visible here). Defaults to the empty string; overridable by concrete test classes.
 */
protected String getTitlePrefix() {
return "";
}

@Test
public void convertAuthorField() throws Exception {
public void convertAuthorFieldPrefix() throws Exception {
String queryString = "author:\"Igor Steinmacher\"";
QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD);
Optional<String> searchQuery = getTransformer().transformLuceneQuery(luceneQuery);
Expand All @@ -37,7 +45,7 @@ public void convertAuthorField() throws Exception {
}

@Test
public void convertUnFieldedTerm() throws Exception {
public void convertUnFieldedTermPrefix() throws Exception {
String queryString = "\"default value\"";
QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD);
Optional<String> searchQuery = getTransformer().transformLuceneQuery(luceneQuery);
Expand All @@ -46,7 +54,7 @@ public void convertUnFieldedTerm() throws Exception {
}

@Test
public void convertExplicitUnFieldedTerm() throws Exception {
public void convertExplicitUnFieldedTermPrefix() throws Exception {
String queryString = "default:\"default value\"";
QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD);
Optional<String> searchQuery = getTransformer().transformLuceneQuery(luceneQuery);
Expand All @@ -55,7 +63,7 @@ public void convertExplicitUnFieldedTerm() throws Exception {
}

@Test
public void convertJournalField() throws Exception {
public void convertJournalFieldPrefix() throws Exception {
String queryString = "journal:Nature";
QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD);
Optional<String> searchQuery = getTransformer().transformLuceneQuery(luceneQuery);
Expand All @@ -70,7 +78,7 @@ public void convertJournalField() throws Exception {
public abstract void convertYearRangeField() throws Exception;

@Test
public void convertMultipleValuesWithTheSameField() throws Exception {
public void convertMultipleValuesWithTheSameFieldPrefix() throws Exception {
String queryString = "author:\"Igor Steinmacher\" author:\"Christoph Treude\"";
QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD);
Optional<String> searchQuery = getTransformer().transformLuceneQuery(luceneQuery);
Expand All @@ -79,7 +87,7 @@ public void convertMultipleValuesWithTheSameField() throws Exception {
}

@Test
public void groupedOperations() throws Exception {
public void groupedOperationsPrefix() throws Exception {
String queryString = "(author:\"Igor Steinmacher\" OR author:\"Christoph Treude\" AND author:\"Christoph Freunde\") AND title:test";
QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD);
Optional<String> searchQuery = getTransformer().transformLuceneQuery(luceneQuery);
Expand All @@ -88,7 +96,7 @@ public void groupedOperations() throws Exception {
}

@Test
public void notOperator() throws Exception {
public void notOperatorPrefix() throws Exception {
String queryString = "!(author:\"Igor Steinmacher\" OR author:\"Christoph Treude\")";
QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD);
Optional<String> searchQuery = getTransformer().transformLuceneQuery(luceneQuery);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package org.jabref.logic.importer.fetcher.transformers;

import java.util.Optional;

import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Base test for all transformers that mark the field of a term with a suffix appended to the term
 * (for example {@code "Igor Steinmacher"[au]}).
 * <p>
 * Concrete test classes supply the transformer under test and the suffix it uses for each field;
 * the shared test cases below build the expected query from those suffixes and the transformer's
 * logical operators.
 *
 * @param <T> the transformer type under test
 */
public abstract class SuffixTransformerTest<T extends AbstractQueryTransformer> {

    /**
     * @return the transformer under test
     */
    protected abstract T getTransformer();

    /**
     * @return the suffix expected after an author term
     */
    protected abstract String getAuthorSuffix();

    /**
     * @return the suffix expected after a term without an explicit field
     */
    protected abstract String getUnFieldedSuffix();

    /**
     * @return the suffix expected after a journal term
     */
    protected abstract String getJournalSuffix();

    /**
     * @return the suffix expected after a title term
     */
    protected abstract String getTitleSuffix();

    /**
     * Parses the given Lucene query string and runs it through the transformer under test.
     */
    private Optional<String> transform(String queryString) throws Exception {
        QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD);
        return getTransformer().transformLuceneQuery(luceneQuery);
    }

    @Test
    public void convertAuthorFieldSuffix() throws Exception {
        Optional<String> searchQuery = transform("author:\"Igor Steinmacher\"");
        Optional<String> expected = Optional.of("\"Igor Steinmacher\"" + getAuthorSuffix());
        assertEquals(expected, searchQuery);
    }

    @Test
    public void convertUnFieldedTermSuffix() throws Exception {
        String queryString = "\"default value\"";
        Optional<String> searchQuery = transform(queryString);
        Optional<String> expected = Optional.of(queryString + getUnFieldedSuffix());
        assertEquals(expected, searchQuery);
    }

    @Test
    public void convertExplicitUnFieldedTermSuffix() throws Exception {
        Optional<String> searchQuery = transform("default:\"default value\"");
        Optional<String> expected = Optional.of("\"default value\"" + getUnFieldedSuffix());
        assertEquals(expected, searchQuery);
    }

    @Test
    public void convertJournalFieldSuffix() throws Exception {
        Optional<String> searchQuery = transform("journal:Nature");
        Optional<String> expected = Optional.of("Nature" + getJournalSuffix());
        assertEquals(expected, searchQuery);
    }

    @Test
    public abstract void convertYearField() throws Exception;

    @Test
    public abstract void convertYearRangeField() throws Exception;

    @Test
    public void convertMultipleValuesWithTheSameSuffix() throws Exception {
        Optional<String> searchQuery = transform("author:\"Igor Steinmacher\" author:\"Christoph Treude\"");
        Optional<String> expected = Optional.of(
                "\"Igor Steinmacher\"" + getAuthorSuffix()
                        + getTransformer().getLogicalAndOperator()
                        + "\"Christoph Treude\"" + getAuthorSuffix());
        assertEquals(expected, searchQuery);
    }

    @Test
    public void groupedOperationsSuffix() throws Exception {
        Optional<String> searchQuery = transform("(author:\"Igor Steinmacher\" OR author:\"Christoph Treude\" AND author:\"Christoph Freunde\") AND title:test");
        Optional<String> expected = Optional.of(
                "(" + "\"Igor Steinmacher\"" + getAuthorSuffix()
                        + getTransformer().getLogicalOrOperator()
                        + "(" + "\"Christoph Treude\"" + getAuthorSuffix()
                        + getTransformer().getLogicalAndOperator()
                        + "\"Christoph Freunde\"" + getAuthorSuffix() + "))"
                        + getTransformer().getLogicalAndOperator()
                        + "test" + getTitleSuffix());
        assertEquals(expected, searchQuery);
    }

    @Test
    public void notOperatorSufix() throws Exception {
        Optional<String> searchQuery = transform("!(author:\"Igor Steinmacher\" OR author:\"Christoph Treude\")");
        // The suffix belongs to the term, so the closing parenthesis of the negated group comes after it
        // (same placement as in groupedOperationsSuffix).
        Optional<String> expected = Optional.of(
                getTransformer().getLogicalNotOperator()
                        + "(" + "\"Igor Steinmacher\"" + getAuthorSuffix()
                        + getTransformer().getLogicalOrOperator()
                        + "\"Christoph Treude\"" + getAuthorSuffix() + ")");
        assertEquals(expected, searchQuery);
    }
}

0 comments on commit 74f33d6

Please sign in to comment.