Skip to content

Commit

Permalink
Fixes miss-parsed names in AutomaticPersonsGroup (#7228)
Browse files Browse the repository at this point in the history
  • Loading branch information
k3KAW8Pnf7mkmdSMPHz27 authored Jan 18, 2021
1 parent 1d783b4 commit 31ced14
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 75 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
### Changed

- The content of the field `timestamp` is migrated to `creationdate`. In case one configured "udpate timestampe", it is migrated to `modificationdate`. [koppor#130](https://github.com/koppor/jabref/issues/130)
- We fixed an issue where groups generated from authors' last names did not include all entries of the authors' [#5833](https://github.com/JabRef/jabref/issues/5833)
- The export to MS Office XML now uses the month name for the field `MonthAcessed` instead of the two digit number [#7354](https://github.com/JabRef/jabref/issues/7354)

### Fixed
Expand Down
101 changes: 41 additions & 60 deletions src/main/java/org/jabref/model/entry/Author.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,49 +3,31 @@
import java.util.Objects;
import java.util.Optional;

import org.jabref.model.strings.LatexToUnicodeAdapter;
import org.jabref.model.strings.StringUtil;

/**
* This is an immutable class that keeps information regarding single
* author. It is just a container for the information, with very simple
* methods to access it.
* This is an immutable class that keeps information regarding single author. It is just a container for the information, with very simple methods to access it.
* <p>
* Current usage: only methods <code>getLastOnly</code>,
* <code>getFirstLast</code>, and <code>getLastFirst</code> are used;
* all other methods are provided for completeness.
* Current usage: only methods <code>getLastOnly</code>, <code>getFirstLast</code>, and <code>getLastFirst</code> are used; all other methods are provided for completeness.
*/
public class Author {

private final String firstPart;

private final String firstAbbr;

private final String vonPart;

private final String lastPart;

private final String jrPart;
private String latexFreeLastPart;

/**
* Creates the Author object. If any part of the name is absent, <CODE>null</CODE>
* must be passed; otherwise other methods may return erroneous results.
* Creates the Author object. If any part of the name is absent, <CODE>null</CODE> must be passed; otherwise other methods may return erroneous results.
*
* @param first the first name of the author (may consist of several
* tokens, like "Charles Louis Xavier Joseph" in "Charles
* Louis Xavier Joseph de la Vall{\'e}e Poussin")
* @param firstabbr the abbreviated first name of the author (may consist of
* several tokens, like "C. L. X. J." in "Charles Louis
* Xavier Joseph de la Vall{\'e}e Poussin"). It is a
* responsibility of the caller to create a reasonable
* abbreviation of the first name.
* @param von the von part of the author's name (may consist of several
* tokens, like "de la" in "Charles Louis Xavier Joseph de la
* Vall{\'e}e Poussin")
* @param last the last name of the author (may consist of several
* tokens, like "Vall{\'e}e Poussin" in "Charles Louis Xavier
* Joseph de la Vall{\'e}e Poussin")
* @param jr the junior part of the author's name (may consist of
* several tokens, like "Jr. III" in "Smith, Jr. III, John")
* @param first the first name of the author (may consist of several tokens, like "Charles Louis Xavier Joseph" in "Charles Louis Xavier Joseph de la Vall{\'e}e Poussin")
* @param firstabbr the abbreviated first name of the author (may consist of several tokens, like "C. L. X. J." in "Charles Louis Xavier Joseph de la Vall{\'e}e Poussin"). It is a responsibility of the caller to create a reasonable abbreviation of the first name.
* @param von the von part of the author's name (may consist of several tokens, like "de la" in "Charles Louis Xavier Joseph de la Vall{\'e}e Poussin")
* @param last the last name of the author (may consist of several tokens, like "Vall{\'e}e Poussin" in "Charles Louis Xavier Joseph de la Vall{\'e}e Poussin")
* @param jr the junior part of the author's name (may consist of several tokens, like "Jr. III" in "Smith, Jr. III, John")
*/
public Author(String first, String firstabbr, String von, String last, String jr) {
firstPart = addDotIfAbbreviation(removeStartAndEndBraces(first));
Expand Down Expand Up @@ -198,9 +180,11 @@ private boolean properBrackets(String s) {
* Removes start and end brace at a string
* <p>
* E.g.,
* * {Vall{\'e}e Poussin} -> Vall{\'e}e Poussin
* * {Vall{\'e}e} {Poussin} -> Vall{\'e}e Poussin
* * Vall{\'e}e Poussin -> Vall{\'e}e Poussin
* <ul>
* <li>{Vall{\'e}e Poussin} -> Vall{\'e}e Poussin</li>
* <li>{Vall{\'e}e} {Poussin} -> Vall{\'e}e Poussin</li>
* <li>Vall{\'e}e Poussin -> Vall{\'e}e Poussin</li>
* </ul>
*/
private String removeStartAndEndBraces(String name) {
if (StringUtil.isBlank(name)) {
Expand Down Expand Up @@ -263,19 +247,16 @@ public Optional<String> getFirst() {
}

/**
* Returns the abbreviated first name of the author stored in this
* object ("F.").
* Returns the abbreviated first name of the author stored in this object ("F.").
*
* @return abbreviated first name of the author (may consist of several
* tokens)
* @return abbreviated first name of the author (may consist of several tokens)
*/
public Optional<String> getFirstAbbr() {
return Optional.ofNullable(firstAbbr);
}

/**
* Returns the von part of the author's name stored in this object
* ("von").
* Returns the von part of the author's name stored in this object ("von").
*
* @return von part of the author's name (may consist of several tokens)
*/
Expand All @@ -293,20 +274,28 @@ public Optional<String> getLast() {
}

/**
* Returns the junior part of the author's name stored in this object
* ("Jr").
* Returns the last name of the author stored in this object with resolved latex.
*
* @return last name of the author (may consist of several tokens)
*/
public Optional<String> getLastLatexFree() {
if (latexFreeLastPart == null && lastPart != null) {
latexFreeLastPart = LatexToUnicodeAdapter.format(lastPart);
}
return Optional.ofNullable(latexFreeLastPart);
}

/**
* Returns the junior part of the author's name stored in this object ("Jr").
*
* @return junior part of the author's name (may consist of several
* tokens) or null if the author does not have a Jr. Part
* @return junior part of the author's name (may consist of several tokens) or null if the author does not have a Jr. Part
*/
public Optional<String> getJr() {
return Optional.ofNullable(jrPart);
}

/**
* Returns von-part followed by last name ("von Last"). If both fields
* were specified as <CODE>null</CODE>, the empty string <CODE>""</CODE>
* is returned.
* Returns von-part followed by last name ("von Last"). If both fields were specified as <CODE>null</CODE>, the empty string <CODE>""</CODE> is returned.
*
* @return 'von Last'
*/
Expand All @@ -319,13 +308,10 @@ public String getLastOnly() {
}

/**
* Returns the author's name in form 'von Last, Jr., First' with the
* first name full or abbreviated depending on parameter.
* Returns the author's name in form 'von Last, Jr., First' with the first name full or abbreviated depending on parameter.
*
* @param abbr <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> -
* do not abbreviate
* @return 'von Last, Jr., First' (if <CODE>abbr==false</CODE>) or
* 'von Last, Jr., F.' (if <CODE>abbr==true</CODE>)
* @param abbr <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> - do not abbreviate
* @return 'von Last, Jr., First' (if <CODE>abbr==false</CODE>) or 'von Last, Jr., F.' (if <CODE>abbr==true</CODE>)
*/
public String getLastFirst(boolean abbr) {
StringBuilder res = new StringBuilder(getLastOnly());
Expand All @@ -339,13 +325,10 @@ public String getLastFirst(boolean abbr) {
}

/**
* Returns the author's name in form 'First von Last, Jr.' with the
* first name full or abbreviated depending on parameter.
* Returns the author's name in form 'First von Last, Jr.' with the first name full or abbreviated depending on parameter.
*
* @param abbr <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> -
* do not abbreviate
* @return 'First von Last, Jr.' (if <CODE>abbr==false</CODE>) or 'F.
* von Last, Jr.' (if <CODE>abbr==true</CODE>)
* @param abbr <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> - do not abbreviate
* @return 'First von Last, Jr.' (if <CODE>abbr==false</CODE>) or 'F. von Last, Jr.' (if <CODE>abbr==true</CODE>)
*/
public String getFirstLast(boolean abbr) {
StringBuilder res = new StringBuilder();
Expand All @@ -372,11 +355,9 @@ public String toString() {
}

/**
* Returns the name as "Last, Jr, F." omitting the von-part and removing
* starting braces.
* Returns the name as "Last, Jr, F." omitting the von-part and removing starting braces.
*
* @return "Last, Jr, F." as described above or "" if all these parts
* are empty.
* @return "Last, Jr, F." as described above or "" if all these parts are empty.
*/
public String getNameForAlphabetization() {
StringBuilder res = new StringBuilder();
Expand Down
21 changes: 6 additions & 15 deletions src/main/java/org/jabref/model/groups/AutomaticPersonsGroup.java
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
package org.jabref.model.groups;

import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import org.jabref.model.entry.Author;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
import org.jabref.model.util.OptionalUtil;

public class AutomaticPersonsGroup extends AutomaticGroup {

private Field field;
private final Field field;

public AutomaticPersonsGroup(String name, GroupHierarchyType context, Field field) {
super(name, context);
Expand Down Expand Up @@ -44,16 +40,11 @@ public AbstractGroup deepCopy() {

@Override
public Set<GroupTreeNode> createSubgroups(BibEntry entry) {
Optional<AuthorList> authorList = entry.getLatexFreeField(field)
.map(AuthorList::parse);
return OptionalUtil.flatMap(authorList, AuthorList::getAuthors)
.map(Author::getLast)
.filter(Optional::isPresent)
.map(Optional::get)
.filter(lastName -> !lastName.isEmpty())
.map(lastName -> new WordKeywordGroup(lastName, GroupHierarchyType.INDEPENDENT, field, lastName, true, ' ', true))
.map(GroupTreeNode::new)
.collect(Collectors.toSet());
return LastNameGroup.getAsLastNamesLatexFree(field, entry)
.stream()
.map(lastName -> new LastNameGroup(lastName, GroupHierarchyType.INDEPENDENT, field, lastName))
.map(GroupTreeNode::new)
.collect(Collectors.toSet());
}

public Field getField() {
Expand Down
22 changes: 22 additions & 0 deletions src/main/java/org/jabref/model/groups/KeywordGroup.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.jabref.model.groups;

import java.util.Objects;

import org.jabref.model.entry.field.Field;

/**
Expand Down Expand Up @@ -33,4 +35,24 @@ public Field getSearchField() {
public boolean isDynamic() {
return true;
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
if (!super.equals(o)) {
return false;
}
KeywordGroup that = (KeywordGroup) o;
return isCaseSensitive() == that.isCaseSensitive() && Objects.equals(getSearchField(), that.getSearchField()) && Objects.equals(getSearchExpression(), that.getSearchExpression());
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), getSearchField(), getSearchExpression(), isCaseSensitive());
}
}
41 changes: 41 additions & 0 deletions src/main/java/org/jabref/model/groups/LastNameGroup.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package org.jabref.model.groups;

import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import org.jabref.model.entry.Author;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
import org.jabref.model.strings.LatexToUnicodeAdapter;

/**
* Matches based on a latex free last name in a specified field. The field is parsed as an author list and the last names are resolved of latex.
*/
public class LastNameGroup extends KeywordGroup {
public LastNameGroup(String groupName, GroupHierarchyType context, Field searchField, String lastName) {
super(groupName, context, searchField, LatexToUnicodeAdapter.format(lastName), true);
}

static List<String> getAsLastNamesLatexFree(Field field, BibEntry bibEntry) {
return bibEntry.getField(field).stream()
.map(AuthorList::parse)
.map(AuthorList::getAuthors)
.flatMap(Collection::stream)
.map(Author::getLastLatexFree)
.flatMap(Optional::stream)
.collect(Collectors.toList());
}

@Override
public boolean contains(BibEntry entry) {
return getAsLastNamesLatexFree(getSearchField(), entry).stream().anyMatch(name -> name.equals(getSearchExpression()));
}

@Override
public AbstractGroup deepCopy() {
return new LastNameGroup(getName(), getHierarchicalContext(), getSearchField(), getSearchExpression());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package org.jabref.model.groups;

import java.util.Arrays;
import java.util.stream.Collectors;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;

import org.junit.jupiter.api.Test;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;

class AutomaticPersonsGroupTest {
private static GroupTreeNode[] createPersonSubGroupFrom(String... lastNames) {
return Arrays.stream(lastNames)
.map(lastName ->
new LastNameGroup(lastName, GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR, lastName))
.map(GroupTreeNode::new)
.collect(Collectors.toList())
.toArray(GroupTreeNode[]::new);
}

@Test
void createSubgroupsFromCommaSeparatedLastNames() {
BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "Turing, Alan and Hopper, Grace");
var subgroups = new AutomaticPersonsGroup("", GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR).createSubgroups(bibEntry);
var expectedSubgroups = createPersonSubGroupFrom("Turing", "Hopper");
assertThat(subgroups, containsInAnyOrder(expectedSubgroups));
}

@Test
void createSubgroupsContainingSpaceSeparatedNames() {
BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "Alan Turing and Grace Hopper");
var subgroups = new AutomaticPersonsGroup("", GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR).createSubgroups(bibEntry);
var expectedSubgroups = createPersonSubGroupFrom("Turing", "Hopper");
assertThat(subgroups, containsInAnyOrder(expectedSubgroups));
}

@Test
void createSubgroupFromLatex() {
BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "Kurt G{\\\"{o}}del");
var subgroup = new AutomaticPersonsGroup("", GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR).createSubgroups(bibEntry);
var expectedSubgroup = createPersonSubGroupFrom("Gödel");
assertThat(subgroup, contains(expectedSubgroup));
}

@Test
void createSubgroupFromUnicode() {
BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "Kurt Gödel");
var subgroup = new AutomaticPersonsGroup("", GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR).createSubgroups(bibEntry);
var expectedSubgroup = createPersonSubGroupFrom("Gödel");
assertThat(subgroup, contains(expectedSubgroup));
}
}

0 comments on commit 31ced14

Please sign in to comment.