Fixes miss-parsed names in AutomaticPersonsGroup (#7228)

JabRef · Jan 18, 2021 · 31ced14 · 31ced14
1 parent 1d783b4
commit 31ced14
Show file tree

Hide file tree

Showing 6 changed files with 167 additions and 75 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,6 +20,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
 ### Changed
 
 - The content of the field `timestamp` is migrated to `creationdate`. In case one configured "udpate timestampe", it is migrated to `modificationdate`. [koppor#130](https://github.com/koppor/jabref/issues/130)
+- We fixed an issue where groups generated from authors' last names did not include all entries of the authors' [#5833](https://github.com/JabRef/jabref/issues/5833)
 - The export to MS Office XML now uses the month name for the field `MonthAcessed` instead of the two digit number [#7354](https://github.com/JabRef/jabref/issues/7354)
 
 ### Fixed

diff --git a/src/main/java/org/jabref/model/entry/Author.java b/src/main/java/org/jabref/model/entry/Author.java
@@ -3,49 +3,31 @@
 import java.util.Objects;
 import java.util.Optional;
 
+import org.jabref.model.strings.LatexToUnicodeAdapter;
 import org.jabref.model.strings.StringUtil;
 
 /**
- * This is an immutable class that keeps information regarding single
- * author. It is just a container for the information, with very simple
- * methods to access it.
+ * This is an immutable class that keeps information regarding single author. It is just a container for the information, with very simple methods to access it.
  * <p>
- * Current usage: only methods <code>getLastOnly</code>,
- * <code>getFirstLast</code>, and <code>getLastFirst</code> are used;
- * all other methods are provided for completeness.
+ * Current usage: only methods <code>getLastOnly</code>, <code>getFirstLast</code>, and <code>getLastFirst</code> are used; all other methods are provided for completeness.
  */
 public class Author {
 
     private final String firstPart;
-
     private final String firstAbbr;
-
     private final String vonPart;
-
     private final String lastPart;
-
     private final String jrPart;
+    private String latexFreeLastPart;
 
     /**
-     * Creates the Author object. If any part of the name is absent, <CODE>null</CODE>
-     * must be passed; otherwise other methods may return erroneous results.
+     * Creates the Author object. If any part of the name is absent, <CODE>null</CODE> must be passed; otherwise other methods may return erroneous results.
      *
-     * @param first     the first name of the author (may consist of several
-     *                  tokens, like "Charles Louis Xavier Joseph" in "Charles
-     *                  Louis Xavier Joseph de la Vall{\'e}e Poussin")
-     * @param firstabbr the abbreviated first name of the author (may consist of
-     *                  several tokens, like "C. L. X. J." in "Charles Louis
-     *                  Xavier Joseph de la Vall{\'e}e Poussin"). It is a
-     *                  responsibility of the caller to create a reasonable
-     *                  abbreviation of the first name.
-     * @param von       the von part of the author's name (may consist of several
-     *                  tokens, like "de la" in "Charles Louis Xavier Joseph de la
-     *                  Vall{\'e}e Poussin")
-     * @param last      the last name of the author (may consist of several
-     *                  tokens, like "Vall{\'e}e Poussin" in "Charles Louis Xavier
-     *                  Joseph de la Vall{\'e}e Poussin")
-     * @param jr        the junior part of the author's name (may consist of
-     *                  several tokens, like "Jr. III" in "Smith, Jr. III, John")
+     * @param first     the first name of the author (may consist of several tokens, like "Charles Louis Xavier Joseph" in "Charles Louis Xavier Joseph de la Vall{\'e}e Poussin")
+     * @param firstabbr the abbreviated first name of the author (may consist of several tokens, like "C. L. X. J." in "Charles Louis Xavier Joseph de la Vall{\'e}e Poussin"). It is a responsibility of the caller to create a reasonable abbreviation of the first name.
+     * @param von       the von part of the author's name (may consist of several tokens, like "de la" in "Charles Louis Xavier Joseph de la Vall{\'e}e Poussin")
+     * @param last      the last name of the author (may consist of several tokens, like "Vall{\'e}e Poussin" in "Charles Louis Xavier Joseph de la Vall{\'e}e Poussin")
+     * @param jr        the junior part of the author's name (may consist of several tokens, like "Jr. III" in "Smith, Jr. III, John")
      */
     public Author(String first, String firstabbr, String von, String last, String jr) {
         firstPart = addDotIfAbbreviation(removeStartAndEndBraces(first));
@@ -198,9 +180,11 @@ private boolean properBrackets(String s) {
      * Removes start and end brace at a string
      * <p>
      * E.g.,
-     * * {Vall{\'e}e Poussin} -> Vall{\'e}e Poussin
-     * * {Vall{\'e}e} {Poussin} -> Vall{\'e}e Poussin
-     * * Vall{\'e}e Poussin -> Vall{\'e}e Poussin
+     * <ul>
+     *     <li>{Vall{\'e}e Poussin} -> Vall{\'e}e Poussin</li>
+     *     <li>{Vall{\'e}e} {Poussin} -> Vall{\'e}e Poussin</li>
+     *     <li>Vall{\'e}e Poussin -> Vall{\'e}e Poussin</li>
+     * </ul>
      */
     private String removeStartAndEndBraces(String name) {
         if (StringUtil.isBlank(name)) {
@@ -263,19 +247,16 @@ public Optional<String> getFirst() {
     }
 
     /**
-     * Returns the abbreviated first name of the author stored in this
-     * object ("F.").
+     * Returns the abbreviated first name of the author stored in this object ("F.").
      *
-     * @return abbreviated first name of the author (may consist of several
-     * tokens)
+     * @return abbreviated first name of the author (may consist of several tokens)
      */
     public Optional<String> getFirstAbbr() {
         return Optional.ofNullable(firstAbbr);
     }
 
     /**
-     * Returns the von part of the author's name stored in this object
-     * ("von").
+     * Returns the von part of the author's name stored in this object ("von").
      *
      * @return von part of the author's name (may consist of several tokens)
      */
@@ -293,20 +274,28 @@ public Optional<String> getLast() {
     }
 
     /**
-     * Returns the junior part of the author's name stored in this object
-     * ("Jr").
+     * Returns the last name of the author stored in this object with resolved latex.
+     *
+     * @return last name of the author (may consist of several tokens)
+     */
+    public Optional<String> getLastLatexFree() {
+        if (latexFreeLastPart == null && lastPart != null) {
+            latexFreeLastPart = LatexToUnicodeAdapter.format(lastPart);
+        }
+        return Optional.ofNullable(latexFreeLastPart);
+    }
+
+    /**
+     * Returns the junior part of the author's name stored in this object ("Jr").
      *
-     * @return junior part of the author's name (may consist of several
-     * tokens) or null if the author does not have a Jr. Part
+     * @return junior part of the author's name (may consist of several tokens) or null if the author does not have a Jr. Part
      */
     public Optional<String> getJr() {
         return Optional.ofNullable(jrPart);
     }
 
     /**
-     * Returns von-part followed by last name ("von Last"). If both fields
-     * were specified as <CODE>null</CODE>, the empty string <CODE>""</CODE>
-     * is returned.
+     * Returns von-part followed by last name ("von Last"). If both fields were specified as <CODE>null</CODE>, the empty string <CODE>""</CODE> is returned.
      *
      * @return 'von Last'
      */
@@ -319,13 +308,10 @@ public String getLastOnly() {
     }
 
     /**
-     * Returns the author's name in form 'von Last, Jr., First' with the
-     * first name full or abbreviated depending on parameter.
+     * Returns the author's name in form 'von Last, Jr., First' with the first name full or abbreviated depending on parameter.
      *
-     * @param abbr <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> -
-     *             do not abbreviate
-     * @return 'von Last, Jr., First' (if <CODE>abbr==false</CODE>) or
-     * 'von Last, Jr., F.' (if <CODE>abbr==true</CODE>)
+     * @param abbr <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> - do not abbreviate
+     * @return 'von Last, Jr., First' (if <CODE>abbr==false</CODE>) or 'von Last, Jr., F.' (if <CODE>abbr==true</CODE>)
      */
     public String getLastFirst(boolean abbr) {
         StringBuilder res = new StringBuilder(getLastOnly());
@@ -339,13 +325,10 @@ public String getLastFirst(boolean abbr) {
     }
 
     /**
-     * Returns the author's name in form 'First von Last, Jr.' with the
-     * first name full or abbreviated depending on parameter.
+     * Returns the author's name in form 'First von Last, Jr.' with the first name full or abbreviated depending on parameter.
      *
-     * @param abbr <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> -
-     *             do not abbreviate
-     * @return 'First von Last, Jr.' (if <CODE>abbr==false</CODE>) or 'F.
-     * von Last, Jr.' (if <CODE>abbr==true</CODE>)
+     * @param abbr <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> - do not abbreviate
+     * @return 'First von Last, Jr.' (if <CODE>abbr==false</CODE>) or 'F. von Last, Jr.' (if <CODE>abbr==true</CODE>)
      */
     public String getFirstLast(boolean abbr) {
         StringBuilder res = new StringBuilder();
@@ -372,11 +355,9 @@ public String toString() {
     }
 
     /**
-     * Returns the name as "Last, Jr, F." omitting the von-part and removing
-     * starting braces.
+     * Returns the name as "Last, Jr, F." omitting the von-part and removing starting braces.
      *
-     * @return "Last, Jr, F." as described above or "" if all these parts
-     * are empty.
+     * @return "Last, Jr, F." as described above or "" if all these parts are empty.
      */
     public String getNameForAlphabetization() {
         StringBuilder res = new StringBuilder();

diff --git a/src/main/java/org/jabref/model/groups/AutomaticPersonsGroup.java b/src/main/java/org/jabref/model/groups/AutomaticPersonsGroup.java
@@ -1,19 +1,15 @@
 package org.jabref.model.groups;
 
 import java.util.Objects;
-import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 
-import org.jabref.model.entry.Author;
-import org.jabref.model.entry.AuthorList;
 import org.jabref.model.entry.BibEntry;
 import org.jabref.model.entry.field.Field;
-import org.jabref.model.util.OptionalUtil;
 
 public class AutomaticPersonsGroup extends AutomaticGroup {
 
-    private Field field;
+    private final Field field;
 
     public AutomaticPersonsGroup(String name, GroupHierarchyType context, Field field) {
         super(name, context);
@@ -44,16 +40,11 @@ public AbstractGroup deepCopy() {
 
     @Override
     public Set<GroupTreeNode> createSubgroups(BibEntry entry) {
-        Optional<AuthorList> authorList = entry.getLatexFreeField(field)
-                                               .map(AuthorList::parse);
-        return OptionalUtil.flatMap(authorList, AuthorList::getAuthors)
-                           .map(Author::getLast)
-                           .filter(Optional::isPresent)
-                           .map(Optional::get)
-                           .filter(lastName -> !lastName.isEmpty())
-                           .map(lastName -> new WordKeywordGroup(lastName, GroupHierarchyType.INDEPENDENT, field, lastName, true, ' ', true))
-                           .map(GroupTreeNode::new)
-                           .collect(Collectors.toSet());
+        return LastNameGroup.getAsLastNamesLatexFree(field, entry)
+                            .stream()
+                            .map(lastName -> new LastNameGroup(lastName, GroupHierarchyType.INDEPENDENT, field, lastName))
+                            .map(GroupTreeNode::new)
+                            .collect(Collectors.toSet());
     }
 
     public Field getField() {

diff --git a/src/main/java/org/jabref/model/groups/KeywordGroup.java b/src/main/java/org/jabref/model/groups/KeywordGroup.java
@@ -1,5 +1,7 @@
 package org.jabref.model.groups;
 
+import java.util.Objects;
+
 import org.jabref.model.entry.field.Field;
 
 /**
@@ -33,4 +35,24 @@ public Field getSearchField() {
     public boolean isDynamic() {
         return true;
     }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        if (!super.equals(o)) {
+            return false;
+        }
+        KeywordGroup that = (KeywordGroup) o;
+        return isCaseSensitive() == that.isCaseSensitive() && Objects.equals(getSearchField(), that.getSearchField()) && Objects.equals(getSearchExpression(), that.getSearchExpression());
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(super.hashCode(), getSearchField(), getSearchExpression(), isCaseSensitive());
+    }
 }
diff --git a/src/main/java/org/jabref/model/groups/LastNameGroup.java b/src/main/java/org/jabref/model/groups/LastNameGroup.java
@@ -0,0 +1,41 @@
+package org.jabref.model.groups;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import org.jabref.model.entry.Author;
+import org.jabref.model.entry.AuthorList;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.Field;
+import org.jabref.model.strings.LatexToUnicodeAdapter;
+
+/**
+ * Matches based on a latex free last name in a specified field. The field is parsed as an author list and the last names are resolved of latex.
+ */
+public class LastNameGroup extends KeywordGroup {
+    public LastNameGroup(String groupName, GroupHierarchyType context, Field searchField, String lastName) {
+        super(groupName, context, searchField, LatexToUnicodeAdapter.format(lastName), true);
+    }
+
+    static List<String> getAsLastNamesLatexFree(Field field, BibEntry bibEntry) {
+        return bibEntry.getField(field).stream()
+                       .map(AuthorList::parse)
+                       .map(AuthorList::getAuthors)
+                       .flatMap(Collection::stream)
+                       .map(Author::getLastLatexFree)
+                       .flatMap(Optional::stream)
+                       .collect(Collectors.toList());
+    }
+
+    @Override
+    public boolean contains(BibEntry entry) {
+        return getAsLastNamesLatexFree(getSearchField(), entry).stream().anyMatch(name -> name.equals(getSearchExpression()));
+    }
+
+    @Override
+    public AbstractGroup deepCopy() {
+        return new LastNameGroup(getName(), getHierarchicalContext(), getSearchField(), getSearchExpression());
+    }
+}
diff --git a/src/test/java/org/jabref/model/groups/AutomaticPersonsGroupTest.java b/src/test/java/org/jabref/model/groups/AutomaticPersonsGroupTest.java
@@ -0,0 +1,56 @@
+package org.jabref.model.groups;
+
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+
+import org.junit.jupiter.api.Test;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+
+class AutomaticPersonsGroupTest {
+    private static GroupTreeNode[] createPersonSubGroupFrom(String... lastNames) {
+        return Arrays.stream(lastNames)
+                     .map(lastName ->
+                             new LastNameGroup(lastName, GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR, lastName))
+                     .map(GroupTreeNode::new)
+                     .collect(Collectors.toList())
+                     .toArray(GroupTreeNode[]::new);
+    }
+
+    @Test
+    void createSubgroupsFromCommaSeparatedLastNames() {
+        BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "Turing, Alan and Hopper, Grace");
+        var subgroups = new AutomaticPersonsGroup("", GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR).createSubgroups(bibEntry);
+        var expectedSubgroups = createPersonSubGroupFrom("Turing", "Hopper");
+        assertThat(subgroups, containsInAnyOrder(expectedSubgroups));
+    }
+
+    @Test
+    void createSubgroupsContainingSpaceSeparatedNames() {
+        BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "Alan Turing and Grace Hopper");
+        var subgroups = new AutomaticPersonsGroup("", GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR).createSubgroups(bibEntry);
+        var expectedSubgroups = createPersonSubGroupFrom("Turing", "Hopper");
+        assertThat(subgroups, containsInAnyOrder(expectedSubgroups));
+    }
+
+    @Test
+    void createSubgroupFromLatex() {
+        BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "Kurt G{\\\"{o}}del");
+        var subgroup = new AutomaticPersonsGroup("", GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR).createSubgroups(bibEntry);
+        var expectedSubgroup = createPersonSubGroupFrom("Gödel");
+        assertThat(subgroup, contains(expectedSubgroup));
+    }
+
+    @Test
+    void createSubgroupFromUnicode() {
+        BibEntry bibEntry = new BibEntry().withField(StandardField.AUTHOR, "Kurt Gödel");
+        var subgroup = new AutomaticPersonsGroup("", GroupHierarchyType.INDEPENDENT, StandardField.AUTHOR).createSubgroups(bibEntry);
+        var expectedSubgroup = createPersonSubGroupFrom("Gödel");
+        assertThat(subgroup, contains(expectedSubgroup));
+    }
+}