elastic · rjernst · Mar 28, 2022 · Mar 25, 2022 · Mar 25, 2022 · Mar 25, 2022
diff --git a/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java b/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java
@@ -8,13 +8,10 @@
 
 package org.elasticsearch.common.util;
 
-import com.carrotsearch.hppc.ObjectArrayList;
-
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefArray;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.InPlaceMergeSorter;
-import org.apache.lucene.util.IntroSorter;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.collect.Iterators;
 
@@ -47,6 +44,27 @@ public static boolean isEmpty(Object[] array) {
         return array == null || array.length == 0;
     }
 
+    /**
+     * Removes consecutive duplicates from {@code list} in place, keeping the first element of every run.
+     * <p>
+     * Two elements are duplicates when {@code cmp} returns {@code 0} for them. Only neighbouring elements
+     * are compared, so the list has to be sorted consistently with {@code cmp} beforehand if every
+     * duplicate is supposed to be removed.
+     *
+     * @param list the list to compact; it is modified through {@code set} and {@code subList(...).clear()}
+     * @param cmp  decides whether two neighbouring elements are equal
+     */
+    public static <T> void unique(List<T> list, Comparator<? super T> cmp) {
+        if (list.size() <= 1) {
+            return;
+        }
+
+        // index of the last element that is known to be kept
+        int lastUnique = 0;
+        T prevValue = list.get(0);
+        for (int i = 1; i < list.size(); ++i) {
+            T nextValue = list.get(i);
+            if (cmp.compare(nextValue, prevValue) == 0) {
+                // still inside the run started by prevValue, drop it
+                continue;
+            }
+            ++lastUnique;
+            if (lastUnique != i) {
+                // duplicates were skipped earlier, so move the value down into the gap
+                list.set(lastUnique, nextValue);
+            }
+            prevValue = nextValue;
+        }
+
+        int uniqueCount = lastUnique + 1;
+        if (uniqueCount != list.size()) {
+            // lop off the rest of the list
+            list.subList(uniqueCount, list.size()).clear();
+        }
+    }
+
     /**
      * Return a rotated view of the given list with the given distance.
      */
@@ -67,61 +85,6 @@ public static <T> List<T> rotate(final List<T> list, int distance) {
         return new RotatedList<>(list, d);
     }
 
-    /**
-     * Sorts {@code array} and then drops duplicate entries in place.
-     * <p>
-     * After sorting, every element that differs (per {@link Arrays#equals(byte[], byte[])}) from its
-     * predecessor is copied forward to the next free slot, and {@code elementsCount} is set to the number
-     * of elements kept. Arrays with fewer than two elements are left untouched.
-     */
-    public static void sortAndDedup(final ObjectArrayList<byte[]> array) {
-        int len = array.size();
-        if (len > 1) {
-            sort(array);
-            // the first element is always kept, so counting starts at one
-            int uniqueCount = 1;
-            for (int i = 1; i < len; ++i) {
-                if (Arrays.equals(array.get(i), array.get(i - 1)) == false) {
-                    array.set(uniqueCount++, array.get(i));
-                }
-            }
-            // shrink the logical size; slots past uniqueCount are not cleared here
-            array.elementsCount = uniqueCount;
-        }
-    }
-
-    /**
-     * Sorts {@code array} in place with an anonymous {@link IntroSorter}.
-     * <p>
-     * Elements are compared byte by byte with every byte read as an unsigned value ({@code & 0xFF}).
-     * When all compared positions are equal, the result is the difference of the two lengths, so the
-     * shorter array sorts first.
-     */
-    public static void sort(final ObjectArrayList<byte[]> array) {
-        new IntroSorter() {
-
-            // element selected by setPivot, compared against in comparePivot
-            byte[] pivot;
-
-            @Override
-            protected void swap(int i, int j) {
-                final byte[] tmp = array.get(i);
-                array.set(i, array.get(j));
-                array.set(j, tmp);
-            }
-
-            @Override
-            protected int compare(int i, int j) {
-                return compare(array.get(i), array.get(j));
-            }
-
-            @Override
-            protected void setPivot(int i) {
-                pivot = array.get(i);
-            }
-
-            @Override
-            protected int comparePivot(int j) {
-                return compare(pivot, array.get(j));
-            }
-
-            // compares two arrays over their common length, then falls back to the length difference
-            private int compare(byte[] left, byte[] right) {
-                for (int i = 0, j = 0; i < left.length && j < right.length; i++, j++) {
-                    // mask to 0..255 so the bytes are compared as unsigned values
-                    int a = left[i] & 0xFF;
-                    int b = right[j] & 0xFF;
-                    if (a != b) {
-                        return a - b;
-                    }
-                }
-                return left.length - right.length;
-            }
-
-        }.sort(0, array.size());
-    }
-
     public static int[] toArray(Collection<Integer> ints) {
         Objects.requireNonNull(ints);
         return ints.stream().mapToInt(s -> s).toArray();

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java
@@ -8,8 +8,6 @@
 
 package org.elasticsearch.index.mapper;
 
-import com.carrotsearch.hppc.ObjectArrayList;
-
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
@@ -28,6 +26,8 @@
 
 import java.io.IOException;
 import java.time.ZoneId;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Base64;
 import java.util.Collections;
 import java.util.List;
@@ -194,30 +194,28 @@ protected String contentType() {
 
     public static class CustomBinaryDocValuesField extends CustomDocValuesField {
 
-        private final ObjectArrayList<byte[]> bytesList;
-
-        private int totalSize = 0;
+        private final List<byte[]> bytesList;
 
+        /**
+         * Creates the field and records {@code bytes} as its first value.
+         *
+         * @param name  handed to the superclass constructor
+         * @param bytes the first value, stored through {@link #add(byte[])}
+         */
         public CustomBinaryDocValuesField(String name, byte[] bytes) {
             super(name);
-            bytesList = new ObjectArrayList<>();
+            bytesList = new ArrayList<>();
             add(bytes);
         }
 
+        /**
+         * Appends another value to this field. The array itself is stored, not a copy.
+         *
+         * @param bytes the value to append
+         */
         public void add(byte[] bytes) {
             bytesList.add(bytes);
-            totalSize += bytes.length;
         }
 
         @Override
         public BytesRef binaryValue() {
             try {
-                CollectionUtils.sortAndDedup(bytesList);
-                int size = bytesList.size();
-                BytesStreamOutput out = new BytesStreamOutput(totalSize + (size + 1) * 5);
-                out.writeVInt(size);  // write total number of values
-                for (int i = 0; i < size; i++) {
-                    final byte[] value = bytesList.get(i);
+                bytesList.sort(Arrays::compare);
+                CollectionUtils.unique(bytesList, Arrays::compare);
+                int bytesSize = bytesList.stream().map(a -> a.length).reduce(0, Integer::sum);
+                int n = bytesList.size();
+                BytesStreamOutput out = new BytesStreamOutput(bytesSize + (n + 1) * 5);
+                out.writeVInt(n);  // write total number of values
+                for (var value : bytesList) {
                     int valueLength = value.length;
                     out.writeVInt(valueLength);
                     out.writeBytes(value, 0, valueLength);

diff --git a/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java b/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java
@@ -17,6 +17,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -31,6 +32,7 @@
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.lessThan;
 
 public class CollectionUtilsTests extends ESTestCase {
     public void testRotateEmpty() {
@@ -62,6 +64,24 @@ public void testRotate() {
         }
     }
 
+    /**
+     * Applies {@code CollectionUtils.unique} to a mutable copy of {@code list} and verifies the outcome:
+     * every element must compare strictly less than its successor, and the copy must end up with
+     * {@code size} elements.
+     */
+    private <T> void assertUnique(List<T> list, Comparator<T> cmp, int size) {
+        final List<T> deduped = new ArrayList<>(list);
+        CollectionUtils.unique(deduped, cmp);
+        for (int next = 1; next < deduped.size(); ++next) {
+            final T left = deduped.get(next - 1);
+            final T right = deduped.get(next);
+            assertThat(cmp.compare(left, right), lessThan(0));
+        }
+        assertThat(deduped.size(), equalTo(size));
+    }
+
+    /**
+     * Exercises {@code CollectionUtils.unique} on sorted inputs: trivial lists, lists without duplicates,
+     * and duplicates at the start, in the middle and at the end of the list.
+     */
+    public void testUnique() {
+        // empty and single-element lists take the early return
+        assertUnique(List.<Integer>of(), Comparator.naturalOrder(), 0);
+        assertUnique(List.of(1), Comparator.naturalOrder(), 1);
+        // nothing to remove
+        assertUnique(List.of(1, 2, 3), Comparator.naturalOrder(), 3);
+        // the whole list is one run
+        assertUnique(List.of(1, 1, 1), Comparator.naturalOrder(), 1);
+        // duplicates in the middle and at the end
+        assertUnique(List.of(1, 2, 2, 3), Comparator.naturalOrder(), 3);
+        assertUnique(List.of(1, 2, 2, 2), Comparator.naturalOrder(), 2);
+        // duplicates at the start, so later values have to be moved down
+        assertUnique(List.of(1, 1, 2), Comparator.naturalOrder(), 2);
+        // several separate runs
+        assertUnique(List.of(1, 1, 2, 2, 3, 3), Comparator.naturalOrder(), 3);
+        // equality is decided by the comparator, not by equals()
+        assertUnique(List.of("a", "A", "b"), String.CASE_INSENSITIVE_ORDER, 2);
+    }
+
     public void testSortAndDedupByteRefArray() {
         SortedSet<BytesRef> set = new TreeSet<>();
         final int numValues = scaledRandomIntBetween(0, 10000);