diff --git a/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java b/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java index e96dada1b99d7..c2c76ea80423b 100644 --- a/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java +++ b/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java @@ -8,9 +8,6 @@ package org.elasticsearch.common.util; -import com.carrotsearch.hppc.ObjectArrayList; - -import org.apache.lucene.util.IntroSorter; import org.elasticsearch.common.Strings; import org.elasticsearch.common.collect.Iterators; @@ -20,8 +17,10 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.IdentityHashMap; import java.util.List; +import java.util.ListIterator; import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -42,6 +41,33 @@ public static boolean isEmpty(Object[] array) { return array == null || array.length == 0; } + /** + * Eliminate duplicates from a sorted list in-place. + * + * @param list A sorted list, which will be modified in place. + * @param cmp A comparator the list is already sorted by. + */ + public static void uniquify(List list, Comparator cmp) { + if (list.size() <= 1) { + return; + } + + ListIterator uniqueItr = list.listIterator(); + ListIterator existingItr = list.listIterator(); + T uniqueValue = uniqueItr.next(); // get first element to compare with + existingItr.next(); // advance the existing iterator to the second element, where we will begin comparing + do { + T existingValue = existingItr.next(); + if (cmp.compare(existingValue, uniqueValue) != 0 && (uniqueValue = uniqueItr.next()) != existingValue) { + uniqueItr.set(existingValue); + } + } while (existingItr.hasNext()); + + // Lop off the rest of the list. Note with LinkedList this requires advancing back to this index, + // but Java provides no way to efficiently remove from the end of a non random-access list. + list.subList(uniqueItr.nextIndex(), list.size()).clear(); + } + /** * Return a rotated view of the given list with the given distance. */ @@ -62,61 +88,6 @@ public static List rotate(final List list, int distance) { return new RotatedList<>(list, d); } - public static void sortAndDedup(final ObjectArrayList array) { - int len = array.size(); - if (len > 1) { - sort(array); - int uniqueCount = 1; - for (int i = 1; i < len; ++i) { - if (Arrays.equals(array.get(i), array.get(i - 1)) == false) { - array.set(uniqueCount++, array.get(i)); - } - } - array.elementsCount = uniqueCount; - } - } - - public static void sort(final ObjectArrayList array) { - new IntroSorter() { - - byte[] pivot; - - @Override - protected void swap(int i, int j) { - final byte[] tmp = array.get(i); - array.set(i, array.get(j)); - array.set(j, tmp); - } - - @Override - protected int compare(int i, int j) { - return compare(array.get(i), array.get(j)); - } - - @Override - protected void setPivot(int i) { - pivot = array.get(i); - } - - @Override - protected int comparePivot(int j) { - return compare(pivot, array.get(j)); - } - - private int compare(byte[] left, byte[] right) { - for (int i = 0, j = 0; i < left.length && j < right.length; i++, j++) { - int a = left[i] & 0xFF; - int b = right[j] & 0xFF; - if (a != b) { - return a - b; - } - } - return left.length - right.length; - } - - }.sort(0, array.size()); - } - public static int[] toArray(Collection ints) { Objects.requireNonNull(ints); return ints.stream().mapToInt(s -> s).toArray(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java index fbc6d630f56ec..c6c35bf8bf1cc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java @@ -8,8 +8,6 @@ package org.elasticsearch.index.mapper; -import com.carrotsearch.hppc.ObjectArrayList; - import org.apache.lucene.document.StoredField; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -28,6 +26,8 @@ import java.io.IOException; import java.time.ZoneId; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Base64; import java.util.Collections; import java.util.List; @@ -194,30 +194,28 @@ protected String contentType() { public static class CustomBinaryDocValuesField extends CustomDocValuesField { - private final ObjectArrayList bytesList; - - private int totalSize = 0; + private final List bytesList; public CustomBinaryDocValuesField(String name, byte[] bytes) { super(name); - bytesList = new ObjectArrayList<>(); + bytesList = new ArrayList<>(); add(bytes); } public void add(byte[] bytes) { bytesList.add(bytes); - totalSize += bytes.length; } @Override public BytesRef binaryValue() { try { - CollectionUtils.sortAndDedup(bytesList); - int size = bytesList.size(); - BytesStreamOutput out = new BytesStreamOutput(totalSize + (size + 1) * 5); - out.writeVInt(size); // write total number of values - for (int i = 0; i < size; i++) { - final byte[] value = bytesList.get(i); + bytesList.sort(Arrays::compareUnsigned); + CollectionUtils.uniquify(bytesList, Arrays::compareUnsigned); + int bytesSize = bytesList.stream().map(a -> a.length).reduce(0, Integer::sum); + int n = bytesList.size(); + BytesStreamOutput out = new BytesStreamOutput(bytesSize + (n + 1) * 5); + out.writeVInt(n); // write total number of values + for (var value : bytesList) { int valueLength = value.length; out.writeVInt(valueLength); out.writeBytes(value, 0, valueLength); diff --git a/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java b/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java index 4f532f53d5615..89f55a6981ffb 100644 --- a/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java +++ b/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java @@ -13,8 +13,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -23,6 +25,7 @@ import static org.elasticsearch.common.util.CollectionUtils.limitSize; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.lessThan; public class CollectionUtilsTests extends ESTestCase { public void testRotateEmpty() { @@ -54,6 +57,25 @@ public void testRotate() { } } + private void assertUniquify(List list, Comparator cmp, int size) { + for (List listCopy : List.of(new ArrayList(list), new LinkedList(list))) { + CollectionUtils.uniquify(listCopy, cmp); + for (int i = 0; i < listCopy.size() - 1; ++i) { + assertThat(cmp.compare(listCopy.get(i), listCopy.get(i + 1)), lessThan(0)); + } + assertThat(listCopy.size(), equalTo(size)); + } + } + + public void testUniquify() { + assertUniquify(List.of(), Comparator.naturalOrder(), 0); + assertUniquify(List.of(1), Comparator.naturalOrder(), 1); + assertUniquify(List.of(1, 2, 3), Comparator.naturalOrder(), 3); + assertUniquify(List.of(1, 1, 1), Comparator.naturalOrder(), 1); + assertUniquify(List.of(1, 2, 2, 3), Comparator.naturalOrder(), 3); + assertUniquify(List.of(1, 2, 2, 2), Comparator.naturalOrder(), 2); + } + public void testEmptyPartition() { assertEquals(Collections.emptyList(), eagerPartition(Collections.emptyList(), 1)); }