-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rewrite CollectionUtils dedup to work with any type #85352
Changes from 1 commit
2b54ea7
dd60133
a4a522a
af05696
2458c18
23d6338
9024353
e62f801
7b4f841
cf2b953
0aafa70
ca78af8
c3743a1
f8814e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,13 +8,10 @@ | |
|
||
package org.elasticsearch.common.util; | ||
|
||
import com.carrotsearch.hppc.ObjectArrayList; | ||
|
||
import org.apache.lucene.util.BytesRef; | ||
import org.apache.lucene.util.BytesRefArray; | ||
import org.apache.lucene.util.BytesRefBuilder; | ||
import org.apache.lucene.util.InPlaceMergeSorter; | ||
import org.apache.lucene.util.IntroSorter; | ||
import org.elasticsearch.common.Strings; | ||
import org.elasticsearch.common.collect.Iterators; | ||
|
||
|
@@ -47,6 +44,27 @@ public static boolean isEmpty(Object[] array) { | |
return array == null || array.length == 0; | ||
} | ||
|
||
public static <T> void unique(List<T> list, Comparator<T> cmp) { | ||
if (list.size() <= 1) { | ||
return; | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we verify that the list implements random access? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The algorithm only requires a forward iterator. I've rewritten to use ListIterator instead of indices. The only caveat is that for LinkedList Java does not provide an efficient means to remove the rest of a list from a given point. |
||
int prevNdx = 0; | ||
T prevValue = list.get(0); | ||
for (int i = 1; i < list.size(); ++i) { | ||
T nextValue = list.get(i); | ||
if (cmp.compare(nextValue, prevValue) != 0 && prevNdx++ != i) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to pre-increment rather than post-increment? Otherwise it looks to me like a list where all elements are unique would still overwrite all the time? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right, it should be pre-increment. |
||
list.set(prevNdx, nextValue); | ||
prevValue = nextValue; | ||
} | ||
} | ||
++prevNdx; | ||
if (prevNdx != list.size()) { | ||
// lop off the rest of the list | ||
list.subList(prevNdx, list.size()).clear(); | ||
} | ||
} | ||
|
||
/** | ||
* Return a rotated view of the given list with the given distance. | ||
*/ | ||
|
@@ -67,61 +85,6 @@ public static <T> List<T> rotate(final List<T> list, int distance) { | |
return new RotatedList<>(list, d); | ||
} | ||
|
||
public static void sortAndDedup(final ObjectArrayList<byte[]> array) { | ||
int len = array.size(); | ||
if (len > 1) { | ||
sort(array); | ||
int uniqueCount = 1; | ||
for (int i = 1; i < len; ++i) { | ||
if (Arrays.equals(array.get(i), array.get(i - 1)) == false) { | ||
array.set(uniqueCount++, array.get(i)); | ||
} | ||
} | ||
array.elementsCount = uniqueCount; | ||
} | ||
} | ||
|
||
public static void sort(final ObjectArrayList<byte[]> array) { | ||
new IntroSorter() { | ||
|
||
byte[] pivot; | ||
|
||
@Override | ||
protected void swap(int i, int j) { | ||
final byte[] tmp = array.get(i); | ||
array.set(i, array.get(j)); | ||
array.set(j, tmp); | ||
} | ||
|
||
@Override | ||
protected int compare(int i, int j) { | ||
return compare(array.get(i), array.get(j)); | ||
} | ||
|
||
@Override | ||
protected void setPivot(int i) { | ||
pivot = array.get(i); | ||
} | ||
|
||
@Override | ||
protected int comparePivot(int j) { | ||
return compare(pivot, array.get(j)); | ||
} | ||
|
||
private int compare(byte[] left, byte[] right) { | ||
for (int i = 0, j = 0; i < left.length && j < right.length; i++, j++) { | ||
int a = left[i] & 0xFF; | ||
int b = right[j] & 0xFF; | ||
if (a != b) { | ||
return a - b; | ||
} | ||
} | ||
return left.length - right.length; | ||
} | ||
|
||
}.sort(0, array.size()); | ||
} | ||
|
||
public static int[] toArray(Collection<Integer> ints) { | ||
Objects.requireNonNull(ints); | ||
return ints.stream().mapToInt(s -> s).toArray(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add javadocs that the list must be sorted according to the given comparator?
Also a bit of a nit-pick, but since this modifies the list in-place, I feel like naming the method after a verb would be more appropriate?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed to `uniquify`.