From ba9fee502b0c18fda312da55c6304ac8a463f509 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Tue, 18 May 2021 08:49:53 +0200 Subject: [PATCH] LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (#141) --- lucene/CHANGES.txt | 2 + .../org/apache/lucene/util/PriorityQueue.java | 19 +++--- .../apache/lucene/util/TestPriorityQueue.java | 61 +++++++++++++++---- 3 files changed, 61 insertions(+), 21 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1a356fa78499..d852f08718e4 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -118,6 +118,8 @@ API Changes Improvements +* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss) + * LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words, fix various behavior differences between Java and C++ implementations, improve performance (Peter Gromov, Dawid Weiss) diff --git a/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java b/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java index a674225d7dee..fbd06abfe0d3 100644 --- a/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java +++ b/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java @@ -21,7 +21,7 @@ import java.util.function.Supplier; /** - * A PriorityQueue maintains a partial ordering of its elements such that the least element can + * A priority queue maintains a partial ordering of its elements such that the least element can * always be found in constant time. Put()'s and pop()'s require log(size) time but the remove() * cost implemented here is linear. * @@ -74,11 +74,11 @@ public PriorityQueue(int maxSize) { */ public PriorityQueue(int maxSize, Supplier sentinelObjectSupplier) { final int heapSize; + if (0 == maxSize) { // We allocate 1 extra to avoid if statement in top() heapSize = 2; } else { - if ((maxSize < 0) || (maxSize >= ArrayUtil.MAX_ARRAY_LENGTH)) { // Throw exception to prevent confusing OOME: throw new IllegalArgumentException( @@ -89,7 +89,8 @@ public PriorityQueue(int maxSize, Supplier sentinelObjectSupplier) { // 1-based not 0-based. heap[0] is unused. heapSize = maxSize + 1; } - // T is unbounded type, so this unchecked cast works always: + + // T is an unbounded type, so this unchecked cast works always. @SuppressWarnings("unchecked") final T[] h = (T[]) new Object[heapSize]; this.heap = h; @@ -121,9 +122,11 @@ public PriorityQueue(int maxSize, Supplier sentinelObjectSupplier) { * @return the new 'top' element in the queue. */ public final T add(T element) { - size++; - heap[size] = element; - upHeap(size); + // don't modify size until we know heap access didn't throw AIOOB. + int index = size + 1; + heap[index] = element; + size = index; + upHeap(index); return heap[1]; } @@ -138,7 +141,7 @@ public T insertWithOverflow(T element) { if (size < maxSize) { add(element); return null; - } else if (size > 0 && !lessThan(element, heap[1])) { + } else if (size > 0 && lessThan(heap[1], element)) { T ret = heap[1]; heap[1] = element; updateTop(); @@ -273,7 +276,7 @@ private final void downHeap(int i) { * @lucene.internal */ protected final Object[] getHeapArray() { - return (Object[]) heap; + return heap; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java b/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java index 90fb087ad972..b580d140a40d 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java @@ -21,6 +21,8 @@ import java.util.List; import java.util.NoSuchElementException; import java.util.Random; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; public class TestPriorityQueue extends LuceneTestCase { @@ -47,6 +49,50 @@ protected final void checkValidity() { } } + public void testZeroSizedQueue() { + PriorityQueue pq = new IntegerQueue(0); + assertEquals((Object) 1, pq.insertWithOverflow(1)); + assertEquals(0, pq.size()); + + // should fail, but passes and modifies the top... + pq.add(1); + assertEquals((Object) 1, pq.top()); + } + + public void testNoExtraWorkOnEqualElements() { + class Value { + private final int index; + private final int value; + + Value(int index, int value) { + this.index = index; + this.value = value; + } + } + + PriorityQueue pq = + new PriorityQueue<>(5) { + @Override + protected boolean lessThan(Value a, Value b) { + return a.value < b.value; + } + }; + + // Make all elements equal but record insertion order. + for (int i = 0; i < 100; i++) { + pq.insertWithOverflow(new Value(i, 0)); + } + + ArrayList indexes = new ArrayList<>(); + for (Value e : pq) { + indexes.add(e.index); + } + + // All elements are "equal" so we should have exactly the indexes of those elements that were + // added first. + MatcherAssert.assertThat(indexes, Matchers.containsInAnyOrder(0, 1, 2, 3, 4)); + } + public void testPQ() throws Exception { testPQ(atLeast(10000), random()); } @@ -61,13 +107,6 @@ public static void testPQ(int count, Random gen) { pq.add(next); } - // Date end = new Date(); - - // System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000); - // System.out.println(" microseconds/put"); - - // start = new Date(); - int last = Integer.MIN_VALUE; for (int i = 0; i < count; i++) { Integer next = pq.pop(); @@ -77,10 +116,6 @@ public static void testPQ(int count, Random gen) { } assertEquals(sum, sum2); - // end = new Date(); - - // System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000); - // System.out.println(" microseconds/pop"); } public void testClear() { @@ -141,7 +176,7 @@ public void testRemovalsAndInsertions() { if (evicted != null) { assertTrue(sds.remove(evicted)); if (evicted != newEntry) { - assertTrue(evicted == lastLeast); + assertSame(evicted, lastLeast); } } Integer newLeast = pq.top(); @@ -159,7 +194,7 @@ public void testRemovalsAndInsertions() { for (int p = 0; p < 500000; p++) { int element = (int) (random.nextFloat() * (sds.size() - 1)); Integer objectToRemove = sds.get(element); - assertTrue(sds.remove(element) == objectToRemove); + assertSame(sds.remove(element), objectToRemove); assertTrue(pq.remove(objectToRemove)); pq.checkValidity(); Integer newEntry = Math.abs(random.nextInt());