diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 96b4bf5eab75..4bc5cd95e684 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -30,6 +30,10 @@ Improvements system property to increase the set of Java versions that Panama Vectorization will provide optimized implementations for. (Chris Hegarty) +* GITHUB#266: TieredMergePolicy now allows merging up to maxMergeAtOnce + segments for merges below the floor segment size, even if maxMergeAtOnce is + bigger than segsPerTier. (Adrien Grand) + Optimizations --------------------- @@ -93,6 +97,12 @@ Optimizations * GITHUB#14023: Make JVM inlining decisions more predictable in our main queries. (Adrien Grand) +* GITHUB#14032: Speed up PostingsEnum when positions are requested. + (Adrien Grand) + +* GITHUB#14031: Ensure Panama float vector distance impls inlinable. + (Robert Muir, Chris Hegarty) + * GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina) Bug Fixes diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene101/Lucene101PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene101/Lucene101PostingsReader.java index 9e79aaf71e15..d879a58b4ab7 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene101/Lucene101PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene101/Lucene101PostingsReader.java @@ -638,9 +638,13 @@ final class EverythingEnum extends AbstractPostingsEnum { final boolean indexHasPayloads; final boolean indexHasOffsetsOrPayloads; - private int freq; // freq we last read + private long freqFP; // offset of the freq block + private int position; // current position + // value of docBufferUpto on the last doc ID when positions have been read + private int posDocBufferUpto; + // how many positions "behind" we are; nextPosition must // skip these to "catch up": private int posPendingCount; @@ -662,6 +666,7 @@ final class EverythingEnum extends AbstractPostingsEnum { private boolean needsOffsets; // true if we actually need offsets private boolean needsPayloads; // true if we actually need payloads + private boolean needsPayloadsOrOffsets; public EverythingEnum(FieldInfo fieldInfo) throws IOException { super(fieldInfo); @@ -745,8 +750,11 @@ public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOExcep lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; } - this.needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS); - this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS); + this.needsOffsets = + indexHasOffsets && PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS); + this.needsPayloads = + indexHasPayloads && PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS); + this.needsPayloadsOrOffsets = this.needsPayloads || this.needsOffsets; level1BlockPosUpto = 0; level1BlockPayUpto = 0; @@ -758,8 +766,13 @@ public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOExcep } @Override - public int freq() { - return freq; + public int freq() throws IOException { + if (freqFP != -1) { + docIn.seek(freqFP); + pforUtil.decode(docInUtil, freqBuffer); + freqFP = -1; + } + return freqBuffer[docBufferUpto - 1]; } private void refillDocs() throws IOException { @@ -768,11 +781,13 @@ private void refillDocs() throws IOException { if (left >= BLOCK_SIZE) { forDeltaUtil.decodeAndPrefixSum(docInUtil, prevDocID, docBuffer); - pforUtil.decode(docInUtil, freqBuffer); + freqFP = docIn.getFilePointer(); + PForUtil.skip(docIn); docCountUpto += BLOCK_SIZE; 
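A minimal sketch of the lazy freq-block decoding introduced in the hunk above, using simplified, hypothetical names (FreqBlockReader, decodeBlockAt) rather than the real Lucene101PostingsReader internals: refillDocs() only records the file pointer of the freq block and skips over it, and freq() decodes the block the first time it is actually called.

    import java.io.IOException;

    // Toy model of the lazy-decoding pattern; decodeBlockAt is a hypothetical stand-in
    // for docIn.seek(freqFP) followed by pforUtil.decode(docInUtil, freqBuffer).
    final class FreqBlockReader {
      private final int[] freqBuffer = new int[128];
      private long freqFP = -1; // file pointer of the pending freq block, or -1 once decoded

      // Called while refilling a doc block: remember where the freqs start, but do not decode yet.
      void skipFreqBlock(long filePointer) {
        freqFP = filePointer;
      }

      // Only consumers that actually ask for freqs pay the decoding cost.
      int freq(int indexInBlock) throws IOException {
        if (freqFP != -1) {
          decodeBlockAt(freqFP, freqBuffer);
          freqFP = -1;
        }
        return freqBuffer[indexInBlock];
      }

      private void decodeBlockAt(long fp, int[] dst) throws IOException {
        // stand-in for decoding one block of 128 freqs starting at file pointer fp
      }
    }
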
} else if (docFreq == 1) { docBuffer[0] = singletonDocID; freqBuffer[0] = (int) totalTermFreq; + freqFP = -1; docBuffer[1] = NO_MORE_DOCS; docCountUpto++; docBufferSize = 1; @@ -781,11 +796,13 @@ private void refillDocs() throws IOException { PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true); prefixSum(docBuffer, left, prevDocID); docBuffer[left] = NO_MORE_DOCS; + freqFP = -1; docCountUpto += left; docBufferSize = left; } prevDocID = docBuffer[BLOCK_SIZE - 1]; docBufferUpto = 0; + posDocBufferUpto = 0; assert docBuffer[docBufferSize] == NO_MORE_DOCS; } @@ -846,6 +863,8 @@ private void moveToNextLevel0Block() throws IOException { payloadByteUpto = level0BlockPayUpto; } posBufferUpto = BLOCK_SIZE; + } else { + posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, BLOCK_SIZE); } if (docFreq - docCountUpto >= BLOCK_SIZE) { @@ -875,34 +894,23 @@ public int nextDoc() throws IOException { } this.doc = docBuffer[docBufferUpto]; - this.freq = freqBuffer[docBufferUpto]; docBufferUpto++; - posPendingCount += freq; - position = 0; - lastStartOffset = 0; return doc; } private void skipLevel0To(int target) throws IOException { + long posFP; + int posUpto; + long payFP; + int payUpto; + while (true) { prevDocID = level0LastDocID; - // If nextBlockPosFP is less than the current FP, it means that the block of positions for - // the first docs of the next block are already decoded. In this case we just accumulate - // frequencies into posPendingCount instead of seeking backwards and decoding the same pos - // block again. - if (level0PosEndFP >= posIn.getFilePointer()) { - posIn.seek(level0PosEndFP); - posPendingCount = level0BlockPosUpto; - if (indexHasOffsetsOrPayloads) { - assert level0PayEndFP >= payIn.getFilePointer(); - payIn.seek(level0PayEndFP); - payloadByteUpto = level0BlockPayUpto; - } - posBufferUpto = BLOCK_SIZE; - } else { - posPendingCount += sumOverRange(freqBuffer, docBufferUpto, BLOCK_SIZE); - } + posFP = level0PosEndFP; + posUpto = level0BlockPosUpto; + payFP = level0PayEndFP; + payUpto = level0BlockPayUpto; if (docFreq - docCountUpto >= BLOCK_SIZE) { docIn.readVLong(); // skip0 num bytes @@ -931,6 +939,23 @@ private void skipLevel0To(int target) throws IOException { break; } } + + // If nextBlockPosFP is less than the current FP, it means that the block of positions for + // the first docs of the next block are already decoded. In this case we just accumulate + // frequencies into posPendingCount instead of seeking backwards and decoding the same pos + // block again. 
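+    // For example, after a refill the positions for the first docs of the new doc block may
+    // already have been decoded together with the previous block; summing the skipped docs'
+    // freqs keeps posPendingCount in sync without seeking posIn backwards.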
+ if (posFP >= posIn.getFilePointer()) { + posIn.seek(posFP); + posPendingCount = posUpto; + if (indexHasOffsetsOrPayloads) { + assert level0PayEndFP >= payIn.getFilePointer(); + payIn.seek(payFP); + payloadByteUpto = payUpto; + } + posBufferUpto = BLOCK_SIZE; + } else { + posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, BLOCK_SIZE); + } } @Override @@ -947,16 +972,12 @@ public int advance(int target) throws IOException { } int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize); - posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1); - this.freq = freqBuffer[next]; this.docBufferUpto = next + 1; - position = 0; - lastStartOffset = 0; return this.doc = docBuffer[next]; } - private void skipPositions() throws IOException { + private void skipPositions(int freq) throws IOException { // Skip positions now: int toSkip = posPendingCount - freq; // if (DEBUG) { @@ -1003,41 +1024,45 @@ private void skipPositions() throws IOException { lastStartOffset = 0; } - private void refillPositions() throws IOException { - if (posIn.getFilePointer() == lastPosBlockFP) { - final int count = (int) (totalTermFreq % BLOCK_SIZE); - int payloadLength = 0; - int offsetLength = 0; - payloadByteUpto = 0; - for (int i = 0; i < count; i++) { - int code = posIn.readVInt(); - if (indexHasPayloads) { - if ((code & 1) != 0) { - payloadLength = posIn.readVInt(); - } - payloadLengthBuffer[i] = payloadLength; - posDeltaBuffer[i] = code >>> 1; - if (payloadLength != 0) { - if (payloadByteUpto + payloadLength > payloadBytes.length) { - payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength); - } - posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength); - payloadByteUpto += payloadLength; + private void refillLastPositionBlock() throws IOException { + final int count = (int) (totalTermFreq % BLOCK_SIZE); + int payloadLength = 0; + int offsetLength = 0; + payloadByteUpto = 0; + for (int i = 0; i < count; i++) { + int code = posIn.readVInt(); + if (indexHasPayloads) { + if ((code & 1) != 0) { + payloadLength = posIn.readVInt(); + } + payloadLengthBuffer[i] = payloadLength; + posDeltaBuffer[i] = code >>> 1; + if (payloadLength != 0) { + if (payloadByteUpto + payloadLength > payloadBytes.length) { + payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength); } - } else { - posDeltaBuffer[i] = code; + posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength); + payloadByteUpto += payloadLength; } + } else { + posDeltaBuffer[i] = code; + } - if (indexHasOffsets) { - int deltaCode = posIn.readVInt(); - if ((deltaCode & 1) != 0) { - offsetLength = posIn.readVInt(); - } - offsetStartDeltaBuffer[i] = deltaCode >>> 1; - offsetLengthBuffer[i] = offsetLength; + if (indexHasOffsets) { + int deltaCode = posIn.readVInt(); + if ((deltaCode & 1) != 0) { + offsetLength = posIn.readVInt(); } + offsetStartDeltaBuffer[i] = deltaCode >>> 1; + offsetLengthBuffer[i] = offsetLength; } - payloadByteUpto = 0; + } + payloadByteUpto = 0; + } + + private void refillPositions() throws IOException { + if (posIn.getFilePointer() == lastPosBlockFP) { + refillLastPositionBlock(); } else { pforUtil.decode(posInUtil, posDeltaBuffer); @@ -1054,8 +1079,7 @@ private void refillPositions() throws IOException { // this works, because when writing a vint block we always force the first length to be // written PForUtil.skip(payIn); // skip over lengths - int numBytes = payIn.readVInt(); // read length of payloadBytes - payIn.seek(payIn.getFilePointer() + numBytes); // 
skip over payloadBytes + payIn.skipBytes(payIn.readVInt()); // skip over payloadBytes } payloadByteUpto = 0; } @@ -1074,13 +1098,40 @@ private void refillPositions() throws IOException { } } + private void accumulatePayloadAndOffsets() { + if (needsPayloads) { + payloadLength = payloadLengthBuffer[posBufferUpto]; + payload.bytes = payloadBytes; + payload.offset = payloadByteUpto; + payload.length = payloadLength; + payloadByteUpto += payloadLength; + } + + if (needsOffsets) { + startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto]; + endOffset = startOffset + offsetLengthBuffer[posBufferUpto]; + lastStartOffset = startOffset; + } + } + @Override public int nextPosition() throws IOException { - assert posPendingCount > 0; + if (posDocBufferUpto != docBufferUpto) { + int freq = freq(); // triggers lazy decoding of freqs + + // First position that is being read on this doc. + posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, docBufferUpto); + posDocBufferUpto = docBufferUpto; + + assert posPendingCount > 0; + + if (posPendingCount > freq) { + skipPositions(freq); + posPendingCount = freq; + } - if (posPendingCount > freq) { - skipPositions(); - posPendingCount = freq; + position = 0; + lastStartOffset = 0; } if (posBufferUpto == BLOCK_SIZE) { @@ -1089,18 +1140,8 @@ public int nextPosition() throws IOException { } position += posDeltaBuffer[posBufferUpto]; - if (indexHasPayloads) { - payloadLength = payloadLengthBuffer[posBufferUpto]; - payload.bytes = payloadBytes; - payload.offset = payloadByteUpto; - payload.length = payloadLength; - payloadByteUpto += payloadLength; - } - - if (indexHasOffsets) { - startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto]; - endOffset = startOffset + offsetLengthBuffer[posBufferUpto]; - lastStartOffset = startOffset; + if (needsPayloadsOrOffsets) { + accumulatePayloadAndOffsets(); } posBufferUpto++; @@ -1110,17 +1151,23 @@ public int nextPosition() throws IOException { @Override public int startOffset() { + if (needsOffsets == false) { + return -1; + } return startOffset; } @Override public int endOffset() { + if (needsOffsets == false) { + return -1; + } return endOffset; } @Override public BytesRef getPayload() { - if (payloadLength == 0) { + if (needsPayloads == false || payloadLength == 0) { return null; } else { return payload; @@ -1466,9 +1513,13 @@ final class BlockImpactsPostingsEnum extends BlockImpactsEnum { final boolean indexHasPayloads; final boolean indexHasOffsetsOrPayloads; - private int freq; // freq we last read + private long freqFP; // offset of the freq block + private int position; // current position + // value of docBufferUpto on the last doc ID when positions have been read + private int posDocBufferUpto; + // how many positions "behind" we are; nextPosition must // skip these to "catch up": private int posPendingCount; @@ -1516,8 +1567,13 @@ public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState } @Override - public int freq() { - return freq; + public int freq() throws IOException { + if (freqFP != -1) { + docIn.seek(freqFP); + pforUtil.decode(docInUtil, freqBuffer); + freqFP = -1; + } + return freqBuffer[docBufferUpto - 1]; } private void refillDocs() throws IOException { @@ -1526,24 +1582,30 @@ private void refillDocs() throws IOException { if (left >= BLOCK_SIZE) { forDeltaUtil.decodeAndPrefixSum(docInUtil, prevDocID, docBuffer); - pforUtil.decode(docInUtil, freqBuffer); + freqFP = docIn.getFilePointer(); + PForUtil.skip(docIn); docCountUpto += BLOCK_SIZE; } 
else if (docFreq == 1) { docBuffer[0] = singletonDocID; freqBuffer[0] = (int) totalTermFreq; + freqFP = -1; docBuffer[1] = NO_MORE_DOCS; docCountUpto++; docBufferSize = 1; + } else { // Read vInts: PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true); prefixSum(docBuffer, left, prevDocID); docBuffer[left] = NO_MORE_DOCS; + freqFP = -1; docCountUpto += left; docBufferSize = left; + freqFP = -1; } prevDocID = docBuffer[BLOCK_SIZE - 1]; docBufferUpto = 0; + posDocBufferUpto = 0; assert docBuffer[docBufferSize] == NO_MORE_DOCS; } @@ -1585,20 +1647,14 @@ private void skipLevel1To(int target) throws IOException { } private void skipLevel0To(int target) throws IOException { + long posFP; + int posUpto; + while (true) { prevDocID = level0LastDocID; - // If nextBlockPosFP is less than the current FP, it means that the block of positions for - // the first docs of the next block are already decoded. In this case we just accumulate - // frequencies into posPendingCount instead of seeking backwards and decoding the same pos - // block again. - if (level0PosEndFP >= posIn.getFilePointer()) { - posIn.seek(level0PosEndFP); - posPendingCount = level0BlockPosUpto; - posBufferUpto = BLOCK_SIZE; - } else { - posPendingCount += sumOverRange(freqBuffer, docBufferUpto, BLOCK_SIZE); - } + posFP = level0PosEndFP; + posUpto = level0BlockPosUpto; if (docFreq - docCountUpto >= BLOCK_SIZE) { docIn.readVLong(); // skip0 num bytes @@ -1631,6 +1687,18 @@ private void skipLevel0To(int target) throws IOException { break; } } + + // If nextBlockPosFP is less than the current FP, it means that the block of positions for + // the first docs of the next block are already decoded. In this case we just accumulate + // frequencies into posPendingCount instead of seeking backwards and decoding the same pos + // block again. + if (posFP >= posIn.getFilePointer()) { + posIn.seek(posFP); + posPendingCount = posUpto; + posBufferUpto = BLOCK_SIZE; + } else { + posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, BLOCK_SIZE); + } } @Override @@ -1660,30 +1728,25 @@ public int nextDoc() throws IOException { } doc = docBuffer[docBufferUpto]; - freq = freqBuffer[docBufferUpto]; - posPendingCount += freq; docBufferUpto++; - position = 0; return this.doc; } @Override public int advance(int target) throws IOException { - advanceShallow(target); - if (needsRefilling) { + if (target > level0LastDocID || needsRefilling) { + advanceShallow(target); + assert needsRefilling; refillDocs(); needsRefilling = false; } int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize); - posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1); - freq = freqBuffer[next]; docBufferUpto = next + 1; - position = 0; return this.doc = docBuffer[next]; } - private void skipPositions() throws IOException { + private void skipPositions(int freq) throws IOException { // Skip positions now: int toSkip = posPendingCount - freq; // if (DEBUG) { @@ -1703,8 +1766,6 @@ private void skipPositions() throws IOException { refillPositions(); posBufferUpto = toSkip; } - - position = 0; } private void refillPositions() throws IOException { @@ -1739,11 +1800,21 @@ private void refillPositions() throws IOException { @Override public int nextPosition() throws IOException { - assert posPendingCount > 0; + if (posDocBufferUpto != docBufferUpto) { + int freq = freq(); // triggers lazy decoding of freqs + + // First position that is being read on this doc. 
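+      // Catch up on the freqs of the docs skipped since positions were last read (including
+      // the current doc); anything beyond the current doc's freq is skipped below.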
+ posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, docBufferUpto); + posDocBufferUpto = docBufferUpto; + + assert posPendingCount > 0; + + if (posPendingCount > freq) { + skipPositions(freq); + posPendingCount = freq; + } - if (posPendingCount > freq) { - skipPositions(); - posPendingCount = freq; + position = 0; } if (posBufferUpto == BLOCK_SIZE) { diff --git a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java index 2fb0c0783a2e..6447b7305ef2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java @@ -85,7 +85,7 @@ public class TieredMergePolicy extends MergePolicy { public static final double DEFAULT_NO_CFS_RATIO = 0.1; // User-specified maxMergeAtOnce. In practice we always take the min of its - // value and segsPerTier to avoid suboptimal merging. + // value and segsPerTier for segments above the floor size to avoid suboptimal merging. private int maxMergeAtOnce = 10; private long maxMergedSegmentBytes = 5 * 1024 * 1024 * 1024L; @@ -100,7 +100,13 @@ public TieredMergePolicy() { super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE); } - /** Maximum number of segments to be merged at a time during "normal" merging. Default is 10. */ + /** + * Maximum number of segments to be merged at a time during "normal" merging. Default is 10. + * + *

NOTE: Merges above the {@link #setFloorSegmentMB(double) floor segment size} also + * bound the number of merged segments by {@link #setSegmentsPerTier(double) the number of + * segments per tier}. + */ public TieredMergePolicy setMaxMergeAtOnce(int v) { if (v < 2) { throw new IllegalArgumentException("maxMergeAtOnce must be > 1 (got " + v + ")"); @@ -557,46 +563,46 @@ private MergeSpecification doFindMerges( for (int startIdx = 0; startIdx < sortedEligible.size(); startIdx++) { - long totAfterMergeBytes = 0; - final List candidate = new ArrayList<>(); boolean hitTooLarge = false; long bytesThisMerge = 0; long docCountThisMerge = 0; for (int idx = startIdx; idx < sortedEligible.size() - && candidate.size() < mergeFactor + && candidate.size() < maxMergeAtOnce + // We allow merging more than mergeFactor segments together if the merged segment + // would be less than the floor segment size. This is important because segments + // below the floor segment size are more aggressively merged by this policy, so we + // need to grow them as quickly as possible. + && (candidate.size() < mergeFactor || bytesThisMerge < floorSegmentBytes) && bytesThisMerge < maxMergedSegmentBytes && (bytesThisMerge < floorSegmentBytes || docCountThisMerge <= allowedDocCount); idx++) { final SegmentSizeAndDocs segSizeDocs = sortedEligible.get(idx); final long segBytes = segSizeDocs.sizeInBytes; int segDocCount = segSizeDocs.maxDoc - segSizeDocs.delCount; - if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes - || (totAfterMergeBytes > floorSegmentBytes + if (bytesThisMerge + segBytes > maxMergedSegmentBytes + || (bytesThisMerge > floorSegmentBytes && docCountThisMerge + segDocCount > allowedDocCount)) { // Only set hitTooLarge when reaching the maximum byte size, as this will create // segments of the maximum size which will no longer be eligible for merging for a long // time (until they accumulate enough deletes). - hitTooLarge |= totAfterMergeBytes + segBytes > maxMergedSegmentBytes; - if (candidate.size() == 0) { - // We should never have something coming in that _cannot_ be merged, so handle - // singleton merges - candidate.add(segSizeDocs.segInfo); - bytesThisMerge += segBytes; + hitTooLarge |= bytesThisMerge + segBytes > maxMergedSegmentBytes; + // We should never have something coming in that _cannot_ be merged, so handle + // singleton merges + if (candidate.size() > 0) { + // NOTE: we continue, so that we can try + // "packing" smaller segments into this merge + // to see if we can get closer to the max + // size; this in general is not perfect since + // this is really "bin packing" and we'd have + // to try different permutations. + continue; } - // NOTE: we continue, so that we can try - // "packing" smaller segments into this merge - // to see if we can get closer to the max - // size; this in general is not perfect since - // this is really "bin packing" and we'd have - // to try different permutations. - continue; } candidate.add(segSizeDocs.segInfo); bytesThisMerge += segBytes; docCountThisMerge += segDocCount; - totAfterMergeBytes += segBytes; } // We should never see an empty candidate: we iterated over maxMergeAtOnce @@ -645,7 +651,7 @@ private MergeSpecification doFindMerges( + " tooLarge=" + hitTooLarge + " size=" - + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes / 1024. / 1024.), + + String.format(Locale.ROOT, "%.3f MB", bytesThisMerge / 1024. 
/ 1024.), mergeContext); } @@ -654,7 +660,7 @@ private MergeSpecification doFindMerges( best = candidate; bestScore = score; bestTooLarge = hitTooLarge; - bestMergeBytes = totAfterMergeBytes; + bestMergeBytes = bytesThisMerge; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java b/lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java index e9a23921eceb..9ebc053a6605 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java @@ -31,8 +31,7 @@ */ public final class ScoreCachingWrappingScorer extends Scorable { - private int lastDoc = -1; - private int curDoc = -1; + private boolean scoreIsCached; private float curScore; private final Scorable in; @@ -64,7 +63,8 @@ public void setScorer(Scorable scorer) throws IOException { @Override public void collect(int doc) throws IOException { if (scorer != null) { - scorer.curDoc = doc; + // Invalidate cache when collecting a new doc + scorer.scoreIsCached = false; } super.collect(doc); } @@ -82,9 +82,9 @@ private ScoreCachingWrappingScorer(Scorable scorer) { @Override public float score() throws IOException { - if (lastDoc != curDoc) { + if (scoreIsCached == false) { curScore = in.score(); - curDoc = lastDoc; + scoreIsCached = true; } return curScore; diff --git a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java index 9273f7c5a813..18ef76914bbf 100644 --- a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java +++ b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java @@ -75,6 +75,9 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport { } } + // cached vector sizes for smaller method bodies + private static final int FLOAT_SPECIES_LENGTH = FLOAT_SPECIES.length(); + // the way FMA should work! 
if available use it, otherwise fall back to mul/add private static FloatVector fma(FloatVector a, FloatVector b, FloatVector c) { if (Constants.HAS_FAST_VECTOR_FMA) { @@ -99,7 +102,7 @@ public float dotProduct(float[] a, float[] b) { float res = 0; // if the array size is large (> 2x platform vector size), its worth the overhead to vectorize - if (a.length > 2 * FLOAT_SPECIES.length()) { + if (a.length > 2 * FLOAT_SPECIES_LENGTH) { i += FLOAT_SPECIES.loopBound(a.length); res += dotProductBody(a, b, i); } @@ -120,30 +123,33 @@ private float dotProductBody(float[] a, float[] b, int limit) { FloatVector acc2 = FloatVector.zero(FLOAT_SPECIES); FloatVector acc3 = FloatVector.zero(FLOAT_SPECIES); FloatVector acc4 = FloatVector.zero(FLOAT_SPECIES); - int unrolledLimit = limit - 3 * FLOAT_SPECIES.length(); - for (; i < unrolledLimit; i += 4 * FLOAT_SPECIES.length()) { + final int unrolledLimit = limit - 3 * FLOAT_SPECIES_LENGTH; + for (; i < unrolledLimit; i += 4 * FLOAT_SPECIES_LENGTH) { // one FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); acc1 = fma(va, vb, acc1); // two - FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i + FLOAT_SPECIES.length()); - FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i + FLOAT_SPECIES.length()); + final int i2 = i + FLOAT_SPECIES_LENGTH; + FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i2); + FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i2); acc2 = fma(vc, vd, acc2); // three - FloatVector ve = FloatVector.fromArray(FLOAT_SPECIES, a, i + 2 * FLOAT_SPECIES.length()); - FloatVector vf = FloatVector.fromArray(FLOAT_SPECIES, b, i + 2 * FLOAT_SPECIES.length()); + final int i3 = i2 + FLOAT_SPECIES_LENGTH; + FloatVector ve = FloatVector.fromArray(FLOAT_SPECIES, a, i3); + FloatVector vf = FloatVector.fromArray(FLOAT_SPECIES, b, i3); acc3 = fma(ve, vf, acc3); // four - FloatVector vg = FloatVector.fromArray(FLOAT_SPECIES, a, i + 3 * FLOAT_SPECIES.length()); - FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i + 3 * FLOAT_SPECIES.length()); + final int i4 = i3 + FLOAT_SPECIES_LENGTH; + FloatVector vg = FloatVector.fromArray(FLOAT_SPECIES, a, i4); + FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i4); acc4 = fma(vg, vh, acc4); } // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes - for (; i < limit; i += FLOAT_SPECIES.length()) { + for (; i < limit; i += FLOAT_SPECIES_LENGTH) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); acc1 = fma(va, vb, acc1); @@ -162,7 +168,7 @@ public float cosine(float[] a, float[] b) { float norm2 = 0; // if the array size is large (> 2x platform vector size), its worth the overhead to vectorize - if (a.length > 2 * FLOAT_SPECIES.length()) { + if (a.length > 2 * FLOAT_SPECIES_LENGTH) { i += FLOAT_SPECIES.loopBound(a.length); float[] ret = cosineBody(a, b, i); sum += ret[0]; @@ -190,8 +196,8 @@ private float[] cosineBody(float[] a, float[] b, int limit) { FloatVector norm1_2 = FloatVector.zero(FLOAT_SPECIES); FloatVector norm2_1 = FloatVector.zero(FLOAT_SPECIES); FloatVector norm2_2 = FloatVector.zero(FLOAT_SPECIES); - int unrolledLimit = limit - FLOAT_SPECIES.length(); - for (; i < unrolledLimit; i += 2 * FLOAT_SPECIES.length()) { + final int unrolledLimit = limit - FLOAT_SPECIES_LENGTH; + for (; i < unrolledLimit; i += 2 * FLOAT_SPECIES_LENGTH) { // one FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, 
i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); @@ -200,14 +206,15 @@ private float[] cosineBody(float[] a, float[] b, int limit) { norm2_1 = fma(vb, vb, norm2_1); // two - FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i + FLOAT_SPECIES.length()); - FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i + FLOAT_SPECIES.length()); + final int i2 = i + FLOAT_SPECIES_LENGTH; + FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i2); + FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i2); sum2 = fma(vc, vd, sum2); norm1_2 = fma(vc, vc, norm1_2); norm2_2 = fma(vd, vd, norm2_2); } // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes - for (; i < limit; i += FLOAT_SPECIES.length()) { + for (; i < limit; i += FLOAT_SPECIES_LENGTH) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); sum1 = fma(va, vb, sum1); @@ -227,7 +234,7 @@ public float squareDistance(float[] a, float[] b) { float res = 0; // if the array size is large (> 2x platform vector size), its worth the overhead to vectorize - if (a.length > 2 * FLOAT_SPECIES.length()) { + if (a.length > 2 * FLOAT_SPECIES_LENGTH) { i += FLOAT_SPECIES.loopBound(a.length); res += squareDistanceBody(a, b, i); } @@ -240,6 +247,12 @@ public float squareDistance(float[] a, float[] b) { return res; } + /** helper: returns fma(a.sub(b), a.sub(b), c) */ + private static FloatVector square(FloatVector a, FloatVector b, FloatVector c) { + FloatVector diff = a.sub(b); + return fma(diff, diff, c); + } + /** vectorized square distance body */ private float squareDistanceBody(float[] a, float[] b, int limit) { int i = 0; @@ -249,38 +262,36 @@ private float squareDistanceBody(float[] a, float[] b, int limit) { FloatVector acc2 = FloatVector.zero(FLOAT_SPECIES); FloatVector acc3 = FloatVector.zero(FLOAT_SPECIES); FloatVector acc4 = FloatVector.zero(FLOAT_SPECIES); - int unrolledLimit = limit - 3 * FLOAT_SPECIES.length(); - for (; i < unrolledLimit; i += 4 * FLOAT_SPECIES.length()) { + final int unrolledLimit = limit - 3 * FLOAT_SPECIES_LENGTH; + for (; i < unrolledLimit; i += 4 * FLOAT_SPECIES_LENGTH) { // one FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); - FloatVector diff1 = va.sub(vb); - acc1 = fma(diff1, diff1, acc1); + acc1 = square(va, vb, acc1); // two - FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i + FLOAT_SPECIES.length()); - FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i + FLOAT_SPECIES.length()); - FloatVector diff2 = vc.sub(vd); - acc2 = fma(diff2, diff2, acc2); + final int i2 = i + FLOAT_SPECIES_LENGTH; + FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i2); + FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i2); + acc2 = square(vc, vd, acc2); // three - FloatVector ve = FloatVector.fromArray(FLOAT_SPECIES, a, i + 2 * FLOAT_SPECIES.length()); - FloatVector vf = FloatVector.fromArray(FLOAT_SPECIES, b, i + 2 * FLOAT_SPECIES.length()); - FloatVector diff3 = ve.sub(vf); - acc3 = fma(diff3, diff3, acc3); + final int i3 = i2 + FLOAT_SPECIES_LENGTH; + FloatVector ve = FloatVector.fromArray(FLOAT_SPECIES, a, i3); + FloatVector vf = FloatVector.fromArray(FLOAT_SPECIES, b, i3); + acc3 = square(ve, vf, acc3); // four - FloatVector vg = FloatVector.fromArray(FLOAT_SPECIES, a, i + 3 * FLOAT_SPECIES.length()); - FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i + 3 * FLOAT_SPECIES.length()); - 
FloatVector diff4 = vg.sub(vh); - acc4 = fma(diff4, diff4, acc4); + final int i4 = i3 + FLOAT_SPECIES_LENGTH; + FloatVector vg = FloatVector.fromArray(FLOAT_SPECIES, a, i4); + FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i4); + acc4 = square(vg, vh, acc4); } // vector tail: less scalar computations for unaligned sizes, esp with big vector sizes - for (; i < limit; i += FLOAT_SPECIES.length()) { + for (; i < limit; i += FLOAT_SPECIES_LENGTH) { FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i); FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i); - FloatVector diff = va.sub(vb); - acc1 = fma(diff, diff, acc1); + acc1 = square(va, vb, acc1); } // reduce FloatVector res1 = acc1.add(acc2); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java index a2d678a3ec04..a27bdc5f92cd 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java @@ -163,9 +163,8 @@ protected void assertSegmentInfos(MergePolicy policy, SegmentInfos infos) throws @Override protected void assertMerge(MergePolicy policy, MergeSpecification merges) { TieredMergePolicy tmp = (TieredMergePolicy) policy; - final int mergeFactor = (int) Math.min(tmp.getMaxMergeAtOnce(), tmp.getSegmentsPerTier()); for (OneMerge merge : merges.merges) { - assertTrue(merge.segments.size() <= mergeFactor); + assertTrue(merge.segments.size() <= tmp.getMaxMergeAtOnce()); } } @@ -943,6 +942,49 @@ public void testSimulateUpdates() throws IOException { doTestSimulateUpdates(mergePolicy, numDocs, 2500); } + public void testMergeSizeIsLessThanFloorSize() throws IOException { + MergeContext mergeContext = new MockMergeContext(SegmentCommitInfo::getDelCount); + + SegmentInfos infos = new SegmentInfos(Version.LATEST.major); + // 50 1MB segments + for (int i = 0; i < 50; ++i) { + infos.add(makeSegmentCommitInfo("_0", 1_000_000, 0, 1, IndexWriter.SOURCE_FLUSH)); + } + + TieredMergePolicy mergePolicy = new TieredMergePolicy(); + mergePolicy.setMaxMergeAtOnce(30); + mergePolicy.setFloorSegmentMB(0.1); + + // Segments are above the floor segment size, we get 4 merges of mergeFactor=10 segments each + MergeSpecification mergeSpec = + mergePolicy.findMerges(MergeTrigger.FULL_FLUSH, infos, mergeContext); + assertNotNull(mergeSpec); + assertEquals(4, mergeSpec.merges.size()); + for (OneMerge oneMerge : mergeSpec.merges) { + assertEquals(mergePolicy.getSegmentsPerTier(), oneMerge.segments.size(), 0d); + } + + // Segments are below the floor segment size and it takes 15 segments to go above the floor + // segment size. We get 3 merges of 15 segments each + mergePolicy.setFloorSegmentMB(15); + mergeSpec = mergePolicy.findMerges(MergeTrigger.FULL_FLUSH, infos, mergeContext); + assertNotNull(mergeSpec); + assertEquals(3, mergeSpec.merges.size()); + for (OneMerge oneMerge : mergeSpec.merges) { + assertEquals(15, oneMerge.segments.size()); + } + + // Segments are below the floor segment size and we'd need to merge more than maxMergeAtOnce + // segments to go above the minimum segment size. We get 1 merge of maxMergeAtOnce=30 segments + // and 1 merge of 50-30=20 segments. 
+    mergePolicy.setFloorSegmentMB(60);
+    mergeSpec = mergePolicy.findMerges(MergeTrigger.FULL_FLUSH, infos, mergeContext);
+    assertNotNull(mergeSpec);
+    assertEquals(2, mergeSpec.merges.size());
+    assertEquals(30, mergeSpec.merges.get(0).segments.size());
+    assertEquals(20, mergeSpec.merges.get(1).segments.size());
+  }
+
   public void testFullFlushMerges() throws IOException {
     AtomicLong segNameGenerator = new AtomicLong();
     IOStats stats = new IOStats();
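A hedged configuration sketch of how the new TieredMergePolicy behavior can be exercised from user code; the concrete values (30, 10, 16 MB) are illustrative, not recommendations. With this change, merges whose combined size stays below the floor segment size may pack up to maxMergeAtOnce segments, while merges above the floor remain bounded by segmentsPerTier.

    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.TieredMergePolicy;

    final class FloorMergeConfigExample {
      static IndexWriterConfig newConfig() {
        TieredMergePolicy tmp = new TieredMergePolicy();
        tmp.setSegmentsPerTier(10);  // merges above the floor stay at ~10 segments per merge
        tmp.setMaxMergeAtOnce(30);   // sub-floor merges may now combine up to 30 segments
        tmp.setFloorSegmentMB(16);   // segments below 16 MB are merged more aggressively
        return new IndexWriterConfig().setMergePolicy(tmp);
      }
    }
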