Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fst lucene90 #23

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ Optimizations

* GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X)

* GITHUB#12985: Make Lucene90BlockTreePostingsFormat build FST off-heap. (Anh Dung Bui)

Bug Fixes
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,23 @@ public final class Lucene90RWPostingsFormat extends PostingsFormat {

// Term block size bounds; checked by Lucene90BlockTreeTermsWriter.validateSettings at construction.
private final int minTermBlockSize;
private final int maxTermBlockSize;
// Assigned once at construction and passed along unchanged in fieldsConsumer, so it is final
// like the two block size settings above.
private final long blockHeapSizeLimitBytes;

/**
 * Creates {@code Lucene90RWPostingsFormat} with default settings.
 *
 * <p>Delegates to the parameterized constructor using the {@code DEFAULT_*} constants declared
 * on {@link Lucene90BlockTreeTermsWriter}.
 */
public Lucene90RWPostingsFormat() {
this(
Lucene90BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
Lucene90BlockTreeTermsWriter.DEFAULT_BLOCK_HEAP_LIMIT_BYTES);
}

/**
 * Creates {@code Lucene90RWPostingsFormat} with custom block sizes and block heap size limit.
 *
 * @param minTermBlockSize minimum term block size; validated together with {@code
 *     maxTermBlockSize} by {@code Lucene90BlockTreeTermsWriter.validateSettings}
 * @param maxTermBlockSize maximum term block size
 * @param blockHeapSizeLimitBytes block heap size limit in bytes; stored as given (no validation
 *     is performed here) and passed along in {@code fieldsConsumer}
 */
public Lucene90RWPostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
public Lucene90RWPostingsFormat(
int minTermBlockSize, int maxTermBlockSize, long blockHeapSizeLimitBytes) {
super("Lucene90");
Lucene90BlockTreeTermsWriter.validateSettings(minTermBlockSize, maxTermBlockSize);
this.minTermBlockSize = minTermBlockSize;
this.maxTermBlockSize = maxTermBlockSize;
this.blockHeapSizeLimitBytes = blockHeapSizeLimitBytes;
}

@Override
Expand All @@ -79,7 +83,8 @@ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException
postingsWriter,
minTermBlockSize,
maxTermBlockSize,
Lucene90BlockTreeTermsReader.VERSION_START);
Lucene90BlockTreeTermsReader.VERSION_START,
blockHeapSizeLimitBytes);
success = true;
return ret;
} finally {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.lucene90.blocktree.FieldReader;
import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene90.blocktree.Stats;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99SkipWriter;
Expand All @@ -45,7 +46,27 @@
import org.apache.lucene.tests.util.TestUtil;

public class TestLucene90PostingsFormat extends BasePostingsFormatTestCase {
// Codec under test: wraps Lucene90RWPostingsFormat via TestUtil.alwaysPostingsFormat, using the
// default min/max block sizes and a block heap size limit chosen by getBlockHeapSizeLimitBytes().
private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene90RWPostingsFormat());
private final Codec codec =
TestUtil.alwaysPostingsFormat(
new Lucene90RWPostingsFormat(
Lucene90BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
getBlockHeapSizeLimitBytes()));

/**
 * Picks the block heap size limit used to build the postings format under test.
 *
 * <p>The value is randomized between three equally likely states so that the test exercises a
 * zero limit, the default limit, and limits in between.
 *
 * @return {@code 0}, {@code DEFAULT_BLOCK_HEAP_LIMIT_BYTES}, or a random value in {@code [0,
 *     DEFAULT_BLOCK_HEAP_LIMIT_BYTES)}
 */
private static long getBlockHeapSizeLimitBytes() {
  // randomize the block heap max size between 3 states:
  // - 0, effectively disable on-heap FST and always use off-heap
  // - DEFAULT_BLOCK_HEAP_LIMIT_BYTES
  // - a random number between 0 and DEFAULT_BLOCK_HEAP_LIMIT_BYTES
  // The bound must be 3: nextInt(2) only yields 0 or 1, which would leave the third state
  // unreachable.
  int r = random().nextInt(3);
  if (r == 0) {
    return 0;
  }
  if (r == 1) {
    return Lucene90BlockTreeTermsWriter.DEFAULT_BLOCK_HEAP_LIMIT_BYTES;
  }
  return random().nextLong(Lucene90BlockTreeTermsWriter.DEFAULT_BLOCK_HEAP_LIMIT_BYTES);
}

@Override
protected Codec getCodec() {
Expand Down
Loading