Skip to content

Commit

Permalink
Introduce Elasticsearch PostingFormat based on Lucene 90 posting for…
Browse files Browse the repository at this point in the history
…mat using PFOR (elastic#103601) (elastic#103624)

Lucene 9.9 has introduced a new postings format that uses FOR instead of PFOR. Elasticsearch prefers the older,
PFOR-based format, therefore we introduce it as our own postings format here.
  • Loading branch information
iverase authored Dec 20, 2023
1 parent 384a944 commit f737e81
Show file tree
Hide file tree
Showing 17 changed files with 5,248 additions and 4 deletions.
7 changes: 7 additions & 0 deletions docs/changelog/103601.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 103601
summary: Introduce Elasticsearch `PostingFormat` based on Lucene 90 posting format
using PFOR
area: Search
type: bug
issues:
- 103002
3 changes: 2 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,8 @@
provides org.apache.lucene.codecs.PostingsFormat
with
org.elasticsearch.index.codec.bloomfilter.ES85BloomFilterPostingsFormat,
org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat,
org.elasticsearch.index.codec.postings.ES812PostingsFormat;
provides org.apache.lucene.codecs.DocValuesFormat with ES87TSDBDocValuesFormat;

exports org.elasticsearch.cluster.routing.allocation.shards
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.store.LuceneFilesExtensions;

Expand Down Expand Up @@ -302,6 +303,9 @@ private static void readProximity(Terms terms, PostingsEnum postings) throws IOE
private static BlockTermState getBlockTermState(TermsEnum termsEnum, BytesRef term) throws IOException {
if (term != null && termsEnum.seekExact(term)) {
final TermState termState = termsEnum.termState();
if (termState instanceof final ES812PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
if (termState instanceof final Lucene99PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
Expand All @@ -44,6 +45,8 @@ public final class PerFieldMapperCodec extends Lucene99Codec {
private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat;
private final ES87TSDBDocValuesFormat tsdbDocValuesFormat;

private final ES812PostingsFormat es812PostingsFormat;

static {
assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMapperCodec.class)
: "PerFieldMapperCodec must subclass the latest lucene codec: " + Lucene.LATEST_CODEC;
Expand All @@ -54,6 +57,7 @@ public PerFieldMapperCodec(Mode compressionMode, MapperService mapperService, Bi
this.mapperService = mapperService;
this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, this::internalGetPostingsFormatForField);
this.tsdbDocValuesFormat = new ES87TSDBDocValuesFormat();
this.es812PostingsFormat = new ES812PostingsFormat();
}

@Override
Expand All @@ -69,7 +73,8 @@ private PostingsFormat internalGetPostingsFormatForField(String field) {
if (format != null) {
return format;
}
return super.getPostingsFormatForField(field);
// return our own posting format using PFOR
return es812PostingsFormat;
}

boolean useBloomFilter(String field) {
Expand Down

Large diffs are not rendered by default.

Loading

0 comments on commit f737e81

Please sign in to comment.