From 6f94eef711a9a8714ec85007c8d020d666d2ba2e Mon Sep 17 00:00:00 2001 From: panguixin Date: Mon, 26 Aug 2024 23:53:41 +0800 Subject: [PATCH 1/2] replace Map with IntObjectHashMap for DV producer --- .../lucene80/Lucene80DocValuesProducer.java | 33 +++++----- .../lucene90/Lucene90DocValuesProducer.java | 63 +++++++++---------- .../perfield/PerFieldDocValuesFormat.java | 25 ++++---- .../index/SegmentDocValuesProducer.java | 21 +++---- .../SlowCompositeCodecReaderWrapper.java | 13 ++-- 5 files changed, 76 insertions(+), 79 deletions(-) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java index c5754e5d1e52..211267d4c031 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java @@ -17,8 +17,6 @@ package org.apache.lucene.backward_codecs.lucene80; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicReader; import org.apache.lucene.backward_codecs.packed.LegacyDirectReader; import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; @@ -41,6 +39,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; @@ -53,11 +52,11 @@ /** reader for {@link Lucene80DocValuesFormat} */ final class Lucene80DocValuesProducer extends DocValuesProducer { - private final Map numerics = new HashMap<>(); - private final Map binaries = new HashMap<>(); - private final Map sorted = new HashMap<>(); - private final Map sortedSets = new HashMap<>(); - private final Map sortedNumerics = new HashMap<>(); + private final IntObjectHashMap numerics = new IntObjectHashMap<>(); + private final IntObjectHashMap binaries = new IntObjectHashMap<>(); + private final IntObjectHashMap sorted = new IntObjectHashMap<>(); + private final IntObjectHashMap sortedSets = new IntObjectHashMap<>(); + private final IntObjectHashMap sortedNumerics = new IntObjectHashMap<>(); private final IndexInput data; private final int maxDoc; private int version = -1; @@ -139,7 +138,7 @@ private void readFields(String segmentName, IndexInput meta, FieldInfos infos) } byte type = meta.readByte(); if (type == Lucene80DocValuesFormat.NUMERIC) { - numerics.put(info.name, readNumeric(meta)); + numerics.put(info.number, readNumeric(meta)); } else if (type == Lucene80DocValuesFormat.BINARY) { final boolean compressed; if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) { @@ -158,13 +157,13 @@ private void readFields(String segmentName, IndexInput meta, FieldInfos infos) } else { compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED; } - binaries.put(info.name, readBinary(meta, compressed)); + binaries.put(info.number, readBinary(meta, compressed)); } else if (type == Lucene80DocValuesFormat.SORTED) { - sorted.put(info.name, readSorted(meta)); + sorted.put(info.number, readSorted(meta)); } else if (type == Lucene80DocValuesFormat.SORTED_SET) { - sortedSets.put(info.name, readSortedSet(meta)); + sortedSets.put(info.number, readSortedSet(meta)); } else if (type == Lucene80DocValuesFormat.SORTED_NUMERIC) { - sortedNumerics.put(info.name, readSortedNumeric(meta)); + sortedNumerics.put(info.number, readSortedNumeric(meta)); } else { throw new CorruptIndexException("invalid type: " + type, meta); } @@ -426,7 +425,7 @@ private static class SortedNumericEntry extends NumericEntry { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - NumericEntry entry = numerics.get(field.name); + NumericEntry entry = numerics.get(field.number); return getNumeric(entry); } @@ -915,7 +914,7 @@ BytesRef decode(int docNumber) throws IOException { @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - BinaryEntry entry = binaries.get(field.name); + BinaryEntry entry = binaries.get(field.number); if (entry.compressed) { return getCompressedBinary(entry); } else { @@ -973,7 +972,7 @@ public BytesRef binaryValue() throws IOException { @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - SortedEntry entry = sorted.get(field.name); + SortedEntry entry = sorted.get(field.number); return getSorted(entry); } @@ -1407,7 +1406,7 @@ public int docFreq() throws IOException { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - SortedNumericEntry entry = sortedNumerics.get(field.name); + SortedNumericEntry entry = sortedNumerics.get(field.number); if (entry.numValues == entry.numDocsWithField) { return DocValues.singleton(getNumeric(entry)); } @@ -1543,7 +1542,7 @@ private void set() { @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - SortedSetEntry entry = sortedSets.get(field.name); + SortedSetEntry entry = sortedSets.get(field.number); if (entry.singleValueEntry != null) { return DocValues.singleton(getSorted(entry.singleValueEntry)); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index a44092dbc246..f6222f2a21f1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -21,8 +21,6 @@ import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BaseTermsEnum; @@ -42,6 +40,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; @@ -58,12 +57,12 @@ /** reader for {@link Lucene90DocValuesFormat} */ final class Lucene90DocValuesProducer extends DocValuesProducer { - private final Map numerics; - private final Map binaries; - private final Map sorted; - private final Map sortedSets; - private final Map sortedNumerics; - private final Map skippers; + private final IntObjectHashMap numerics; + private final IntObjectHashMap binaries; + private final IntObjectHashMap sorted; + private final IntObjectHashMap sortedSets; + private final IntObjectHashMap sortedNumerics; + private final IntObjectHashMap skippers; private final IndexInput data; private final int maxDoc; private int version = -1; @@ -80,12 +79,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); this.maxDoc = state.segmentInfo.maxDoc(); - numerics = new HashMap<>(); - binaries = new HashMap<>(); - sorted = new HashMap<>(); - sortedSets = new HashMap<>(); - sortedNumerics = new HashMap<>(); - skippers = new HashMap<>(); + numerics = new IntObjectHashMap<>(); + binaries = new IntObjectHashMap<>(); + sorted = new IntObjectHashMap<>(); + sortedSets = new IntObjectHashMap<>(); + sortedNumerics = new IntObjectHashMap<>(); + skippers = new IntObjectHashMap<>(); merging = false; // read in the entries from the metadata file. @@ -148,12 +147,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { // Used for cloning private Lucene90DocValuesProducer( - Map numerics, - Map binaries, - Map sorted, - Map sortedSets, - Map sortedNumerics, - Map skippers, + IntObjectHashMap numerics, + IntObjectHashMap binaries, + IntObjectHashMap sorted, + IntObjectHashMap sortedSets, + IntObjectHashMap sortedNumerics, + IntObjectHashMap skippers, IndexInput data, int maxDoc, int version, @@ -193,18 +192,18 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException { } byte type = meta.readByte(); if (info.hasDocValuesSkipIndex()) { - skippers.put(info.name, readDocValueSkipperMeta(meta)); + skippers.put(info.number, readDocValueSkipperMeta(meta)); } if (type == Lucene90DocValuesFormat.NUMERIC) { - numerics.put(info.name, readNumeric(meta)); + numerics.put(info.number, readNumeric(meta)); } else if (type == Lucene90DocValuesFormat.BINARY) { - binaries.put(info.name, readBinary(meta)); + binaries.put(info.number, readBinary(meta)); } else if (type == Lucene90DocValuesFormat.SORTED) { - sorted.put(info.name, readSorted(meta)); + sorted.put(info.number, readSorted(meta)); } else if (type == Lucene90DocValuesFormat.SORTED_SET) { - sortedSets.put(info.name, readSortedSet(meta)); + sortedSets.put(info.number, readSortedSet(meta)); } else if (type == Lucene90DocValuesFormat.SORTED_NUMERIC) { - sortedNumerics.put(info.name, readSortedNumeric(meta)); + sortedNumerics.put(info.number, readSortedNumeric(meta)); } else { throw new CorruptIndexException("invalid type: " + type, meta); } @@ -429,7 +428,7 @@ private static class SortedNumericEntry extends NumericEntry { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - NumericEntry entry = numerics.get(field.name); + NumericEntry entry = numerics.get(field.number); return getNumeric(entry); } @@ -785,7 +784,7 @@ public boolean advanceExact(int target) throws IOException { @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - BinaryEntry entry = binaries.get(field.name); + BinaryEntry entry = binaries.get(field.number); if (entry.docsWithFieldOffset == -2) { return DocValues.emptyBinary(); @@ -890,7 +889,7 @@ public BytesRef binaryValue() throws IOException { @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - SortedEntry entry = sorted.get(field.name); + SortedEntry entry = sorted.get(field.number); return getSorted(entry); } @@ -1367,7 +1366,7 @@ public int docFreq() throws IOException { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - SortedNumericEntry entry = sortedNumerics.get(field.name); + SortedNumericEntry entry = sortedNumerics.get(field.number); return getSortedNumeric(entry); } @@ -1512,7 +1511,7 @@ private void set() { @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - SortedSetEntry entry = sortedSets.get(field.name); + SortedSetEntry entry = sortedSets.get(field.number); if (entry.singleValueEntry != null) { return DocValues.singleton(getSorted(entry.singleValueEntry)); } @@ -1786,7 +1785,7 @@ long getLongValue(long index) throws IOException { @Override public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { - final DocValuesSkipperEntry entry = skippers.get(field.name); + final DocValuesSkipperEntry entry = skippers.get(field.number); final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length); // Prefetch the first page of data. Following pages are expected to get prefetched through diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java index 2496278fe7a7..6723f530aae0 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java @@ -38,6 +38,7 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.util.IOUtils; /** @@ -257,9 +258,9 @@ static String getFullSegmentSuffix(String outerSegmentSuffix, String segmentSuff } } - private class FieldsReader extends DocValuesProducer { + private static class FieldsReader extends DocValuesProducer { - private final Map fields = new HashMap<>(); + private final IntObjectHashMap fields = new IntObjectHashMap<>(); private final Map formats = new HashMap<>(); // clone for merge @@ -273,10 +274,10 @@ private class FieldsReader extends DocValuesProducer { } // Then rebuild fields: - for (Map.Entry ent : other.fields.entrySet()) { - DocValuesProducer producer = oldToNew.get(ent.getValue()); + for (IntObjectHashMap.IntObjectCursor ent : other.fields) { + DocValuesProducer producer = oldToNew.get(ent.value); assert producer != null; - fields.put(ent.getKey(), producer); + fields.put(ent.key, producer); } } @@ -305,7 +306,7 @@ public FieldsReader(final SegmentReadState readState) throws IOException { segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix))); } - fields.put(fieldName, formats.get(segmentSuffix)); + fields.put(fi.number, formats.get(segmentSuffix)); } } } @@ -319,37 +320,37 @@ public FieldsReader(final SegmentReadState readState) throws IOException { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getNumeric(field); } @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getBinary(field); } @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getSorted(field); } @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getSortedNumeric(field); } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getSortedSet(field); } @Override public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { - DocValuesProducer producer = fields.get(field.name); + DocValuesProducer producer = fields.get(field.number); return producer == null ? null : producer.getSkipper(field); } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java index 1d9878fe0dbc..0f4df818ddcb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java @@ -18,11 +18,10 @@ import java.io.IOException; import java.util.Collections; -import java.util.HashMap; import java.util.IdentityHashMap; -import java.util.Map; import java.util.Set; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.internal.hppc.LongArrayList; import org.apache.lucene.store.Directory; @@ -32,7 +31,7 @@ // producer? class SegmentDocValuesProducer extends DocValuesProducer { - final Map dvProducersByField = new HashMap<>(); + final IntObjectHashMap dvProducersByField = new IntObjectHashMap<>(); final Set dvProducers = Collections.newSetFromMap(new IdentityHashMap()); final LongArrayList dvGens = new LongArrayList(); @@ -67,7 +66,7 @@ class SegmentDocValuesProducer extends DocValuesProducer { dvGens.add(docValuesGen); dvProducers.add(baseProducer); } - dvProducersByField.put(fi.name, baseProducer); + dvProducersByField.put(fi.number, baseProducer); } else { assert !dvGens.contains(docValuesGen); // otherwise, producer sees only the one fieldinfo it wrote @@ -76,7 +75,7 @@ class SegmentDocValuesProducer extends DocValuesProducer { docValuesGen, si, dir, new FieldInfos(new FieldInfo[] {fi})); dvGens.add(docValuesGen); dvProducers.add(dvp); - dvProducersByField.put(fi.name, dvp); + dvProducersByField.put(fi.number, dvp); } } } catch (Throwable t) { @@ -91,42 +90,42 @@ class SegmentDocValuesProducer extends DocValuesProducer { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getNumeric(field); } @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getBinary(field); } @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getSorted(field); } @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getSortedNumeric(field); } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getSortedSet(field); } @Override public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { - DocValuesProducer dvProducer = dvProducersByField.get(field.name); + DocValuesProducer dvProducer = dvProducersByField.get(field.number); assert dvProducer != null; return dvProducer.getSkipper(field); } diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java index fc6c1d9b2941..d4140343e6ea 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java @@ -20,10 +20,8 @@ import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Map; import java.util.Objects; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.FieldsProducer; @@ -34,6 +32,7 @@ import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.VectorScorer; @@ -391,7 +390,7 @@ private static class SlowCompositeDocValuesProducerWrapper extends DocValuesProd private final CodecReader[] codecReaders; private final DocValuesProducer[] producers; private final int[] docStarts; - private final Map cachedOrdMaps = new HashMap<>(); + private final IntObjectHashMap cachedOrdMaps = new IntObjectHashMap<>(); SlowCompositeDocValuesProducerWrapper(CodecReader[] codecReaders, int[] docStarts) { this.codecReaders = codecReaders; @@ -430,14 +429,14 @@ public BinaryDocValues getBinary(FieldInfo field) throws IOException { public SortedDocValues getSorted(FieldInfo field) throws IOException { OrdinalMap map = null; synchronized (cachedOrdMaps) { - map = cachedOrdMaps.get(field.name); + map = cachedOrdMaps.get(field.number); if (map == null) { // uncached, or not a multi dv SortedDocValues dv = MultiDocValues.getSortedValues(new MultiReader(codecReaders), field.name); if (dv instanceof MultiSortedDocValues) { map = ((MultiSortedDocValues) dv).mapping; - cachedOrdMaps.put(field.name, map); + cachedOrdMaps.put(field.number, map); } return dv; } @@ -466,14 +465,14 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { OrdinalMap map = null; synchronized (cachedOrdMaps) { - map = cachedOrdMaps.get(field.name); + map = cachedOrdMaps.get(field.number); if (map == null) { // uncached, or not a multi dv SortedSetDocValues dv = MultiDocValues.getSortedSetValues(new MultiReader(codecReaders), field.name); if (dv instanceof MultiSortedSetDocValues) { map = ((MultiSortedSetDocValues) dv).mapping; - cachedOrdMaps.put(field.name, map); + cachedOrdMaps.put(field.number, map); } return dv; } From 7d1aac1a81a64038c5ea36c5eb1edd88c9f44c7c Mon Sep 17 00:00:00 2001 From: panguixin Date: Mon, 9 Sep 2024 21:15:39 +0800 Subject: [PATCH 2/2] changelog --- lucene/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f0cafe3ebb30..9eae6ef01587 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -376,6 +376,8 @@ Optimizations * GITHUB#13587: Use Max WAND optimizations with ToParentBlockJoinQuery when using ScoreMode.Max (Mike Pellegrini) +* GITHUB#13686: Replace Map with IntObjectHashMap for DV producer (Pan Guixin) + Changes in runtime behavior ---------------------