Skip to content

Commit

Permalink
Include ignored source as part of loading field values in ValueSource…
Browse files Browse the repository at this point in the history
…ReaderOperator via BlockSourceReader. (elastic#114903) (elastic#115064)

Currently, when the compute engine loads field values from source and the source mode is synthetic, the synthetic source loader is already used. However, the _ignored_source field isn't always marked as a required stored field, so the synthesized source can be missing many fields.

This change marks the _ignored_source field as a required stored field and allows keyword fields without doc values or stored fields to be used with synthetic source.

Relying on synthetic source to get the values (because a field has neither stored fields nor doc values) is slow. With synthetic source we already keep ignored fields/values in a special place, named ignored source. Long term, with synthetic source we should load only the ignored source when a field has no doc values or stored field, as is being explored in elastic#114886, thereby avoiding synthesizing the complete _source in order to get only one field.
  • Loading branch information
martijnvg authored Oct 18, 2024
1 parent 8270382 commit d9c930d
Show file tree
Hide file tree
Showing 18 changed files with 613 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
// MatchOnlyText never has norms, so we have to use the field names field
BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name());
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, lookup);
var sourceMode = blContext.indexSettings().getIndexMappingSourceMode();
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, lookup, sourceMode);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
BlockSourceReader.LeafIteratorLookup lookup = isStored() || isIndexed()
? BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name())
: BlockSourceReader.lookupMatchingAll();
return new BlockSourceReader.DoublesBlockLoader(valueFetcher, lookup);
var sourceMode = blContext.indexSettings().getIndexMappingSourceMode();
return new BlockSourceReader.DoublesBlockLoader(valueFetcher, lookup, sourceMode);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
protected BlockLoader blockLoaderFromSource(BlockLoaderContext blContext) {
ValueFetcher fetcher = valueFetcher(blContext.sourcePaths(name()), nullValue, GeometryFormatterFactory.WKB);
// TODO consider optimization using BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name())
return new BlockSourceReader.GeometriesBlockLoader(fetcher, BlockSourceReader.lookupMatchingAll());
var sourceMode = blContext.indexSettings().getIndexMappingSourceMode();
return new BlockSourceReader.GeometriesBlockLoader(fetcher, BlockSourceReader.lookupMatchingAll(), sourceMode);
}

protected abstract Object nullValueAsSource(T nullValue);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,22 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/**
* Loads values from {@code _source}. This whole process is very slow and cast-tastic,
* so it doesn't really try to avoid megamorphic invocations. It's just going to be
* slow.
*/
public abstract class BlockSourceReader implements BlockLoader.RowStrideReader {

// _ignored_source is needed when source mode is synthetic.
static final StoredFieldsSpec NEEDS_SOURCE_AND_IGNORED_SOURCE = new StoredFieldsSpec(
true,
false,
Set.of(IgnoredSourceFieldMapper.NAME)
);

private final ValueFetcher fetcher;
private final List<Object> ignoredValues = new ArrayList<>();
private final DocIdSetIterator iter;
Expand Down Expand Up @@ -91,10 +100,12 @@ public interface LeafIteratorLookup {
private abstract static class SourceBlockLoader implements BlockLoader {
protected final ValueFetcher fetcher;
private final LeafIteratorLookup lookup;
private final SourceFieldMapper.Mode sourceMode;

private SourceBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup) {
private SourceBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, SourceFieldMapper.Mode sourceMode) {
this.fetcher = fetcher;
this.lookup = lookup;
this.sourceMode = sourceMode;
}

@Override
Expand All @@ -104,7 +115,7 @@ public final ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context)

@Override
public final StoredFieldsSpec rowStrideStoredFieldSpec() {
return StoredFieldsSpec.NEEDS_SOURCE;
return sourceMode == SourceFieldMapper.Mode.SYNTHETIC ? NEEDS_SOURCE_AND_IGNORED_SOURCE : StoredFieldsSpec.NEEDS_SOURCE;
}

@Override
Expand Down Expand Up @@ -140,8 +151,8 @@ public final String toString() {
* Load {@code boolean}s from {@code _source}.
*/
public static class BooleansBlockLoader extends SourceBlockLoader {
public BooleansBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup) {
super(fetcher, lookup);
public BooleansBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, SourceFieldMapper.Mode sourceMode) {
super(fetcher, lookup, sourceMode);
}

@Override
Expand Down Expand Up @@ -180,8 +191,8 @@ public String toString() {
* Load {@link BytesRef}s from {@code _source}.
*/
public static class BytesRefsBlockLoader extends SourceBlockLoader {
public BytesRefsBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup) {
super(fetcher, lookup);
public BytesRefsBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, SourceFieldMapper.Mode sourceMode) {
super(fetcher, lookup, sourceMode);
}

@Override
Expand All @@ -191,7 +202,7 @@ public final Builder builder(BlockFactory factory, int expectedCount) {

@Override
protected RowStrideReader rowStrideReader(LeafReaderContext context, DocIdSetIterator iter) throws IOException {
return new BytesRefs(fetcher, iter);
return new BytesRefs(fetcher, iter, null);
}

@Override
Expand All @@ -201,8 +212,8 @@ protected String name() {
}

public static class GeometriesBlockLoader extends SourceBlockLoader {
public GeometriesBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup) {
super(fetcher, lookup);
public GeometriesBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, SourceFieldMapper.Mode sourceMode) {
super(fetcher, lookup, sourceMode);
}

@Override
Expand All @@ -212,7 +223,7 @@ public final Builder builder(BlockFactory factory, int expectedCount) {

@Override
protected RowStrideReader rowStrideReader(LeafReaderContext context, DocIdSetIterator iter) {
return new Geometries(fetcher, iter);
return new Geometries(fetcher, iter, null);
}

@Override
Expand All @@ -224,7 +235,7 @@ protected String name() {
private static class BytesRefs extends BlockSourceReader {
private final BytesRef scratch = new BytesRef();

BytesRefs(ValueFetcher fetcher, DocIdSetIterator iter) {
BytesRefs(ValueFetcher fetcher, DocIdSetIterator iter, SourceFieldMapper.Mode sourceMode) {
super(fetcher, iter);
}

Expand All @@ -241,7 +252,7 @@ public String toString() {

private static class Geometries extends BlockSourceReader {

Geometries(ValueFetcher fetcher, DocIdSetIterator iter) {
Geometries(ValueFetcher fetcher, DocIdSetIterator iter, SourceFieldMapper.Mode sourceMode) {
super(fetcher, iter);
}

Expand All @@ -264,8 +275,8 @@ public String toString() {
* Load {@code double}s from {@code _source}.
*/
public static class DoublesBlockLoader extends SourceBlockLoader {
public DoublesBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup) {
super(fetcher, lookup);
public DoublesBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, SourceFieldMapper.Mode sourceMode) {
super(fetcher, lookup, sourceMode);
}

@Override
Expand Down Expand Up @@ -304,8 +315,8 @@ public String toString() {
* Load {@code int}s from {@code _source}.
*/
public static class IntsBlockLoader extends SourceBlockLoader {
public IntsBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup) {
super(fetcher, lookup);
public IntsBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, SourceFieldMapper.Mode sourceMode) {
super(fetcher, lookup, sourceMode);
}

@Override
Expand Down Expand Up @@ -344,8 +355,8 @@ public String toString() {
* Load {@code long}s from {@code _source}.
*/
public static class LongsBlockLoader extends SourceBlockLoader {
public LongsBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup) {
super(fetcher, lookup);
public LongsBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, SourceFieldMapper.Mode sourceMode) {
super(fetcher, lookup, sourceMode);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
BlockSourceReader.LeafIteratorLookup lookup = isIndexed() || isStored()
? BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name())
: BlockSourceReader.lookupMatchingAll();
return new BlockSourceReader.BooleansBlockLoader(fetcher, lookup);
return new BlockSourceReader.BooleansBlockLoader(fetcher, lookup, blContext.indexSettings().getIndexMappingSourceMode());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
BlockSourceReader.LeafIteratorLookup lookup = isStored() || isIndexed()
? BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name())
: BlockSourceReader.lookupMatchingAll();
return new BlockSourceReader.LongsBlockLoader(sourceValueFetcher(blContext.sourcePaths(name())), lookup);
var sourceMode = blContext.indexSettings().getIndexMappingSourceMode();
return new BlockSourceReader.LongsBlockLoader(sourceValueFetcher(blContext.sourcePaths(name())), lookup, sourceMode);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -632,18 +632,12 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (hasDocValues()) {
return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name());
}
if (isSyntheticSource) {
if (false == isStored()) {
throw new IllegalStateException(
"keyword field ["
+ name()
+ "] is only supported in synthetic _source index if it creates doc values or stored fields"
);
}
if (isStored()) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
}
SourceValueFetcher fetcher = sourceValueFetcher(blContext.sourcePaths(name()));
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, sourceBlockLoaderLookup(blContext));
var sourceMode = blContext.indexSettings().getIndexMappingSourceMode();
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, sourceBlockLoaderLookup(blContext), sourceMode);
}

private BlockSourceReader.LeafIteratorLookup sourceBlockLoaderLookup(BlockLoaderContext blContext) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,12 @@ BlockLoader blockLoaderFromDocValues(String fieldName) {
}

@Override
BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSourceReader.LeafIteratorLookup lookup) {
return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher, lookup);
BlockLoader blockLoaderFromSource(
SourceValueFetcher sourceValueFetcher,
BlockSourceReader.LeafIteratorLookup lookup,
SourceFieldMapper.Mode sourceMode
) {
return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher, lookup, sourceMode);
}
},
FLOAT("float", NumericType.FLOAT) {
Expand Down Expand Up @@ -645,8 +649,12 @@ BlockLoader blockLoaderFromDocValues(String fieldName) {
}

@Override
BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSourceReader.LeafIteratorLookup lookup) {
return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher, lookup);
BlockLoader blockLoaderFromSource(
SourceValueFetcher sourceValueFetcher,
BlockSourceReader.LeafIteratorLookup lookup,
SourceFieldMapper.Mode sourceMode
) {
return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher, lookup, sourceMode);
}
},
DOUBLE("double", NumericType.DOUBLE) {
Expand Down Expand Up @@ -795,8 +803,12 @@ BlockLoader blockLoaderFromDocValues(String fieldName) {
}

@Override
BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSourceReader.LeafIteratorLookup lookup) {
return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher, lookup);
BlockLoader blockLoaderFromSource(
SourceValueFetcher sourceValueFetcher,
BlockSourceReader.LeafIteratorLookup lookup,
SourceFieldMapper.Mode sourceMode
) {
return new BlockSourceReader.DoublesBlockLoader(sourceValueFetcher, lookup, sourceMode);
}
},
BYTE("byte", NumericType.BYTE) {
Expand Down Expand Up @@ -908,8 +920,12 @@ BlockLoader blockLoaderFromDocValues(String fieldName) {
}

@Override
BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSourceReader.LeafIteratorLookup lookup) {
return new BlockSourceReader.IntsBlockLoader(sourceValueFetcher, lookup);
BlockLoader blockLoaderFromSource(
SourceValueFetcher sourceValueFetcher,
BlockSourceReader.LeafIteratorLookup lookup,
SourceFieldMapper.Mode sourceMode
) {
return new BlockSourceReader.IntsBlockLoader(sourceValueFetcher, lookup, sourceMode);
}

private boolean isOutOfRange(Object value) {
Expand Down Expand Up @@ -1021,8 +1037,12 @@ BlockLoader blockLoaderFromDocValues(String fieldName) {
}

@Override
BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSourceReader.LeafIteratorLookup lookup) {
return new BlockSourceReader.IntsBlockLoader(sourceValueFetcher, lookup);
BlockLoader blockLoaderFromSource(
SourceValueFetcher sourceValueFetcher,
BlockSourceReader.LeafIteratorLookup lookup,
SourceFieldMapper.Mode sourceMode
) {
return new BlockSourceReader.IntsBlockLoader(sourceValueFetcher, lookup, sourceMode);
}

private boolean isOutOfRange(Object value) {
Expand Down Expand Up @@ -1208,8 +1228,12 @@ BlockLoader blockLoaderFromDocValues(String fieldName) {
}

@Override
BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSourceReader.LeafIteratorLookup lookup) {
return new BlockSourceReader.IntsBlockLoader(sourceValueFetcher, lookup);
BlockLoader blockLoaderFromSource(
SourceValueFetcher sourceValueFetcher,
BlockSourceReader.LeafIteratorLookup lookup,
SourceFieldMapper.Mode sourceMode
) {
return new BlockSourceReader.IntsBlockLoader(sourceValueFetcher, lookup, sourceMode);
}
},
LONG("long", NumericType.LONG) {
Expand Down Expand Up @@ -1355,8 +1379,12 @@ BlockLoader blockLoaderFromDocValues(String fieldName) {
}

@Override
BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSourceReader.LeafIteratorLookup lookup) {
return new BlockSourceReader.LongsBlockLoader(sourceValueFetcher, lookup);
BlockLoader blockLoaderFromSource(
SourceValueFetcher sourceValueFetcher,
BlockSourceReader.LeafIteratorLookup lookup,
SourceFieldMapper.Mode sourceMode
) {
return new BlockSourceReader.LongsBlockLoader(sourceValueFetcher, lookup, sourceMode);
}

private boolean isOutOfRange(Object value) {
Expand Down Expand Up @@ -1634,7 +1662,11 @@ protected void writeValue(XContentBuilder b, long value) throws IOException {

abstract BlockLoader blockLoaderFromDocValues(String fieldName);

abstract BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSourceReader.LeafIteratorLookup lookup);
abstract BlockLoader blockLoaderFromSource(
SourceValueFetcher sourceValueFetcher,
BlockSourceReader.LeafIteratorLookup lookup,
SourceFieldMapper.Mode sourceMode
);
}

public static class NumberFieldType extends SimpleMappedFieldType {
Expand Down Expand Up @@ -1773,7 +1805,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
BlockSourceReader.LeafIteratorLookup lookup = isStored() || isIndexed()
? BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name())
: BlockSourceReader.lookupMatchingAll();
return type.blockLoaderFromSource(sourceValueFetcher(blContext.sourcePaths(name())), lookup);
var sourceMode = blContext.indexSettings().getIndexMappingSourceMode();
return type.blockLoaderFromSource(sourceValueFetcher(blContext.sourcePaths(name())), lookup, sourceMode);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1012,17 +1012,20 @@ protected String delegatingTo() {
if (isStored()) {
return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name());
}
if (isSyntheticSource) {
if (isSyntheticSource && syntheticSourceDelegate == null) {
/*
* When we're in synthetic source mode we don't currently
* support text fields that are not stored and are not children
* of perfect keyword fields. We'd have to load from the parent
* field and then convert the result to a string.
* field and then convert the result to a string. In this case,
* even if we would synthesize the source, the current field
* would be missing.
*/
return null;
}
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, blockReaderDisiLookup(blContext));
var sourceMode = blContext.indexSettings().getIndexMappingSourceMode();
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, blockReaderDisiLookup(blContext), sourceMode);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public void testEmptyArray() throws IOException {
private void loadBlock(LeafReaderContext ctx, Consumer<TestBlock> test) throws IOException {
ValueFetcher valueFetcher = SourceValueFetcher.toString(Set.of("field"));
BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupFromNorms("field");
BlockLoader loader = new BlockSourceReader.BytesRefsBlockLoader(valueFetcher, lookup);
BlockLoader loader = new BlockSourceReader.BytesRefsBlockLoader(valueFetcher, lookup, null);
assertThat(loader.columnAtATimeReader(ctx), nullValue());
BlockLoader.RowStrideReader reader = loader.rowStrideReader(ctx);
assertThat(loader.rowStrideStoredFieldSpec(), equalTo(StoredFieldsSpec.NEEDS_SOURCE));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1337,12 +1337,15 @@ private BlockLoader getBlockLoader(boolean columnReader) {
return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() {
@Override
public String indexName() {
throw new UnsupportedOperationException();
return "test_index";
}

@Override
public IndexSettings indexSettings() {
throw new UnsupportedOperationException();
var imd = IndexMetadata.builder(indexName())
.settings(MapperTestCase.indexSettings(IndexVersion.current(), 1, 1).put(Settings.EMPTY))
.build();
return new IndexSettings(imd, Settings.EMPTY);
}

@Override
Expand Down
Loading

0 comments on commit d9c930d

Please sign in to comment.