Skip to content

Commit

Permalink
Text fields are stored by default with synthetic source
Browse files Browse the repository at this point in the history
Synthetic source requires text fields to be stored or have keyword
sub-field that supports synthetic source. If there are no keyword fields
 users currently have to explicitly set 'store' to 'true' or get a
validation exception. This is not the best experience. It is quite
likely that setting `store` to `true` is  the correct thing to do but
users still get an error and need to investigate it. With this change if
 `store` setting is not specified in such context it  will be set to
 `true` by default. Setting it explicitly to `false` results in the
 exception.

Closes elastic#97039
  • Loading branch information
lkts committed Mar 14, 2024
1 parent d47a461 commit 36cb0a5
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 38 deletions.
2 changes: 2 additions & 0 deletions docs/reference/mapping/types/text.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ a <<keyword-synthetic-source, `keyword`>> sub-field that supports synthetic
`_source` or if the `text` field sets `store` to `true`. Either way, it may
not have <<copy-to,`copy_to`>>.

`store` will be set to `true` by default if synthetic source is enabled and there is no suitable <<keyword-synthetic-source, `keyword`>> sub-field.

If using a sub-`keyword` field then the values are sorted in the same way as
a `keyword` field's values are sorted. By default, that means sorted with
duplicates removed. So:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,10 @@ public void setValue(T value) {
this.value = value;
}

public boolean isSet() {
return isSet;
}

public boolean isConfigured() {
return isSet && Objects.equals(value, getDefaultValue()) == false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,13 +387,9 @@ private static KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate(Field
if (fieldType.stored()) {
return null;
}
for (Mapper sub : multiFields) {
if (sub.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) {
KeywordFieldMapper kwd = (KeywordFieldMapper) sub;
if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) {
return kwd.fieldType();
}
}
var kwd = getKeywordFieldMapperForSyntheticSource(multiFields);
if (kwd != null) {
return kwd.fieldType();
}
return null;
}
Expand Down Expand Up @@ -460,6 +456,20 @@ private SubFieldInfo buildPhraseInfo(FieldType fieldType, TextFieldType parent)
@Override
public TextFieldMapper build(MapperBuilderContext context) {
MultiFields multiFields = multiFieldsBuilder.build(this, context);

// If synthetic source is used we need to either store this field
// to recreate the source or use keyword multi-fields for that.
// So if there are no suitable multi-fields we will default to
// storing the field without requiring users to explicitly set 'store'.
//
// If 'store' parameter was explicitly provided we'll let it pass and
// fail in TextFieldMapper#syntheticFieldLoader later if needed.
if (store.isSet() == false
&& context.isSourceSynthetic()
&& TextFieldMapper.getKeywordFieldMapperForSyntheticSource(multiFields) == null) {
store.setValue(true);
}

FieldType fieldType = TextParams.buildFieldType(
index,
store,
Expand Down Expand Up @@ -1454,15 +1464,12 @@ protected void write(XContentBuilder b, Object value) throws IOException {
}
};
}
for (Mapper sub : this) {
if (sub.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) {
KeywordFieldMapper kwd = (KeywordFieldMapper) sub;
if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) {

return kwd.syntheticFieldLoader(simpleName());
}
}
var kwd = getKeywordFieldMapperForSyntheticSource(this);
if (kwd != null) {
return kwd.syntheticFieldLoader(simpleName());
}

throw new IllegalArgumentException(
String.format(
Locale.ROOT,
Expand All @@ -1473,4 +1480,17 @@ protected void write(XContentBuilder b, Object value) throws IOException {
)
);
}

private static KeywordFieldMapper getKeywordFieldMapperForSyntheticSource(Iterable<? extends Mapper> multiFields) {
for (Mapper sub : multiFields) {
if (sub.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) {
KeywordFieldMapper kwd = (KeywordFieldMapper) sub;
if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) {
return kwd;
}
}
}

return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ public void testEmptyObject() throws IOException {
public void testUnsupported() throws IOException {
Exception e = expectThrows(
IllegalArgumentException.class,
() -> createDocumentMapper(syntheticSourceMapping(b -> b.startObject("txt").field("type", "text").endObject()))
() -> createDocumentMapper(
syntheticSourceMapping(b -> b.startObject("txt").field("type", "text").field("store", "false").endObject())
)
);
assertThat(
e.getMessage(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1119,6 +1119,7 @@ protected boolean supportsIgnoreMalformed() {
protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
assumeFalse("ignore_malformed not supported", ignoreMalformed);
boolean storeTextField = randomBoolean();
boolean explicitlyStoreTextField = randomBoolean();
boolean storedKeywordField = storeTextField || randomBoolean();
boolean indexText = randomBoolean();
Integer ignoreAbove = randomBoolean() ? null : between(10, 100);
Expand All @@ -1138,7 +1139,10 @@ public SyntheticSourceExample example(int maxValues) {
delegate.expectedForSyntheticSource(),
delegate.expectedForBlockLoader(),
b -> {
b.field("type", "text").field("store", true);
b.field("type", "text");
if (explicitlyStoreTextField) {
b.field("store", true);
}
if (indexText == false) {
b.field("index", false);
}
Expand Down Expand Up @@ -1174,30 +1178,42 @@ public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
"field [field] of type [text] doesn't support synthetic source unless it is stored or"
+ " has a sub-field of type [keyword] with doc values or stored and without a normalizer"
);
return List.of(
new SyntheticSourceInvalidExample(err, TextFieldMapperTests.this::minimalMapping),
new SyntheticSourceInvalidExample(err, b -> {
b.field("type", "text");
b.startObject("fields");
{
b.startObject("l");
b.field("type", "long");
b.endObject();
}
return List.of(new SyntheticSourceInvalidExample(err, b -> {
b.field("type", "text");
b.field("store", "false");
}), new SyntheticSourceInvalidExample(err, b -> {
b.field("type", "text");
b.field("store", "false");
b.startObject("fields");
{
b.startObject("l");
b.field("type", "long");
b.endObject();
}),
new SyntheticSourceInvalidExample(err, b -> {
b.field("type", "text");
b.startObject("fields");
{
b.startObject("kwd");
b.field("type", "keyword");
b.field("normalizer", "lowercase");
b.endObject();
}
}
b.endObject();
}), new SyntheticSourceInvalidExample(err, b -> {
b.field("type", "text");
b.field("store", "false");
b.startObject("fields");
{
b.startObject("kwd");
b.field("type", "keyword");
b.field("normalizer", "lowercase");
b.endObject();
})
);
}
b.endObject();
}), new SyntheticSourceInvalidExample(err, b -> {
b.field("type", "text");
b.field("store", "false");
b.startObject("fields");
{
b.startObject("kwd");
b.field("type", "keyword");
b.field("doc_values", "false");
b.endObject();
}
b.endObject();
}));
}
};
}
Expand Down

0 comments on commit 36cb0a5

Please sign in to comment.