Skip to content

Commit

Permalink
Add NestedPathFieldMapper to store nested path information (#51100)
Browse files Browse the repository at this point in the history
Currently nested documents repurpose the _type field to store their nested paths.
This commit adds a dedicated _nested_path field instead, which decouples this
information from types and will allow the removal of the _type field entirely further
down the line. To preserve backwards compatibility, references to this field are
mediated via methods that take an index settings object, and indexes created before
8.x still use the _type field.

Relates to #41059
Closes #24362
  • Loading branch information
romseygeek authored Jan 22, 2020
1 parent 551a83a commit 1dc9dd4
Show file tree
Hide file tree
Showing 23 changed files with 375 additions and 133 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.XContentHelper;
Expand Down Expand Up @@ -417,21 +418,23 @@ private static void innerParseObject(ParseContext context, ObjectMapper mapper,
private static void nested(ParseContext context, ObjectMapper.Nested nested) {
ParseContext.Document nestedDoc = context.doc();
ParseContext.Document parentDoc = nestedDoc.getParent();
Settings settings = context.indexSettings().getSettings();
if (nested.isIncludeInParent()) {
addFields(nestedDoc, parentDoc);
addFields(settings, nestedDoc, parentDoc);
}
if (nested.isIncludeInRoot()) {
ParseContext.Document rootDoc = context.rootDoc();
// don't add it twice, if it's included in parent, and we are handling the master doc...
if (!nested.isIncludeInParent() || parentDoc != rootDoc) {
addFields(nestedDoc, rootDoc);
addFields(settings, nestedDoc, rootDoc);
}
}
}

private static void addFields(ParseContext.Document nestedDoc, ParseContext.Document rootDoc) {
private static void addFields(Settings settings, ParseContext.Document nestedDoc, ParseContext.Document rootDoc) {
String nestedPathFieldName = NestedPathFieldMapper.name(settings);
for (IndexableField field : nestedDoc.getFields()) {
if (!field.name().equals(TypeFieldMapper.NAME)) {
if (field.name().equals(nestedPathFieldName) == false) {
rootDoc.add(field);
}
}
Expand All @@ -457,10 +460,7 @@ private static ParseContext nestedContext(ParseContext context, ObjectMapper map
throw new IllegalStateException("The root document of a nested document should have an _id field");
}

// the type of the nested doc starts with __, so we can identify that its a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
nestedDoc.add(new Field(TypeFieldMapper.NAME, mapper.nestedTypePathAsString(), TypeFieldMapper.Defaults.FIELD_TYPE));
nestedDoc.add(NestedPathFieldMapper.field(context.indexSettings().getSettings(), mapper.nestedTypePath()));
return context;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public enum MergeReason {
//TODO this needs to be cleaned up: _timestamp and _ttl are not supported anymore, _field_names, _seq_no, _version and _source are
//also missing, not sure if on purpose. See IndicesModule#getMetadataMappers
private static final String[] SORTED_META_FIELDS = new String[]{
"_id", IgnoredFieldMapper.NAME, "_index", "_routing", "_size", "_timestamp", "_ttl", "_type"
"_id", IgnoredFieldMapper.NAME, "_index", "_nested_path", "_routing", "_size", "_timestamp", "_ttl", "_type"
};

private static final ObjectHashSet<String> META_FIELDS = ObjectHashSet.from(SORTED_META_FIELDS);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.query.QueryShardContext;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * Metadata field mapper for the field that holds the nested path of a nested document.
 *
 * <p>The name of the underlying field depends on the version the index was created with:
 * indices created before {@link Version#V_8_0_0} use {@link #NAME_PRE_V8}, all others use
 * {@link #NAME}. The static helpers therefore take the index {@link Settings} so that they
 * can resolve the correct field name.
 */
public class NestedPathFieldMapper extends MetadataFieldMapper {

    /** Field name used for indices created before {@link Version#V_8_0_0}. */
    public static final String NAME_PRE_V8 = "_type";

    /** Field name used for indices created on or after {@link Version#V_8_0_0}. */
    public static final String NAME = "_nested_path";

    /**
     * Resolves the name of the nested path field for an index.
     *
     * @param settings settings of the index, from which the index-created version is read
     * @return {@link #NAME_PRE_V8} if the index was created before 8.0, otherwise {@link #NAME}
     */
    public static String name(Settings settings) {
        final boolean createdBeforeV8 = Version.indexCreated(settings).before(Version.V_8_0_0);
        return createdBeforeV8 ? NAME_PRE_V8 : NAME;
    }

    /**
     * Builds a term query matching documents whose nested path field equals {@code path}.
     *
     * @param settings settings of the index, used to resolve the field name
     * @param path     the nested path value to match
     * @return a {@link TermQuery} on the resolved field name
     */
    public static Query filter(Settings settings, String path) {
        final Term pathTerm = new Term(name(settings), new BytesRef(path));
        return new TermQuery(pathTerm);
    }

    /**
     * Builds the Lucene field carrying {@code path} as its value.
     *
     * @param settings settings of the index, used to resolve the field name
     * @param path     the nested path value to store in the field
     * @return a new {@link Field} configured with {@link Defaults#FIELD_TYPE}
     */
    public static Field field(Settings settings, String path) {
        final String fieldName = name(settings);
        return new Field(fieldName, path, Defaults.FIELD_TYPE);
    }

    /** Holder for the field type used when creating nested path fields. */
    public static class Defaults {

        public static final MappedFieldType FIELD_TYPE = new NestedPathFieldType();

        static {
            // Indexed with document-level postings only, as a single untokenized, unstored term without norms.
            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
            FIELD_TYPE.setTokenized(false);
            FIELD_TYPE.setStored(false);
            FIELD_TYPE.setOmitNorms(true);
            // The keyword analyzer is used on both the index and the search side.
            FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
            FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
            FIELD_TYPE.freeze();
        }
    }

    /** Type parser for the nested path field; the field cannot be configured through mappings. */
    public static class TypeParser implements MetadataFieldMapper.TypeParser {

        /**
         * Always rejects the mapping definition.
         *
         * @throws MapperParsingException always, naming the resolved field as not configurable
         */
        @Override
        public MetadataFieldMapper.Builder<?,?> parse(String name, Map<String, Object> node,
                                                      ParserContext parserContext) throws MapperParsingException {
            final Settings settings = parserContext.mapperService().getIndexSettings().getSettings();
            throw new MapperParsingException(name(settings) + " is not configurable");
        }

        /** Creates the mapper from the index settings of the given context, using the default field type. */
        @Override
        public MetadataFieldMapper getDefault(ParserContext context) {
            final IndexSettings idxSettings = context.mapperService().getIndexSettings();
            final MappedFieldType defaults = defaultFieldType(idxSettings);
            return new NestedPathFieldMapper(idxSettings, defaults);
        }
    }

    /** Field type of the nested path field. */
    public static final class NestedPathFieldType extends StringFieldType {

        NestedPathFieldType() {
        }

        /** Copy constructor backing {@link #clone()}. */
        protected NestedPathFieldType(NestedPathFieldType ref) {
            super(ref);
        }

        @Override
        public MappedFieldType clone() {
            return new NestedPathFieldType(this);
        }

        @Override
        public String typeName() {
            return NAME;
        }

        /** The field is always reported as searchable. */
        @Override
        public boolean isSearchable() {
            return true;
        }

        /** Exists queries are not supported on this field. */
        @Override
        public Query existsQuery(QueryShardContext context) {
            throw new UnsupportedOperationException("Cannot run exists() query against the nested field path");
        }
    }

    /**
     * Creates a mapper from an optional existing field type; a {@code null} type falls back to
     * the default field type, any other type is cloned before use.
     */
    private NestedPathFieldMapper(IndexSettings indexSettings, MappedFieldType existing) {
        this(
            existing == null ? defaultFieldType(indexSettings) : existing.clone(),
            indexSettings
        );
    }

    /** Creates a mapper named after the version-dependent field name, using the given field type. */
    private NestedPathFieldMapper(MappedFieldType fieldType, IndexSettings indexSettings) {
        super(
            name(indexSettings.getSettings()),
            fieldType,
            defaultFieldType(indexSettings),
            indexSettings.getSettings()
        );
    }

    /**
     * Builds the default field type of the mapper: a copy of {@link Defaults#FIELD_TYPE} with
     * index options set to {@link IndexOptions#NONE}, doc values disabled and the name set to
     * the version-dependent field name.
     */
    private static MappedFieldType defaultFieldType(IndexSettings indexSettings) {
        final MappedFieldType defaults = Defaults.FIELD_TYPE.clone();
        defaults.setIndexOptions(IndexOptions.NONE);
        defaults.setHasDocValues(false);
        defaults.setName(name(indexSettings.getSettings()));
        return defaults;
    }

    /** Runs the inherited parse step during pre-parsing. */
    @Override
    public void preParse(ParseContext context) throws IOException {
        super.parse(context);
    }

    @Override
    public void parse(ParseContext context) throws IOException {
        // intentionally empty: parsing already happened in preParse
    }

    @Override
    protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
        // intentionally empty: this mapper adds no fields here
    }

    @Override
    protected String contentType() {
        return NAME;
    }

    /** Writes nothing and returns the builder unchanged. */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        return builder;
    }

    @Override
    protected void doMerge(Mapper mergeWith) {
        // nothing to merge, and merging is not an error either
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@
package org.elasticsearch.index.mapper;

import org.apache.logging.log4j.LogManager;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.collect.CopyOnWriteHashMap;
import org.elasticsearch.common.logging.DeprecationLogger;
Expand Down Expand Up @@ -310,8 +308,7 @@ protected static void parseProperties(ObjectMapper.Builder objBuilder, Map<Strin

private final Nested nested;

private final String nestedTypePathAsString;
private final BytesRef nestedTypePathAsBytes;
private final String nestedTypePath;

private final Query nestedTypeFilter;

Expand All @@ -335,9 +332,12 @@ protected static void parseProperties(ObjectMapper.Builder objBuilder, Map<Strin
} else {
this.mappers = CopyOnWriteHashMap.copyOf(mappers);
}
this.nestedTypePathAsString = "__" + fullPath;
this.nestedTypePathAsBytes = new BytesRef(nestedTypePathAsString);
this.nestedTypeFilter = new TermQuery(new Term(TypeFieldMapper.NAME, nestedTypePathAsBytes));
if (Version.indexCreated(settings).before(Version.V_8_0_0)) {
this.nestedTypePath = "__" + fullPath;
} else {
this.nestedTypePath = fullPath;
}
this.nestedTypeFilter = NestedPathFieldMapper.filter(settings, nestedTypePath);
}

@Override
Expand Down Expand Up @@ -401,8 +401,8 @@ public String fullPath() {
return this.fullPath;
}

public String nestedTypePathAsString() {
return nestedTypePathAsString;
public String nestedTypePath() {
return this.nestedTypePath;
}

public final Dynamic dynamic() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MetadataFieldMapper;
import org.elasticsearch.index.mapper.NestedPathFieldMapper;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.index.mapper.RangeFieldMapper;
Expand Down Expand Up @@ -156,6 +157,7 @@ private static Map<String, MetadataFieldMapper.TypeParser> initBuiltInMetadataMa
builtInMetadataMappers.put(IndexFieldMapper.NAME, new IndexFieldMapper.TypeParser());
builtInMetadataMappers.put(SourceFieldMapper.NAME, new SourceFieldMapper.TypeParser());
builtInMetadataMappers.put(TypeFieldMapper.NAME, new TypeFieldMapper.TypeParser());
builtInMetadataMappers.put(NestedPathFieldMapper.NAME, new NestedPathFieldMapper.TypeParser());
builtInMetadataMappers.put(VersionFieldMapper.NAME, new VersionFieldMapper.TypeParser());
builtInMetadataMappers.put(SeqNoFieldMapper.NAME, new SeqNoFieldMapper.TypeParser());
//_field_names must be added last so that it has a chance to see all the other mappers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.elasticsearch.Version;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MetadataFieldMapper;
import org.elasticsearch.index.mapper.NestedPathFieldMapper;
import org.elasticsearch.plugins.MapperPlugin;

import java.util.Collections;
Expand All @@ -37,13 +38,17 @@ public final class MapperRegistry {

private final Map<String, Mapper.TypeParser> mapperParsers;
private final Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers;
private final Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers7x;
private final Function<String, Predicate<String>> fieldFilter;


/**
 * Creates a registry from the given parsers.
 *
 * <p>Each parser map is copied into a {@link LinkedHashMap} and only exposed through an
 * unmodifiable view. In addition to the full metadata parser map, a second copy without the
 * {@link NestedPathFieldMapper#NAME} entry is kept; it is the one returned by
 * {@code getMetadataMapperParsers} for indices created before 8.0.
 *
 * @param mapperParsers         the field mapper parsers to register
 * @param metadataMapperParsers the metadata field mapper parsers to register
 * @param fieldFilter           the field filter function, stored as given
 */
public MapperRegistry(Map<String, Mapper.TypeParser> mapperParsers,
        Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers, Function<String, Predicate<String>> fieldFilter) {
    this.mapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(mapperParsers));
    this.metadataMapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(metadataMapperParsers));
    // Pre-8.0 variant: the same parsers in the same order, minus the dedicated nested path field.
    Map<String, MetadataFieldMapper.TypeParser> metadata7x = new LinkedHashMap<>(metadataMapperParsers);
    metadata7x.remove(NestedPathFieldMapper.NAME);
    // Wrapped like the other two maps so that callers receiving it cannot mutate registry state.
    this.metadataMapperParsers7x = Collections.unmodifiableMap(metadata7x);
    this.fieldFilter = fieldFilter;
}

Expand All @@ -60,7 +65,10 @@ public Map<String, Mapper.TypeParser> getMapperParsers() {
* returned map uses the name of the field as a key.
*/
public Map<String, MetadataFieldMapper.TypeParser> getMetadataMapperParsers(Version indexCreatedVersion) {
return metadataMapperParsers;
if (indexCreatedVersion.onOrAfter(Version.V_8_0_0)) {
return metadataMapperParsers;
}
return metadataMapperParsers7x;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,14 +286,14 @@ public void testNestedHaveIdAndTypeFields() throws Exception {
assertNotNull(result.docs().get(0).getField(IdFieldMapper.NAME));
assertEquals(Uid.encodeId("1"), result.docs().get(0).getField(IdFieldMapper.NAME).binaryValue());
assertEquals(IdFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(IdFieldMapper.NAME).fieldType());
assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
assertNotNull(result.docs().get(0).getField(NestedPathFieldMapper.NAME));
assertEquals("foo", result.docs().get(0).getField(NestedPathFieldMapper.NAME).stringValue());
assertEquals("value1", result.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
// Root document:
assertNotNull(result.docs().get(1).getField(IdFieldMapper.NAME));
assertEquals(Uid.encodeId("1"), result.docs().get(1).getField(IdFieldMapper.NAME).binaryValue());
assertEquals(IdFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(IdFieldMapper.NAME).fieldType());
assertNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
assertNull(result.docs().get(1).getField(NestedPathFieldMapper.NAME));
assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
*/
package org.elasticsearch.index.mapper;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ESTestCase;

Expand Down Expand Up @@ -213,15 +215,19 @@ public void testFieldAliasWithDifferentNestedScopes() {
assertEquals(expectedMessage, e.getMessage());
}

// Index settings that record Version.CURRENT as the index-created version; passed to the
// ObjectMapper instances built by the helper methods of this test.
private static final Settings SETTINGS = Settings.builder()
.put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT)
.build();

private static ObjectMapper createObjectMapper(String name) {
return new ObjectMapper(name, name, true,
ObjectMapper.Nested.NO,
ObjectMapper.Dynamic.FALSE, emptyMap(), Settings.EMPTY);
ObjectMapper.Dynamic.FALSE, emptyMap(), SETTINGS);
}

private static ObjectMapper createNestedObjectMapper(String name) {
return new ObjectMapper(name, name, true,
ObjectMapper.Nested.newNested(false, false),
ObjectMapper.Dynamic.FALSE, emptyMap(), Settings.EMPTY);
ObjectMapper.Dynamic.FALSE, emptyMap(), SETTINGS);
}
}
Loading

0 comments on commit 1dc9dd4

Please sign in to comment.