Skip to content

Commit

Permalink
nested: In case of a single type the _id field should be added to the…
Browse files Browse the repository at this point in the history
… nested document instead of _uid field.

When `index.mapping.single_type` is `true` the `_uid` field is not used and instead `_id` field is used.
Prior to this change nested documents would in this case still use the `_uid` field to mark to what root
document they belong to. In case of deleting documents this could lead to only the root  Lucene document
to be deleted and not the nested Lucene documents. This broke the docid block ordering the block join
relies on in order to work correctly and thus causing the `nested` query, `nested` aggregation, nested sorting
and nested inner hits to either fail or yield incorrect results.

This bug only manifests in 6.0.0-ALPHA2 release and snaphots (5.5.0-SNAPSHOT, 5.6.0-SNAPSHOT, 6.0.0-SNAPSHOT).
  • Loading branch information
martijnvg committed Jun 9, 2017
1 parent 87d19b2 commit c7ae27d
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -424,15 +424,33 @@ private static ParseContext nestedContext(ParseContext context, ObjectMapper map
context = context.createNestedContext(mapper.fullPath());
ParseContext.Document nestedDoc = context.doc();
ParseContext.Document parentDoc = nestedDoc.getParent();
// pre add the uid field if possible (id was already provided)
IndexableField uidField = parentDoc.getField(UidFieldMapper.NAME);
if (uidField != null) {
// we don't need to add it as a full uid field in nested docs, since we don't need versioning
// we also rely on this for UidField#loadVersion

// this is a deeply nested field
nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
// We need to add the uid or id to this nested Lucene document too,
// If we do not do this then when a document gets deleted only the root Lucene document gets deleted and
// not the nested Lucene documents! Besides the fact that we would have zombie Lucene documents, the ordering of
// documents inside the Lucene index (document blocks) will be incorrect, as nested documents of different root
// documents are then aligned with other root documents. This will lead tothe nested query, sorting, aggregations
// and inner hits to fail or yield incorrect results.
if (context.mapperService().getIndexSettings().isSingleType()) {
IndexableField idField = parentDoc.getField(IdFieldMapper.NAME);
if (idField != null) {
// We just need to store the id as indexed field, so that IndexWriter#deleteDocuments(term) can then
// delete it when the root document is deleted too.
nestedDoc.add(new Field(IdFieldMapper.NAME, idField.stringValue(), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
} else {
throw new IllegalStateException("The root document of a nested document should have an id field");
}
} else {
IndexableField uidField = parentDoc.getField(UidFieldMapper.NAME);
if (uidField != null) {
/// We just need to store the uid as indexed field, so that IndexWriter#deleteDocuments(term) can then
// delete it when the root document is deleted too.
nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
} else {
throw new IllegalStateException("The root document of a nested document should have an uid field");
}
}

// the type of the nested doc starts with __, so we can identify that its a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public static class Defaults {
public static final String NAME = IdFieldMapper.NAME;

public static final MappedFieldType FIELD_TYPE = new IdFieldType();
public static final MappedFieldType NESTED_FIELD_TYPE;

static {
FIELD_TYPE.setTokenized(false);
Expand All @@ -62,6 +63,10 @@ public static class Defaults {
FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
FIELD_TYPE.setName(NAME);
FIELD_TYPE.freeze();

NESTED_FIELD_TYPE = FIELD_TYPE.clone();
NESTED_FIELD_TYPE.setStored(false);
NESTED_FIELD_TYPE.freeze();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
Expand Down Expand Up @@ -148,6 +149,92 @@ public void testDotsWithDynamicNestedMapper() throws Exception {
e.getMessage());
}

public void testNestedHaveIdAndTypeFields() throws Exception {
DocumentMapperParser mapperParser1 = createIndex("index1", Settings.builder()
.put("index.mapping.single_type", false).build()
).mapperService().documentMapperParser();
DocumentMapperParser mapperParser2 = createIndex("index2", Settings.builder()
.put("index.mapping.single_type", true).build()
).mapperService().documentMapperParser();

XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties");
{
mapping.startObject("foo");
mapping.field("type", "nested");
{
mapping.startObject("properties");
{

mapping.startObject("bar");
mapping.field("type", "keyword");
mapping.endObject();
}
mapping.endObject();
}
mapping.endObject();
}
{
mapping.startObject("baz");
mapping.field("type", "keyword");
mapping.endObject();
}
mapping.endObject().endObject().endObject();
DocumentMapper mapper1 = mapperParser1.parse("type", new CompressedXContent(mapping.string()));
DocumentMapper mapper2 = mapperParser2.parse("type", new CompressedXContent(mapping.string()));

XContentBuilder doc = XContentFactory.jsonBuilder().startObject();
{
doc.startArray("foo");
{
doc.startObject();
doc.field("bar", "value1");
doc.endObject();
}
doc.endArray();
doc.field("baz", "value2");
}
doc.endObject();

// Verify in the case where multiple types are allowed that the _uid field is added to nested documents:
ParsedDocument result = mapper1.parse(SourceToParse.source("index1", "type", "1", doc.bytes(), XContentType.JSON));
assertEquals(2, result.docs().size());
// Nested document:
assertNull(result.docs().get(0).getField(IdFieldMapper.NAME));
assertNotNull(result.docs().get(0).getField(UidFieldMapper.NAME));
assertEquals("type#1", result.docs().get(0).getField(UidFieldMapper.NAME).stringValue());
assertEquals(UidFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(UidFieldMapper.NAME).fieldType());
assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
assertEquals("value1", result.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
// Root document:
assertNull(result.docs().get(1).getField(IdFieldMapper.NAME));
assertNotNull(result.docs().get(1).getField(UidFieldMapper.NAME));
assertEquals("type#1", result.docs().get(1).getField(UidFieldMapper.NAME).stringValue());
assertEquals(UidFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(UidFieldMapper.NAME).fieldType());
assertNotNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
assertEquals("type", result.docs().get(1).getField(TypeFieldMapper.NAME).stringValue());
assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());

// Verify in the case where only a single type is allowed that the _id field is added to nested documents:
result = mapper2.parse(SourceToParse.source("index2", "type", "1", doc.bytes(), XContentType.JSON));
assertEquals(2, result.docs().size());
// Nested document:
assertNull(result.docs().get(0).getField(UidFieldMapper.NAME));
assertNotNull(result.docs().get(0).getField(IdFieldMapper.NAME));
assertEquals("1", result.docs().get(0).getField(IdFieldMapper.NAME).stringValue());
assertEquals(IdFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(IdFieldMapper.NAME).fieldType());
assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
assertEquals("value1", result.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
// Root document:
assertNull(result.docs().get(1).getField(UidFieldMapper.NAME));
assertNotNull(result.docs().get(1).getField(IdFieldMapper.NAME));
assertEquals("1", result.docs().get(1).getField(IdFieldMapper.NAME).stringValue());
assertEquals(IdFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(IdFieldMapper.NAME).fieldType());
assertNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());
}

public void testPropagateDynamicWithExistingMapper() throws Exception {
DocumentMapperParser mapperParser = createIndex("test").mapperService().documentMapperParser();
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
Expand Down Expand Up @@ -639,7 +726,7 @@ public void testDynamicDottedFieldNameLongArrayWithExistingParentWrongType() thr
.value(0)
.value(1)
.endArray().endObject().bytes();
MapperParsingException exception = expectThrows(MapperParsingException.class,
MapperParsingException exception = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", bytes, XContentType.JSON)));
assertEquals("Could not dynamically add mapping for field [foo.bar.baz]. "
+ "Existing mapping for [foo] must be of type object but found [long].", exception.getMessage());
Expand Down Expand Up @@ -758,7 +845,7 @@ public void testDynamicDottedFieldNameLongWithExistingParentWrongType() throws E
BytesReference bytes = XContentFactory.jsonBuilder()
.startObject().field("foo.bar.baz", 0)
.endObject().bytes();
MapperParsingException exception = expectThrows(MapperParsingException.class,
MapperParsingException exception = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", bytes, XContentType.JSON)));
assertEquals("Could not dynamically add mapping for field [foo.bar.baz]. "
+ "Existing mapping for [foo] must be of type object but found [long].", exception.getMessage());
Expand Down Expand Up @@ -880,7 +967,7 @@ public void testDynamicDottedFieldNameObjectWithExistingParentWrongType() throws

BytesReference bytes = XContentFactory.jsonBuilder().startObject().startObject("foo.bar.baz").field("a", 0).endObject().endObject()
.bytes();
MapperParsingException exception = expectThrows(MapperParsingException.class,
MapperParsingException exception = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", bytes, XContentType.JSON)));

assertEquals("Could not dynamically add mapping for field [foo.bar.baz]. "
Expand Down Expand Up @@ -1017,7 +1104,7 @@ public void testNoLevel() throws Exception {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject()
.bytes(),
.bytes(),
XContentType.JSON));

assertThat(doc.rootDoc().get("test1"), equalTo("value1"));
Expand All @@ -1036,7 +1123,7 @@ public void testTypeLevel() throws Exception {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject().endObject()
.bytes(),
.bytes(),
XContentType.JSON));

assertThat(doc.rootDoc().get("type.test1"), equalTo("value1"));
Expand All @@ -1056,7 +1143,7 @@ public void testNoLevelWithFieldTypeAsValue() throws Exception {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject()
.bytes(),
.bytes(),
XContentType.JSON));

assertThat(doc.rootDoc().get("type"), equalTo("value_type"));
Expand All @@ -1077,7 +1164,7 @@ public void testTypeLevelWithFieldTypeAsValue() throws Exception {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject().endObject()
.bytes(),
.bytes(),
XContentType.JSON));

assertThat(doc.rootDoc().get("type.type"), equalTo("value_type"));
Expand All @@ -1098,7 +1185,7 @@ public void testNoLevelWithFieldTypeAsObject() throws Exception {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject()
.bytes(),
.bytes(),
XContentType.JSON));

// in this case, we analyze the type object as the actual document, and ignore the other same level fields
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@
public class SimpleNestedIT extends ESIntegTestCase {
public void testSimpleNested() throws Exception {
assertAcked(prepareCreate("test")
.setSettings("index.mapping.single_type", false)
.addMapping("type1", "nested1", "type=nested")
.addMapping("type2", "nested1", "type=nested"));
.addMapping("type1", "nested1", "type=nested"));
ensureGreen();

// check on no data, see it works
Expand Down Expand Up @@ -158,10 +156,6 @@ public void testSimpleNested() throws Exception {
searchResponse = client().prepareSearch("test").setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)).execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1L));

searchResponse = client().prepareSearch("test").setTypes("type1", "type2").setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)).execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1L));
}

public void testMultiNested() throws Exception {
Expand Down

0 comments on commit c7ae27d

Please sign in to comment.