Skip to content

Commit

Permalink
[Transform] Handle multi-fields properly when creating destination in…
Browse files Browse the repository at this point in the history
…dex. (#66273)
  • Loading branch information
przemekwitek authored Dec 16, 2020
1 parent a393db9 commit 81343ac
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ setup:
- match: { generated_dest_index.mappings.properties.airline.type: "keyword" }
- match: { generated_dest_index.mappings.properties.by-hour.type: "date" }
- match: { generated_dest_index.mappings.properties.avg_response.type: "double" }
- match: { generated_dest_index.mappings.properties.time\.max.type: "date" }
- match: { generated_dest_index.mappings.properties.time\.min.type: "date" }
- match: { generated_dest_index.mappings.properties.time.properties.max.type: "date" }
- match: { generated_dest_index.mappings.properties.time.properties.min.type: "date" }

- do:
ingest.put_pipeline:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
import java.util.Map;
import java.util.stream.Stream;

import static java.util.Collections.singletonMap;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
Expand Down Expand Up @@ -55,8 +57,10 @@ private static final String getDateStringForRow(int row) {
private static final String BUSINESS_ID = "business_id";
private static final String COUNT = "count";
private static final String STARS = "stars";
private static final String COMMENT = "comment";

private static final Map<String, Object> row(String userId, String businessId, int count, int stars, String timestamp) {
private static final Map<String, Object> row(
String userId, String businessId, int count, int stars, String timestamp, String comment) {
return new HashMap<>() {{
if (userId != null) {
put(USER_ID, userId);
Expand All @@ -65,40 +69,43 @@ private static final Map<String, Object> row(String userId, String businessId, i
put(COUNT, count);
put(STARS, stars);
put(TIMESTAMP, timestamp);
put(COMMENT, comment);
put("regular_object", singletonMap("foo", 42));
put("nested_object", singletonMap("bar", 43));
}};
}

private static final Object[] EXPECTED_DEST_INDEX_ROWS =
new Object[] {
row("user_0", "business_37", 87, 2, "2017-04-04T12:30:00Z"),
row("user_1", "business_38", 88, 3, "2017-04-05T12:30:00Z"),
row("user_2", "business_39", 89, 4, "2017-04-06T12:30:00Z"),
row("user_3", "business_40", 90, 0, "2017-04-07T12:30:00Z"),
row("user_4", "business_41", 91, 1, "2017-04-08T12:30:00Z"),
row("user_5", "business_42", 92, 2, "2017-04-09T12:30:00Z"),
row("user_6", "business_43", 93, 3, "2017-04-10T12:30:00Z"),
row("user_7", "business_44", 94, 4, "2017-04-11T12:30:00Z"),
row("user_8", "business_45", 95, 0, "2017-04-12T12:30:00Z"),
row("user_9", "business_46", 96, 1, "2017-04-13T12:30:00Z"),
row("user_10", "business_47", 97, 2, "2017-04-14T12:30:00Z"),
row("user_11", "business_48", 98, 3, "2017-04-15T12:30:00Z"),
row("user_12", "business_49", 99, 4, "2017-04-16T12:30:00Z"),
row("user_13", "business_21", 71, 1, "2017-03-16T12:30:00Z"),
row("user_14", "business_22", 72, 2, "2017-03-17T12:30:00Z"),
row("user_15", "business_23", 73, 3, "2017-03-18T12:30:00Z"),
row("user_16", "business_24", 74, 4, "2017-03-19T12:30:00Z"),
row("user_17", "business_25", 75, 0, "2017-03-20T12:30:00Z"),
row("user_18", "business_26", 76, 1, "2017-03-21T12:30:00Z"),
row("user_19", "business_27", 77, 2, "2017-03-22T12:30:00Z"),
row("user_20", "business_28", 78, 3, "2017-03-23T12:30:00Z"),
row("user_21", "business_29", 79, 4, "2017-03-24T12:30:00Z"),
row("user_22", "business_30", 80, 0, "2017-03-25T12:30:00Z"),
row("user_23", "business_31", 81, 1, "2017-03-26T12:30:00Z"),
row("user_24", "business_32", 82, 2, "2017-03-27T12:30:00Z"),
row("user_25", "business_33", 83, 3, "2017-03-28T12:30:00Z"),
row("user_26", "business_34", 84, 4, "2017-04-01T12:30:00Z"),
row("user_27", "business_35", 85, 0, "2017-04-02T12:30:00Z"),
row(null, "business_36", 86, 1, "2017-04-03T12:30:00Z")
row("user_0", "business_37", 87, 2, "2017-04-04T12:30:00Z", "Great stuff, deserves 2 stars"),
row("user_1", "business_38", 88, 3, "2017-04-05T12:30:00Z", "Great stuff, deserves 3 stars"),
row("user_2", "business_39", 89, 4, "2017-04-06T12:30:00Z", "Great stuff, deserves 4 stars"),
row("user_3", "business_40", 90, 0, "2017-04-07T12:30:00Z", "Great stuff, deserves 0 stars"),
row("user_4", "business_41", 91, 1, "2017-04-08T12:30:00Z", "Great stuff, deserves 1 stars"),
row("user_5", "business_42", 92, 2, "2017-04-09T12:30:00Z", "Great stuff, deserves 2 stars"),
row("user_6", "business_43", 93, 3, "2017-04-10T12:30:00Z", "Great stuff, deserves 3 stars"),
row("user_7", "business_44", 94, 4, "2017-04-11T12:30:00Z", "Great stuff, deserves 4 stars"),
row("user_8", "business_45", 95, 0, "2017-04-12T12:30:00Z", "Great stuff, deserves 0 stars"),
row("user_9", "business_46", 96, 1, "2017-04-13T12:30:00Z", "Great stuff, deserves 1 stars"),
row("user_10", "business_47", 97, 2, "2017-04-14T12:30:00Z", "Great stuff, deserves 2 stars"),
row("user_11", "business_48", 98, 3, "2017-04-15T12:30:00Z", "Great stuff, deserves 3 stars"),
row("user_12", "business_49", 99, 4, "2017-04-16T12:30:00Z", "Great stuff, deserves 4 stars"),
row("user_13", "business_21", 71, 1, "2017-03-16T12:30:00Z", "Great stuff, deserves 1 stars"),
row("user_14", "business_22", 72, 2, "2017-03-17T12:30:00Z", "Great stuff, deserves 2 stars"),
row("user_15", "business_23", 73, 3, "2017-03-18T12:30:00Z", "Great stuff, deserves 3 stars"),
row("user_16", "business_24", 74, 4, "2017-03-19T12:30:00Z", "Great stuff, deserves 4 stars"),
row("user_17", "business_25", 75, 0, "2017-03-20T12:30:00Z", "Great stuff, deserves 0 stars"),
row("user_18", "business_26", 76, 1, "2017-03-21T12:30:00Z", "Great stuff, deserves 1 stars"),
row("user_19", "business_27", 77, 2, "2017-03-22T12:30:00Z", "Great stuff, deserves 2 stars"),
row("user_20", "business_28", 78, 3, "2017-03-23T12:30:00Z", "Great stuff, deserves 3 stars"),
row("user_21", "business_29", 79, 4, "2017-03-24T12:30:00Z", "Great stuff, deserves 4 stars"),
row("user_22", "business_30", 80, 0, "2017-03-25T12:30:00Z", "Great stuff, deserves 0 stars"),
row("user_23", "business_31", 81, 1, "2017-03-26T12:30:00Z", "Great stuff, deserves 1 stars"),
row("user_24", "business_32", 82, 2, "2017-03-27T12:30:00Z", "Great stuff, deserves 2 stars"),
row("user_25", "business_33", 83, 3, "2017-03-28T12:30:00Z", "Great stuff, deserves 3 stars"),
row("user_26", "business_34", 84, 4, "2017-04-01T12:30:00Z", "Great stuff, deserves 4 stars"),
row("user_27", "business_35", 85, 0, "2017-04-02T12:30:00Z", "Great stuff, deserves 0 stars"),
row(null, "business_36", 86, 1, "2017-04-03T12:30:00Z", "Great stuff, deserves 1 stars")
};

@After
Expand Down Expand Up @@ -162,11 +169,24 @@ public void testLatestPreview() throws Exception {
GetMappingsResponse sourceIndexMapping =
restClient.indices().getMapping(new GetMappingsRequest().indices(SOURCE_INDEX_NAME), RequestOptions.DEFAULT);
assertThat(
previewResponse.getMappings().get("properties"),
// Mappings we get from preview sometimes contain redundant { "type": "object" } entries.
// We clear them here to be able to compare with the GetMappingsAction output.
clearDefaultObjectType(previewResponse.getMappings().get("properties")),
is(equalTo(sourceIndexMapping.mappings().get(SOURCE_INDEX_NAME).sourceAsMap().get("properties"))));
// Verify preview contents
assertThat(previewResponse.getDocs(), hasSize(NUM_USERS + 1));
assertThat(previewResponse.getDocs(), containsInAnyOrder(EXPECTED_DEST_INDEX_ROWS));
}
}

private static Object clearDefaultObjectType(Object obj) {
if (obj instanceof Map == false) {
return obj;
}
@SuppressWarnings("unchecked")
Map<String, Object> map = (Map<String, Object>) obj;
return map.entrySet().stream()
.filter(entry -> (entry.getKey().equals("type") && entry.getValue().equals("object")) == false)
.collect(toMap(entry -> entry.getKey(), entry -> clearDefaultObjectType(entry.getValue())));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,20 @@ protected void createReviewsIndex(String indexName,
.startObject("stars")
.field("type", "integer")
.endObject()
.startObject("regular_object")
.field("type", "object")
.endObject()
.startObject("nested_object")
.field("type", "nested")
.endObject()
.startObject("comment")
.field("type", "text")
.startObject("fields")
.startObject("keyword")
.field("type", "keyword")
.endObject()
.endObject()
.endObject()
.endObject();
}
builder.endObject();
Expand Down Expand Up @@ -374,6 +388,10 @@ protected void createReviewsIndex(String indexName,
.append(business)
.append("\",\"stars\":")
.append(stars)
.append(",\"comment\":")
.append("\"Great stuff, deserves " + stars + " stars\"")
.append(",\"regular_object\":{\"foo\": 42}")
.append(",\"nested_object\":{\"bar\": 43}")
.append(",\"timestamp\":\"")
.append(dateString)
.append("\"}");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,36 @@
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.xpack.core.transform.TransformField;
import org.elasticsearch.xpack.core.transform.TransformMessages;
import org.elasticsearch.xpack.core.transform.transforms.TransformConfig;
import org.elasticsearch.xpack.core.transform.transforms.TransformDestIndexSettings;

import java.time.Clock;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static java.util.Map.Entry.comparingByKey;

public final class TransformIndex {
private static final Logger logger = LogManager.getLogger(TransformIndex.class);

/**
* The list of object types used in the mappings.
* We include {@code null} as an alternative for "object", which is the default.
*/
private static final Set<String> OBJECT_TYPES =
new HashSet<>(Arrays.asList(null, ObjectMapper.CONTENT_TYPE, ObjectMapper.NESTED_CONTENT_TYPE));
private static final String PROPERTIES = "properties";
private static final String FIELDS = "fields";
private static final String META = "_meta";

private TransformIndex() {}
Expand Down Expand Up @@ -134,10 +149,28 @@ private static Settings createSettings() {
* }
* @param mappings A Map of the form {"fieldName": "fieldType"}
*/
private static Map<String, Object> createMappingsFromStringMap(Map<String, String> mappings) {
static Map<String, Object> createMappingsFromStringMap(Map<String, String> mappings) {
List<Map.Entry<String, String>> sortedMappingsEntries = new ArrayList<>(mappings.entrySet());
// We sort the entry list to make sure that for each (parent, parent.child) pair, parent entry will be processed before child entry.
sortedMappingsEntries.sort(comparingByKey());
Map<String, Object> fieldMappings = new HashMap<>();
mappings.forEach((k, v) -> fieldMappings.put(k, Map.of("type", v)));

for (Map.Entry<String, String> entry : sortedMappingsEntries) {
String[] parts = Strings.tokenizeToStringArray(entry.getKey(), ".");
String type = entry.getValue();
Map<String, Object> current = fieldMappings;
current = diveInto(current, parts[0]);
for (int j = 1; j < parts.length; ++j) {
// Here we decide whether a dot ('.') means inner object or a multi-field.
current = diveInto(current, OBJECT_TYPES.contains(current.get("type")) ? PROPERTIES : FIELDS);
current = diveInto(current, parts[j]);
}
current.put("type", type);
}
return fieldMappings;
}

@SuppressWarnings("unchecked")
private static Map<String, Object> diveInto(Map<String, Object> map, String key) {
return (Map<String, Object>) map.computeIfAbsent(key, k -> new HashMap<>());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@
import java.util.HashMap;
import java.util.Map;

import static java.util.Collections.emptyMap;
import static java.util.Collections.singletonMap;
import static org.elasticsearch.common.xcontent.support.XContentMapValues.extractValue;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doAnswer;
Expand Down Expand Up @@ -68,4 +72,100 @@ public void testCreateDestinationIndex() throws IOException {
assertThat(extractValue("_doc._meta.created_by", map), equalTo(CREATED_BY));
}
}

public void testCreateMappingsFromStringMap() {
assertThat(TransformIndex.createMappingsFromStringMap(emptyMap()), is(anEmptyMap()));
assertThat(
TransformIndex.createMappingsFromStringMap(singletonMap("a", "long")),
is(equalTo(singletonMap("a", singletonMap("type", "long"))))
);
assertThat(
TransformIndex.createMappingsFromStringMap(new HashMap<>() {{
put("a", "long");
put("b", "keyword");
}}),
is(equalTo(new HashMap<>() {{
put("a", singletonMap("type", "long"));
put("b", singletonMap("type", "keyword"));
}}))
);
assertThat(
TransformIndex.createMappingsFromStringMap(new HashMap<>() {{
put("a", "long");
put("a.b", "keyword");
}}),
is(equalTo(new HashMap<>() {{
put("a", new HashMap<>() {{
put("type", "long");
put("fields", singletonMap("b", singletonMap("type", "keyword")));
}});
}}))
);
assertThat(
TransformIndex.createMappingsFromStringMap(new HashMap<>() {{
put("a", "long");
put("a.b", "text");
put("a.b.c", "keyword");
}}),
is(equalTo(new HashMap<>() {{
put("a", new HashMap<>() {{
put("type", "long");
put("fields", new HashMap<>() {{
put("b", new HashMap<>() {{
put("type", "text");
put("fields", new HashMap<>() {{
put("c", singletonMap("type", "keyword"));
}});
}});
}});
}});
}}))
);
assertThat(
TransformIndex.createMappingsFromStringMap(new HashMap<>() {{
put("a", "object");
put("a.b", "long");
put("c", "nested");
put("c.d", "boolean");
put("f", "object");
put("f.g", "object");
put("f.g.h", "text");
put("f.g.h.i", "text");
}}),
is(equalTo(new HashMap<>() {{
put("a", new HashMap<>() {{
put("type", "object");
put("properties", new HashMap<>() {{
put("b", new HashMap<>() {{
put("type", "long");
}});
}});
}});
put("c", new HashMap<>() {{
put("type", "nested");
put("properties", new HashMap<>() {{
put("d", new HashMap<>() {{
put("type", "boolean");
}});
}});
}});
put("f", new HashMap<>() {{
put("type", "object");
put("properties", new HashMap<>() {{
put("g", new HashMap<>() {{
put("type", "object");
put("properties", new HashMap<>() {{
put("h", new HashMap<>() {{
put("type", "text");
put("fields", new HashMap<>() {{
put("i", singletonMap("type", "text"));
}});
}});
}});
}});
}});
}});
}}))
);
}
}

0 comments on commit 81343ac

Please sign in to comment.