From 88c388491267d548f956c66aa4c3a7ba0522c420 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Fri, 6 Sep 2024 19:13:06 +0200 Subject: [PATCH 1/3] Support widening of numeric types in union-types Only two lines of this PR are the actual fix. All the rest is updating the CSV-spec testing infrastructure to make it easier to test this, and adding the tests. The refactoring involve some cleanup and simplifications also. This update allows us to add alternative mappings of existing data files without copying the files and changing the header line. Some of the existing union-types test files were deleted as a result, which is a step in the right direction. --- .../plugin/esql/qa/testFixtures/build.gradle | 22 +- .../xpack/esql/CsvTestUtils.java | 19 +- .../xpack/esql/CsvTestsDataLoader.java | 219 ++++++++---------- .../resources/mapping-sample_data_str.json | 16 -- .../mapping-sample_data_ts_long.json | 16 -- .../src/main/resources/sample_data_str.csv | 8 - .../src/main/resources/union_types.csv-spec | 51 ++++ .../xpack/esql/action/EsqlCapabilities.java | 5 + .../xpack/esql/analysis/Analyzer.java | 4 +- .../convert/AbstractConvertFunction.java | 2 +- .../elasticsearch/xpack/esql/CsvTests.java | 20 +- 11 files changed, 195 insertions(+), 187 deletions(-) delete mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json delete mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json delete mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv diff --git a/x-pack/plugin/esql/qa/testFixtures/build.gradle b/x-pack/plugin/esql/qa/testFixtures/build.gradle index e8a95011100f5..b6ed610406631 100644 --- a/x-pack/plugin/esql/qa/testFixtures/build.gradle +++ b/x-pack/plugin/esql/qa/testFixtures/build.gradle @@ -2,16 +2,18 @@ apply plugin: 'elasticsearch.java' apply plugin: org.elasticsearch.gradle.dependencies.CompileOnlyResolvePlugin dependencies { - implementation project(':x-pack:plugin:esql:compute') - implementation project(':x-pack:plugin:esql') - compileOnly project(path: xpackModule('core')) - implementation project(":libs:elasticsearch-x-content") - implementation project(':client:rest') - implementation project(':libs:elasticsearch-logging') - implementation project(':test:framework') - api(testArtifact(project(xpackModule('esql-core')))) - implementation project(':server') - implementation "net.sf.supercsv:super-csv:${versions.supercsv}" + implementation project(':x-pack:plugin:esql:compute') + implementation project(':x-pack:plugin:esql') + compileOnly project(path: xpackModule('core')) + implementation project(":libs:elasticsearch-x-content") + implementation project(':client:rest') + implementation project(':libs:elasticsearch-logging') + implementation project(':test:framework') + api(testArtifact(project(xpackModule('esql-core')))) + implementation project(':server') + implementation "net.sf.supercsv:super-csv:${versions.supercsv}" + implementation "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" + implementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}" } /** diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java index c934a8926ee7e..70a054f233a3c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java @@ -118,7 +118,7 @@ public static Tuple skipVersionRange(String testName, String i return null; } - public static Tuple> loadPageFromCsv(URL source) throws Exception { + public static Tuple> loadPageFromCsv(URL source, Map typeMapping) throws Exception { record CsvColumn(String name, Type type, BuilderWrapper builderWrapper) implements Releasable { void append(String stringValue) { @@ -164,21 +164,16 @@ public void close() { if (columns == null) { columns = new CsvColumn[entries.length]; for (int i = 0; i < entries.length; i++) { - int split = entries[i].indexOf(':'); - String name, typeName; + String[] header = entries[i].split(":"); + String name = header[0].trim(); + String typeName = (typeMapping != null && typeMapping.containsKey(name)) ? typeMapping.get(name) + : header.length > 1 ? header[1].trim() + : null; - if (split < 0) { + if (typeName == null || typeName.isEmpty()) { throw new IllegalArgumentException( "A type is always expected in the schema definition; found " + entries[i] ); - } else { - name = entries[i].substring(0, split).trim(); - typeName = entries[i].substring(split + 1).trim(); - if (typeName.length() == 0) { - throw new IllegalArgumentException( - "A type is always expected in the schema definition; found " + entries[i] - ); - } } Type type = Type.asType(typeName); if (type == null) { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 9ee22113a4244..068adf190653a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -6,6 +6,10 @@ */ package org.elasticsearch.xpack.esql; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; + import org.apache.http.HttpEntity; import org.apache.http.HttpHost; import org.apache.http.auth.AuthScope; @@ -17,20 +21,13 @@ import org.elasticsearch.client.Response; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestClientBuilder; -import org.elasticsearch.cluster.ClusterModule; -import org.elasticsearch.common.CheckedBiFunction; import org.elasticsearch.common.Strings; import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; import org.elasticsearch.test.rest.ESRestTestCase; -import org.elasticsearch.xcontent.NamedXContentRegistry; -import org.elasticsearch.xcontent.XContent; -import org.elasticsearch.xcontent.XContentParser; -import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; import java.io.BufferedReader; @@ -51,66 +48,43 @@ public class CsvTestsDataLoader { private static final int BULK_DATA_SIZE = 100_000; - private static final TestsDataset EMPLOYEES = new TestsDataset("employees", "mapping-default.json", "employees.csv", null, false); - private static final TestsDataset HOSTS = new TestsDataset("hosts", "mapping-hosts.json", "hosts.csv"); - private static final TestsDataset APPS = new TestsDataset("apps", "mapping-apps.json", "apps.csv"); - private static final TestsDataset LANGUAGES = new TestsDataset("languages", "mapping-languages.json", "languages.csv"); - private static final TestsDataset ALERTS = new TestsDataset("alerts", "mapping-alerts.json", "alerts.csv"); - private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs", "mapping-ul_logs.json", "ul_logs.csv"); - private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data", "mapping-sample_data.json", "sample_data.csv"); - private static final TestsDataset SAMPLE_DATA_STR = new TestsDataset( - "sample_data_str", - "mapping-sample_data_str.json", - "sample_data_str.csv" - ); - private static final TestsDataset SAMPLE_DATA_TS_LONG = new TestsDataset( - "sample_data_ts_long", - "mapping-sample_data_ts_long.json", - "sample_data_ts_long.csv" - ); - private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset( - "missing_ip_sample_data", - "mapping-missing_ip_sample_data.json", - "missing_ip_sample_data.csv" - ); - private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv"); - private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr", "mapping-client_cidr.json", "client_cidr.csv"); - private static final TestsDataset AGES = new TestsDataset("ages", "mapping-ages.json", "ages.csv"); - private static final TestsDataset HEIGHTS = new TestsDataset("heights", "mapping-heights.json", "heights.csv"); - private static final TestsDataset DECADES = new TestsDataset("decades", "mapping-decades.json", "decades.csv"); - private static final TestsDataset AIRPORTS = new TestsDataset("airports", "mapping-airports.json", "airports.csv"); - private static final TestsDataset AIRPORTS_MP = new TestsDataset("airports_mp", "mapping-airports.json", "airports_mp.csv"); - private static final TestsDataset AIRPORTS_WEB = new TestsDataset("airports_web", "mapping-airports_web.json", "airports_web.csv"); - private static final TestsDataset DATE_NANOS = new TestsDataset("date_nanos", "mapping-date_nanos.json", "date_nanos.csv"); - private static final TestsDataset COUNTRIES_BBOX = new TestsDataset( - "countries_bbox", - "mapping-countries_bbox.json", - "countries_bbox.csv" - ); - private static final TestsDataset COUNTRIES_BBOX_WEB = new TestsDataset( - "countries_bbox_web", - "mapping-countries_bbox_web.json", - "countries_bbox_web.csv" - ); - private static final TestsDataset AIRPORT_CITY_BOUNDARIES = new TestsDataset( - "airport_city_boundaries", - "mapping-airport_city_boundaries.json", - "airport_city_boundaries.csv" - ); - private static final TestsDataset CARTESIAN_MULTIPOLYGONS = new TestsDataset( - "cartesian_multipolygons", - "mapping-cartesian_multipolygons.json", - "cartesian_multipolygons.csv" - ); - private static final TestsDataset DISTANCES = new TestsDataset("distances", "mapping-distances.json", "distances.csv"); - private static final TestsDataset K8S = new TestsDataset("k8s", "k8s-mappings.json", "k8s.csv", "k8s-settings.json", true); - private static final TestsDataset ADDRESSES = new TestsDataset("addresses", "mapping-addresses.json", "addresses.csv", null, true); - private static final TestsDataset BOOKS = new TestsDataset("books", "mapping-books.json", "books.csv", null, true); + private static final TestsDataset EMPLOYEES = new TestsDataset("employees", "mapping-default.json", "employees.csv").noSubfields(); + private static final TestsDataset HOSTS = new TestsDataset("hosts"); + private static final TestsDataset APPS = new TestsDataset("apps"); + private static final TestsDataset APPS_SHORT = APPS.withIndex("apps_short").withTypeMapping(Map.of("id", "short")); + private static final TestsDataset LANGUAGES = new TestsDataset("languages"); + private static final TestsDataset ALERTS = new TestsDataset("alerts"); + private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs"); + private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data"); + private static final TestsDataset SAMPLE_DATA_STR = SAMPLE_DATA.withIndex("sample_data_str") + .withTypeMapping(Map.of("client_ip", "keyword")); + private static final TestsDataset SAMPLE_DATA_TS_LONG = SAMPLE_DATA.withIndex("sample_data_ts_long") + .withData("sample_data_ts_long.csv") + .withTypeMapping(Map.of("@timestamp", "long")); + private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset("missing_ip_sample_data"); + private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips"); + private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr"); + private static final TestsDataset AGES = new TestsDataset("ages"); + private static final TestsDataset HEIGHTS = new TestsDataset("heights"); + private static final TestsDataset DECADES = new TestsDataset("decades"); + private static final TestsDataset AIRPORTS = new TestsDataset("airports"); + private static final TestsDataset AIRPORTS_MP = AIRPORTS.withIndex("airports_mp").withData("airports_mp.csv"); + private static final TestsDataset AIRPORTS_WEB = new TestsDataset("airports_web"); + private static final TestsDataset DATE_NANOS = new TestsDataset("date_nanos"); + private static final TestsDataset COUNTRIES_BBOX = new TestsDataset("countries_bbox"); + private static final TestsDataset COUNTRIES_BBOX_WEB = new TestsDataset("countries_bbox_web"); + private static final TestsDataset AIRPORT_CITY_BOUNDARIES = new TestsDataset("airport_city_boundaries"); + private static final TestsDataset CARTESIAN_MULTIPOLYGONS = new TestsDataset("cartesian_multipolygons"); + private static final TestsDataset DISTANCES = new TestsDataset("distances"); + private static final TestsDataset K8S = new TestsDataset("k8s", "k8s-mappings.json", "k8s.csv").withSetting("k8s-settings.json"); + private static final TestsDataset ADDRESSES = new TestsDataset("addresses"); + private static final TestsDataset BOOKS = new TestsDataset("books"); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), Map.entry(HOSTS.indexName, HOSTS), Map.entry(APPS.indexName, APPS), + Map.entry(APPS_SHORT.indexName, APPS_SHORT), Map.entry(LANGUAGES.indexName, LANGUAGES), Map.entry(UL_LOGS.indexName, UL_LOGS), Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA), @@ -258,18 +232,8 @@ public static void loadDataSetIntoEs(RestClient client, Logger logger) throws IO } private static void loadDataSetIntoEs(RestClient client, Logger logger, IndexCreator indexCreator) throws IOException { - for (var dataSet : CSV_DATASET_MAP.values()) { - final String settingName = dataSet.settingFileName != null ? "/" + dataSet.settingFileName : null; - load( - client, - dataSet.indexName, - "/" + dataSet.mappingFileName, - settingName, - "/" + dataSet.dataFileName, - dataSet.allowSubFields, - logger, - indexCreator - ); + for (var dataset : CSV_DATASET_MAP.values()) { + load(client, dataset, logger, indexCreator); } forceMerge(client, CSV_DATASET_MAP.keySet(), logger); for (var policy : ENRICH_POLICIES) { @@ -291,32 +255,51 @@ private static void loadEnrichPolicy(RestClient client, String policyName, Strin client.performRequest(request); } - private static void load( - RestClient client, - String indexName, - String mappingName, - String settingName, - String dataName, - boolean allowSubFields, - Logger logger, - IndexCreator indexCreator - ) throws IOException { + private static void load(RestClient client, TestsDataset dataset, Logger logger, IndexCreator indexCreator) throws IOException { + final String mappingName = "/" + dataset.mappingFileName; URL mapping = CsvTestsDataLoader.class.getResource(mappingName); if (mapping == null) { throw new IllegalArgumentException("Cannot find resource " + mappingName); } + final String dataName = "/" + dataset.dataFileName; URL data = CsvTestsDataLoader.class.getResource(dataName); if (data == null) { throw new IllegalArgumentException("Cannot find resource " + dataName); } Settings indexSettings = Settings.EMPTY; + final String settingName = dataset.settingFileName != null ? "/" + dataset.settingFileName : null; if (settingName != null) { indexSettings = Settings.builder() .loadFromStream(settingName, CsvTestsDataLoader.class.getResourceAsStream(settingName), false) .build(); } - indexCreator.createIndex(client, indexName, readTextFile(mapping), indexSettings); - loadCsvData(client, indexName, data, allowSubFields, CsvTestsDataLoader::createParser, logger); + indexCreator.createIndex(client, dataset.indexName, readMappingFile(mapping, dataset.typeMapping), indexSettings); + loadCsvData(client, dataset.indexName, data, dataset.allowSubFields, logger); + } + + private static String readMappingFile(URL resource, Map typeMapping) throws IOException { + String mappingJsonText = readTextFile(resource); + if (typeMapping == null || typeMapping.isEmpty()) { + return mappingJsonText; + } + boolean modified = false; + ObjectMapper mapper = new ObjectMapper(); + JsonNode mappingNode = mapper.readTree(mappingJsonText); + JsonNode propertiesNode = mappingNode.path("properties"); + + for (Map.Entry entry : typeMapping.entrySet()) { + String key = entry.getKey(); + String newType = entry.getValue(); + + if (propertiesNode.has(key)) { + modified = true; + ((ObjectNode) propertiesNode.get(key)).put("type", newType); + } + } + if (modified) { + return mapper.writerWithDefaultPrettyPrinter().writeValueAsString(mappingNode); + } + return mappingJsonText; } public static String readTextFile(URL resource) throws IOException { @@ -345,14 +328,8 @@ public static String readTextFile(URL resource) throws IOException { * - multi-values are comma separated * - commas inside multivalue fields can be escaped with \ (backslash) character */ - private static void loadCsvData( - RestClient client, - String indexName, - URL resource, - boolean allowSubFields, - CheckedBiFunction p, - Logger logger - ) throws IOException { + private static void loadCsvData(RestClient client, String indexName, URL resource, boolean allowSubFields, Logger logger) + throws IOException { ArrayList failures = new ArrayList<>(); StringBuilder builder = new StringBuilder(); try (BufferedReader reader = reader(resource)) { @@ -371,27 +348,17 @@ private static void loadCsvData( columns = new String[entries.length]; for (int i = 0; i < entries.length; i++) { int split = entries[i].indexOf(':'); - String name, typeName; - if (split < 0) { - throw new IllegalArgumentException( - "A type is always expected in the schema definition; found " + entries[i] - ); + columns[i] = entries[i].trim(); } else { - name = entries[i].substring(0, split).trim(); + String name = entries[i].substring(0, split).trim(); if (allowSubFields || name.contains(".") == false) { - typeName = entries[i].substring(split + 1).trim(); - if (typeName.isEmpty()) { - throw new IllegalArgumentException( - "A type is always expected in the schema definition; found " + entries[i] - ); - } + columns[i] = name; } else {// if it's a subfield, ignore it in the _bulk request - name = null; + columns[i] = null; subFieldsIndices.add(i); } } - columns[i] = name; } } // data rows @@ -534,22 +501,40 @@ private static void forceMerge(RestClient client, Set indices, Logger lo } } - private static XContentParser createParser(XContent xContent, InputStream data) throws IOException { - NamedXContentRegistry contentRegistry = new NamedXContentRegistry(ClusterModule.getNamedXWriteables()); - XContentParserConfiguration config = XContentParserConfiguration.EMPTY.withRegistry(contentRegistry) - .withDeprecationHandler(LoggingDeprecationHandler.INSTANCE); - return xContent.createParser(config, data); - } - public record TestsDataset( String indexName, String mappingFileName, String dataFileName, String settingFileName, - boolean allowSubFields + boolean allowSubFields, + Map typeMapping ) { public TestsDataset(String indexName, String mappingFileName, String dataFileName) { - this(indexName, mappingFileName, dataFileName, null, true); + this(indexName, mappingFileName, dataFileName, null, true, null); + } + + public TestsDataset(String indexName) { + this(indexName, "mapping-" + indexName + ".json", indexName + ".csv", null, true, null); + } + + public TestsDataset withIndex(String indexName) { + return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, allowSubFields, typeMapping); + } + + public TestsDataset withData(String dataFileName) { + return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, allowSubFields, typeMapping); + } + + public TestsDataset withSetting(String settingFileName) { + return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, allowSubFields, typeMapping); + } + + public TestsDataset noSubfields() { + return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, false, typeMapping); + } + + public TestsDataset withTypeMapping(Map typeMapping) { + return new TestsDataset(indexName, mappingFileName, dataFileName, settingFileName, allowSubFields, typeMapping); } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json deleted file mode 100644 index 9e97de8c92928..0000000000000 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "properties": { - "@timestamp": { - "type": "date" - }, - "client_ip": { - "type": "keyword" - }, - "event_duration": { - "type": "long" - }, - "message": { - "type": "keyword" - } - } -} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json deleted file mode 100644 index ecf21a2a919d0..0000000000000 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "properties": { - "@timestamp": { - "type": "long" - }, - "client_ip": { - "type": "ip" - }, - "event_duration": { - "type": "long" - }, - "message": { - "type": "keyword" - } - } -} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv deleted file mode 100644 index bc98671adc7ff..0000000000000 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv +++ /dev/null @@ -1,8 +0,0 @@ -@timestamp:date,client_ip:keyword,event_duration:long,message:keyword -2023-10-23T13:55:01.543Z,172.21.3.15,1756467,Connected to 10.1.0.1 -2023-10-23T13:53:55.832Z,172.21.3.15,5033755,Connection error -2023-10-23T13:52:55.015Z,172.21.3.15,8268153,Connection error -2023-10-23T13:51:54.732Z,172.21.3.15,725448,Connection error -2023-10-23T13:33:34.937Z,172.21.0.5,1232382,Disconnected -2023-10-23T12:27:28.948Z,172.21.2.113,2764889,Connected to 10.1.0.2 -2023-10-23T12:15:03.360Z,172.21.2.162,3450233,Connected to 10.1.0.3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index c6a2d47a78dc9..3218962678d9f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -1351,3 +1351,54 @@ FROM sample_data, sample_data_ts_long null | 172.21.0.5 | 1232382 | Disconnected | Disconnected null | 172.21.0.5 | 1232382 | Disconnected | Disconnected ; + +shortIntegerWidening +required_capability: union_types +required_capability: metadata_fields +required_capability: casting_operator +required_capability: union_types_numeric_widening + +FROM apps, apps_short METADATA _index +| EVAL id = id::integer +| KEEP _index, id, version, name +| WHERE name == "aaaaa" OR name == "hhhhh" +| SORT _index ASC, id ASC +; + +_index:keyword | id:integer | version:version | name:keyword +apps | 1 | 1 | aaaaa +apps | 8 | 1.2.3.4 | hhhhh +apps | 12 | 1.2.3.4 | aaaaa +apps_short | 1 | 1 | aaaaa +apps_short | 8 | 1.2.3.4 | hhhhh +apps_short | 12 | 1.2.3.4 | aaaaa +; + +shortIntegerWideningStats +required_capability: union_types +required_capability: casting_operator +required_capability: union_types_numeric_widening + +FROM apps, apps_short +| EVAL id = id::integer +| STATS count=count() BY name, id +| KEEP id, name, count +| SORT id ASC, name ASC +; + +id:integer | name:keyword | count:long +1 | aaaaa | 2 +2 | bbbbb | 2 +3 | ccccc | 2 +4 | ddddd | 2 +5 | eeeee | 2 +6 | fffff | 2 +7 | ggggg | 2 +8 | hhhhh | 2 +9 | iiiii | 2 +10 | jjjjj | 2 +11 | kkkkk | 2 +12 | aaaaa | 2 +13 | lllll | 2 +14 | mmmmm | 2 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 120323ebeb7a6..83aa3df090fed 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -188,6 +188,11 @@ public enum Cap { */ UNION_TYPES_MISSING_FIELD, + /** + * Fix for widening of short numeric types in union-types. + */ + UNION_TYPES_NUMERIC_WIDENING, + /** * Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles. * see Parsing large numbers is inconsistent #104323 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 4f9ef3df29a85..9288e1cf81a15 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -115,7 +115,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; -import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount; import static org.elasticsearch.xpack.esql.stats.FeatureMetric.LIMIT; @@ -1223,8 +1222,7 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List< HashMap typeResolutions = new HashMap<>(); Set supportedTypes = convert.supportedTypes(); imf.types().forEach(type -> { - // TODO: Shouldn't we perform widening of small numerical types here? - if (supportedTypes.contains(type)) { + if (supportedTypes.contains(type.widenSmallNumeric())) { TypeResolutionKey key = new TypeResolutionKey(fa.name(), type); var concreteConvert = typeSpecificConvert(convert, fa.source(), type, imf); typeResolutions.put(key, concreteConvert); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java index cf97558cd2676..18b1f80c4abc1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java @@ -63,7 +63,7 @@ protected AbstractConvertFunction(StreamInput in) throws IOException { * Build the evaluator given the evaluator a multivalued field. */ protected final ExpressionEvaluator.Factory evaluator(ExpressionEvaluator.Factory fieldEval) { - DataType sourceType = field().dataType(); + DataType sourceType = field().dataType().widenSmallNumeric(); var factory = factories().get(sourceType); if (factory == null) { throw EsqlIllegalArgumentException.illegalDataType(sourceType); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index a7d8c98a606b5..faf9d04532f1a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -54,6 +54,8 @@ import org.elasticsearch.xpack.esql.analysis.EnrichResolution; import org.elasticsearch.xpack.esql.analysis.PreAnalyzer; import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; import org.elasticsearch.xpack.esql.enrich.ResolvedEnrichPolicy; import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; @@ -308,8 +310,18 @@ protected void assertResults(ExpectedResults expected, ActualResults actual, boo // CsvTestUtils.logData(actual.values(), LOGGER); } - private static IndexResolution loadIndexResolution(String mappingName, String indexName) { + private static IndexResolution loadIndexResolution(String mappingName, String indexName, Map typeMapping) { var mapping = new TreeMap<>(loadMapping(mappingName)); + if ((typeMapping == null || typeMapping.isEmpty()) == false) { + for (var entry : typeMapping.entrySet()) { + if (mapping.containsKey(entry.getKey())) { + DataType dataType = DataType.fromTypeName(entry.getValue()); + EsField field = mapping.get(entry.getKey()); + EsField editedField = new EsField(field.getName(), dataType, field.getProperties(), field.isAggregatable()); + mapping.put(entry.getKey(), editedField); + } + } + } return IndexResolution.valid(new EsIndex(indexName, mapping, Map.of(indexName, IndexMode.STANDARD))); } @@ -320,7 +332,7 @@ private static EnrichResolution loadEnrichPolicies() { CsvTestsDataLoader.TestsDataset sourceIndex = CSV_DATASET_MAP.get(policy.getIndices().get(0)); // this could practically work, but it's wrong: // EnrichPolicyResolution should contain the policy (system) index, not the source index - EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName()).get(); + EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName(), null).get(); var concreteIndices = Map.of(RemoteClusterService.LOCAL_CLUSTER_GROUP_KEY, Iterables.get(esIndex.concreteIndices(), 0)); enrichResolution.addResolvedPolicy( policyConfig.policyName(), @@ -349,7 +361,7 @@ private static EnrichPolicy loadEnrichPolicyMapping(String policyFileName) { } private LogicalPlan analyzedPlan(LogicalPlan parsed, CsvTestsDataLoader.TestsDataset dataset) { - var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName()); + var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName(), dataset.typeMapping()); var enrichPolicies = loadEnrichPolicies(); var analyzer = new Analyzer(new AnalyzerContext(configuration, functionRegistry, indexResolution, enrichPolicies), TEST_VERIFIER); LogicalPlan plan = analyzer.analyze(parsed); @@ -392,7 +404,7 @@ private static CsvTestsDataLoader.TestsDataset testsDataset(LogicalPlan parsed) } private static TestPhysicalOperationProviders testOperationProviders(CsvTestsDataLoader.TestsDataset dataset) throws Exception { - var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName())); + var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName()), dataset.typeMapping()); return new TestPhysicalOperationProviders(testData.v1(), testData.v2()); } From ad0bf9ef0628c3d9a61900462d93bc0c28ae649d Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Fri, 6 Sep 2024 19:16:59 +0200 Subject: [PATCH 2/3] Update docs/changelog/112610.yaml --- docs/changelog/112610.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/112610.yaml diff --git a/docs/changelog/112610.yaml b/docs/changelog/112610.yaml new file mode 100644 index 0000000000000..3d67a80a8f0b3 --- /dev/null +++ b/docs/changelog/112610.yaml @@ -0,0 +1,6 @@ +pr: 112610 +summary: Support widening of numeric types in union-types +area: ES|QL +type: bug +issues: + - 111277 From b06894c9095ded78d95bd7c3048c0565a4e520de Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Fri, 6 Sep 2024 19:17:57 +0200 Subject: [PATCH 3/3] Link capability to PR --- .../org/elasticsearch/xpack/esql/action/EsqlCapabilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 83aa3df090fed..06dcea55499da 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -189,7 +189,7 @@ public enum Cap { UNION_TYPES_MISSING_FIELD, /** - * Fix for widening of short numeric types in union-types. + * Fix for widening of short numeric types in union-types. Done in #112610 */ UNION_TYPES_NUMERIC_WIDENING,