Support widening of numeric types in union-types #112610

Merged
6 changes: 6 additions & 0 deletions docs/changelog/112610.yaml
@@ -0,0 +1,6 @@
pr: 112610
summary: Support widening of numeric types in union-types
area: ES|QL
type: bug
issues:
- 111277
22 changes: 12 additions & 10 deletions x-pack/plugin/esql/qa/testFixtures/build.gradle
@@ -2,16 +2,18 @@ apply plugin: 'elasticsearch.java'
apply plugin: org.elasticsearch.gradle.dependencies.CompileOnlyResolvePlugin

dependencies {
implementation project(':x-pack:plugin:esql:compute')
implementation project(':x-pack:plugin:esql')
compileOnly project(path: xpackModule('core'))
implementation project(":libs:elasticsearch-x-content")
implementation project(':client:rest')
implementation project(':libs:elasticsearch-logging')
implementation project(':test:framework')
api(testArtifact(project(xpackModule('esql-core'))))
implementation project(':server')
implementation "net.sf.supercsv:super-csv:${versions.supercsv}"
implementation project(':x-pack:plugin:esql:compute')
implementation project(':x-pack:plugin:esql')
compileOnly project(path: xpackModule('core'))
implementation project(":libs:elasticsearch-x-content")
implementation project(':client:rest')
implementation project(':libs:elasticsearch-logging')
implementation project(':test:framework')
api(testArtifact(project(xpackModule('esql-core'))))
implementation project(':server')
implementation "net.sf.supercsv:super-csv:${versions.supercsv}"
implementation "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
implementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}"
}

/**
@@ -118,7 +118,7 @@ public static Tuple<Version, Version> skipVersionRange(String testName, String i
return null;
}

public static Tuple<Page, List<String>> loadPageFromCsv(URL source) throws Exception {
public static Tuple<Page, List<String>> loadPageFromCsv(URL source, Map<String, String> typeMapping) throws Exception {

record CsvColumn(String name, Type type, BuilderWrapper builderWrapper) implements Releasable {
void append(String stringValue) {
@@ -164,21 +164,16 @@ public void close() {
if (columns == null) {
columns = new CsvColumn[entries.length];
for (int i = 0; i < entries.length; i++) {
int split = entries[i].indexOf(':');
String name, typeName;
String[] header = entries[i].split(":");
String name = header[0].trim();
String typeName = (typeMapping != null && typeMapping.containsKey(name)) ? typeMapping.get(name)
: header.length > 1 ? header[1].trim()
: null;

if (split < 0) {
if (typeName == null || typeName.isEmpty()) {
throw new IllegalArgumentException(
"A type is always expected in the schema definition; found " + entries[i]
);
} else {
name = entries[i].substring(0, split).trim();
typeName = entries[i].substring(split + 1).trim();
if (typeName.length() == 0) {
throw new IllegalArgumentException(
"A type is always expected in the schema definition; found " + entries[i]
);
}
}
Type type = Type.asType(typeName);
if (type == null) {
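For illustration, the precedence implemented in the change above (an explicit entry in typeMapping wins over the inline ":type" suffix in the CSV schema header, and a type missing from both places is an error) boils down to something like this toy reconstruction; it is not the fixture code itself:

```java
import java.util.Map;

// Toy reconstruction of the header/type-override precedence, for illustration only.
final class HeaderTypeResolution {
    static String resolveTypeName(String headerEntry, Map<String, String> typeMapping) {
        String[] header = headerEntry.split(":");
        String name = header[0].trim();
        if (typeMapping != null && typeMapping.containsKey(name)) {
            return typeMapping.get(name);                    // e.g. "short" supplied by a dataset-level override
        }
        return header.length > 1 ? header[1].trim() : null;  // e.g. "integer" from a header entry "id:integer"
    }
}
```

So a header entry of id:integer combined with an override map of {id=short} loads the column as short, which is presumably what the new apps_short dataset relies on.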

Large diffs are not rendered by default.

This file was deleted.

This file was deleted.

This file was deleted.

@@ -1351,3 +1351,54 @@ FROM sample_data, sample_data_ts_long
null | 172.21.0.5 | 1232382 | Disconnected | Disconnected
null | 172.21.0.5 | 1232382 | Disconnected | Disconnected
;

shortIntegerWidening
required_capability: union_types
required_capability: metadata_fields
required_capability: casting_operator
required_capability: union_types_numeric_widening

FROM apps, apps_short METADATA _index
| EVAL id = id::integer
Contributor:
This is cool!

However, to reduce risk some more (some data types can be quirky and, I guess, something could go wrong at block-loading time for types like float and byte ...); how about we add to this, or replace it with, a parameterized integration test that exercises all widened data types?

RestEsqlTestCase is quite nice and, like the CSV tests, is executed in multiple environments (single/multi-node etc.).

Contributor:
Tests in RestEsqlTestCase also have the benefit that we can more easily test cases where more than two different types become a union type, e.g. float, byte and, say, keyword all being fed into to_string.
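A rough sketch of what such a parameterized REST-level test could look like. The class name, index names, helpers and the exact set of widened types are assumptions for illustration, not what this PR implements; it only leans on the public _query endpoint and standard ESRestTestCase plumbing.

```java
import java.io.IOException;
import java.util.Locale;
import java.util.Map;

import org.elasticsearch.client.Request;
import org.elasticsearch.test.rest.ESRestTestCase;

public class UnionTypeWideningRestIT extends ESRestTestCase {

    // narrow mapped type -> type it is widened to (and cast to) at query time
    private static final Map<String, String> WIDENED = Map.of(
        "byte", "integer",
        "short", "integer",
        "half_float", "double",
        "float", "double"
    );

    public void testNumericWideningAcrossIndices() throws IOException {
        for (Map.Entry<String, String> e : WIDENED.entrySet()) {
            String narrow = e.getKey();
            String wide = e.getValue();
            String narrowIndex = "idx_" + narrow;
            String wideIndex = "idx_" + narrow + "_wide";

            // Two indices mapping the same field with different numeric types.
            createIndexWithId(narrowIndex, narrow);
            createIndexWithId(wideIndex, wide);
            indexDoc(narrowIndex, 1);
            indexDoc(wideIndex, 2);

            // The union-typed field should be usable once cast to the wide type.
            Request query = new Request("POST", "/_query");
            query.setJsonEntity(String.format(Locale.ROOT, """
                { "query": "FROM %s,%s | EVAL id = id::%s | STATS c = COUNT() BY id | SORT id" }
                """, narrowIndex, wideIndex, wide));
            assertOK(client().performRequest(query));
        }
    }

    private void createIndexWithId(String index, String numericType) throws IOException {
        Request request = new Request("PUT", "/" + index);
        request.setJsonEntity(String.format(Locale.ROOT, """
            { "mappings": { "properties": { "id": { "type": "%s" } } } }
            """, numericType));
        assertOK(client().performRequest(request));
    }

    private void indexDoc(String index, int id) throws IOException {
        Request request = new Request("POST", "/" + index + "/_doc?refresh=true");
        request.setJsonEntity("{ \"id\": " + id + " }");
        assertOK(client().performRequest(request));
    }
}
```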

| KEEP _index, id, version, name
| WHERE name == "aaaaa" OR name == "hhhhh"
| SORT _index ASC, id ASC
;

_index:keyword | id:integer | version:version | name:keyword
apps | 1 | 1 | aaaaa
apps | 8 | 1.2.3.4 | hhhhh
apps | 12 | 1.2.3.4 | aaaaa
apps_short | 1 | 1 | aaaaa
apps_short | 8 | 1.2.3.4 | hhhhh
apps_short | 12 | 1.2.3.4 | aaaaa
;

shortIntegerWideningStats
required_capability: union_types
required_capability: casting_operator
required_capability: union_types_numeric_widening

FROM apps, apps_short
| EVAL id = id::integer
| STATS count=count() BY name, id
| KEEP id, name, count
| SORT id ASC, name ASC
;

id:integer | name:keyword | count:long
1 | aaaaa | 2
2 | bbbbb | 2
3 | ccccc | 2
4 | ddddd | 2
5 | eeeee | 2
6 | fffff | 2
7 | ggggg | 2
8 | hhhhh | 2
9 | iiiii | 2
10 | jjjjj | 2
11 | kkkkk | 2
12 | aaaaa | 2
13 | lllll | 2
14 | mmmmm | 2
;
@@ -188,6 +188,11 @@ public enum Cap {
*/
UNION_TYPES_MISSING_FIELD,

/**
* Fix for widening of short numeric types in union-types. Done in #112610
*/
UNION_TYPES_NUMERIC_WIDENING,

/**
* Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles.
* see <a href="https://github.com/elastic/elasticsearch/issues/104323"> Parsing large numbers is inconsistent #104323 </a>
@@ -115,7 +115,6 @@
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.type.DataType.LONG;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG;
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION;
import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount;
import static org.elasticsearch.xpack.esql.stats.FeatureMetric.LIMIT;
@@ -1223,8 +1222,7 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
Set<DataType> supportedTypes = convert.supportedTypes();
imf.types().forEach(type -> {
// TODO: Shouldn't we perform widening of small numerical types here?
if (supportedTypes.contains(type)) {
if (supportedTypes.contains(type.widenSmallNumeric())) {
Comment on lines -1226 to +1225
Contributor:
Contrary to the funny TODO that I left here, haha, maybe we should actually widen at the point where the InvalidMappedField gets created? That'd be consistent with how we widen the types of regular EsFields in Analyzer.mappingAsAttributes. If we do so, we could enforce this as an invariant of FieldAttribute (so that when it contains an IMF, it has to be widened).
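For context, a minimal sketch of the widening being discussed, under the assumption that DataType.widenSmallNumeric maps the sub-integer whole-number types onto integer and the reduced-precision floating point types onto double (the authoritative mapping lives in DataType itself):

```java
import org.elasticsearch.xpack.esql.core.type.DataType;

// Sketch of the assumed semantics of DataType#widenSmallNumeric; details may differ.
final class WideningSketch {
    static DataType widenSmallNumeric(DataType type) {
        return switch (type) {
            // whole-number types narrower than int are represented as int blocks
            case BYTE, SHORT -> DataType.INTEGER;
            // reduced-precision floating point types are represented as double blocks
            case HALF_FLOAT, FLOAT, SCALED_FLOAT -> DataType.DOUBLE;
            // everything else already has a block representation of its own
            default -> type;
        };
    }
}
```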

TypeResolutionKey key = new TypeResolutionKey(fa.name(), type);
var concreteConvert = typeSpecificConvert(convert, fa.source(), type, imf);
typeResolutions.put(key, concreteConvert);
@@ -63,7 +63,7 @@ protected AbstractConvertFunction(StreamInput in) throws IOException {
* Build the evaluator given the evaluator a multivalued field.
*/
protected final ExpressionEvaluator.Factory evaluator(ExpressionEvaluator.Factory fieldEval) {
DataType sourceType = field().dataType();
DataType sourceType = field().dataType().widenSmallNumeric();
Contributor:
Uhh, how did that field not end up being widened in the first place? In Analyzer.mappingAsAttributes, line 240, each field attribute's field should already have been widened, including the contained EsField's type.

What fails if we do not widen here? I'd like to figure out if this is really necessary.

Also, to avoid confusion in the future, do you think we should add an assert to FieldAttribute's constructor to ensure it only ever gets built with already widened data types?
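A minimal sketch of that suggested invariant, with the constructor shape simplified (the real FieldAttribute constructor takes a Source, a parent, a name and more; the class and field names here are placeholders):

```java
import org.elasticsearch.xpack.esql.core.type.EsField;

// Illustration of the proposed assertion only, not the real FieldAttribute.
class FieldAttributeSketch {
    private final EsField field;

    FieldAttributeSketch(EsField field) {
        assert field.getDataType() == field.getDataType().widenSmallNumeric()
            : "FieldAttribute built with a non-widened type [" + field.getDataType() + "]";
        this.field = field;
    }
}
```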

Contributor (author):
I was surprised this failed too. It fails later in planning; I don't remember the exact stack trace, but the error was that this function was being called with an unsupported type (the narrow type). I understood the evaluator to be built much later, so this surprised me as well. I could re-investigate, because perhaps there is a code path to this that is happening unnecessarily early?

Contributor:
I think this deserves a follow-up, because there are starting to be many places where widening is necessary - and we only find out through failed queries/tests :/ I opened #112691.

Contributor (author):
I investigated, and this exception is thrown during query execution (i.e. after local physical planning), during the setup of the type-converting block loader at https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java#L384.

This is well past the analysis and planning phases, so it is clear that the types are not widened in general, but only in specific cases. A follow-up investigation makes a lot of sense.

Contributor (author):
For completeness, this is the stack trace:

org.elasticsearch.xpack.esql.EsqlIllegalArgumentException: illegal data type [short]
	at org.elasticsearch.xpack.esql.EsqlIllegalArgumentException.illegalDataType(EsqlIllegalArgumentException.java:43)
	at org.elasticsearch.xpack.esql.EsqlIllegalArgumentException.illegalDataType(EsqlIllegalArgumentException.java:39)
	at org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction.evaluator(AbstractConvertFunction.java:69)
	at org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction.toEvaluator(AbstractConvertFunction.java:128)
	at org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders.<init>(EsPhysicalOperationProviders.java:384)
	at org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders.getBlockLoaderFor(EsPhysicalOperationProviders.java:143)
	at org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders.lambda(EsPhysicalOperationProviders.java:123)
	at org.elasticsearch.compute.lucene.ValuesSourceReaderOperator.newShard(ValuesSourceReaderOperator.java:470)
	at org.elasticsearch.compute.lucene.ValuesSourceReaderOperator.positionFieldWork(ValuesSourceReaderOperator.java:195)
	at org.elasticsearch.compute.lucene.ValuesSourceReaderOperator.loadFromSingleLeaf(ValuesSourceReaderOperator.java:220)
	at org.elasticsearch.compute.lucene.ValuesSourceReaderOperator.process(ValuesSourceReaderOperator.java:143)
	at org.elasticsearch.compute.operator.AbstractPageMappingOperator.getOutput(AbstractPageMappingOperator.java:76)
	at org.elasticsearch.compute.operator.Driver.runSingleLoopIteration(Driver.java:258)
	at org.elasticsearch.compute.operator.Driver.run(Driver.java:189)
	at org.elasticsearch.compute.operator.Driver.doRun(Driver.java:378)

var factory = factories().get(sourceType);
if (factory == null) {
throw EsqlIllegalArgumentException.illegalDataType(sourceType);
@@ -54,6 +54,8 @@
import org.elasticsearch.xpack.esql.analysis.EnrichResolution;
import org.elasticsearch.xpack.esql.analysis.PreAnalyzer;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.enrich.EnrichLookupService;
import org.elasticsearch.xpack.esql.enrich.ResolvedEnrichPolicy;
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
@@ -308,8 +310,18 @@ protected void assertResults(ExpectedResults expected, ActualResults actual, boo
// CsvTestUtils.logData(actual.values(), LOGGER);
}

private static IndexResolution loadIndexResolution(String mappingName, String indexName) {
private static IndexResolution loadIndexResolution(String mappingName, String indexName, Map<String, String> typeMapping) {
var mapping = new TreeMap<>(loadMapping(mappingName));
if ((typeMapping == null || typeMapping.isEmpty()) == false) {
for (var entry : typeMapping.entrySet()) {
if (mapping.containsKey(entry.getKey())) {
DataType dataType = DataType.fromTypeName(entry.getValue());
EsField field = mapping.get(entry.getKey());
EsField editedField = new EsField(field.getName(), dataType, field.getProperties(), field.isAggregatable());
mapping.put(entry.getKey(), editedField);
}
}
}
return IndexResolution.valid(new EsIndex(indexName, mapping, Map.of(indexName, IndexMode.STANDARD)));
}

@@ -320,7 +332,7 @@ private static EnrichResolution loadEnrichPolicies() {
CsvTestsDataLoader.TestsDataset sourceIndex = CSV_DATASET_MAP.get(policy.getIndices().get(0));
// this could practically work, but it's wrong:
// EnrichPolicyResolution should contain the policy (system) index, not the source index
EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName()).get();
EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName(), null).get();
var concreteIndices = Map.of(RemoteClusterService.LOCAL_CLUSTER_GROUP_KEY, Iterables.get(esIndex.concreteIndices(), 0));
enrichResolution.addResolvedPolicy(
policyConfig.policyName(),
@@ -349,7 +361,7 @@ private static EnrichPolicy loadEnrichPolicyMapping(String policyFileName) {
}

private LogicalPlan analyzedPlan(LogicalPlan parsed, CsvTestsDataLoader.TestsDataset dataset) {
var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName());
var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName(), dataset.typeMapping());
var enrichPolicies = loadEnrichPolicies();
var analyzer = new Analyzer(new AnalyzerContext(configuration, functionRegistry, indexResolution, enrichPolicies), TEST_VERIFIER);
LogicalPlan plan = analyzer.analyze(parsed);
@@ -392,7 +404,7 @@ private static CsvTestsDataLoader.TestsDataset testsDataset(LogicalPlan parsed)
}

private static TestPhysicalOperationProviders testOperationProviders(CsvTestsDataLoader.TestsDataset dataset) throws Exception {
var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName()));
var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName()), dataset.typeMapping());
return new TestPhysicalOperationProviders(testData.v1(), testData.v2());
}
