Skip to content

Commit

Permalink
Support widening of numeric types in union-types (#112610)
Browse files Browse the repository at this point in the history
* Support widening of numeric types in union-types

Only two lines of this PR are the actual fix.
All the rest is updating the CSV-spec testing infrastructure to make it easier to test this, and adding the tests.
The refactoring also involves some cleanup and simplifications.
This update allows us to add alternative mappings of existing data files without copying the files and changing the header line.
Some of the existing union-types test files were deleted as a result, which is a step in the right direction.

* Update docs/changelog/112610.yaml

* Link capability to PR
  • Loading branch information
craigtaverner authored Sep 11, 2024
1 parent 077b585 commit bb872e6
Show file tree
Hide file tree
Showing 12 changed files with 201 additions and 187 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/112610.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 112610
summary: Support widening of numeric types in union-types
area: ES|QL
type: bug
issues:
- 111277
22 changes: 12 additions & 10 deletions x-pack/plugin/esql/qa/testFixtures/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@ apply plugin: 'elasticsearch.java'
apply plugin: org.elasticsearch.gradle.dependencies.CompileOnlyResolvePlugin

dependencies {
implementation project(':x-pack:plugin:esql:compute')
implementation project(':x-pack:plugin:esql')
compileOnly project(path: xpackModule('core'))
implementation project(":libs:elasticsearch-x-content")
implementation project(':client:rest')
implementation project(':libs:elasticsearch-logging')
implementation project(':test:framework')
api(testArtifact(project(xpackModule('esql-core'))))
implementation project(':server')
implementation "net.sf.supercsv:super-csv:${versions.supercsv}"
implementation project(':x-pack:plugin:esql:compute')
implementation project(':x-pack:plugin:esql')
compileOnly project(path: xpackModule('core'))
implementation project(":libs:elasticsearch-x-content")
implementation project(':client:rest')
implementation project(':libs:elasticsearch-logging')
implementation project(':test:framework')
api(testArtifact(project(xpackModule('esql-core'))))
implementation project(':server')
implementation "net.sf.supercsv:super-csv:${versions.supercsv}"
implementation "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
implementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}"
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public static Tuple<Version, Version> skipVersionRange(String testName, String i
return null;
}

public static Tuple<Page, List<String>> loadPageFromCsv(URL source) throws Exception {
public static Tuple<Page, List<String>> loadPageFromCsv(URL source, Map<String, String> typeMapping) throws Exception {

record CsvColumn(String name, Type type, BuilderWrapper builderWrapper) implements Releasable {
void append(String stringValue) {
Expand Down Expand Up @@ -164,21 +164,16 @@ public void close() {
if (columns == null) {
columns = new CsvColumn[entries.length];
for (int i = 0; i < entries.length; i++) {
int split = entries[i].indexOf(':');
String name, typeName;
String[] header = entries[i].split(":");
String name = header[0].trim();
String typeName = (typeMapping != null && typeMapping.containsKey(name)) ? typeMapping.get(name)
: header.length > 1 ? header[1].trim()
: null;

if (split < 0) {
if (typeName == null || typeName.isEmpty()) {
throw new IllegalArgumentException(
"A type is always expected in the schema definition; found " + entries[i]
);
} else {
name = entries[i].substring(0, split).trim();
typeName = entries[i].substring(split + 1).trim();
if (typeName.length() == 0) {
throw new IllegalArgumentException(
"A type is always expected in the schema definition; found " + entries[i]
);
}
}
Type type = Type.asType(typeName);
if (type == null) {
Expand Down

Large diffs are not rendered by default.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -1351,3 +1351,54 @@ FROM sample_data, sample_data_ts_long
null | 172.21.0.5 | 1232382 | Disconnected | Disconnected
null | 172.21.0.5 | 1232382 | Disconnected | Disconnected
;

shortIntegerWidening
required_capability: union_types
required_capability: metadata_fields
required_capability: casting_operator
required_capability: union_types_numeric_widening

FROM apps, apps_short METADATA _index
| EVAL id = id::integer
| KEEP _index, id, version, name
| WHERE name == "aaaaa" OR name == "hhhhh"
| SORT _index ASC, id ASC
;

_index:keyword | id:integer | version:version | name:keyword
apps | 1 | 1 | aaaaa
apps | 8 | 1.2.3.4 | hhhhh
apps | 12 | 1.2.3.4 | aaaaa
apps_short | 1 | 1 | aaaaa
apps_short | 8 | 1.2.3.4 | hhhhh
apps_short | 12 | 1.2.3.4 | aaaaa
;

shortIntegerWideningStats
required_capability: union_types
required_capability: casting_operator
required_capability: union_types_numeric_widening

FROM apps, apps_short
| EVAL id = id::integer
| STATS count=count() BY name, id
| KEEP id, name, count
| SORT id ASC, name ASC
;

id:integer | name:keyword | count:long
1 | aaaaa | 2
2 | bbbbb | 2
3 | ccccc | 2
4 | ddddd | 2
5 | eeeee | 2
6 | fffff | 2
7 | ggggg | 2
8 | hhhhh | 2
9 | iiiii | 2
10 | jjjjj | 2
11 | kkkkk | 2
12 | aaaaa | 2
13 | lllll | 2
14 | mmmmm | 2
;
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@ public enum Cap {
*/
UNION_TYPES_MISSING_FIELD,

/**
* Fix for widening of short numeric types in union-types. Done in #112610
*/
UNION_TYPES_NUMERIC_WIDENING,

/**
* Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles.
* see <a href="https://github.com/elastic/elasticsearch/issues/104323"> Parsing large numbers is inconsistent #104323 </a>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.type.DataType.LONG;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG;
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION;
import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount;
import static org.elasticsearch.xpack.esql.stats.FeatureMetric.LIMIT;
Expand Down Expand Up @@ -1223,8 +1222,7 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
Set<DataType> supportedTypes = convert.supportedTypes();
imf.types().forEach(type -> {
// TODO: Shouldn't we perform widening of small numerical types here?
if (supportedTypes.contains(type)) {
if (supportedTypes.contains(type.widenSmallNumeric())) {
TypeResolutionKey key = new TypeResolutionKey(fa.name(), type);
var concreteConvert = typeSpecificConvert(convert, fa.source(), type, imf);
typeResolutions.put(key, concreteConvert);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ protected AbstractConvertFunction(StreamInput in) throws IOException {
* Build the evaluator given the evaluator a multivalued field.
*/
protected final ExpressionEvaluator.Factory evaluator(ExpressionEvaluator.Factory fieldEval) {
DataType sourceType = field().dataType();
DataType sourceType = field().dataType().widenSmallNumeric();
var factory = factories().get(sourceType);
if (factory == null) {
throw EsqlIllegalArgumentException.illegalDataType(sourceType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@
import org.elasticsearch.xpack.esql.analysis.EnrichResolution;
import org.elasticsearch.xpack.esql.analysis.PreAnalyzer;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.enrich.EnrichLookupService;
import org.elasticsearch.xpack.esql.enrich.ResolvedEnrichPolicy;
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
Expand Down Expand Up @@ -308,8 +310,18 @@ protected void assertResults(ExpectedResults expected, ActualResults actual, boo
// CsvTestUtils.logData(actual.values(), LOGGER);
}

private static IndexResolution loadIndexResolution(String mappingName, String indexName) {
/**
 * Builds an {@link IndexResolution} for a test index from a named mapping file,
 * optionally overriding the declared type of individual fields.
 *
 * @param mappingName name of the mapping resource to load
 * @param indexName   name of the (single) concrete index to resolve
 * @param typeMapping optional field-name to type-name overrides; may be null or empty,
 *                    in which case the mapping is used unchanged
 * @return a valid resolution over a standard-mode index backed by the (possibly edited) mapping
 */
private static IndexResolution loadIndexResolution(String mappingName, String indexName, Map<String, String> typeMapping) {
    var mapping = new TreeMap<>(loadMapping(mappingName));
    if (typeMapping != null && typeMapping.isEmpty() == false) {
        typeMapping.forEach((fieldName, typeName) -> {
            // Only override fields that actually exist in the loaded mapping.
            if (mapping.containsKey(fieldName)) {
                EsField original = mapping.get(fieldName);
                // Swap in the requested data type; keep name, sub-properties and aggregatability intact.
                DataType overriddenType = DataType.fromTypeName(typeName);
                mapping.put(fieldName, new EsField(original.getName(), overriddenType, original.getProperties(), original.isAggregatable()));
            }
        });
    }
    return IndexResolution.valid(new EsIndex(indexName, mapping, Map.of(indexName, IndexMode.STANDARD)));
}

Expand All @@ -320,7 +332,7 @@ private static EnrichResolution loadEnrichPolicies() {
CsvTestsDataLoader.TestsDataset sourceIndex = CSV_DATASET_MAP.get(policy.getIndices().get(0));
// this could practically work, but it's wrong:
// EnrichPolicyResolution should contain the policy (system) index, not the source index
EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName()).get();
EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName(), null).get();
var concreteIndices = Map.of(RemoteClusterService.LOCAL_CLUSTER_GROUP_KEY, Iterables.get(esIndex.concreteIndices(), 0));
enrichResolution.addResolvedPolicy(
policyConfig.policyName(),
Expand Down Expand Up @@ -349,7 +361,7 @@ private static EnrichPolicy loadEnrichPolicyMapping(String policyFileName) {
}

private LogicalPlan analyzedPlan(LogicalPlan parsed, CsvTestsDataLoader.TestsDataset dataset) {
var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName());
var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName(), dataset.typeMapping());
var enrichPolicies = loadEnrichPolicies();
var analyzer = new Analyzer(new AnalyzerContext(configuration, functionRegistry, indexResolution, enrichPolicies), TEST_VERIFIER);
LogicalPlan plan = analyzer.analyze(parsed);
Expand Down Expand Up @@ -392,7 +404,7 @@ private static CsvTestsDataLoader.TestsDataset testsDataset(LogicalPlan parsed)
}

private static TestPhysicalOperationProviders testOperationProviders(CsvTestsDataLoader.TestsDataset dataset) throws Exception {
var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName()));
var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName()), dataset.typeMapping());
return new TestPhysicalOperationProviders(testData.v1(), testData.v2());
}

Expand Down

0 comments on commit bb872e6

Please sign in to comment.