Skip to content

Commit

Permalink
Support widening of numeric types in union-types (#112610)
Browse files Browse the repository at this point in the history
* Support widening of numeric types in union-types

Only two lines of this PR are the actual fix.
All the rest is updating the CSV-spec testing infrastructure to make it easier to test this, and adding the tests.
The refactoring also involves some cleanup and simplifications.
This update allows us to add alternative mappings of existing data files without copying the files and changing the header line.
Some of the existing union-types test files were deleted as a result, which is a step in the right direction.

* Update docs/changelog/112610.yaml

* Link capability to PR
  • Loading branch information
craigtaverner authored Sep 11, 2024
1 parent 077b585 commit bb872e6
Show file tree
Hide file tree
Showing 12 changed files with 201 additions and 187 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/112610.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 112610
summary: Support widening of numeric types in union-types
area: ES|QL
type: bug
issues:
- 111277
22 changes: 12 additions & 10 deletions x-pack/plugin/esql/qa/testFixtures/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@ apply plugin: 'elasticsearch.java'
apply plugin: org.elasticsearch.gradle.dependencies.CompileOnlyResolvePlugin

dependencies {
implementation project(':x-pack:plugin:esql:compute')
implementation project(':x-pack:plugin:esql')
compileOnly project(path: xpackModule('core'))
implementation project(":libs:elasticsearch-x-content")
implementation project(':client:rest')
implementation project(':libs:elasticsearch-logging')
implementation project(':test:framework')
api(testArtifact(project(xpackModule('esql-core'))))
implementation project(':server')
implementation "net.sf.supercsv:super-csv:${versions.supercsv}"
implementation project(':x-pack:plugin:esql:compute')
implementation project(':x-pack:plugin:esql')
compileOnly project(path: xpackModule('core'))
implementation project(":libs:elasticsearch-x-content")
implementation project(':client:rest')
implementation project(':libs:elasticsearch-logging')
implementation project(':test:framework')
api(testArtifact(project(xpackModule('esql-core'))))
implementation project(':server')
implementation "net.sf.supercsv:super-csv:${versions.supercsv}"
implementation "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
implementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}"
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public static Tuple<Version, Version> skipVersionRange(String testName, String i
return null;
}

public static Tuple<Page, List<String>> loadPageFromCsv(URL source) throws Exception {
public static Tuple<Page, List<String>> loadPageFromCsv(URL source, Map<String, String> typeMapping) throws Exception {

record CsvColumn(String name, Type type, BuilderWrapper builderWrapper) implements Releasable {
void append(String stringValue) {
Expand Down Expand Up @@ -164,21 +164,16 @@ public void close() {
if (columns == null) {
columns = new CsvColumn[entries.length];
for (int i = 0; i < entries.length; i++) {
int split = entries[i].indexOf(':');
String name, typeName;
String[] header = entries[i].split(":");
String name = header[0].trim();
String typeName = (typeMapping != null && typeMapping.containsKey(name)) ? typeMapping.get(name)
: header.length > 1 ? header[1].trim()
: null;

if (split < 0) {
if (typeName == null || typeName.isEmpty()) {
throw new IllegalArgumentException(
"A type is always expected in the schema definition; found " + entries[i]
);
} else {
name = entries[i].substring(0, split).trim();
typeName = entries[i].substring(split + 1).trim();
if (typeName.length() == 0) {
throw new IllegalArgumentException(
"A type is always expected in the schema definition; found " + entries[i]
);
}
}
Type type = Type.asType(typeName);
if (type == null) {
Expand Down

Large diffs are not rendered by default.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -1351,3 +1351,54 @@ FROM sample_data, sample_data_ts_long
null | 172.21.0.5 | 1232382 | Disconnected | Disconnected
null | 172.21.0.5 | 1232382 | Disconnected | Disconnected
;

shortIntegerWidening
required_capability: union_types
required_capability: metadata_fields
required_capability: casting_operator
required_capability: union_types_numeric_widening

FROM apps, apps_short METADATA _index
| EVAL id = id::integer
| KEEP _index, id, version, name
| WHERE name == "aaaaa" OR name == "hhhhh"
| SORT _index ASC, id ASC
;

_index:keyword | id:integer | version:version | name:keyword
apps | 1 | 1 | aaaaa
apps | 8 | 1.2.3.4 | hhhhh
apps | 12 | 1.2.3.4 | aaaaa
apps_short | 1 | 1 | aaaaa
apps_short | 8 | 1.2.3.4 | hhhhh
apps_short | 12 | 1.2.3.4 | aaaaa
;

shortIntegerWideningStats
required_capability: union_types
required_capability: casting_operator
required_capability: union_types_numeric_widening

FROM apps, apps_short
| EVAL id = id::integer
| STATS count=count() BY name, id
| KEEP id, name, count
| SORT id ASC, name ASC
;

id:integer | name:keyword | count:long
1 | aaaaa | 2
2 | bbbbb | 2
3 | ccccc | 2
4 | ddddd | 2
5 | eeeee | 2
6 | fffff | 2
7 | ggggg | 2
8 | hhhhh | 2
9 | iiiii | 2
10 | jjjjj | 2
11 | kkkkk | 2
12 | aaaaa | 2
13 | lllll | 2
14 | mmmmm | 2
;
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@ public enum Cap {
*/
UNION_TYPES_MISSING_FIELD,

/**
* Fix for widening of short numeric types in union-types. Done in #112610
*/
UNION_TYPES_NUMERIC_WIDENING,

/**
* Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles.
* see <a href="https://github.com/elastic/elasticsearch/issues/104323"> Parsing large numbers is inconsistent #104323 </a>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.type.DataType.LONG;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG;
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION;
import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount;
import static org.elasticsearch.xpack.esql.stats.FeatureMetric.LIMIT;
Expand Down Expand Up @@ -1223,8 +1222,7 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
Set<DataType> supportedTypes = convert.supportedTypes();
imf.types().forEach(type -> {
// TODO: Shouldn't we perform widening of small numerical types here?
if (supportedTypes.contains(type)) {
if (supportedTypes.contains(type.widenSmallNumeric())) {
TypeResolutionKey key = new TypeResolutionKey(fa.name(), type);
var concreteConvert = typeSpecificConvert(convert, fa.source(), type, imf);
typeResolutions.put(key, concreteConvert);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ protected AbstractConvertFunction(StreamInput in) throws IOException {
* Build the evaluator given the evaluator a multivalued field.
*/
protected final ExpressionEvaluator.Factory evaluator(ExpressionEvaluator.Factory fieldEval) {
DataType sourceType = field().dataType();
DataType sourceType = field().dataType().widenSmallNumeric();
var factory = factories().get(sourceType);
if (factory == null) {
throw EsqlIllegalArgumentException.illegalDataType(sourceType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@
import org.elasticsearch.xpack.esql.analysis.EnrichResolution;
import org.elasticsearch.xpack.esql.analysis.PreAnalyzer;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.enrich.EnrichLookupService;
import org.elasticsearch.xpack.esql.enrich.ResolvedEnrichPolicy;
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
Expand Down Expand Up @@ -308,8 +310,18 @@ protected void assertResults(ExpectedResults expected, ActualResults actual, boo
// CsvTestUtils.logData(actual.values(), LOGGER);
}

private static IndexResolution loadIndexResolution(String mappingName, String indexName) {
/**
 * Builds an {@link IndexResolution} for a test index from a named mapping file,
 * optionally overriding the declared type of individual fields.
 *
 * @param mappingName name of the mapping resource to load
 * @param indexName   name of the (single) concrete index to resolve
 * @param typeMapping optional field-name to type-name overrides; may be null or empty,
 *                    in which case the mapping is used unchanged
 * @return a valid resolution over a standard-mode index backed by the (possibly edited) mapping
 */
private static IndexResolution loadIndexResolution(String mappingName, String indexName, Map<String, String> typeMapping) {
    var mapping = new TreeMap<>(loadMapping(mappingName));
    if (typeMapping != null && typeMapping.isEmpty() == false) {
        typeMapping.forEach((fieldName, typeName) -> {
            // Only override fields that actually exist in the loaded mapping.
            if (mapping.containsKey(fieldName)) {
                EsField original = mapping.get(fieldName);
                // Swap in the requested data type; keep name, sub-properties and aggregatability intact.
                DataType overriddenType = DataType.fromTypeName(typeName);
                mapping.put(fieldName, new EsField(original.getName(), overriddenType, original.getProperties(), original.isAggregatable()));
            }
        });
    }
    return IndexResolution.valid(new EsIndex(indexName, mapping, Map.of(indexName, IndexMode.STANDARD)));
}

Expand All @@ -320,7 +332,7 @@ private static EnrichResolution loadEnrichPolicies() {
CsvTestsDataLoader.TestsDataset sourceIndex = CSV_DATASET_MAP.get(policy.getIndices().get(0));
// this could practically work, but it's wrong:
// EnrichPolicyResolution should contain the policy (system) index, not the source index
EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName()).get();
EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName(), null).get();
var concreteIndices = Map.of(RemoteClusterService.LOCAL_CLUSTER_GROUP_KEY, Iterables.get(esIndex.concreteIndices(), 0));
enrichResolution.addResolvedPolicy(
policyConfig.policyName(),
Expand Down Expand Up @@ -349,7 +361,7 @@ private static EnrichPolicy loadEnrichPolicyMapping(String policyFileName) {
}

private LogicalPlan analyzedPlan(LogicalPlan parsed, CsvTestsDataLoader.TestsDataset dataset) {
var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName());
var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName(), dataset.typeMapping());
var enrichPolicies = loadEnrichPolicies();
var analyzer = new Analyzer(new AnalyzerContext(configuration, functionRegistry, indexResolution, enrichPolicies), TEST_VERIFIER);
LogicalPlan plan = analyzer.analyze(parsed);
Expand Down Expand Up @@ -392,7 +404,7 @@ private static CsvTestsDataLoader.TestsDataset testsDataset(LogicalPlan parsed)
}

private static TestPhysicalOperationProviders testOperationProviders(CsvTestsDataLoader.TestsDataset dataset) throws Exception {
var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName()));
var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName()), dataset.typeMapping());
return new TestPhysicalOperationProviders(testData.v1(), testData.v2());
}

Expand Down

0 comments on commit bb872e6

Please sign in to comment.