Skip to content

Commit

Permalink
Backporting two union-types fixes to 8.15 (from 111932 and 112610) (#…
Browse files Browse the repository at this point in the history
…112821)

* Fix union-types where one index is missing the field (#111932)

* Fix union-types where one index is missing the field

When none of the indexes has the field, a validation error is correctly thrown, and when all indexes have the field, union-types works as normal.
But when some indexes have the field and some do not, we were getting and internal error.
We treat this case similarly to when some documents are missing the field, in which case `null` values are produced.
So now a multi-index query where some indexes are missing the field will produce nulls for the documents coming from those indexes.

* Update docs/changelog/111932.yaml

* Added capability for this fix (missing-field)

* Support widening of numeric types in union-types (#112610)

* Support widening of numeric types in union-types

Only two lines of this PR are the actual fix.
All the rest is updating the CSV-spec testing infrastructure to make it easier to test this, and adding the tests.
The refactoring involve some cleanup and simplifications also.
This update allows us to add alternative mappings of existing data files without copying the files and changing the header line.
Some of the existing union-types test files were deleted as a result, which is a step in the right direction.

* Update docs/changelog/112610.yaml

* Link capability to PR
  • Loading branch information
craigtaverner authored Sep 13, 2024
1 parent dd4f4d5 commit 84b4c59
Show file tree
Hide file tree
Showing 15 changed files with 288 additions and 166 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/111932.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 111932
summary: Fix union-types where one index is missing the field
area: ES|QL
type: bug
issues:
- 111912
6 changes: 6 additions & 0 deletions docs/changelog/112610.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 112610
summary: Support widening of numeric types in union-types
area: ES|QL
type: bug
issues:
- 111277
22 changes: 12 additions & 10 deletions x-pack/plugin/esql/qa/testFixtures/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@ apply plugin: 'elasticsearch.java'
apply plugin: org.elasticsearch.gradle.dependencies.CompileOnlyResolvePlugin

dependencies {
implementation project(':x-pack:plugin:esql:compute')
implementation project(':x-pack:plugin:esql')
compileOnly project(path: xpackModule('core'))
implementation project(":libs:elasticsearch-x-content")
implementation project(':client:rest')
implementation project(':libs:elasticsearch-logging')
implementation project(':test:framework')
api(testArtifact(project(xpackModule('esql-core'))))
implementation project(':server')
implementation "net.sf.supercsv:super-csv:${versions.supercsv}"
implementation project(':x-pack:plugin:esql:compute')
implementation project(':x-pack:plugin:esql')
compileOnly project(path: xpackModule('core'))
implementation project(":libs:elasticsearch-x-content")
implementation project(':client:rest')
implementation project(':libs:elasticsearch-logging')
implementation project(':test:framework')
api(testArtifact(project(xpackModule('esql-core'))))
implementation project(':server')
implementation "net.sf.supercsv:super-csv:${versions.supercsv}"
implementation "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
implementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}"
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ public static Tuple<Version, Version> skipVersionRange(String testName) {
return null;
}

public static Tuple<Page, List<String>> loadPageFromCsv(URL source) throws Exception {
public static Tuple<Page, List<String>> loadPageFromCsv(URL source, Map<String, String> typeMapping) throws Exception {

record CsvColumn(String name, Type type, BuilderWrapper builderWrapper) implements Releasable {
void append(String stringValue) {
Expand Down Expand Up @@ -162,21 +162,16 @@ public void close() {
if (columns == null) {
columns = new CsvColumn[entries.length];
for (int i = 0; i < entries.length; i++) {
int split = entries[i].indexOf(':');
String name, typeName;
String[] header = entries[i].split(":");
String name = header[0].trim();
String typeName = (typeMapping != null && typeMapping.containsKey(name)) ? typeMapping.get(name)
: header.length > 1 ? header[1].trim()
: null;

if (split < 0) {
if (typeName == null || typeName.isEmpty()) {
throw new IllegalArgumentException(
"A type is always expected in the schema definition; found " + entries[i]
);
} else {
name = entries[i].substring(0, split).trim();
typeName = entries[i].substring(split + 1).trim();
if (typeName.length() == 0) {
throw new IllegalArgumentException(
"A type is always expected in the schema definition; found " + entries[i]
);
}
}
Type type = Type.asType(typeName);
if (type == null) {
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
"@timestamp": {
"type": "date"
},
"client_ip": {
"type": "keyword"
},
"event_duration": {
"type": "long"
},
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@timestamp:date,event_duration:long,message:keyword
2023-10-23T13:55:01.543Z,1756467,Connected to 10.1.0.1
2023-10-23T13:53:55.832Z,5033755,Connection error
2023-10-23T13:52:55.015Z,8268153,Connection error
2023-10-23T13:51:54.732Z,725448,Connection error
2023-10-23T13:33:34.937Z,1232382,Disconnected
2023-10-23T12:27:28.948Z,2764889,Connected to 10.1.0.2
2023-10-23T12:15:03.360Z,3450233,Connected to 10.1.0.3

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,74 @@ count:long | message:keyword
2 | Connected to 10.1.0.3
;

multiIndexMissingIpToString
required_capability: union_types
required_capability: union_types_missing_field

FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index
| EVAL client_ip = TO_STRING(client_ip)
| KEEP _index, @timestamp, client_ip, event_duration, message
| SORT _index ASC, @timestamp DESC
;

_index:keyword | @timestamp:date | client_ip:keyword | event_duration:long | message:keyword
missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1
missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error
missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error
missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error
missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected
missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2
missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3
sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
;

multiIndexMissingIpToIp
required_capability: union_types
required_capability: union_types_missing_field

FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index
| EVAL client_ip = TO_IP(client_ip)
| KEEP _index, @timestamp, client_ip, event_duration, message
| SORT _index ASC, @timestamp DESC
;

_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword
missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1
missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error
missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error
missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error
missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected
missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2
missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3
sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
;

multiIndexTsLong
required_capability: union_types
required_capability: metadata_fields
Expand Down Expand Up @@ -1191,3 +1259,54 @@ FROM sample_data, sample_data_ts_long
null | 172.21.0.5 | 1232382 | Disconnected | Disconnected
null | 172.21.0.5 | 1232382 | Disconnected | Disconnected
;

shortIntegerWidening
required_capability: union_types
required_capability: metadata_fields
required_capability: casting_operator
required_capability: union_types_numeric_widening

FROM apps, apps_short METADATA _index
| EVAL id = id::integer
| KEEP _index, id, version, name
| WHERE name == "aaaaa" OR name == "hhhhh"
| SORT _index ASC, id ASC
;

_index:keyword | id:integer | version:version | name:keyword
apps | 1 | 1 | aaaaa
apps | 8 | 1.2.3.4 | hhhhh
apps | 12 | 1.2.3.4 | aaaaa
apps_short | 1 | 1 | aaaaa
apps_short | 8 | 1.2.3.4 | hhhhh
apps_short | 12 | 1.2.3.4 | aaaaa
;

shortIntegerWideningStats
required_capability: union_types
required_capability: casting_operator
required_capability: union_types_numeric_widening

FROM apps, apps_short
| EVAL id = id::integer
| STATS count=count() BY name, id
| KEEP id, name, count
| SORT id ASC, name ASC
;

id:integer | name:keyword | count:long
1 | aaaaa | 2
2 | bbbbb | 2
3 | ccccc | 2
4 | ddddd | 2
5 | eeeee | 2
6 | fffff | 2
7 | ggggg | 2
8 | hhhhh | 2
9 | iiiii | 2
10 | jjjjj | 2
11 | kkkkk | 2
12 | aaaaa | 2
13 | lllll | 2
14 | mmmmm | 2
;
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,16 @@ public enum Cap {
*/
UNION_TYPES_FIX_RENAME_RESOLUTION,

/**
* Fix for union-types when some indexes are missing the required field. Done in #111932.
*/
UNION_TYPES_MISSING_FIELD,

/**
* Fix for widening of short numeric types in union-types. Done in #112610
*/
UNION_TYPES_NUMERIC_WIDENING,

/**
* Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles.
* see <a href="https://github.com/elastic/elasticsearch/issues/104323"> Parsing large numbers is inconsistent #104323 </a>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1147,8 +1147,7 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
Set<DataType> supportedTypes = convert.supportedTypes();
imf.types().forEach(type -> {
// TODO: Shouldn't we perform widening of small numerical types here?
if (supportedTypes.contains(type)) {
if (supportedTypes.contains(type.widenSmallNumeric())) {
TypeResolutionKey key = new TypeResolutionKey(fa.name(), type);
var concreteConvert = typeSpecificConvert(convert, fa.source(), type, imf);
typeResolutions.put(key, concreteConvert);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ protected AbstractConvertFunction(StreamInput in) throws IOException {
* Build the evaluator given the evaluator a multivalued field.
*/
protected final ExpressionEvaluator.Factory evaluator(ExpressionEvaluator.Factory fieldEval) {
DataType sourceType = field().dataType();
DataType sourceType = field().dataType().widenSmallNumeric();
var factory = factories().get(sourceType);
if (factory == null) {
throw EsqlIllegalArgumentException.illegalDataType(sourceType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ private BlockLoader getBlockLoaderFor(
if (unionTypes != null) {
String indexName = shardContext.ctx.index().getName();
Expression conversion = unionTypes.getConversionExpressionForIndex(indexName);
return new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion);
return conversion == null
? BlockLoader.CONSTANT_NULLS
: new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion);
}
return blockLoader;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import org.elasticsearch.xpack.esql.core.index.IndexResolution;
import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.enrich.EnrichLookupService;
import org.elasticsearch.xpack.esql.enrich.ResolvedEnrichPolicy;
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
Expand Down Expand Up @@ -290,8 +291,18 @@ protected void assertResults(ExpectedResults expected, ActualResults actual, boo
// CsvTestUtils.logData(actual.values(), LOGGER);
}

private static IndexResolution loadIndexResolution(String mappingName, String indexName) {
private static IndexResolution loadIndexResolution(String mappingName, String indexName, Map<String, String> typeMapping) {
var mapping = new TreeMap<>(loadMapping(mappingName));
if ((typeMapping == null || typeMapping.isEmpty()) == false) {
for (var entry : typeMapping.entrySet()) {
if (mapping.containsKey(entry.getKey())) {
DataType dataType = DataType.fromTypeName(entry.getValue());
EsField field = mapping.get(entry.getKey());
EsField editedField = new EsField(field.getName(), dataType, field.getProperties(), field.isAggregatable());
mapping.put(entry.getKey(), editedField);
}
}
}
return IndexResolution.valid(new EsIndex(indexName, mapping, Set.of(indexName)));
}

Expand All @@ -302,7 +313,7 @@ private static EnrichResolution loadEnrichPolicies() {
CsvTestsDataLoader.TestsDataset sourceIndex = CSV_DATASET_MAP.get(policy.getIndices().get(0));
// this could practically work, but it's wrong:
// EnrichPolicyResolution should contain the policy (system) index, not the source index
EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName()).get();
EsIndex esIndex = loadIndexResolution(sourceIndex.mappingFileName(), sourceIndex.indexName(), null).get();
var concreteIndices = Map.of(RemoteClusterService.LOCAL_CLUSTER_GROUP_KEY, Iterables.get(esIndex.concreteIndices(), 0));
enrichResolution.addResolvedPolicy(
policyConfig.policyName(),
Expand Down Expand Up @@ -331,7 +342,7 @@ private static EnrichPolicy loadEnrichPolicyMapping(String policyFileName) {
}

private PhysicalPlan physicalPlan(LogicalPlan parsed, CsvTestsDataLoader.TestsDataset dataset) {
var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName());
var indexResolution = loadIndexResolution(dataset.mappingFileName(), dataset.indexName(), dataset.typeMapping());
var enrichPolicies = loadEnrichPolicies();
var analyzer = new Analyzer(new AnalyzerContext(configuration, functionRegistry, indexResolution, enrichPolicies), TEST_VERIFIER);
var analyzed = analyzer.analyze(parsed);
Expand Down Expand Up @@ -376,7 +387,7 @@ private static CsvTestsDataLoader.TestsDataset testsDataset(LogicalPlan parsed)
}

private static TestPhysicalOperationProviders testOperationProviders(CsvTestsDataLoader.TestsDataset dataset) throws Exception {
var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName()));
var testData = loadPageFromCsv(CsvTests.class.getResource("/" + dataset.dataFileName()), dataset.typeMapping());
return new TestPhysicalOperationProviders(testData.v1(), testData.v2());
}

Expand Down

0 comments on commit 84b4c59

Please sign in to comment.