Skip to content

Commit

Permalink
SQL: Fix issue with GROUP BY YEAR() (elastic#49559)
Browse files Browse the repository at this point in the history
Grouping By YEAR() is translated to a histogram aggregation, but
previously if there was a scalar function invloved (e.g.:
`YEAR(date + INTERVAL 2 YEARS)`), there was no proper script created
and the histogram was applied on a field with name: `date + INTERVAL 2 YEARS`
which doesn't make sense, and resulted in null result.

Check the underlying field of YEAR() and if it's a function call
`asScript()` to properly get the painless script on which the histogram
is applied.

Fixes: elastic#49386
(cherry picked from commit 93c37ab)
(cherry picked from commit e54d7d5)
  • Loading branch information
matriv committed Nov 26, 2019
1 parent 1269524 commit 90378a9
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 1 deletion.
23 changes: 23 additions & 0 deletions x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,29 @@ SELECT HISTOGRAM(YEAR(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY
null |10
;

histogramYearOnDateTimeWithScalars
schema::year:i|c:l
SELECT YEAR(CAST(birth_date + INTERVAL 5 YEARS AS DATE) + INTERVAL 20 MONTHS) AS year, COUNT(*) as c FROM test_emp GROUP BY 1;

year | c
---------------+---------------
null |10
1958 |2
1959 |12
1960 |8
1961 |6
1962 |4
1963 |5
1964 |5
1965 |7
1966 |9
1967 |7
1968 |7
1969 |8
1970 |6
1971 |4
;

histogramNumericWithExpression
schema::h:i|c:l
SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,13 @@ static GroupingContext groupBy(List<? extends Expression> groupings) {
// dates are handled differently because of date histograms
if (exp instanceof DateTimeHistogramFunction) {
DateTimeHistogramFunction dthf = (DateTimeHistogramFunction) exp;
key = new GroupByDateHistogram(aggId, nameOf(exp), dthf.interval(), dthf.zoneId());
Expression field = dthf.field();
if (field instanceof FieldAttribute) {
key = new GroupByDateHistogram(aggId, nameOf(field), dthf.interval(), dthf.zoneId());
} else if (field instanceof Function) {
ScriptTemplate script = ((Function) field).asScript();
key = new GroupByDateHistogram(aggId, script, dthf.interval(), dthf.zoneId());
}
}
// all other scalar functions become a script
else if (exp instanceof ScalarFunction) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,46 @@ public void testGroupByHistogram() {
assertEquals(DataType.DATETIME, field.dataType());
}

public void testGroupByHistogramQueryTranslator() {
PhysicalPlan p = optimizeAndPlan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(date, INTERVAL 2 YEARS)");
assertEquals(EsQueryExec.class, p.getClass());
EsQueryExec eqe = (EsQueryExec) p;
assertEquals(1, eqe.output().size());
assertEquals("MAX(int)", eqe.output().get(0).qualifiedName());
assertEquals(DataType.INTEGER, eqe.output().get(0).dataType());
assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
containsString("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\","
+ "\"interval\":62208000000,\"time_zone\":\"UTC\"}}}]}"));
}

public void testGroupByYearQueryTranslator() {
PhysicalPlan p = optimizeAndPlan("SELECT YEAR(date) FROM test GROUP BY YEAR(date)");
assertEquals(EsQueryExec.class, p.getClass());
EsQueryExec eqe = (EsQueryExec) p;
assertEquals(1, eqe.output().size());
assertEquals("YEAR(date)", eqe.output().get(0).qualifiedName());
assertEquals(DataType.INTEGER, eqe.output().get(0).dataType());
assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\","
+ "\"interval\":31536000000,\"time_zone\":\"UTC\"}}}]}}}"));
}

public void testGroupByYearAndScalarsQueryTranslator() {
PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1");
assertEquals(EsQueryExec.class, p.getClass());
EsQueryExec eqe = (EsQueryExec) p;
assertEquals(1, eqe.output().size());
assertEquals("YEAR(CAST(date + INTERVAL 5 months AS DATE))", eqe.output().get(0).qualifiedName());
assertEquals(DataType.INTEGER, eqe.output().get(0).dataType());
assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
endsWith("\"date_histogram\":{\"script\":{\"source\":\"InternalSqlScriptUtils.cast(" +
"InternalSqlScriptUtils.add(InternalSqlScriptUtils.docValue(doc,params.v0)," +
"InternalSqlScriptUtils.intervalYearMonth(params.v1,params.v2)),params.v3)\"," +
"\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"P5M\",\"v2\":\"INTERVAL_MONTH\"," +
"\"v3\":\"DATE\"}},\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"," +
"\"interval\":31536000000,\"time_zone\":\"UTC\"}}}]}}}"));
}

public void testGroupByHistogramWithDate() {
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(CAST(date AS DATE), INTERVAL 2 MONTHS)");
assertTrue(p instanceof Aggregate);
Expand Down

0 comments on commit 90378a9

Please sign in to comment.