Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL: Implement DATE_TRUNC function #46473

Merged
merged 12 commits into from
Sep 11, 2019
75 changes: 74 additions & 1 deletion docs/reference/sql/functions/date-time.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
[[sql-functions-datetime-interval]]
==== Intervals

A common requirement when dealing with date/time in general revolves around
A common requirement when dealing with date/time in general revolves around
the notion of `interval`, a topic that is worth exploring in the context of {es} and {es-sql}.

{es} has comprehensive support for <<date-math, date math>> both inside <<date-math-index-names, index names>> and <<mapping-date-format, queries>>.
Expand Down Expand Up @@ -248,6 +248,79 @@ include-tagged::{sql-specs}/docs/docs.csv-spec[filterNow]
Currently, using a _precision_ greater than 3 doesn't make any difference to the output of the
function as the maximum number of second fractional digits returned is 3 (milliseconds).

[[sql-functions-datetime-trunc]]
==== `DATE_TRUNC`

.Synopsis:
[source, sql]
--------------------------------------------------
DATE_TRUNC(
string_exp, <1>
datetime_exp) <2>
--------------------------------------------------

*Input*:

<1> string expression denoting the unit to which the date/datetime should be truncated to
<2> date/datetime expression

*Output*: datetime

.Description:

Truncate the date/datetime to the specified unit by setting all fields that are less significant than the specified
one to zero (or one, for day, day of week and month).

[cols="^,^"]
|===
2+h|Datetime truncation units

s|unit
s|abbreviations

| millennium | millennia
| century | centuries
| decade | decades
| year | years, yy, yyyy
| quarter | quarters, qq, q
| month | months, mm, m
| week | weeks, wk, ww
| day | days, dd, d
| hour | hours, hh
| minute | minutes, mi, n
| second | seconds, ss, s
| millisecond | milliseconds, ms
| microsecond | microseconds, mcs
| nanosecond | nanoseconds, ns
|===



[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateTimeMillennium]
--------------------------------------------------

[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateTimeWeek]
--------------------------------------------------

[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateTimeMinutes]
--------------------------------------------------

[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateDecades]
--------------------------------------------------

[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateQuarter]
--------------------------------------------------

[[sql-functions-datetime-day]]
==== `DAY_OF_MONTH/DOM/DAY`

Expand Down
1 change: 1 addition & 0 deletions docs/reference/sql/functions/index.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
** <<sql-functions-current-date>>
** <<sql-functions-current-time>>
** <<sql-functions-current-timestamp>>
** <<sql-functions-datetime-trunc>>
** <<sql-functions-datetime-day>>
** <<sql-functions-datetime-dow>>
** <<sql-functions-datetime-doy>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.relique.jdbc.csv.CsvResultSet;

import java.io.IOException;
import java.sql.Date;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
Expand All @@ -37,6 +38,9 @@
import static java.sql.Types.REAL;
import static java.sql.Types.SMALLINT;
import static java.sql.Types.TINYINT;
import static java.time.ZoneOffset.UTC;
import static org.elasticsearch.xpack.sql.qa.jdbc.JdbcTestUtils.logResultSetMetadata;
import static org.elasticsearch.xpack.sql.qa.jdbc.JdbcTestUtils.resultSetCurrentData;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.instanceOf;
import static org.junit.Assert.assertEquals;
Expand Down Expand Up @@ -107,7 +111,7 @@ public static void assertResultSetMetadata(ResultSet expected, ResultSet actual,
ResultSetMetaData actualMeta = actual.getMetaData();

if (logger != null) {
JdbcTestUtils.logResultSetMetadata(actual, logger);
logResultSetMetadata(actual, logger);
}

if (expectedMeta.getColumnCount() != actualMeta.getColumnCount()) {
Expand Down Expand Up @@ -210,7 +214,7 @@ private static void doAssertResultSetData(ResultSet expected, ResultSet actual,
assertTrue("Expected more data but no more entries found after [" + count + "]", actual.next());

if (logger != null) {
logger.info(JdbcTestUtils.resultSetCurrentData(actual));
logger.info(resultSetCurrentData(actual));
}

for (int column = 1; column <= columns; column++) {
Expand Down Expand Up @@ -264,6 +268,10 @@ private static void doAssertResultSetData(ResultSet expected, ResultSet actual,
else if (type == Types.TIMESTAMP || type == Types.TIMESTAMP_WITH_TIMEZONE) {
assertEquals(msg, expected.getTimestamp(column), actual.getTimestamp(column));
}
// then date
else if (type == Types.DATE) {
assertEquals(msg, convertDateToSystemTimezone(expected.getDate(column)), actual.getDate(column));
}
// and floats/doubles
else if (type == Types.DOUBLE) {
assertEquals(msg, (double) expectedObject, (double) actualObject, lenientFloatingNumbers ? 1d : 0.0d);
Expand Down Expand Up @@ -301,14 +309,14 @@ else if (type == Types.VARCHAR && actualObject instanceof TemporalAmount) {
} catch (AssertionError ae) {
if (logger != null && actual.next()) {
logger.info("^^^ Assertion failure ^^^");
logger.info(JdbcTestUtils.resultSetCurrentData(actual));
logger.info(resultSetCurrentData(actual));
}
throw ae;
}

if (actual.next()) {
fail("Elasticsearch [" + actual + "] still has data after [" + count + "] entries:\n"
+ JdbcTestUtils.resultSetCurrentData(actual));
+ resultSetCurrentData(actual));
}
}

Expand All @@ -328,4 +336,9 @@ private static int typeOf(int columnType, boolean lenientDataType) {

return columnType;
}

// Used to convert the DATE read from CSV file to a java.sql.Date at the System's timezone (-Dtests.timezone=XXXX)
private static Date convertDateToSystemTimezone(Date date) {
return new Date(date.toLocalDate().atStartOfDay(UTC).toInstant().toEpochMilli());
}
}
17 changes: 9 additions & 8 deletions x-pack/plugin/sql/qa/src/main/resources/command.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,17 @@ CURRENT_DATE |SCALAR
CURRENT_TIME |SCALAR
CURRENT_TIMESTAMP|SCALAR
CURTIME |SCALAR
DATE_TRUNC |SCALAR
DAY |SCALAR
DAYNAME |SCALAR
DAYOFMONTH |SCALAR
DAYOFWEEK |SCALAR
DAYOFYEAR |SCALAR
DAY_NAME |SCALAR
DAY_OF_MONTH |SCALAR
DAY_OF_WEEK |SCALAR
DAY_OF_YEAR |SCALAR
DOM |SCALAR
DAYOFMONTH |SCALAR
DAYOFWEEK |SCALAR
DAYOFYEAR |SCALAR
DAY_NAME |SCALAR
DAY_OF_MONTH |SCALAR
DAY_OF_WEEK |SCALAR
DAY_OF_YEAR |SCALAR
DOM |SCALAR
DOW |SCALAR
DOY |SCALAR
HOUR |SCALAR
Expand Down
147 changes: 146 additions & 1 deletion x-pack/plugin/sql/qa/src/main/resources/datetime.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,151 @@ SELECT WEEK(birth_date) week, birth_date FROM test_emp WHERE WEEK(birth_date) >
2 |1953-01-07T00:00:00.000Z
;

selectDateTruncWithDateTime
schema::dt_hour:ts|dt_min:ts|dt_sec:ts|dt_millis:s|dt_micro:s|dt_nano:s
Copy link
Contributor Author

@matriv matriv Sep 8, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have to cast some columns to string as the .XXX msecs part is not properly read by the CSV infrastructure.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's worth opening another issue for this to fix it long term (either inside the CSV library and elsewhere).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done: #46511

SELECT DATE_TRUNC('hour', '2019-09-04T11:22:33.123Z'::datetime) as dt_hour, DATE_TRUNC('minute', '2019-09-04T11:22:33.123Z'::datetime) as dt_min,
DATE_TRUNC('seconds', '2019-09-04T11:22:33.123Z'::datetime) as dt_sec, DATE_TRUNC('ms', '2019-09-04T11:22:33.123Z'::datetime)::string as dt_millis,
DATE_TRUNC('mcs', '2019-09-04T11:22:33.123Z'::datetime)::string as dt_micro, DATE_TRUNC('nanoseconds', '2019-09-04T11:22:33.123Z'::datetime)::string as dt_nano;

dt_hour | dt_min | dt_sec | dt_millis | dt_micro | dt_nano
-------------------------+---------------------------+--------------------------+--------------------------+--------------------------+-------------------------
2019-09-04T11:00:00.000Z | 2019-09-04T11:22:00.000Z | 2019-09-04T11:22:33.000Z | 2019-09-04T11:22:33.123Z | 2019-09-04T11:22:33.123Z | 2019-09-04T11:22:33.123Z
;

selectDateTruncWithDate
schema::dt_mil:ts|dt_cent:ts|dt_dec:ts|dt_year:ts|dt_quarter:ts|dt_month:ts|dt_week:ts|dt_day:ts
SELECT DATE_TRUNC('millennia', '2019-09-04'::date) as dt_mil, DATE_TRUNC('century', '2019-09-04'::date) as dt_cent,
DATE_TRUNC('decades', '2019-09-04'::date) as dt_dec, DATE_TRUNC('year', '2019-09-04'::date) as dt_year,
DATE_TRUNC('quarter', '2019-09-04'::date) as dt_quarter, DATE_TRUNC('month', '2019-09-04'::date) as dt_month,
DATE_TRUNC('week', '2019-09-04'::date) as dt_week, DATE_TRUNC('day', '2019-09-04'::date) as dt_day;

dt_mil | dt_cent | dt_dec | dt_year | dt_quarter | dt_month | dt_week | dt_day
-------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+-------------------------
2000-01-01T00:00:00.000Z | 2000-01-01T00:00:00.000Z | 2010-01-01T00:00:00.000Z | 2019-01-01T00:00:00.000Z | 2019-07-01T00:00:00.000Z | 2019-09-01T00:00:00.000Z | 2019-09-02T00:00:00.000Z | 2019-09-04T00:00:00.000Z
;

selectDateTruncWithField
schema::emp_no:i|birth_date:ts|dt_mil:ts|dt_cent:ts|dt_dec:ts|dt_year:ts|dt_quarter:ts|dt_month:ts|dt_week:ts|dt_day:ts
SELECT emp_no, birth_date, DATE_TRUNC('millennium', birth_date) as dt_mil, DATE_TRUNC('centuries', birth_date) as dt_cent,
DATE_TRUNC('decades', birth_date) as dt_dec, DATE_TRUNC('year', birth_date) as dt_year, DATE_TRUNC('quarter', birth_date) as dt_quarter,
DATE_TRUNC('month', birth_date) as dt_month, DATE_TRUNC('week', birth_date) as dt_week, DATE_TRUNC('day', birth_date) as dt_day
FROM test_emp WHERE emp_no >= 10032 AND emp_no <= 10042 ORDER BY 1;

emp_no | birth_date | dt_mil | dt_cent | dt_dec | dt_year | dt_quarter | dt_month | dt_week | dt_day
--------+-------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+-------------------------
10032 |1960-08-09 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-07-01 00:00:00.000Z | 1960-08-01 00:00:00.000Z | 1960-08-08 00:00:00.000Z | 1960-08-09 00:00:00.000Z
10033 |1956-11-14 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1956-01-01 00:00:00.000Z | 1956-10-01 00:00:00.000Z | 1956-11-01 00:00:00.000Z | 1956-11-12 00:00:00.000Z | 1956-11-14 00:00:00.000Z
10034 |1962-12-29 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1962-01-01 00:00:00.000Z | 1962-10-01 00:00:00.000Z | 1962-12-01 00:00:00.000Z | 1962-12-24 00:00:00.000Z | 1962-12-29 00:00:00.000Z
10035 |1953-02-08 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1953-01-01 00:00:00.000Z | 1953-01-01 00:00:00.000Z | 1953-02-01 00:00:00.000Z | 1953-02-02 00:00:00.000Z | 1953-02-08 00:00:00.000Z
10036 |1959-08-10 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1959-01-01 00:00:00.000Z | 1959-07-01 00:00:00.000Z | 1959-08-01 00:00:00.000Z | 1959-08-10 00:00:00.000Z | 1959-08-10 00:00:00.000Z
10037 |1963-07-22 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1963-01-01 00:00:00.000Z | 1963-07-01 00:00:00.000Z | 1963-07-01 00:00:00.000Z | 1963-07-22 00:00:00.000Z | 1963-07-22 00:00:00.000Z
10038 |1960-07-20 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-07-01 00:00:00.000Z | 1960-07-01 00:00:00.000Z | 1960-07-18 00:00:00.000Z | 1960-07-20 00:00:00.000Z
10039 |1959-10-01 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1959-01-01 00:00:00.000Z | 1959-10-01 00:00:00.000Z | 1959-10-01 00:00:00.000Z | 1959-09-28 00:00:00.000Z | 1959-10-01 00:00:00.000Z
10040 | null | null | null | null | null | null | null | null | null
10041 | null | null | null | null | null | null | null | null | null
10042 | null | null | null | null | null | null | null | null | null
;

selectDateTruncWithNullTruncateField
SELECT DATE_TRUNC(null, birth_date) AS dt FROM test_emp LIMIT 5;

dt:ts
------
null
null
null
null
null
;

selectDateTruncWithScalars
SELECT birth_date, DATE_TRUNC(CAST(CHAR(109) AS VARCHAR), birth_date + INTERVAL 12 YEAR) AS dt FROM test_emp ORDER BY 1 DESC NULLS LAST LIMIT 5;

birth_date:ts | dt:ts
-------------------------+---------------------
1965-01-03 00:00:00.000Z | 1977-01-01 00:00:00.000Z
1964-10-18 00:00:00.000Z | 1976-10-01 00:00:00.000Z
1964-06-11 00:00:00.000Z | 1976-06-01 00:00:00.000Z
1964-06-02 00:00:00.000Z | 1976-06-01 00:00:00.000Z
1964-04-18 00:00:00.000Z | 1976-04-01 00:00:00.000Z
;

selectDateTruncWithTruncArgFromField
SELECT DATE_TRUNC(CONCAT(gender, 'illennium'), birth_date) AS dt FROM test_emp WHERE gender='M' ORDER BY 1 DESC LIMIT 2;

dt:ts
------------------------
0999-12-27 00:00:00.000Z
0999-12-27 00:00:00.000Z
;

selectDateTruncWithComplexExpressions
SELECT gender, birth_date, DATE_TRUNC(CASE WHEN gender = 'M' THEN CONCAT(gender, 'onths') WHEN gender = 'F' THEN 'decade' ELSE 'quarter' END,
birth_date + INTERVAL 10 month) AS dt FROM test_emp WHERE dt > '1954-07-01'::date ORDER BY emp_no LIMIT 10;

gender:s | birth_date:ts | dt:ts
------------+--------------------------+---------------------
F | 1964-06-02 00:00:00.000Z | 1960-01-01 00:00:00.000Z
M | 1959-12-03 00:00:00.000Z | 1960-10-01 00:00:00.000Z
M | 1954-05-01 00:00:00.000Z | 1955-03-01 00:00:00.000Z
M | 1955-01-21 00:00:00.000Z | 1955-11-01 00:00:00.000Z
M | 1958-02-19 00:00:00.000Z | 1958-12-01 00:00:00.000Z
null | 1963-06-01 00:00:00.000Z | 1964-04-01 00:00:00.000Z
null | 1960-10-04 00:00:00.000Z | 1961-07-01 00:00:00.000Z
null | 1963-06-07 00:00:00.000Z | 1964-04-01 00:00:00.000Z
null | 1956-02-12 00:00:00.000Z | 1956-10-01 00:00:00.000Z
null | 1959-08-19 00:00:00.000Z | 1960-04-01 00:00:00.000Z
;

dateTruncOrderBy
schema::emp_no:i|hire_date:ts|dt:ts
SELECT emp_no, hire_date, DATE_TRUNC('quarter', hire_date) as dt FROM test_emp ORDER BY dt NULLS LAST, emp_no LIMIT 5;

emp_no | hire_date | dt
--------+--------------------------+-------------------------
10009 | 1985-02-18 00:00:00.000Z | 1985-01-01 00:00:00.000Z
10048 | 1985-02-24 00:00:00.000Z | 1985-01-01 00:00:00.000Z
10098 | 1985-05-13 00:00:00.000Z | 1985-04-01 00:00:00.000Z
10061 | 1985-09-17 00:00:00.000Z | 1985-07-01 00:00:00.000Z
10076 | 1985-07-09 00:00:00.000Z | 1985-07-01 00:00:00.000Z
;

dateTruncFilter
schema::emp_no:i|hire_date:ts|dt:ts
SELECT emp_no, hire_date, DATE_TRUNC('quarter', hire_date) as dt FROM test_emp WHERE DATE_TRUNC('quarter', hire_date) > '1994-07-01T00:00:00.000Z'::timestamp ORDER BY emp_no;

emp_no | hire_date | dt
--------+--------------------------+-------------------------
10016 | 1995-01-27 00:00:00.000Z | 1995-01-01 00:00:00.000Z
10019 | 1999-04-30 00:00:00.000Z | 1999-04-01 00:00:00.000Z
10022 | 1995-08-22 00:00:00.000Z | 1995-07-01 00:00:00.000Z
10024 | 1997-05-19 00:00:00.000Z | 1997-04-01 00:00:00.000Z
10026 | 1995-03-20 00:00:00.000Z | 1995-01-01 00:00:00.000Z
10054 | 1995-03-13 00:00:00.000Z | 1995-01-01 00:00:00.000Z
10084 | 1995-12-15 00:00:00.000Z | 1995-10-01 00:00:00.000Z
10093 | 1996-11-05 00:00:00.000Z | 1996-10-01 00:00:00.000Z
;

dateTruncGroupBy
schema::count:l|dt:ts
SELECT count(*) as count, DATE_TRUNC('decade', hire_date) dt FROM test_emp GROUP BY dt ORDER BY 2;

count | dt
--------+-------------------------
59 | 1980-01-01 00:00:00.000Z
41 | 1990-01-01 00:00:00.000Z
;

dateTruncHaving
schema::gender:s|dt:ts
SELECT gender, max(hire_date) dt FROM test_emp GROUP BY gender HAVING DATE_TRUNC('year', max(hire_date)) >= '1997-01-01T00:00:00.000Z'::timestamp ORDER BY 1;

gender | dt
--------+-------------------------
null | 1999-04-30 00:00:00.000Z
F | 1997-05-19 00:00:00.000Z
;

//
// Aggregate
//
Expand Down Expand Up @@ -404,4 +549,4 @@ SELECT CAST (CAST (birth_date AS VARCHAR) AS TIMESTAMP) a FROM test_emp WHERE YE
a:ts
---------------
1965-01-03T00:00:00Z
;
;
Loading