Skip to content

Commit

Permalink
SQL: Implement DATE_TRUNC function (#46473)
Browse files Browse the repository at this point in the history
DATE_TRUNC(<truncate field>, <date/datetime>) is a function that allows
the user to truncate a timestamp to the specified field by zeroing out
the rest of the fields. The function is implemented according to the
spec from PostgreSQL: https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TRUNC

Closes: #46319
  • Loading branch information
matriv authored Sep 11, 2019
1 parent e2977dc commit b37e967
Show file tree
Hide file tree
Showing 22 changed files with 1,104 additions and 30 deletions.
75 changes: 74 additions & 1 deletion docs/reference/sql/functions/date-time.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
[[sql-functions-datetime-interval]]
==== Intervals

A common requirement when dealing with date/time in general revolves around
A common requirement when dealing with date/time in general revolves around
the notion of `interval`, a topic that is worth exploring in the context of {es} and {es-sql}.

{es} has comprehensive support for <<date-math, date math>> both inside <<date-math-index-names, index names>> and <<mapping-date-format, queries>>.
Expand Down Expand Up @@ -248,6 +248,79 @@ include-tagged::{sql-specs}/docs/docs.csv-spec[filterNow]
Currently, using a _precision_ greater than 3 doesn't make any difference to the output of the
function as the maximum number of second fractional digits returned is 3 (milliseconds).

[[sql-functions-datetime-trunc]]
==== `DATE_TRUNC`

.Synopsis:
[source, sql]
--------------------------------------------------
DATE_TRUNC(
string_exp, <1>
datetime_exp) <2>
--------------------------------------------------

*Input*:

<1> string expression denoting the unit to which the date/datetime should be truncated to
<2> date/datetime expression

*Output*: datetime

.Description:

Truncate the date/datetime to the specified unit by setting all fields that are less significant than the specified
one to zero (or one, for day, day of week and month).

[cols="^,^"]
|===
2+h|Datetime truncation units

s|unit
s|abbreviations

| millennium | millennia
| century | centuries
| decade | decades
| year | years, yy, yyyy
| quarter | quarters, qq, q
| month | months, mm, m
| week | weeks, wk, ww
| day | days, dd, d
| hour | hours, hh
| minute | minutes, mi, n
| second | seconds, ss, s
| millisecond | milliseconds, ms
| microsecond | microseconds, mcs
| nanosecond | nanoseconds, ns
|===



[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateTimeMillennium]
--------------------------------------------------

[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateTimeWeek]
--------------------------------------------------

[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateTimeMinutes]
--------------------------------------------------

[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateDecades]
--------------------------------------------------

[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateQuarter]
--------------------------------------------------

[[sql-functions-datetime-day]]
==== `DAY_OF_MONTH/DOM/DAY`

Expand Down
1 change: 1 addition & 0 deletions docs/reference/sql/functions/index.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
** <<sql-functions-current-date>>
** <<sql-functions-current-time>>
** <<sql-functions-current-timestamp>>
** <<sql-functions-datetime-trunc>>
** <<sql-functions-datetime-day>>
** <<sql-functions-datetime-dow>>
** <<sql-functions-datetime-doy>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.relique.jdbc.csv.CsvResultSet;

import java.io.IOException;
import java.sql.Date;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
Expand All @@ -37,6 +38,9 @@
import static java.sql.Types.REAL;
import static java.sql.Types.SMALLINT;
import static java.sql.Types.TINYINT;
import static java.time.ZoneOffset.UTC;
import static org.elasticsearch.xpack.sql.qa.jdbc.JdbcTestUtils.logResultSetMetadata;
import static org.elasticsearch.xpack.sql.qa.jdbc.JdbcTestUtils.resultSetCurrentData;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.instanceOf;
import static org.junit.Assert.assertEquals;
Expand Down Expand Up @@ -107,7 +111,7 @@ public static void assertResultSetMetadata(ResultSet expected, ResultSet actual,
ResultSetMetaData actualMeta = actual.getMetaData();

if (logger != null) {
JdbcTestUtils.logResultSetMetadata(actual, logger);
logResultSetMetadata(actual, logger);
}

if (expectedMeta.getColumnCount() != actualMeta.getColumnCount()) {
Expand Down Expand Up @@ -210,7 +214,7 @@ private static void doAssertResultSetData(ResultSet expected, ResultSet actual,
assertTrue("Expected more data but no more entries found after [" + count + "]", actual.next());

if (logger != null) {
logger.info(JdbcTestUtils.resultSetCurrentData(actual));
logger.info(resultSetCurrentData(actual));
}

for (int column = 1; column <= columns; column++) {
Expand Down Expand Up @@ -264,6 +268,10 @@ private static void doAssertResultSetData(ResultSet expected, ResultSet actual,
else if (type == Types.TIMESTAMP || type == Types.TIMESTAMP_WITH_TIMEZONE) {
assertEquals(msg, expected.getTimestamp(column), actual.getTimestamp(column));
}
// then date
else if (type == Types.DATE) {
assertEquals(msg, convertDateToSystemTimezone(expected.getDate(column)), actual.getDate(column));
}
// and floats/doubles
else if (type == Types.DOUBLE) {
assertEquals(msg, (double) expectedObject, (double) actualObject, lenientFloatingNumbers ? 1d : 0.0d);
Expand Down Expand Up @@ -301,14 +309,14 @@ else if (type == Types.VARCHAR && actualObject instanceof TemporalAmount) {
} catch (AssertionError ae) {
if (logger != null && actual.next()) {
logger.info("^^^ Assertion failure ^^^");
logger.info(JdbcTestUtils.resultSetCurrentData(actual));
logger.info(resultSetCurrentData(actual));
}
throw ae;
}

if (actual.next()) {
fail("Elasticsearch [" + actual + "] still has data after [" + count + "] entries:\n"
+ JdbcTestUtils.resultSetCurrentData(actual));
+ resultSetCurrentData(actual));
}
}

Expand All @@ -328,4 +336,9 @@ private static int typeOf(int columnType, boolean lenientDataType) {

return columnType;
}

// Used to convert the DATE read from CSV file to a java.sql.Date at the System's timezone (-Dtests.timezone=XXXX)
private static Date convertDateToSystemTimezone(Date date) {
return new Date(date.toLocalDate().atStartOfDay(UTC).toInstant().toEpochMilli());
}
}
17 changes: 9 additions & 8 deletions x-pack/plugin/sql/qa/src/main/resources/command.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,17 @@ CURRENT_DATE |SCALAR
CURRENT_TIME |SCALAR
CURRENT_TIMESTAMP|SCALAR
CURTIME |SCALAR
DATE_TRUNC |SCALAR
DAY |SCALAR
DAYNAME |SCALAR
DAYOFMONTH |SCALAR
DAYOFWEEK |SCALAR
DAYOFYEAR |SCALAR
DAY_NAME |SCALAR
DAY_OF_MONTH |SCALAR
DAY_OF_WEEK |SCALAR
DAY_OF_YEAR |SCALAR
DOM |SCALAR
DAYOFMONTH |SCALAR
DAYOFWEEK |SCALAR
DAYOFYEAR |SCALAR
DAY_NAME |SCALAR
DAY_OF_MONTH |SCALAR
DAY_OF_WEEK |SCALAR
DAY_OF_YEAR |SCALAR
DOM |SCALAR
DOW |SCALAR
DOY |SCALAR
HOUR |SCALAR
Expand Down
147 changes: 146 additions & 1 deletion x-pack/plugin/sql/qa/src/main/resources/datetime.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,151 @@ SELECT WEEK(birth_date) week, birth_date FROM test_emp WHERE WEEK(birth_date) >
2 |1953-01-07T00:00:00.000Z
;

selectDateTruncWithDateTime
schema::dt_hour:ts|dt_min:ts|dt_sec:ts|dt_millis:s|dt_micro:s|dt_nano:s
SELECT DATE_TRUNC('hour', '2019-09-04T11:22:33.123Z'::datetime) as dt_hour, DATE_TRUNC('minute', '2019-09-04T11:22:33.123Z'::datetime) as dt_min,
DATE_TRUNC('seconds', '2019-09-04T11:22:33.123Z'::datetime) as dt_sec, DATE_TRUNC('ms', '2019-09-04T11:22:33.123Z'::datetime)::string as dt_millis,
DATE_TRUNC('mcs', '2019-09-04T11:22:33.123Z'::datetime)::string as dt_micro, DATE_TRUNC('nanoseconds', '2019-09-04T11:22:33.123Z'::datetime)::string as dt_nano;

dt_hour | dt_min | dt_sec | dt_millis | dt_micro | dt_nano
-------------------------+---------------------------+--------------------------+--------------------------+--------------------------+-------------------------
2019-09-04T11:00:00.000Z | 2019-09-04T11:22:00.000Z | 2019-09-04T11:22:33.000Z | 2019-09-04T11:22:33.123Z | 2019-09-04T11:22:33.123Z | 2019-09-04T11:22:33.123Z
;

selectDateTruncWithDate
schema::dt_mil:ts|dt_cent:ts|dt_dec:ts|dt_year:ts|dt_quarter:ts|dt_month:ts|dt_week:ts|dt_day:ts
SELECT DATE_TRUNC('millennia', '2019-09-04'::date) as dt_mil, DATE_TRUNC('century', '2019-09-04'::date) as dt_cent,
DATE_TRUNC('decades', '2019-09-04'::date) as dt_dec, DATE_TRUNC('year', '2019-09-04'::date) as dt_year,
DATE_TRUNC('quarter', '2019-09-04'::date) as dt_quarter, DATE_TRUNC('month', '2019-09-04'::date) as dt_month,
DATE_TRUNC('week', '2019-09-04'::date) as dt_week, DATE_TRUNC('day', '2019-09-04'::date) as dt_day;

dt_mil | dt_cent | dt_dec | dt_year | dt_quarter | dt_month | dt_week | dt_day
-------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+-------------------------
2000-01-01T00:00:00.000Z | 2000-01-01T00:00:00.000Z | 2010-01-01T00:00:00.000Z | 2019-01-01T00:00:00.000Z | 2019-07-01T00:00:00.000Z | 2019-09-01T00:00:00.000Z | 2019-09-02T00:00:00.000Z | 2019-09-04T00:00:00.000Z
;

selectDateTruncWithField
schema::emp_no:i|birth_date:ts|dt_mil:ts|dt_cent:ts|dt_dec:ts|dt_year:ts|dt_quarter:ts|dt_month:ts|dt_week:ts|dt_day:ts
SELECT emp_no, birth_date, DATE_TRUNC('millennium', birth_date) as dt_mil, DATE_TRUNC('centuries', birth_date) as dt_cent,
DATE_TRUNC('decades', birth_date) as dt_dec, DATE_TRUNC('year', birth_date) as dt_year, DATE_TRUNC('quarter', birth_date) as dt_quarter,
DATE_TRUNC('month', birth_date) as dt_month, DATE_TRUNC('week', birth_date) as dt_week, DATE_TRUNC('day', birth_date) as dt_day
FROM test_emp WHERE emp_no >= 10032 AND emp_no <= 10042 ORDER BY 1;

emp_no | birth_date | dt_mil | dt_cent | dt_dec | dt_year | dt_quarter | dt_month | dt_week | dt_day
--------+-------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+-------------------------
10032 |1960-08-09 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-07-01 00:00:00.000Z | 1960-08-01 00:00:00.000Z | 1960-08-08 00:00:00.000Z | 1960-08-09 00:00:00.000Z
10033 |1956-11-14 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1956-01-01 00:00:00.000Z | 1956-10-01 00:00:00.000Z | 1956-11-01 00:00:00.000Z | 1956-11-12 00:00:00.000Z | 1956-11-14 00:00:00.000Z
10034 |1962-12-29 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1962-01-01 00:00:00.000Z | 1962-10-01 00:00:00.000Z | 1962-12-01 00:00:00.000Z | 1962-12-24 00:00:00.000Z | 1962-12-29 00:00:00.000Z
10035 |1953-02-08 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1953-01-01 00:00:00.000Z | 1953-01-01 00:00:00.000Z | 1953-02-01 00:00:00.000Z | 1953-02-02 00:00:00.000Z | 1953-02-08 00:00:00.000Z
10036 |1959-08-10 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1959-01-01 00:00:00.000Z | 1959-07-01 00:00:00.000Z | 1959-08-01 00:00:00.000Z | 1959-08-10 00:00:00.000Z | 1959-08-10 00:00:00.000Z
10037 |1963-07-22 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1963-01-01 00:00:00.000Z | 1963-07-01 00:00:00.000Z | 1963-07-01 00:00:00.000Z | 1963-07-22 00:00:00.000Z | 1963-07-22 00:00:00.000Z
10038 |1960-07-20 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-07-01 00:00:00.000Z | 1960-07-01 00:00:00.000Z | 1960-07-18 00:00:00.000Z | 1960-07-20 00:00:00.000Z
10039 |1959-10-01 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1959-01-01 00:00:00.000Z | 1959-10-01 00:00:00.000Z | 1959-10-01 00:00:00.000Z | 1959-09-28 00:00:00.000Z | 1959-10-01 00:00:00.000Z
10040 | null | null | null | null | null | null | null | null | null
10041 | null | null | null | null | null | null | null | null | null
10042 | null | null | null | null | null | null | null | null | null
;

selectDateTruncWithNullTruncateField
SELECT DATE_TRUNC(null, birth_date) AS dt FROM test_emp LIMIT 5;

dt:ts
------
null
null
null
null
null
;

selectDateTruncWithScalars
SELECT birth_date, DATE_TRUNC(CAST(CHAR(109) AS VARCHAR), birth_date + INTERVAL 12 YEAR) AS dt FROM test_emp ORDER BY 1 DESC NULLS LAST LIMIT 5;

birth_date:ts | dt:ts
-------------------------+---------------------
1965-01-03 00:00:00.000Z | 1977-01-01 00:00:00.000Z
1964-10-18 00:00:00.000Z | 1976-10-01 00:00:00.000Z
1964-06-11 00:00:00.000Z | 1976-06-01 00:00:00.000Z
1964-06-02 00:00:00.000Z | 1976-06-01 00:00:00.000Z
1964-04-18 00:00:00.000Z | 1976-04-01 00:00:00.000Z
;

selectDateTruncWithTruncArgFromField
SELECT DATE_TRUNC(CONCAT(gender, 'illennium'), birth_date) AS dt FROM test_emp WHERE gender='M' ORDER BY 1 DESC LIMIT 2;

dt:ts
------------------------
0999-12-27 00:00:00.000Z
0999-12-27 00:00:00.000Z
;

selectDateTruncWithComplexExpressions
SELECT gender, birth_date, DATE_TRUNC(CASE WHEN gender = 'M' THEN CONCAT(gender, 'onths') WHEN gender = 'F' THEN 'decade' ELSE 'quarter' END,
birth_date + INTERVAL 10 month) AS dt FROM test_emp WHERE dt > '1954-07-01'::date ORDER BY emp_no LIMIT 10;

gender:s | birth_date:ts | dt:ts
------------+--------------------------+---------------------
F | 1964-06-02 00:00:00.000Z | 1960-01-01 00:00:00.000Z
M | 1959-12-03 00:00:00.000Z | 1960-10-01 00:00:00.000Z
M | 1954-05-01 00:00:00.000Z | 1955-03-01 00:00:00.000Z
M | 1955-01-21 00:00:00.000Z | 1955-11-01 00:00:00.000Z
M | 1958-02-19 00:00:00.000Z | 1958-12-01 00:00:00.000Z
null | 1963-06-01 00:00:00.000Z | 1964-04-01 00:00:00.000Z
null | 1960-10-04 00:00:00.000Z | 1961-07-01 00:00:00.000Z
null | 1963-06-07 00:00:00.000Z | 1964-04-01 00:00:00.000Z
null | 1956-02-12 00:00:00.000Z | 1956-10-01 00:00:00.000Z
null | 1959-08-19 00:00:00.000Z | 1960-04-01 00:00:00.000Z
;

dateTruncOrderBy
schema::emp_no:i|hire_date:ts|dt:ts
SELECT emp_no, hire_date, DATE_TRUNC('quarter', hire_date) as dt FROM test_emp ORDER BY dt NULLS LAST, emp_no LIMIT 5;

emp_no | hire_date | dt
--------+--------------------------+-------------------------
10009 | 1985-02-18 00:00:00.000Z | 1985-01-01 00:00:00.000Z
10048 | 1985-02-24 00:00:00.000Z | 1985-01-01 00:00:00.000Z
10098 | 1985-05-13 00:00:00.000Z | 1985-04-01 00:00:00.000Z
10061 | 1985-09-17 00:00:00.000Z | 1985-07-01 00:00:00.000Z
10076 | 1985-07-09 00:00:00.000Z | 1985-07-01 00:00:00.000Z
;

dateTruncFilter
schema::emp_no:i|hire_date:ts|dt:ts
SELECT emp_no, hire_date, DATE_TRUNC('quarter', hire_date) as dt FROM test_emp WHERE DATE_TRUNC('quarter', hire_date) > '1994-07-01T00:00:00.000Z'::timestamp ORDER BY emp_no;

emp_no | hire_date | dt
--------+--------------------------+-------------------------
10016 | 1995-01-27 00:00:00.000Z | 1995-01-01 00:00:00.000Z
10019 | 1999-04-30 00:00:00.000Z | 1999-04-01 00:00:00.000Z
10022 | 1995-08-22 00:00:00.000Z | 1995-07-01 00:00:00.000Z
10024 | 1997-05-19 00:00:00.000Z | 1997-04-01 00:00:00.000Z
10026 | 1995-03-20 00:00:00.000Z | 1995-01-01 00:00:00.000Z
10054 | 1995-03-13 00:00:00.000Z | 1995-01-01 00:00:00.000Z
10084 | 1995-12-15 00:00:00.000Z | 1995-10-01 00:00:00.000Z
10093 | 1996-11-05 00:00:00.000Z | 1996-10-01 00:00:00.000Z
;

dateTruncGroupBy
schema::count:l|dt:ts
SELECT count(*) as count, DATE_TRUNC('decade', hire_date) dt FROM test_emp GROUP BY dt ORDER BY 2;

count | dt
--------+-------------------------
59 | 1980-01-01 00:00:00.000Z
41 | 1990-01-01 00:00:00.000Z
;

dateTruncHaving
schema::gender:s|dt:ts
SELECT gender, max(hire_date) dt FROM test_emp GROUP BY gender HAVING DATE_TRUNC('year', max(hire_date)) >= '1997-01-01T00:00:00.000Z'::timestamp ORDER BY 1;

gender | dt
--------+-------------------------
null | 1999-04-30 00:00:00.000Z
F | 1997-05-19 00:00:00.000Z
;

//
// Aggregate
//
Expand Down Expand Up @@ -404,4 +549,4 @@ SELECT CAST (CAST (birth_date AS VARCHAR) AS TIMESTAMP) a FROM test_emp WHERE YE
a:ts
---------------
1965-01-03T00:00:00Z
;
;
Loading

0 comments on commit b37e967

Please sign in to comment.