Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datetime aggregation fixes. #1061

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
Expand Down Expand Up @@ -69,7 +68,7 @@ public LocalDateTime datetimeValue() {

@Override
public Instant timestampValue() {
return ZonedDateTime.of(date, timeValue(), ZoneId.systemDefault()).toInstant();
return ZonedDateTime.of(date, timeValue(), ExprTimestampValue.ZONE).toInstant();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
Expand Down Expand Up @@ -71,7 +70,7 @@ public LocalTime timeValue() {

@Override
public Instant timestampValue() {
return ZonedDateTime.of(datetime, ZoneId.of("UTC")).toInstant();
return ZonedDateTime.of(datetime, ExprTimestampValue.ZONE).toInstant();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class ExprTimestampValue extends AbstractExprValue {
/**
* TODO: only UTC is supported for now.
*/
private static final ZoneId ZONE = ZoneId.of("UTC");
public static final ZoneId ZONE = ZoneId.of("UTC");

private final Instant timestamp;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.data.model;

import java.time.temporal.TemporalAmount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ private static DefaultFunctionResolver avg() {
new ImmutableMap.Builder<FunctionSignature, FunctionBuilder>()
.put(new FunctionSignature(functionName, Collections.singletonList(DOUBLE)),
(functionProperties, arguments) -> new AvgAggregator(arguments, DOUBLE))
.put(new FunctionSignature(functionName, Collections.singletonList(DATE)),
(functionProperties, arguments) -> new AvgAggregator(arguments, DATE))
.put(new FunctionSignature(functionName, Collections.singletonList(DATETIME)),
(functionProperties, arguments) -> new AvgAggregator(arguments, DATETIME))
.put(new FunctionSignature(functionName, Collections.singletonList(TIME)),
(functionProperties, arguments) -> new AvgAggregator(arguments, TIME))
.put(new FunctionSignature(functionName, Collections.singletonList(TIMESTAMP)),
(functionProperties, arguments) -> new AvgAggregator(arguments, TIMESTAMP))
.build()
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,23 @@

package org.opensearch.sql.expression.aggregation;

import static java.time.temporal.ChronoUnit.MILLIS;
import static org.opensearch.sql.utils.ExpressionUtils.format;

import java.time.Instant;
import java.time.LocalTime;
import java.util.List;
import java.util.Locale;
import org.opensearch.sql.data.model.ExprDateValue;
import org.opensearch.sql.data.model.ExprDatetimeValue;
import org.opensearch.sql.data.model.ExprDoubleValue;
import org.opensearch.sql.data.model.ExprIntegerValue;
import org.opensearch.sql.data.model.ExprNullValue;
import org.opensearch.sql.data.model.ExprTimeValue;
import org.opensearch.sql.data.model.ExprTimestampValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.Expression;
import org.opensearch.sql.expression.function.BuiltinFunctionName;

Expand All @@ -23,20 +32,39 @@
*/
public class AvgAggregator extends Aggregator<AvgAggregator.AvgState> {

/**
* The data type is stored because each data type is processed in a different way.
* Input data has the same type as the result.
*/
private final ExprCoreType dataType;

/**
 * Creates an AVG aggregator.
 *
 * @param arguments  expressions handed to the parent aggregator together with the AVG
 *                   function name
 * @param returnType type handed to the parent aggregator as the return type; it is also
 *                   remembered in {@code dataType}
 */
public AvgAggregator(List<Expression> arguments, ExprCoreType returnType) {
  super(BuiltinFunctionName.AVG.getName(), arguments, returnType);
  this.dataType = returnType;
}

@Override
public AvgState create() {
return new AvgState();
switch (dataType) {
case DATE:
Yury-Fridlyand marked this conversation as resolved.
Show resolved Hide resolved
return new DateAvgState();
case DATETIME:
return new DateTimeAvgState();
case TIMESTAMP:
return new TimestampAvgState();
case TIME:
return new TimeAvgState();
case DOUBLE:
return new DoubleAvgState();
default: //unreachable code - we don't expose signatures for unsupported types
throw new IllegalArgumentException(
String.format("avg aggregation over %s type is not supported", dataType));
}
}

@Override
protected AvgState iterate(ExprValue value, AvgState state) {
state.count++;
state.total += ExprValueUtils.getDoubleValue(value);
return state;
return state.iterate(value);
}

@Override
Expand All @@ -47,18 +75,117 @@ public String toString() {
/**
* Average State.
*/
protected static class AvgState implements AggregationState {
private int count;
private double total;
protected abstract static class AvgState implements AggregationState {
protected ExprValue count;
protected ExprValue total;

AvgState() {
this.count = 0;
this.total = 0d;
this.count = new ExprIntegerValue(0);
this.total = new ExprDoubleValue(0D);
}

@Override
public abstract ExprValue result();

protected AvgState iterate(ExprValue value) {
count = DSL.add(DSL.literal(count), DSL.literal(1)).valueOf();
return this;
}
}

/**
 * Average state that keeps a running sum of the aggregated values in {@code total} and
 * divides it by {@code count} when the result is requested.
 */
protected static class DoubleAvgState extends AvgState {
  /**
   * Adds {@code value} to the running sum and then lets the parent state process the value.
   */
  @Override
  protected AvgState iterate(ExprValue value) {
    ExprValue updatedTotal = DSL.add(DSL.literal(total), DSL.literal(value)).valueOf();
    total = updatedTotal;
    return super.iterate(value);
  }

  /**
   * Returns {@code total / count}, or a null value when nothing has been counted.
   */
  @Override
  public ExprValue result() {
    boolean nothingAggregated = count.integerValue() == 0;
    return nothingAggregated
        ? ExprNullValue.of()
        : DSL.divide(DSL.literal(total), DSL.literal(count)).valueOf();
  }
}

protected static class DateAvgState extends AvgState {
@Override
public ExprValue result() {
if (0 == count.integerValue()) {
return ExprNullValue.of();
}

return new ExprDateValue(
new ExprTimestampValue(Instant.ofEpochMilli(
DSL.divide(DSL.literal(total), DSL.literal(count)).valueOf().longValue()))
.dateValue());
}

@Override
protected AvgState iterate(ExprValue value) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it possible to reuse DSL.adddate()?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but DateAvgState will be less clear and readable:

  protected static class DateAvgState extends AvgState {

    public DateAvgState() {
      this.count = new ExprIntegerValue(0);
      this.total = new ExprDateValue(LocalDate.EPOCH);
    }

    @Override
    public ExprValue result() {
      if (0 == count.integerValue()) {
        return ExprNullValue.of();
      }

      return DSL.adddate(DSL.literal(new ExprDateValue(LocalDate.EPOCH)), 
          DSL.literal(DSL.divide(DSL.literal(DAYS.between(LocalDate.EPOCH, total.dateValue())),
              DSL.literal(count)).valueOf().longValue())).valueOf();
    }

    @Override
    protected AvgState iterate(ExprValue value) {
      total = DSL.adddate(DSL.literal(total),
          DSL.literal(DAYS.between(LocalDate.EPOCH, value.dateValue())))
          .valueOf();

      return super.iterate(value);
    }
  }

total = DSL.add(DSL.literal(total), DSL.literal(value.timestampValue().toEpochMilli()))
.valueOf();
return super.iterate(value);
}
}

/**
 * Average state for datetime input: values are summed as epoch milliseconds and the mean
 * is converted back to a datetime value.
 */
protected static class DateTimeAvgState extends AvgState {
  /**
   * Adds the epoch-millisecond representation of {@code value} to the running sum and then
   * lets the parent state process the value.
   */
  @Override
  protected AvgState iterate(ExprValue value) {
    long epochMillis = value.timestampValue().toEpochMilli();
    total = DSL.add(DSL.literal(total), DSL.literal(epochMillis)).valueOf();
    return super.iterate(value);
  }

  /**
   * Returns the mean as an {@link ExprDatetimeValue}, or a null value when nothing has been
   * counted.
   */
  @Override
  public ExprValue result() {
    if (count.integerValue() == 0) {
      return ExprNullValue.of();
    }

    long meanEpochMillis =
        DSL.divide(DSL.literal(total), DSL.literal(count)).valueOf().longValue();
    // Go through a timestamp value to obtain the datetime representation of the mean.
    ExprTimestampValue meanTimestamp =
        new ExprTimestampValue(Instant.ofEpochMilli(meanEpochMillis));
    return new ExprDatetimeValue(meanTimestamp.datetimeValue());
  }
}

/**
 * Average state for timestamp input: values are summed as epoch milliseconds and the mean
 * is turned back into a timestamp value.
 */
protected static class TimestampAvgState extends AvgState {
  /**
   * Adds the epoch-millisecond representation of {@code value} to the running sum and then
   * lets the parent state process the value.
   */
  @Override
  protected AvgState iterate(ExprValue value) {
    long epochMillis = value.timestampValue().toEpochMilli();
    total = DSL.add(DSL.literal(total), DSL.literal(epochMillis)).valueOf();
    return super.iterate(value);
  }

  /**
   * Returns the mean as an {@link ExprTimestampValue}, or a null value when nothing has been
   * counted.
   */
  @Override
  public ExprValue result() {
    if (count.integerValue() == 0) {
      return ExprNullValue.of();
    }

    long meanEpochMillis =
        DSL.divide(DSL.literal(total), DSL.literal(count)).valueOf().longValue();
    return new ExprTimestampValue(Instant.ofEpochMilli(meanEpochMillis));
  }
}

protected static class TimeAvgState extends AvgState {
@Override
public ExprValue result() {
return count == 0 ? ExprNullValue.of() : ExprValueUtils.doubleValue(total / count);
if (0 == count.integerValue()) {
return ExprNullValue.of();
}

return new ExprTimeValue(LocalTime.MIN.plus(
DSL.divide(DSL.literal(total), DSL.literal(count)).valueOf().longValue(), MILLIS));
}

@Override
protected AvgState iterate(ExprValue value) {
total = DSL.add(DSL.literal(total),
DSL.literal(MILLIS.between(LocalTime.MIN, value.timeValue()))).valueOf();
return super.iterate(value);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import org.junit.jupiter.api.Test;
import org.opensearch.sql.exception.ExpressionEvaluationException;
Expand Down Expand Up @@ -43,7 +42,7 @@ public void timestampValueInterfaceTest() {

assertEquals(TIMESTAMP, timestampValue.type());
assertEquals(ZonedDateTime.of(LocalDateTime.parse("2020-07-07T01:01:01"),
ZoneId.of("UTC")).toInstant(), timestampValue.timestampValue());
ExprTimestampValue.ZONE).toInstant(), timestampValue.timestampValue());
assertEquals("2020-07-07 01:01:01", timestampValue.value());
assertEquals("TIMESTAMP '2020-07-07 01:01:01'", timestampValue.toString());
assertEquals(LocalDate.parse("2020-07-07"), timestampValue.dateValue());
Expand All @@ -61,7 +60,7 @@ public void dateValueInterfaceTest() {
assertEquals(LocalTime.parse("00:00:00"), dateValue.timeValue());
assertEquals(LocalDateTime.parse("2012-07-07T00:00:00"), dateValue.datetimeValue());
assertEquals(ZonedDateTime.of(LocalDateTime.parse("2012-07-07T00:00:00"),
ZoneId.systemDefault()).toInstant(), dateValue.timestampValue());
ExprTimestampValue.ZONE).toInstant(), dateValue.timestampValue());
ExpressionEvaluationException exception =
assertThrows(ExpressionEvaluationException.class, () -> integerValue(1).dateValue());
assertEquals("invalid to get dateValue from value of type INTEGER",
Expand All @@ -76,7 +75,7 @@ public void datetimeValueInterfaceTest() {
assertEquals(LocalDate.parse("2020-08-17"), datetimeValue.dateValue());
assertEquals(LocalTime.parse("19:44:00"), datetimeValue.timeValue());
assertEquals(ZonedDateTime.of(LocalDateTime.parse("2020-08-17T19:44:00"),
ZoneId.of("UTC")).toInstant(), datetimeValue.timestampValue());
ExprTimestampValue.ZONE).toInstant(), datetimeValue.timestampValue());
assertEquals("DATETIME '2020-08-17 19:44:00'", datetimeValue.toString());
assertThrows(ExpressionEvaluationException.class, () -> integerValue(1).datetimeValue(),
"invalid to get datetimeValue from value of type INTEGER");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,18 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.opensearch.sql.data.type.ExprCoreType.DATE;
import static org.opensearch.sql.data.type.ExprCoreType.DATETIME;
import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE;
import static org.opensearch.sql.data.type.ExprCoreType.INTEGER;
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.data.type.ExprCoreType.TIME;
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;

import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
Expand Down Expand Up @@ -62,13 +71,76 @@ public void avg_with_all_missing_or_null() {
assertTrue(result.isNull());
}

/** AVG over an INTEGER reference with no input rows yields a null value. */
@Test
public void avg_numeric_no_values() {
  var avgOverNothing = DSL.avg(DSL.ref("dummy", INTEGER));

  ExprValue averaged = aggregation(avgOverNothing, List.of());

  assertTrue(averaged.isNull());
}

/** AVG over a DATE reference with no input rows yields a null value. */
@Test
public void avg_date_no_values() {
  var avgOverNothing = DSL.avg(DSL.ref("dummy", DATE));

  ExprValue averaged = aggregation(avgOverNothing, List.of());

  assertTrue(averaged.isNull());
}

/** AVG over a DATETIME reference with no input rows yields a null value. */
@Test
public void avg_datetime_no_values() {
  var avgOverNothing = DSL.avg(DSL.ref("dummy", DATETIME));

  ExprValue averaged = aggregation(avgOverNothing, List.of());

  assertTrue(averaged.isNull());
}

/** AVG over a TIMESTAMP reference with no input rows yields a null value. */
@Test
public void avg_timestamp_no_values() {
  var avgOverNothing = DSL.avg(DSL.ref("dummy", TIMESTAMP));

  ExprValue averaged = aggregation(avgOverNothing, List.of());

  assertTrue(averaged.isNull());
}

/** AVG over a TIME reference with no input rows yields a null value. */
@Test
public void avg_time_no_values() {
  var avgOverNothing = DSL.avg(DSL.ref("dummy", TIME));

  ExprValue averaged = aggregation(avgOverNothing, List.of());

  assertTrue(averaged.isNull());
}

/** AVG over the {@code date_value} field of the shared tuples is expected to be 2007-07-02. */
@Test
public void avg_date() {
  var dateColumn = DSL.date(DSL.ref("date_value", STRING));

  ExprValue averaged = aggregation(DSL.avg(dateColumn), tuples);

  LocalDate expected = LocalDate.of(2007, 7, 2);
  assertEquals(expected, averaged.dateValue());
}

/**
 * AVG over the {@code datetime_value} field of the shared tuples is expected to be
 * 2012-07-02T03:30.
 */
@Test
public void avg_datetime() {
  var datetimeColumn = DSL.datetime(DSL.ref("datetime_value", STRING));

  var averaged = aggregation(DSL.avg(datetimeColumn), tuples);

  LocalDateTime expected = LocalDateTime.of(2012, 7, 2, 3, 30);
  assertEquals(expected, averaged.datetimeValue());
}

/** AVG over the {@code time_value} field of the shared tuples is expected to be 09:30. */
@Test
public void avg_time() {
  var timeColumn = DSL.time(DSL.ref("time_value", STRING));

  ExprValue averaged = aggregation(DSL.avg(timeColumn), tuples);

  LocalTime expected = LocalTime.of(9, 30);
  assertEquals(expected, averaged.timeValue());
}

/**
 * AVG over the {@code timestamp_value} field of the shared tuples must keep the TIMESTAMP
 * type and is expected to be 2012-07-02T03:30.
 */
@Test
public void avg_timestamp() {
  var timestampColumn = DSL.timestamp(DSL.ref("timestamp_value", STRING));

  var averaged = aggregation(DSL.avg(timestampColumn), tuples);

  assertEquals(TIMESTAMP, averaged.type());
  LocalDateTime expected = LocalDateTime.of(2012, 7, 2, 3, 30);
  assertEquals(expected, averaged.datetimeValue());
}

/** Evaluating an AVG aggregator directly through {@code valueOf} must be rejected. */
@Test
public void valueOf() {
  ExpressionEvaluationException thrown = assertThrows(
      ExpressionEvaluationException.class,
      () -> DSL.avg(DSL.ref("double_value", DOUBLE)).valueOf(valueEnv()));

  String message = thrown.getMessage();
  assertEquals("can't evaluate on aggregator: avg", message);
}

/**
 * Creating the aggregation state for a type without an AVG implementation (STRING here)
 * must fail with an {@link IllegalArgumentException}.
 */
@Test
public void avg_on_unsupported_type() {
  var stringAvg = new AvgAggregator(List.of(DSL.ref("string", STRING)), STRING);

  var thrown = assertThrows(IllegalArgumentException.class, () -> stringAvg.create());

  String message = thrown.getMessage();
  assertEquals("avg aggregation over STRING type is not supported", message);
}

@Test
public void test_to_string() {
Aggregator avgAggregator = DSL.avg(DSL.ref("integer_value", INTEGER));
Expand Down
Loading