Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Star tree] Adding date field rounding support in star tree #15249

Merged
merged 16 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions server/src/main/java/org/opensearch/common/Rounding.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public enum DateTimeUnit {
WEEK_OF_WEEKYEAR((byte) 1, "week", IsoFields.WEEK_OF_WEEK_BASED_YEAR, true, TimeUnit.DAYS.toMillis(7)) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(7);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
return DateUtils.roundWeekOfWeekYear(utcMillis);
}

Expand All @@ -107,7 +107,7 @@ long extraLocalOffsetLookup() {
YEAR_OF_CENTURY((byte) 2, "year", ChronoField.YEAR_OF_ERA, false, 12) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(366);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundYear(utcMillis);
}

Expand All @@ -118,7 +118,7 @@ long extraLocalOffsetLookup() {
QUARTER_OF_YEAR((byte) 3, "quarter", IsoFields.QUARTER_OF_YEAR, false, 3) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(92);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundQuarterOfYear(utcMillis);
}

Expand All @@ -129,7 +129,7 @@ long extraLocalOffsetLookup() {
MONTH_OF_YEAR((byte) 4, "month", ChronoField.MONTH_OF_YEAR, false, 1) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(31);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundMonthOfYear(utcMillis);
}

Expand All @@ -138,7 +138,7 @@ long extraLocalOffsetLookup() {
}
},
DAY_OF_MONTH((byte) 5, "day", ChronoField.DAY_OF_MONTH, true, ChronoField.DAY_OF_MONTH.getBaseUnit().getDuration().toMillis()) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, this.ratio);
}

Expand All @@ -147,7 +147,7 @@ long extraLocalOffsetLookup() {
}
},
HOUR_OF_DAY((byte) 6, "hour", ChronoField.HOUR_OF_DAY, true, ChronoField.HOUR_OF_DAY.getBaseUnit().getDuration().toMillis()) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand All @@ -162,7 +162,7 @@ long extraLocalOffsetLookup() {
true,
ChronoField.MINUTE_OF_HOUR.getBaseUnit().getDuration().toMillis()
) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand All @@ -177,7 +177,7 @@ long extraLocalOffsetLookup() {
true,
ChronoField.SECOND_OF_MINUTE.getBaseUnit().getDuration().toMillis()
) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand Down Expand Up @@ -210,7 +210,7 @@ public long extraLocalOffsetLookup() {
* @param utcMillis the milliseconds since the epoch
* @return the rounded down milliseconds since the epoch
*/
abstract long roundFloor(long utcMillis);
public abstract long roundFloor(long utcMillis);

/**
* When looking up {@link LocalTimeOffset} go this many milliseconds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,22 @@

import org.opensearch.common.Rounding;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.time.DateUtils;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DataCubeDateTimeUnit;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding;
import org.opensearch.index.mapper.CompositeDataCubeFieldType;
import org.opensearch.index.mapper.DateFieldMapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.stream.Collectors;

/**
* Date dimension class
Expand All @@ -24,27 +34,79 @@
*/
@ExperimentalApi
public class DateDimension implements Dimension {
private final List<Rounding.DateTimeUnit> calendarIntervals;
private final List<DateTimeUnitRounding> calendarIntervals;
public static final String CALENDAR_INTERVALS = "calendar_intervals";
public static final String DATE = "date";
private final String field;
private final List<DateTimeUnitRounding> sortedCalendarIntervals;
private final DateFieldMapper.Resolution resolution;

public DateDimension(String field, List<Rounding.DateTimeUnit> calendarIntervals) {
public DateDimension(String field, List<DateTimeUnitRounding> calendarIntervals, DateFieldMapper.Resolution resolution) {
    this.field = field;
    this.calendarIntervals = calendarIntervals;
    // A second view of the intervals, ordered from the lowest unit to the highest unit
    this.sortedCalendarIntervals = getSortedDateTimeUnits(calendarIntervals);
    // Fall back to millisecond resolution when the caller does not supply one
    this.resolution = resolution != null ? resolution : DateFieldMapper.Resolution.MILLISECONDS;
}

public List<Rounding.DateTimeUnit> getIntervals() {
/**
 * Returns the calendar intervals exactly as they were passed to the constructor
 * (not re-ordered).
 */
public List<DateTimeUnitRounding> getIntervals() {
    return this.calendarIntervals;
}

/**
 * Returns the calendar intervals ordered by {@link DateTimeUnitComparator},
 * as computed once in the constructor.
 */
public List<DateTimeUnitRounding> getSortedCalendarIntervals() {
    return this.sortedCalendarIntervals;
}

/**
 * Feeds one value per configured calendar interval to the consumer, walking the
 * intervals in sorted order. A {@code null} input yields {@code null} for every
 * interval; otherwise each interval receives the input rounded down to that unit.
 *
 * @param val       The value to be set
 * @param dimSetter Consumer which sets the dimensions
 */
@Override
public void setDimensionValues(final Long val, final Consumer<Long> dimSetter) {
    for (DateTimeUnitRounding unit : sortedCalendarIntervals) {
        if (val != null) {
            // Normalize the stored value first, then floor it to this unit
            dimSetter.accept(unit.roundFloor(storedDurationSinceEpoch(val)));
            continue;
        }
        dimSetter.accept(null);
    }
}

/**
 * Normalizes a stored date value before rounding. When the field resolution is
 * NANOSECONDS the value is passed through {@code DateUtils.toMilliSeconds};
 * for any other resolution it is returned unchanged.
 *
 * @param nanoSecondsSinceEpoch the stored value; despite the name it is only treated
 *                              as nanoseconds when the resolution is NANOSECONDS
 */
private long storedDurationSinceEpoch(long nanoSecondsSinceEpoch) {
    final boolean storedAsNanos = resolution.equals(DateFieldMapper.Resolution.NANOSECONDS);
    return storedAsNanos ? DateUtils.toMilliSeconds(nanoSecondsSinceEpoch) : nanoSecondsSinceEpoch;
}

/**
* Returns the list of fields that represent the dimension
*/
@Override
public List<String> getSubDimensionNames() {
List<String> fields = new ArrayList<>(calendarIntervals.size());
for (DateTimeUnitRounding interval : sortedCalendarIntervals) {
// TODO : revisit this post file format changes
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
fields.add(field + "_" + interval.shortName());
}
return fields;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.startObject("date_dimension");
builder.field(CompositeDataCubeFieldType.NAME, this.getField());
builder.field(CompositeDataCubeFieldType.TYPE, DATE);
builder.startArray(CALENDAR_INTERVALS);
for (Rounding.DateTimeUnit interval : calendarIntervals) {
for (DateTimeUnitRounding interval : calendarIntervals) {
builder.value(interval.shortName());
}
builder.endArray();
Expand All @@ -69,4 +131,44 @@ public int hashCode() {
/**
 * Returns the field name this dimension was constructed with.
 */
public String getField() {
    return this.field;
}

/**
 * Returns the number of configured calendar intervals; each interval
 * corresponds to one sub-dimension.
 */
@Override
public int getNumSubDimensions() {
    return this.calendarIntervals.size();
}

/**
* DateTimeUnit Comparator which tracks dateTimeUnits in sorted order from second unit to year unit
*/
public static class DateTimeUnitComparator implements Comparator<DateTimeUnitRounding> {
public static final Map<String, Integer> ORDERED_DATE_TIME_UNIT = new HashMap<>();

static {
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.SECOND_OF_MINUTE.shortName(), 1);
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.MINUTES_OF_HOUR.shortName(), 2);
ORDERED_DATE_TIME_UNIT.put(DataCubeDateTimeUnit.QUARTER_HOUR_OF_DAY.shortName(), 3);
ORDERED_DATE_TIME_UNIT.put(DataCubeDateTimeUnit.HALF_HOUR_OF_DAY.shortName(), 4);
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.HOUR_OF_DAY.shortName(), 5);
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.DAY_OF_MONTH.shortName(), 6);
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.WEEK_OF_WEEKYEAR.shortName(), 7);
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.MONTH_OF_YEAR.shortName(), 8);
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.QUARTER_OF_YEAR.shortName(), 9);
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.YEAR_OF_CENTURY.shortName(), 10);
}

@Override
public int compare(DateTimeUnitRounding unit1, DateTimeUnitRounding unit2) {
return Integer.compare(
ORDERED_DATE_TIME_UNIT.getOrDefault(unit1.shortName(), Integer.MAX_VALUE),
ORDERED_DATE_TIME_UNIT.getOrDefault(unit2.shortName(), Integer.MAX_VALUE)
);
}
}

/**
 * Produces a new list holding the given units ordered by {@link DateTimeUnitComparator};
 * the input list itself is left untouched.
 */
public static List<DateTimeUnitRounding> getSortedDateTimeUnits(List<DateTimeUnitRounding> dateTimeUnits) {
    List<DateTimeUnitRounding> ordered = new ArrayList<>(dateTimeUnits);
    ordered.sort(new DateTimeUnitComparator());
    return ordered;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,35 @@
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.core.xcontent.ToXContent;

import java.util.List;
import java.util.function.Consumer;

/**
 * Contract shared by all data-cube dimensions
 *
 * @opensearch.experimental
 */
@ExperimentalApi
public interface Dimension extends ToXContent {

    /**
     * Returns the field associated with this dimension
     */
    String getField();

    /**
     * Returns the number of dimension values that get added to the star tree
     * document on behalf of this dimension
     */
    int getNumSubDimensions();

    /**
     * Hands the dimension value(s) derived from the input to the consumer
     *
     * @param value     The value to be set
     * @param dimSetter Consumer which sets the dimensions
     */
    void setDimensionValues(Long value, Consumer<Long> dimSetter);

    /**
     * Returns the names of the dimension fields that together represent this dimension
     */
    List<String> getSubDimensionNames();
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,19 @@

package org.opensearch.index.compositeindex.datacube;

import org.opensearch.common.Rounding;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.Mapper;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import static org.opensearch.index.compositeindex.datacube.DateDimension.CALENDAR_INTERVALS;
Expand Down Expand Up @@ -70,13 +73,13 @@ private static DateDimension parseAndCreateDateDimension(
Map<String, Object> dimensionMap,
Mapper.TypeParser.ParserContext c
) {
List<Rounding.DateTimeUnit> calendarIntervals = new ArrayList<>();
Set<DateTimeUnitRounding> calendarIntervals;
List<String> intervalStrings = XContentMapValues.extractRawValues(CALENDAR_INTERVALS, dimensionMap)
.stream()
.map(Object::toString)
.collect(Collectors.toList());
if (intervalStrings == null || intervalStrings.isEmpty()) {
calendarIntervals = StarTreeIndexSettings.DEFAULT_DATE_INTERVALS.get(c.getSettings());
calendarIntervals = new LinkedHashSet<>(StarTreeIndexSettings.DEFAULT_DATE_INTERVALS.get(c.getSettings()));
} else {
if (intervalStrings.size() > StarTreeIndexSettings.STAR_TREE_MAX_DATE_INTERVALS_SETTING.get(c.getSettings())) {
throw new IllegalArgumentException(
Expand All @@ -88,12 +91,17 @@ private static DateDimension parseAndCreateDateDimension(
)
);
}
calendarIntervals = new LinkedHashSet<>();
for (String interval : intervalStrings) {
calendarIntervals.add(StarTreeIndexSettings.getTimeUnit(interval));
}
calendarIntervals = new ArrayList<>(calendarIntervals);
}
dimensionMap.remove(CALENDAR_INTERVALS);
return new DateDimension(name, calendarIntervals);
DateFieldMapper.Resolution resolution = null;
if (c != null && c.mapperService() != null && c.mapperService().fieldType(name) != null) {
resolution = ((DateFieldMapper.DateFieldType) c.mapperService().fieldType(name)).resolution();
}

return new DateDimension(name, new ArrayList<>(calendarIntervals), resolution);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
import org.opensearch.index.mapper.CompositeDataCubeFieldType;

import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.function.Consumer;

/**
* Composite index numeric dimension class
Expand All @@ -33,6 +35,22 @@ public String getField() {
return field;
}

@Override
public int getNumSubDimensions() {
return 1;
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
}

/**
 * Passes the given value to the consumer unchanged, exactly once.
 *
 * @param val       The value to be set; forwarded as-is, including {@code null}
 * @param dimSetter Consumer which sets the dimensions
 */
@Override
public void setDimensionValues(final Long val, final Consumer<Long> dimSetter) {
dimSetter.accept(val);
}

@Override
public List<String> getSubDimensionNames() {
// TODO : revisit this post file format changes
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
return List.of(field);
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
import org.opensearch.index.mapper.CompositeDataCubeFieldType;

import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.function.Consumer;

/**
* Represents a dimension for reconstructing StarTreeField from file formats during searches and merges.
Expand All @@ -31,6 +33,21 @@
return field;
}

/**
 * This dimension always reports exactly one sub-dimension.
 */
@Override
public int getNumSubDimensions() {
return 1;
}

/**
 * Passes the given value to the consumer unchanged, exactly once.
 *
 * @param val       The value to be set; forwarded as-is, including {@code null}
 * @param dimSetter Consumer which sets the dimensions
 */
@Override
public void setDimensionValues(final Long val, final Consumer<Long> dimSetter) {
dimSetter.accept(val);
}

Check warning on line 44 in server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java#L43-L44

Added lines #L43 - L44 were not covered by tests

@Override
public List<String> getSubDimensionNames() {
return List.of(field);
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
Expand Down
Loading
Loading