From 09a401ab98b98e3c96c0af5d5bdb6ea39d26debf Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 1 Feb 2017 15:33:20 -0500 Subject: [PATCH] Eagerly initialize dates when we move to the next document --- .../index/fielddata/ScriptDocValues.java | 128 ++++++++++++------ .../fielddata/ScriptDocValuesDatesTests.java | 76 +++++++++++ 2 files changed, 159 insertions(+), 45 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesDatesTests.java diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java b/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java index d7f3f6180f8e0..ac6c6e4534ac2 100644 --- a/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java +++ b/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java @@ -21,12 +21,11 @@ import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.geo.GeoHashUtils; import org.elasticsearch.common.geo.GeoPoint; import org.elasticsearch.common.geo.GeoUtils; +import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.MutableDateTime; import org.joda.time.ReadableDateTime; @@ -36,8 +35,6 @@ import java.util.List; import java.util.function.UnaryOperator; -import static java.lang.Math.max; - /** * Script level doc values, the assumption is that any implementation will implement a getValue @@ -132,12 +129,7 @@ public int size() { public static final class Longs extends ScriptDocValues { private final SortedNumericDocValues values; - /** - * Values wrapped in {@link MutableDateTime}. Null by default an allocated on first usage so there is no cost if not used. We keep - * this array so we don't have allocate new {@link MutableDateTime}s on every usage. Instead we reuse them for every document. - */ - private MutableDateTime[] dates; - private ScriptDocValues dateList; + private Dates dates; public Longs(SortedNumericDocValues values) { this.values = values; @@ -146,6 +138,9 @@ public Longs(SortedNumericDocValues values) { @Override public void setNextDocId(int docId) { values.setDocument(docId); + if (dates != null) { + dates.refreshArray(); + } } public SortedNumericDocValues getInternalValues() { @@ -161,33 +156,19 @@ public long getValue() { } public ReadableDateTime getDate() { - if (values.count() == 0) { - return dateAt(0, 0L); + if (dates == null) { + dates = new Dates(values); + dates.refreshArray(); } - return dateAt(0, values.valueAt(0)); + return dates.getValue(); } public List getDates() { - if (dateList == null) { - dateList = new ScriptDocValues() { - @Override - public ReadableDateTime get(int index) { - return dateAt(index, values.valueAt(index)); - } - - @Override - public int size() { - return values.count(); - } - - @Override - public void setNextDocId(int docId) { - // Not needed - throw new UnsupportedOperationException(); - } - }; + if (dates == null) { + dates = new Dates(values); + dates.refreshArray(); } - return dateList; + return dates; } @Override @@ -199,26 +180,83 @@ public Long get(int index) { public int size() { return values.count(); } + } + + public static final class Dates extends ScriptDocValues { + private static final ReadableDateTime EPOCH = new DateTime(0, DateTimeZone.UTC); + + private final SortedNumericDocValues values; + /** + * Values wrapped in {@link MutableDateTime}. Null by default an allocated on first usage so we allocate a reasonably size. We keep + * this array so we don't have allocate new {@link MutableDateTime}s on every usage. Instead we reuse them for every document. + */ + private MutableDateTime[] dates; + + public Dates(SortedNumericDocValues values) { + this.values = values; + } + + /** + * Fetch the first field value or 0 millis after epoch if there are no values. + */ + public ReadableDateTime getValue() { + if (values.count() == 0) { + return EPOCH; + } + return get(0); + } + + @Override + public ReadableDateTime get(int index) { + if (index >= values.count()) { + throw new IndexOutOfBoundsException( + "attempted to fetch the [" + index + "] date when there are only [" + values.count() + "] dates."); + } + return dates[index]; + } + + @Override + public int size() { + return values.count(); + } + + @Override + public void setNextDocId(int docId) { + values.setDocument(docId); + refreshArray(); + } /** - * Fetch the value at an index wrapped in a {@link ReadableDateTime}. We take care not to allocate a new {@link MutableDateTime} - * if possible. + * Refresh the backing array. Package private so it can be called when {@link Longs} loads dates. */ - private ReadableDateTime dateAt(int index, long value) { + void refreshArray() { + if (values.count() == 0) { + return; + } if (dates == null) { - dates = new MutableDateTime[ArrayUtil.oversize(max(1, values.count()), RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - dates[index] = new MutableDateTime(value, DateTimeZone.UTC); - } else { - if (index >= dates.length) { - dates = ArrayUtil.grow(dates, max(1, values.count())); + // Uninitialized + dates = new MutableDateTime[values.count()]; + for (int i = 0; i < dates.length; i++) { + dates[i] = new MutableDateTime(values.valueAt(i), DateTimeZone.UTC); + } + return; + } + if (values.count() > dates.length) { + // Values too small + MutableDateTime[] backup = dates; + dates = new MutableDateTime[values.count()]; + System.arraycopy(backup, 0, dates, 0, backup.length); + for (int i = 0; i < backup.length; i++) { + dates[i].setMillis(values.valueAt(i)); } - if (dates[index] == null) { - dates[index] = new MutableDateTime(value, DateTimeZone.UTC); - } else { - dates[index].setMillis(value); + for (int i = backup.length; i < dates.length; i++) { + dates[i] = new MutableDateTime(values.valueAt(i), DateTimeZone.UTC); } + return; + } + for (int i = 0; i < values.count(); i++) { + dates[i].setMillis(values.valueAt(i)); } - return dates[index]; } } diff --git a/core/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesDatesTests.java b/core/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesDatesTests.java new file mode 100644 index 0000000000000..f8579efae737a --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesDatesTests.java @@ -0,0 +1,76 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata; + +import org.apache.lucene.index.SortedNumericDocValues; +import org.elasticsearch.index.fielddata.ScriptDocValues.Dates; +import org.elasticsearch.test.ESTestCase; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.ReadableDateTime; + +public class ScriptDocValuesDatesTests extends ESTestCase { + public void test() { + long[][] values = new long[between(3, 10)][]; + ReadableDateTime[][] expectedDates = new ReadableDateTime[values.length][]; + for (int d = 0; d < values.length; d++) { + values[d] = new long[randomBoolean() ? randomBoolean() ? 0 : 1 : between(2, 100)]; + expectedDates[d] = new ReadableDateTime[values[d].length]; + for (int i = 0; i < values[d].length; i++) { + expectedDates[d][i] = new DateTime(randomNonNegativeLong(), DateTimeZone.UTC); + values[d][i] = expectedDates[d][i].getMillis(); + } + } + Dates dates = wrap(values); + + for (int round = 0; round < 10; round++) { + int d = between(0, values.length - 1); + dates.setNextDocId(d); + assertEquals(expectedDates[d].length > 0 ? expectedDates[d][0] : new DateTime(0, DateTimeZone.UTC), dates.getValue()); + + assertEquals(values[d].length, dates.size()); + for (int i = 0; i < values[d].length; i++) { + assertEquals(expectedDates[d][i], dates.get(i)); + } + + Exception e = expectThrows(UnsupportedOperationException.class, () -> dates.add(new DateTime())); + assertEquals("doc values are unmodifiable", e.getMessage()); + } + } + + private Dates wrap(long[][] values) { + return new Dates(new SortedNumericDocValues() { + long[] current; + + @Override + public void setDocument(int doc) { + current = values[doc]; + } + @Override + public int count() { + return current.length; + } + @Override + public long valueAt(int index) { + return current[index]; + } + }); + } +}