Skip to content

Commit

Permalink
Eagerly initialize dates when we move to the next document
Browse files Browse the repository at this point in the history
  • Loading branch information
nik9000 committed Feb 2, 2017
1 parent e3f7e1d commit 09a401a
Show file tree
Hide file tree
Showing 2 changed files with 159 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,11 @@


import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.geo.GeoHashUtils;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.GeoUtils;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.MutableDateTime;
import org.joda.time.ReadableDateTime;
Expand All @@ -36,8 +35,6 @@
import java.util.List;
import java.util.function.UnaryOperator;

import static java.lang.Math.max;


/**
* Script level doc values, the assumption is that any implementation will implement a <code>getValue</code>
Expand Down Expand Up @@ -132,12 +129,7 @@ public int size() {
public static final class Longs extends ScriptDocValues<Long> {

private final SortedNumericDocValues values;
/**
* Values wrapped in {@link MutableDateTime}. Null by default an allocated on first usage so there is no cost if not used. We keep
* this array so we don't have allocate new {@link MutableDateTime}s on every usage. Instead we reuse them for every document.
*/
private MutableDateTime[] dates;
private ScriptDocValues<ReadableDateTime> dateList;
private Dates dates;

public Longs(SortedNumericDocValues values) {
this.values = values;
Expand All @@ -146,6 +138,9 @@ public Longs(SortedNumericDocValues values) {
@Override
public void setNextDocId(int docId) {
values.setDocument(docId);
if (dates != null) {
dates.refreshArray();
}
}

public SortedNumericDocValues getInternalValues() {
Expand All @@ -161,33 +156,19 @@ public long getValue() {
}

public ReadableDateTime getDate() {
if (values.count() == 0) {
return dateAt(0, 0L);
if (dates == null) {
dates = new Dates(values);
dates.refreshArray();
}
return dateAt(0, values.valueAt(0));
return dates.getValue();
}

public List<ReadableDateTime> getDates() {
if (dateList == null) {
dateList = new ScriptDocValues<ReadableDateTime>() {
@Override
public ReadableDateTime get(int index) {
return dateAt(index, values.valueAt(index));
}

@Override
public int size() {
return values.count();
}

@Override
public void setNextDocId(int docId) {
// Not needed
throw new UnsupportedOperationException();
}
};
if (dates == null) {
dates = new Dates(values);
dates.refreshArray();
}
return dateList;
return dates;
}

@Override
Expand All @@ -199,26 +180,83 @@ public Long get(int index) {
public int size() {
return values.count();
}
}

public static final class Dates extends ScriptDocValues<ReadableDateTime> {
private static final ReadableDateTime EPOCH = new DateTime(0, DateTimeZone.UTC);

private final SortedNumericDocValues values;
/**
* Values wrapped in {@link MutableDateTime}. Null by default an allocated on first usage so we allocate a reasonably size. We keep
* this array so we don't have allocate new {@link MutableDateTime}s on every usage. Instead we reuse them for every document.
*/
private MutableDateTime[] dates;

public Dates(SortedNumericDocValues values) {
this.values = values;
}

/**
* Fetch the first field value or 0 millis after epoch if there are no values.
*/
public ReadableDateTime getValue() {
if (values.count() == 0) {
return EPOCH;
}
return get(0);
}

@Override
public ReadableDateTime get(int index) {
if (index >= values.count()) {
throw new IndexOutOfBoundsException(
"attempted to fetch the [" + index + "] date when there are only [" + values.count() + "] dates.");
}
return dates[index];
}

@Override
public int size() {
return values.count();
}

@Override
public void setNextDocId(int docId) {
values.setDocument(docId);
refreshArray();
}

/**
* Fetch the value at an index wrapped in a {@link ReadableDateTime}. We take care not to allocate a new {@link MutableDateTime}
* if possible.
* Refresh the backing array. Package private so it can be called when {@link Longs} loads dates.
*/
private ReadableDateTime dateAt(int index, long value) {
void refreshArray() {
if (values.count() == 0) {
return;
}
if (dates == null) {
dates = new MutableDateTime[ArrayUtil.oversize(max(1, values.count()), RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
dates[index] = new MutableDateTime(value, DateTimeZone.UTC);
} else {
if (index >= dates.length) {
dates = ArrayUtil.grow(dates, max(1, values.count()));
// Uninitialized
dates = new MutableDateTime[values.count()];
for (int i = 0; i < dates.length; i++) {
dates[i] = new MutableDateTime(values.valueAt(i), DateTimeZone.UTC);
}
return;
}
if (values.count() > dates.length) {
// Values too small
MutableDateTime[] backup = dates;
dates = new MutableDateTime[values.count()];
System.arraycopy(backup, 0, dates, 0, backup.length);
for (int i = 0; i < backup.length; i++) {
dates[i].setMillis(values.valueAt(i));
}
if (dates[index] == null) {
dates[index] = new MutableDateTime(value, DateTimeZone.UTC);
} else {
dates[index].setMillis(value);
for (int i = backup.length; i < dates.length; i++) {
dates[i] = new MutableDateTime(values.valueAt(i), DateTimeZone.UTC);
}
return;
}
for (int i = 0; i < values.count(); i++) {
dates[i].setMillis(values.valueAt(i));
}
return dates[index];
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.fielddata;

import org.apache.lucene.index.SortedNumericDocValues;
import org.elasticsearch.index.fielddata.ScriptDocValues.Dates;
import org.elasticsearch.test.ESTestCase;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.ReadableDateTime;

public class ScriptDocValuesDatesTests extends ESTestCase {
public void test() {
long[][] values = new long[between(3, 10)][];
ReadableDateTime[][] expectedDates = new ReadableDateTime[values.length][];
for (int d = 0; d < values.length; d++) {
values[d] = new long[randomBoolean() ? randomBoolean() ? 0 : 1 : between(2, 100)];
expectedDates[d] = new ReadableDateTime[values[d].length];
for (int i = 0; i < values[d].length; i++) {
expectedDates[d][i] = new DateTime(randomNonNegativeLong(), DateTimeZone.UTC);
values[d][i] = expectedDates[d][i].getMillis();
}
}
Dates dates = wrap(values);

for (int round = 0; round < 10; round++) {
int d = between(0, values.length - 1);
dates.setNextDocId(d);
assertEquals(expectedDates[d].length > 0 ? expectedDates[d][0] : new DateTime(0, DateTimeZone.UTC), dates.getValue());

assertEquals(values[d].length, dates.size());
for (int i = 0; i < values[d].length; i++) {
assertEquals(expectedDates[d][i], dates.get(i));
}

Exception e = expectThrows(UnsupportedOperationException.class, () -> dates.add(new DateTime()));
assertEquals("doc values are unmodifiable", e.getMessage());
}
}

private Dates wrap(long[][] values) {
return new Dates(new SortedNumericDocValues() {
long[] current;

@Override
public void setDocument(int doc) {
current = values[doc];
}
@Override
public int count() {
return current.length;
}
@Override
public long valueAt(int index) {
return current[index];
}
});
}
}

0 comments on commit 09a401a

Please sign in to comment.