From 39baeb83e8886aa1a2b79cf5dc7e10ad1726de1a Mon Sep 17 00:00:00 2001 From: Ketan Verma <9292653+ketanv3@users.noreply.github.com> Date: Thu, 16 Nov 2023 05:57:02 +0530 Subject: [PATCH] [Backport 2.x] Refactor common parts from the Rounding class into a separate 'round' package (#11023) (#11223) (cherry picked from commit 08db961e1914d93ce9d81e6da84e6e232b8d37e2) Signed-off-by: Ketan Verma --- CHANGELOG.md | 1 + .../RoundableBenchmark.java} | 16 +- .../round/BidirectionalLinearSearcher.java | 59 +++++++ .../common/round/BinarySearcher.java | 43 +++++ .../opensearch/common/round/Roundable.java | 28 ++++ .../common/round/RoundableFactory.java | 39 +++++ .../opensearch/common/round/package-info.java | 12 ++ .../common/round/RoundableTests.java | 57 +++++++ .../java/org/opensearch/common/Rounding.java | 148 ++++-------------- .../org/opensearch/common/RoundingTests.java | 22 --- 10 files changed, 279 insertions(+), 146 deletions(-) rename benchmarks/src/main/java/org/opensearch/common/{ArrayRoundingBenchmark.java => round/RoundableBenchmark.java} (89%) create mode 100644 libs/common/src/main/java/org/opensearch/common/round/BidirectionalLinearSearcher.java create mode 100644 libs/common/src/main/java/org/opensearch/common/round/BinarySearcher.java create mode 100644 libs/common/src/main/java/org/opensearch/common/round/Roundable.java create mode 100644 libs/common/src/main/java/org/opensearch/common/round/RoundableFactory.java create mode 100644 libs/common/src/main/java/org/opensearch/common/round/package-info.java create mode 100644 libs/common/src/test/java/org/opensearch/common/round/RoundableTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e046881a8139..188a6a035e00d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add instrumentation for indexing in transport bulk action and transport shard bulk action. 
([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273)) - Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057)) - [BUG] Disable sort optimization for HALF_FLOAT ([#10999](https://github.com/opensearch-project/OpenSearch/pull/10999)) +- Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023)) - Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087)) - Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528)) - Use iterative approach to evaluate Regex.simpleMatch ([#11060](https://github.com/opensearch-project/OpenSearch/pull/11060)) diff --git a/benchmarks/src/main/java/org/opensearch/common/ArrayRoundingBenchmark.java b/benchmarks/src/main/java/org/opensearch/common/round/RoundableBenchmark.java similarity index 89% rename from benchmarks/src/main/java/org/opensearch/common/ArrayRoundingBenchmark.java rename to benchmarks/src/main/java/org/opensearch/common/round/RoundableBenchmark.java index 64c0a9e1d7aa6..4e07af452968b 100644 --- a/benchmarks/src/main/java/org/opensearch/common/ArrayRoundingBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/common/round/RoundableBenchmark.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.common; +package org.opensearch.common.round; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -27,13 +27,13 @@ @Warmup(iterations = 3, time = 1) @Measurement(iterations = 1, time = 1) @BenchmarkMode(Mode.Throughput) -public class ArrayRoundingBenchmark { +public class RoundableBenchmark { @Benchmark - public void round(Blackhole bh, Options opts) { - Rounding.Prepared rounding = opts.supplier.get(); + public void floor(Blackhole bh, Options opts) { + Roundable roundable = opts.supplier.get(); for (long key : opts.queries) { - bh.consume(rounding.round(key)); + bh.consume(roundable.floor(key)); } } @@ -90,7 +90,7 @@ public static class Options { public String distribution; public long[] queries; - public Supplier<Rounding.Prepared> supplier; + public Supplier<Roundable> supplier; @Setup public void setup() { @@ -130,10 +130,10 @@ public void setup() { switch (type) { case "binary": - supplier = () -> new Rounding.BinarySearchArrayRounding(values, size, null); + supplier = () -> new BinarySearcher(values, size); break; case "linear": - supplier = () -> new Rounding.BidirectionalLinearSearchArrayRounding(values, size, null); + supplier = () -> new BidirectionalLinearSearcher(values, size); break; default: throw new IllegalArgumentException("invalid type: " + type); diff --git a/libs/common/src/main/java/org/opensearch/common/round/BidirectionalLinearSearcher.java b/libs/common/src/main/java/org/opensearch/common/round/BidirectionalLinearSearcher.java new file mode 100644 index 0000000000000..5c3dcf2bd4708 --- /dev/null +++ b/libs/common/src/main/java/org/opensearch/common/round/BidirectionalLinearSearcher.java @@ -0,0 +1,59 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.common.round; + +import org.opensearch.common.annotation.InternalApi; + +/** + * It uses linear search on a sorted array of pre-computed round-down points. + * For small inputs (≤ 64 elements), this can be much faster than binary search as it avoids the penalty of + * branch mispredictions and pipeline stalls, and accesses memory sequentially. + * + * <p>
+ * It uses "meet in the middle" linear search to avoid the worst case scenario when the desired element is present + * at either side of the array. This is helpful for time-series data where velocity increases over time, so more + * documents are likely to find a greater timestamp which is likely to be present on the right end of the array. + * + * @opensearch.internal + */ +@InternalApi +class BidirectionalLinearSearcher implements Roundable { + private final long[] ascending; + private final long[] descending; + + BidirectionalLinearSearcher(long[] values, int size) { + if (size <= 0) { + throw new IllegalArgumentException("at least one value must be present"); + } + + int len = (size + 1) >>> 1; // rounded-up to handle odd number of values + ascending = new long[len]; + descending = new long[len]; + + for (int i = 0; i < len; i++) { + ascending[i] = values[i]; + descending[i] = values[size - i - 1]; + } + } + + @Override + public long floor(long key) { + int i = 0; + for (; i < ascending.length; i++) { + if (descending[i] <= key) { + return descending[i]; + } + if (ascending[i] > key) { + assert i > 0 : "key must be greater than or equal to " + ascending[0]; + return ascending[i - 1]; + } + } + return ascending[i - 1]; + } +} diff --git a/libs/common/src/main/java/org/opensearch/common/round/BinarySearcher.java b/libs/common/src/main/java/org/opensearch/common/round/BinarySearcher.java new file mode 100644 index 0000000000000..b9d76945115ed --- /dev/null +++ b/libs/common/src/main/java/org/opensearch/common/round/BinarySearcher.java @@ -0,0 +1,43 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.common.round; + +import org.opensearch.common.annotation.InternalApi; + +import java.util.Arrays; + +/** + * It uses binary search on a sorted array of pre-computed round-down points. + * + * @opensearch.internal + */ +@InternalApi +class BinarySearcher implements Roundable { + private final long[] values; + private final int size; + + BinarySearcher(long[] values, int size) { + if (size <= 0) { + throw new IllegalArgumentException("at least one value must be present"); + } + + this.values = values; + this.size = size; + } + + @Override + public long floor(long key) { + int idx = Arrays.binarySearch(values, 0, size, key); + assert idx != -1 : "key must be greater than or equal to " + values[0]; + if (idx < 0) { + idx = -2 - idx; + } + return values[idx]; + } +} diff --git a/libs/common/src/main/java/org/opensearch/common/round/Roundable.java b/libs/common/src/main/java/org/opensearch/common/round/Roundable.java new file mode 100644 index 0000000000000..ae6f9b787c1e9 --- /dev/null +++ b/libs/common/src/main/java/org/opensearch/common/round/Roundable.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.round; + +import org.opensearch.common.annotation.InternalApi; + +/** + * Interface to round-off values. + * + * @opensearch.internal + */ +@InternalApi +@FunctionalInterface +public interface Roundable { + /** + * Returns the greatest lower bound of the given key. + * In other words, it returns the largest value such that {@code value <= key}. 
+ * @param key to floor + * @return the floored value + */ + long floor(long key); +} diff --git a/libs/common/src/main/java/org/opensearch/common/round/RoundableFactory.java b/libs/common/src/main/java/org/opensearch/common/round/RoundableFactory.java new file mode 100644 index 0000000000000..b7422694c3013 --- /dev/null +++ b/libs/common/src/main/java/org/opensearch/common/round/RoundableFactory.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.round; + +import org.opensearch.common.annotation.InternalApi; + +/** + * Factory class to create and return the fastest implementation of {@link Roundable}. + * + * @opensearch.internal + */ +@InternalApi +public final class RoundableFactory { + /** + * The maximum limit up to which linear search is used, otherwise binary search is used. + * This is because linear search is much faster on small arrays. + * Benchmark results: PR #9727 + */ + private static final int LINEAR_SEARCH_MAX_SIZE = 64; + + private RoundableFactory() {} + + /** + * Creates and returns the fastest implementation of {@link Roundable}. 
+ */ + public static Roundable create(long[] values, int size) { + if (size <= LINEAR_SEARCH_MAX_SIZE) { + return new BidirectionalLinearSearcher(values, size); + } else { + return new BinarySearcher(values, size); + } + } +} diff --git a/libs/common/src/main/java/org/opensearch/common/round/package-info.java b/libs/common/src/main/java/org/opensearch/common/round/package-info.java new file mode 100644 index 0000000000000..e79c4017de31b --- /dev/null +++ b/libs/common/src/main/java/org/opensearch/common/round/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Contains classes to round-off values. + */ +package org.opensearch.common.round; diff --git a/libs/common/src/test/java/org/opensearch/common/round/RoundableTests.java b/libs/common/src/test/java/org/opensearch/common/round/RoundableTests.java new file mode 100644 index 0000000000000..ae9f629c59024 --- /dev/null +++ b/libs/common/src/test/java/org/opensearch/common/round/RoundableTests.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.round; + +import org.opensearch.test.OpenSearchTestCase; + +public class RoundableTests extends OpenSearchTestCase { + + public void testFloor() { + int size = randomIntBetween(1, 256); + long[] values = new long[size]; + for (int i = 1; i < values.length; i++) { + values[i] = values[i - 1] + (randomNonNegativeLong() % 200) + 1; + } + + Roundable[] impls = { new BinarySearcher(values, size), new BidirectionalLinearSearcher(values, size) }; + + for (int i = 0; i < 100000; i++) { + // Index of the expected round-down point. 
+ int idx = randomIntBetween(0, size - 1); + + // Value of the expected round-down point. + long expected = values[idx]; + + // Delta between the expected and the next round-down point. + long delta = (idx < size - 1) ? (values[idx + 1] - values[idx]) : 200; + + // Adding a random delta between 0 (inclusive) and delta (exclusive) to the expected + // round-down point, which will still floor to the same value. + long key = expected + (randomNonNegativeLong() % delta); + + for (Roundable roundable : impls) { + assertEquals(expected, roundable.floor(key)); + } + } + } + + public void testFailureCases() { + Throwable throwable; + + throwable = assertThrows(IllegalArgumentException.class, () -> new BinarySearcher(new long[0], 0)); + assertEquals("at least one value must be present", throwable.getMessage()); + throwable = assertThrows(IllegalArgumentException.class, () -> new BidirectionalLinearSearcher(new long[0], 0)); + assertEquals("at least one value must be present", throwable.getMessage()); + + throwable = assertThrows(AssertionError.class, () -> new BinarySearcher(new long[] { 100 }, 1).floor(50)); + assertEquals("key must be greater than or equal to 100", throwable.getMessage()); + throwable = assertThrows(AssertionError.class, () -> new BidirectionalLinearSearcher(new long[] { 100 }, 1).floor(50)); + assertEquals("key must be greater than or equal to 100", throwable.getMessage()); + } +} diff --git a/server/src/main/java/org/opensearch/common/Rounding.java b/server/src/main/java/org/opensearch/common/Rounding.java index 438822cb83725..41ec5ff8ebb9b 100644 --- a/server/src/main/java/org/opensearch/common/Rounding.java +++ b/server/src/main/java/org/opensearch/common/Rounding.java @@ -38,7 +38,8 @@ import org.opensearch.OpenSearchException; import org.opensearch.common.LocalTimeOffset.Gap; import org.opensearch.common.LocalTimeOffset.Overlap; -import org.opensearch.common.annotation.InternalApi; +import org.opensearch.common.round.Roundable; +import 
org.opensearch.common.round.RoundableFactory; import org.opensearch.common.time.DateUtils; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.io.stream.StreamInput; @@ -60,7 +61,6 @@ import java.time.temporal.TemporalQueries; import java.time.zone.ZoneOffsetTransition; import java.time.zone.ZoneRules; -import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.Objects; @@ -422,13 +422,6 @@ private abstract class PreparedRounding implements Prepared { */ private static final int DEFAULT_ARRAY_ROUNDING_MAX_THRESHOLD = 128; - /** - * The maximum limit up to which linear search is used, otherwise binary search is used. - * This is because linear search is much faster on small arrays. - * Benchmark results: PR #9727 - */ - private static final int LINEAR_SEARCH_ARRAY_ROUNDING_MAX_THRESHOLD = 64; - /** * Attempt to build a {@link Prepared} implementation that relies on pre-calcuated * "round down" points. If there would be more than {@code max} points then return @@ -452,9 +445,36 @@ protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) values = ArrayUtil.grow(values, i + 1); values[i++] = rounded; } - return i <= LINEAR_SEARCH_ARRAY_ROUNDING_MAX_THRESHOLD - ? new BidirectionalLinearSearchArrayRounding(values, i, this) - : new BinarySearchArrayRounding(values, i, this); + return new ArrayRounding(RoundableFactory.create(values, i), this); + } + } + + /** + * ArrayRounding is an implementation of {@link Prepared} which uses + * pre-calculated round-down points to speed up lookups. 
+ */ + private static class ArrayRounding implements Prepared { + private final Roundable roundable; + private final Prepared delegate; + + public ArrayRounding(Roundable roundable, Prepared delegate) { + this.roundable = roundable; + this.delegate = delegate; + } + + @Override + public long round(long utcMillis) { + return roundable.floor(utcMillis); + } + + @Override + public long nextRoundingValue(long utcMillis) { + return delegate.nextRoundingValue(utcMillis); + } + + @Override + public double roundingSize(long utcMillis, DateTimeUnit timeUnit) { + return delegate.roundingSize(utcMillis, timeUnit); } } @@ -1358,108 +1378,4 @@ public static Rounding read(StreamInput in) throws IOException { throw new OpenSearchException("unknown rounding id [" + id + "]"); } } - - /** - * Implementation of {@link Prepared} using pre-calculated "round down" points. - * - * <p>
- * It uses binary search to find the greatest round-down point less than or equal to the given timestamp. - * - * @opensearch.internal - */ - @InternalApi - static class BinarySearchArrayRounding implements Prepared { - private final long[] values; - private final int max; - private final Prepared delegate; - - BinarySearchArrayRounding(long[] values, int max, Prepared delegate) { - assert max > 0 : "at least one round-down point must be present"; - this.values = values; - this.max = max; - this.delegate = delegate; - } - - @Override - public long round(long utcMillis) { - assert values[0] <= utcMillis : "utcMillis must be after " + values[0]; - int idx = Arrays.binarySearch(values, 0, max, utcMillis); - assert idx != -1 : "The insertion point is before the array! This should have tripped the assertion above."; - assert -1 - idx <= values.length : "This insertion point is after the end of the array."; - if (idx < 0) { - idx = -2 - idx; - } - return values[idx]; - } - - @Override - public long nextRoundingValue(long utcMillis) { - return delegate.nextRoundingValue(utcMillis); - } - - @Override - public double roundingSize(long utcMillis, DateTimeUnit timeUnit) { - return delegate.roundingSize(utcMillis, timeUnit); - } - } - - /** - * Implementation of {@link Prepared} using pre-calculated "round down" points. - * - * <p>
- * It uses linear search to find the greatest round-down point less than or equal to the given timestamp. - * For small inputs (≤ 64 elements), this can be much faster than binary search as it avoids the penalty of - * branch mispredictions and pipeline stalls, and accesses memory sequentially. - * - * <p>
- * It uses "meet in the middle" linear search to avoid the worst case scenario when the desired element is present - * at either side of the array. This is helpful for time-series data where velocity increases over time, so more - * documents are likely to find a greater timestamp which is likely to be present on the right end of the array. - * - * @opensearch.internal - */ - @InternalApi - static class BidirectionalLinearSearchArrayRounding implements Prepared { - private final long[] ascending; - private final long[] descending; - private final Prepared delegate; - - BidirectionalLinearSearchArrayRounding(long[] values, int max, Prepared delegate) { - assert max > 0 : "at least one round-down point must be present"; - this.delegate = delegate; - int len = (max + 1) >>> 1; // rounded-up to handle odd number of values - ascending = new long[len]; - descending = new long[len]; - - for (int i = 0; i < len; i++) { - ascending[i] = values[i]; - descending[i] = values[max - i - 1]; - } - } - - @Override - public long round(long utcMillis) { - int i = 0; - for (; i < ascending.length; i++) { - if (descending[i] <= utcMillis) { - return descending[i]; - } - if (ascending[i] > utcMillis) { - assert i > 0 : "utcMillis must be after " + ascending[0]; - return ascending[i - 1]; - } - } - return ascending[i - 1]; - } - - @Override - public long nextRoundingValue(long utcMillis) { - return delegate.nextRoundingValue(utcMillis); - } - - @Override - public double roundingSize(long utcMillis, DateTimeUnit timeUnit) { - return delegate.roundingSize(utcMillis, timeUnit); - } - } } diff --git a/server/src/test/java/org/opensearch/common/RoundingTests.java b/server/src/test/java/org/opensearch/common/RoundingTests.java index cc71ee08abcca..9a6e6a6ac54d0 100644 --- a/server/src/test/java/org/opensearch/common/RoundingTests.java +++ b/server/src/test/java/org/opensearch/common/RoundingTests.java @@ -1142,28 +1142,6 @@ public void testNonMillisecondsBasedUnitCalendarRoundingSize() { 
assertThat(prepared.roundingSize(thirdQuarter, Rounding.DateTimeUnit.HOUR_OF_DAY), closeTo(2208.0, 0.000001)); } - public void testArrayRoundingImplementations() { - int length = randomIntBetween(1, 256); - long[] values = new long[length]; - for (int i = 1; i < values.length; i++) { - values[i] = values[i - 1] + (randomNonNegativeLong() % 100); - } - - Rounding.Prepared binarySearchImpl = new Rounding.BinarySearchArrayRounding(values, length, null); - Rounding.Prepared linearSearchImpl = new Rounding.BidirectionalLinearSearchArrayRounding(values, length, null); - - for (int i = 0; i < 100000; i++) { - long key = values[0] + (randomNonNegativeLong() % (100 + values[length - 1] - values[0])); - assertEquals(binarySearchImpl.round(key), linearSearchImpl.round(key)); - } - - AssertionError exception = expectThrows(AssertionError.class, () -> { binarySearchImpl.round(values[0] - 1); }); - assertEquals("utcMillis must be after " + values[0], exception.getMessage()); - - exception = expectThrows(AssertionError.class, () -> { linearSearchImpl.round(values[0] - 1); }); - assertEquals("utcMillis must be after " + values[0], exception.getMessage()); - } - private void assertInterval(long rounded, long nextRoundingValue, Rounding rounding, int minutes, ZoneId tz) { assertInterval(rounded, dateBetween(rounded, nextRoundingValue), nextRoundingValue, rounding, tz); long millisPerMinute = 60_000;