elastic · nik9000 · Sep 15, 2020 · Sep 14, 2020 · Sep 14, 2020 · Sep 15, 2020
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -78,7 +78,6 @@ cd fcml*
 make
 cd example/hsdis
 make
-cp .libs/libhsdis.so.0.0.0
 sudo cp .libs/libhsdis.so.0.0.0 /usr/lib/jvm/java-14-adoptopenjdk/lib/hsdis-amd64.so
 ```
 

diff --git a/...csearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java b/...csearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java
@@ -0,0 +1,77 @@
+package org.elasticsearch.benchmark.search.aggregations.bucket.terms;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.Version;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.common.io.stream.DelayableWriteable;
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.BucketOrder;
+import org.elasticsearch.search.aggregations.InternalAggregation;
+import org.elasticsearch.search.aggregations.InternalAggregations;
+import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+/** JMH benchmark: time to serialize a {@link StringTerms} result whose buckets each nest another terms result. */
+@Fork(2)
+@Warmup(iterations = 10)
+@Measurement(iterations = 5)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+public class StringTermsSerializationBenchmark {
+    private static final NamedWriteableRegistry REGISTRY = new NamedWriteableRegistry(
+        List.of(new NamedWriteableRegistry.Entry(InternalAggregation.class, StringTerms.NAME, StringTerms::new))
+    );
+    @Param(value = { "1000" })
+    private int buckets;
+
+    private DelayableWriteable<InternalAggregations> results;
+
+    /** Builds the aggregation tree once and wraps it by reference so {@link #serialize()} has something to convert. */
+    @Setup
+    public void initResults() {
+        InternalAggregations tree = InternalAggregations.from(List.of(newTerms(true)));
+        results = DelayableWriteable.referencing(tree);
+    }
+
+    /** Creates a terms result with {@code buckets} buckets, each holding a nested terms result iff {@code withNested}. */
+    private StringTerms newTerms(boolean withNested) {
+        List<StringTerms.Bucket> termBuckets = new ArrayList<>(buckets);
+        for (int ordinal = 0; ordinal < buckets; ordinal++) {
+            InternalAggregations subAggs;
+            if (withNested) {
+                subAggs = InternalAggregations.from(List.of(newTerms(false)));
+            } else {
+                subAggs = InternalAggregations.EMPTY;
+            }
+            termBuckets.add(new StringTerms.Bucket(new BytesRef("test" + ordinal), ordinal, subAggs, false, 0, DocValueFormat.RAW));
+        }
+        return new StringTerms(
+            "test", BucketOrder.key(true), BucketOrder.key(true), buckets, 1, null,
+            DocValueFormat.RAW, buckets, false, 100000, termBuckets, 0
+        );
+    }
+
+    /** Measured operation: calls {@code asSerialized} on the wrapped result, resolving named writeables via {@link #REGISTRY}. */
+    @Benchmark
+    public DelayableWriteable<InternalAggregations> serialize() {
+        return results.asSerialized(InternalAggregations::readFrom, REGISTRY);
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java
@@ -218,14 +218,61 @@ public void writeInt(int i) throws IOException {
      * using {@link #writeInt}
      */
     public void writeVInt(int i) throws IOException {
-        final byte[] buffer = scratch.get();
-        int index = 0;
-        while ((i & ~0x7F) != 0) {
-            buffer[index++] = ((byte) ((i & 0x7f) | 0x80));
-            i >>>= 7;
+        /*
+         * Pick the number of bytes from the value's magnitude and encode
+         * with hand-unrolled stores instead of a loop. Values 0..0x7f take
+         * the single `writeByte` path; negatives always need five bytes.
+         * Every bound is inclusive (`<=`) so 0x7f, 0x3fff, 0x1f_ffff and
+         * 0x0fff_ffff get the shortest encoding, byte-for-byte identical
+         * to what the simple shift-and-mask loop produces.
+         */
+        if (i <= 0x7f) {
+            if (i >= 0) {
+                writeByte((byte) i);
+                return;
+            }
+            byte[] buffer = scratch.get();
+            buffer[0] = (byte) (i & 0x7f | 0x80);
+            buffer[1] = (byte) ((i >>> 7) & 0x7f | 0x80);
+            buffer[2] = (byte) ((i >>> 14) & 0x7f | 0x80);
+            buffer[3] = (byte) ((i >>> 21) & 0x7f | 0x80);
+            buffer[4] = (byte) (i >>> 28);
+            assert buffer[4] >= 0; // final byte must not carry the continuation bit
+            writeBytes(buffer, 0, 5);
+            return;
         }
-        buffer[index++] = ((byte) i);
-        writeBytes(buffer, 0, index);
+        byte[] buffer = scratch.get();
+        if (i <= 0x3fff) {
+            buffer[0] = (byte) (i & 0x7f | 0x80);
+            buffer[1] = (byte) (i >>> 7);
+            assert buffer[1] >= 0; // a byte is never > 0x7f, so check the sign bit instead
+            writeBytes(buffer, 0, 2);
+            return;
+        }
+        if (i <= 0x1f_ffff) {
+            buffer[0] = (byte) (i & 0x7f | 0x80);
+            buffer[1] = (byte) ((i >>> 7) & 0x7f | 0x80);
+            buffer[2] = (byte) (i >>> 14);
+            assert buffer[2] >= 0;
+            writeBytes(buffer, 0, 3);
+            return;
+        }
+        if (i <= 0x0fff_ffff) {
+            buffer[0] = (byte) (i & 0x7f | 0x80);
+            buffer[1] = (byte) ((i >>> 7) & 0x7f | 0x80);
+            buffer[2] = (byte) ((i >>> 14) & 0x7f | 0x80);
+            buffer[3] = (byte) (i >>> 21);
+            assert buffer[3] >= 0;
+            writeBytes(buffer, 0, 4);
+            return;
+        }
+        buffer[0] = (byte) ((i & 0x7f) | 0x80);
+        buffer[1] = (byte) ((i >>> 7) & 0x7f | 0x80);
+        buffer[2] = (byte) ((i >>> 14) & 0x7f | 0x80);
+        buffer[3] = (byte) ((i >>> 21) & 0x7f | 0x80);
+        buffer[4] = (byte) (i >>> 28);
+        assert buffer[4] >= 0;
+        writeBytes(buffer, 0, 5);
     }
 
     /**

diff --git a/server/src/test/java/org/elasticsearch/common/io/stream/BytesStreamsTests.java b/server/src/test/java/org/elasticsearch/common/io/stream/BytesStreamsTests.java
@@ -61,7 +61,7 @@
 import static org.hamcrest.Matchers.sameInstance;
 
 /**
- * Tests for {@link BytesStreamOutput} paging behaviour.
+ * Tests for {@link StreamOutput}.
  */
 public class BytesStreamsTests extends ESTestCase {
     public void testEmpty() throws Exception {
@@ -829,6 +829,15 @@ public void testVInt() throws IOException {
         output.writeVInt(value);
         StreamInput input = output.bytes().streamInput();
         assertEquals(value, input.readVInt());
+
+        BytesStreamOutput simple = new BytesStreamOutput();
+        int i = value;
+        while ((i & ~0x7F) != 0) {
+            simple.writeByte(((byte) ((i & 0x7f) | 0x80)));
+            i >>>= 7;
+        }
+        simple.writeByte((byte) i);
+        assertEquals(simple.bytes().toBytesRef().toString(), output.bytes().toBytesRef().toString());
     }
 
     public void testVLong() throws IOException {
-Original file line number
+Diff line change
@@ Expand Up / @@ -78,7 +78,6 @@ cd fcml* @@
     make
     cd example/hsdis
     make
-    cp .libs/libhsdis.so.0.0.0
     sudo cp .libs/libhsdis.so.0.0.0 /usr/lib/jvm/java-14-adoptopenjdk/lib/hsdis-amd64.so
     ```
@@ Expand Down @@