From 45b6ad31f87760d858324b9c8619fe06436ecd5e Mon Sep 17 00:00:00 2001 From: skr Date: Fri, 23 Feb 2024 11:23:00 +0530 Subject: [PATCH] Directly encode key/value to `ByteBuf` when codec knows exact byte sizes #2610 * adds benchmarks to show perf gains * about 10x improvement in perf, with no added gc overhead Original pull request: #2768 --- .../io/lettuce/core/codec/ByteArrayCodec.java | 6 ++ .../lettuce/core/codec/ToByteBufEncoder.java | 11 +++ .../io/lettuce/core/protocol/CommandArgs.java | 48 +++++++--- .../ExactVsEstimatedSizeCodecBenchmark.java | 89 +++++++++++++++++++ .../jmh/io/lettuce/core/codec/JmhMain.java | 19 +++- 5 files changed, 159 insertions(+), 14 deletions(-) create mode 100644 src/test/jmh/io/lettuce/core/codec/ExactVsEstimatedSizeCodecBenchmark.java diff --git a/src/main/java/io/lettuce/core/codec/ByteArrayCodec.java b/src/main/java/io/lettuce/core/codec/ByteArrayCodec.java index 3353e82c95..b1ea5d27a9 100644 --- a/src/main/java/io/lettuce/core/codec/ByteArrayCodec.java +++ b/src/main/java/io/lettuce/core/codec/ByteArrayCodec.java @@ -23,6 +23,7 @@ * A {@link RedisCodec} that uses plain byte arrays without further transformations. * * @author Mark Paluch + * @author shikharid * @since 3.3 */ public class ByteArrayCodec implements RedisCodec, ToByteBufEncoder { @@ -54,6 +55,11 @@ public int estimateSize(Object keyOrValue) { return ((byte[]) keyOrValue).length; } + @Override + public boolean isEstimateExact() { + return true; + } + @Override public byte[] decodeKey(ByteBuffer bytes) { return getBytes(bytes); diff --git a/src/main/java/io/lettuce/core/codec/ToByteBufEncoder.java b/src/main/java/io/lettuce/core/codec/ToByteBufEncoder.java index ba63d94072..beedc8cd34 100644 --- a/src/main/java/io/lettuce/core/codec/ToByteBufEncoder.java +++ b/src/main/java/io/lettuce/core/codec/ToByteBufEncoder.java @@ -27,6 +27,7 @@ *

* * @author Mark Paluch + * @author shikharid * @since 4.3 */ public interface ToByteBufEncoder { @@ -56,4 +57,14 @@ public interface ToByteBufEncoder { */ int estimateSize(Object keyOrValue); + /** + * Returns {@code true} if {@link ToByteBufEncoder#estimateSize(Object)} returns the exact size rather than an estimate. + * This is used as an optimisation to reduce memory allocations when encoding data: the size can be written up front and the key/value encoded directly into the target buffer instead of a temporary one. + * + * @return {@code true} if {@link ToByteBufEncoder#estimateSize(Object)} returns the exact size; {@code false} by default. + */ + default boolean isEstimateExact() { + return false; + } + } diff --git a/src/main/java/io/lettuce/core/protocol/CommandArgs.java b/src/main/java/io/lettuce/core/protocol/CommandArgs.java index 856a9a0646..3563bbaab2 100644 --- a/src/main/java/io/lettuce/core/protocol/CommandArgs.java +++ b/src/main/java/io/lettuce/core/protocol/CommandArgs.java @@ -45,6 +45,7 @@ * @param Value type. * @author Will Glozer * @author Mark Paluch + * @author shikharid */ public class CommandArgs { @@ -682,14 +683,25 @@ void encode(ByteBuf target) { if (codec instanceof ToByteBufEncoder) { ToByteBufEncoder toByteBufEncoder = (ToByteBufEncoder) codec; - ByteBuf temporaryBuffer = target.alloc().buffer(toByteBufEncoder.estimateSize(key) + 6); - try { + if (toByteBufEncoder.isEstimateExact()) { + target.writeByte('$'); - toByteBufEncoder.encodeKey(key, temporaryBuffer); - ByteBufferArgument.writeByteBuf(target, temporaryBuffer); - } finally { - temporaryBuffer.release(); + IntegerArgument.writeInteger(target, toByteBufEncoder.estimateSize(key)); + target.writeBytes(CRLF); + + toByteBufEncoder.encodeKey(key, target); + target.writeBytes(CRLF); + } else { + ByteBuf temporaryBuffer = target.alloc().buffer(toByteBufEncoder.estimateSize(key) + 6); + + try { + + toByteBufEncoder.encodeKey(key, temporaryBuffer); + ByteBufferArgument.writeByteBuf(target, temporaryBuffer); + } finally { + temporaryBuffer.release(); + } } return; @@ -727,13 +739,23 @@ void encode(ByteBuf target) { if (codec instanceof ToByteBufEncoder) { ToByteBufEncoder
toByteBufEncoder = (ToByteBufEncoder) codec; - ByteBuf temporaryBuffer = target.alloc().buffer(toByteBufEncoder.estimateSize(val) + 6); - - try { - toByteBufEncoder.encodeValue(val, temporaryBuffer); - ByteBufferArgument.writeByteBuf(target, temporaryBuffer); - } finally { - temporaryBuffer.release(); + if (toByteBufEncoder.isEstimateExact()) { + target.writeByte('$'); + + IntegerArgument.writeInteger(target, toByteBufEncoder.estimateSize(val)); + target.writeBytes(CRLF); + + toByteBufEncoder.encodeValue(val, target); + target.writeBytes(CRLF); + } else { + ByteBuf temporaryBuffer = target.alloc().buffer(toByteBufEncoder.estimateSize(val) + 6); + + try { + toByteBufEncoder.encodeValue(val, temporaryBuffer); + ByteBufferArgument.writeByteBuf(target, temporaryBuffer); + } finally { + temporaryBuffer.release(); + } } return; diff --git a/src/test/jmh/io/lettuce/core/codec/ExactVsEstimatedSizeCodecBenchmark.java b/src/test/jmh/io/lettuce/core/codec/ExactVsEstimatedSizeCodecBenchmark.java new file mode 100644 index 0000000000..11224a4515 --- /dev/null +++ b/src/test/jmh/io/lettuce/core/codec/ExactVsEstimatedSizeCodecBenchmark.java @@ -0,0 +1,89 @@ +package io.lettuce.core.codec; + +import io.lettuce.core.protocol.CommandArgs; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.PooledByteBufAllocator; +import io.netty.buffer.Unpooled; +import org.checkerframework.checker.units.qual.C; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Benchmark measuring the performance gain when the codec knows the exact byte size of the args it encodes. + * + * @author shikharid + */ +public class ExactVsEstimatedSizeCodecBenchmark { + + @Benchmark + public void encodeKeyExactSize(Input input, Blackhole blackhole) { + encodeKey(input.testBytes, ByteArrayCodec.INSTANCE, input.target); + blackhole.consume(input.target); + input.target.clear(); + } + + @Benchmark +
public void encodeKeyEstimatedSize(Input input, Blackhole blackhole) { + encodeKey(input.testBytes, EstimatedSizeByteArrayCodec.INSTANCE, input.target); + blackhole.consume(input.target); + input.target.clear(); + } + + @Benchmark + public void encodeValueExactSize(Input input, Blackhole blackhole) { + encodeValue(input.testBytes, ByteArrayCodec.INSTANCE, input.target); + blackhole.consume(input.target); + input.target.clear(); + } + + @Benchmark + public void encodeValueEstimatedSize(Input input, Blackhole blackhole) { + encodeValue(input.testBytes, EstimatedSizeByteArrayCodec.INSTANCE, input.target); + blackhole.consume(input.target); + input.target.clear(); + } + + private static void encodeKey(byte[] key, RedisCodec codec, ByteBuf target) { + CommandArgs commandArgs = new CommandArgs<>(codec); + commandArgs.addKey(key); + commandArgs.encode(target); + } + + private static void encodeValue(byte[] value, RedisCodec codec, ByteBuf target) { + CommandArgs commandArgs = new CommandArgs<>(codec); + commandArgs.addValue(value); + commandArgs.encode(target); + } + + @State(Scope.Thread) + public static class Input { + final byte[] testBytes = "some (not-so-)random bytes".getBytes(); + + /* + By default, use an unpooled heap buffer so that "GC"-specific improvements are visible in the benchmark through profiling. + + But using a pooled direct buffer gives us the full story for most real-world uses: + most usages are of a direct pooled ByteBuf allocator for Netty, which has its own jemalloc-based GC. + + Replace this with a pooled direct allocator to see real-world gains. + Note that GC profiling in that case won't show much difference, as we only save a couple of allocations as far as the heap is concerned, + but you will still see the perf gains. + */ + final ByteBuf target = Unpooled.buffer(512); + //final ByteBuf target = PooledByteBufAllocator.DEFAULT.directBuffer(512); + } + + // Emulates older ByteArrayCodec behaviour (no concept of exact estimates) + public static class
EstimatedSizeByteArrayCodec extends ByteArrayCodec { + + public static final EstimatedSizeByteArrayCodec INSTANCE = new EstimatedSizeByteArrayCodec(); + + @Override + public boolean isEstimateExact() { + return false; + } + + } +} diff --git a/src/test/jmh/io/lettuce/core/codec/JmhMain.java b/src/test/jmh/io/lettuce/core/codec/JmhMain.java index c59762c07c..6a5f344ccc 100644 --- a/src/test/jmh/io/lettuce/core/codec/JmhMain.java +++ b/src/test/jmh/io/lettuce/core/codec/JmhMain.java @@ -28,12 +28,14 @@ * Manual JMH Test Launcher. * * @author Mark Paluch + * @author shikharid */ public class JmhMain { public static void main(String... args) throws RunnerException { - runCommandBenchmark(); + //runCommandBenchmark(); + runExactVsEstimatedSizeEncoderBenchmark(); } private static void runCommandBenchmark() throws RunnerException { @@ -44,6 +46,21 @@ private static void runCommandBenchmark() throws RunnerException { .build()).run(); } + private static void runExactVsEstimatedSizeEncoderBenchmark() throws RunnerException { + + // measure average time per operation (ns/op) + new Runner(prepareOptions().mode(Mode.AverageTime).timeUnit(TimeUnit.NANOSECONDS) + .include(".*ExactVsEstimatedSizeCodecBenchmark.*") + .addProfiler("gc") + .build()).run(); + + // measure throughput (ops/sec) + new Runner(prepareOptions().mode(Mode.Throughput).timeUnit(TimeUnit.SECONDS) + .include(".*ExactVsEstimatedSizeCodecBenchmark.*") + .addProfiler("gc") + .build()).run(); + } + private static ChainedOptionsBuilder prepareOptions() { return new OptionsBuilder()// .forks(1) //