From 10aac0e25382b962476dcba65dbd420864b9134f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Wojciech=20Ma=C5=82ota-W=C3=B3jcik?= <59281144+outofforest@users.noreply.github.com>
Date: Thu, 21 Nov 2024 21:35:54 +0100
Subject: [PATCH] Benchmark double copy (#220)

---
 wal/asm.s             |  14 ++++++
 wal/asm_stub.go       |   6 +++
 wal/avo/avo.go        |  34 +++++++++++++
 wal/benchmark_test.go | 112 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 166 insertions(+)
 create mode 100644 wal/asm.s
 create mode 100644 wal/asm_stub.go
 create mode 100644 wal/avo/avo.go
 create mode 100644 wal/benchmark_test.go

diff --git a/wal/asm.s b/wal/asm.s
new file mode 100644
index 0000000..a7aa0a8
--- /dev/null
+++ b/wal/asm.s
@@ -0,0 +1,14 @@
+// Code generated by command: go run avo.go -out ../asm.s -stubs ../asm_stub.go -pkg wal. DO NOT EDIT.
+
+#include "textflag.h"
+
+// func Copy(x *byte, y *byte, z *byte)
+// Requires: AVX512F
+TEXT ·Copy(SB), NOSPLIT, $0-24
+	MOVQ      z+16(FP), AX
+	VMOVDQU64 (AX), Z0
+	MOVQ      x+0(FP), AX
+	VMOVDQU64 Z0, (AX)
+	MOVQ      y+8(FP), AX
+	VMOVDQU64 Z0, (AX)
+	RET
diff --git a/wal/asm_stub.go b/wal/asm_stub.go
new file mode 100644
index 0000000..16b4b39
--- /dev/null
+++ b/wal/asm_stub.go
@@ -0,0 +1,6 @@
+// Code generated by command: go run avo.go -out ../asm.s -stubs ../asm_stub.go -pkg wal. DO NOT EDIT.
+
+package wal
+
+// Copy copies 64 bytes from z to both x and y.
+func Copy(x *byte, y *byte, z *byte)
diff --git a/wal/avo/avo.go b/wal/avo/avo.go
new file mode 100644
index 0000000..c16c7ee
--- /dev/null
+++ b/wal/avo/avo.go
@@ -0,0 +1,34 @@
+package main
+
+//go:generate go run . -out ../asm.s -stubs ../asm_stub.go -pkg wal
+
+import (
+	. "github.com/mmcloughlin/avo/build"
+	. "github.com/mmcloughlin/avo/operand"
+)
+
+// Copy emits the assembly for wal.Copy: one 64-byte load from z into a ZMM
+// register, followed by stores of that register to x and to y.
+func Copy() {
+	TEXT("Copy", NOSPLIT, "func(x *byte, y *byte, z *byte)")
+	Doc("Copy copies 64 bytes from z to both x and y.")
+
+	// Read the source once; both stores below reuse the same register.
+	r := ZMM()
+	memZ := Mem{Base: Load(Param("z"), GP64())}
+	VMOVDQU64(memZ, r)
+
+	memX := Mem{Base: Load(Param("x"), GP64())}
+	VMOVDQU64(r, memX)
+	memY := Mem{Base: Load(Param("y"), GP64())}
+	VMOVDQU64(r, memY)
+
+	RET()
+}
+
+// main declares the Copy routine and then runs avo's Generate.
+func main() {
+	Copy()
+
+	Generate()
+}
diff --git a/wal/benchmark_test.go b/wal/benchmark_test.go
new file mode 100644
index 0000000..20614f3
--- /dev/null
+++ b/wal/benchmark_test.go
@@ -0,0 +1,112 @@
+package wal_test
+
+import (
+	"crypto/rand"
+	"fmt"
+	"io"
+	"testing"
+	"unsafe"
+
+	"github.com/outofforest/quantum/wal"
+)
+
+// item is the payload copied by the benchmarks. Its fields add up to 64 bytes,
+// the amount wal.Copy moves per call, so its size must not change.
+type item struct {
+	Field00 uint32
+	Field01 uint32
+	Field02 uint64
+	Field03 uint64
+	Field04 uint16
+	Field05 uint16
+	Field06 uint16
+	Field07 uint16
+	Field08 uint64
+	Field09 uint64
+	Field10 uint64
+	Field11 uint64
+}
+
+var (
+	// data is the copy source, filled with random bytes at package init.
+	data = func() item {
+		var i item
+		_, _ = rand.Read(unsafe.Slice((*byte)(unsafe.Pointer(&i)), unsafe.Sizeof(i)))
+		return i
+	}()
+)
+
+// BenchmarkGoCopy measures ten built-in copy calls per iteration, each into
+// its own destination item.
+func BenchmarkGoCopy(b *testing.B) {
+	b.StopTimer()
+	b.ResetTimer()
+
+	var data0, data1, data2, data3, data4, data5, data6, data7, data8, data9 item
+
+	dataB := unsafe.Slice((*byte)(unsafe.Pointer(&data)), unsafe.Sizeof(data))
+	data0B := unsafe.Slice((*byte)(unsafe.Pointer(&data0)), unsafe.Sizeof(data0))
+	data1B := unsafe.Slice((*byte)(unsafe.Pointer(&data1)), unsafe.Sizeof(data1))
+	data2B := unsafe.Slice((*byte)(unsafe.Pointer(&data2)), unsafe.Sizeof(data2))
+	data3B := unsafe.Slice((*byte)(unsafe.Pointer(&data3)), unsafe.Sizeof(data3))
+	data4B := unsafe.Slice((*byte)(unsafe.Pointer(&data4)), unsafe.Sizeof(data4))
+	data5B := unsafe.Slice((*byte)(unsafe.Pointer(&data5)), unsafe.Sizeof(data5))
+	data6B := unsafe.Slice((*byte)(unsafe.Pointer(&data6)), unsafe.Sizeof(data6))
+	data7B := unsafe.Slice((*byte)(unsafe.Pointer(&data7)), unsafe.Sizeof(data7))
+	data8B := unsafe.Slice((*byte)(unsafe.Pointer(&data8)), unsafe.Sizeof(data8))
+	data9B := unsafe.Slice((*byte)(unsafe.Pointer(&data9)), unsafe.Sizeof(data9))
+
+	b.StartTimer()
+	for range b.N {
+		copy(data0B, dataB)
+		copy(data1B, dataB)
+		copy(data2B, dataB)
+		copy(data3B, dataB)
+		copy(data4B, dataB)
+		copy(data5B, dataB)
+		copy(data6B, dataB)
+		copy(data7B, dataB)
+		copy(data8B, dataB)
+		copy(data9B, dataB)
+	}
+	b.StopTimer()
+
+	// Consume every destination so the copies have an observable use.
+	_, _ = fmt.Fprint(io.Discard, data0B)
+	_, _ = fmt.Fprint(io.Discard, data1B)
+	_, _ = fmt.Fprint(io.Discard, data2B)
+	_, _ = fmt.Fprint(io.Discard, data3B)
+	_, _ = fmt.Fprint(io.Discard, data4B)
+	_, _ = fmt.Fprint(io.Discard, data5B)
+	_, _ = fmt.Fprint(io.Discard, data6B)
+	_, _ = fmt.Fprint(io.Discard, data7B)
+	_, _ = fmt.Fprint(io.Discard, data8B)
+	_, _ = fmt.Fprint(io.Discard, data9B)
+}
+
+// BenchmarkAVX measures five wal.Copy calls per iteration. Each call stores the
+// source into two destinations, giving ten stores per iteration. It requires a
+// CPU with AVX512F.
+func BenchmarkAVX(b *testing.B) {
+	b.StopTimer()
+	b.ResetTimer()
+
+	var data0, data1 item
+
+	dataB := (*byte)(unsafe.Pointer(&data))
+	data0B := (*byte)(unsafe.Pointer(&data0))
+	data1B := (*byte)(unsafe.Pointer(&data1))
+
+	b.StartTimer()
+	for range b.N {
+		wal.Copy(data0B, data1B, dataB)
+		wal.Copy(data0B, data1B, dataB)
+		wal.Copy(data0B, data1B, dataB)
+		wal.Copy(data0B, data1B, dataB)
+		wal.Copy(data0B, data1B, dataB)
+	}
+	b.StopTimer()
+
+	_, _ = fmt.Fprint(io.Discard, data0)
+	_, _ = fmt.Fprint(io.Discard, data1)
+}