-
Notifications
You must be signed in to change notification settings - Fork 478
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
On Linux, preallocation makes a huge difference in sync performance. WAL reuse (aka recycling...not implemented yet) provides a further improvement. And direct IO provides more stable performance on GCE Local SSD. Note that direct IO implies WAL reuse. The numbers below were gathered on an AWS m5.xlarge. name time/op DirectIOWrite/wsize=4096-4 34.4µs ± 1% DirectIOWrite/wsize=8192-4 61.0µs ± 0% DirectIOWrite/wsize=16384-4 122µs ± 0% DirectIOWrite/wsize=32768-4 244µs ± 0% SyncWrite/no-prealloc/wsize=64-4 128µs ± 8% SyncWrite/no-prealloc/wsize=512-4 146µs ± 0% SyncWrite/no-prealloc/wsize=1024-4 155µs ± 0% SyncWrite/no-prealloc/wsize=2048-4 172µs ± 0% SyncWrite/no-prealloc/wsize=4096-4 206µs ± 0% SyncWrite/no-prealloc/wsize=8192-4 206µs ± 0% SyncWrite/no-prealloc/wsize=16384-4 274µs ± 0% SyncWrite/no-prealloc/wsize=32768-4 407µs ± 4% SyncWrite/prealloc-4MB/wsize=64-4 34.2µs ± 7% SyncWrite/prealloc-4MB/wsize=512-4 47.5µs ± 0% SyncWrite/prealloc-4MB/wsize=1024-4 60.4µs ± 0% SyncWrite/prealloc-4MB/wsize=2048-4 86.4µs ± 0% SyncWrite/prealloc-4MB/wsize=4096-4 137µs ± 0% SyncWrite/prealloc-4MB/wsize=8192-4 143µs ± 7% SyncWrite/prealloc-4MB/wsize=16384-4 214µs ± 0% SyncWrite/prealloc-4MB/wsize=32768-4 337µs ± 0% SyncWrite/reuse/wsize=64-4 31.6µs ± 4% SyncWrite/reuse/wsize=512-4 31.8µs ± 4% SyncWrite/reuse/wsize=1024-4 32.4µs ± 7% SyncWrite/reuse/wsize=2048-4 31.3µs ± 1% SyncWrite/reuse/wsize=4096-4 32.2µs ± 5% SyncWrite/reuse/wsize=8192-4 61.1µs ± 0% SyncWrite/reuse/wsize=16384-4 122µs ± 0% SyncWrite/reuse/wsize=32768-4 244µs ± 0% See #41
- Loading branch information
1 parent
86ac56a
commit 00f6085
Showing
9 changed files
with
367 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// Copyright 2016 The etcd Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// +build darwin | ||
|
||
package storage | ||
|
||
import ( | ||
"syscall" | ||
"unsafe" | ||
) | ||
|
||
func preallocExtend(fd uintptr, offset, length int64) error { | ||
if err := preallocFixed(fd, offset, length); err != nil { | ||
return err | ||
} | ||
return syscall.Ftruncate(int(fd), offset+length) | ||
} | ||
|
||
func preallocFixed(fd uintptr, offset, length int64) error { | ||
// allocate all requested space or no space at all | ||
// TODO: allocate contiguous space on disk with F_ALLOCATECONTIG flag | ||
fstore := &syscall.Fstore_t{ | ||
Flags: syscall.F_ALLOCATEALL, | ||
Posmode: syscall.F_PEOFPOSMODE, | ||
Length: length} | ||
p := unsafe.Pointer(fstore) | ||
_, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, uintptr(syscall.F_PREALLOCATE), uintptr(p)) | ||
if errno == 0 || errno == syscall.ENOTSUP { | ||
return nil | ||
} | ||
|
||
// wrong argument to fallocate syscall | ||
if errno == syscall.EINVAL { | ||
// filesystem "st_blocks" are allocated in the units of | ||
// "Allocation Block Size" (run "diskutil info /" command) | ||
var stat syscall.Stat_t | ||
syscall.Fstat(int(fd), &stat) | ||
|
||
// syscall.Statfs_t.Bsize is "optimal transfer block size" | ||
// and contains matching 4096 value when latest OS X kernel | ||
// supports 4,096 KB filesystem block size | ||
var statfs syscall.Statfs_t | ||
syscall.Fstatfs(int(fd), &statfs) | ||
blockSize := int64(statfs.Bsize) | ||
|
||
if stat.Blocks*blockSize >= offset+length { | ||
// enough blocks are already allocated | ||
return nil | ||
} | ||
} | ||
return errno | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// Copyright 2016 The etcd Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// +build linux | ||
|
||
package storage | ||
|
||
import ( | ||
"syscall" | ||
) | ||
|
||
// preallocExtend calls fallocate(2) with mode 0 on fd for the byte range
// [offset, offset+length). If fallocate fails with ENOTSUP or EINTR, it falls
// back to truncating the file to offset+length and returns that result. Any
// other fallocate error is returned unchanged.
func preallocExtend(fd uintptr, offset, length int64) error {
	err := syscall.Fallocate(int(fd), 0 /* mode */, offset, length)
	// Comparing the error interface against syscall.Errno constants matches
	// only when err holds a syscall.Errno with that exact value.
	switch err {
	case syscall.ENOTSUP, syscall.EINTR:
		// not supported; fallback
		// fallocate EINTRs frequently in some environments; fallback
		return syscall.Ftruncate(int(fd), offset+length)
	}
	return err
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use | ||
// of this source code is governed by a BSD-style license that can be found in | ||
// the LICENSE file. | ||
|
||
// +build linux | ||
|
||
package storage | ||
|
||
import ( | ||
"fmt" | ||
"io/ioutil" | ||
"os" | ||
"syscall" | ||
"testing" | ||
"unsafe" | ||
) | ||
|
||
// BenchmarkDirectIOWrite times sequential WriteAt calls of wsize bytes against
// a temporary file opened with O_DIRECT. Each time targetSize bytes have been
// written the file is closed and reopened, with the reopen, the one-time
// pre-fill and the Sync excluded from the timed region. When tests run in
// verbose mode several write sizes are exercised; otherwise only 4096.
func BenchmarkDirectIOWrite(b *testing.B) {
	const targetSize = 16 << 20
	const alignment = 4096

	var wsizes []int
	if testing.Verbose() {
		wsizes = []int{4 << 10, 8 << 10, 16 << 10, 32 << 10}
	} else {
		wsizes = []int{4096}
	}

	for _, wsize := range wsizes {
		b.Run(fmt.Sprintf("wsize=%d", wsize), func(b *testing.B) {
			tmpf, err := ioutil.TempFile("", "pebble-db-syncing-file-")
			if err != nil {
				b.Fatal(err)
			}
			filename := tmpf.Name()
			_ = tmpf.Close()
			defer os.Remove(filename)

			var f *os.File
			// Close whatever handle is still open when this function exits.
			// The loop only closes the file after a full targetSize pass, so
			// without this the descriptor leaks when b.N stops mid-pass or
			// when b.Fatal aborts the run. Deferred after os.Remove so the
			// close runs first.
			defer func() {
				if f != nil {
					_ = f.Close()
				}
			}()

			var size int
			// Over-allocate by one alignment unit and slice forward to the
			// first address that is a multiple of alignment, then trim to
			// exactly wsize bytes.
			buf := make([]byte, wsize+alignment)
			if a := uintptr(unsafe.Pointer(&buf[0])) & uintptr(alignment-1); a != 0 {
				buf = buf[alignment-a:]
			}
			buf = buf[:wsize]
			needsFill := true

			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				if f == nil {
					b.StopTimer()
					f, err = os.OpenFile(filename, syscall.O_DIRECT|os.O_RDWR, 0666)
					if err != nil {
						b.Fatal(err)
					}
					if needsFill {
						// Write the whole file once so that the timed writes
						// below overwrite existing data. Later reopens find
						// the file already fully written, so the fill is not
						// repeated.
						for size = 0; size < targetSize; size += len(buf) {
							if _, err := f.WriteAt(buf, int64(size)); err != nil {
								b.Fatal(err)
							}
						}
						needsFill = false
					}
					if err := f.Sync(); err != nil {
						b.Fatal(err)
					}
					size = 0
					b.StartTimer()
				}
				if _, err := f.WriteAt(buf, int64(size)); err != nil {
					b.Fatal(err)
				}
				size += len(buf)
				if size >= targetSize {
					// Finished a full pass; force a reopen on the next
					// iteration.
					_ = f.Close()
					f = nil
				}
			}
			b.StopTimer()
		})
	}
}
Oops, something went wrong.