Skip to content

Commit

Permalink
fs: support passthrough mode
Browse files Browse the repository at this point in the history
If a returned file implements the FilePassthroughFder interface, we
try to register the file in the kernel.

Implement this for loopbackFile, and test the behavior.

For benchmarking, use a single reader. With multiple readers, contents
are served out of kernel cache, and do not reflect FUSE performance. 

Benchmark (CPU i5-8350U pinned at 2 GHz):

$ go build -v && go test -run "abc" -bench '(Libfuse|FD)' --passthrough_hp ~/vc/libfuse/build/example/passthrough_hp -test.cpu=1
BenchmarkGoFuseFDRead 	   27444	     45997 ns/op	1424.80 MB/s	      87 B/op	       1 allocs/op
BenchmarkLibfuseHP    	   35377	     32198 ns/op	2035.43 MB/s	       0 B/op	       0 allocs/op

$ go build -v && sudo go test -run "abc" -bench '(Libfuse|FD)' --passthrough_hp ~/vc/libfuse/build/example/passthrough_hp -test.cpu=1
BenchmarkGoFuseFDRead 	   91788	     11902 ns/op	5506.23 MB/s	       3 B/op	       0 allocs/op
BenchmarkLibfuseHP    	  100556	     11831 ns/op	5539.38 MB/s	       0 B/op	       0 allocs/op

Change-Id: If8bde502a3450028f4d87ba61fa9c76ea3ea6c63
  • Loading branch information
hanwen committed Aug 28, 2024
1 parent ff286a5 commit e0a0b09
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 12 deletions.
2 changes: 1 addition & 1 deletion all.bash
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ GO_TEST="go test -timeout 5m -p 1 -count 1"
# Run all tests as current user
$GO_TEST ./...
# Direct-mount tests need to run as root
sudo env PATH=$PATH $GO_TEST -run TestDirectMount ./fs ./fuse
sudo env PATH=$PATH $GO_TEST -run 'Test(DirectMount|Passthrough)' ./fs ./fuse

make -C benchmark
go test ./benchmark -test.bench '.*' -test.cpu 1,2
4 changes: 2 additions & 2 deletions benchmark/read_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func BenchmarkGoFuseFDRead(b *testing.B) {
b.Fatal(err)
}
mnt := setupFS(root, b.N, b)
benchmarkRead(mnt, b, 32, "")
benchmarkRead(mnt, b, 1, "")
}

var libfusePath = flag.String("passthrough_hp", "", "path to libfuse's passthrough_hp")
Expand Down Expand Up @@ -130,5 +130,5 @@ func BenchmarkLibfuseHP(b *testing.B) {
}
}

benchmarkRead(mnt, b, 32, "")
benchmarkRead(mnt, b, 1, "")
}
21 changes: 16 additions & 5 deletions fs/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// nodes of the file system tree.
//
// type myNode struct {
// fs.Inode
// fs.Inode
// }
//
// // Node types must be InodeEmbedders
Expand All @@ -20,10 +20,10 @@
// var _ = (fs.NodeLookuper)((*myNode)(nil))
//
// func (n *myNode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*Inode, syscall.Errno) {
// ops := myNode{}
// out.Mode = 0755
// out.Size = 42
// return n.NewInode(ctx, &ops, fs.StableAttr{Mode: syscall.S_IFREG}), 0
// ops := myNode{}
// out.Mode = 0755
// out.Size = 42
// return n.NewInode(ctx, &ops, fs.StableAttr{Mode: syscall.S_IFREG}), 0
// }
//
// The method names are inspired by the system call names, so we have
Expand Down Expand Up @@ -568,6 +568,17 @@ type NodeRenamer interface {
type FileHandle interface {
}

// FilePassthroughFder is implemented by file handles that are backed
// by a physical file. PassthroughFd reports an open file descriptor
// for that file together with true; the kernel then executes
// read/write operations directly on the backing file, bypassing the
// FUSE process. The method is called at most once while a Create or
// Open operation is being processed, so concurrent access to the Fd
// is not a concern. Returning false means passthrough is not used for
// this file.
type FilePassthroughFder interface {
	PassthroughFd() (fd int, ok bool)
}

// See NodeReleaser.
type FileReleaser interface {
Release(ctx context.Context) syscall.Errno
Expand Down
85 changes: 81 additions & 4 deletions fs/bridge.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ type ServerCallbacks interface {
InodeNotifyStoreCache(node uint64, offset int64, data []byte) fuse.Status
}

// serverBackingFdCallbacks is the optional part of the server API used
// for passthrough: the bridge type-asserts its server to this
// interface and gives up on backing files if the assertion fails.
//
// TODO: fold serverBackingFdCallbacks into ServerCallbacks and bump API version
type serverBackingFdCallbacks interface {
	// RegisterBackingFd registers the backing file described by m
	// and returns the ID under which it was registered.
	RegisterBackingFd(m *fuse.BackingMap) (int32, syscall.Errno)

	// UnregisterBackingFd drops the registration with the given ID.
	UnregisterBackingFd(id int32) syscall.Errno
}

type rawBridge struct {
options Options
root *Inode
Expand Down Expand Up @@ -98,8 +104,13 @@ type rawBridge struct {
// estimate for stableAttrs.
nodeCountHigh int

files []*fileEntry
files []*fileEntry

// indices of files that are not allocated.
freeFiles []uint32

// If set, don't try to register backing file for Create/Open calls.
disableBackingFiles bool
}

// newInode creates a new inode pointing to ops.
Expand Down Expand Up @@ -480,10 +491,10 @@ func (b *rawBridge) Create(cancel <-chan struct{}, input *fuse.CreateIn, name st
}

child, fe := b.addNewChild(parent, name, child, f, input.Flags|syscall.O_CREAT|syscall.O_EXCL, &out.EntryOut)

out.Fh = uint64(fe.fh)
out.OpenFlags = flags

b.addBackingID(child, f, &out.OpenOut)
child.setEntryOut(&out.EntryOut)
b.setEntryOutTimeout(&out.EntryOut)
return fuse.OK
Expand Down Expand Up @@ -736,20 +747,82 @@ func (b *rawBridge) Open(cancel <-chan struct{}, input *fuse.OpenIn, out *fuse.O
if errno != 0 {
return errnoToStatus(errno)
}
out.OpenFlags = flags

if f != nil {
b.mu.Lock()
defer b.mu.Unlock()
fe := b.registerFile(n, f, input.Flags)
out.Fh = uint64(fe.fh)

b.addBackingID(n, f, out)
}
out.OpenFlags = flags
return fuse.OK
}

return fuse.ENOTSUP
}

// addBackingID tries to enable passthrough for file handle f, which
// was just opened on node n, and records the outcome in out.
//
// If n has no backing ID yet, the descriptor reported by
// f.PassthroughFd is registered with the server. When the node ends up
// with a backing ID, out receives that ID plus FOPEN_PASSTHROUGH
// (FOPEN_KEEP_CACHE is cleared) and the node's reference count is
// incremented. If the server does not offer the backing-fd callbacks,
// or registration fails, backing files are disabled for the whole
// bridge.
//
// must hold bridge.mu
func (b *rawBridge) addBackingID(n *Inode, f FileHandle, out *fuse.OpenOut) {
	if b.disableBackingFiles {
		return
	}

	registrar, canRegister := b.server.(serverBackingFdCallbacks)
	if !canRegister {
		// The server cannot register backing files at all; do not
		// check again on later opens.
		b.disableBackingFiles = true
		return
	}

	passthrough, isPassthrough := f.(FilePassthroughFder)
	if !isPassthrough {
		return
	}

	if n.backingID == 0 {
		fd, usable := passthrough.PassthroughFd()
		if !usable {
			return
		}

		id, errno := registrar.RegisterBackingFd(&fuse.BackingMap{Fd: int32(fd)})
		if errno != 0 {
			// Registration is refused e.g. when the process is
			// not root (passthrough requires CAP_SYS_ADMIN).
			// Stop trying for subsequent opens.
			b.disableBackingFiles = true
			return
		}
		n.backingID = id
	}

	if n.backingID == 0 {
		return
	}

	out.BackingID = n.backingID
	out.OpenFlags |= fuse.FOPEN_PASSTHROUGH
	out.OpenFlags &= ^uint32(fuse.FOPEN_KEEP_CACHE)
	n.backingIDRefcount++
}

// releaseBackingIDRef drops one reference to the backing ID of node n.
// When the last reference goes away, the backing ID is unregistered
// with the server and cleared on the node. Nodes without a backing ID
// are left untouched.
//
// NOTE(review): Release and ReleaseDir call this for every released
// handle, while addBackingID only takes a reference for handles that
// actually got passthrough. Verify the counts stay balanced if a node
// can have both kinds of handles open at once.
//
// must hold bridge.mu
func (b *rawBridge) releaseBackingIDRef(n *Inode) {
	if n.backingID == 0 {
		return
	}

	n.backingIDRefcount--
	switch {
	case n.backingIDRefcount < 0:
		log.Panic("backingIDRefcount underflow")
	case n.backingIDRefcount == 0:
		// A non-zero backingID is only set after the server passed
		// this type assertion in addBackingID.
		registrar := b.server.(serverBackingFdCallbacks)
		if errno := registrar.UnregisterBackingFd(n.backingID); errno != 0 {
			b.logf("UnregisterBackingFd: %v", errno)
		}
		n.backingID = 0
	}
}

// registerFile hands out a file handle. Must have bridge.mu
func (b *rawBridge) registerFile(n *Inode, f FileHandle, flags uint32) *fileEntry {
fe := &fileEntry{}
Expand All @@ -766,6 +839,7 @@ func (b *rawBridge) registerFile(n *Inode, f FileHandle, flags uint32) *fileEntr
fe.nodeIndex = len(n.openFiles)
fe.file = f
n.openFiles = append(n.openFiles, fe.fh)

return fe
}

Expand Down Expand Up @@ -838,11 +912,13 @@ func (b *rawBridge) Release(cancel <-chan struct{}, input *fuse.ReleaseIn) {

b.mu.Lock()
defer b.mu.Unlock()

b.releaseBackingIDRef(n)
b.freeFiles = append(b.freeFiles, uint32(input.Fh))
}

func (b *rawBridge) ReleaseDir(input *fuse.ReleaseIn) {
_, f := b.releaseFileEntry(input.NodeId, input.Fh)
n, f := b.releaseFileEntry(input.NodeId, input.Fh)
f.wg.Wait()

f.mu.Lock()
Expand All @@ -854,6 +930,7 @@ func (b *rawBridge) ReleaseDir(input *fuse.ReleaseIn) {

b.mu.Lock()
defer b.mu.Unlock()
b.releaseBackingIDRef(n)
b.freeFiles = append(b.freeFiles, uint32(input.Fh))
}

Expand Down
8 changes: 8 additions & 0 deletions fs/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ var _ = (FileFlusher)((*loopbackFile)(nil))
var _ = (FileFsyncer)((*loopbackFile)(nil))
var _ = (FileSetattrer)((*loopbackFile)(nil))
var _ = (FileAllocater)((*loopbackFile)(nil))
var _ = (FilePassthroughFder)((*loopbackFile)(nil))

// PassthroughFd implements FilePassthroughFder by reporting the file's
// own descriptor; it always returns true.
func (f *loopbackFile) PassthroughFd() (int, bool) {
	// The descriptor is not accessed concurrently here; the lock is
	// taken only to stay uniform with the other methods.
	f.mu.Lock()
	fd := f.fd
	f.mu.Unlock()

	return fd, true
}

func (f *loopbackFile) Read(ctx context.Context, buf []byte, off int64) (res fuse.ReadResult, errno syscall.Errno) {
f.mu.Lock()
Expand Down
5 changes: 5 additions & 0 deletions fs/inode.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ type Inode struct {
// protected by bridge.mu
openFiles []uint32

// backing files, protected by bridge.mu
backingIDRefcount int
backingID int32
backingFd int

// mu protects the following mutable fields. When locking
// multiple Inodes, locks must be acquired using
// lockNodes/unlockNodes
Expand Down
109 changes: 109 additions & 0 deletions fs/passthrough_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright 2024 the Go-FUSE Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package fs

import (
"context"
"io"
"os"
"sync"
"syscall"
"testing"

"github.com/hanwen/go-fuse/v2/fuse"
"github.com/hanwen/go-fuse/v2/internal/testutil"
)

// rwRegisteringNode is a LoopbackNode that counts how many Read and
// Write calls reach it.
type rwRegisteringNode struct {
	LoopbackNode

	// mu is held by Read and Write while they update the counters.
	mu sync.Mutex

	// reads and writes are the number of Read and Write calls seen.
	reads, writes int
}

// Read counts the call and then delegates to the file handle, which
// must implement FileReader.
func (n *rwRegisteringNode) Read(ctx context.Context, f FileHandle, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.reads++
	reader := f.(FileReader)
	return reader.Read(ctx, dest, off)
}

// Write counts the call and then delegates to the file handle, which
// must implement FileWriter.
func (n *rwRegisteringNode) Write(ctx context.Context, f FileHandle, data []byte, off int64) (written uint32, errno syscall.Errno) {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.writes++
	writer := f.(FileWriter)
	return writer.Write(ctx, data, off)
}

// TestPassthrough mounts a loopback file system whose nodes count the
// Read and Write calls that reach user space. It then writes and reads
// a file through the mount and checks that none of that I/O hit the
// counting handlers, i.e. it was served via the backing file.
func TestPassthrough(t *testing.T) {
	if os.Geteuid() != 0 {
		t.Skip("passthrough requires CAP_SYS_ADMIN")
	}

	mnt := t.TempDir()
	n := &rwRegisteringNode{}

	// Every node created below the root is the same counting node, so
	// its counters cover all file I/O of the test.
	rootData := &LoopbackRoot{
		Path: t.TempDir(),
		NewNode: func(rootData *LoopbackRoot, parent *Inode, name string, st *syscall.Stat_t) InodeEmbedder {
			return n
		},
	}
	n.RootData = rootData
	root := &LoopbackNode{
		RootData: rootData,
	}
	opts := &Options{}
	opts.Debug = testutil.VerboseTest()
	server, err := Mount(mnt, root, opts)
	if err != nil {
		t.Fatal(err)
	}
	// Safety net for early exits; the explicit Unmount below is the
	// one that matters on the success path.
	defer server.Unmount()

	fn := mnt + "/file"
	want := "hello there"
	if err := os.WriteFile(fn, []byte(want), 0666); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	f, err := os.Open(fn)
	if err != nil {
		t.Fatalf("Open: %v", err)
	}
	defer f.Close()

	got, err := io.ReadAll(f)
	if err != nil {
		t.Fatalf("ReadAll: %v", err)
	}
	if want != string(got) {
		t.Errorf("got %q want %q", got, want)
	}

	// Rewrite and reread the file while the first handle is still open.
	want2 := "xxxx"
	if err := os.WriteFile(fn, []byte(want2), 0666); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	got2, err := os.ReadFile(fn)
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if string(got2) != want2 {
		t.Errorf("got %q want %q", got2, want2)
	}

	// Tear everything down before inspecting the counters.
	f.Close()
	server.Unmount()

	// Read the counters under the same lock Read and Write use to
	// update them.
	n.mu.Lock()
	reads, writes := n.reads, n.writes
	n.mu.Unlock()

	if reads > 0 {
		t.Errorf("got readcount %d want 0", reads)
	}
	if writes > 0 {
		t.Errorf("got writecount %d want 0", writes)
	}
}

0 comments on commit e0a0b09

Please sign in to comment.