From e885cea8d4d40a5a9bb92bc3cef7193f2a316f59 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 9 Sep 2024 13:31:33 +0200 Subject: [PATCH] fs: new directory API The new API follows the pattern of the other file API: InodeEmbedder can implement an OpendirHandle method, which returns a FileHandle. Directories can implement the following APIs, * FileFsyncdirer * FileReaddirenter * FileReleasedirer * FileSeekdirer along with the opened directory, FOPEN_* flags may be returned. In a follow-up change, we'll demonstrate how to use FOPEN_CACHE_DIR for directory caching. This change is backward compatible in a compilation sense, but support for seeking in DirStreams has been removed: it was incorrect (it made the assumption that the dir offset was monotonically increasing), inefficient and complicated. Change-Id: I662713321f0f812e92e4059ee11e8b1427c6aa0f --- fs/api.go | 30 +++++++ fs/bridge.go | 191 +++++++++++++++++++++---------------------- fs/dir_test.go | 166 +++++++++++++++++++++++++++++++++++++ fs/dirstream.go | 28 +++++++ fs/dirstream_unix.go | 42 +++++++++- fs/loopback.go | 13 ++- 6 files changed, 366 insertions(+), 104 deletions(-) diff --git a/fs/api.go b/fs/api.go index e237270b7..1e7754ac8 100644 --- a/fs/api.go +++ b/fs/api.go @@ -654,6 +654,36 @@ type FileAllocater interface { Allocate(ctx context.Context, off uint64, size uint64, mode uint32) syscall.Errno } +// Opens a directory. This supersedes NodeOpendirer, allowing to pass +// back flags (eg. FOPEN_CACHE_DIR). +type NodeOpendirHandler interface { + OpendirHandle(ctx context.Context, flags uint32) (fh FileHandle, fuseFlags uint32, errno syscall.Errno) +} + +// FileReaddirenter is a directory that supports reading. +type FileReaddirenter interface { + // Read a single directory entry. + Readdirent(ctx context.Context) (*fuse.DirEntry, syscall.Errno) +} + +// FileFsyncdirer is a directory that supports fsyncdir. 
+type FileFsyncdirer interface { + Fsyncdir(ctx context.Context, flags uint32) syscall.Errno +} + +// FileSeekdirer is a directory that supports seeking. `off` is an +// opaque uint64 value, where only the value 0 is reserved for the +// start of the stream. (See https://lwn.net/Articles/544520/ for +// background). +type FileSeekdirer interface { + Seekdir(ctx context.Context, off uint64) syscall.Errno +} + +// FileReleasedirer is a directory that supports a cleanup operation. +type FileReleasedirer interface { + Releasedir(ctx context.Context, releaseFlags uint32) +} + // Options sets options for the entire filesystem type Options struct { // MountOptions contain the options for mounting the fuse server diff --git a/fs/bridge.go b/fs/bridge.go index 24f7bf846..a3d009196 100644 --- a/fs/bridge.go +++ b/fs/bridge.go @@ -33,7 +33,6 @@ type fileEntry struct { mu sync.Mutex // Directory - dirStream DirStream hasOverflow bool overflow fuse.DirEntry overflowErrno syscall.Errno @@ -821,7 +820,8 @@ func (b *rawBridge) releaseBackingIDRef(n *Inode) { } } -// registerFile hands out a file handle. Must have bridge.mu +// registerFile hands out a file handle. Must have bridge.mu. Flags are the open flags +// (eg. syscall.O_EXCL). 
func (b *rawBridge) registerFile(n *Inode, f FileHandle, flags uint32) *fileEntry { fe := &fileEntry{} if len(b.freeFiles) > 0 { @@ -919,12 +919,9 @@ func (b *rawBridge) ReleaseDir(input *fuse.ReleaseIn) { n, f := b.releaseFileEntry(input.NodeId, input.Fh) f.wg.Wait() - f.mu.Lock() - if f.dirStream != nil { - f.dirStream.Close() - f.dirStream = nil + if frd, ok := f.file.(FileReleasedirer); ok { + frd.Releasedir(context.Background(), input.ReleaseFlags) } - f.mu.Unlock() b.mu.Lock() defer b.mu.Unlock() @@ -1006,79 +1003,59 @@ func (b *rawBridge) Fallocate(cancel <-chan struct{}, input *fuse.FallocateIn) f func (b *rawBridge) OpenDir(cancel <-chan struct{}, input *fuse.OpenIn, out *fuse.OpenOut) fuse.Status { n, _ := b.inode(input.NodeId, 0) - if od, ok := n.ops.(NodeOpendirer); ok { - errno := od.Opendir(&fuse.Context{Caller: input.Caller, Cancel: cancel}) - if errno != 0 { - return errnoToStatus(errno) - } - } + var fh FileHandle + var fuseFlags uint32 + var errno syscall.Errno - b.mu.Lock() - defer b.mu.Unlock() - fe := b.registerFile(n, nil, 0) - out.Fh = uint64(fe.fh) - return fuse.OK -} + ctx := &fuse.Context{Caller: input.Caller, Cancel: cancel} -// setStream makes sure `f.dirStream` and associated state variables are set and -// seeks to offset requested in `input`. Caller must hold `f.mu`. -// The `eof` return value shows if `f.dirStream` ended before the requested -// offset was reached. -func (b *rawBridge) setStream(cancel <-chan struct{}, input *fuse.ReadIn, inode *Inode, f *fileEntry) (errno syscall.Errno, eof bool) { - // Get a new directory stream in the following cases: - // 1) f.dirStream == nil ............ First READDIR[PLUS] on this file handle. - // 2) input.Offset == 0 ............. Start reading the directory again from - // the beginning (user called rewinddir(3) or lseek(2)). - // 3) input.Offset < f.nextOffset ... Seek back (user called seekdir(3) or lseek(2)). 
- if f.dirStream == nil || input.Offset == 0 || input.Offset < f.dirOffset { - if f.dirStream != nil { - f.dirStream.Close() - f.dirStream = nil - } - str, errno := b.getStream(&fuse.Context{Caller: input.Caller, Cancel: cancel}, inode) - if errno != 0 { - return errno, false - } + nod, _ := n.ops.(NodeOpendirer) + nrd, _ := n.ops.(NodeReaddirer) - f.dirOffset = 0 - f.hasOverflow = false - f.dirStream = str - } + if odh, ok := n.ops.(NodeOpendirHandler); ok { + fh, fuseFlags, errno = odh.OpendirHandle(ctx, input.Flags) - // Seek forward? - for f.dirOffset < input.Offset { - f.hasOverflow = false - if !f.dirStream.HasNext() { - // Seek past end of directory. This is not an error, but the - // user will get an empty directory listing. - return 0, true - } - de, errno := f.dirStream.Next() if errno != 0 { - return errno, true + return errnoToStatus(errno) + } + } else { + if nod != nil { + errno = nod.Opendir(ctx) + if errno != 0 { + return errnoToStatus(errno) + } } - if de.Off == 0 { - de.Off = f.dirOffset + 1 + + var ctor func(context.Context) (DirStream, syscall.Errno) + if nrd != nil { + ctor = func(ctx context.Context) (DirStream, syscall.Errno) { + return nrd.Readdir(ctx) + } + } else { + ctor = func(ctx context.Context) (DirStream, syscall.Errno) { + return n.childrenAsDirstream(), 0 + } } - f.dirOffset = de.Off + fh = &dirStreamAsFile{creator: ctor} } - return 0, false + b.mu.Lock() + defer b.mu.Unlock() + fe := b.registerFile(n, fh, 0) + out.Fh = uint64(fe.fh) + out.OpenFlags = fuseFlags + return fuse.OK } -func (b *rawBridge) getStream(ctx context.Context, inode *Inode) (DirStream, syscall.Errno) { - if rd, ok := inode.ops.(NodeReaddirer); ok { - return rd.Readdir(ctx) - } - - lst := inode.childrenList() +func (n *Inode) childrenAsDirstream() DirStream { + lst := n.childrenList() r := make([]fuse.DirEntry, 0, len(lst)) for _, e := range lst { r = append(r, fuse.DirEntry{Mode: e.Inode.Mode(), Name: e.Name, Ino: e.Inode.StableAttr().Ino}) } - return 
NewListDirStream(r), 0 + return NewListDirStream(r) } func (b *rawBridge) ReadDirPlus(cancel <-chan struct{}, input *fuse.ReadIn, out *fuse.DirEntryList) fuse.Status { @@ -1095,53 +1072,72 @@ func (b *rawBridge) readDirMaybeLookup(cancel <-chan struct{}, input *fuse.ReadI f.mu.Lock() defer f.mu.Unlock() - defer func() { f.dirOffset = out.Offset }() - - errno, eof := b.setStream(cancel, input, n, f) - if errno != 0 { - return errnoToStatus(errno) - } else if eof { - return fuse.OK + ctx := &fuse.Context{Caller: input.Caller, Cancel: cancel} + if input.Offset != f.dirOffset { + if sd, ok := f.file.(FileSeekdirer); ok { + errno := sd.Seekdir(ctx, input.Offset) + if errno != 0 { + return errnoToStatus(errno) + } + f.dirOffset = input.Offset + f.overflowErrno = 0 + f.hasOverflow = false + } else { + return fuse.ENOTSUP + } } - ctx := &fuse.Context{Caller: input.Caller, Cancel: cancel} + defer func() { + f.dirOffset = out.Offset + }() + fre, ok := f.file.(FileReaddirenter) + if !ok { + return fuse.OK + } + getdent := fre.Readdirent first := true - for f.dirStream.HasNext() || f.hasOverflow { - var e fuse.DirEntry - var errno syscall.Errno + for { + var de *fuse.DirEntry + var errno syscall.Errno if f.hasOverflow { - e = f.overflow - errno = f.overflowErrno f.hasOverflow = false + if f.overflowErrno != 0 { + return errnoToStatus(f.overflowErrno) + } + de = &f.overflow } else { - e, errno = f.dirStream.Next() + de, errno = getdent(ctx) + if errno != 0 { + if first { + return errnoToStatus(errno) + } else { + f.hasOverflow = true + f.overflowErrno = errno + return fuse.OK + } + } } - if errno != 0 { - if first { - return errnoToStatus(errno) - } else { - f.overflowErrno = errno - f.hasOverflow = true - return fuse.OK - } + if de == nil { + break } + first = false if !lookup { - if !out.AddDirEntry(e) { - f.overflow = e + if !out.AddDirEntry(*de) { + f.overflow = *de f.hasOverflow = true return fuse.OK } continue } - entryOut := out.AddDirLookupEntry(e) + entryOut := 
out.AddDirLookupEntry(*de) if entryOut == nil { - f.overflow = e + f.overflow = *de f.hasOverflow = true return fuse.OK } @@ -1151,20 +1147,20 @@ func (b *rawBridge) readDirMaybeLookup(cancel <-chan struct{}, input *fuse.ReadI // The values in EntryOut are ignored by Linux // (see fuse_direntplus_link() in linux/fs/fuse/readdir.c), so leave // them at zero-value. - if e.Name == "." || e.Name == ".." { + if de.Name == "." || de.Name == ".." { continue } - child, errno := b.lookup(ctx, n, e.Name, entryOut) + child, errno := b.lookup(ctx, n, de.Name, entryOut) if errno != 0 { if b.options.NegativeTimeout != nil { entryOut.SetEntryTimeout(*b.options.NegativeTimeout) } } else { - child, _ = b.addNewChild(n, e.Name, child, nil, 0, entryOut) + child, _ = b.addNewChild(n, de.Name, child, nil, 0, entryOut) child.setEntryOut(entryOut) b.setEntryOutTimeout(entryOut) - if e.Mode&syscall.S_IFMT != child.stableAttr.Mode&syscall.S_IFMT { + if de.Mode&syscall.S_IFMT != child.stableAttr.Mode&syscall.S_IFMT { // The file type has changed behind our back. Use the new value. 
out.FixMode(child.stableAttr.Mode) } @@ -1176,9 +1172,12 @@ func (b *rawBridge) readDirMaybeLookup(cancel <-chan struct{}, input *fuse.ReadI } func (b *rawBridge) FsyncDir(cancel <-chan struct{}, input *fuse.FsyncIn) fuse.Status { - n, _ := b.inode(input.NodeId, input.Fh) - if fs, ok := n.ops.(NodeFsyncer); ok { - return errnoToStatus(fs.Fsync(&fuse.Context{Caller: input.Caller, Cancel: cancel}, nil, input.FsyncFlags)) + n, f := b.inode(input.NodeId, input.Fh) + ctx := &fuse.Context{Caller: input.Caller, Cancel: cancel} + if fsd, ok := f.file.(FileFsyncdirer); ok { + return errnoToStatus(fsd.Fsyncdir(ctx, input.FsyncFlags)) + } else if fs, ok := n.ops.(NodeFsyncer); ok { + return errnoToStatus(fs.Fsync(ctx, f.file, input.FsyncFlags)) } return fuse.ENOTSUP diff --git a/fs/dir_test.go b/fs/dir_test.go index a223a8ab7..4144a4832 100644 --- a/fs/dir_test.go +++ b/fs/dir_test.go @@ -7,6 +7,8 @@ package fs import ( "context" "fmt" + "reflect" + "sync" "syscall" "testing" @@ -73,6 +75,9 @@ func TestDirStreamError(t *testing.T) { } defer ds.Close() + if !ds.HasNext() { + t.Fatal("expect HasNext") + } if e, errno := ds.Next(); errno != 0 { t.Errorf("ds.Next: %v", errno) } else if e.Name != "first" { @@ -97,3 +102,164 @@ func TestDirStreamError(t *testing.T) { }) } } + +type dirStreamSeekNode struct { + Inode + num int +} + +type listDirEntries struct { + entries []fuse.DirEntry + next int +} + +var _ = (FileReaddirenter)((*listDirEntries)(nil)) + +func (l *listDirEntries) Readdirent(ctx context.Context) (*fuse.DirEntry, syscall.Errno) { + if l.next >= len(l.entries) { + return nil, 0 + } + de := &l.entries[l.next] + l.next++ + return de, 0 +} + +var _ = (FileSeekdirer)((*listDirEntries)(nil)) + +func (l *listDirEntries) Seekdir(ctx context.Context, off uint64) syscall.Errno { + if off == 0 { + l.next = 0 + } else { + for i, e := range l.entries { + if e.Off == off { + l.next = i + 1 + return 0 + } + } + } + // TODO: error code if not found? 
+ return 0 +} + +var _ = (NodeOpendirHandler)((*dirStreamSeekNode)(nil)) + +func (n *dirStreamSeekNode) OpendirHandle(ctx context.Context, flags uint32) (FileHandle, uint32, syscall.Errno) { + var l []fuse.DirEntry + + for i := 0; i < n.num; i++ { + l = append(l, fuse.DirEntry{ + Name: fmt.Sprintf("name%d", i), + Mode: fuse.S_IFREG, + Ino: uint64(i + 100), + Off: uint64((1 + (i*7)%n.num) * 100), + }) + } + + return &listDirEntries{entries: l}, 0, 0 +} + +func testDirSeek(t *testing.T, mnt string) { + ds, errno := NewLoopbackDirStream(mnt) + if errno != 0 { + t.Fatalf("NewLoopbackDirStream: %v", errno) + } + defer ds.Close() + + fullResult, errno := readDirStream(ds) + if errno != 0 { + t.Fatalf("readDirStream: %v", errno) + } + + for i, res := range fullResult { + func() { + ds, errno := NewLoopbackDirStream(mnt) + if errno != 0 { + t.Fatalf("NewLoopbackDirStream: %v", errno) + } + defer ds.Close() + + if errno := ds.(*loopbackDirStream).Seekdir(context.Background(), res.Off); errno != 0 { + t.Fatalf("seek: %v", errno) + } + + rest, errno := readDirStream(ds) + if errno != 0 { + t.Fatalf("readDirStream: %v", errno) + } + if rest == nil { + rest = fullResult[:0] + } + if want := fullResult[i+1:]; !reflect.DeepEqual(rest, want) { + t.Errorf("got %v, want %v", rest, want) + } + }() + } +} + +func TestDirStreamSeek(t *testing.T) { + for _, rdp := range []bool{false, true} { + t.Run(fmt.Sprintf("readdirplus=%v", rdp), + func(t *testing.T) { + N := 11 + + root := &dirStreamSeekNode{num: N} + opts := Options{} + opts.DisableReadDirPlus = !rdp + + mnt, _ := testMount(t, root, &opts) + testDirSeek(t, mnt) + }) + } +} + +type syncNode struct { + Inode + + mu sync.Mutex + syncDirCount int +} + +type syncDir struct { + node *syncNode +} + +func (d *syncDir) Readdirent(ctx context.Context) (*fuse.DirEntry, syscall.Errno) { + return nil, 0 +} + +var _ = (FileFsyncdirer)((*syncDir)(nil)) + +func (d *syncDir) Fsyncdir(ctx context.Context, flags uint32) syscall.Errno { + 
d.node.mu.Lock() + defer d.node.mu.Unlock() + d.node.syncDirCount++ + return 0 +} + +var _ = (NodeOpendirHandler)((*syncNode)(nil)) + +func (n *syncNode) OpendirHandle(ctx context.Context, flags uint32) (FileHandle, uint32, syscall.Errno) { + return &syncDir{n}, 0, 0 +} + +func TestFsyncDir(t *testing.T) { + root := &syncNode{} + opts := Options{} + mnt, _ := testMount(t, root, &opts) + + fd, err := syscall.Open(mnt, syscall.O_DIRECTORY, 0) + if err != nil { + t.Fatal(err) + } + + if err := syscall.Fsync(fd); err != nil { + t.Fatal(err) + } + syscall.Close(fd) + root.mu.Lock() + defer root.mu.Unlock() + if root.syncDirCount != 1 { + t.Errorf("got %d, want 1", root.syncDirCount) + } + +} diff --git a/fs/dirstream.go b/fs/dirstream.go index 363aef6ab..0c2564729 100644 --- a/fs/dirstream.go +++ b/fs/dirstream.go @@ -5,6 +5,7 @@ package fs import ( + "context" "syscall" "github.com/hanwen/go-fuse/v2/fuse" @@ -32,3 +33,30 @@ func (a *dirArray) Close() { func NewListDirStream(list []fuse.DirEntry) DirStream { return &dirArray{list} } + +// implement FileReaddirenter/FileReleasedirer +type dirStreamAsFile struct { + creator func(context.Context) (DirStream, syscall.Errno) + ds DirStream +} + +func (d *dirStreamAsFile) Releasedir(ctx context.Context, releaseFlags uint32) { + if d.ds != nil { + d.ds.Close() + } +} + +func (d *dirStreamAsFile) Readdirent(ctx context.Context) (de *fuse.DirEntry, errno syscall.Errno) { + if d.ds == nil { + d.ds, errno = d.creator(ctx) + if errno != 0 { + return nil, errno + } + } + if !d.ds.HasNext() { + return nil, 0 + } + + e, errno := d.ds.Next() + return &e, errno +} diff --git a/fs/dirstream_unix.go b/fs/dirstream_unix.go index be0f49776..bc3292f72 100644 --- a/fs/dirstream_unix.go +++ b/fs/dirstream_unix.go @@ -7,6 +7,7 @@ package fs import ( + "context" "sync" "syscall" @@ -38,7 +39,6 @@ func NewLoopbackDirStream(name string) (DirStream, syscall.Errno) { buf: make([]byte, 4096), fd: fd, } - ds.load() return ds, OK } @@ -52,12 +52,52 @@ 
func (ds *loopbackDirStream) Close() { } } +var _ = (FileReleasedirer)((*loopbackDirStream)(nil)) + +func (ds *loopbackDirStream) Releasedir(ctx context.Context, flags uint32) { + ds.Close() +} + +var _ = (FileSeekdirer)((*loopbackDirStream)(nil)) + +func (ds *loopbackDirStream) Seekdir(ctx context.Context, off uint64) syscall.Errno { + ds.mu.Lock() + defer ds.mu.Unlock() + _, errno := unix.Seek(ds.fd, int64(off), unix.SEEK_SET) + if errno != nil { + return ToErrno(errno) + } + + ds.todo = nil + ds.todoErrno = 0 + ds.load() + return 0 +} + +var _ = (FileFsyncdirer)((*loopbackDirStream)(nil)) + +func (ds *loopbackDirStream) Fsyncdir(ctx context.Context, flags uint32) syscall.Errno { + ds.mu.Lock() + defer ds.mu.Unlock() + return ToErrno(syscall.Fsync(ds.fd)) +} + func (ds *loopbackDirStream) HasNext() bool { ds.mu.Lock() defer ds.mu.Unlock() return len(ds.todo) > 0 || ds.todoErrno != 0 } +var _ = (FileReaddirenter)((*loopbackDirStream)(nil)) + +func (ds *loopbackDirStream) Readdirent(ctx context.Context) (*fuse.DirEntry, syscall.Errno) { + if !ds.HasNext() { + return nil, 0 + } + de, errno := ds.Next() + return &de, errno +} + func (ds *loopbackDirStream) Next() (fuse.DirEntry, syscall.Errno) { ds.mu.Lock() defer ds.mu.Unlock() diff --git a/fs/loopback.go b/fs/loopback.go index b1ba04655..2868e6cff 100644 --- a/fs/loopback.go +++ b/fs/loopback.go @@ -343,15 +343,14 @@ func (n *LoopbackNode) Open(ctx context.Context, flags uint32) (fh FileHandle, f return lf, 0, 0 } -var _ = (NodeOpendirer)((*LoopbackNode)(nil)) +var _ = (NodeOpendirHandler)((*LoopbackNode)(nil)) -func (n *LoopbackNode) Opendir(ctx context.Context) syscall.Errno { - fd, err := syscall.Open(n.path(), syscall.O_DIRECTORY, 0755) - if err != nil { - return ToErrno(err) +func (n *LoopbackNode) OpendirHandle(ctx context.Context, flags uint32) (FileHandle, uint32, syscall.Errno) { + ds, errno := NewLoopbackDirStream(n.path()) + if errno != 0 { + return nil, 0, errno } - syscall.Close(fd) - return OK + 
return ds, 0, errno } var _ = (NodeReaddirer)((*LoopbackNode)(nil))