Skip to content

Commit

Permalink
virtio-fs: use multiple dax mappings in filesystem
Browse files Browse the repository at this point in the history
This integrates the dax window manager with the virtio-fs file system
operations:

- Removes the lock from virtio::fs::dax_window, since this is now
  handled by the window manager.
- A single manager instance is maintained for each virtio-fs device
  having at least a singe mount. Multiple, concurrent mounts of the same
  device share a common dax_manager.
- The FUSE_READ fallback as well as the logic of using it when the DAX
  window is not available or a read using it fails are untouched.

Signed-off-by: Fotis Xenakis <[email protected]>
Message-Id: <AM0PR03MB629260F9F8CC3272E9CF435AA6960@AM0PR03MB6292.eurprd03.prod.outlook.com>
  • Loading branch information
foxeng authored and wkozaczuk committed Jun 26, 2020
1 parent 253919f commit df5ce40
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 139 deletions.
11 changes: 10 additions & 1 deletion drivers/virtio-fs.hh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
#ifndef VIRTIO_FS_DRIVER_H
#define VIRTIO_FS_DRIVER_H

#include <functional>

#include <osv/mutex.h>
#include <osv/waitqueue.hh>

#include "drivers/virtio.hh"
#include "drivers/virtio-device.hh"
#include "fs/virtiofs/fuse_kernel.h"
Expand Down Expand Up @@ -49,7 +52,13 @@ public:
struct dax_window {
mmioaddr_t addr;
u64 len;
mutex lock;
};

// Helper enabling the use of fs* as key type in an unordered_* container.
struct hasher {
size_t operator()(fs* const _fs) const noexcept {
return std::hash<int>{}(_fs->_id);
}
};

explicit fs(virtio_device& dev);
Expand Down
21 changes: 17 additions & 4 deletions fs/virtiofs/virtiofs.hh
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
#ifndef __INCLUDE_VIRTIOFS_H__
#define __INCLUDE_VIRTIOFS_H__

#include <osv/vnode.h>
#include <memory>

#include <osv/debug.h>
#include <osv/mount.h>
#include <osv/dentry.h>
#include <osv/prex.h>
#include <osv/buf.h>
#include <osv/vnode.h>

#include "drivers/virtio-fs.hh"
#include "fuse_kernel.h"

#define VIRTIOFS_DEBUG_ENABLED 1
Expand All @@ -23,6 +25,17 @@
#define virtiofs_debug(...)
#endif

// Necessary pre-declaration because virtiofs::dax depends on virtiofs_inode,
// declared below.
namespace virtiofs {
class dax_manager;
}

struct virtiofs_mount_data {
virtio::fs* drv;
std::shared_ptr<virtiofs::dax_manager> dax_mgr;
};

struct virtiofs_inode {
uint64_t nodeid;
struct fuse_attr attr;
Expand Down
3 changes: 0 additions & 3 deletions fs/virtiofs/virtiofs_i.hh
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
#ifndef VIRTIOFS_IO_H
#define VIRTIOFS_IO_H

#include "fuse_kernel.h"
#include <osv/mutex.h>
#include <osv/waitqueue.hh>
#include "drivers/virtio-fs.hh"

int fuse_req_send_and_receive_reply(virtio::fs* drv, uint32_t opcode,
Expand Down
53 changes: 48 additions & 5 deletions fs/virtiofs/virtiofs_vfsops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,32 @@
* BSD license as described in the LICENSE file in the top-level directory.
*/

#include <atomic>
#include <memory>
#include <mutex>
#include <new>
#include <sys/types.h>
#include <osv/device.h>

#include <api/assert.h>
#include <osv/debug.h>
#include <iomanip>
#include <iostream>
#include <osv/device.h>
#include <osv/mutex.h>

#include "drivers/virtio-fs.hh"
#include "virtiofs.hh"
#include "virtiofs_dax.hh"
#include "virtiofs_i.hh"
#include "drivers/virtio-fs.hh"

using fuse_request = virtio::fs::fuse_request;

static std::atomic<uint64_t> fuse_unique_id(1);

static struct {
std::unordered_map<virtio::fs*, std::shared_ptr<virtiofs::dax_manager>,
virtio::fs::hasher> mgrs;
mutex lock;
} dax_managers;

int fuse_req_send_and_receive_reply(virtio::fs* drv, uint32_t opcode,
uint64_t nodeid, void* input_args_data, size_t input_args_size,
void* output_args_data, size_t output_args_size)
Expand Down Expand Up @@ -115,7 +128,28 @@ static int virtiofs_mount(struct mount* mp, const char* dev, int flags,

virtiofs_set_vnode(mp->m_root->d_vnode, root_node);

mp->m_data = drv;
auto* m_data = new (std::nothrow) virtiofs_mount_data;
if (!m_data) {
return ENOMEM;
}
m_data->drv = drv;
if (drv->get_dax()) {
// The device supports the DAX window
std::lock_guard<mutex> guard {dax_managers.lock};
auto found = dax_managers.mgrs.find(drv);
if (found != dax_managers.mgrs.end()) {
// There is a dax_manager already associated with this device (the
// device is already mounted)
m_data->dax_mgr = found->second;
} else {
m_data->dax_mgr = std::make_shared<virtiofs::dax_manager>(*drv);
if (!m_data->dax_mgr) {
return ENOMEM;
}
}
}

mp->m_data = m_data;
mp->m_dev = device;

return 0;
Expand All @@ -141,6 +175,15 @@ static int virtiofs_statfs(struct mount* mp, struct statfs* statp)

static int virtiofs_unmount(struct mount* mp, int flags)
{
auto* m_data = static_cast<virtiofs_mount_data*>(mp->m_data);
std::lock_guard<mutex> guard {dax_managers.lock};
if (m_data->dax_mgr && m_data->dax_mgr.use_count() == 2) {
// This was the last mount of this device. It's safe to delete the
// window manager.
dax_managers.mgrs.erase(m_data->drv);
}
delete m_data;

struct device* dev = mp->m_dev;
return device_close(dev);
}
Expand Down
151 changes: 25 additions & 126 deletions fs/virtiofs/virtiofs_vnops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,27 @@
* BSD license as described in the LICENSE file in the top-level directory.
*/

#include <sys/stat.h>
#include <cstdlib>
#include <cstring>
#include <dirent.h>
#include <sys/param.h>

#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <osv/prex.h>
#include <osv/vnode.h>
#include <osv/file.h>
#include <osv/mount.h>
#include <osv/contiguous_alloc.hh>
#include <osv/debug.h>

#include <sys/types.h>
#include <osv/device.h>
#include <osv/sched.hh>
#include <osv/file.h>
#include <osv/mmio.hh>
#include <osv/contiguous_alloc.hh>
#include <osv/mount.h>
#include <osv/prex.h>
#include <osv/sched.hh>
#include <osv/vnode.h>

#include "virtiofs.hh"
#include "virtiofs_dax.hh"
#include "virtiofs_i.hh"

static constexpr uint32_t OPEN_FLAGS = O_RDONLY;
Expand Down Expand Up @@ -59,7 +58,8 @@ static int virtiofs_lookup(struct vnode* vnode, char* name, struct vnode** vpp)
}
strcpy(in_args.get(), name);

auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
auto* drv = m_data->drv;
auto error = fuse_req_send_and_receive_reply(drv, FUSE_LOOKUP,
inode->nodeid, in_args.get(), in_args_len, out_args.get(),
sizeof(*out_args));
Expand Down Expand Up @@ -110,7 +110,8 @@ static int virtiofs_open(struct file* fp)
}
in_args->flags = OPEN_FLAGS;

auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
auto* drv = m_data->drv;
auto error = fuse_req_send_and_receive_reply(drv, FUSE_OPEN,
inode->nodeid, in_args.get(), sizeof(*in_args), out_args.get(),
sizeof(*out_args));
Expand Down Expand Up @@ -145,7 +146,8 @@ static int virtiofs_close(struct vnode* vnode, struct file* fp)
in_args->fh = f_data->file_handle;
in_args->flags = OPEN_FLAGS; // need to be same as in FUSE_OPEN

auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
auto* drv = m_data->drv;
auto error = fuse_req_send_and_receive_reply(drv, FUSE_RELEASE,
inode->nodeid, in_args.get(), sizeof(*in_args), nullptr, 0);
if (error) {
Expand All @@ -172,7 +174,8 @@ static int virtiofs_readlink(struct vnode* vnode, struct uio* uio)
return ENOMEM;
}

auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
auto* drv = m_data->drv;
auto error = fuse_req_send_and_receive_reply(drv, FUSE_READLINK,
inode->nodeid, nullptr, 0, link_path.get(), PATH_MAX);
if (error) {
Expand All @@ -185,107 +188,6 @@ static int virtiofs_readlink(struct vnode* vnode, struct uio* uio)
return uiomove(link_path.get(), strlen(link_path.get()), uio);
}

// Read @read_amt bytes from @inode, using the DAX window.
static int virtiofs_read_direct(virtiofs_inode& inode, u64 file_handle,
u64 read_amt, virtio::fs& drv, struct uio& uio)
{
auto* dax = drv.get_dax();
// Enter the critical path: setup mapping -> read -> remove mapping
std::lock_guard<mutex> guard {dax->lock};

// Setup mapping
// NOTE: There are restrictions on the arguments to FUSE_SETUPMAPPING, from
// the spec: "Alignment constraints for FUSE_SETUPMAPPING and
// FUSE_REMOVEMAPPING requests are communicated during FUSE_INIT
// negotiation"):
// - foffset: multiple of map_alignment from FUSE_INIT
// - len: not larger than remaining file?
// - moffset: multiple of map_alignment from FUSE_INIT
// In practice, map_alignment is the host's page size, because foffset and
// moffset are passed to mmap() on the host.
std::unique_ptr<fuse_setupmapping_in> in_args {
new (std::nothrow) fuse_setupmapping_in()};
if (!in_args) {
return ENOMEM;
}
in_args->fh = file_handle;
in_args->flags = 0;
uint64_t moffset = 0;
in_args->moffset = moffset;

auto map_align = drv.get_map_alignment();
if (map_align < 0) {
kprintf("[virtiofs] inode %lld, map alignment not set\n", inode.nodeid);
return ENOTSUP;
}
uint64_t alignment = 1ul << map_align;
auto foffset = align_down(static_cast<uint64_t>(uio.uio_offset), alignment);
in_args->foffset = foffset;

// The possible excess part of the file mapped due to alignment constraints
// NOTE: map_excess <= alignemnt
auto map_excess = uio.uio_offset - foffset;
if (moffset + map_excess >= dax->len) {
// No usable room in DAX window due to map_excess
return ENOBUFS;
}
// Actual read amount is read_amt, or what fits in the DAX window
auto read_amt_act = std::min<uint64_t>(read_amt,
dax->len - moffset - map_excess);
in_args->len = read_amt_act + map_excess;

virtiofs_debug("inode %lld, setting up mapping (foffset=%lld, len=%lld, "
"moffset=%lld)\n", inode.nodeid, in_args->foffset,
in_args->len, in_args->moffset);
auto error = fuse_req_send_and_receive_reply(&drv, FUSE_SETUPMAPPING,
inode.nodeid, in_args.get(), sizeof(*in_args), nullptr, 0);
if (error) {
kprintf("[virtiofs] inode %lld, mapping setup failed\n", inode.nodeid);
return error;
}

// Read from the DAX window
// NOTE: It shouldn't be necessary to use the mmio* interface (i.e. volatile
// accesses). From the spec: "Drivers map this shared memory region with
// writeback caching as if it were regular RAM."
// The location of the requested data in the DAX window
auto req_data = dax->addr + moffset + map_excess;
error = uiomove(const_cast<void*>(req_data), read_amt_act, &uio);
if (error) {
kprintf("[virtiofs] inode %lld, uiomove failed\n", inode.nodeid);
return error;
}

// Remove mapping
// NOTE: This is only necessary when FUSE_SETUPMAPPING fails. From the spec:
// "If the device runs out of resources the FUSE_SETUPMAPPING request fails
// until resources are available again following FUSE_REMOVEMAPPING."
auto r_in_args_size = sizeof(fuse_removemapping_in) +
sizeof(fuse_removemapping_one);
std::unique_ptr<u8> r_in_args {new (std::nothrow) u8[r_in_args_size]};
if (!r_in_args) {
return ENOMEM;
}
auto r_in = new (r_in_args.get()) fuse_removemapping_in();
auto r_one = new (r_in_args.get() + sizeof(fuse_removemapping_in))
fuse_removemapping_one();
r_in->count = 1;
r_one->moffset = in_args->moffset;
r_one->len = in_args->len;

virtiofs_debug("inode %lld, removing mapping (moffset=%lld, len=%lld)\n",
inode.nodeid, r_one->moffset, r_one->len);
error = fuse_req_send_and_receive_reply(&drv, FUSE_REMOVEMAPPING,
inode.nodeid, r_in_args.get(), r_in_args_size, nullptr, 0);
if (error) {
kprintf("[virtiofs] inode %lld, mapping removal failed\n",
inode.nodeid);
return error;
}

return 0;
}

// Read @read_amt bytes from @inode, using the fallback FUSE_READ mechanism.
static int virtiofs_read_fallback(virtiofs_inode& inode, u64 file_handle,
u32 read_amt, u32 flags, virtio::fs& drv, struct uio& uio)
Expand Down Expand Up @@ -314,9 +216,6 @@ static int virtiofs_read_fallback(virtiofs_inode& inode, u64 file_handle,
return uiomove(buf.get(), read_amt, &uio);
}

// TODO: Optimize it to reduce number of exits to host (each
// fuse_req_send_and_receive_reply()) by reading eagerly "ahead/around" just
// like ROFS does and caching it
static int virtiofs_read(struct vnode* vnode, struct file* fp, struct uio* uio,
int ioflag)
{
Expand All @@ -343,17 +242,17 @@ static int virtiofs_read(struct vnode* vnode, struct file* fp, struct uio* uio,

auto* inode = static_cast<virtiofs_inode*>(vnode->v_data);
auto* file_data = static_cast<virtiofs_file_data*>(fp->f_data);
auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data);
auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data);
auto* drv = m_data->drv;
auto dax_mgr = m_data->dax_mgr;

// Total read amount is what they requested, or what is left
auto read_amt = std::min<uint64_t>(uio->uio_resid,
inode->attr.size - uio->uio_offset);

if (drv->get_dax()) {
if (dax_mgr) {
// Try to read from DAX
if (!virtiofs_read_direct(*inode, file_data->file_handle, read_amt,
*drv, *uio)) {

if (!dax_mgr->read(*inode, file_data->file_handle, read_amt, *uio)) {
return 0;
}
}
Expand Down

0 comments on commit df5ce40

Please sign in to comment.