Skip to content

Commit

Permalink
[LibOS] Add O_APPEND emulation for single process
Browse files Browse the repository at this point in the history
Note that Gramine doesn't support multi-process operations on the same
file anyhow, in particular, the file position is not synchronized among
multiple processes. This commit doesn't change this limitation of
Gramine, thus `O_APPEND` writes don't work correctly (may overwrite file
contents).

Note that Linux has a bug in `pwrite()`. Gramine emulates this bug
in the same way.

A new FS test is added. One LTP test is uncommented.

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
  • Loading branch information
Dmitrii Kuvaiskii authored and kailun-qin committed Nov 15, 2024
1 parent 72b5631 commit e140552
Show file tree
Hide file tree
Showing 16 changed files with 187 additions and 28 deletions.
6 changes: 4 additions & 2 deletions Documentation/devel/features.md
Original file line number Diff line number Diff line change
Expand Up @@ -2058,8 +2058,10 @@ Gramine implements all classic filesystem operations, but with limitations descr

Gramine supports opening files and directories (via `open()` and `openat()` system calls).
`O_CLOEXEC`, `O_CREAT`, `O_DIRECTORY`, `O_EXCL`, `O_NOFOLLOW`, `O_PATH`, `O_TRUNC` flags are
supported. Other flags are ignored. Notable ignored flags are `O_APPEND` (not yet implemented in
Gramine) and `O_TMPFILE` (bug in Gramine: should not be silently ignored).
supported. `O_APPEND` is supported, but is currently limited only to a single process, i.e., opening
the same file in append mode in two processes may result in file contents being overwritten. Other
flags are ignored. Notable ignored flag is `O_TMPFILE` (bug in Gramine: should not be silently
ignored).

Trusted files can be opened only for reading. Already-existing encrypted files can be opened only if
they were not moved or renamed on the host (this is for protection against file renaming attacks).
Expand Down
1 change: 1 addition & 0 deletions libos/include/libos_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,7 @@ int get_dirfd_dentry(int dirfd, struct libos_dentry** dir);
* - O_DIRECTORY: expect/create a directory instead of regular file
* - O_NOFOLLOW: don't follow symbolic links when resolving a path
* - O_TRUNC: truncate the file after opening
* - O_APPEND: open file in append mode for writing
*
* The flags (including any not listed above), as well as file mode, are passed to the underlying
* filesystem.
Expand Down
5 changes: 3 additions & 2 deletions libos/src/fs/chroot/encrypted.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,14 +477,15 @@ static ssize_t chroot_encrypted_write(struct libos_handle* hdl, const void* buf,

lock(&hdl->inode->lock);

int ret = encrypted_file_write(enc, buf, count, *pos, &actual_count);
file_off_t actual_pos = (hdl->flags & O_APPEND) ? hdl->inode->size : *pos;
int ret = encrypted_file_write(enc, buf, count, actual_pos, &actual_count);
if (ret < 0) {
unlock(&hdl->inode->lock);
return ret;
}

assert(actual_count <= count);
*pos += actual_count;
*pos = actual_pos + actual_count;
if (hdl->inode->size < *pos)
hdl->inode->size = *pos;

Expand Down
10 changes: 8 additions & 2 deletions libos/src/fs/chroot/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -623,14 +623,20 @@ static ssize_t chroot_write(struct libos_handle* hdl, const void* buf, size_t co
return -EACCES;
}

int ret = PalStreamWrite(hdl->pal_handle, *pos, &count, (void*)buf);
file_off_t actual_pos = *pos;
lock(&hdl->inode->lock);
if (hdl->inode->type == S_IFREG && (hdl->flags & O_APPEND))
actual_pos = hdl->inode->size;
unlock(&hdl->inode->lock);

int ret = PalStreamWrite(hdl->pal_handle, actual_pos, &count, (void*)buf);
if (ret < 0) {
return pal_to_unix_errno(ret);
}

size_t new_size = 0;
if (hdl->inode->type == S_IFREG) {
*pos += count;
*pos = actual_pos + count;
/* Update file size if we just wrote past the end of file */
lock(&hdl->inode->lock);
if (hdl->inode->size < *pos)
Expand Down
5 changes: 3 additions & 2 deletions libos/src/fs/tmpfs/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,15 +259,16 @@ static ssize_t tmpfs_write(struct libos_handle* hdl, const void* buf, size_t siz
lock(&inode->lock);
struct libos_mem_file* mem = inode->data;

ret = mem_file_write(mem, *pos, buf, size);
file_off_t actual_pos = (hdl->flags & O_APPEND) ? hdl->inode->size : *pos;
ret = mem_file_write(mem, actual_pos, buf, size);
if (ret < 0) {
unlock(&inode->lock);
return ret;
}

inode->size = mem->size;

*pos += ret;
*pos = actual_pos + ret;
inode->mtime = time_us / USEC_IN_SEC;
/* keep `ret` */

Expand Down
12 changes: 6 additions & 6 deletions libos/src/sys/libos_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,12 @@ long libos_syscall_sendfile(int out_fd, int in_fd, off_t* offset, size_t count)
goto out;
}

if (!(in_hdl->acc_mode & MAY_READ) || !(out_hdl->acc_mode & MAY_WRITE)) {
/* error out if input fd isn't readable or output fd isn't writable */
ret = -EBADF;
goto out;
}

if (out_hdl->flags & O_APPEND) {
/* Linux errors out if output fd has the O_APPEND flag set; comply with this behavior */
ret = -EINVAL;
Expand Down Expand Up @@ -493,12 +499,6 @@ long libos_syscall_sendfile(int out_fd, int in_fd, off_t* offset, size_t count)
maybe_unlock_pos_handle(in_hdl);
}

if (!(out_hdl->acc_mode & MAY_WRITE)) {
/* Linux errors out if output fd isn't writable */
ret = -EBADF;
goto out;
}

while (copied_to_out < count) {
size_t to_copy = count - copied_to_out > BUF_SIZE ? BUF_SIZE : count - copied_to_out;

Expand Down
8 changes: 8 additions & 0 deletions libos/src/sys/libos_open.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,14 @@ long libos_syscall_pwrite64(int fd, char* buf, size_t count, loff_t offset) {
goto out;
}

/*
* Note that Linux has the following bug: POSIX requires that opening a file with the O_APPEND
* flag should have no effect on the location at which pwrite() writes data. However, on Linux,
* if a file is opened with O_APPEND, pwrite() appends data to the end of the file, regardless
* of the value of offset. See man page pwrite(2), section BUGS.
*
* Gramine complies with this behavior, see implementations of the write() callback.
*/
file_off_t pos = offset;
ret = fs->fs_ops->write(hdl, buf, count, &pos);
out:
Expand Down
7 changes: 7 additions & 0 deletions libos/test/fs/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ int open_output_fd(const char* path, bool rdwr) {
return fd;
}

int open_output_fd_append(const char* path) {
int fd = open(path, O_RDWR | O_CREAT | O_APPEND, 0664);
if (fd < 0)
fatal_error("Failed to open output file for append %s: %s\n", path, strerror(errno));
return fd;
}

void write_fd(const char* path, int fd, const void* buffer, size_t size) {
off_t offset = 0;
while (size > 0) {
Expand Down
1 change: 1 addition & 0 deletions libos/test/fs/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ void read_fd(const char* path, int fd, void* buffer, size_t size);
void seek_fd(const char* path, int fd, off_t offset, int mode);
off_t tell_fd(const char* path, int fd);
int open_output_fd(const char* path, bool rdwr);
int open_output_fd_append(const char* path);
void write_fd(const char* path, int fd, const void* buffer, size_t size);
void sendfile_fd(const char* input_path, const char* output_path, int fi, int fo, size_t size);
void close_fd(const char* path, int fd);
Expand Down
1 change: 1 addition & 0 deletions libos/test/fs/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ tests = {
},
'open_close': {},
'open_flags': {},
'read_append': {},
'read_write': {},
'read_write_mmap': {},
'seek_tell': {},
Expand Down
124 changes: 124 additions & 0 deletions libos/test/fs/read_append.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#include "common.h"

static void read_append(const char* file_path) {
const size_t size = 1024 * 1024;

void* buf1 = alloc_buffer(size);
void* buf2 = alloc_buffer(size);
fill_random(buf1, size);

ssize_t bytes_read;

/* test 1: create new file, append buf1 in two chunks to it, try to read (this should return EOF
* because appends moved file pos), then reset the file pos and read again */
printf("TEST 1 (%s)\n", file_path);
int fd = open_output_fd_append(file_path);
printf("open(%s) OK\n", file_path);
write_fd(file_path, fd, buf1, size / 2);
printf("first write(%s) OK\n", file_path);
write_fd(file_path, fd, buf1 + size / 2, size - size / 2);
printf("second write(%s) OK\n", file_path);
bytes_read = read(fd, buf2, size);
if (bytes_read != 0)
fatal_error("Read after append did not indicate EOF\n");
printf("first read(%s) OK\n", file_path);
seek_fd(file_path, fd, 0, SEEK_SET);
printf("seek(%s) OK\n", file_path);
read_fd(file_path, fd, buf2, size);
printf("second read(%s) OK\n", file_path);
if (memcmp(buf1, buf2, size) != 0)
fatal_error("Read data is different from what was written\n");
printf("compare(%s) OK\n", file_path);
close_fd(file_path, fd);
printf("close(%s) OK\n", file_path);
size_t file_size1 = file_size(file_path);
if (file_size1 != size)
fatal_error("File size is wrong (expected %lu got %lu)\n", size, file_size1);
printf("file_size(%s) OK\n", file_path);


/* test 2: open the file created previously (it must contain buf1), then read (this should
* return buf1 as there was no write/append operation yet, so file pos is zero), then reset the
* file pos and append buf1 (this should result in buf1 + buf1 contents of the file) */
printf("TEST 2 (%s)\n", file_path);
fd = open_output_fd_append(file_path);
printf("open(%s) OK\n", file_path);
read_fd(file_path, fd, buf2, size);
printf("first read(%s) OK\n", file_path);
if (memcmp(buf1, buf2, size) != 0)
fatal_error("Read data is different from what was written\n");
printf("first compare(%s) OK\n", file_path);
bytes_read = read(fd, buf2, size);
if (bytes_read != 0)
fatal_error("Second read did not indicate EOF\n");
printf("second read(%s) OK\n", file_path);
seek_fd(file_path, fd, 0, SEEK_SET);
printf("seek(%s) OK\n", file_path);
write_fd(file_path, fd, buf1, size);
printf("write(%s) OK\n", file_path);
seek_fd(file_path, fd, 0, SEEK_SET);
printf("seek(%s) OK\n", file_path);
read_fd(file_path, fd, buf2, size);
printf("first read(%s) after appending OK\n", file_path);
if (memcmp(buf1, buf2, size) != 0)
fatal_error("Read data is different from what was written\n");
read_fd(file_path, fd, buf2, size);
printf("second read(%s) after appending OK\n", file_path);
if (memcmp(buf1, buf2, size) != 0)
fatal_error("Read data is different from what was written\n");
printf("compare(%s) OK\n", file_path);
close_fd(file_path, fd);
printf("close(%s) OK\n", file_path);
size_t file_size2 = file_size(file_path);
if (file_size2 != size * 2)
fatal_error("File size is wrong (expected %lu got %lu)\n", size * 2, file_size2);
printf("file_size(%s) OK\n", file_path);

/* test 3: open the file created previously (size must be sizeof(buf1) * 2) in no-append mode,
* reset file pos and write buf1 (must overwrite contents, so file size must stay the same),
* then change to append mode, reset file pos and write buf1 (must append, so size will
* increase) */
printf("TEST 3 (%s)\n", file_path);
fd = open_output_fd(file_path, /*rdwr=*/true);
printf("open(%s) OK\n", file_path);
seek_fd(file_path, fd, 0, SEEK_SET);
printf("first seek(%s) OK\n", file_path);
write_fd(file_path, fd, buf1, size);
printf("first write(%s) OK\n", file_path);
int fcntl_ret = fcntl(fd, F_SETFL, O_APPEND);
if (fcntl_ret < 0)
fatal_error("Could not set O_APPEND flag using fcntl()\n");
seek_fd(file_path, fd, 0, SEEK_SET);
printf("second seek(%s) OK\n", file_path);
write_fd(file_path, fd, buf1, size);
printf("second write(%s) OK\n", file_path);
seek_fd(file_path, fd, 0, SEEK_SET);
printf("third seek(%s) OK\n", file_path);
for (int i = 0; i < 2; i++) {
read_fd(file_path, fd, buf2, size);
printf("read number %d (%s) OK\n", i + 1, file_path);
if (memcmp(buf1, buf2, size) != 0)
fatal_error("read number %d: Read data is different from what was written\n", i + 1);
}
printf("compare(%s) OK\n", file_path);
close_fd(file_path, fd);
printf("close(%s) OK\n", file_path);
size_t file_size3 = file_size(file_path);
if (file_size3 != size * 3)
fatal_error("File size is wrong (expected %lu got %lu)\n", size * 3, file_size3);
printf("file_size(%s) OK\n", file_path);

free(buf1);
free(buf2);
}

int main(int argc, char* argv[]) {
if (argc < 2)
fatal_error("Usage: %s <file_path>\n", argv[0]);

setup();
read_append(argv[1]);

printf("TEST OK\n");
return 0;
}
10 changes: 10 additions & 0 deletions libos/test/fs/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,16 @@ def test_111_read_write_mmap(self):
self.assertIn('close(' + file_path + ') RW fd1 (mmap) OK', stdout)
self.assertIn('close(' + file_path + ') RW fd2 OK', stdout)

def test_112_read_append(self):
file_path = os.path.join(self.OUTPUT_DIR, 'test_112') # new file to be created
stdout, stderr = self.run_binary(['read_append', file_path])
self.assertNotIn('ERROR: ', stderr)
self.assertTrue(os.path.isfile(file_path))
self.assertIn('TEST 1 (' + file_path + ')', stdout)
self.assertIn('TEST 2 (' + file_path + ')', stdout)
self.assertIn('TEST 3 (' + file_path + ')', stdout)
self.assertIn('TEST OK', stdout)

# pylint: disable=too-many-arguments
def verify_seek_tell(self, stdout, output_path_1, output_path_2, size):
self.assertIn('Test passed', stdout)
Expand Down
10 changes: 10 additions & 0 deletions libos/test/fs/test_tmpfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,16 @@ def test_111_read_write_mmap(self):
self.assertIn('close(' + file_path + ') RW fd1 (mmap) OK', stdout)
self.assertIn('close(' + file_path + ') RW fd2 OK', stdout)

# overrides TC_00_FileSystem to skip verification of file existence
def test_112_read_append(self):
file_path = os.path.join(self.OUTPUT_DIR, 'test_112') # new file to be created
stdout, stderr = self.run_binary(['read_append', file_path])
self.assertNotIn('ERROR: ', stderr)
self.assertIn('TEST 1 (' + file_path + ')', stdout)
self.assertIn('TEST 2 (' + file_path + ')', stdout)
self.assertIn('TEST 3 (' + file_path + ')', stdout)
self.assertIn('TEST OK', stdout)

@unittest.skip("impossible to do setup on tmpfs with python only")
def test_115_seek_tell(self):
test_fs.TC_00_FileSystem.test_115_seek_tell(self)
Expand Down
1 change: 1 addition & 0 deletions libos/test/fs/tests.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ manifests = [
"multiple_writers",
"open_close",
"open_flags",
"read_append",
"read_write",
"read_write_mmap",
"seek_tell",
Expand Down
9 changes: 0 additions & 9 deletions libos/test/ltp/ltp.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -1558,15 +1558,6 @@ skip = yes
[pwrite01_64]
timeout = 40

[pwrite04]
must-pass =
2

[pwrite04_64]
timeout = 40
must-pass =
2

[pwritev01]
skip = yes

Expand Down
5 changes: 0 additions & 5 deletions libos/test/ltp/ltp_sgx.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -493,11 +493,6 @@ skip = yes
[pwrite02_64]
skip = yes

[pwrite04_64]
timeout = 60
must-pass =
2

[pwritev01]
skip = yes
timeout = 60
Expand Down

0 comments on commit e140552

Please sign in to comment.