Skip to content

Commit

Permalink
[runtime-security] fix container_path and inode (#6461)
Browse files Browse the repository at this point in the history
* Fix open dentry inode

* Fix mkdir inode

* Fix rm/unlink inode

* Fix rename inode and marshal old.inode

* Fix setattr inode

* Fix rmdir inode

* Fix link inode and marshal

* Fix setxattr inode

* Add open inode tests

* Pop/Peek syscall based on syscall type

* FIX Mount

* Rename second pass inode/dentry with real_ prefix

* Fix mountinfo parsing based on host_proc

* Make Mkdir tests passing in docker env

* Make chown and link tests passing in docker env

* Make Rmdir tests working in docker env

* Make rename test working in docker env

* Make unlink test passing in docker env

* Make utimes test passing in docker env

* Make setxattr test working in docker env and container path working with inner mount

* Make open test working in docker env

* Fix mount unit test and improve delete handling

* Do not append container path with an invalid dentry placeholder

* Avoid dentry entry with inode set to 0

* Do not fail if overlayfs is not present

* Allow to filter the event in test and fix mount/utimes
  • Loading branch information
safchain authored Oct 23, 2020
1 parent 24e2e83 commit 24a6fd3
Show file tree
Hide file tree
Showing 42 changed files with 8,132 additions and 7,160 deletions.
13,888 changes: 7,221 additions & 6,667 deletions pkg/ebpf/bytecode/tracer-ebpf.go

Large diffs are not rendered by default.

13 changes: 10 additions & 3 deletions pkg/security/ebpf/c/chmod.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ struct chmod_event_t {

int __attribute__((always_inline)) trace__sys_chmod(umode_t mode) {
struct syscall_cache_t syscall = {
.type = EVENT_CHMOD,
.type = SYSCALL_CHMOD,
.setattr = {
.mode = mode
}
Expand All @@ -38,14 +38,21 @@ SYSCALL_KPROBE3(fchmodat, int, dirfd, const char*, filename, umode_t, mode) {
}

int __attribute__((always_inline)) trace__sys_chmod_ret(struct pt_regs *ctx) {
struct syscall_cache_t *syscall = pop_syscall();
struct syscall_cache_t *syscall = pop_syscall(SYSCALL_CHMOD);
if (!syscall)
return 0;

int retval = PT_REGS_RC(ctx);
if (IS_UNHANDLED_ERROR(retval))
return 0;

// add a real entry to reach the first dentry with the proper inode
u64 inode = syscall->setattr.path_key.ino;
if (syscall->setattr.real_inode) {
inode = syscall->setattr.real_inode;
link_dentry_inode(syscall->setattr.path_key, inode);
}

struct chmod_event_t event = {
.event.type = EVENT_CHMOD,
.syscall = {
Expand All @@ -54,7 +61,7 @@ int __attribute__((always_inline)) trace__sys_chmod_ret(struct pt_regs *ctx) {
},
.file = {
.mount_id = syscall->setattr.path_key.mount_id,
.inode = syscall->setattr.path_key.ino,
.inode = inode,
.overlay_numlower = get_overlay_numlower(syscall->setattr.dentry),
},
.padding = 0,
Expand Down
13 changes: 10 additions & 3 deletions pkg/security/ebpf/c/chown.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ struct chown_event_t {

int __attribute__((always_inline)) trace__sys_chown(uid_t user, gid_t group) {
struct syscall_cache_t syscall = {
.type = EVENT_CHOWN,
.type = SYSCALL_CHOWN,
.setattr = {
.user = user,
.group = group
Expand Down Expand Up @@ -55,22 +55,29 @@ SYSCALL_KPROBE4(fchownat, int, dirfd, const char*, filename, uid_t, user, gid_t,
}

int __attribute__((always_inline)) trace__sys_chown_ret(struct pt_regs *ctx) {
struct syscall_cache_t *syscall = pop_syscall();
struct syscall_cache_t *syscall = pop_syscall(SYSCALL_CHOWN);
if (!syscall)
return 0;

int retval = PT_REGS_RC(ctx);
if (IS_UNHANDLED_ERROR(retval))
return 0;

// add a real entry to reach the first dentry with the proper inode
u64 inode = syscall->setattr.path_key.ino;
if (syscall->setattr.real_inode) {
inode = syscall->setattr.real_inode;
link_dentry_inode(syscall->setattr.path_key, inode);
}

struct chown_event_t event = {
.event.type = EVENT_CHOWN,
.syscall = {
.retval = retval,
.timestamp = bpf_ktime_get_ns(),
},
.file = {
.inode = syscall->setattr.path_key.ino,
.inode = inode,
.mount_id = syscall->setattr.path_key.mount_id,
.overlay_numlower = get_overlay_numlower(syscall->setattr.dentry),
},
Expand Down
18 changes: 18 additions & 0 deletions pkg/security/ebpf/c/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,24 @@ enum event_type
EVENT_EXEC,
};

// Bit-flag identifiers for cached syscalls: each SYSCALL_* value is the single
// bit `1 << EVENT_*` of the matching event_type enumerator. Because every value
// is a distinct bit, several of them can be OR-ed into one mask, as done by
// peek_syscall(SYSCALL_MKDIR | SYSCALL_LINK).
// NOTE(review): the shift is performed on the int constant 1, so every EVENT_*
// value used here must stay below the bit width of int - confirm when adding events.
enum syscall_type
{
SYSCALL_OPEN = 1 << EVENT_OPEN,
SYSCALL_MKDIR = 1 << EVENT_MKDIR,
SYSCALL_LINK = 1 << EVENT_LINK,
SYSCALL_RENAME = 1 << EVENT_RENAME,
SYSCALL_UNLINK = 1 << EVENT_UNLINK,
SYSCALL_RMDIR = 1 << EVENT_RMDIR,
SYSCALL_CHMOD = 1 << EVENT_CHMOD,
SYSCALL_CHOWN = 1 << EVENT_CHOWN,
SYSCALL_UTIME = 1 << EVENT_UTIME,
SYSCALL_MOUNT = 1 << EVENT_MOUNT,
SYSCALL_UMOUNT = 1 << EVENT_UMOUNT,
SYSCALL_SETXATTR = 1 << EVENT_SETXATTR,
SYSCALL_REMOVEXATTR = 1 << EVENT_REMOVEXATTR,
SYSCALL_EXEC = 1 << EVENT_EXEC,
};

// Event header holding the event type as a 64-bit value.
// NOTE(review): presumably this is the `.event` member that the *_event_t
// structs fill with an EVENT_* value (e.g. `.event.type = EVENT_CHMOD`) - confirm.
struct kevent_t {
u64 type;
};
Expand Down
29 changes: 26 additions & 3 deletions pkg/security/ebpf/c/dentry.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ struct bpf_map_def SEC("maps/mount_id_offset") mount_id_offset = {
};

struct path_key_t {
unsigned long ino;
int mount_id;
u64 ino;
u32 mount_id;
u32 padding;
};

Expand All @@ -50,7 +50,7 @@ unsigned long __attribute__((always_inline)) get_inode_ino(struct inode *inode)
return ino;
}

void __attribute__((always_inline)) write_inode_ino(struct inode *inode, unsigned long *ino) {
// Copies the inode number (the i_ino field) of `inode` into *ino with bpf_probe_read.
//   inode: kernel inode to read from
//   ino:   destination for the inode number
// The read length is the size of the i_ino field itself. The previous
// `sizeof(inode)` measured the `inode` pointer, which only happens to have the
// same size as i_ino on LP64 targets.
void __attribute__((always_inline)) write_inode_ino(struct inode *inode, u64 *ino) {
    bpf_probe_read(ino, sizeof(inode->i_ino), &inode->i_ino);
}

Expand Down Expand Up @@ -198,13 +198,36 @@ void __attribute__((always_inline)) get_dentry_name(struct dentry *dentry, void

// Builds a struct path_key_t compound literal for a dentry/path pair:
// .ino comes from get_dentry_ino(dentry) and .mount_id from get_path_mount_id(path).
// Members not named in the initializer are zero-initialized.
#define get_key(dentry, path) (struct path_key_t) { .ino = get_dentry_ino(dentry), .mount_id = get_path_mount_id(path) }

// Builds a struct path_key_t compound literal for an inode/path pair:
// .ino comes from get_inode_ino(inode) and .mount_id from get_path_mount_id(path).
// Members not named in the initializer are zero-initialized.
#define get_inode_key_path(inode, path) (struct path_key_t) { .ino = get_inode_ino(inode), .mount_id = get_path_mount_id(path) }

// Adds an entry to the `pathnames` map keyed by (key.mount_id, inode) whose
// parent is `key`, so that `inode` can be followed back to the entry stored
// under `key`.
//   key:   existing path key that becomes the parent of the new entry
//   inode: inode number used for the new entry's key
// Nothing is written when `inode` is the same as key.ino.
static __attribute__((always_inline)) void link_dentry_inode(struct path_key_t key, u64 inode) {
    // an entry that is its own parent (same inode for parent and child) would
    // lead to an infinite loop, so only link distinct inodes
    if (inode != key.ino) {
        struct path_leaf_t leaf = {
            .parent = key
        };
        struct path_key_t alias = {
            .ino = inode,
            .mount_id = key.mount_id,
        };

        bpf_map_update_elem(&pathnames, &alias, &leaf, BPF_ANY);
    }
}

static __attribute__((always_inline)) int resolve_dentry(struct dentry *dentry, struct path_key_t key, struct bpf_map_def *discarders_table) {
struct path_leaf_t map_value = {};
struct path_key_t next_key = key;
struct qstr qstr;
struct dentry *d_parent;
struct inode *d_inode = NULL;

if (key.ino == 0 || key.mount_id == 0) {
return -1;
}

#pragma unroll
for (int i = 0; i < DENTRY_MAX_DEPTH; i++)
{
Expand Down
18 changes: 11 additions & 7 deletions pkg/security/ebpf/c/exec.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ struct bpf_map_def SEC("maps/pid_cookie") pid_cookie = {

int __attribute__((always_inline)) trace__sys_execveat() {
struct syscall_cache_t syscall = {
.type = EVENT_EXEC,
.type = SYSCALL_EXEC,
};

cache_syscall(&syscall);
Expand Down Expand Up @@ -72,13 +72,18 @@ struct proc_cache_t * __attribute__((always_inline)) get_pid_cache(u32 tgid) {
return entry;
}

int __attribute__((always_inline)) vfs_handle_exec_event(struct pt_regs *ctx, struct syscall_cache_t *syscall) {
struct path *path = (struct path *)PT_REGS_PARM1(ctx);
int __attribute__((always_inline)) handle_exec_event(struct pt_regs *ctx, struct syscall_cache_t *syscall) {
struct file *file = (struct file *)PT_REGS_PARM1(ctx);
struct inode *inode = (struct inode *)PT_REGS_PARM2(ctx);
struct path *path = &file->f_path;

syscall->open.dentry = get_file_dentry(file);
syscall->open.path_key = get_inode_key_path(inode, &file->f_path);

// new cache entry
struct proc_cache_t entry = {
.executable = {
.inode = get_path_ino(path),
.inode = syscall->open.path_key.ino,
.overlay_numlower = get_overlay_numlower(get_path_dentry(path)),
.mount_id = get_path_mount_id(path),
},
Expand All @@ -102,14 +107,13 @@ int __attribute__((always_inline)) vfs_handle_exec_event(struct pt_regs *ctx, st
// insert pid <-> cookie mapping
bpf_map_update_elem(&pid_cookie, &tgid, &cookie, BPF_ANY);

pop_syscall();
pop_syscall(SYSCALL_EXEC);

return 0;
}

SEC("tracepoint/sched/sched_process_fork")
int sched_process_fork(struct _tracepoint_sched_process_fork *args)
{
int sched_process_fork(struct _tracepoint_sched_process_fork *args) {
u32 pid = 0;
u32 ppid = 0;
bpf_probe_read(&pid, sizeof(pid), &args->child_pid);
Expand Down
8 changes: 4 additions & 4 deletions pkg/security/ebpf/c/filename.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@

SEC("kprobe/filename_create")
int kprobe__filename_create(struct pt_regs *ctx) {
struct syscall_cache_t *syscall = peek_syscall();
struct syscall_cache_t *syscall = peek_syscall(SYSCALL_MKDIR | SYSCALL_LINK);
if (!syscall)
return 0;

switch (syscall->type) {
case EVENT_MKDIR:
syscall->mkdir.dir = (struct path *)PT_REGS_PARM3(ctx);
case SYSCALL_MKDIR:
syscall->mkdir.path = (struct path *)PT_REGS_PARM3(ctx);
break;
case EVENT_LINK:
case SYSCALL_LINK:
syscall->link.target_path = (struct path *)PT_REGS_PARM3(ctx);
break;
}
Expand Down
28 changes: 19 additions & 9 deletions pkg/security/ebpf/c/link.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ struct link_event_t {

int __attribute__((always_inline)) trace__sys_link() {
struct syscall_cache_t syscall = {
.type = EVENT_LINK,
.type = SYSCALL_LINK,
};
cache_syscall(&syscall);

Expand All @@ -31,15 +31,18 @@ SYSCALL_KPROBE0(linkat) {

SEC("kprobe/vfs_link")
int kprobe__vfs_link(struct pt_regs *ctx) {
struct syscall_cache_t *syscall = peek_syscall();
struct syscall_cache_t *syscall = peek_syscall(SYSCALL_LINK);
if (!syscall)
return 0;
// In a container, vfs_link can be called multiple times to handle the different layers of the overlay filesystem.
// The first call is the only one we really care about, the subsequent calls contain paths to the overlay work layer.
if (syscall->link.target_dentry)
return 0;

struct dentry *dentry = (struct dentry *)PT_REGS_PARM1(ctx);

// if second pass, ex: overlayfs, just cache the inode that will be used in ret
if (syscall->link.target_dentry) {
syscall->link.real_src_inode = get_dentry_ino(dentry);
return 0;
}

syscall->link.target_dentry = (struct dentry *)PT_REGS_PARM3(ctx);
syscall->link.src_overlay_numlower = get_overlay_numlower(dentry);
// this is a hard link, source and target dentries are on the same filesystem & mount point
Expand All @@ -48,29 +51,36 @@ int kprobe__vfs_link(struct pt_regs *ctx) {
// we generate a fake target key as the inode is the same
syscall->link.target_key.ino = bpf_get_prandom_u32() << 32 | bpf_get_prandom_u32();
syscall->link.target_key.mount_id = syscall->link.src_key.mount_id;
get_key(syscall->link.target_dentry, syscall->link.target_path);

resolve_dentry(dentry, syscall->link.src_key, NULL);

return 0;
}

int __attribute__((always_inline)) trace__sys_link_ret(struct pt_regs *ctx) {
struct syscall_cache_t *syscall = pop_syscall();
struct syscall_cache_t *syscall = pop_syscall(SYSCALL_LINK);
if (!syscall)
return 0;

int retval = PT_REGS_RC(ctx);
if (IS_UNHANDLED_ERROR(retval))
return 0;

// add a real entry to reach the first dentry with the proper inode
u64 inode = syscall->link.src_key.ino;
if (syscall->link.real_src_inode) {
inode = syscall->link.real_src_inode;
link_dentry_inode(syscall->link.src_key, inode);
}

struct link_event_t event = {
.event.type = EVENT_LINK,
.syscall = {
.retval = retval,
.timestamp = bpf_ktime_get_ns(),
},
.source = {
.inode = syscall->link.src_key.ino,
.inode = inode,
.mount_id = syscall->link.src_key.mount_id,
.overlay_numlower = syscall->link.src_overlay_numlower,
},
Expand Down
35 changes: 24 additions & 11 deletions pkg/security/ebpf/c/mkdir.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ struct mkdir_event_t {

long __attribute__((always_inline)) trace__sys_mkdir(umode_t mode) {
struct syscall_cache_t syscall = {
.type = EVENT_MKDIR,
.type = SYSCALL_MKDIR,
.mkdir = {
.mode = mode
}
Expand All @@ -37,21 +37,26 @@ SYSCALL_KPROBE3(mkdirat, int, dirfd, const char*, filename, umode_t, mode)

SEC("kprobe/vfs_mkdir")
int kprobe__security_path_mkdir(struct pt_regs *ctx) {
struct syscall_cache_t *syscall = peek_syscall();
struct syscall_cache_t *syscall = peek_syscall(SYSCALL_MKDIR);
if (!syscall)
return 0;
// In a container, vfs_mkdir can be called multiple times to handle the different layers of the overlay filesystem.
// The first call is the only one we really care about, the subsequent calls contain paths to the overlay work layer.
if (syscall->mkdir.dentry)

struct dentry *dentry = (struct dentry *)PT_REGS_PARM2(ctx);

// if second pass, ex: overlayfs, just cache the inode that will be used in ret
if (syscall->mkdir.dentry) {
syscall->mkdir.real_dentry = dentry;
return 0;
}

syscall->mkdir.dentry = dentry;
syscall->mkdir.path_key = get_key(syscall->mkdir.dentry, syscall->mkdir.path);

syscall->mkdir.dentry = (struct dentry *)PT_REGS_PARM2(ctx);
syscall->mkdir.path_key = get_key(syscall->mkdir.dentry, syscall->mkdir.dir);
return 0;
}

int __attribute__((always_inline)) trace__sys_mkdir_ret(struct pt_regs *ctx) {
struct syscall_cache_t *syscall = pop_syscall();
struct syscall_cache_t *syscall = pop_syscall(SYSCALL_MKDIR);
if (!syscall)
return 0;

Expand All @@ -61,14 +66,24 @@ int __attribute__((always_inline)) trace__sys_mkdir_ret(struct pt_regs *ctx) {

// the inode of the dentry was not properly set when the kprobe/vfs_mkdir probe (kprobe__security_path_mkdir) was called, make sure we grab it now
syscall->mkdir.path_key.ino = get_dentry_ino(syscall->mkdir.dentry);

resolve_dentry(syscall->mkdir.dentry, syscall->mkdir.path_key, NULL);

// add a real entry to reach the first dentry with the proper inode
u64 inode = syscall->mkdir.path_key.ino;
if (syscall->mkdir.real_dentry) {
inode = get_dentry_ino(syscall->mkdir.real_dentry);
link_dentry_inode(syscall->mkdir.path_key, inode);
}

struct mkdir_event_t event = {
.event.type = EVENT_MKDIR,
.syscall = {
.retval = retval,
.timestamp = bpf_ktime_get_ns(),
},
.file = {
.inode = syscall->mkdir.path_key.ino,
.inode = inode,
.mount_id = syscall->mkdir.path_key.mount_id,
.overlay_numlower = get_overlay_numlower(syscall->mkdir.dentry),
},
Expand All @@ -78,8 +93,6 @@ int __attribute__((always_inline)) trace__sys_mkdir_ret(struct pt_regs *ctx) {
struct proc_cache_t *entry = fill_process_data(&event.process);
fill_container_data(entry, &event.container);

resolve_dentry(syscall->mkdir.dentry, syscall->mkdir.path_key, NULL);

send_event(ctx, event);

return 0;
Expand Down
Loading

0 comments on commit 24a6fd3

Please sign in to comment.