Skip to content

Commit

Permalink
Fix checking of parent directories
Browse files Browse the repository at this point in the history
Taken from the justification in the launchpad bug:

To a task in freezer cgroup /a/b/c/d, it should appear that there are no
cgroups other than its descendents. Since this is a filesystem, we must have
the parent directories, but each parent cgroup should only contain the child
which the task can see.

So, when this task looks at /a/b, it should see only directory 'c' and no
files. Attempt to create /a/b/x should result in -EPERM, whether /a/b/x already
exists or not. Attempts to query /a/b/x should result in -ENOENT whether /a/b/x
exists or not. Opening /a/b/tasks should result in -ENOENT.

The caller_may_see_dir checks specifically whether a task may see a cgroup
directory - i.e. /a/b/x if opening /a/b/x/tasks, and /a/b/c/d if doing
opendir('/a/b/c/d').

caller_is_in_ancestor() will return true if the caller in /a/b/c/d looks at
/a/b/c/d/e. If the caller is in a child cgroup of the queried one - i.e. if the
task in /a/b/c/d queries /a/b, then *nextcg will container the next (the only)
directory which he can see in the path - 'c'.

Beyond this, regular DAC permissions should apply, with the
root-in-user-namespace privilege over its mapped uids being respected. The
fc_may_access check does this check for both directories and files.

This is CVE-2015-1342 (LP: #1508481)

Signed-off-by: Serge Hallyn <[email protected]>
  • Loading branch information
hallyn committed Nov 17, 2015
1 parent d09b7ff commit a8b6c3e
Show file tree
Hide file tree
Showing 4 changed files with 646 additions and 46 deletions.
8 changes: 5 additions & 3 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ endif

TEST_READ: tests/test-read.c
$(CC) -o tests/test-read tests/test-read.c

TEST_CPUSET: tests/cpusetrange.c cpuset.c
$(CC) -o tests/cpusetrange tests/cpusetrange.c cpuset.c
TEST_SYSCALLS: tests/test_syscalls.c
$(CC) -o tests/test_syscalls tests/test_syscalls.c

tests: TEST_READ TEST_CPUSET
tests: TEST_READ TEST_CPUSET TEST_SYSCALLS

distclean:
rm -rf .deps/ \
Expand All @@ -60,4 +61,5 @@ distclean:
lxcfs.o \
m4/ \
missing \
stamp-h1
stamp-h1 \
tests/test_syscalls
137 changes: 94 additions & 43 deletions lxcfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,12 @@ static bool perms_include(int fmode, mode_t req_mode)
return ((fmode & r) == r);
}


/*
* taskcg is a/b/c
* querycg is /a/b/c/d/e
* we return 'd'
*/
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
char *start, *end;
Expand Down Expand Up @@ -378,53 +384,71 @@ static void prune_init_slice(char *cg)
*/
static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
{
char fnam[PROCLEN];
FILE *f;
bool answer = false;
char *line = NULL;
size_t len = 0;
int ret;
char *c2 = get_pid_cgroup(pid, contrl);
char *linecmp;

ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
if (ret < 0 || ret >= PROCLEN)
return false;
if (!(f = fopen(fnam, "r")))
if (!c2)
return false;
prune_init_slice(c2);

while (getline(&line, &len, f) != -1) {
char *c1, *c2, *linecmp;
if (!line[0])
continue;
c1 = strchr(line, ':');
if (!c1)
goto out;
c1++;
c2 = strchr(c1, ':');
if (!c2)
goto out;
*c2 = '\0';
if (strcmp(c1, contrl) != 0)
continue;
c2++;
stripnewline(c2);
prune_init_slice(c2);
/*
* callers pass in '/' for root cgroup, otherwise they pass
* in a cgroup without leading '/'
*/
linecmp = *cg == '/' ? c2 : c2+1;
if (strncmp(linecmp, cg, strlen(linecmp)) != 0) {
if (nextcg)
*nextcg = get_next_cgroup_dir(linecmp, cg);
goto out;
/*
* callers pass in '/' for root cgroup, otherwise they pass
* in a cgroup without leading '/'
*/
linecmp = *cg == '/' ? c2 : c2+1;
if (strncmp(linecmp, cg, strlen(linecmp)) != 0) {
if (nextcg) {
*nextcg = get_next_cgroup_dir(linecmp, cg);
}
goto out;
}
answer = true;

out:
free(c2);
return answer;
}

/*
* If caller is in /a/b/c, he may see that /a exists, but not /b or /a/c.
*/
static bool caller_may_see_dir(pid_t pid, const char *contrl, const char *cg)
{
bool answer = false;
char *c2, *task_cg;
size_t target_len, task_len;

if (strcmp(cg, "/") == 0)
return true;

c2 = get_pid_cgroup(pid, contrl);

if (!c2)
return false;

task_cg = c2 + 1;
target_len = strlen(cg);
task_len = strlen(task_cg);
if (strcmp(cg, task_cg) == 0) {
answer = true;
goto out;
}
if (target_len < task_len) {
/* looking up a parent dir */
if (strncmp(task_cg, cg, target_len) == 0 && task_cg[target_len] == '/')
answer = true;
goto out;
}
if (target_len > task_len) {
/* looking up a child dir */
if (strncmp(task_cg, cg, task_len) == 0 && cg[task_len] == '/')
answer = true;
goto out;
}

out:
fclose(f);
free(line);
free(c2);
return answer;
}

Expand Down Expand Up @@ -552,6 +576,10 @@ static int cg_getattr(const char *path, struct stat *sb)
* cgroup, or cgdir if fpath is a file */

if (is_child_cgroup(controller, path1, path2)) {
if (!caller_may_see_dir(fc->pid, controller, cgroup)) {
ret = -ENOENT;
goto out;
}
if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
/* this is just /cgroup/controller, return it as a dir */
sb->st_mode = S_IFDIR | 00555;
Expand Down Expand Up @@ -630,8 +658,11 @@ static int cg_opendir(const char *path, struct fuse_file_info *fi)
}
}

if (cgroup && !fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
return -EACCES;
if (cgroup) {
if (!caller_may_see_dir(fc->pid, controller, cgroup))
return -ENOENT;
if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
return -EACCES;
}

/* we'll free this at cg_releasedir */
Expand Down Expand Up @@ -780,6 +811,10 @@ static int cg_open(const char *path, struct fuse_file_info *fi)
}
free_key(k);

if (!caller_may_see_dir(fc->pid, controller, path1)) {
ret = -ENOENT;
goto out;
}
if (!fc_may_access(fc, controller, path1, path2, fi->flags)) {
// should never get here
ret = -EACCES;
Expand Down Expand Up @@ -1563,7 +1598,7 @@ int cg_chmod(const char *path, mode_t mode)
int cg_mkdir(const char *path, mode_t mode)
{
struct fuse_context *fc = fuse_get_context();
char *fpath = NULL, *path1, *cgdir = NULL, *controller;
char *fpath = NULL, *path1, *cgdir = NULL, *controller, *next = NULL;
const char *cgroup;
int ret;

Expand All @@ -1585,6 +1620,14 @@ int cg_mkdir(const char *path, mode_t mode)
else
path1 = cgdir;

if (!caller_is_in_ancestor(fc->pid, controller, path1, &next)) {
if (fpath && strcmp(next, fpath) == 0)
ret = -EEXIST;
else
ret = -ENOENT;
goto out;
}

if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) {
ret = -EACCES;
goto out;
Expand All @@ -1599,13 +1642,14 @@ int cg_mkdir(const char *path, mode_t mode)

out:
free(cgdir);
free(next);
return ret;
}

static int cg_rmdir(const char *path)
{
struct fuse_context *fc = fuse_get_context();
char *fpath = NULL, *cgdir = NULL, *controller;
char *fpath = NULL, *cgdir = NULL, *controller, *next = NULL;
const char *cgroup;
int ret;

Expand All @@ -1626,8 +1670,14 @@ static int cg_rmdir(const char *path)
goto out;
}

fprintf(stderr, "rmdir: verifying access to %s:%s (req path %s)\n",
controller, cgdir, path);
if (!caller_is_in_ancestor(fc->pid, controller, cgroup, &next)) {
if (!fpath || strcmp(next, fpath) == 0)
ret = -EBUSY;
else
ret = -ENOENT;
goto out;
}

if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) {
ret = -EACCES;
goto out;
Expand All @@ -1646,6 +1696,7 @@ static int cg_rmdir(const char *path)

out:
free(cgdir);
free(next);
return ret;
}

Expand Down
96 changes: 96 additions & 0 deletions tests/test_confinement.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/bin/bash

set -ex

[ $(id -u) -eq 0 ]

d=$(mktemp -t -d tmp.XXX)
d2=$(mktemp -t -d tmp.XXX)

pid=-1
cleanup() {
[ $pid -ne -1 ] && kill -9 $pid
umount -l $d || true
umount -l $d2 || true
rm -rf $d $d2
}

cmdline=$(realpath $0)
dirname=$(dirname ${cmdline})
topdir=$(dirname ${dirname})

trap cleanup EXIT HUP INT TERM

${topdir}/lxcfs $d &
pid=$!

# put ourselves into x1
cgm movepidabs freezer / $$
cgm create freezer x1
cgm movepid freezer x1 $$

mount -t cgroup -o freezer freezer $d2
sudo rmdir $d2/lxcfs_test_a1/lxcfs_test_a2 || true
sudo rmdir $d2/lxcfs_test_a1 || true

echo "Making sure root cannot mkdir"
bad=0
mkdir $d/cgroup/freezer/lxcfs_test_a1 && bad=1
if [ "${bad}" -eq 1 ]; then
false
fi

echo "Making sure root cannot rmdir"
mkdir $d2/lxcfs_test_a1
mkdir $d2/lxcfs_test_a1/lxcfs_test_a2
rmdir $d/cgroup/freezer/lxcfs_test_a1 && bad=1
if [ "${bad}" -eq 1 ]; then
false
fi
[ -d $d2/lxcfs_test_a1 ]
rmdir $d/cgroup/freezer/lxcfs_test_a1/lxcfs_test_a2 && bad=1
if [ "${bad}" -eq 1 ]; then
false
fi
[ -d $d2/lxcfs_test_a1/lxcfs_test_a2 ]

echo "Making sure root cannot read/write"
sleep 200 &
p=$!
echo $p > $d/cgroup/freezer/lxcfs_test_a1/tasks && bad=1
if [ "${bad}" -eq 1 ]; then
false
fi
cat $d/cgroup/freezer/lxcfs_test_a1/tasks && bad=1
if [ "${bad}" -eq 1 ]; then
false
fi
echo $p > $d/cgroup/freezer/lxcfs_test_a1/lxcfs_test_a2/tasks && bad=1
if [ "${bad}" -eq 1 ]; then
false
fi
cat $d/cgroup/freezer/lxcfs_test_a1/lxcfs_test_a2/tasks && bad=1
if [ "${bad}" -eq 1 ]; then
false
fi

# make sure things like truncate and access don't leak info about
# the /lxcfs_test_a1 cgroup which we shouldn't be able to reach
echo "Testing other system calls"
${dirname}/test_syscalls $d/cgroup/freezer/lxcfs_test_a1
${dirname}/test_syscalls $d/cgroup/freezer/lxcfs_test_a1/lxcfs_test_a2

echo "Making sure root can act on descendents"
mycg=$(cgm getpidcgroupabs freezer $$)
newcg=${mycg}/lxcfs_test_a1
rmdir $d2/$newcg || true # cleanup previosu run
mkdir $d/cgroup/freezer/$newcg
echo $p > $d/cgroup/freezer/$newcg/tasks
cat $d/cgroup/freezer/$newcg/tasks
kill -9 $p
while [ `wc -l $d/cgroup/freezer/$newcg/tasks | awk '{ print $1 }'` -ne 0 ]; do
sleep 1
done
rmdir $d/cgroup/freezer/$newcg

echo "All tests passed!"
Loading

0 comments on commit a8b6c3e

Please sign in to comment.