Skip to content

Commit

Permalink
bpf: introduce BPF_F_LOCK flag
Browse files Browse the repository at this point in the history
Introduce BPF_F_LOCK flag for map_lookup and map_update syscall commands
and for map_update() helper function.
In all these cases take a lock of existing element (which was provided
in BTF description) before copying (in or out) the rest of map value.

Implementation details that are part of uapi:

Array:
The array map takes the element lock for lookup/update.

Hash:
hash map also takes the lock for lookup/update and tries to avoid the bucket lock.
If old element exists it takes the element lock and updates the element in place.
If element doesn't exist it allocates new one and inserts into hash table
while holding the bucket lock.
In rare case the hashmap has to take both the bucket lock and the element lock
to update old value in place.

Cgroup local storage:
It is similar to array. update in place and lookup are done with lock taken.

Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
  • Loading branch information
Alexei Starovoitov authored and borkmann committed Feb 1, 2019
1 parent ab963be commit 96049f3
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 14 deletions.
2 changes: 2 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
memcpy(dst, src, map->value_size);
}
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);

struct bpf_offload_dev;
struct bpf_offloaded_map;
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ enum bpf_attach_type {
#define BPF_ANY 0 /* create new element or update existing */
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */
#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */

/* flags for BPF_MAP_CREATE command */
#define BPF_F_NO_PREALLOC (1U << 0)
Expand Down
24 changes: 16 additions & 8 deletions kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,27 +253,35 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
char *val;

if (unlikely(map_flags > BPF_EXIST))
if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
/* unknown flags */
return -EINVAL;

if (unlikely(index >= array->map.max_entries))
/* all elements were pre-allocated, cannot insert a new one */
return -E2BIG;

if (unlikely(map_flags == BPF_NOEXIST))
if (unlikely(map_flags & BPF_NOEXIST))
/* all elements already exist */
return -EEXIST;

if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
if (unlikely((map_flags & BPF_F_LOCK) &&
!map_value_has_spin_lock(map)))
return -EINVAL;

if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size);
else
copy_map_value(map,
array->value +
array->elem_size * (index & array->index_mask),
value);
} else {
val = array->value +
array->elem_size * (index & array->index_mask);
if (map_flags & BPF_F_LOCK)
copy_map_value_locked(map, val, value, false);
else
copy_map_value(map, val, value);
}
return 0;
}

Expand Down
42 changes: 39 additions & 3 deletions kernel/bpf/hashtab.c
Original file line number Diff line number Diff line change
Expand Up @@ -804,11 +804,11 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
u64 map_flags)
{
if (l_old && map_flags == BPF_NOEXIST)
if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
/* elem already exists */
return -EEXIST;

if (!l_old && map_flags == BPF_EXIST)
if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
/* elem doesn't exist, cannot update it */
return -ENOENT;

Expand All @@ -827,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
u32 key_size, hash;
int ret;

if (unlikely(map_flags > BPF_EXIST))
if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
/* unknown flags */
return -EINVAL;

Expand All @@ -840,6 +840,28 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
b = __select_bucket(htab, hash);
head = &b->head;

if (unlikely(map_flags & BPF_F_LOCK)) {
if (unlikely(!map_value_has_spin_lock(map)))
return -EINVAL;
/* find an element without taking the bucket lock */
l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
htab->n_buckets);
ret = check_flags(htab, l_old, map_flags);
if (ret)
return ret;
if (l_old) {
/* grab the element lock and update value in place */
copy_map_value_locked(map,
l_old->key + round_up(key_size, 8),
value, false);
return 0;
}
/* fall through, grab the bucket lock and lookup again.
* 99.9% chance that the element won't be found,
* but second lookup under lock has to be done.
*/
}

/* bpf_map_update_elem() can be called in_irq() */
raw_spin_lock_irqsave(&b->lock, flags);

Expand All @@ -849,6 +871,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (ret)
goto err;

if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
/* first lookup without the bucket lock didn't find the element,
* but second lookup with the bucket lock found it.
* This case is highly unlikely, but has to be dealt with:
* grab the element lock in addition to the bucket lock
* and update element in place
*/
copy_map_value_locked(map,
l_old->key + round_up(key_size, 8),
value, false);
ret = 0;
goto err;
}

l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
l_old);
if (IS_ERR(l_new)) {
Expand Down
16 changes: 16 additions & 0 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,22 @@ const struct bpf_func_proto bpf_spin_unlock_proto = {
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
};

void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src)
{
struct bpf_spin_lock *lock;

if (lock_src)
lock = src + map->spin_lock_off;
else
lock = dst + map->spin_lock_off;
preempt_disable();
____bpf_spin_lock(lock);
copy_map_value(map, dst, src);
____bpf_spin_unlock(lock);
preempt_enable();
}

#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
Expand Down
14 changes: 13 additions & 1 deletion kernel/bpf/local_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,26 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
struct bpf_cgroup_storage *storage;
struct bpf_storage_buffer *new;

if (flags != BPF_ANY && flags != BPF_EXIST)
if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
return -EINVAL;

if (unlikely(flags & BPF_NOEXIST))
return -EINVAL;

if (unlikely((flags & BPF_F_LOCK) &&
!map_value_has_spin_lock(map)))
return -EINVAL;

storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
key, false);
if (!storage)
return -ENOENT;

if (flags & BPF_F_LOCK) {
copy_map_value_locked(map, storage->buf->data, value, false);
return 0;
}

new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
map->value_size,
__GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
Expand Down
25 changes: 23 additions & 2 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags

static int map_lookup_elem(union bpf_attr *attr)
{
Expand All @@ -698,6 +698,9 @@ static int map_lookup_elem(union bpf_attr *attr)
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
return -EINVAL;

if (attr->flags & ~BPF_F_LOCK)
return -EINVAL;

f = fdget(ufd);
map = __bpf_map_get(f);
if (IS_ERR(map))
Expand All @@ -708,6 +711,12 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}

if ((attr->flags & BPF_F_LOCK) &&
!map_value_has_spin_lock(map)) {
err = -EINVAL;
goto err_put;
}

key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
Expand Down Expand Up @@ -758,7 +767,13 @@ static int map_lookup_elem(union bpf_attr *attr)
err = -ENOENT;
} else {
err = 0;
copy_map_value(map, value, ptr);
if (attr->flags & BPF_F_LOCK)
/* lock 'ptr' and copy everything but lock */
copy_map_value_locked(map, value, ptr, true);
else
copy_map_value(map, value, ptr);
/* mask lock, since value wasn't zero inited */
check_and_init_map_lock(map, value);
}
rcu_read_unlock();
}
Expand Down Expand Up @@ -818,6 +833,12 @@ static int map_update_elem(union bpf_attr *attr)
goto err_put;
}

if ((attr->flags & BPF_F_LOCK) &&
!map_value_has_spin_lock(map)) {
err = -EINVAL;
goto err_put;
}

key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
Expand Down

0 comments on commit 96049f3

Please sign in to comment.