Skip to content

Commit

Permalink
bpf: add queue and stack maps
Browse files Browse the repository at this point in the history
Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs.
These maps support peek, pop and push operations that are exposed to eBPF
programs through the new bpf_map[peek/pop/push] helpers.  Those operations
are exposed to userspace applications through the already existing
syscalls in the following way:

BPF_MAP_LOOKUP_ELEM            -> peek
BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop
BPF_MAP_UPDATE_ELEM            -> push

Queue/stack maps are implemented using a buffer, tail and head indexes,
hence BPF_F_NO_PREALLOC is not supported.

As opposite to other maps, queue and stack do not use RCU for protecting
maps values, the bpf_map[peek/pop] have a ARG_PTR_TO_UNINIT_MAP_VALUE
argument that is a pointer to a memory zone where to save the value of a
map.  Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not
be passed as an extra argument.

Our main motivation for implementing queue/stack maps was to keep track
of a pool of elements, like network ports in a SNAT, however we forsee
other use cases, like for exampling saving last N kernel events in a map
and then analysing from userspace.

Signed-off-by: Mauricio Vasquez B <[email protected]>
Acked-by: Song Liu <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
  • Loading branch information
mauriciovasquezbernal authored and Alexei Starovoitov committed Oct 19, 2018
1 parent 2ea864c commit f1a2e44
Show file tree
Hide file tree
Showing 10 changed files with 401 additions and 3 deletions.
6 changes: 6 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
int (*map_pop_elem)(struct bpf_map *map, void *value);
int (*map_peek_elem)(struct bpf_map *map, void *value);

/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
Expand Down Expand Up @@ -811,6 +814,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
extern const struct bpf_func_proto bpf_map_delete_elem_proto;
extern const struct bpf_func_proto bpf_map_push_elem_proto;
extern const struct bpf_func_proto bpf_map_pop_elem_proto;
extern const struct bpf_func_proto bpf_map_peek_elem_proto;

extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
Expand Down
2 changes: 2 additions & 0 deletions include/linux/bpf_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
29 changes: 28 additions & 1 deletion include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK,
};

enum bpf_prog_type {
Expand Down Expand Up @@ -462,6 +464,28 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
* Description
* Push an element *value* in *map*. *flags* is one of:
*
* **BPF_EXIST**
* If the queue/stack is full, the oldest element is removed to
* make room for this.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_map_pop_elem(struct bpf_map *map, void *value)
* Description
* Pop an element from *map*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_map_peek_elem(struct bpf_map *map, void *value)
* Description
* Get an element from *map* without removing it.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
Expand Down Expand Up @@ -2303,7 +2327,10 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp), \
FN(sk_lookup_udp), \
FN(sk_release),
FN(sk_release), \
FN(map_push_elem), \
FN(map_pop_elem), \
FN(map_peek_elem),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ obj-y := core.o

obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
Expand Down
3 changes: 3 additions & 0 deletions kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1783,6 +1783,9 @@ BPF_CALL_0(bpf_user_rnd_u32)
const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
const struct bpf_func_proto bpf_map_push_elem_proto __weak;
const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
const struct bpf_func_proto bpf_map_peek_elem_proto __weak;

const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
Expand Down
43 changes: 43 additions & 0 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,49 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
.arg2_type = ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
return map->ops->map_push_elem(map, value, flags);
}

const struct bpf_func_proto bpf_map_push_elem_proto = {
.func = bpf_map_push_elem,
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_VALUE,
.arg3_type = ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
return map->ops->map_pop_elem(map, value);
}

const struct bpf_func_proto bpf_map_pop_elem_proto = {
.func = bpf_map_pop_elem,
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
};

BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
return map->ops->map_peek_elem(map, value);
}

const struct bpf_func_proto bpf_map_peek_elem_proto = {
.func = bpf_map_pop_elem,
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
};

const struct bpf_func_proto bpf_get_prandom_u32_proto = {
.func = bpf_user_rnd_u32,
.gpl_only = false,
Expand Down
Loading

0 comments on commit f1a2e44

Please sign in to comment.