Skip to content

Commit

Permalink
bpf: Do not allocate percpu memory at init stage
Browse files Browse the repository at this point in the history
Kirill Shutemov reported a significant percpu memory increase after booting
a 288-cpu VM ([1]) due to commit 41a5db8 ("bpf: Add support for
non-fix-size percpu mem allocation"). The percpu memory increased
from 111MB to 969MB. The numbers are from /proc/meminfo.

I tried to reproduce the issue with my local VM, which supports at most
255 cpus. With 252 cpus, without the above commit, the percpu memory
immediately after boot is 57MB, while with the above commit the percpu
memory is 231MB.

This is not good since percpu memory from the bpf memory allocator
is not widely used yet. Let us change pre-allocation in the init stage
to on-demand allocation when the verifier detects that a bpf program
needs percpu memory. With this change, percpu memory
consumption after boot can be reduced significantly.

  [1] https://lore.kernel.org/lkml/[email protected]/

Fixes: 41a5db8 ("bpf: Add support for non-fix-size percpu mem allocation")
Cc: Kirill A. Shutemov <[email protected]>
Signed-off-by: Yonghong Song <[email protected]>
  • Loading branch information
Yonghong Song authored and d-e-s-o committed Nov 10, 2023
1 parent 32f9f95 commit 8cc180b
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 8 deletions.
2 changes: 1 addition & 1 deletion include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
extern bool bpf_global_ma_set;

typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
Expand Down
8 changes: 3 additions & 5 deletions kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
#define OFF insn->off
#define IMM insn->imm

struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
bool bpf_global_ma_set, bpf_global_percpu_ma_set;
struct bpf_mem_alloc bpf_global_ma;
bool bpf_global_ma_set;

/* No hurry in this branch
*
Expand Down Expand Up @@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void)

ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
bpf_global_ma_set = !ret;
ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
bpf_global_percpu_ma_set = !ret;
return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
return ret;
}
late_initcall(bpf_global_ma_init);
#endif
Expand Down
17 changes: 15 additions & 2 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <linux/poison.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/bpf_mem_alloc.h>
#include <net/xdp.h>

#include "disasm.h"
Expand All @@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#undef BPF_LINK_TYPE
};

struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;

/* bpf_check() is a static code analyzer that walks eBPF program
* instruction by instruction and updates register/stack state.
* All paths of conditional branches are analyzed until 'bpf_exit' insn.
Expand Down Expand Up @@ -12287,8 +12291,17 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
return -ENOMEM;

if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
return -ENOMEM;
if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
mutex_lock(&bpf_verifier_lock);
if (!bpf_global_percpu_ma_set) {
err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
if (!err)
bpf_global_percpu_ma_set = true;
}
mutex_unlock(&bpf_verifier_lock);
if (err)
return err;
}

if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
Expand Down

0 comments on commit 8cc180b

Please sign in to comment.