From 6e866b328a97529f535f908bdfb4bc1e66b0bfa7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 22 Jun 2023 15:00:17 -1000 Subject: [PATCH 1/6] SCX: Drop _example prefix from scheduler names and make the naming more consistent The idea was to signify that the scheduler isn't ready for any kind of production use with the _example prefix. The distinction, however, isn't clear cut and changes over time. We already have README and help message detailing the behavior characteristics. Let's drop the prefix. While at it, make file and directory names more consistent too: * s/scx_example_userland_common.h/scx_userland.h/ * s/atropos/scx_atropos/ --- Documentation/scheduler/sched-ext.rst | 17 +++++---- tools/sched_ext/.gitignore | 12 +++---- tools/sched_ext/Makefile | 33 ++++++++--------- tools/sched_ext/README | 36 +++++++++---------- .../{atropos => scx_atropos}/.gitignore | 0 .../{atropos => scx_atropos}/Cargo.toml | 0 .../{atropos => scx_atropos}/build.rs | 0 .../{atropos => scx_atropos}/rustfmt.toml | 0 .../src/atropos_sys.rs | 0 .../src/bpf/atropos.bpf.c | 0 .../src/bpf/atropos.h | 0 .../{atropos => scx_atropos}/src/main.rs | 0 ...xample_central.bpf.c => scx_central.bpf.c} | 0 .../{scx_example_central.c => scx_central.c} | 10 +++--- ..._example_flatcg.bpf.c => scx_flatcg.bpf.c} | 2 +- .../{scx_example_flatcg.c => scx_flatcg.c} | 14 ++++---- .../{scx_example_flatcg.h => scx_flatcg.h} | 0 ...{scx_example_pair.bpf.c => scx_pair.bpf.c} | 2 +- .../{scx_example_pair.c => scx_pair.c} | 12 +++---- .../{scx_example_pair.h => scx_pair.h} | 0 ...{scx_example_qmap.bpf.c => scx_qmap.bpf.c} | 0 .../{scx_example_qmap.c => scx_qmap.c} | 10 +++--- ..._example_simple.bpf.c => scx_simple.bpf.c} | 0 .../{scx_example_simple.c => scx_simple.c} | 12 +++---- ...mple_userland.bpf.c => scx_userland.bpf.c} | 2 +- ...{scx_example_userland.c => scx_userland.c} | 14 ++++---- ...ample_userland_common.h => scx_userland.h} | 0 27 files changed, 86 insertions(+), 90 deletions(-) rename 
tools/sched_ext/{atropos => scx_atropos}/.gitignore (100%) rename tools/sched_ext/{atropos => scx_atropos}/Cargo.toml (100%) rename tools/sched_ext/{atropos => scx_atropos}/build.rs (100%) rename tools/sched_ext/{atropos => scx_atropos}/rustfmt.toml (100%) rename tools/sched_ext/{atropos => scx_atropos}/src/atropos_sys.rs (100%) rename tools/sched_ext/{atropos => scx_atropos}/src/bpf/atropos.bpf.c (100%) rename tools/sched_ext/{atropos => scx_atropos}/src/bpf/atropos.h (100%) rename tools/sched_ext/{atropos => scx_atropos}/src/main.rs (100%) rename tools/sched_ext/{scx_example_central.bpf.c => scx_central.bpf.c} (100%) rename tools/sched_ext/{scx_example_central.c => scx_central.c} (91%) rename tools/sched_ext/{scx_example_flatcg.bpf.c => scx_flatcg.bpf.c} (99%) rename tools/sched_ext/{scx_example_flatcg.c => scx_flatcg.c} (95%) rename tools/sched_ext/{scx_example_flatcg.h => scx_flatcg.h} (100%) rename tools/sched_ext/{scx_example_pair.bpf.c => scx_pair.bpf.c} (99%) rename tools/sched_ext/{scx_example_pair.c => scx_pair.c} (94%) rename tools/sched_ext/{scx_example_pair.h => scx_pair.h} (100%) rename tools/sched_ext/{scx_example_qmap.bpf.c => scx_qmap.bpf.c} (100%) rename tools/sched_ext/{scx_example_qmap.c => scx_qmap.c} (94%) rename tools/sched_ext/{scx_example_simple.bpf.c => scx_simple.bpf.c} (100%) rename tools/sched_ext/{scx_example_simple.c => scx_simple.c} (88%) rename tools/sched_ext/{scx_example_userland.bpf.c => scx_userland.bpf.c} (99%) rename tools/sched_ext/{scx_example_userland.c => scx_userland.c} (97%) rename tools/sched_ext/{scx_example_userland_common.h => scx_userland.h} (100%) diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst index 2ef2f409f4a668..25ddb535c29723 100644 --- a/Documentation/scheduler/sched-ext.rst +++ b/Documentation/scheduler/sched-ext.rst @@ -43,7 +43,7 @@ BPF scheduler and reverts all tasks back to CFS. .. 
code-block:: none # make -j16 -C tools/sched_ext - # tools/sched_ext/scx_example_simple + # tools/sched_ext/scx_simple local=0 global=3 local=5 global=24 local=9 global=44 @@ -73,8 +73,7 @@ Userspace can implement an arbitrary BPF scheduler by loading a set of BPF programs that implement ``struct sched_ext_ops``. The only mandatory field is ``ops.name`` which must be a valid BPF object name. All operations are optional. The following modified excerpt is from -``tools/sched/scx_example_simple.bpf.c`` showing a minimal global FIFO -scheduler. +``tools/sched_ext/scx_simple.bpf.c`` showing a minimal global FIFO scheduler. .. code-block:: c @@ -196,8 +195,8 @@ DSQs are consumed automatically. ``scx_bpf_dispatch()`` queues the task on the FIFO of the target DSQ. Use ``scx_bpf_dispatch_vtime()`` for the priority queue. See the function -documentation and usage in ``tools/sched_ext/scx_example_simple.bpf.c`` for -more information. +documentation and usage in ``tools/sched_ext/scx_simple.bpf.c`` for more +information. Where to Look ============= @@ -211,11 +210,11 @@ Where to Look * ``tools/sched_ext/`` hosts example BPF scheduler implementations. - * ``scx_example_simple[.bpf].c``: Minimal global FIFO scheduler example - using a custom DSQ. + * ``scx_simple[.bpf].c``: Minimal global FIFO scheduler example using a + custom DSQ. - * ``scx_example_qmap[.bpf].c``: A multi-level FIFO scheduler supporting - five levels of priority implemented with ``BPF_MAP_TYPE_QUEUE``. + * ``scx_qmap[.bpf].c``: A multi-level FIFO scheduler supporting five + levels of priority implemented with ``BPF_MAP_TYPE_QUEUE``. 
ABI Instability =============== diff --git a/tools/sched_ext/.gitignore b/tools/sched_ext/.gitignore index a3240f9f7ebae5..c63ee5e4f4bb08 100644 --- a/tools/sched_ext/.gitignore +++ b/tools/sched_ext/.gitignore @@ -1,9 +1,9 @@ -scx_example_simple -scx_example_qmap -scx_example_central -scx_example_pair -scx_example_flatcg -scx_example_userland +scx_simple +scx_qmap +scx_central +scx_pair +scx_flatcg +scx_userland *.skel.h *.subskel.h /tools/ diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile index 73c43782837d48..1515ff9cce7f93 100644 --- a/tools/sched_ext/Makefile +++ b/tools/sched_ext/Makefile @@ -117,8 +117,7 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ -Wall -Wno-compare-distinct-pointer-types \ -O2 -mcpu=v3 -all: scx_example_simple scx_example_qmap scx_example_central scx_example_pair \ - scx_example_flatcg scx_example_userland atropos +all: scx_simple scx_qmap scx_central scx_pair scx_flatcg scx_userland scx_atropos # sort removes libbpf duplicates when not cross-building MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ @@ -169,45 +168,43 @@ endif $(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked3.o) name $(<:.bpf.o=) > $@ $(Q)$(BPFTOOL) gen subskeleton $(<:.o=.linked3.o) name $(<:.bpf.o=) > $(@:.skel.h=.subskel.h) -scx_example_simple: scx_example_simple.c scx_example_simple.skel.h user_exit_info.h +scx_simple: scx_simple.c scx_simple.skel.h user_exit_info.h $(CC) $(CFLAGS) -c $< -o $@.o $(CC) -o $@ $@.o $(HOST_BPFOBJ) $(LDFLAGS) -scx_example_qmap: scx_example_qmap.c scx_example_qmap.skel.h user_exit_info.h +scx_qmap: scx_qmap.c scx_qmap.skel.h user_exit_info.h $(CC) $(CFLAGS) -c $< -o $@.o $(CC) -o $@ $@.o $(HOST_BPFOBJ) $(LDFLAGS) -scx_example_central: scx_example_central.c scx_example_central.skel.h user_exit_info.h +scx_central: scx_central.c scx_central.skel.h user_exit_info.h $(CC) $(CFLAGS) -c $< -o $@.o $(CC) -o $@ $@.o $(HOST_BPFOBJ) $(LDFLAGS) -scx_example_pair: scx_example_pair.c scx_example_pair.skel.h user_exit_info.h 
+scx_pair: scx_pair.c scx_pair.skel.h user_exit_info.h $(CC) $(CFLAGS) -c $< -o $@.o $(CC) -o $@ $@.o $(HOST_BPFOBJ) $(LDFLAGS) -scx_example_flatcg: scx_example_flatcg.c scx_example_flatcg.skel.h user_exit_info.h +scx_flatcg: scx_flatcg.c scx_flatcg.skel.h user_exit_info.h $(CC) $(CFLAGS) -c $< -o $@.o $(CC) -o $@ $@.o $(HOST_BPFOBJ) $(LDFLAGS) -scx_example_userland: scx_example_userland.c scx_example_userland.skel.h \ - scx_example_userland_common.h user_exit_info.h +scx_userland: scx_userland.c scx_userland.skel.h scx_userland.h user_exit_info.h $(CC) $(CFLAGS) -c $< -o $@.o $(CC) -o $@ $@.o $(HOST_BPFOBJ) $(LDFLAGS) -atropos: export RUSTFLAGS = -C link-args=-lzstd -C link-args=-lz -C link-args=-lelf -L $(BPFOBJ_DIR) -atropos: export ATROPOS_CLANG = $(CLANG) -atropos: export ATROPOS_BPF_CFLAGS = $(BPF_CFLAGS) -atropos: $(INCLUDE_DIR)/vmlinux.h - cargo build --manifest-path=atropos/Cargo.toml $(CARGOFLAGS) +scx_atropos: export RUSTFLAGS = -C link-args=-lzstd -C link-args=-lz -C link-args=-lelf -L $(BPFOBJ_DIR) +scx_atropos: export ATROPOS_CLANG = $(CLANG) +scx_atropos: export ATROPOS_BPF_CFLAGS = $(BPF_CFLAGS) +scx_atropos: $(INCLUDE_DIR)/vmlinux.h + cargo build --manifest-path=scx_atropos/Cargo.toml $(CARGOFLAGS) clean: - cargo clean --manifest-path=atropos/Cargo.toml + cargo clean --manifest-path=scx_atropos/Cargo.toml rm -rf $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) rm -f *.o *.bpf.o *.skel.h *.subskel.h - rm -f scx_example_simple scx_example_qmap scx_example_central \ - scx_example_pair scx_example_flatcg scx_example_userland + rm -f scx_simple scx_qmap scx_central scx_pair scx_flatcg scx_userland -.PHONY: all atropos clean +.PHONY: all scx_atropos clean # delete failed targets .DELETE_ON_ERROR: diff --git a/tools/sched_ext/README b/tools/sched_ext/README index 4a748aaacb20e1..33f413f8a4034d 100644 --- a/tools/sched_ext/README +++ b/tools/sched_ext/README @@ -94,8 +94,8 @@ architectures and workloads. 
-------------------------------------------------------------------------------- -scx_example_central -------------------- +scx_central +----------- Overview ~~~~~~~~ @@ -123,8 +123,8 @@ and does not yet have any kind of priority mechanism. -------------------------------------------------------------------------------- -scx_example_flatcg ------------------- +scx_flatcg +---------- Overview ~~~~~~~~ @@ -152,8 +152,8 @@ able to consume more CPU cycles than they are entitled to. -------------------------------------------------------------------------------- -scx_example_pair ----------------- +scx_pair +-------- Overview ~~~~~~~~ @@ -178,8 +178,8 @@ No -------------------------------------------------------------------------------- -scx_example_qmap ----------------- +scx_qmap +-------- Overview ~~~~~~~~ @@ -201,15 +201,15 @@ No -------------------------------------------------------------------------------- -scx_example_simple ------------------- +scx_simple +---------- Overview ~~~~~~~~ -A simple scheduler that provides an example of a minimal sched_ext scheduler. -scx_example_simple can be run in either global weighted vtime mode, or FIFO -mode. +A simple scheduler that provides an example of a minimal sched_ext +scheduler. scx_simple can be run in either global weighted vtime mode, or +FIFO mode. Typical Use Case ~~~~~~~~~~~~~~~~ @@ -228,8 +228,8 @@ simple scheduling policy. -------------------------------------------------------------------------------- -scx_example_userland --------------------- +scx_userland +------------ Overview ~~~~~~~~ @@ -259,6 +259,6 @@ Production Ready? ~~~~~~~~~~~~~~~~~ No. This scheduler uses an ordered list for vtime scheduling, and is stricly -less performant than just using something like `scx_example_simple`. It is -purely meant to illustrate that it's possible to build a user space scheduler -on top of sched_ext. +less performant than just using something like `scx_simple`. 
It is purely +meant to illustrate that it's possible to build a user space scheduler on +top of sched_ext. diff --git a/tools/sched_ext/atropos/.gitignore b/tools/sched_ext/scx_atropos/.gitignore similarity index 100% rename from tools/sched_ext/atropos/.gitignore rename to tools/sched_ext/scx_atropos/.gitignore diff --git a/tools/sched_ext/atropos/Cargo.toml b/tools/sched_ext/scx_atropos/Cargo.toml similarity index 100% rename from tools/sched_ext/atropos/Cargo.toml rename to tools/sched_ext/scx_atropos/Cargo.toml diff --git a/tools/sched_ext/atropos/build.rs b/tools/sched_ext/scx_atropos/build.rs similarity index 100% rename from tools/sched_ext/atropos/build.rs rename to tools/sched_ext/scx_atropos/build.rs diff --git a/tools/sched_ext/atropos/rustfmt.toml b/tools/sched_ext/scx_atropos/rustfmt.toml similarity index 100% rename from tools/sched_ext/atropos/rustfmt.toml rename to tools/sched_ext/scx_atropos/rustfmt.toml diff --git a/tools/sched_ext/atropos/src/atropos_sys.rs b/tools/sched_ext/scx_atropos/src/atropos_sys.rs similarity index 100% rename from tools/sched_ext/atropos/src/atropos_sys.rs rename to tools/sched_ext/scx_atropos/src/atropos_sys.rs diff --git a/tools/sched_ext/atropos/src/bpf/atropos.bpf.c b/tools/sched_ext/scx_atropos/src/bpf/atropos.bpf.c similarity index 100% rename from tools/sched_ext/atropos/src/bpf/atropos.bpf.c rename to tools/sched_ext/scx_atropos/src/bpf/atropos.bpf.c diff --git a/tools/sched_ext/atropos/src/bpf/atropos.h b/tools/sched_ext/scx_atropos/src/bpf/atropos.h similarity index 100% rename from tools/sched_ext/atropos/src/bpf/atropos.h rename to tools/sched_ext/scx_atropos/src/bpf/atropos.h diff --git a/tools/sched_ext/atropos/src/main.rs b/tools/sched_ext/scx_atropos/src/main.rs similarity index 100% rename from tools/sched_ext/atropos/src/main.rs rename to tools/sched_ext/scx_atropos/src/main.rs diff --git a/tools/sched_ext/scx_example_central.bpf.c b/tools/sched_ext/scx_central.bpf.c similarity index 100% rename from 
tools/sched_ext/scx_example_central.bpf.c rename to tools/sched_ext/scx_central.bpf.c diff --git a/tools/sched_ext/scx_example_central.c b/tools/sched_ext/scx_central.c similarity index 91% rename from tools/sched_ext/scx_example_central.c rename to tools/sched_ext/scx_central.c index 7ad591cbdc65c4..7481d3c9123a84 100644 --- a/tools/sched_ext/scx_example_central.c +++ b/tools/sched_ext/scx_central.c @@ -12,7 +12,7 @@ #include #include #include "user_exit_info.h" -#include "scx_example_central.skel.h" +#include "scx_central.skel.h" const char help_fmt[] = "A central FIFO sched_ext scheduler.\n" @@ -34,7 +34,7 @@ static void sigint_handler(int dummy) int main(int argc, char **argv) { - struct scx_example_central *skel; + struct scx_central *skel; struct bpf_link *link; u64 seq = 0; s32 opt; @@ -44,7 +44,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = scx_example_central__open(); + skel = scx_central__open(); assert(skel); skel->rodata->central_cpu = 0; @@ -64,7 +64,7 @@ int main(int argc, char **argv) } } - assert(!scx_example_central__load(skel)); + assert(!scx_central__load(skel)); link = bpf_map__attach_struct_ops(skel->maps.central_ops); assert(link); @@ -89,6 +89,6 @@ int main(int argc, char **argv) bpf_link__destroy(link); uei_print(&skel->bss->uei); - scx_example_central__destroy(skel); + scx_central__destroy(skel); return 0; } diff --git a/tools/sched_ext/scx_example_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c similarity index 99% rename from tools/sched_ext/scx_example_flatcg.bpf.c rename to tools/sched_ext/scx_flatcg.bpf.c index e79f941d588d9d..6d8c6f396577a2 100644 --- a/tools/sched_ext/scx_example_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -45,7 +45,7 @@ */ #include "scx_common.bpf.h" #include "user_exit_info.h" -#include "scx_example_flatcg.h" +#include "scx_flatcg.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/sched_ext/scx_example_flatcg.c b/tools/sched_ext/scx_flatcg.c similarity 
index 95% rename from tools/sched_ext/scx_example_flatcg.c rename to tools/sched_ext/scx_flatcg.c index f9c8a5b84a7034..40aa464c55b1a5 100644 --- a/tools/sched_ext/scx_example_flatcg.c +++ b/tools/sched_ext/scx_flatcg.c @@ -14,8 +14,8 @@ #include #include #include "user_exit_info.h" -#include "scx_example_flatcg.h" -#include "scx_example_flatcg.skel.h" +#include "scx_flatcg.h" +#include "scx_flatcg.skel.h" #ifndef FILEID_KERNFS #define FILEID_KERNFS 0xfe @@ -91,7 +91,7 @@ static float read_cpu_util(__u64 *last_sum, __u64 *last_idle) return delta_sum ? (float)(delta_sum - delta_idle) / delta_sum : 0.0; } -static void fcg_read_stats(struct scx_example_flatcg *skel, __u64 *stats) +static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats) { __u64 cnts[FCG_NR_STATS][skel->rodata->nr_cpus]; __u32 idx; @@ -112,7 +112,7 @@ static void fcg_read_stats(struct scx_example_flatcg *skel, __u64 *stats) int main(int argc, char **argv) { - struct scx_example_flatcg *skel; + struct scx_flatcg *skel; struct bpf_link *link; struct timespec intv_ts = { .tv_sec = 2, .tv_nsec = 0 }; bool dump_cgrps = false; @@ -126,7 +126,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = scx_example_flatcg__open(); + skel = scx_flatcg__open(); if (!skel) { fprintf(stderr, "Failed to open: %s\n", strerror(errno)); return 1; @@ -168,7 +168,7 @@ int main(int argc, char **argv) (double)intv_ts.tv_sec + (double)intv_ts.tv_nsec / 1000000000.0, dump_cgrps); - if (scx_example_flatcg__load(skel)) { + if (scx_flatcg__load(skel)) { fprintf(stderr, "Failed to load: %s\n", strerror(errno)); return 1; } @@ -227,6 +227,6 @@ int main(int argc, char **argv) bpf_link__destroy(link); uei_print(&skel->bss->uei); - scx_example_flatcg__destroy(skel); + scx_flatcg__destroy(skel); return 0; } diff --git a/tools/sched_ext/scx_example_flatcg.h b/tools/sched_ext/scx_flatcg.h similarity index 100% rename from tools/sched_ext/scx_example_flatcg.h rename to tools/sched_ext/scx_flatcg.h 
diff --git a/tools/sched_ext/scx_example_pair.bpf.c b/tools/sched_ext/scx_pair.bpf.c similarity index 99% rename from tools/sched_ext/scx_example_pair.bpf.c rename to tools/sched_ext/scx_pair.bpf.c index 078bdd94c98773..cda126980ed51a 100644 --- a/tools/sched_ext/scx_example_pair.bpf.c +++ b/tools/sched_ext/scx_pair.bpf.c @@ -116,7 +116,7 @@ * Copyright (c) 2022 David Vernet */ #include "scx_common.bpf.h" -#include "scx_example_pair.h" +#include "scx_pair.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/sched_ext/scx_example_pair.c b/tools/sched_ext/scx_pair.c similarity index 94% rename from tools/sched_ext/scx_example_pair.c rename to tools/sched_ext/scx_pair.c index 18e032bbc173b7..b35e4f511de6dd 100644 --- a/tools/sched_ext/scx_example_pair.c +++ b/tools/sched_ext/scx_pair.c @@ -12,8 +12,8 @@ #include #include #include "user_exit_info.h" -#include "scx_example_pair.h" -#include "scx_example_pair.skel.h" +#include "scx_pair.h" +#include "scx_pair.skel.h" const char help_fmt[] = "A demo sched_ext core-scheduler which always makes every sibling CPU pair\n" @@ -36,7 +36,7 @@ static void sigint_handler(int dummy) int main(int argc, char **argv) { - struct scx_example_pair *skel; + struct scx_pair *skel; struct bpf_link *link; u64 seq = 0; s32 stride, i, opt, outer_fd; @@ -46,7 +46,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = scx_example_pair__open(); + skel = scx_pair__open(); assert(skel); skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus(); @@ -79,7 +79,7 @@ int main(int argc, char **argv) } } - assert(!scx_example_pair__load(skel)); + assert(!scx_pair__load(skel)); /* * Populate the cgrp_q_arr map which is an array containing per-cgroup @@ -138,6 +138,6 @@ int main(int argc, char **argv) bpf_link__destroy(link); uei_print(&skel->bss->uei); - scx_example_pair__destroy(skel); + scx_pair__destroy(skel); return 0; } diff --git a/tools/sched_ext/scx_example_pair.h b/tools/sched_ext/scx_pair.h similarity index 
100% rename from tools/sched_ext/scx_example_pair.h rename to tools/sched_ext/scx_pair.h diff --git a/tools/sched_ext/scx_example_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c similarity index 100% rename from tools/sched_ext/scx_example_qmap.bpf.c rename to tools/sched_ext/scx_qmap.bpf.c diff --git a/tools/sched_ext/scx_example_qmap.c b/tools/sched_ext/scx_qmap.c similarity index 94% rename from tools/sched_ext/scx_example_qmap.c rename to tools/sched_ext/scx_qmap.c index ccb4814ee61ba9..0a02aa166b4788 100644 --- a/tools/sched_ext/scx_example_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -13,7 +13,7 @@ #include #include #include "user_exit_info.h" -#include "scx_example_qmap.skel.h" +#include "scx_qmap.skel.h" const char help_fmt[] = "A simple five-level FIFO queue sched_ext scheduler.\n" @@ -40,7 +40,7 @@ static void sigint_handler(int dummy) int main(int argc, char **argv) { - struct scx_example_qmap *skel; + struct scx_qmap *skel; struct bpf_link *link; int opt; @@ -49,7 +49,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = scx_example_qmap__open(); + skel = scx_qmap__open(); assert(skel); while ((opt = getopt(argc, argv, "s:e:t:T:l:d:ph")) != -1) { @@ -83,7 +83,7 @@ int main(int argc, char **argv) } } - assert(!scx_example_qmap__load(skel)); + assert(!scx_qmap__load(skel)); link = bpf_map__attach_struct_ops(skel->maps.qmap_ops); assert(link); @@ -102,6 +102,6 @@ int main(int argc, char **argv) bpf_link__destroy(link); uei_print(&skel->bss->uei); - scx_example_qmap__destroy(skel); + scx_qmap__destroy(skel); return 0; } diff --git a/tools/sched_ext/scx_example_simple.bpf.c b/tools/sched_ext/scx_simple.bpf.c similarity index 100% rename from tools/sched_ext/scx_example_simple.bpf.c rename to tools/sched_ext/scx_simple.bpf.c diff --git a/tools/sched_ext/scx_example_simple.c b/tools/sched_ext/scx_simple.c similarity index 88% rename from tools/sched_ext/scx_example_simple.c rename to tools/sched_ext/scx_simple.c index 
486b401f7c9510..4b2f0c16a9d1e0 100644 --- a/tools/sched_ext/scx_example_simple.c +++ b/tools/sched_ext/scx_simple.c @@ -12,7 +12,7 @@ #include #include #include "user_exit_info.h" -#include "scx_example_simple.skel.h" +#include "scx_simple.skel.h" const char help_fmt[] = "A simple sched_ext scheduler.\n" @@ -32,7 +32,7 @@ static void sigint_handler(int simple) exit_req = 1; } -static void read_stats(struct scx_example_simple *skel, u64 *stats) +static void read_stats(struct scx_simple *skel, u64 *stats) { int nr_cpus = libbpf_num_possible_cpus(); u64 cnts[2][nr_cpus]; @@ -54,7 +54,7 @@ static void read_stats(struct scx_example_simple *skel, u64 *stats) int main(int argc, char **argv) { - struct scx_example_simple *skel; + struct scx_simple *skel; struct bpf_link *link; u32 opt; @@ -63,7 +63,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = scx_example_simple__open(); + skel = scx_simple__open(); assert(skel); while ((opt = getopt(argc, argv, "fph")) != -1) { @@ -80,7 +80,7 @@ int main(int argc, char **argv) } } - assert(!scx_example_simple__load(skel)); + assert(!scx_simple__load(skel)); link = bpf_map__attach_struct_ops(skel->maps.simple_ops); assert(link); @@ -96,6 +96,6 @@ int main(int argc, char **argv) bpf_link__destroy(link); uei_print(&skel->bss->uei); - scx_example_simple__destroy(skel); + scx_simple__destroy(skel); return 0; } diff --git a/tools/sched_ext/scx_example_userland.bpf.c b/tools/sched_ext/scx_userland.bpf.c similarity index 99% rename from tools/sched_ext/scx_example_userland.bpf.c rename to tools/sched_ext/scx_userland.bpf.c index b62cce0b54e1b6..9e107a874a92d4 100644 --- a/tools/sched_ext/scx_example_userland.bpf.c +++ b/tools/sched_ext/scx_userland.bpf.c @@ -22,7 +22,7 @@ */ #include #include "scx_common.bpf.h" -#include "scx_example_userland_common.h" +#include "scx_userland.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/sched_ext/scx_example_userland.c b/tools/sched_ext/scx_userland.c 
similarity index 97% rename from tools/sched_ext/scx_example_userland.c rename to tools/sched_ext/scx_userland.c index 4152b1e65fe1a0..a63adae74f21f2 100644 --- a/tools/sched_ext/scx_example_userland.c +++ b/tools/sched_ext/scx_userland.c @@ -29,8 +29,8 @@ #include #include "user_exit_info.h" -#include "scx_example_userland_common.h" -#include "scx_example_userland.skel.h" +#include "scx_userland.h" +#include "scx_userland.skel.h" const char help_fmt[] = "A minimal userland sched_ext scheduler.\n" @@ -52,7 +52,7 @@ static __u32 batch_size = 8; static volatile int exit_req; static int enqueued_fd, dispatched_fd; -static struct scx_example_userland *skel; +static struct scx_userland *skel; static struct bpf_link *ops_link; /* Stats collected in user space. */ @@ -316,7 +316,7 @@ static int bootstrap(int argc, char **argv) return err; } - skel = scx_example_userland__open(); + skel = scx_userland__open(); if (!skel) { fprintf(stderr, "Failed to open scheduler: %s\n", strerror(errno)); return errno; @@ -327,7 +327,7 @@ static int bootstrap(int argc, char **argv) assert(skel->rodata->usersched_pid > 0); skel->rodata->switch_partial = switch_partial; - err = scx_example_userland__load(skel); + err = scx_userland__load(skel); if (err) { fprintf(stderr, "Failed to load scheduler: %s\n", strerror(err)); goto destroy_skel; @@ -354,7 +354,7 @@ static int bootstrap(int argc, char **argv) return 0; destroy_skel: - scx_example_userland__destroy(skel); + scx_userland__destroy(skel); exit_req = 1; return err; } @@ -397,6 +397,6 @@ int main(int argc, char **argv) exit_req = 1; bpf_link__destroy(ops_link); uei_print(&skel->bss->uei); - scx_example_userland__destroy(skel); + scx_userland__destroy(skel); return 0; } diff --git a/tools/sched_ext/scx_example_userland_common.h b/tools/sched_ext/scx_userland.h similarity index 100% rename from tools/sched_ext/scx_example_userland_common.h rename to tools/sched_ext/scx_userland.h From 6bd10f3beddbe1f1377e71ebdde856000a4bfb13 Mon Sep 17 
00:00:00 2001 From: Tejun Heo Date: Thu, 22 Jun 2023 15:40:07 -1000 Subject: [PATCH 2/6] SCX: A couple cosmetic updates --- kernel/sched/ext.c | 5 ++--- kernel/sched/ext.h | 10 +++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 6b762bf2fa51c6..7da59604b00240 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3723,9 +3723,8 @@ __diag_ignore_all("-Wmissing-prototypes", /** * scx_bpf_switch_all - Switch all tasks into SCX * - * Switch all existing and future non-dl/rt tasks to SCX. - * This can only be called from ops.init(), and actual switching - * is performed asynchronously. + * Switch all existing and future non-dl/rt tasks to SCX. This can only be + * called from ops.init(), and actual switching is performed asynchronously. */ void scx_bpf_switch_all(void) { diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 7e2900b8f6f21c..5001bcfec570c0 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -81,9 +81,8 @@ enum scx_deq_flags { SCX_DEQ_CORE_SCHED_EXEC = 1LLU << 32, }; -enum scx_tg_flags { - SCX_TG_ONLINE = 1U << 0, - SCX_TG_INITED = 1U << 1, +enum scx_pick_idle_cpu_flags { + SCX_PICK_IDLE_CORE = 1LLU << 0, /* pick a CPU whose SMT siblings are also idle */ }; enum scx_kick_flags { @@ -91,8 +90,9 @@ enum scx_kick_flags { SCX_KICK_WAIT = 1LLU << 1, /* wait for the CPU to be rescheduled */ }; -enum scx_pick_idle_cpu_flags { - SCX_PICK_IDLE_CORE = 1LLU << 0, /* pick a CPU whose SMT siblings are also idle */ +enum scx_tg_flags { + SCX_TG_ONLINE = 1U << 0, + SCX_TG_INITED = 1U << 1, }; #ifdef CONFIG_SCHED_CLASS_EXT From 3bce5ab15ff327a0b8ef7a7cb83cfe969d85c421 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 22 Jun 2023 16:50:27 -1000 Subject: [PATCH 3/6] sched, SCX: Drop SCHED_CHANGE_BLOCK Upstream is adopting a generic guard block mechanism. Let's drop SCHED_CHANGE_BLOCK from SCX patchset so that it's easier to adapt to the new mechanism later. No functional change intended. 
--- kernel/sched/core.c | 264 +++++++++++++++++++++++++++---------------- kernel/sched/ext.c | 41 ++++--- kernel/sched/ext.h | 11 ++ kernel/sched/sched.h | 41 ------- 4 files changed, 202 insertions(+), 155 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3ff57b0b2bc844..98236b1bec4987 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2132,40 +2132,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) dequeue_task(rq, p, flags); } -struct sched_change_guard -sched_change_guard_init(struct rq *rq, struct task_struct *p, int flags) -{ - struct sched_change_guard cg = { - .rq = rq, - .p = p, - .queued = task_on_rq_queued(p), - .running = task_current(rq, p), - }; - - if (cg.queued) { - /* - * __kthread_bind() may call this on blocked tasks without - * holding rq->lock through __do_set_cpus_allowed(). Assert @rq - * locked iff @p is queued. - */ - lockdep_assert_rq_held(rq); - dequeue_task(rq, p, flags); - } - if (cg.running) - put_prev_task(rq, p); - - return cg; -} - -void sched_change_guard_fini(struct sched_change_guard *cg, int flags) -{ - if (cg->queued) - enqueue_task(cg->rq, cg->p, flags | ENQUEUE_NOCLOCK); - if (cg->running) - set_next_task(cg->rq, cg->p); - cg->done = true; -} - static inline int __normal_prio(int policy, int rt_prio, int nice) { int prio; @@ -2635,6 +2601,7 @@ static void __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx) { struct rq *rq = task_rq(p); + bool queued, running; /* * This here violates the locking rules for affinity, since we're only @@ -2653,9 +2620,26 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx) else lockdep_assert_held(&p->pi_lock); - SCHED_CHANGE_BLOCK(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) { - p->sched_class->set_cpus_allowed(p, ctx); + queued = task_on_rq_queued(p); + running = task_current(rq, p); + + if (queued) { + /* + * Because __kthread_bind() calls this on blocked tasks without + * holding rq->lock. 
+ */ + lockdep_assert_rq_held(rq); + dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); } + if (running) + put_prev_task(rq, p); + + p->sched_class->set_cpus_allowed(p, ctx); + + if (queued) + enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); + if (running) + set_next_task(rq, p); } /* @@ -7132,7 +7116,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio) */ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) { - int prio, oldprio, queue_flag = + int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; const struct sched_class *prev_class; struct rq_flags rf; @@ -7192,41 +7176,51 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) queue_flag &= ~DEQUEUE_MOVE; prev_class = p->sched_class; - SCHED_CHANGE_BLOCK(rq, p, queue_flag) { - /* - * Boosting condition are: - * 1. -rt task is running and holds mutex A - * --> -dl task blocks on mutex A - * - * 2. -dl task is running and holds mutex A - * --> -dl task blocks on mutex A and could preempt the - * running task - */ - if (dl_prio(prio)) { - if (!dl_prio(p->normal_prio) || - (pi_task && dl_prio(pi_task->prio) && - dl_entity_preempt(&pi_task->dl, &p->dl))) { - p->dl.pi_se = pi_task->dl.pi_se; - queue_flag |= ENQUEUE_REPLENISH; - } else { - p->dl.pi_se = &p->dl; - } - } else if (rt_prio(prio)) { - if (dl_prio(oldprio)) - p->dl.pi_se = &p->dl; - if (oldprio < prio) - queue_flag |= ENQUEUE_HEAD; + queued = task_on_rq_queued(p); + running = task_current(rq, p); + if (queued) + dequeue_task(rq, p, queue_flag); + if (running) + put_prev_task(rq, p); + + /* + * Boosting condition are: + * 1. -rt task is running and holds mutex A + * --> -dl task blocks on mutex A + * + * 2. 
-dl task is running and holds mutex A + * --> -dl task blocks on mutex A and could preempt the + * running task + */ + if (dl_prio(prio)) { + if (!dl_prio(p->normal_prio) || + (pi_task && dl_prio(pi_task->prio) && + dl_entity_preempt(&pi_task->dl, &p->dl))) { + p->dl.pi_se = pi_task->dl.pi_se; + queue_flag |= ENQUEUE_REPLENISH; } else { - if (dl_prio(oldprio)) - p->dl.pi_se = &p->dl; - if (rt_prio(oldprio)) - p->rt.timeout = 0; + p->dl.pi_se = &p->dl; } - - __setscheduler_prio(p, prio); - check_class_changing(rq, p, prev_class); + } else if (rt_prio(prio)) { + if (dl_prio(oldprio)) + p->dl.pi_se = &p->dl; + if (oldprio < prio) + queue_flag |= ENQUEUE_HEAD; + } else { + if (dl_prio(oldprio)) + p->dl.pi_se = &p->dl; + if (rt_prio(oldprio)) + p->rt.timeout = 0; } + __setscheduler_prio(p, prio); + check_class_changing(rq, p, prev_class); + + if (queued) + enqueue_task(rq, p, queue_flag); + if (running) + set_next_task(rq, p); + check_class_changed(rq, p, prev_class, oldprio); out_unlock: /* Avoid rq from going away on us: */ @@ -7247,6 +7241,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio) void set_user_nice(struct task_struct *p, long nice) { + bool queued, running; int old_prio; struct rq_flags rf; struct rq *rq; @@ -7270,13 +7265,22 @@ void set_user_nice(struct task_struct *p, long nice) p->static_prio = NICE_TO_PRIO(nice); goto out_unlock; } + queued = task_on_rq_queued(p); + running = task_current(rq, p); + if (queued) + dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); + if (running) + put_prev_task(rq, p); - SCHED_CHANGE_BLOCK(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) { - p->static_prio = NICE_TO_PRIO(nice); - set_load_weight(p, true); - old_prio = p->prio; - p->prio = effective_prio(p); - } + p->static_prio = NICE_TO_PRIO(nice); + set_load_weight(p, true); + old_prio = p->prio; + p->prio = effective_prio(p); + + if (queued) + enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); + if (running) + set_next_task(rq, p); /* * If the task 
increased its priority or is running and @@ -7660,7 +7664,7 @@ static int __sched_setscheduler(struct task_struct *p, bool user, bool pi) { int oldpolicy = -1, policy = attr->sched_policy; - int retval, oldprio, newprio; + int retval, oldprio, newprio, queued, running; const struct sched_class *prev_class; struct balance_callback *head; struct rq_flags rf; @@ -7829,24 +7833,34 @@ static int __sched_setscheduler(struct task_struct *p, queue_flags &= ~DEQUEUE_MOVE; } - SCHED_CHANGE_BLOCK(rq, p, queue_flags) { - prev_class = p->sched_class; + queued = task_on_rq_queued(p); + running = task_current(rq, p); + if (queued) + dequeue_task(rq, p, queue_flags); + if (running) + put_prev_task(rq, p); - if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { - __setscheduler_params(p, attr); - __setscheduler_prio(p, newprio); - } - __setscheduler_uclamp(p, attr); + prev_class = p->sched_class; - check_class_changing(rq, p, prev_class); + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { + __setscheduler_params(p, attr); + __setscheduler_prio(p, newprio); + } + __setscheduler_uclamp(p, attr); + check_class_changing(rq, p, prev_class); + if (queued) { /* * We enqueue to tail when the priority of a task is * increased (user space view). 
*/ if (oldprio < p->prio) queue_flags |= ENQUEUE_HEAD; + + enqueue_task(rq, p, queue_flags); } + if (running) + set_next_task(rq, p); check_class_changed(rq, p, prev_class, oldprio); @@ -9419,15 +9433,25 @@ int migrate_task_to(struct task_struct *p, int target_cpu) */ void sched_setnuma(struct task_struct *p, int nid) { + bool queued, running; struct rq_flags rf; struct rq *rq; rq = task_rq_lock(p, &rf); + queued = task_on_rq_queued(p); + running = task_current(rq, p); - SCHED_CHANGE_BLOCK(rq, p, DEQUEUE_SAVE) { - p->numa_preferred_nid = nid; - } + if (queued) + dequeue_task(rq, p, DEQUEUE_SAVE); + if (running) + put_prev_task(rq, p); + p->numa_preferred_nid = nid; + + if (queued) + enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); + if (running) + set_next_task(rq, p); task_rq_unlock(rq, p, &rf); } #endif /* CONFIG_NUMA_BALANCING */ @@ -10534,6 +10558,8 @@ static void sched_change_group(struct task_struct *tsk, struct task_group *group */ void sched_move_task(struct task_struct *tsk) { + int queued, running, queue_flags = + DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; struct task_group *group; struct rq_flags rf; struct rq *rq; @@ -10549,19 +10575,28 @@ void sched_move_task(struct task_struct *tsk) update_rq_clock(rq); - SCHED_CHANGE_BLOCK(rq, tsk, - DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK) { - sched_change_group(tsk, group); - scx_move_task(tsk); - } + running = task_current(rq, tsk); + queued = task_on_rq_queued(tsk); - /* - * After changing group, the running task may have joined a throttled - * one but it's still the running task. Trigger a resched to make sure - * that task can still run. 
- */ - if (task_current(rq, tsk)) + if (queued) + dequeue_task(rq, tsk, queue_flags); + if (running) + put_prev_task(rq, tsk); + + sched_change_group(tsk, group); + scx_move_task(tsk); + + if (queued) + enqueue_task(rq, tsk, queue_flags); + if (running) { + set_next_task(rq, tsk); + /* + * After changing group, the running task may have joined a + * throttled one but it's still the running task. Trigger a + * resched to make sure that task can still run. + */ resched_curr(rq); + } unlock: task_rq_unlock(rq, tsk, &rf); @@ -12121,3 +12156,38 @@ void sched_mm_cid_fork(struct task_struct *t) t->mm_cid_active = 1; } #endif + +#ifdef CONFIG_SCHED_CLASS_EXT +void sched_deq_and_put_task(struct task_struct *p, int queue_flags, + struct sched_enq_and_set_ctx *ctx) +{ + struct rq *rq = task_rq(p); + + lockdep_assert_rq_held(rq); + + *ctx = (struct sched_enq_and_set_ctx){ + .p = p, + .queue_flags = queue_flags | DEQUEUE_NOCLOCK, + .queued = task_on_rq_queued(p), + .running = task_current(rq, p), + }; + + update_rq_clock(rq); + if (ctx->queued) + dequeue_task(rq, p, queue_flags); + if (ctx->running) + put_prev_task(rq, p); +} + +void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx) +{ + struct rq *rq = task_rq(ctx->p); + + lockdep_assert_rq_held(rq); + + if (ctx->queued) + enqueue_task(rq, ctx->p, ctx->queue_flags); + if (ctx->running) + set_next_task(rq, ctx->p); +} +#endif /* CONFIG_SCHED_CLASS_EXT */ diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7da59604b00240..7906ba5e1564b1 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2984,10 +2984,11 @@ static void scx_ops_disable_workfn(struct kthread_work *work) scx_task_iter_init(&sti); while ((p = scx_task_iter_next_filtered_locked(&sti))) { if (READ_ONCE(p->__state) != TASK_DEAD) { - SCHED_CHANGE_BLOCK(task_rq(p), p, - DEQUEUE_SAVE | DEQUEUE_MOVE) { - /* cycling deq/enq is enough, see above */ - } + struct sched_enq_and_set_ctx ctx; + + /* cycling deq/enq is enough, see above */ + 
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); + sched_enq_and_set_task(&ctx); } } scx_task_iter_exit(&sti); @@ -3018,18 +3019,21 @@ static void scx_ops_disable_workfn(struct kthread_work *work) while ((p = scx_task_iter_next_filtered_locked(&sti))) { const struct sched_class *old_class = p->sched_class; struct rq *rq = task_rq(p); + struct sched_enq_and_set_ctx ctx; bool alive = READ_ONCE(p->__state) != TASK_DEAD; update_rq_clock(rq); - SCHED_CHANGE_BLOCK(rq, p, DEQUEUE_SAVE | DEQUEUE_MOVE | - DEQUEUE_NOCLOCK) { - p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL); + sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE | + DEQUEUE_NOCLOCK, &ctx); - __setscheduler_prio(p, p->prio); - if (alive) - check_class_changing(task_rq(p), p, old_class); - } + p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL); + + __setscheduler_prio(p, p->prio); + if (alive) + check_class_changing(task_rq(p), p, old_class); + + sched_enq_and_set_task(&ctx); if (alive) check_class_changed(task_rq(p), p, old_class, p->prio); @@ -3333,15 +3337,18 @@ static int scx_ops_enable(struct sched_ext_ops *ops) if (READ_ONCE(p->__state) != TASK_DEAD) { const struct sched_class *old_class = p->sched_class; struct rq *rq = task_rq(p); + struct sched_enq_and_set_ctx ctx; update_rq_clock(rq); - SCHED_CHANGE_BLOCK(rq, p, DEQUEUE_SAVE | DEQUEUE_MOVE | - DEQUEUE_NOCLOCK) { - scx_ops_enable_task(p); - __setscheduler_prio(p, p->prio); - check_class_changing(task_rq(p), p, old_class); - } + sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE | + DEQUEUE_NOCLOCK, &ctx); + + scx_ops_enable_task(p); + __setscheduler_prio(p, p->prio); + check_class_changing(task_rq(p), p, old_class); + + sched_enq_and_set_task(&ctx); check_class_changed(task_rq(p), p, old_class, p->prio); } else { diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 5001bcfec570c0..405037a4e6ce71 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -97,6 +97,17 @@ enum scx_tg_flags { #ifdef 
CONFIG_SCHED_CLASS_EXT +struct sched_enq_and_set_ctx { + struct task_struct *p; + int queue_flags; + bool queued; + bool running; +}; + +void sched_deq_and_put_task(struct task_struct *p, int queue_flags, + struct sched_enq_and_set_ctx *ctx); +void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx); + extern const struct sched_class ext_sched_class; extern const struct bpf_verifier_ops bpf_sched_ext_verifier_ops; extern const struct file_operations sched_ext_fops; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6d2876a981c52f..e7b15bd7adbc24 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2527,47 +2527,6 @@ static inline void sub_nr_running(struct rq *rq, unsigned count) extern void activate_task(struct rq *rq, struct task_struct *p, int flags); extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); -struct sched_change_guard { - struct task_struct *p; - struct rq *rq; - bool queued; - bool running; - bool done; -}; - -extern struct sched_change_guard -sched_change_guard_init(struct rq *rq, struct task_struct *p, int flags); - -extern void sched_change_guard_fini(struct sched_change_guard *cg, int flags); - -/** - * SCHED_CHANGE_BLOCK - Nested block for task attribute updates - * @__rq: Runqueue the target task belongs to - * @__p: Target task - * @__flags: DEQUEUE/ENQUEUE_* flags - * - * A task may need to be dequeued and put_prev_task'd for attribute updates and - * set_next_task'd and re-enqueued afterwards. This helper defines a nested - * block which automatically handles these preparation and cleanup operations. - * - * SCHED_CHANGE_BLOCK(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) { - * update_attribute(p); - * ... - * } - * - * If @__flags is a variable, the variable may be updated in the block body and - * the updated value will be used when re-enqueueing @p. - * - * If %DEQUEUE_NOCLOCK is specified, the caller is responsible for calling - * update_rq_clock() beforehand. 
Otherwise, the rq clock is automatically - * updated iff the task needs to be dequeued and re-enqueued. Only the former - * case guarantees that the rq clock is up-to-date inside and after the block. - */ -#define SCHED_CHANGE_BLOCK(__rq, __p, __flags) \ - for (struct sched_change_guard __cg = \ - sched_change_guard_init(__rq, __p, __flags); \ - !__cg.done; sched_change_guard_fini(&__cg, __flags)) - extern void check_class_changing(struct rq *rq, struct task_struct *p, const struct sched_class *prev_class); extern void check_class_changed(struct rq *rq, struct task_struct *p, From 97eb64719aef365adb91ed79b7d657cfef1ed73b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 23 Jun 2023 10:48:24 -1000 Subject: [PATCH 4/6] SCX: Drop implicit DEQUEUE_NOCLOCK from sched_deq_and_put_task() This snuck in while forward porting. The current users of deq_and_put manage DEQUEUE_NOCLOCK themselves and it shouldn't be set implicitly. --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 98236b1bec4987..4b8245b223ab0d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -12167,7 +12167,7 @@ void sched_deq_and_put_task(struct task_struct *p, int queue_flags, *ctx = (struct sched_enq_and_set_ctx){ .p = p, - .queue_flags = queue_flags | DEQUEUE_NOCLOCK, + .queue_flags = queue_flags, .queued = task_on_rq_queued(p), .running = task_current(rq, p), }; From 8460dc07829a936ef37eeea2f15f65237b34bdd2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 23 Jun 2023 11:53:04 -1000 Subject: [PATCH 5/6] SCX: Build fixes when !CONFIG_SCHED_SMT or !CONFIG_CGROUP_SCHED --- kernel/sched/ext.c | 54 ++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7906ba5e1564b1..6d524ffe37d259 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1599,6 +1599,7 @@ static int balance_scx(struct rq *rq, struct 
task_struct *prev, ret = balance_one(rq, prev, rf, true); +#ifdef CONFIG_SCHED_SMT /* * When core-sched is enabled, this ops.balance() call will be followed * by put_prev_scx() and pick_task_scx() on this CPU and pick_task_scx() @@ -1629,7 +1630,7 @@ static int balance_scx(struct rq *rq, struct task_struct *prev, rq_repin_lock(rq, rf); } } - +#endif return ret; } @@ -1925,13 +1926,14 @@ void __scx_notify_pick_next_task(struct rq *rq, struct task_struct *task, static bool test_and_clear_cpu_idle(int cpu) { +#ifdef CONFIG_SCHED_SMT /* * SMT mask should be cleared whether we can claim @cpu or not. The SMT * cluster is not wholly idle either way. This also prevents * scx_pick_idle_cpu() from getting caught in an infinite loop. */ if (sched_smt_active()) { - const struct cpumask *sbm = topology_sibling_cpumask(cpu); + const struct cpumask *smt = cpu_smt_mask(cpu); /* * If offline, @cpu is not its own sibling and @@ -1939,12 +1941,12 @@ static bool test_and_clear_cpu_idle(int cpu) * @cpu is never cleared from idle_masks.smt. Ensure that @cpu * is eventually cleared. 
*/ - if (cpumask_intersects(sbm, idle_masks.smt)) - cpumask_andnot(idle_masks.smt, idle_masks.smt, sbm); + if (cpumask_intersects(smt, idle_masks.smt)) + cpumask_andnot(idle_masks.smt, idle_masks.smt, smt); else if (cpumask_test_cpu(cpu, idle_masks.smt)) __cpumask_clear_cpu(cpu, idle_masks.smt); } - +#endif return cpumask_test_and_clear_cpu(cpu, idle_masks.cpu); } @@ -2076,7 +2078,6 @@ static void reset_idle_masks(void) void __scx_update_idle(struct rq *rq, bool idle) { int cpu = cpu_of(rq); - struct cpumask *sib_mask = topology_sibling_cpumask(cpu); if (SCX_HAS_OP(update_idle)) { SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle); @@ -2084,22 +2085,30 @@ void __scx_update_idle(struct rq *rq, bool idle) return; } - if (idle) { + if (idle) cpumask_set_cpu(cpu, idle_masks.cpu); + else + cpumask_clear_cpu(cpu, idle_masks.cpu); - /* - * idle_masks.smt handling is racy but that's fine as it's only - * for optimization and self-correcting. - */ - for_each_cpu(cpu, sib_mask) { - if (!cpumask_test_cpu(cpu, idle_masks.cpu)) - return; +#ifdef CONFIG_SCHED_SMT + if (sched_smt_active()) { + const struct cpumask *smt = cpu_smt_mask(cpu); + + if (idle) { + /* + * idle_masks.smt handling is racy but that's fine as + * it's only for optimization and self-correcting. 
+ */ + for_each_cpu(cpu, smt) { + if (!cpumask_test_cpu(cpu, idle_masks.cpu)) + return; + } + cpumask_or(idle_masks.smt, idle_masks.smt, smt); + } else { + cpumask_andnot(idle_masks.smt, idle_masks.smt, smt); + } - cpumask_or(idle_masks.smt, idle_masks.smt, sib_mask); - } else { - cpumask_clear_cpu(cpu, idle_masks.cpu); - cpumask_andnot(idle_masks.smt, idle_masks.smt, sib_mask); } +#endif } static void rq_online_scx(struct rq *rq, enum rq_onoff_reason reason) @@ -4212,7 +4221,10 @@ const struct cpumask *scx_bpf_get_idle_smtmask(void) } #ifdef CONFIG_SMP - return idle_masks.smt; + if (sched_smt_active()) + return idle_masks.smt; + else + return idle_masks.cpu; #else return cpu_none_mask; #endif @@ -4336,6 +4348,7 @@ s32 scx_bpf_task_cpu(const struct task_struct *p) * rq-locked operations. Can be called on the parameter tasks of rq-locked * operations. The restriction guarantees that @p's rq is locked by the caller. */ +#ifdef CONFIG_CGROUP_SCHED struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) { struct task_group *tg = p->sched_task_group; @@ -4357,6 +4370,7 @@ struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) cgroup_get(cgrp); return cgrp; } +#endif BTF_SET8_START(scx_kfunc_ids_any) BTF_ID_FLAGS(func, scx_bpf_kick_cpu) @@ -4371,7 +4385,9 @@ BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU) +#ifdef CONFIG_CGROUP_SCHED BTF_ID_FLAGS(func, scx_bpf_task_cgroup, KF_RCU | KF_ACQUIRE) +#endif BTF_SET8_END(scx_kfunc_ids_any) static const struct btf_kfunc_id_set scx_kfunc_set_any = { From d0a8cea8900ecca6fb2e259ca2b02393e3681807 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 26 Jun 2023 14:10:58 -1000 Subject: [PATCH 6/6] SCX: Always imply NOCLOCK in sched_deq_and_put_task/sched_enq_and_set_task() SCHED_CHANGE_BLOCK() was used in other places where avoiding clock updates may be a useful optimization.
However, the sched_deq_and_put_task() and sched_enq_and_set_task() pair is only used by SCX switching paths, which want the clock always updated. The recent conversion away from SCHED_CHANGE_BLOCK() left out ENQUEUE_NOCLOCK when re-enqueueing, which could lead to double rq clock update warnings. Let's restore the original behavior where sched_deq_and_put_task() always calls update_rq_clock() and the deq and enq calls implicitly set the NOCLOCK flags. --- kernel/sched/core.c | 4 ++-- kernel/sched/ext.c | 13 +++---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4b8245b223ab0d..9128160b6264bd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -12174,7 +12174,7 @@ void sched_deq_and_put_task(struct task_struct *p, int queue_flags, update_rq_clock(rq); if (ctx->queued) - dequeue_task(rq, p, queue_flags); + dequeue_task(rq, p, queue_flags | DEQUEUE_NOCLOCK); if (ctx->running) put_prev_task(rq, p); } @@ -12186,7 +12186,7 @@ void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx) lockdep_assert_rq_held(rq); if (ctx->queued) - enqueue_task(rq, ctx->p, ctx->queue_flags); + enqueue_task(rq, ctx->p, ctx->queue_flags | ENQUEUE_NOCLOCK); if (ctx->running) set_next_task(rq, ctx->p); } diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 6d524ffe37d259..0db6d400d340e6 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3027,14 +3027,10 @@ static void scx_ops_disable_workfn(struct kthread_work *work) scx_task_iter_init(&sti); while ((p = scx_task_iter_next_filtered_locked(&sti))) { const struct sched_class *old_class = p->sched_class; - struct rq *rq = task_rq(p); struct sched_enq_and_set_ctx ctx; bool alive = READ_ONCE(p->__state) != TASK_DEAD; - update_rq_clock(rq); - - sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE | - DEQUEUE_NOCLOCK, &ctx); + sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx); p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL); @@ -3345,13 +3341,10
@@ static int scx_ops_enable(struct sched_ext_ops *ops) while ((p = scx_task_iter_next_filtered_locked(&sti))) { if (READ_ONCE(p->__state) != TASK_DEAD) { const struct sched_class *old_class = p->sched_class; - struct rq *rq = task_rq(p); struct sched_enq_and_set_ctx ctx; - update_rq_clock(rq); - - sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE | - DEQUEUE_NOCLOCK, &ctx); + sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, + &ctx); scx_ops_enable_task(p); __setscheduler_prio(p, p->prio);