Skip to content

Commit

Permalink
Merge pull request torvalds#104 from sched-ext/select_cpu_dfl
Browse files Browse the repository at this point in the history
Allow dispatching from ops.select_cpu()
  • Loading branch information
Byte-Lab authored Jan 4, 2024
2 parents 79d694e + 9fd2c3b commit d788214
Show file tree
Hide file tree
Showing 23 changed files with 938 additions and 75 deletions.
67 changes: 57 additions & 10 deletions Documentation/scheduler/sched-ext.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,57 @@ optional. The following modified excerpt is from

.. code-block:: c
s32 BPF_STRUCT_OPS(simple_init)
/*
* Decide which CPU a task should be migrated to before being
* enqueued (either at wakeup, fork time, or exec time). If an
* idle core is found by the default ops.select_cpu() implementation,
* then dispatch the task directly to SCX_DSQ_LOCAL and skip the
* ops.enqueue() callback.
*
* Note that this implementation has exactly the same behavior as the
* default ops.select_cpu implementation. The behavior of the scheduler
* would be exactly the same if the implementation just didn't define the
* simple_select_cpu() struct_ops prog.
*/
s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p,
s32 prev_cpu, u64 wake_flags)
{
if (!switch_partial)
scx_bpf_switch_all();
return 0;
s32 cpu;
/* Need to initialize or the BPF verifier will reject the program */
bool direct = false;
cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &direct);
if (direct)
scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
return cpu;
}
/*
* Do a direct dispatch of a task to the global DSQ. This ops.enqueue()
* callback will only be invoked if we failed to find a core to dispatch
* to in ops.select_cpu() above.
*
* Note that this implementation has exactly the same behavior as the
* default ops.enqueue implementation, which just dispatches the task
* to SCX_DSQ_GLOBAL. The behavior of the scheduler would be exactly the
* same if the implementation just didn't define the simple_enqueue
* struct_ops prog.
*/
void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
{
if (enq_flags & SCX_ENQ_LOCAL)
scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags);
else
scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}
s32 BPF_STRUCT_OPS(simple_init)
{
/*
* All SCHED_OTHER, SCHED_IDLE, and SCHED_BATCH tasks should
* use sched_ext.
*/
scx_bpf_switch_all();
return 0;
}
void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
Expand All @@ -99,6 +137,7 @@ optional. The following modified excerpt is from
SEC(".struct_ops")
struct sched_ext_ops simple_ops = {
.select_cpu = (void *)simple_select_cpu,
.enqueue = (void *)simple_enqueue,
.init = (void *)simple_init,
.exit = (void *)simple_exit,
Expand Down Expand Up @@ -142,11 +181,19 @@ The following briefly shows how a waking task is scheduled and executed.
scheduler can wake up any cpu using the ``scx_bpf_kick_cpu()`` helper,
using ``ops.select_cpu()`` judiciously can be simpler and more efficient.

A task can be immediately dispatched to a DSQ from ``ops.select_cpu()`` by
calling ``scx_bpf_dispatch()``. If the task is dispatched to
``SCX_DSQ_LOCAL`` from ``ops.select_cpu()``, it will be dispatched to the
local DSQ of whichever CPU is returned from ``ops.select_cpu()``.
Additionally, dispatching directly from ``ops.select_cpu()`` will cause the
``ops.enqueue()`` callback to be skipped.

Note that the scheduler core will ignore an invalid CPU selection, for
example, if it's outside the allowed cpumask of the task.

2. Once the target CPU is selected, ``ops.enqueue()`` is invoked. It can
make one of the following decisions:
2. Once the target CPU is selected, ``ops.enqueue()`` is invoked (unless the
task was dispatched directly from ``ops.select_cpu()``). ``ops.enqueue()``
can make one of the following decisions:

* Immediately dispatch the task to either the global or local DSQ by
calling ``scx_bpf_dispatch()`` with ``SCX_DSQ_GLOBAL`` or
Expand Down
25 changes: 20 additions & 5 deletions include/linux/sched/ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,11 @@ struct sched_ext_ops {
* If an idle CPU is returned, the CPU is kicked and will try to
* dispatch. While an explicit custom mechanism can be added,
* select_cpu() serves as the default way to wake up idle CPUs.
*
* @p may be dispatched directly by calling scx_bpf_dispatch(). If @p
* is dispatched, the ops.enqueue() callback will be skipped. Finally,
* if @p is dispatched to SCX_DSQ_LOCAL, it will be dispatched to the
* local DSQ of whatever CPU is returned by this callback.
*/
s32 (*select_cpu)(struct task_struct *p, s32 prev_cpu, u64 wake_flags);

Expand All @@ -196,6 +201,9 @@ struct sched_ext_ops {
* or enqueue on the BPF scheduler. If not directly dispatched, the BPF
* scheduler owns @p and if it fails to dispatch @p, the task will
* stall.
*
* If @p was dispatched from ops.select_cpu(), this callback is
* skipped.
*/
void (*enqueue)(struct task_struct *p, u64 enq_flags);

Expand Down Expand Up @@ -597,7 +605,7 @@ struct scx_dispatch_q {
enum scx_ent_flags {
SCX_TASK_QUEUED = 1 << 0, /* on ext runqueue */
SCX_TASK_BAL_KEEP = 1 << 1, /* balance decided to keep current */
SCX_TASK_ENQ_LOCAL = 1 << 2, /* used by scx_select_cpu_dfl() to set SCX_ENQ_LOCAL */
SCX_TASK_DDSP_PRIQ = 1 << 2, /* task should be enqueued on priq when directly dispatched */

SCX_TASK_OPS_PREPPED = 1 << 8, /* prepared for BPF scheduler enable */
SCX_TASK_OPS_ENABLED = 1 << 9, /* task has BPF scheduler enabled */
Expand Down Expand Up @@ -630,12 +638,13 @@ enum scx_kf_mask {
SCX_KF_CPU_RELEASE = 1 << 2, /* ops.cpu_release() */
/* ops.dequeue (in REST) may be nested inside DISPATCH */
SCX_KF_DISPATCH = 1 << 3, /* ops.dispatch() */
SCX_KF_ENQUEUE = 1 << 4, /* ops.enqueue() */
SCX_KF_REST = 1 << 5, /* other rq-locked operations */
SCX_KF_ENQUEUE = 1 << 4, /* ops.enqueue() and ops.select_cpu() */
SCX_KF_SELECT_CPU = 1 << 5, /* ops.select_cpu() */
SCX_KF_REST = 1 << 6, /* other rq-locked operations */

__SCX_KF_RQ_LOCKED = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH |
SCX_KF_ENQUEUE | SCX_KF_REST,
__SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_REST,
SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
__SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
};

/*
Expand Down Expand Up @@ -685,6 +694,12 @@ struct sched_ext_entity {
*/
u64 dsq_vtime;

/*
* Used to track when a task has requested a direct dispatch from the
* ops.select_cpu() path.
*/
u64 ddsq_id;

/*
* If set, reject future sched_setscheduler(2) calls updating the policy
* to %SCHED_EXT with -%EACCES.
Expand Down
1 change: 1 addition & 0 deletions init/init_task.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ struct task_struct init_task
.ops_state = ATOMIC_INIT(0),
.runnable_at = INITIAL_JIFFIES,
.slice = SCX_SLICE_DFL,
.ddsq_id = SCX_DSQ_INVALID,
},
#endif
.ptraced = LIST_HEAD_INIT(init_task.ptraced),
Expand Down
1 change: 1 addition & 0 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -4564,6 +4564,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
atomic_long_set(&p->scx.ops_state, 0);
p->scx.runnable_at = INITIAL_JIFFIES;
p->scx.slice = SCX_SLICE_DFL;
p->scx.ddsq_id = SCX_DSQ_INVALID;
#endif

#ifdef CONFIG_PREEMPT_NOTIFIERS
Expand Down
Loading

0 comments on commit d788214

Please sign in to comment.