author    | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-19 15:55:58 +0200
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-19 15:55:58 +0200
commit    | 2004cef11ea072838f99bd95cefa5c8e45df0847 (patch)
tree      | d7162235ad3c3985abbb5233657eef0c03819b28 /kernel/sched/sched.h
parent    | 509d2cd12a10d057fdf72f565b930f9a81140d59 (diff)
parent    | bc9057da1a220ff2cb6c8885fd5352558aceba2c (diff)
Merge tag 'sched-core-2024-09-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- Implement the SCHED_DEADLINE server infrastructure - Daniel Bristot
de Oliveira's last major contribution to the kernel:
"SCHED_DEADLINE servers can help fixing starvation issues of low
priority tasks (e.g., SCHED_OTHER) when higher priority tasks
monopolize CPU cycles. Today we have RT Throttling; DEADLINE
servers should be able to replace and improve that."
(Daniel Bristot de Oliveira, Peter Zijlstra, Joel Fernandes, Youssef
Esmat, Huang Shijie)
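
  A minimal kernel-style sketch of the resulting API, using only the hooks
  declared in the sched.h diff at the bottom of this page (dl_server_init(),
  dl_server_apply_params(), dl_server_start()); the my_*() helpers and the
  50 ms / 1 s budget are illustrative assumptions, and fair_server_init() is
  the one real in-tree user:

    /* Hypothetical wiring of a DL server for the fair class; the real
     * version lives in fair.c behind fair_server_init(). */
    static struct rq *my_rq_of(struct sched_dl_entity *dl_se)
    {
            /* The fair server entity is embedded in struct rq. */
            return container_of(dl_se, struct rq, fair_server);
    }

    static bool my_has_tasks(struct sched_dl_entity *dl_se)
    {
            return !!my_rq_of(dl_se)->cfs.h_nr_running;
    }

    static struct task_struct *my_pick_task(struct sched_dl_entity *dl_se)
    {
            return pick_task_fair(my_rq_of(dl_se));  /* illustrative */
    }

    static void my_server_setup(struct rq *rq)
    {
            struct sched_dl_entity *dl_se = &rq->fair_server;

            dl_server_init(dl_se, rq, my_has_tasks, my_pick_task);
            /* Reserve 50 ms of CPU every 1 s for fair tasks; all
             * parameters are in nanoseconds. */
            dl_server_apply_params(dl_se, 50 * NSEC_PER_MSEC, NSEC_PER_SEC, true);
            dl_server_start(dl_se);
    }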
- Preparatory changes for sched_ext integration:
- Use set_next_task(.first) where required
- Fix up set_next_task() implementations
- Clean up DL server vs. core sched
- Split up put_prev_task_balance()
- Rework pick_next_task()
- Combine the last put_prev_task() and the first set_next_task()
- Rework dl_server
- Add put_prev_task(.next)
(Peter Zijlstra, with a fix by Tejun Heo)
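
  The resulting pick path can be sketched as follows. This is a simplified,
  hypothetical rendering of the reworked core loop, built only from the hooks
  visible in the sched.h diff at the bottom of this page: pick_task() is the
  primary interface, pick_next_task(.prev) is an optional optimized path, and
  put_prev_set_next_task() performs the combined handover:

    static struct task_struct *
    pick_next_task_sketch(struct rq *rq, struct task_struct *prev)
    {
            const struct sched_class *class;
            struct task_struct *next;

            for_each_class(class) {
                    if (class->pick_next_task) {
                            /* Optional optimized path (e.g. fair). */
                            next = class->pick_next_task(rq, prev);
                            if (next)
                                    return next;
                    } else {
                            next = class->pick_task(rq);
                            if (next) {
                                    /* Last put_prev_task() + first set_next_task(). */
                                    put_prev_set_next_task(rq, prev, next);
                                    return next;
                            }
                    }
            }

            BUG();  /* The idle class should always have a runnable task. */
    }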
- Complete the EEVDF transition and refine EEVDF scheduling:
- Implement delayed dequeue
- Allow shorter slices to wakeup-preempt
- Use sched_attr::sched_runtime to set request/slice suggestion
- Document the new feature flags
- Remove unused and duplicate-functionality fields
- Simplify & unify pick_next_task_fair()
- Misc debuggability enhancements
(Peter Zijlstra, with fixes/cleanups by Dietmar Eggemann, Valentin
Schneider and Chuyi Zhou)
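
  For the sched_runtime request/slice suggestion, a userspace sketch; it
  assumes a glibc without a sched_setattr() wrapper (hence the raw syscall),
  and the 3 ms value is arbitrary:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/sched.h>        /* SCHED_NORMAL (a.k.a. SCHED_OTHER) */
    #include <linux/sched/types.h>  /* struct sched_attr */

    int main(void)
    {
            struct sched_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.sched_policy = SCHED_NORMAL;
            attr.sched_runtime = 3 * 1000 * 1000;  /* 3 ms slice hint, in ns */

            /* pid 0 means the calling task. */
            if (syscall(SYS_sched_setattr, 0, &attr, 0))
                    perror("sched_setattr");

            return 0;
    }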
- Initialize the vruntime of a new task when it is first enqueued,
resulting in significant decrease in latency of newly woken tasks
(Zhang Qiao)
- Introduce SM_IDLE and an idle re-entry fast-path in __schedule()
(K Prateek Nayak, Peter Zijlstra)
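
  Roughly, as a hedged fragment (names and surrounding logic are simplified
  from the actual __schedule()):

    /* Fast path: the idle task rescheduling with nothing runnable can
     * re-enter idle without a full pick_next_task() class walk. */
    if (sched_mode == SM_IDLE && !rq->nr_running) {
            next = prev;
            goto picked;    /* hypothetical label */
    }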
- Clean up and clarify the usage of rt_task()
(Qais Yousef)
- Preempt SCHED_IDLE entities in strict cgroup hierarchies
(Tianchen Ding)
- Clarify the documentation of time units for deadline scheduler
parameters (Christian Loehle)
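
  Concretely, all three deadline fields are nanoseconds; reusing the
  sched_attr setup from the sketch further above, a 60 Hz-style reservation
  might look like this (values illustrative):

    attr.sched_policy   = SCHED_DEADLINE;
    attr.sched_runtime  =  5 * 1000 * 1000;  /*  5 ms, in ns */
    attr.sched_deadline = 10 * 1000 * 1000;  /* 10 ms, in ns */
    attr.sched_period   = 16666666;          /* ~16.7 ms (60 Hz), in ns */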
- Remove the HZ_BW chicken-bit feature flag introduced a year ago,
the original change seems to be working fine (Phil Auld)
- Misc fixes and cleanups (Chen Yu, Dan Carpenter, Huang Shijie,
Peilin He, Qais Yousef and Vincent Guittot)
* tag 'sched-core-2024-09-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits)
sched/cpufreq: Use NSEC_PER_MSEC for deadline task
cpufreq/cppc: Use NSEC_PER_MSEC for deadline task
sched/deadline: Clarify nanoseconds in uapi
sched/deadline: Convert schedtool example to chrt
sched/debug: Fix the runnable tasks output
sched: Fix sched_delayed vs sched_core
kernel/sched: Fix util_est accounting for DELAY_DEQUEUE
kthread: Fix task state in kthread worker if being frozen
sched/pelt: Use rq_clock_task() for hw_pressure
sched/fair: Move effective_cpu_util() and sched_cpu_util() into fair.c
sched/core: Introduce SM_IDLE and an idle re-entry fast-path in __schedule()
sched: Add put_prev_task(.next)
sched: Rework dl_server
sched: Combine the last put_prev_task() and the first set_next_task()
sched: Rework pick_next_task()
sched: Split up put_prev_task_balance()
sched: Clean up DL server vs core sched
sched: Fixup set_next_task() implementations
sched: Use set_next_task(.first) where required
sched/fair: Properly deactivate sched_delayed task upon class change
...
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r-- | kernel/sched/sched.h | 101
1 file changed, 78 insertions(+), 23 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4c36cc680361..3744f16a1293 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -68,6 +68,7 @@
 #include <linux/wait_api.h>
 #include <linux/wait_bit.h>
 #include <linux/workqueue_api.h>
+#include <linux/delayacct.h>
 
 #include <trace/events/power.h>
 #include <trace/events/sched.h>
@@ -335,7 +336,7 @@ extern bool __checkparam_dl(const struct sched_attr *attr);
 extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
 extern int  dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
 extern int  dl_bw_check_overflow(int cpu);
-
+extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec);
 /*
  * SCHED_DEADLINE supports servers (nested scheduling) with the following
  * interface:
@@ -361,7 +362,14 @@
 extern void dl_server_start(struct sched_dl_entity *dl_se);
 extern void dl_server_stop(struct sched_dl_entity *dl_se);
 extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 		    dl_server_has_tasks_f has_tasks,
-		    dl_server_pick_f pick);
+		    dl_server_pick_f pick_task);
+
+extern void dl_server_update_idle_time(struct rq *rq,
+		    struct task_struct *p);
+extern void fair_server_init(struct rq *rq);
+extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq);
+extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
+		    u64 runtime, u64 period, bool init);
 
 #ifdef CONFIG_CGROUP_SCHED
@@ -599,17 +607,12 @@ struct cfs_rq {
 	s64			avg_vruntime;
 	u64			avg_load;
 
-	u64			exec_clock;
 	u64			min_vruntime;
 #ifdef CONFIG_SCHED_CORE
 	unsigned int		forceidle_seq;
 	u64			min_vruntime_fi;
 #endif
 
-#ifndef CONFIG_64BIT
-	u64			min_vruntime_copy;
-#endif
-
 	struct rb_root_cached	tasks_timeline;
 
 	/*
@@ -619,10 +622,6 @@ struct cfs_rq {
 	struct sched_entity	*curr;
 	struct sched_entity	*next;
 
-#ifdef CONFIG_SCHED_DEBUG
-	unsigned int		nr_spread_over;
-#endif
-
 #ifdef CONFIG_SMP
 	/*
 	 * CFS load tracking
@@ -726,13 +725,13 @@ struct rt_rq {
 #endif /* CONFIG_SMP */
 	int			rt_queued;
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	int			rt_throttled;
 	u64			rt_time;
 	u64			rt_runtime;
 	/* Nests inside the rq lock: */
 	raw_spinlock_t		rt_runtime_lock;
 
-#ifdef CONFIG_RT_GROUP_SCHED
 	unsigned int		rt_nr_boosted;
 
 	struct rq		*rq;
@@ -820,6 +819,9 @@ static inline void se_update_runnable(struct sched_entity *se)
 
 static inline long se_runnable(struct sched_entity *se)
 {
+	if (se->sched_delayed)
+		return false;
+
 	if (entity_is_task(se))
 		return !!se->on_rq;
 	else
@@ -834,6 +836,9 @@ static inline void se_update_runnable(struct sched_entity *se) { }
 
 static inline long se_runnable(struct sched_entity *se)
 {
+	if (se->sched_delayed)
+		return false;
+
 	return !!se->on_rq;
 }
 
@@ -1044,6 +1049,8 @@ struct rq {
 	struct rt_rq		rt;
 	struct dl_rq		dl;
 
+	struct sched_dl_entity	fair_server;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this CPU: */
 	struct list_head	leaf_cfs_rq_list;
@@ -1059,6 +1066,7 @@ struct rq {
 	unsigned int		nr_uninterruptible;
 
 	struct task_struct __rcu	*curr;
+	struct sched_dl_entity	*dl_server;
 	struct task_struct	*idle;
 	struct task_struct	*stop;
 	unsigned long		next_balance;
@@ -1158,7 +1166,6 @@ struct rq {
 	/* latency stats */
 	struct sched_info	rq_sched_info;
 	unsigned long long	rq_cpu_time;
-	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
 
 	/* sys_sched_yield() stats */
 	unsigned int		yld_count;
@@ -1187,6 +1194,7 @@ struct rq {
 	/* per rq */
 	struct rq		*core;
 	struct task_struct	*core_pick;
+	struct sched_dl_entity	*core_dl_server;
 	unsigned int		core_enabled;
 	unsigned int		core_sched_seq;
 	struct rb_root		core_tree;
@@ -2247,11 +2255,13 @@ extern const u32 sched_prio_to_wmult[40];
  *
  */
 
-#define DEQUEUE_SLEEP		0x01
+#define DEQUEUE_SLEEP		0x01 /* Matches ENQUEUE_WAKEUP */
 #define DEQUEUE_SAVE		0x02 /* Matches ENQUEUE_RESTORE */
 #define DEQUEUE_MOVE		0x04 /* Matches ENQUEUE_MOVE */
 #define DEQUEUE_NOCLOCK		0x08 /* Matches ENQUEUE_NOCLOCK */
+#define DEQUEUE_SPECIAL		0x10
 #define DEQUEUE_MIGRATING	0x100 /* Matches ENQUEUE_MIGRATING */
+#define DEQUEUE_DELAYED		0x200 /* Matches ENQUEUE_DELAYED */
 
 #define ENQUEUE_WAKEUP		0x01
 #define ENQUEUE_RESTORE		0x02
@@ -2267,6 +2277,7 @@
 #endif
 #define ENQUEUE_INITIAL		0x80
 #define ENQUEUE_MIGRATING	0x100
+#define ENQUEUE_DELAYED		0x200
 
 #define RETRY_TASK		((void *)-1UL)
 
@@ -2285,23 +2296,31 @@ struct sched_class {
 #endif
 
 	void (*enqueue_task)  (struct rq *rq, struct task_struct *p, int flags);
-	void (*dequeue_task)  (struct rq *rq, struct task_struct *p, int flags);
+	bool (*dequeue_task)  (struct rq *rq, struct task_struct *p, int flags);
 	void (*yield_task)    (struct rq *rq);
 	bool (*yield_to_task) (struct rq *rq, struct task_struct *p);
 
 	void (*wakeup_preempt)(struct rq *rq, struct task_struct *p, int flags);
 
-	struct task_struct *(*pick_next_task)(struct rq *rq);
+	struct task_struct *(*pick_task)(struct rq *rq);
+	/*
+	 * Optional! When implemented pick_next_task() should be equivalent to:
+	 *
+	 *   next = pick_task();
+	 *   if (next) {
+	 *       put_prev_task(prev);
+	 *       set_next_task_first(next);
+	 *   }
+	 */
+	struct task_struct *(*pick_next_task)(struct rq *rq, struct task_struct *prev);
 
-	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
+	void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct task_struct *next);
 	void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
 
 #ifdef CONFIG_SMP
 	int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
 
-	struct task_struct * (*pick_task)(struct rq *rq);
-
 	void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
 
 	void (*task_woken)(struct rq *this_rq, struct task_struct *task);
@@ -2345,7 +2364,7 @@ struct sched_class {
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
 	WARN_ON_ONCE(rq->curr != prev);
-	prev->sched_class->put_prev_task(rq, prev);
+	prev->sched_class->put_prev_task(rq, prev, NULL);
 }
 
 static inline void set_next_task(struct rq *rq, struct task_struct *next)
@@ -2353,6 +2372,30 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
 	next->sched_class->set_next_task(rq, next, false);
 }
 
+static inline void
+__put_prev_set_next_dl_server(struct rq *rq,
+			      struct task_struct *prev,
+			      struct task_struct *next)
+{
+	prev->dl_server = NULL;
+	next->dl_server = rq->dl_server;
+	rq->dl_server = NULL;
+}
+
+static inline void put_prev_set_next_task(struct rq *rq,
+					  struct task_struct *prev,
+					  struct task_struct *next)
+{
+	WARN_ON_ONCE(rq->curr != prev);
+
+	__put_prev_set_next_dl_server(rq, prev, next);
+
+	if (next == prev)
+		return;
+
+	prev->sched_class->put_prev_task(rq, prev, next);
+	next->sched_class->set_next_task(rq, next, true);
+}
+
 /*
  * Helper to define a sched_class instance; each one is placed in a separate
@@ -2408,7 +2451,7 @@ static inline bool sched_fair_runnable(struct rq *rq)
 }
 
 extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
-extern struct task_struct *pick_next_task_idle(struct rq *rq);
+extern struct task_struct *pick_task_idle(struct rq *rq);
 
 #define SCA_CHECK		0x01
 #define SCA_MIGRATE_DISABLE	0x02
@@ -2515,7 +2558,6 @@ extern void reweight_task(struct task_struct *p, const struct load_weight *lw);
 
 extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);
 
-extern struct rt_bandwidth def_rt_bandwidth;
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
@@ -2586,6 +2628,19 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
 	sched_update_tick_dependency(rq);
 }
 
+static inline void __block_task(struct rq *rq, struct task_struct *p)
+{
+	WRITE_ONCE(p->on_rq, 0);
+	ASSERT_EXCLUSIVE_WRITER(p->on_rq);
+	if (p->sched_contributes_to_load)
+		rq->nr_uninterruptible++;
+
+	if (p->in_iowait) {
+		atomic_inc(&rq->nr_iowait);
+		delayacct_blkio_start();
+	}
+}
+
 extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
 extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
@@ -3607,7 +3662,7 @@ extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
 extern void __setscheduler_prio(struct task_struct *p, int prio);
 extern void set_load_weight(struct task_struct *p, bool update_load);
 extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
-extern void dequeue_task(struct rq *rq, struct task_struct *p, int flags);
+extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
 
 extern void check_class_changed(struct rq *rq, struct task_struct *p,
 				const struct sched_class *prev_class,
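
One consequence of the new bool return of dequeue_task(), sketched under the
assumption that false means the fair class kept the task enqueued as
sched_delayed (the DEQUEUE_DELAYED / ENQUEUE_DELAYED flags above); only a task
that really left the runqueue may be accounted as blocked via __block_task():

    /* Hedged sketch; block_task_sketch() is a hypothetical caller. */
    static void block_task_sketch(struct rq *rq, struct task_struct *p,
                                  int flags)
    {
            if (dequeue_task(rq, p, DEQUEUE_SLEEP | flags))
                    __block_task(rq, p);    /* really left the runqueue */
            /* else: dequeue was delayed; p stays on_rq as sched_delayed */
    }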