diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-19 15:55:58 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-19 15:55:58 +0200 |
commit | 2004cef11ea072838f99bd95cefa5c8e45df0847 (patch) | |
tree | d7162235ad3c3985abbb5233657eef0c03819b28 /kernel/sched/debug.c | |
parent | 509d2cd12a10d057fdf72f565b930f9a81140d59 (diff) | |
parent | bc9057da1a220ff2cb6c8885fd5352558aceba2c (diff) | |
download | linux-2004cef11ea072838f99bd95cefa5c8e45df0847.tar.gz linux-2004cef11ea072838f99bd95cefa5c8e45df0847.tar.bz2 linux-2004cef11ea072838f99bd95cefa5c8e45df0847.zip |
Merge tag 'sched-core-2024-09-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- Implement the SCHED_DEADLINE server infrastructure - Daniel Bristot
de Oliveira's last major contribution to the kernel:
"SCHED_DEADLINE servers can help fixing starvation issues of low
priority tasks (e.g., SCHED_OTHER) when higher priority tasks
monopolize CPU cycles. Today we have RT Throttling; DEADLINE
servers should be able to replace and improve that."
(Daniel Bristot de Oliveira, Peter Zijlstra, Joel Fernandes, Youssef
Esmat, Huang Shijie)
- Preparatory changes for sched_ext integration:
- Use set_next_task(.first) where required
- Fix up set_next_task() implementations
- Clean up DL server vs. core sched
- Split up put_prev_task_balance()
- Rework pick_next_task()
- Combine the last put_prev_task() and the first set_next_task()
- Rework dl_server
- Add put_prev_task(.next)
(Peter Zijlstra, with a fix by Tejun Heo)
- Complete the EEVDF transition and refine EEVDF scheduling:
- Implement delayed dequeue
- Allow shorter slices to wakeup-preempt
- Use sched_attr::sched_runtime to set request/slice suggestion
- Document the new feature flags
- Remove unused and duplicate-functionality fields
- Simplify & unify pick_next_task_fair()
- Misc debuggability enhancements
(Peter Zijlstra, with fixes/cleanups by Dietmar Eggemann, Valentin
Schneider and Chuyi Zhou)
- Initialize the vruntime of a new task when it is first enqueued,
resulting in significant decrease in latency of newly woken tasks
(Zhang Qiao)
- Introduce SM_IDLE and an idle re-entry fast-path in __schedule()
(K Prateek Nayak, Peter Zijlstra)
- Clean up and clarify the usage of rt_task()
(Qais Yousef)
- Preempt SCHED_IDLE entities in strict cgroup hierarchies
(Tianchen Ding)
- Clarify the documentation of time units for deadline scheduler
parameters (Christian Loehle)
- Remove the HZ_BW chicken-bit feature flag introduced a year ago,
the original change seems to be working fine (Phil Auld)
- Misc fixes and cleanups (Chen Yu, Dan Carpenter, Huang Shijie,
Peilin He, Qais Yousef and Vincent Guittot)
* tag 'sched-core-2024-09-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits)
sched/cpufreq: Use NSEC_PER_MSEC for deadline task
cpufreq/cppc: Use NSEC_PER_MSEC for deadline task
sched/deadline: Clarify nanoseconds in uapi
sched/deadline: Convert schedtool example to chrt
sched/debug: Fix the runnable tasks output
sched: Fix sched_delayed vs sched_core
kernel/sched: Fix util_est accounting for DELAY_DEQUEUE
kthread: Fix task state in kthread worker if being frozen
sched/pelt: Use rq_clock_task() for hw_pressure
sched/fair: Move effective_cpu_util() and effective_cpu_util() in fair.c
sched/core: Introduce SM_IDLE and an idle re-entry fast-path in __schedule()
sched: Add put_prev_task(.next)
sched: Rework dl_server
sched: Combine the last put_prev_task() and the first set_next_task()
sched: Rework pick_next_task()
sched: Split up put_prev_task_balance()
sched: Clean up DL server vs core sched
sched: Fixup set_next_task() implementations
sched: Use set_next_task(.first) where required
sched/fair: Properly deactivate sched_delayed task upon class change
...
Diffstat (limited to 'kernel/sched/debug.c')
-rw-r--r-- | kernel/sched/debug.c | 198 |
1 files changed, 186 insertions, 12 deletions
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index c1eb9a1afd13..de1dc5264b3f 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -333,8 +333,165 @@ static const struct file_operations sched_debug_fops = { .release = seq_release, }; +enum dl_param { + DL_RUNTIME = 0, + DL_PERIOD, +}; + +static unsigned long fair_server_period_max = (1UL << 22) * NSEC_PER_USEC; /* ~4 seconds */ +static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC; /* 100 us */ + +static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, enum dl_param param) +{ + long cpu = (long) ((struct seq_file *) filp->private_data)->private; + struct rq *rq = cpu_rq(cpu); + u64 runtime, period; + size_t err; + int retval; + u64 value; + + err = kstrtoull_from_user(ubuf, cnt, 10, &value); + if (err) + return err; + + scoped_guard (rq_lock_irqsave, rq) { + runtime = rq->fair_server.dl_runtime; + period = rq->fair_server.dl_period; + + switch (param) { + case DL_RUNTIME: + if (runtime == value) + break; + runtime = value; + break; + case DL_PERIOD: + if (value == period) + break; + period = value; + break; + } + + if (runtime > period || + period > fair_server_period_max || + period < fair_server_period_min) { + return -EINVAL; + } + + if (rq->cfs.h_nr_running) { + update_rq_clock(rq); + dl_server_stop(&rq->fair_server); + } + + retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0); + if (retval) + cnt = retval; + + if (!runtime) + printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n", + cpu_of(rq)); + + if (rq->cfs.h_nr_running) + dl_server_start(&rq->fair_server); + } + + *ppos += cnt; + return cnt; +} + +static size_t sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param) +{ + unsigned long cpu = (unsigned long) m->private; + struct rq *rq = cpu_rq(cpu); + u64 value; + + switch (param) { + case DL_RUNTIME: + value = rq->fair_server.dl_runtime; + break; 
+ case DL_PERIOD: + value = rq->fair_server.dl_period; + break; + } + + seq_printf(m, "%llu\n", value); + return 0; + +} + +static ssize_t +sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME); +} + +static int sched_fair_server_runtime_show(struct seq_file *m, void *v) +{ + return sched_fair_server_show(m, v, DL_RUNTIME); +} + +static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_fair_server_runtime_show, inode->i_private); +} + +static const struct file_operations fair_server_runtime_fops = { + .open = sched_fair_server_runtime_open, + .write = sched_fair_server_runtime_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t +sched_fair_server_period_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD); +} + +static int sched_fair_server_period_show(struct seq_file *m, void *v) +{ + return sched_fair_server_show(m, v, DL_PERIOD); +} + +static int sched_fair_server_period_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_fair_server_period_show, inode->i_private); +} + +static const struct file_operations fair_server_period_fops = { + .open = sched_fair_server_period_open, + .write = sched_fair_server_period_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct dentry *debugfs_sched; +static void debugfs_fair_server_init(void) +{ + struct dentry *d_fair; + unsigned long cpu; + + d_fair = debugfs_create_dir("fair_server", debugfs_sched); + if (!d_fair) + return; + + for_each_possible_cpu(cpu) { + struct dentry *d_cpu; + char buf[32]; + + snprintf(buf, sizeof(buf), "cpu%lu", cpu); + d_cpu = debugfs_create_dir(buf, d_fair); + + debugfs_create_file("runtime", 0644, d_cpu, 
(void *) cpu, &fair_server_runtime_fops); + debugfs_create_file("period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops); + } +} + static __init int sched_init_debug(void) { struct dentry __maybe_unused *numa; @@ -374,6 +531,8 @@ static __init int sched_init_debug(void) debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); + debugfs_fair_server_init(); + return 0; } late_initcall(sched_init_debug); @@ -580,27 +739,27 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) else SEQ_printf(m, " %c", task_state_to_char(p)); - SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld.%06ld %9Ld %5d ", + SEQ_printf(m, " %15s %5d %9Ld.%06ld %c %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld %5d ", p->comm, task_pid_nr(p), SPLIT_NS(p->se.vruntime), entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N', SPLIT_NS(p->se.deadline), + p->se.custom_slice ? 'S' : ' ', SPLIT_NS(p->se.slice), SPLIT_NS(p->se.sum_exec_runtime), (long long)(p->nvcsw + p->nivcsw), p->prio); - SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld %9lld.%06ld", + SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld", SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)), - SPLIT_NS(p->se.sum_exec_runtime), SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); + SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif #ifdef CONFIG_CGROUP_SCHED - SEQ_printf_task_group_path(m, task_group(p), " %s") + SEQ_printf_task_group_path(m, task_group(p), " %s") #endif SEQ_printf(m, "\n"); @@ -612,10 +771,26 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) SEQ_printf(m, "\n"); SEQ_printf(m, "runnable tasks:\n"); - SEQ_printf(m, " S task PID tree-key switches prio" - " wait-time sum-exec sum-sleep\n"); + SEQ_printf(m, " S task PID vruntime eligible " + "deadline slice sum-exec switches " 
+ "prio wait-time sum-sleep sum-block" +#ifdef CONFIG_NUMA_BALANCING + " node group-id" +#endif +#ifdef CONFIG_CGROUP_SCHED + " group-path" +#endif + "\n"); SEQ_printf(m, "-------------------------------------------------------" - "------------------------------------------------------\n"); + "------------------------------------------------------" + "------------------------------------------------------" +#ifdef CONFIG_NUMA_BALANCING + "--------------" +#endif +#ifdef CONFIG_CGROUP_SCHED + "--------------" +#endif + "\n"); rcu_read_lock(); for_each_process_thread(g, p) { @@ -641,8 +816,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, "\n"); SEQ_printf(m, "cfs_rq[%d]:\n", cpu); #endif - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", - SPLIT_NS(cfs_rq->exec_clock)); raw_spin_rq_lock_irqsave(rq, flags); root = __pick_root_entity(cfs_rq); @@ -669,8 +842,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SPLIT_NS(right_vruntime)); spread = right_vruntime - left_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); - SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", - cfs_rq->nr_spread_over); SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running); SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running", @@ -730,9 +901,12 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x)) PU(rt_nr_running); + +#ifdef CONFIG_RT_GROUP_SCHED P(rt_throttled); PN(rt_time); PN(rt_runtime); +#endif #undef PN #undef PU |