aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched/topology.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 18:29:11 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 18:29:11 -0800
commitadb35e8dc98ba9bda99ff79ac6a05b8fcde2a762 (patch)
treeceb0334110d80b5a756764c3d089257c83faaec9 /kernel/sched/topology.c
parent533369b145d8d1bc44b8ed7f0dd0ecffb16384cc (diff)
parent5b78f2dc315354c05300795064f587366a02c6ff (diff)
downloadlinux-adb35e8dc98ba9bda99ff79ac6a05b8fcde2a762.tar.gz
linux-adb35e8dc98ba9bda99ff79ac6a05b8fcde2a762.tar.bz2
linux-adb35e8dc98ba9bda99ff79ac6a05b8fcde2a762.zip
Merge tag 'sched-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Thomas Gleixner: - migrate_disable/enable() support which originates from the RT tree and is now a prerequisite for the new preemptible kmap_local() API which aims to replace kmap_atomic(). - A fair amount of topology and NUMA related improvements - Improvements for the frequency invariant calculations - Enhanced robustness for the global CPU priority tracking and decision making - The usual small fixes and enhancements all over the place * tag 'sched-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (61 commits) sched/fair: Trivial correction of the newidle_balance() comment sched/fair: Clear SMT siblings after determining the core is not idle sched: Fix kernel-doc markup x86: Print ratio freq_max/freq_base used in frequency invariance calculations x86, sched: Use midpoint of max_boost and max_P for frequency invariance on AMD EPYC x86, sched: Calculate frequency invariance for AMD systems irq_work: Optimize irq_work_single() smp: Cleanup smp_call_function*() irq_work: Cleanup sched: Limit the amount of NUMA imbalance that can exist at fork time sched/numa: Allow a floating imbalance between NUMA nodes sched: Avoid unnecessary calculation of load imbalance at clone time sched/numa: Rename nr_running and break out the magic number sched: Make migrate_disable/enable() independent of RT sched/topology: Condition EAS enablement on FIE support arm64: Rebuild sched domains on invariance status changes sched/topology,schedutil: Wrap sched domains rebuild sched/uclamp: Allow to reset a task uclamp constraint value sched/core: Fix typos in comments Documentation: scheduler: fix information on arch SD flags, sched_domain and sched_debug ...
Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r--kernel/sched/topology.c61
1 files changed, 54 insertions, 7 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index dd7770226086..5d3675c7a76b 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -211,6 +211,15 @@ unsigned int sysctl_sched_energy_aware = 1;
DEFINE_MUTEX(sched_energy_mutex);
bool sched_energy_update;
+void rebuild_sched_domains_energy(void)
+{
+ mutex_lock(&sched_energy_mutex);
+ sched_energy_update = true;
+ rebuild_sched_domains();
+ sched_energy_update = false;
+ mutex_unlock(&sched_energy_mutex);
+}
+
#ifdef CONFIG_PROC_SYSCTL
int sched_energy_aware_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -223,13 +232,8 @@ int sched_energy_aware_handler(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write) {
state = static_branch_unlikely(&sched_energy_present);
- if (state != sysctl_sched_energy_aware) {
- mutex_lock(&sched_energy_mutex);
- sched_energy_update = 1;
- rebuild_sched_domains();
- sched_energy_update = 0;
- mutex_unlock(&sched_energy_mutex);
- }
+ if (state != sysctl_sched_energy_aware)
+ rebuild_sched_domains_energy();
}
return ret;
@@ -324,6 +328,7 @@ static void sched_energy_set(bool has_eas)
* 3. no SMT is detected.
* 4. the EM complexity is low enough to keep scheduling overheads low;
* 5. schedutil is driving the frequency of all CPUs of the rd;
+ * 6. frequency invariance support is present;
*
* The complexity of the Energy Model is defined as:
*
@@ -372,6 +377,14 @@ static bool build_perf_domains(const struct cpumask *cpu_map)
goto free;
}
+ if (!arch_scale_freq_invariant()) {
+ if (sched_debug()) {
+ pr_warn("rd %*pbl: Disabling EAS: frequency-invariant load tracking not yet supported",
+ cpumask_pr_args(cpu_map));
+ }
+ goto free;
+ }
+
for_each_cpu(i, cpu_map) {
/* Skip already covered CPUs. */
if (find_pd(pd, i))
@@ -516,6 +529,7 @@ static int init_rootdomain(struct root_domain *rd)
init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
#endif
+ rd->visit_gen = 0;
init_dl_bw(&rd->dl_bw);
if (cpudl_init(&rd->cpudl) != 0)
goto free_rto_mask;
@@ -674,6 +688,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
{
struct rq *rq = cpu_rq(cpu);
struct sched_domain *tmp;
+ int numa_distance = 0;
/* Remove the sched domains which do not contribute to scheduling. */
for (tmp = sd; tmp; ) {
@@ -705,6 +720,38 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
sd->child = NULL;
}
+ for (tmp = sd; tmp; tmp = tmp->parent)
+ numa_distance += !!(tmp->flags & SD_NUMA);
+
+ /*
+ * FIXME: Diameter >=3 is misrepresented.
+ *
+ * Smallest diameter=3 topology is:
+ *
+ * node 0 1 2 3
+ * 0: 10 20 30 40
+ * 1: 20 10 20 30
+ * 2: 30 20 10 20
+ * 3: 40 30 20 10
+ *
+ * 0 --- 1 --- 2 --- 3
+ *
+ * NUMA-3 0-3 N/A N/A 0-3
+ * groups: {0-2},{1-3} {1-3},{0-2}
+ *
+ * NUMA-2 0-2 0-3 0-3 1-3
+ * groups: {0-1},{1-3} {0-2},{2-3} {1-3},{0-1} {2-3},{0-2}
+ *
+ * NUMA-1 0-1 0-2 1-3 2-3
+ * groups: {0},{1} {1},{2},{0} {2},{3},{1} {3},{2}
+ *
+ * NUMA-0 0 1 2 3
+ *
+ * The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the
+ * group span isn't a subset of the domain span.
+ */
+ WARN_ONCE(numa_distance > 2, "Shortest NUMA path spans too many nodes\n");
+
sched_domain_debug(sd, cpu);
rq_attach_root(rq, rd);