aboutsummaryrefslogtreecommitdiff
path: root/drivers/cpufreq/cppc_cpufreq.c
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2022-05-23 19:28:41 +0200
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2022-05-23 19:28:41 +0200
commitd988c9134221af08908e4e3941952259b4207ce8 (patch)
treed613234376893ceb714a334deacc4518aff39e02 /drivers/cpufreq/cppc_cpufreq.c
parent16a23f394dd926c35d44ae420a76ac12c9203f96 (diff)
parent2d41dc23804d075e248ff01d16d9d424bcbc1180 (diff)
downloadlinux-d988c9134221af08908e4e3941952259b4207ce8.tar.gz
linux-d988c9134221af08908e4e3941952259b4207ce8.tar.bz2
linux-d988c9134221af08908e4e3941952259b4207ce8.zip
Merge branch 'pm-cpufreq'
Merge cpufreq updates for 5.19-rc1: - Fix cpufreq governor cleanup code to avoid using kfree() directly to free kobject-based items (Kevin Hao). - Prepare cpufreq for powerpc's asm/prom.h cleanup (Christophe Leroy). - Make intel_pstate notify frequency invariance code when no_turbo is turned on and off (Chen Yu). - Add Sapphire Rapids OOB mode support to intel_pstate (Srinivas Pandruvada). - Make cpufreq avoid unnecessary frequency updates due to mismatch between hardware and the frequency table (Viresh Kumar). - Make remove_cpu_dev_symlink() clear the real_cpus mask to simplify code (Viresh Kumar). - Rearrange cpufreq_offline() and cpufreq_remove_dev() to make the calling convention for some driver callbacks consistent (Rafael Wysocki). - Avoid accessing half-initialized cpufreq policies from the show() and store() sysfs functions (Schspa Shi). - Rearrange cpufreq_offline() to make the calling convention for some driver callbacks consistent (Schspa Shi). - Update CPPC handling in cpufreq (Pierre Gondois): * Add per_cpu efficiency_class to the CPPC driver. * Make the CPPC driver register EM based on efficiency class information. * Adjust _OSC for flexible address space in the ACPI platform initialization code and always set CPPC _OSC bits if CPPC_LIB is supported. * Assume no transition latency if no PCCT in the CPPC driver. * Add fast_switch and dvfs_possible_from_any_cpu support to the CPPC driver. 
* pm-cpufreq: cpufreq: CPPC: Enable dvfs_possible_from_any_cpu cpufreq: CPPC: Enable fast_switch ACPI: CPPC: Assume no transition latency if no PCCT ACPI: bus: Set CPPC _OSC bits for all and when CPPC_LIB is supported ACPI: CPPC: Check _OSC for flexible address space cpufreq: make interface functions and lock holding state clear cpufreq: Abort show()/store() for half-initialized policies cpufreq: Rearrange locking in cpufreq_remove_dev() cpufreq: Split cpufreq_offline() cpufreq: Reorganize checks in cpufreq_offline() cpufreq: Clear real_cpus mask from remove_cpu_dev_symlink() cpufreq: intel_pstate: Support Sapphire Rapids OOB mode Revert "cpufreq: Fix possible race in cpufreq online error path" cpufreq: CPPC: Register EM based on efficiency class information cpufreq: CPPC: Add per_cpu efficiency_class cpufreq: Avoid unnecessary frequency updates due to mismatch cpufreq: Fix possible race in cpufreq online error path cpufreq: intel_pstate: Handle no_turbo in frequency invariance cpufreq: Prepare cleanup of powerpc's asm/prom.h cpufreq: governor: Use kobject release() method to free dbs_data
Diffstat (limited to 'drivers/cpufreq/cppc_cpufreq.c')
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c211
1 files changed, 211 insertions, 0 deletions
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 82d370ae6a4a..d092c9bb4ba3 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -389,6 +389,27 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
return ret;
}
+/*
+ * Fast-switch callback: translate @target_freq (kHz) into a CPPC performance
+ * value, record it as the desired performance in the policy's control block
+ * and hand that block to cppc_set_perf() for the policy's CPU.
+ *
+ * Returns @target_freq on success, or 0 when cppc_set_perf() fails.
+ */
+static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy,
+					      unsigned int target_freq)
+{
+	struct cppc_cpudata *data = policy->driver_data;
+	unsigned int cpu = policy->cpu;
+	int err;
+
+	data->perf_ctrls.desired_perf =
+		cppc_cpufreq_khz_to_perf(data, target_freq);
+
+	err = cppc_set_perf(cpu, &data->perf_ctrls);
+	if (!err)
+		return target_freq;
+
+	pr_debug("Failed to set target on CPU:%d. ret:%d\n", cpu, err);
+	return 0;
+}
+
static int cppc_verify_policy(struct cpufreq_policy_data *policy)
{
cpufreq_verify_within_cpu_limits(policy);
@@ -420,12 +441,197 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
}
+static DEFINE_PER_CPU(unsigned int, efficiency_class);
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
+
+/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */
+#define CPPC_EM_CAP_STEP (20)
+/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
+#define CPPC_EM_COST_STEP (1)
+/* Add a cost gap corresponding to the energy of 4 CPUs. */
+#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
+ / CPPC_EM_CAP_STEP)
+
+/*
+ * Count the artificial performance states for @policy: one state per
+ * CPPC_EM_CAP_STEP capacity bucket, from the bucket holding the capacity
+ * that corresponds to lowest_perf up to the bucket holding the CPU's
+ * capacity, both included.
+ *
+ * Returns 0 when the scaled minimum capacity is 0 or exceeds the maximum.
+ */
+static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
+{
+	struct cppc_cpudata *data = policy->driver_data;
+	struct cppc_perf_caps *caps = &data->perf_caps;
+	unsigned int cap_hi, cap_lo;
+
+	cap_hi = arch_scale_cpu_capacity(policy->cpu);
+	cap_lo = div_u64(cap_hi * caps->lowest_perf, caps->highest_perf);
+
+	if (!cap_lo || cap_lo > cap_hi)
+		return 0;
+
+	return 1 + cap_hi / CPPC_EM_CAP_STEP - cap_lo / CPPC_EM_CAP_STEP;
+}
+
+/*
+ * Artificial cost of performance state @step on @cpu.
+ *
+ * The cost is defined as:
+ *   cost = power * max_frequency / frequency
+ *
+ * Here it is the CPU's efficiency class scaled by CPPC_EM_COST_GAP, plus
+ * CPPC_EM_COST_STEP for every step.
+ */
+static inline unsigned long compute_cost(int cpu, int step)
+{
+	unsigned int class = per_cpu(efficiency_class, cpu);
+
+	return CPPC_EM_COST_GAP * class + step * CPPC_EM_COST_STEP;
+}
+
+/*
+ * Energy Model "active power" callback: pick the artificial performance
+ * state that follows the frequency passed in *KHz.
+ *
+ * @cpu_dev: CPU device; its id is used to look up the cpufreq policy.
+ * @power:   output, set to the artificial cost of the selected step.
+ * @KHz:     input: previous frequency; output: frequency of the selected
+ *           state.
+ *
+ * Returns 0 on success, or -EINVAL when the CPU has no cpufreq policy, when
+ * the per-step performance increment truncates to 0, or when the step derived
+ * from *KHz is already beyond the last step.
+ */
+static int cppc_get_cpu_power(struct device *cpu_dev,
+		unsigned long *power, unsigned long *KHz)
+{
+	unsigned long perf_step, perf_prev, perf, perf_check;
+	unsigned int min_step, max_step, step, step_check;
+	unsigned long prev_freq = *KHz;
+	unsigned int min_cap, max_cap;
+	struct cpufreq_policy *policy;
+	struct cppc_perf_caps *perf_caps;
+	struct cppc_cpudata *cpu_data;
+
+	/* cpufreq_cpu_get_raw() returns NULL when the CPU has no policy. */
+	policy = cpufreq_cpu_get_raw(cpu_dev->id);
+	if (!policy)
+		return -EINVAL;
+
+	cpu_data = policy->driver_data;
+	perf_caps = &cpu_data->perf_caps;
+	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+	min_cap = div_u64(max_cap * perf_caps->lowest_perf,
+			  perf_caps->highest_perf);
+
+	perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+	/*
+	 * perf_step is used as a divisor below; it truncates to 0 when
+	 * CPPC_EM_CAP_STEP * highest_perf is smaller than max_cap.
+	 */
+	if (!perf_step)
+		return -EINVAL;
+
+	min_step = min_cap / CPPC_EM_CAP_STEP;
+	max_step = max_cap / CPPC_EM_CAP_STEP;
+
+	perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+	step = perf_prev / perf_step;
+
+	if (step > max_step)
+		return -EINVAL;
+
+	if (min_step == max_step) {
+		/* Single state: report the highest performance level. */
+		step = max_step;
+		perf = perf_caps->highest_perf;
+	} else if (step < min_step) {
+		/* Below the first state: start from the lowest level. */
+		step = min_step;
+		perf = perf_caps->lowest_perf;
+	} else {
+		/* Move to the next state; the last one maps to highest_perf. */
+		step++;
+		if (step == max_step)
+			perf = perf_caps->highest_perf;
+		else
+			perf = step * perf_step;
+	}
+
+	*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+	perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+	step_check = perf_check / perf_step;
+
+	/*
+	 * To avoid bad integer approximation, check that the new frequency
+	 * value increased and that the new frequency will be converted back
+	 * to the desired step value.
+	 */
+	while ((*KHz == prev_freq) || (step_check != step)) {
+		perf++;
+		*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+		perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+		step_check = perf_check / perf_step;
+	}
+
+	/*
+	 * With an artificial EM, only the cost value is used. The power is
+	 * still populated such that 0 < power < EM_MAX_POWER, which gives the
+	 * artificial performance states more meaning.
+	 */
+	*power = compute_cost(cpu_dev->id, step);
+
+	return 0;
+}
+
+/*
+ * Energy Model "get cost" callback: report the artificial cost of running
+ * the CPU behind @cpu_dev at frequency @KHz.
+ *
+ * @cpu_dev: CPU device; its id is used to look up the cpufreq policy.
+ * @KHz:     frequency whose step is derived via the CPPC perf conversion.
+ * @cost:    output, set to compute_cost() for that step.
+ *
+ * Returns 0 on success, or -EINVAL when the CPU has no cpufreq policy or when
+ * the per-step performance increment truncates to 0.
+ */
+static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
+		unsigned long *cost)
+{
+	unsigned long perf_step, perf_prev;
+	struct cppc_perf_caps *perf_caps;
+	struct cpufreq_policy *policy;
+	struct cppc_cpudata *cpu_data;
+	unsigned int max_cap;
+	int step;
+
+	/* cpufreq_cpu_get_raw() returns NULL when the CPU has no policy. */
+	policy = cpufreq_cpu_get_raw(cpu_dev->id);
+	if (!policy)
+		return -EINVAL;
+
+	cpu_data = policy->driver_data;
+	perf_caps = &cpu_data->perf_caps;
+	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+
+	perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
+	perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+	/* perf_step is the divisor below; refuse a value truncated to 0. */
+	if (!perf_step)
+		return -EINVAL;
+
+	step = perf_prev / perf_step;
+
+	*cost = compute_cost(cpu_dev->id, step);
+
+	return 0;
+}
+
+/*
+ * Read the efficiency class of every possible CPU from its MADT GICC entry,
+ * renumber the classes in use onto consecutive indices starting at 0, store
+ * the index in the per-CPU efficiency_class variable and install
+ * cppc_cpufreq_register_em() as the driver's register_em callback.
+ *
+ * Returns 0 on success, or -EINVAL (leaving register_em untouched) when at
+ * most one distinct efficiency class is in use.
+ */
+static int populate_efficiency_class(void)
+{
+	struct acpi_madt_generic_interrupt *gicc;
+	DECLARE_BITMAP(used_classes, 256) = {};
+	int class, cpu, index;
+
+	/* Record every efficiency class value that appears on some CPU. */
+	for_each_possible_cpu(cpu) {
+		gicc = acpi_cpu_get_madt_gicc(cpu);
+		class = gicc->efficiency_class;
+		bitmap_set(used_classes, class, 1);
+	}
+
+	if (bitmap_weight(used_classes, 256) <= 1) {
+		/* 'class' still holds the value read for the last CPU. */
+		pr_debug("Efficiency classes are all equal (=%d). No EM registered\n",
+			 class);
+		return -EINVAL;
+	}
+
+	/*
+	 * Squeeze efficiency class values on [0:#efficiency_class-1].
+	 * Values are per spec in [0:255].
+	 */
+	index = 0;
+	for_each_set_bit(class, used_classes, 256) {
+		for_each_possible_cpu(cpu) {
+			gicc = acpi_cpu_get_madt_gicc(cpu);
+			if (gicc->efficiency_class == class)
+				per_cpu(efficiency_class, cpu) = index;
+		}
+		index++;
+	}
+	cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
+
+	return 0;
+}
+
+/*
+ * register_em callback: register a performance domain for @policy, covering
+ * the CPUs in its shared_cpu_map, with cppc_get_cpu_power() and
+ * cppc_get_cpu_cost() as the data callbacks. The result of the registration
+ * call is not checked.
+ */
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+	struct em_data_callback em_cb =
+		EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
+	struct cppc_cpudata *data = policy->driver_data;
+	struct device *cpu_dev = get_cpu_device(policy->cpu);
+	unsigned int nr_states = get_perf_level_count(policy);
+
+	em_dev_register_perf_domain(cpu_dev, nr_states, &em_cb,
+				    data->shared_cpu_map, 0);
+}
+
#else
/*
 * Variant for the #else branch: returns cppc_get_transition_latency() for
 * @cpu divided by NSEC_PER_USEC (presumably a nanosecond-to-microsecond
 * conversion; confirm against cppc_get_transition_latency()).
 */
static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
{
return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
}
/* Stub for the #else branch: does nothing and always returns 0. */
static int populate_efficiency_class(void)
{
return 0;
}
/* Stub for the #else branch: intentionally empty, @policy is unused. */
static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
{
}
#endif
@@ -536,6 +742,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
goto out;
}
+ policy->fast_switch_possible = cppc_allow_fast_switch();
+ policy->dvfs_possible_from_any_cpu = true;
+
/*
* If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
* is supported.
@@ -681,6 +890,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = {
.verify = cppc_verify_policy,
.target = cppc_cpufreq_set_target,
.get = cppc_cpufreq_get_rate,
+ .fast_switch = cppc_cpufreq_fast_switch,
.init = cppc_cpufreq_cpu_init,
.exit = cppc_cpufreq_cpu_exit,
.set_boost = cppc_cpufreq_set_boost,
@@ -742,6 +952,7 @@ static int __init cppc_cpufreq_init(void)
cppc_check_hisi_workaround();
cppc_freq_invariance_init();
+ populate_efficiency_class();
ret = cpufreq_register_driver(&cppc_cpufreq_driver);
if (ret)