diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2022-05-23 19:28:41 +0200 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2022-05-23 19:28:41 +0200 |
commit | d988c9134221af08908e4e3941952259b4207ce8 (patch) | |
tree | d613234376893ceb714a334deacc4518aff39e02 /drivers/cpufreq/cppc_cpufreq.c | |
parent | 16a23f394dd926c35d44ae420a76ac12c9203f96 (diff) | |
parent | 2d41dc23804d075e248ff01d16d9d424bcbc1180 (diff) | |
download | linux-d988c9134221af08908e4e3941952259b4207ce8.tar.gz linux-d988c9134221af08908e4e3941952259b4207ce8.tar.bz2 linux-d988c9134221af08908e4e3941952259b4207ce8.zip |
Merge branch 'pm-cpufreq'
Merge cpufreq updates for 5.19-rc1:
- Fix cpufreq governor clean up code to avoid using kfree() directly
to free kobject-based items (Kevin Hao).
- Prepare cpufreq for powerpc's asm/prom.h cleanup (Christophe Leroy).
- Make intel_pstate notify frequency invariance code when no_turbo is
turned on and off (Chen Yu).
- Add Sapphire Rapids OOB mode support to intel_pstate (Srinivas
Pandruvada).
- Make cpufreq avoid unnecessary frequency updates due to mismatch
between hardware and the frequency table (Viresh Kumar).
- Make remove_cpu_dev_symlink() clear the real_cpus mask to simplify
code (Viresh Kumar).
- Rearrange cpufreq_offline() and cpufreq_remove_dev() to make the
calling convention for some driver callbacks consistent (Rafael
Wysocki).
- Avoid accessing half-initialized cpufreq policies from the show()
and store() sysfs functions (Schspa Shi).
- Rearrange cpufreq_offline() to make the calling convention for some
driver callbacks consistent (Schspa Shi).
- Update CPPC handling in cpufreq (Pierre Gondois):
* Add per_cpu efficiency_class to the CPPC driver.
* Make the CPPC driver Register EM based on efficiency class
information.
* Adjust _OSC for flexible address space in the ACPI platform
initialization code and always set CPPC _OSC bits if CPPC_LIB is
supported.
* Assume no transition latency if no PCCT in the CPPC driver.
* Add fast_switch and dvfs_possible_from_any_cpu support to the CPPC
driver.
* pm-cpufreq:
cpufreq: CPPC: Enable dvfs_possible_from_any_cpu
cpufreq: CPPC: Enable fast_switch
ACPI: CPPC: Assume no transition latency if no PCCT
ACPI: bus: Set CPPC _OSC bits for all and when CPPC_LIB is supported
ACPI: CPPC: Check _OSC for flexible address space
cpufreq: make interface functions and lock holding state clear
cpufreq: Abort show()/store() for half-initialized policies
cpufreq: Rearrange locking in cpufreq_remove_dev()
cpufreq: Split cpufreq_offline()
cpufreq: Reorganize checks in cpufreq_offline()
cpufreq: Clear real_cpus mask from remove_cpu_dev_symlink()
cpufreq: intel_pstate: Support Sapphire Rapids OOB mode
Revert "cpufreq: Fix possible race in cpufreq online error path"
cpufreq: CPPC: Register EM based on efficiency class information
cpufreq: CPPC: Add per_cpu efficiency_class
cpufreq: Avoid unnecessary frequency updates due to mismatch
cpufreq: Fix possible race in cpufreq online error path
cpufreq: intel_pstate: Handle no_turbo in frequency invariance
cpufreq: Prepare cleanup of powerpc's asm/prom.h
cpufreq: governor: Use kobject release() method to free dbs_data
Diffstat (limited to 'drivers/cpufreq/cppc_cpufreq.c')
-rw-r--r-- | drivers/cpufreq/cppc_cpufreq.c | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 82d370ae6a4a..d092c9bb4ba3 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -389,6 +389,27 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } +static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + unsigned int cpu = policy->cpu; + u32 desired_perf; + int ret; + + desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq); + cpu_data->perf_ctrls.desired_perf = desired_perf; + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + + if (ret) { + pr_debug("Failed to set target on CPU:%d. ret:%d\n", + cpu, ret); + return 0; + } + + return target_freq; +} + static int cppc_verify_policy(struct cpufreq_policy_data *policy) { cpufreq_verify_within_cpu_limits(policy); @@ -420,12 +441,197 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; } +static DEFINE_PER_CPU(unsigned int, efficiency_class); +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy); + +/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */ +#define CPPC_EM_CAP_STEP (20) +/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */ +#define CPPC_EM_COST_STEP (1) +/* Add a cost gap correspnding to the energy of 4 CPUs. */ +#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \ + / CPPC_EM_CAP_STEP) + +static unsigned int get_perf_level_count(struct cpufreq_policy *policy) +{ + struct cppc_perf_caps *perf_caps; + unsigned int min_cap, max_cap; + struct cppc_cpudata *cpu_data; + int cpu = policy->cpu; + + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu); + min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf); + if ((min_cap == 0) || (max_cap < min_cap)) + return 0; + return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP; +} + +/* + * The cost is defined as: + * cost = power * max_frequency / frequency + */ +static inline unsigned long compute_cost(int cpu, int step) +{ + return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) + + step * CPPC_EM_COST_STEP; +} + +static int cppc_get_cpu_power(struct device *cpu_dev, + unsigned long *power, unsigned long *KHz) +{ + unsigned long perf_step, perf_prev, perf, perf_check; + unsigned int min_step, max_step, step, step_check; + unsigned long prev_freq = *KHz; + unsigned int min_cap, max_cap; + struct cpufreq_policy *policy; + + struct cppc_perf_caps *perf_caps; + struct cppc_cpudata *cpu_data; + + policy = cpufreq_cpu_get_raw(cpu_dev->id); + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu_dev->id); + min_cap = div_u64(max_cap * perf_caps->lowest_perf, + perf_caps->highest_perf); + + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + min_step = min_cap / CPPC_EM_CAP_STEP; + max_step = max_cap / CPPC_EM_CAP_STEP; + + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step = perf_prev / perf_step; + + if (step > max_step) + return -EINVAL; + + if (min_step == max_step) { + step = max_step; + perf = perf_caps->highest_perf; + } else if (step < min_step) { + step = min_step; + perf = perf_caps->lowest_perf; + } else { + step++; + if (step == max_step) + perf = perf_caps->highest_perf; + else + perf = step * perf_step; + } + + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step_check = perf_check / perf_step; + + /* + * To avoid bad integer approximation, check that new frequency value + * increased and that the new frequency will be converted to the + * desired step value. + */ + while ((*KHz == prev_freq) || (step_check != step)) { + perf++; + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step_check = perf_check / perf_step; + } + + /* + * With an artificial EM, only the cost value is used. Still the power + * is populated such as 0 < power < EM_MAX_POWER. This allows to add + * more sense to the artificial performance states. + */ + *power = compute_cost(cpu_dev->id, step); + + return 0; +} + +static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz, + unsigned long *cost) +{ + unsigned long perf_step, perf_prev; + struct cppc_perf_caps *perf_caps; + struct cpufreq_policy *policy; + struct cppc_cpudata *cpu_data; + unsigned int max_cap; + int step; + + policy = cpufreq_cpu_get_raw(cpu_dev->id); + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu_dev->id); + + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz); + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + step = perf_prev / perf_step; + + *cost = compute_cost(cpu_dev->id, step); + + return 0; +} + +static int populate_efficiency_class(void) +{ + struct acpi_madt_generic_interrupt *gicc; + DECLARE_BITMAP(used_classes, 256) = {}; + int class, cpu, index; + + for_each_possible_cpu(cpu) { + gicc = acpi_cpu_get_madt_gicc(cpu); + class = gicc->efficiency_class; + bitmap_set(used_classes, class, 1); + } + + if (bitmap_weight(used_classes, 256) <= 1) { + pr_debug("Efficiency classes are all equal (=%d). " + "No EM registered", class); + return -EINVAL; + } + + /* + * Squeeze efficiency class values on [0:#efficiency_class-1]. + * Values are per spec in [0:255]. + */ + index = 0; + for_each_set_bit(class, used_classes, 256) { + for_each_possible_cpu(cpu) { + gicc = acpi_cpu_get_madt_gicc(cpu); + if (gicc->efficiency_class == class) + per_cpu(efficiency_class, cpu) = index; + } + index++; + } + cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em; + + return 0; +} + +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) +{ + struct cppc_cpudata *cpu_data; + struct em_data_callback em_cb = + EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost); + + cpu_data = policy->driver_data; + em_dev_register_perf_domain(get_cpu_device(policy->cpu), + get_perf_level_count(policy), &em_cb, + cpu_data->shared_cpu_map, 0); +} + #else static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) { return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; } +static int populate_efficiency_class(void) +{ + return 0; +} +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) +{ +} #endif @@ -536,6 +742,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) goto out; } + policy->fast_switch_possible = cppc_allow_fast_switch(); + policy->dvfs_possible_from_any_cpu = true; + /* * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost * is supported. @@ -681,6 +890,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = { .verify = cppc_verify_policy, .target = cppc_cpufreq_set_target, .get = cppc_cpufreq_get_rate, + .fast_switch = cppc_cpufreq_fast_switch, .init = cppc_cpufreq_cpu_init, .exit = cppc_cpufreq_cpu_exit, .set_boost = cppc_cpufreq_set_boost, @@ -742,6 +952,7 @@ static int __init cppc_cpufreq_init(void) cppc_check_hisi_workaround(); cppc_freq_invariance_init(); + populate_efficiency_class(); ret = cpufreq_register_driver(&cppc_cpufreq_driver); if (ret) |