From df24014abe3694e7c34ce5e50248611b7a93fe83 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Apr 2019 15:03:58 +0530 Subject: [PATCH 1/6] cpufreq: Call transition notifier only once for each policy Currently, the notifiers are called once for each CPU of the policy->cpus cpumask. It would be more optimal if the notifier can be called only once and all the relevant information be provided to it. Out of the 23 drivers that register for the transition notifiers today, only 4 of them do per-cpu updates and the callback for the rest can be called only once for the policy without any impact. This would also avoid multiple function calls to the notifier callbacks and reduce multiple iterations of notifier core's code (which does locking as well). This patch adds pointer to the cpufreq policy to the struct cpufreq_freqs, so the notifier callback has all the information available to it with a single call. The five drivers which perform per-cpu updates are updated to use the cpufreq policy. The freqs->cpu field is redundant now and is removed. Acked-by: David S. Miller (sparc) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- arch/arm/kernel/smp.c | 24 +++++++++++++++--------- arch/sparc/kernel/time_64.c | 28 ++++++++++++++++------------ arch/x86/kernel/tsc.c | 2 +- arch/x86/kvm/x86.c | 31 ++++++++++++++++++++----------- drivers/cpufreq/cpufreq.c | 19 ++++++++++--------- include/linux/cpufreq.h | 14 +++++++------- 6 files changed, 69 insertions(+), 49 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index facd4240ca02..c6d37563610a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -754,15 +754,20 @@ static int cpufreq_callback(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - int cpu = freq->cpu; + struct cpumask *cpus = freq->policy->cpus; + int cpu, first = cpumask_first(cpus); + unsigned int lpj; if (freq->flags & CPUFREQ_CONST_LOOPS) return NOTIFY_OK; - if (!per_cpu(l_p_j_ref, cpu)) { - per_cpu(l_p_j_ref, cpu) = - per_cpu(cpu_data, cpu).loops_per_jiffy; - per_cpu(l_p_j_ref_freq, cpu) = freq->old; + if (!per_cpu(l_p_j_ref, first)) { + for_each_cpu(cpu, cpus) { + per_cpu(l_p_j_ref, cpu) = + per_cpu(cpu_data, cpu).loops_per_jiffy; + per_cpu(l_p_j_ref_freq, cpu) = freq->old; + } + if (!global_l_p_j_ref) { global_l_p_j_ref = loops_per_jiffy; global_l_p_j_ref_freq = freq->old; @@ -774,10 +779,11 @@ static int cpufreq_callback(struct notifier_block *nb, loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, global_l_p_j_ref_freq, freq->new); - per_cpu(cpu_data, cpu).loops_per_jiffy = - cpufreq_scale(per_cpu(l_p_j_ref, cpu), - per_cpu(l_p_j_ref_freq, cpu), - freq->new); + + lpj = cpufreq_scale(per_cpu(l_p_j_ref, first), + per_cpu(l_p_j_ref_freq, first), freq->new); + for_each_cpu(cpu, cpus) + per_cpu(cpu_data, cpu).loops_per_jiffy = lpj; } return NOTIFY_OK; } diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 3eb77943ce12..89fb05f90609 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -653,19 +653,23 @@ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val void *data) { struct cpufreq_freqs *freq = data; - unsigned int cpu = freq->cpu; - struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu); + unsigned int cpu; + struct freq_table *ft; - if (!ft->ref_freq) { - ft->ref_freq = freq->old; - ft->clock_tick_ref = cpu_data(cpu).clock_tick; - } - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { - cpu_data(cpu).clock_tick = - cpufreq_scale(ft->clock_tick_ref, - ft->ref_freq, - freq->new); + for_each_cpu(cpu, freq->policy->cpus) { + ft = &per_cpu(sparc64_freq_table, cpu); + + if (!ft->ref_freq) { + ft->ref_freq = freq->old; + ft->clock_tick_ref = cpu_data(cpu).clock_tick; + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + cpu_data(cpu).clock_tick = + cpufreq_scale(ft->clock_tick_ref, ft->ref_freq, + freq->new); + } } return 0; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 15b5e98a86f9..356dfc555a27 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -979,7 +979,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!(freq->flags & CPUFREQ_CONST_LOOPS)) mark_tsc_unstable("cpufreq changes"); - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); + set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc()); } return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b5edc8e3ce1d..22cc90baa67f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6698,10 +6698,8 @@ static void kvm_hyperv_tsc_notifier(void) } #endif -static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) +static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu) { - struct cpufreq_freqs *freq = data; struct kvm *kvm; struct kvm_vcpu *vcpu; int i, send_ipi = 0; @@ -6745,17 +6743,12 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va * */ - if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) - return 0; - if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) - return 0; - - smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); + smp_call_function_single(cpu, tsc_khz_changed, freq, 1); spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu->cpu != freq->cpu) + if (vcpu->cpu != cpu) continue; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); if (vcpu->cpu != smp_processor_id()) @@ -6777,8 +6770,24 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va * guest context is entered kvmclock will be updated, * so the guest will not see stale values. */ - smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); + smp_call_function_single(cpu, tsc_khz_changed, freq, 1); } +} + +static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + int cpu; + + if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) + return 0; + if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) + return 0; + + for_each_cpu(cpu, freq->policy->cpus) + __kvmclock_cpufreq_notifier(freq, cpu); + return 0; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7ea217c88c2e..c66eefe928c5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -340,11 +340,14 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, unsigned int state) { + int cpu; + BUG_ON(irqs_disabled()); if (cpufreq_disabled()) return; + freqs->policy = policy; freqs->flags = cpufreq_driver->flags; pr_debug("notification %u of frequency transition to %u kHz\n", state, freqs->new); @@ -364,10 +367,8 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, } } - for_each_cpu(freqs->cpu, policy->cpus) { - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_PRECHANGE, freqs); - } + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_PRECHANGE, freqs); adjust_jiffies(CPUFREQ_PRECHANGE, freqs); break; @@ -377,11 +378,11 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, pr_debug("FREQ: %u - CPUs: %*pbl\n", freqs->new, cpumask_pr_args(policy->cpus)); - for_each_cpu(freqs->cpu, policy->cpus) { - trace_cpu_frequency(freqs->new, freqs->cpu); - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, - CPUFREQ_POSTCHANGE, freqs); - } + for_each_cpu(cpu, policy->cpus) + trace_cpu_frequency(freqs->new, cpu); + + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_POSTCHANGE, freqs); cpufreq_stats_record_transition(policy, freqs->new); policy->cur = freqs->new; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 684caf067003..d01a74fbc4db 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -42,13 +42,6 @@ enum cpufreq_table_sorting { CPUFREQ_TABLE_SORTED_DESCENDING }; -struct cpufreq_freqs { - unsigned int cpu; /* cpu nr */ - unsigned int old; - unsigned int new; - u8 flags; /* flags of cpufreq_driver, see below. */ -}; - struct cpufreq_cpuinfo { unsigned int max_freq; unsigned int min_freq; @@ -156,6 +149,13 @@ struct cpufreq_policy { struct thermal_cooling_device *cdev; }; +struct cpufreq_freqs { + struct cpufreq_policy *policy; + unsigned int old; + unsigned int new; + u8 flags; /* flags of cpufreq_driver, see below. */ +}; + /* Only for ACPI */ #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ From 2acb9bdae92d0662cd4fa11c481e80b6202b5630 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 May 2019 12:35:53 +0200 Subject: [PATCH 2/6] cpufreq: Explain the kobject_put() in cpufreq_policy_alloc() It may not be particularly clear why the kobject_put() after failing kobject_init_and_add() in cpufreq_policy_alloc() is not redundant, so add a comment to explain that. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c66eefe928c5..57a3853a55fb 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1134,6 +1134,11 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) cpufreq_global_kobject, "policy%u", cpu); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); + /* + * The entire policy object will be freed below, but the extra + * memory allocated for the kobject name needs to be freed by + * releasing the kobject. + */ kobject_put(&policy->kobj); goto err_free_real_cpus; } From ab05d97a3785088fa2964219227a1ba95d97d590 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Mon, 29 Apr 2019 15:24:18 +0800 Subject: [PATCH 3/6] cpufreq: Don't find governor for setpolicy drivers in cpufreq_init_policy() In cpufreq_init_policy() we will check if there's last_governor for target and setpolicy type. However last_governor is set only if has_target() is true in cpufreq_offline(). That means find last_governor for setpolicy type is pointless. Also new_policy.governor will not be used if ->setpolicy callback is set in cpufreq_set_policy(). Moreover, there's duplicate ->setpolicy check in using default policy path. Let's add a new helper function to avoid it. Also update comments. Signed-off-by: Yue Hu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 118 +++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 52 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 57a3853a55fb..f941efd9b836 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -619,50 +619,52 @@ static struct cpufreq_governor *find_governor(const char *str_governor) return NULL; } +static int cpufreq_parse_policy(char *str_governor, + struct cpufreq_policy *policy) +{ + if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + return 0; + } + if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { + policy->policy = CPUFREQ_POLICY_POWERSAVE; + return 0; + } + return -EINVAL; +} + /** - * cpufreq_parse_governor - parse a governor string + * cpufreq_parse_governor - parse a governor string only for !setpolicy */ static int cpufreq_parse_governor(char *str_governor, struct cpufreq_policy *policy) { - if (cpufreq_driver->setpolicy) { - if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { - policy->policy = CPUFREQ_POLICY_PERFORMANCE; - return 0; - } + struct cpufreq_governor *t; - if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { - policy->policy = CPUFREQ_POLICY_POWERSAVE; - return 0; - } - } else { - struct cpufreq_governor *t; + mutex_lock(&cpufreq_governor_mutex); + + t = find_governor(str_governor); + if (!t) { + int ret; + + mutex_unlock(&cpufreq_governor_mutex); + + ret = request_module("cpufreq_%s", str_governor); + if (ret) + return -EINVAL; mutex_lock(&cpufreq_governor_mutex); t = find_governor(str_governor); - if (!t) { - int ret; + } + if (t && !try_module_get(t->owner)) + t = NULL; - mutex_unlock(&cpufreq_governor_mutex); + mutex_unlock(&cpufreq_governor_mutex); - ret = request_module("cpufreq_%s", str_governor); - if (ret) - return -EINVAL; - - mutex_lock(&cpufreq_governor_mutex); - - t = find_governor(str_governor); - } - if (t && !try_module_get(t->owner)) - t = NULL; - - mutex_unlock(&cpufreq_governor_mutex); - - if (t) { - policy->governor = t; - return 0; - } + if (t) { + policy->governor = t; + return 0; } return -EINVAL; @@ -784,8 +786,13 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, if (ret != 1) return -EINVAL; - if (cpufreq_parse_governor(str_governor, &new_policy)) - return -EINVAL; + if (cpufreq_driver->setpolicy) { + if (cpufreq_parse_policy(str_governor, &new_policy)) + return -EINVAL; + } else { + if (cpufreq_parse_governor(str_governor, &new_policy)) + return -EINVAL; + } ret = cpufreq_set_policy(policy, &new_policy); @@ -1051,32 +1058,39 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void) static int cpufreq_init_policy(struct cpufreq_policy *policy) { - struct cpufreq_governor *gov = NULL; + struct cpufreq_governor *gov = NULL, *def_gov = NULL; struct cpufreq_policy new_policy; memcpy(&new_policy, policy, sizeof(*policy)); - /* Update governor of new_policy to the governor used before hotplug */ - gov = find_governor(policy->last_governor); - if (gov) { - pr_debug("Restoring governor %s for cpu %d\n", + def_gov = cpufreq_default_governor(); + + if (has_target()) { + /* + * Update governor of new_policy to the governor used before + * hotplug + */ + gov = find_governor(policy->last_governor); + if (gov) { + pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, policy->cpu); + } else { + if (!def_gov) + return -ENODATA; + gov = def_gov; + } + new_policy.governor = gov; } else { - gov = cpufreq_default_governor(); - if (!gov) - return -ENODATA; - } - - new_policy.governor = gov; - - /* Use the default policy if there is no last_policy. */ - if (cpufreq_driver->setpolicy) { - if (policy->last_policy) + /* Use the default policy if there is no last_policy. */ + if (policy->last_policy) { new_policy.policy = policy->last_policy; - else - cpufreq_parse_governor(gov->name, &new_policy); + } else { + if (!def_gov) + return -ENODATA; + cpufreq_parse_policy(def_gov->name, &new_policy); + } } - /* set default policy */ + return cpufreq_set_policy(policy, &new_policy); } From 32345f45379b9accb035bbad6d6a253724329ea4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 13 May 2019 12:40:33 +0530 Subject: [PATCH 4/6] cpufreq: Update MAINTAINERS to include schedutil governor List cpufreq maintainers as well for schedutil governor apart from scheduler maintainers. Signed-off-by: Viresh Kumar Acked-by: Ingo Molnar Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index bd40a852207b..8d67bc628aaf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4073,7 +4073,9 @@ F: Documentation/admin-guide/pm/intel_pstate.rst F: Documentation/cpu-freq/ F: Documentation/devicetree/bindings/cpufreq/ F: drivers/cpufreq/ +F: kernel/sched/cpufreq*.c F: include/linux/cpufreq.h +F: include/linux/sched/cpufreq.h F: tools/testing/selftests/cpufreq/ CPU FREQUENCY DRIVERS - ARM BIG LITTLE From ed61e18a4b4e445ae8622a4faec6a259601cc4a5 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 30 Apr 2019 15:06:11 +0000 Subject: [PATCH 5/6] PM / Domains: Add GENPD_FLAG_RPM_ALWAYS_ON flag This is for power domains which can only be powered off for suspend but not as part of runtime PM. Suggested-by: Ulf Hansson Signed-off-by: Leonard Crestez Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 8 ++++++-- include/linux/pm_domain.h | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3d899e8abd58..a887575fafa9 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -130,6 +130,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) #define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) +#define genpd_is_rpm_always_on(genpd) (genpd->flags & GENPD_FLAG_RPM_ALWAYS_ON) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) @@ -517,7 +518,9 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, * (1) The domain is configured as always on. * (2) When the domain has a subdomain being powered on. */ - if (genpd_is_always_on(genpd) || atomic_read(&genpd->sd_count) > 0) + if (genpd_is_always_on(genpd) || + genpd_is_rpm_always_on(genpd) || + atomic_read(&genpd->sd_count) > 0) return -EBUSY; list_for_each_entry(pdd, &genpd->dev_list, list_node) { @@ -1814,7 +1817,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, } /* Always-on domains must be powered on at initialization. */ - if (genpd_is_always_on(genpd) && !genpd_status_on(genpd)) + if ((genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd)) && + !genpd_status_on(genpd)) return -EINVAL; if (genpd_is_cpu_domain(genpd) && diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 0e8e356bed6a..b21f35f0ee2e 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -53,12 +53,16 @@ * driver must then comply with the so called, * last-man-standing algorithm, for the CPUs in the * PM domain. + * + * GENPD_FLAG_RPM_ALWAYS_ON: Instructs genpd to always keep the PM domain + * powered on except for system suspend. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) #define GENPD_FLAG_ALWAYS_ON (1U << 2) #define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) #define GENPD_FLAG_CPU_DOMAIN (1U << 4) +#define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ From 8d5d766f00726efd0abd2e7835b0e538fe4ad6e4 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 30 Apr 2019 15:06:12 +0000 Subject: [PATCH 6/6] soc: imx: gpc: Use GENPD_FLAG_RPM_ALWAYS_ON for ERR009619 This allows PU domain to be turned off in suspend and save power. Signed-off-by: Leonard Crestez Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/soc/imx/gpc.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index 7d14a4b4e82a..29b43651c261 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -431,10 +431,19 @@ static int imx_gpc_probe(struct platform_device *pdev) return ret; } - /* Disable PU power down in normal operation if ERR009619 is present */ + /* + * Disable PU power down by runtime PM if ERR009619 is present. + * + * The PRE clock will be paused for several cycles when turning on the + * PU domain LDO from power down state. If PRE is in use at that time, + * the IPU/PRG cannot get the correct display data from the PRE. + * + * This is not a concern when the whole system enters suspend state, so + * it's safe to power down PU in this case. + */ if (of_id_data->err009619_present) imx_gpc_domains[GPC_PGC_DOMAIN_PU].base.flags |= - GENPD_FLAG_ALWAYS_ON; + GENPD_FLAG_RPM_ALWAYS_ON; /* Keep DISP always on if ERR006287 is present */ if (of_id_data->err006287_present)