Commit 6ef746769ef5cfef84cdfdf61ecbab5a6aa4651a
Merge tag 'pm-4.20-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki:
 "These remove a questionable heuristic from the menu cpuidle governor,
  fix a recent build regression in the intel_pstate driver, clean up ARM
  big.LITTLE support in cpufreq and fix up hung task watchdog's
  interaction with system-wide power management transitions.

  Specifics:

   - Fix build regression in the intel_pstate driver that doesn't build
     without CONFIG_ACPI after recent changes (Dominik Brodowski).

   - One of the heuristics in the menu cpuidle governor is based on a
     function returning 0 most of the time, so drop it and clean up the
     scheduler code related to it (Daniel Lezcano).

   - Prevent the arm_big_little cpufreq driver from being used on ARM64
     which is not suitable for it and drop the arm_big_little_dt driver
     that is not used any more (Sudeep Holla).

   - Prevent the hung task watchdog from triggering during resume from
     system-wide sleep states by disabling it before freezing tasks and
     enabling it again after they have been thawed (Vitaly Kuznetsov)"

* tag 'pm-4.20-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  kernel: hung_task.c: disable on suspend
  cpufreq: remove unused arm_big_little_dt driver
  cpufreq: drop ARM_BIG_LITTLE_CPUFREQ support for ARM64
  cpufreq: intel_pstate: Fix compilation for !CONFIG_ACPI
  cpuidle: menu: Remove get_loadavg() from the performance multiplier
  sched: Factor out nr_iowait and nr_iowait_cpu
Showing 9 changed files
MAINTAINERS
drivers/cpufreq/Kconfig.arm
... | ... | @@ -28,19 +28,12 @@ |
28 | 28 | # big LITTLE core layer and glue drivers |
29 | 29 | config ARM_BIG_LITTLE_CPUFREQ |
30 | 30 | tristate "Generic ARM big LITTLE CPUfreq driver" |
31 | - depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK | |
31 | + depends on ARM_CPU_TOPOLOGY && HAVE_CLK | |
32 | 32 | # if CPU_THERMAL is on and THERMAL=m, ARM_BIT_LITTLE_CPUFREQ cannot be =y |
33 | 33 | depends on !CPU_THERMAL || THERMAL |
34 | 34 | select PM_OPP |
35 | 35 | help |
36 | 36 | This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. |
37 | - | |
38 | -config ARM_DT_BL_CPUFREQ | |
39 | - tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver" | |
40 | - depends on ARM_BIG_LITTLE_CPUFREQ && OF | |
41 | - help | |
42 | - This enables probing via DT for Generic CPUfreq driver for ARM | |
43 | - big.LITTLE platform. This gets frequency tables from DT. | |
44 | 37 | |
45 | 38 | config ARM_SCPI_CPUFREQ |
46 | 39 | tristate "SCPI based CPUfreq driver" |
drivers/cpufreq/Makefile
... | ... | @@ -48,9 +48,6 @@ |
48 | 48 | ################################################################################## |
49 | 49 | # ARM SoC drivers |
50 | 50 | obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o |
51 | -# big LITTLE per platform glues. Keep DT_BL_CPUFREQ as the last entry in all big | |
52 | -# LITTLE drivers, so that it is probed last. | |
53 | -obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o | |
54 | 51 | |
55 | 52 | obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o |
56 | 53 | obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o |
drivers/cpufreq/arm_big_little_dt.c
1 | -/* | |
2 | - * Generic big.LITTLE CPUFreq Interface driver | |
3 | - * | |
4 | - * It provides necessary ops to arm_big_little cpufreq driver and gets | |
5 | - * Frequency information from Device Tree. Freq table in DT must be in KHz. | |
6 | - * | |
7 | - * Copyright (C) 2013 Linaro. | |
8 | - * Viresh Kumar <viresh.kumar@linaro.org> | |
9 | - * | |
10 | - * This program is free software; you can redistribute it and/or modify | |
11 | - * it under the terms of the GNU General Public License version 2 as | |
12 | - * published by the Free Software Foundation. | |
13 | - * | |
14 | - * This program is distributed "as is" WITHOUT ANY WARRANTY of any | |
15 | - * kind, whether express or implied; without even the implied warranty | |
16 | - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | - * GNU General Public License for more details. | |
18 | - */ | |
19 | - | |
20 | -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
21 | - | |
22 | -#include <linux/cpufreq.h> | |
23 | -#include <linux/device.h> | |
24 | -#include <linux/export.h> | |
25 | -#include <linux/module.h> | |
26 | -#include <linux/of_device.h> | |
27 | -#include <linux/pm_opp.h> | |
28 | -#include <linux/platform_device.h> | |
29 | -#include <linux/slab.h> | |
30 | -#include <linux/types.h> | |
31 | -#include "arm_big_little.h" | |
32 | - | |
33 | -/* get cpu node with valid operating-points */ | |
34 | -static struct device_node *get_cpu_node_with_valid_op(int cpu) | |
35 | -{ | |
36 | - struct device_node *np = of_cpu_device_node_get(cpu); | |
37 | - | |
38 | - if (!of_get_property(np, "operating-points", NULL)) { | |
39 | - of_node_put(np); | |
40 | - np = NULL; | |
41 | - } | |
42 | - | |
43 | - return np; | |
44 | -} | |
45 | - | |
46 | -static int dt_get_transition_latency(struct device *cpu_dev) | |
47 | -{ | |
48 | - struct device_node *np; | |
49 | - u32 transition_latency = CPUFREQ_ETERNAL; | |
50 | - | |
51 | - np = of_node_get(cpu_dev->of_node); | |
52 | - if (!np) { | |
53 | - pr_info("Failed to find cpu node. Use CPUFREQ_ETERNAL transition latency\n"); | |
54 | - return CPUFREQ_ETERNAL; | |
55 | - } | |
56 | - | |
57 | - of_property_read_u32(np, "clock-latency", &transition_latency); | |
58 | - of_node_put(np); | |
59 | - | |
60 | - pr_debug("%s: clock-latency: %d\n", __func__, transition_latency); | |
61 | - return transition_latency; | |
62 | -} | |
63 | - | |
64 | -static const struct cpufreq_arm_bL_ops dt_bL_ops = { | |
65 | - .name = "dt-bl", | |
66 | - .get_transition_latency = dt_get_transition_latency, | |
67 | - .init_opp_table = dev_pm_opp_of_cpumask_add_table, | |
68 | - .free_opp_table = dev_pm_opp_of_cpumask_remove_table, | |
69 | -}; | |
70 | - | |
71 | -static int generic_bL_probe(struct platform_device *pdev) | |
72 | -{ | |
73 | - struct device_node *np; | |
74 | - | |
75 | - np = get_cpu_node_with_valid_op(0); | |
76 | - if (!np) | |
77 | - return -ENODEV; | |
78 | - | |
79 | - of_node_put(np); | |
80 | - return bL_cpufreq_register(&dt_bL_ops); | |
81 | -} | |
82 | - | |
83 | -static int generic_bL_remove(struct platform_device *pdev) | |
84 | -{ | |
85 | - bL_cpufreq_unregister(&dt_bL_ops); | |
86 | - return 0; | |
87 | -} | |
88 | - | |
89 | -static struct platform_driver generic_bL_platdrv = { | |
90 | - .driver = { | |
91 | - .name = "arm-bL-cpufreq-dt", | |
92 | - }, | |
93 | - .probe = generic_bL_probe, | |
94 | - .remove = generic_bL_remove, | |
95 | -}; | |
96 | -module_platform_driver(generic_bL_platdrv); | |
97 | - | |
98 | -MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>"); | |
99 | -MODULE_DESCRIPTION("Generic ARM big LITTLE cpufreq driver via DT"); | |
100 | -MODULE_LICENSE("GPL v2"); |
drivers/cpufreq/intel_pstate.c
... | ... | @@ -386,17 +386,12 @@ |
386 | 386 | return cppc_perf.guaranteed_perf; |
387 | 387 | } |
388 | 388 | |
389 | -#else | |
389 | +#else /* CONFIG_ACPI_CPPC_LIB */ | |
390 | 390 | static void intel_pstate_set_itmt_prio(int cpu) |
391 | 391 | { |
392 | 392 | } |
393 | +#endif /* CONFIG_ACPI_CPPC_LIB */ | |
393 | 394 | |
394 | -static int intel_pstate_get_cppc_guranteed(int cpu) | |
395 | -{ | |
396 | - return -ENOTSUPP; | |
397 | -} | |
398 | -#endif | |
399 | - | |
400 | 395 | static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) |
401 | 396 | { |
402 | 397 | struct cpudata *cpu; |
... | ... | @@ -477,7 +472,7 @@ |
477 | 472 | |
478 | 473 | acpi_processor_unregister_performance(policy->cpu); |
479 | 474 | } |
480 | -#else | |
475 | +#else /* CONFIG_ACPI */ | |
481 | 476 | static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) |
482 | 477 | { |
483 | 478 | } |
... | ... | @@ -490,7 +485,14 @@ |
490 | 485 | { |
491 | 486 | return false; |
492 | 487 | } |
493 | -#endif | |
488 | +#endif /* CONFIG_ACPI */ | |
489 | + | |
490 | +#ifndef CONFIG_ACPI_CPPC_LIB | |
491 | +static int intel_pstate_get_cppc_guranteed(int cpu) | |
492 | +{ | |
493 | + return -ENOTSUPP; | |
494 | +} | |
495 | +#endif /* CONFIG_ACPI_CPPC_LIB */ | |
494 | 496 | |
495 | 497 | static inline void update_turbo_state(void) |
496 | 498 | { |
drivers/cpuidle/governors/menu.c
... | ... | @@ -130,11 +130,6 @@ |
130 | 130 | int interval_ptr; |
131 | 131 | }; |
132 | 132 | |
133 | -static inline int get_loadavg(unsigned long load) | |
134 | -{ | |
135 | - return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10; | |
136 | -} | |
137 | - | |
138 | 133 | static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) |
139 | 134 | { |
140 | 135 | int bucket = 0; |
141 | 136 | |
... | ... | @@ -168,18 +163,10 @@ |
168 | 163 | * to be, the higher this multiplier, and thus the higher |
169 | 164 | * the barrier to go to an expensive C state. |
170 | 165 | */ |
171 | -static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load) | |
166 | +static inline int performance_multiplier(unsigned long nr_iowaiters) | |
172 | 167 | { |
173 | - int mult = 1; | |
174 | - | |
175 | - /* for higher loadavg, we are more reluctant */ | |
176 | - | |
177 | - mult += 2 * get_loadavg(load); | |
178 | - | |
179 | - /* for IO wait tasks (per cpu!) we add 5x each */ | |
180 | - mult += 10 * nr_iowaiters; | |
181 | - | |
182 | - return mult; | |
168 | + /* for IO wait tasks (per cpu!) we add 10x each */ | |
169 | + return 1 + 10 * nr_iowaiters; | |
183 | 170 | } |
184 | 171 | |
185 | 172 | static DEFINE_PER_CPU(struct menu_device, menu_devices); |
... | ... | @@ -297,7 +284,7 @@ |
297 | 284 | int idx; |
298 | 285 | unsigned int interactivity_req; |
299 | 286 | unsigned int predicted_us; |
300 | - unsigned long nr_iowaiters, cpu_load; | |
287 | + unsigned long nr_iowaiters; | |
301 | 288 | ktime_t delta_next; |
302 | 289 | |
303 | 290 | if (data->needs_update) { |
... | ... | @@ -308,7 +295,7 @@ |
308 | 295 | /* determine the expected residency time, round up */ |
309 | 296 | data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); |
310 | 297 | |
311 | - get_iowait_load(&nr_iowaiters, &cpu_load); | |
298 | + nr_iowaiters = nr_iowait_cpu(dev->cpu); | |
312 | 299 | data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); |
313 | 300 | |
314 | 301 | if (unlikely(drv->state_count <= 1 || latency_req == 0) || |
... | ... | @@ -352,7 +339,7 @@ |
352 | 339 | * Use the performance multiplier and the user-configurable |
353 | 340 | * latency_req to determine the maximum exit latency. |
354 | 341 | */ |
355 | - interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load); | |
342 | + interactivity_req = predicted_us / performance_multiplier(nr_iowaiters); | |
356 | 343 | if (latency_req > interactivity_req) |
357 | 344 | latency_req = interactivity_req; |
358 | 345 | } |
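To see what the menu governor loses here, it helps to run the numbers. The dropped term multiplied get_loadavg() of the per-CPU runqueue weight, and the stated rationale for removing it is that this value is practically always zero by the time the governor runs on a CPU that is about to idle, so the term contributed nothing. The standalone program below (ordinary user-space C, not kernel code; the macros mirror the kernel's fixed-point loadavg helpers) reproduces the old and new arithmetic and the resulting interactivity_req cap.

/*
 * Standalone sketch of the multiplier arithmetic, not kernel code.
 * "load" stands in for the per-CPU runqueue weight passed by the old
 * get_iowait_load(); it is essentially always 0 when the idle governor
 * runs, so the old loadavg term was a no-op in practice.
 */
#include <stdio.h>

#define FSHIFT		11
#define FIXED_1		(1 << FSHIFT)
#define LOAD_INT(x)	((x) >> FSHIFT)
#define LOAD_FRAC(x)	LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

static int old_multiplier(unsigned long nr_iowaiters, unsigned long load)
{
	int mult = 1;

	mult += 2 * (LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10);
	mult += 10 * nr_iowaiters;
	return mult;
}

static int new_multiplier(unsigned long nr_iowaiters)
{
	return 1 + 10 * nr_iowaiters;
}

int main(void)
{
	unsigned long predicted_us = 200, nr_iowaiters = 2, load = 0;

	/* With load == 0 (the common case) old and new agree: 21. */
	printf("old=%d new=%d\n",
	       old_multiplier(nr_iowaiters, load),
	       new_multiplier(nr_iowaiters));

	/* interactivity_req caps latency_req: 200 / 21 = 9 us here. */
	printf("interactivity_req=%lu us\n",
	       predicted_us / new_multiplier(nr_iowaiters));
	return 0;
}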
include/linux/sched/stat.h
... | ... | @@ -20,7 +20,6 @@ |
20 | 20 | extern bool single_task_running(void); |
21 | 21 | extern unsigned long nr_iowait(void); |
22 | 22 | extern unsigned long nr_iowait_cpu(int cpu); |
23 | -extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); | |
24 | 23 | |
25 | 24 | static inline int sched_info_on(void) |
26 | 25 | { |
kernel/hung_task.c
... | ... | @@ -15,6 +15,7 @@ |
15 | 15 | #include <linux/lockdep.h> |
16 | 16 | #include <linux/export.h> |
17 | 17 | #include <linux/sysctl.h> |
18 | +#include <linux/suspend.h> | |
18 | 19 | #include <linux/utsname.h> |
19 | 20 | #include <linux/sched/signal.h> |
20 | 21 | #include <linux/sched/debug.h> |
... | ... | @@ -242,6 +243,28 @@ |
242 | 243 | } |
243 | 244 | EXPORT_SYMBOL_GPL(reset_hung_task_detector); |
244 | 245 | |
246 | +static bool hung_detector_suspended; | |
247 | + | |
248 | +static int hungtask_pm_notify(struct notifier_block *self, | |
249 | + unsigned long action, void *hcpu) | |
250 | +{ | |
251 | + switch (action) { | |
252 | + case PM_SUSPEND_PREPARE: | |
253 | + case PM_HIBERNATION_PREPARE: | |
254 | + case PM_RESTORE_PREPARE: | |
255 | + hung_detector_suspended = true; | |
256 | + break; | |
257 | + case PM_POST_SUSPEND: | |
258 | + case PM_POST_HIBERNATION: | |
259 | + case PM_POST_RESTORE: | |
260 | + hung_detector_suspended = false; | |
261 | + break; | |
262 | + default: | |
263 | + break; | |
264 | + } | |
265 | + return NOTIFY_OK; | |
266 | +} | |
267 | + | |
245 | 268 | /* |
246 | 269 | * kthread which checks for tasks stuck in D state |
247 | 270 | */ |
... | ... | @@ -261,7 +284,8 @@ |
261 | 284 | interval = min_t(unsigned long, interval, timeout); |
262 | 285 | t = hung_timeout_jiffies(hung_last_checked, interval); |
263 | 286 | if (t <= 0) { |
264 | - if (!atomic_xchg(&reset_hung_task, 0)) | |
287 | + if (!atomic_xchg(&reset_hung_task, 0) && | |
288 | + !hung_detector_suspended) | |
265 | 289 | check_hung_uninterruptible_tasks(timeout); |
266 | 290 | hung_last_checked = jiffies; |
267 | 291 | continue; |
... | ... | @@ -275,6 +299,10 @@ |
275 | 299 | static int __init hung_task_init(void) |
276 | 300 | { |
277 | 301 | atomic_notifier_chain_register(&panic_notifier_list, &panic_block); |
302 | + | |
303 | + /* Disable hung task detector on suspend */ | |
304 | + pm_notifier(hungtask_pm_notify, 0); | |
305 | + | |
278 | 306 | watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); |
279 | 307 | |
280 | 308 | return 0; |
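The hung_task.c hunks follow the standard PM-notifier pattern: set a flag on the *_PREPARE events (sent before tasks are frozen), clear it on the POST_* events (sent after tasks are thawed), and have the periodic check bail out while the flag is set. For comparison, here is a hedged sketch of the same pattern in a loadable module; the example_* names are made up, and it uses register_pm_notifier()/unregister_pm_notifier() rather than the pm_notifier() convenience macro so the hook can be removed at module exit, something hung_task.c never needs to do.

/*
 * Hedged sketch (not part of this commit): how a loadable module could
 * use the same PM notifier pattern to pause its own periodic work
 * across suspend/hibernation.
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/suspend.h>

static bool example_work_suspended;

static int example_pm_notify(struct notifier_block *nb,
			     unsigned long action, void *data)
{
	switch (action) {
	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
	case PM_RESTORE_PREPARE:
		example_work_suspended = true;	/* periodic checks become no-ops */
		break;
	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
	case PM_POST_RESTORE:
		example_work_suspended = false;	/* tasks are thawed, resume checks */
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_pm_nb = {
	.notifier_call = example_pm_notify,
};

static int __init example_init(void)
{
	return register_pm_notifier(&example_pm_nb);
}

static void __exit example_exit(void)
{
	unregister_pm_notifier(&example_pm_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");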
kernel/sched/core.c
... | ... | @@ -2881,6 +2881,18 @@ |
2881 | 2881 | } |
2882 | 2882 | |
2883 | 2883 | /* |
2884 | + * Consumers of these two interfaces, like for example the cpuidle menu | |
2885 | + * governor, are using nonsensical data. Preferring shallow idle state selection | |
2886 | + * for a CPU that has IO-wait which might not even end up running the task when | |
2887 | + * it does become runnable. | |
2888 | + */ | |
2889 | + | |
2890 | +unsigned long nr_iowait_cpu(int cpu) | |
2891 | +{ | |
2892 | + return atomic_read(&cpu_rq(cpu)->nr_iowait); | |
2893 | +} | |
2894 | + | |
2895 | +/* | |
2884 | 2896 | * IO-wait accounting, and how its mostly bollocks (on SMP). |
2885 | 2897 | * |
2886 | 2898 | * The idea behind IO-wait account is to account the idle time that we could |
2887 | 2899 | |
... | ... | @@ -2915,29 +2927,9 @@ |
2915 | 2927 | unsigned long i, sum = 0; |
2916 | 2928 | |
2917 | 2929 | for_each_possible_cpu(i) |
2918 | - sum += atomic_read(&cpu_rq(i)->nr_iowait); | |
2930 | + sum += nr_iowait_cpu(i); | |
2919 | 2931 | |
2920 | 2932 | return sum; |
2921 | -} | |
2922 | - | |
2923 | -/* | |
2924 | - * Consumers of these two interfaces, like for example the cpuidle menu | |
2925 | - * governor, are using nonsensical data. Preferring shallow idle state selection | |
2926 | - * for a CPU that has IO-wait which might not even end up running the task when | |
2927 | - * it does become runnable. | |
2928 | - */ | |
2929 | - | |
2930 | -unsigned long nr_iowait_cpu(int cpu) | |
2931 | -{ | |
2932 | - struct rq *this = cpu_rq(cpu); | |
2933 | - return atomic_read(&this->nr_iowait); | |
2934 | -} | |
2935 | - | |
2936 | -void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) | |
2937 | -{ | |
2938 | - struct rq *rq = this_rq(); | |
2939 | - *nr_waiters = atomic_read(&rq->nr_iowait); | |
2940 | - *load = rq->load.weight; | |
2941 | 2933 | } |
2942 | 2934 | |
2943 | 2935 | #ifdef CONFIG_SMP |