Commit c32b6b8e524d2c337767d312814484d9289550cf
Committed by
Linus Torvalds
1 parent
d434fca737
Exists in
master
and in
39 other branches
[PATCH] create and destroy cpufreq sysfs entries based on cpu notifiers
cpufreq entries in sysfs should only be populated when the CPU is in the online state. When we boot with maxcpus=x and then bring up the other cpus by echoing to the sysfs online file, these entries should be created when CPU_ONLINE is notified and destroyed when CPU_DEAD is notified. Same treatment as cache entries under sysfs. We place the processor in the lowest frequency before it goes down, so hw managed P-State transitions can still work on the other threads to save power. Primary goal was to just make these directories appear/disappear dynamically. There is one thing in this patch I had to do, which I really don't like myself, but it is probably best if someone handling the cpufreq infrastructure could give this code the right treatment if this is not acceptable. I guess it's probably good for the first cut. - Converting lock_cpu_hotplug()/unlock_cpu_hotplug() to disable/enable preempt. The locking was smack in the middle of the notification path, when the hotplug is already holding the lock. I tried another solution to avoid this, i.e. to avoid taking locks if we know we are called from the notification path. The solution was getting very ugly and I decided this was probably good for this iteration until someone who understands cpufreq could do a better job than me. (akpm: export cpucontrol to GPL modules: drivers/cpufreq/cpufreq_stats.c now does lock_cpu_hotplug()) Signed-off-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> Cc: Dave Jones <davej@codemonkey.org.uk> Cc: Zwane Mwaikambo <zwane@holomorphy.com> Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 3 changed files with 103 additions and 9 deletions Side-by-side Diff
drivers/cpufreq/cpufreq.c
... | ... | @@ -4,6 +4,9 @@ |
4 | 4 | * Copyright (C) 2001 Russell King |
5 | 5 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> |
6 | 6 | * |
7 | + * Oct 2005 - Ashok Raj <ashok.raj@intel.com> | |
8 | + * Added handling for CPU hotplug | |
9 | + * | |
7 | 10 | * This program is free software; you can redistribute it and/or modify |
8 | 11 | * it under the terms of the GNU General Public License version 2 as |
9 | 12 | * published by the Free Software Foundation. |
... | ... | @@ -567,6 +570,9 @@ |
567 | 570 | unsigned long flags; |
568 | 571 | unsigned int j; |
569 | 572 | |
573 | + if (cpu_is_offline(cpu)) | |
574 | + return 0; | |
575 | + | |
570 | 576 | cpufreq_debug_disable_ratelimit(); |
571 | 577 | dprintk("adding CPU %u\n", cpu); |
572 | 578 | |
... | ... | @@ -673,7 +679,7 @@ |
673 | 679 | |
674 | 680 | nomem_out: |
675 | 681 | module_put(cpufreq_driver->owner); |
676 | - module_out: | |
682 | +module_out: | |
677 | 683 | cpufreq_debug_enable_ratelimit(); |
678 | 684 | return ret; |
679 | 685 | } |
... | ... | @@ -762,7 +768,6 @@ |
762 | 768 | down(&data->lock); |
763 | 769 | if (cpufreq_driver->target) |
764 | 770 | __cpufreq_governor(data, CPUFREQ_GOV_STOP); |
765 | - cpufreq_driver->target = NULL; | |
766 | 771 | up(&data->lock); |
767 | 772 | |
768 | 773 | kobject_unregister(&data->kobj); |
769 | 774 | |
770 | 775 | |
... | ... | @@ -1109,17 +1114,30 @@ |
1109 | 1114 | unsigned int relation) |
1110 | 1115 | { |
1111 | 1116 | int retval = -EINVAL; |
1112 | - lock_cpu_hotplug(); | |
1117 | + | |
1118 | + /* | |
1119 | + * Converted the lock_cpu_hotplug to preempt_disable() | |
1120 | + * and preempt_enable(). This is a bit kludgy and relies on how cpu | |
1121 | + * hotplug works. All we need is a guarantee that cpu hotplug won't make | |
1122 | + * progress on any cpu. Once we do preempt_disable(), this would ensure | |
1123 | + * that hotplug threads don't get onto this cpu, thereby delaying | |
1124 | + * the cpu remove process. | |
1125 | + * | |
1126 | + * We removed the lock_cpu_hotplug since we need to call this function | |
1127 | + * via cpu hotplug callbacks, which result in locking the cpu hotplug | |
1128 | + * thread itself. Agree this is not very clean, cpufreq community | |
1129 | + * could improve this if required. - Ashok Raj <ashok.raj@intel.com> | |
1130 | + */ | |
1131 | + preempt_disable(); | |
1113 | 1132 | dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, |
1114 | 1133 | target_freq, relation); |
1115 | 1134 | if (cpu_online(policy->cpu) && cpufreq_driver->target) |
1116 | 1135 | retval = cpufreq_driver->target(policy, target_freq, relation); |
1117 | - unlock_cpu_hotplug(); | |
1136 | + preempt_enable(); | |
1118 | 1137 | return retval; |
1119 | 1138 | } |
1120 | 1139 | EXPORT_SYMBOL_GPL(__cpufreq_driver_target); |
1121 | 1140 | |
1122 | - | |
1123 | 1141 | int cpufreq_driver_target(struct cpufreq_policy *policy, |
1124 | 1142 | unsigned int target_freq, |
1125 | 1143 | unsigned int relation) |
1126 | 1144 | |
... | ... | @@ -1406,7 +1424,46 @@ |
1406 | 1424 | } |
1407 | 1425 | EXPORT_SYMBOL(cpufreq_update_policy); |
1408 | 1426 | |
1427 | +static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, | |
1428 | + unsigned long action, void *hcpu) | |
1429 | +{ | |
1430 | + unsigned int cpu = (unsigned long)hcpu; | |
1431 | + struct cpufreq_policy *policy; | |
1432 | + struct sys_device *sys_dev; | |
1409 | 1433 | |
1434 | + sys_dev = get_cpu_sysdev(cpu); | |
1435 | + | |
1436 | + if (sys_dev) { | |
1437 | + switch (action) { | |
1438 | + case CPU_ONLINE: | |
1439 | + cpufreq_add_dev(sys_dev); | |
1440 | + break; | |
1441 | + case CPU_DOWN_PREPARE: | |
1442 | + /* | |
1443 | + * We attempt to put this cpu in lowest frequency | |
1444 | + * possible before going down. This will permit | |
1445 | + * hardware-managed P-State to switch other related | |
1446 | + * threads to min or higher speeds if possible. | |
1447 | + */ | |
1448 | + policy = cpufreq_cpu_data[cpu]; | |
1449 | + if (policy) { | |
1450 | + cpufreq_driver_target(policy, policy->min, | |
1451 | + CPUFREQ_RELATION_H); | |
1452 | + } | |
1453 | + break; | |
1454 | + case CPU_DEAD: | |
1455 | + cpufreq_remove_dev(sys_dev); | |
1456 | + break; | |
1457 | + } | |
1458 | + } | |
1459 | + return NOTIFY_OK; | |
1460 | +} | |
1461 | + | |
1462 | +static struct notifier_block cpufreq_cpu_notifier = | |
1463 | +{ | |
1464 | + .notifier_call = cpufreq_cpu_callback, | |
1465 | +}; | |
1466 | + | |
1410 | 1467 | /********************************************************************* |
1411 | 1468 | * REGISTER / UNREGISTER CPUFREQ DRIVER * |
1412 | 1469 | *********************************************************************/ |
... | ... | @@ -1466,6 +1523,7 @@ |
1466 | 1523 | } |
1467 | 1524 | |
1468 | 1525 | if (!ret) { |
1526 | + register_cpu_notifier(&cpufreq_cpu_notifier); | |
1469 | 1527 | dprintk("driver %s up and running\n", driver_data->name); |
1470 | 1528 | cpufreq_debug_enable_ratelimit(); |
1471 | 1529 | } |
... | ... | @@ -1497,6 +1555,7 @@ |
1497 | 1555 | dprintk("unregistering driver %s\n", driver->name); |
1498 | 1556 | |
1499 | 1557 | sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); |
1558 | + unregister_cpu_notifier(&cpufreq_cpu_notifier); | |
1500 | 1559 | |
1501 | 1560 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
1502 | 1561 | cpufreq_driver = NULL; |
drivers/cpufreq/cpufreq_stats.c
... | ... | @@ -19,6 +19,7 @@ |
19 | 19 | #include <linux/percpu.h> |
20 | 20 | #include <linux/kobject.h> |
21 | 21 | #include <linux/spinlock.h> |
22 | +#include <linux/notifier.h> | |
22 | 23 | #include <asm/cputime.h> |
23 | 24 | |
24 | 25 | static spinlock_t cpufreq_stats_lock; |
... | ... | @@ -298,6 +299,27 @@ |
298 | 299 | return 0; |
299 | 300 | } |
300 | 301 | |
302 | +static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb, | |
303 | + unsigned long action, void *hcpu) | |
304 | +{ | |
305 | + unsigned int cpu = (unsigned long)hcpu; | |
306 | + | |
307 | + switch (action) { | |
308 | + case CPU_ONLINE: | |
309 | + cpufreq_update_policy(cpu); | |
310 | + break; | |
311 | + case CPU_DEAD: | |
312 | + cpufreq_stats_free_table(cpu); | |
313 | + break; | |
314 | + } | |
315 | + return NOTIFY_OK; | |
316 | +} | |
317 | + | |
318 | +static struct notifier_block cpufreq_stat_cpu_notifier = | |
319 | +{ | |
320 | + .notifier_call = cpufreq_stat_cpu_callback, | |
321 | +}; | |
322 | + | |
301 | 323 | static struct notifier_block notifier_policy_block = { |
302 | 324 | .notifier_call = cpufreq_stat_notifier_policy |
303 | 325 | }; |
... | ... | @@ -311,6 +333,7 @@ |
311 | 333 | { |
312 | 334 | int ret; |
313 | 335 | unsigned int cpu; |
336 | + | |
314 | 337 | spin_lock_init(&cpufreq_stats_lock); |
315 | 338 | if ((ret = cpufreq_register_notifier(&notifier_policy_block, |
316 | 339 | CPUFREQ_POLICY_NOTIFIER))) |
317 | 340 | |
318 | 341 | |
... | ... | @@ -323,20 +346,31 @@ |
323 | 346 | return ret; |
324 | 347 | } |
325 | 348 | |
326 | - for_each_cpu(cpu) | |
327 | - cpufreq_update_policy(cpu); | |
349 | + register_cpu_notifier(&cpufreq_stat_cpu_notifier); | |
350 | + lock_cpu_hotplug(); | |
351 | + for_each_online_cpu(cpu) { | |
352 | + cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE, | |
353 | + (void *)(long)cpu); | |
354 | + } | |
355 | + unlock_cpu_hotplug(); | |
328 | 356 | return 0; |
329 | 357 | } |
330 | 358 | static void |
331 | 359 | __exit cpufreq_stats_exit(void) |
332 | 360 | { |
333 | 361 | unsigned int cpu; |
362 | + | |
334 | 363 | cpufreq_unregister_notifier(&notifier_policy_block, |
335 | 364 | CPUFREQ_POLICY_NOTIFIER); |
336 | 365 | cpufreq_unregister_notifier(&notifier_trans_block, |
337 | 366 | CPUFREQ_TRANSITION_NOTIFIER); |
338 | - for_each_cpu(cpu) | |
339 | - cpufreq_stats_free_table(cpu); | |
367 | + unregister_cpu_notifier(&cpufreq_stat_cpu_notifier); | |
368 | + lock_cpu_hotplug(); | |
369 | + for_each_online_cpu(cpu) { | |
370 | + cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_DEAD, | |
371 | + (void *)(long)cpu); | |
372 | + } | |
373 | + unlock_cpu_hotplug(); | |
340 | 374 | } |
341 | 375 | |
342 | 376 | MODULE_AUTHOR ("Zou Nan hai <nanhai.zou@intel.com>"); |