Commit c32b6b8e524d2c337767d312814484d9289550cf

Authored by Ashok Raj
Committed by Linus Torvalds
1 parent d434fca737

[PATCH] create and destroy cpufreq sysfs entries based on cpu notifiers

cpufreq entries in sysfs should only be populated when the CPU is in the
online state.  When we boot with maxcpus=x and then bring up the other cpus
by echoing to the sysfs online file, these entries should be created, and
they should be destroyed when CPU_DEAD is notified.  Same treatment as cache
entries under sysfs.

We place the processor in the lowest frequency, so hw managed P-State
transitions can still work on the other threads to save power.

The primary goal was simply to make these directories appear/disappear dynamically.

There is one thing in this patch I had to do which I really don't like
myself, but it is probably best if someone handling the cpufreq
infrastructure gives this code the right treatment if this is not
acceptable.  I guess it's probably good enough for the first cut.

- Converting lock_cpu_hotplug()/unlock_cpu_hotplug() to disable/enable preempt.
  The locking was smack in the middle of the notification path, when the
  hotplug code is already holding the lock.  I tried another solution to avoid
  this — not taking the locks if we know we are called from the notification
  path — but that solution was getting very ugly, and I decided this was
  probably good enough for this iteration until someone who understands
  cpufreq can do a better job than me.

(akpm: export cpucontrol to GPL modules: drivers/cpufreq/cpufreq_stats.c now
does lock_cpu_hotplug())

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Zwane Mwaikambo <zwane@holomorphy.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 3 changed files with 103 additions and 9 deletions Side-by-side Diff

drivers/cpufreq/cpufreq.c
... ... @@ -4,6 +4,9 @@
4 4 * Copyright (C) 2001 Russell King
5 5 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6 6 *
  7 + * Oct 2005 - Ashok Raj <ashok.raj@intel.com>
  8 + * Added handling for CPU hotplug
  9 + *
7 10 * This program is free software; you can redistribute it and/or modify
8 11 * it under the terms of the GNU General Public License version 2 as
9 12 * published by the Free Software Foundation.
... ... @@ -567,6 +570,9 @@
567 570 unsigned long flags;
568 571 unsigned int j;
569 572  
  573 + if (cpu_is_offline(cpu))
  574 + return 0;
  575 +
570 576 cpufreq_debug_disable_ratelimit();
571 577 dprintk("adding CPU %u\n", cpu);
572 578  
... ... @@ -673,7 +679,7 @@
673 679  
674 680 nomem_out:
675 681 module_put(cpufreq_driver->owner);
676   - module_out:
  682 +module_out:
677 683 cpufreq_debug_enable_ratelimit();
678 684 return ret;
679 685 }
... ... @@ -762,7 +768,6 @@
762 768 down(&data->lock);
763 769 if (cpufreq_driver->target)
764 770 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
765   - cpufreq_driver->target = NULL;
766 771 up(&data->lock);
767 772  
768 773 kobject_unregister(&data->kobj);
769 774  
770 775  
... ... @@ -1109,17 +1114,30 @@
1109 1114 unsigned int relation)
1110 1115 {
1111 1116 int retval = -EINVAL;
1112   - lock_cpu_hotplug();
  1117 +
  1118 + /*
  1119 + * Converted the lock_cpu_hotplug to preempt_disable()
  1120 + * and preempt_enable(). This is a bit kludgy and relies on how cpu
  1121 + * hotplug works. All we need is a guarantee that cpu hotplug won't make
  1122 + * progress on any cpu. Once we do preempt_disable(), this would ensure
  1123 + * that hotplug threads don't get onto this cpu, thereby delaying
  1124 + * the cpu remove process.
  1125 + *
  1126 + * We removed the lock_cpu_hotplug since we need to call this function
  1127 + * via cpu hotplug callbacks, which result in locking the cpu hotplug
  1128 + * thread itself. Agree this is not very clean, cpufreq community
  1129 + * could improve this if required. - Ashok Raj <ashok.raj@intel.com>
  1130 + */
  1131 + preempt_disable();
1113 1132 dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1114 1133 target_freq, relation);
1115 1134 if (cpu_online(policy->cpu) && cpufreq_driver->target)
1116 1135 retval = cpufreq_driver->target(policy, target_freq, relation);
1117   - unlock_cpu_hotplug();
  1136 + preempt_enable();
1118 1137 return retval;
1119 1138 }
1120 1139 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1121 1140  
1122   -
1123 1141 int cpufreq_driver_target(struct cpufreq_policy *policy,
1124 1142 unsigned int target_freq,
1125 1143 unsigned int relation)
1126 1144  
... ... @@ -1406,7 +1424,46 @@
1406 1424 }
1407 1425 EXPORT_SYMBOL(cpufreq_update_policy);
1408 1426  
  1427 +static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
  1428 + unsigned long action, void *hcpu)
  1429 +{
  1430 + unsigned int cpu = (unsigned long)hcpu;
  1431 + struct cpufreq_policy *policy;
  1432 + struct sys_device *sys_dev;
1409 1433  
  1434 + sys_dev = get_cpu_sysdev(cpu);
  1435 +
  1436 + if (sys_dev) {
  1437 + switch (action) {
  1438 + case CPU_ONLINE:
  1439 + cpufreq_add_dev(sys_dev);
  1440 + break;
  1441 + case CPU_DOWN_PREPARE:
  1442 + /*
  1443 + * We attempt to put this cpu in lowest frequency
  1444 + * possible before going down. This will permit
  1445 + * hardware-managed P-State to switch other related
  1446 + * threads to min or higher speeds if possible.
  1447 + */
  1448 + policy = cpufreq_cpu_data[cpu];
  1449 + if (policy) {
  1450 + cpufreq_driver_target(policy, policy->min,
  1451 + CPUFREQ_RELATION_H);
  1452 + }
  1453 + break;
  1454 + case CPU_DEAD:
  1455 + cpufreq_remove_dev(sys_dev);
  1456 + break;
  1457 + }
  1458 + }
  1459 + return NOTIFY_OK;
  1460 +}
  1461 +
  1462 +static struct notifier_block cpufreq_cpu_notifier =
  1463 +{
  1464 + .notifier_call = cpufreq_cpu_callback,
  1465 +};
  1466 +
1410 1467 /*********************************************************************
1411 1468 * REGISTER / UNREGISTER CPUFREQ DRIVER *
1412 1469 *********************************************************************/
... ... @@ -1466,6 +1523,7 @@
1466 1523 }
1467 1524  
1468 1525 if (!ret) {
  1526 + register_cpu_notifier(&cpufreq_cpu_notifier);
1469 1527 dprintk("driver %s up and running\n", driver_data->name);
1470 1528 cpufreq_debug_enable_ratelimit();
1471 1529 }
... ... @@ -1497,6 +1555,7 @@
1497 1555 dprintk("unregistering driver %s\n", driver->name);
1498 1556  
1499 1557 sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
  1558 + unregister_cpu_notifier(&cpufreq_cpu_notifier);
1500 1559  
1501 1560 spin_lock_irqsave(&cpufreq_driver_lock, flags);
1502 1561 cpufreq_driver = NULL;
drivers/cpufreq/cpufreq_stats.c
... ... @@ -19,6 +19,7 @@
19 19 #include <linux/percpu.h>
20 20 #include <linux/kobject.h>
21 21 #include <linux/spinlock.h>
  22 +#include <linux/notifier.h>
22 23 #include <asm/cputime.h>
23 24  
24 25 static spinlock_t cpufreq_stats_lock;
... ... @@ -298,6 +299,27 @@
298 299 return 0;
299 300 }
300 301  
  302 +static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
  303 + unsigned long action, void *hcpu)
  304 +{
  305 + unsigned int cpu = (unsigned long)hcpu;
  306 +
  307 + switch (action) {
  308 + case CPU_ONLINE:
  309 + cpufreq_update_policy(cpu);
  310 + break;
  311 + case CPU_DEAD:
  312 + cpufreq_stats_free_table(cpu);
  313 + break;
  314 + }
  315 + return NOTIFY_OK;
  316 +}
  317 +
  318 +static struct notifier_block cpufreq_stat_cpu_notifier =
  319 +{
  320 + .notifier_call = cpufreq_stat_cpu_callback,
  321 +};
  322 +
301 323 static struct notifier_block notifier_policy_block = {
302 324 .notifier_call = cpufreq_stat_notifier_policy
303 325 };
... ... @@ -311,6 +333,7 @@
311 333 {
312 334 int ret;
313 335 unsigned int cpu;
  336 +
314 337 spin_lock_init(&cpufreq_stats_lock);
315 338 if ((ret = cpufreq_register_notifier(&notifier_policy_block,
316 339 CPUFREQ_POLICY_NOTIFIER)))
317 340  
318 341  
... ... @@ -323,20 +346,31 @@
323 346 return ret;
324 347 }
325 348  
326   - for_each_cpu(cpu)
327   - cpufreq_update_policy(cpu);
  349 + register_cpu_notifier(&cpufreq_stat_cpu_notifier);
  350 + lock_cpu_hotplug();
  351 + for_each_online_cpu(cpu) {
  352 + cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE,
  353 + (void *)(long)cpu);
  354 + }
  355 + unlock_cpu_hotplug();
328 356 return 0;
329 357 }
330 358 static void
331 359 __exit cpufreq_stats_exit(void)
332 360 {
333 361 unsigned int cpu;
  362 +
334 363 cpufreq_unregister_notifier(&notifier_policy_block,
335 364 CPUFREQ_POLICY_NOTIFIER);
336 365 cpufreq_unregister_notifier(&notifier_trans_block,
337 366 CPUFREQ_TRANSITION_NOTIFIER);
338   - for_each_cpu(cpu)
339   - cpufreq_stats_free_table(cpu);
  367 + unregister_cpu_notifier(&cpufreq_stat_cpu_notifier);
  368 + lock_cpu_hotplug();
  369 + for_each_online_cpu(cpu) {
  370 + cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_DEAD,
  371 + (void *)(long)cpu);
  372 + }
  373 + unlock_cpu_hotplug();
340 374 }
341 375  
342 376 MODULE_AUTHOR ("Zou Nan hai <nanhai.zou@intel.com>");
... ... @@ -17,6 +17,7 @@
17 17  
18 18 /* This protects CPUs going up and down... */
19 19 DECLARE_MUTEX(cpucontrol);
  20 +EXPORT_SYMBOL_GPL(cpucontrol);
20 21  
21 22 static struct notifier_block *cpu_chain;
22 23