Commit ae74e823cb7d4cd476f623fce9a38f625f6c09a8
Committed by
Linus Torvalds
1 parent
f1eb1332b8
Exists in
master
and in
4 other branches
ipmi: add parameter to limit CPU usage in kipmid
In some cases kipmid can use a lot of CPU. This adds a way to tune the CPU used by kipmid to help in those cases. By setting kipmid_max_busy_us to a value between 100 and 500, it is possible to bring down kipmid CPU load to practically 0 without losing too much ipmi throughput performance. If the value is not set, or is set to zero, operation is unaffected. Signed-off-by: Martin Wilck <martin.wilck@ts.fujitsu.com> Signed-off-by: Corey Minyard <cminyard@mvista.com> Cc: Jean Delvare <jdelvare@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 2 changed files with 76 additions and 2 deletions Side-by-side Diff
Documentation/IPMI.txt
... | ... | @@ -365,6 +365,7 @@ |
365 | 365 | regshifts=<shift1>,<shift2>,... |
366 | 366 | slave_addrs=<addr1>,<addr2>,... |
367 | 367 | force_kipmid=<enable1>,<enable2>,... |
368 | + kipmid_max_busy_us=<ustime1>,<ustime2>,... | |
368 | 369 | unload_when_empty=[0|1] |
369 | 370 | |
370 | 371 | Each of these except si_trydefaults is a list, the first item for the |
... | ... | @@ -433,6 +434,7 @@ |
433 | 434 | ipmi_si.regshifts=<shift1>,<shift2>,... |
434 | 435 | ipmi_si.slave_addrs=<addr1>,<addr2>,... |
435 | 436 | ipmi_si.force_kipmid=<enable1>,<enable2>,... |
437 | + ipmi_si.kipmid_max_busy_us=<ustime1>,<ustime2>,... | |
436 | 438 | |
437 | 439 | It works the same as the module parameters of the same names. |
438 | 440 | |
... | ... | @@ -449,6 +451,16 @@ |
449 | 451 | force this thread on or off. If you force it off and don't have |
450 | 452 | interrupts, the driver will run VERY slowly. Don't blame me, |
451 | 453 | these interfaces suck. |
454 | + | |
455 | +Unfortunately, this thread can use a lot of CPU depending on the | |
456 | +interface's performance. This can waste a lot of CPU and cause | |
457 | +various issues with detecting idle CPU and using extra power. To | |
458 | +avoid this, the kipmid_max_busy_us parameter sets the maximum amount of time, in | |
459 | +microseconds, that kipmid will spin before sleeping for a tick. This | |
460 | +value sets a balance between performance and CPU waste and needs to be | |
461 | +tuned to your needs. Maybe, someday, auto-tuning will be added, but | |
462 | +that's not a simple thing and even the auto-tuning would need to be | |
463 | +tuned to the user's desired performance. | |
452 | 464 | |
453 | 465 | The driver supports a hot add and remove of interfaces. This way, |
454 | 466 | interfaces can be added or removed after the kernel is up and running. |
drivers/char/ipmi/ipmi_si_intf.c
... | ... | @@ -295,6 +295,9 @@ |
295 | 295 | static int force_kipmid[SI_MAX_PARMS]; |
296 | 296 | static int num_force_kipmid; |
297 | 297 | |
298 | +static unsigned int kipmid_max_busy_us[SI_MAX_PARMS]; | |
299 | +static int num_max_busy_us; | |
300 | + | |
298 | 301 | static int unload_when_empty = 1; |
299 | 302 | |
300 | 303 | static int try_smi_init(struct smi_info *smi); |
301 | 304 | |
302 | 305 | |
303 | 306 | |
304 | 307 | |
305 | 308 | |
306 | 309 | |
... | ... | @@ -925,23 +928,77 @@ |
925 | 928 | } |
926 | 929 | } |
927 | 930 | |
931 | +/* | |
932 | + * Use -1 in the nsec value of the busy waiting timespec to indicate | |
933 | + * that no busy-wait deadline is armed, i.e. kipmid is not currently | |
934 | + * in a timed busy-wait between SI_SM_CALL_WITH_DELAY checks | |
935 | + */ | |
936 | +static inline void ipmi_si_set_not_busy(struct timespec *ts) | |
937 | +{ | |
938 | + ts->tv_nsec = -1; | |
939 | +} | |
940 | +static inline int ipmi_si_is_busy(struct timespec *ts) | |
941 | +{ | |
942 | + return ts->tv_nsec != -1; | |
943 | +} | |
944 | + | |
945 | +static int ipmi_thread_busy_wait(enum si_sm_result smi_result, | |
946 | + const struct smi_info *smi_info, | |
947 | + struct timespec *busy_until) | |
948 | +{ | |
949 | + unsigned int max_busy_us = 0; | |
950 | + | |
951 | + if (smi_info->intf_num < num_max_busy_us) | |
952 | + max_busy_us = kipmid_max_busy_us[smi_info->intf_num]; | |
953 | + if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY) | |
954 | + ipmi_si_set_not_busy(busy_until); | |
955 | + else if (!ipmi_si_is_busy(busy_until)) { | |
956 | + getnstimeofday(busy_until); | |
957 | + timespec_add_ns(busy_until, max_busy_us*NSEC_PER_USEC); | |
958 | + } else { | |
959 | + struct timespec now; | |
960 | + getnstimeofday(&now); | |
961 | + if (unlikely(timespec_compare(&now, busy_until) > 0)) { | |
962 | + ipmi_si_set_not_busy(busy_until); | |
963 | + return 0; | |
964 | + } | |
965 | + } | |
966 | + return 1; | |
967 | +} | |
968 | + | |
969 | + | |
970 | +/* | |
971 | + * A busy-waiting loop for speeding up IPMI operation. | |
972 | + * | |
973 | + * Lousy hardware makes this hard. This is only enabled for systems | |
974 | + * that are not BT and do not have interrupts. It starts spinning | |
975 | + * when an operation is complete or until max_busy tells it to stop | |
976 | + * (if that is enabled). See the paragraph on kipmid_max_busy_us in | |
977 | + * Documentation/IPMI.txt for details. | |
978 | + */ | |
928 | 979 | static int ipmi_thread(void *data) |
929 | 980 | { |
930 | 981 | struct smi_info *smi_info = data; |
931 | 982 | unsigned long flags; |
932 | 983 | enum si_sm_result smi_result; |
984 | + struct timespec busy_until; | |
933 | 985 | |
986 | + ipmi_si_set_not_busy(&busy_until); | |
934 | 987 | set_user_nice(current, 19); |
935 | 988 | while (!kthread_should_stop()) { |
989 | + int busy_wait; | |
990 | + | |
936 | 991 | spin_lock_irqsave(&(smi_info->si_lock), flags); |
937 | 992 | smi_result = smi_event_handler(smi_info, 0); |
938 | 993 | spin_unlock_irqrestore(&(smi_info->si_lock), flags); |
994 | + busy_wait = ipmi_thread_busy_wait(smi_result, smi_info, | |
995 | + &busy_until); | |
939 | 996 | if (smi_result == SI_SM_CALL_WITHOUT_DELAY) |
940 | 997 | ; /* do nothing */ |
941 | - else if (smi_result == SI_SM_CALL_WITH_DELAY) | |
998 | + else if (smi_result == SI_SM_CALL_WITH_DELAY && busy_wait) | |
942 | 999 | schedule(); |
943 | 1000 | else |
944 | - schedule_timeout_interruptible(1); | |
1001 | + schedule_timeout_interruptible(0); | |
945 | 1002 | } |
946 | 1003 | return 0; |
947 | 1004 | } |
... | ... | @@ -1212,6 +1269,11 @@ |
1212 | 1269 | MODULE_PARM_DESC(unload_when_empty, "Unload the module if no interfaces are" |
1213 | 1270 | " specified or found, default is 1. Setting to 0" |
1214 | 1271 | " is useful for hot add of devices using hotmod."); |
1272 | +module_param_array(kipmid_max_busy_us, uint, &num_max_busy_us, 0644); | |
1273 | +MODULE_PARM_DESC(kipmid_max_busy_us, | |
1274 | + "Max time (in microseconds) to busy-wait for IPMI data before" | |
1275 | + " sleeping. 0 (default) means to wait forever. Set to 100-500" | |
1276 | + " if kipmid is using up a lot of CPU time."); | |
1215 | 1277 | |
1216 | 1278 | |
1217 | 1279 | static void std_irq_cleanup(struct smi_info *info) |