Commit b82d9fdd848abfbe7263a4ecd9bbb55e575100a6
Committed by
Ingo Molnar
1 parent
3c90e6e99b
Exists in
master
and in
4 other branches
sched: avoid large irq-latencies in smp-balancing
SMP balancing is done with IRQs disabled and can iterate the full rq. When rqs are large this can cause large irq-latencies. Limit the nr of iterations on each run. This fixes a scheduling latency regression reported by the -rt folks. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Steven Rostedt <rostedt@goodmis.org> Tested-by: Gregory Haskins <ghaskins@novell.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 3 changed files with 19 additions and 5 deletions Side-by-side Diff
include/linux/sched.h
... | ... | @@ -1466,6 +1466,7 @@ |
1466 | 1466 | extern unsigned int sysctl_sched_child_runs_first; |
1467 | 1467 | extern unsigned int sysctl_sched_features; |
1468 | 1468 | extern unsigned int sysctl_sched_migration_cost; |
1469 | +extern unsigned int sysctl_sched_nr_migrate; | |
1469 | 1470 | |
1470 | 1471 | int sched_nr_latency_handler(struct ctl_table *table, int write, |
1471 | 1472 | struct file *file, void __user *buffer, size_t *length, |
kernel/sched.c
... | ... | @@ -472,6 +472,12 @@ |
472 | 472 | #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) |
473 | 473 | |
474 | 474 | /* |
475 | + * Number of tasks to iterate in a single balance run. | |
476 | + * Limited because this is done with IRQs disabled. | |
477 | + */ | |
478 | +const_debug unsigned int sysctl_sched_nr_migrate = 32; | |
479 | + | |
480 | +/* | |
475 | 481 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu |
476 | 482 | * clock constructed from sched_clock(): |
477 | 483 | */ |
... | ... | @@ -2235,7 +2241,7 @@ |
2235 | 2241 | enum cpu_idle_type idle, int *all_pinned, |
2236 | 2242 | int *this_best_prio, struct rq_iterator *iterator) |
2237 | 2243 | { |
2238 | - int pulled = 0, pinned = 0, skip_for_load; | |
2244 | + int loops = 0, pulled = 0, pinned = 0, skip_for_load; | |
2239 | 2245 | struct task_struct *p; |
2240 | 2246 | long rem_load_move = max_load_move; |
2241 | 2247 | |
2242 | 2248 | |
... | ... | @@ -2249,10 +2255,10 @@ |
2249 | 2255 | */ |
2250 | 2256 | p = iterator->start(iterator->arg); |
2251 | 2257 | next: |
2252 | - if (!p) | |
2258 | + if (!p || loops++ > sysctl_sched_nr_migrate) | |
2253 | 2259 | goto out; |
2254 | 2260 | /* |
2255 | - * To help distribute high priority tasks accross CPUs we don't | |
2261 | + * To help distribute high priority tasks across CPUs we don't | |
2256 | 2262 | * skip a task if it will be the highest priority task (i.e. smallest |
2257 | 2263 | * prio value) on its new queue regardless of its load weight |
2258 | 2264 | */ |
... | ... | @@ -2269,8 +2275,7 @@ |
2269 | 2275 | rem_load_move -= p->se.load.weight; |
2270 | 2276 | |
2271 | 2277 | /* |
2272 | - * We only want to steal up to the prescribed number of tasks | |
2273 | - * and the prescribed amount of weighted load. | |
2278 | + * We only want to steal up to the prescribed amount of weighted load. | |
2274 | 2279 | */ |
2275 | 2280 | if (rem_load_move > 0) { |
2276 | 2281 | if (p->prio < *this_best_prio) |
kernel/sysctl.c
... | ... | @@ -301,6 +301,14 @@ |
301 | 301 | .mode = 0644, |
302 | 302 | .proc_handler = &proc_dointvec, |
303 | 303 | }, |
304 | + { | |
305 | + .ctl_name = CTL_UNNUMBERED, | |
306 | + .procname = "sched_nr_migrate", | |
307 | + .data = &sysctl_sched_nr_migrate, | |
308 | + .maxlen = sizeof(unsigned int), | |
309 | + .mode = 0644, | |
310 | + .proc_handler = &proc_dointvec, | |
311 | + }, | |
304 | 312 | #endif |
305 | 313 | { |
306 | 314 | .ctl_name = CTL_UNNUMBERED, |