Commit b82d9fdd848abfbe7263a4ecd9bbb55e575100a6

Authored by Peter Zijlstra
Committed by Ingo Molnar
Parent: 3c90e6e99b

sched: avoid large irq-latencies in smp-balancing

SMP balancing is done with IRQs disabled and can iterate the full
runqueue. When runqueues are large this can cause large IRQ latencies,
so limit the number of iterations on each balance run.

This fixes a scheduling latency regression reported by the -rt folks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 3 changed files with 19 additions and 5 deletions
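Before the per-file diffs, a minimal C sketch of the pattern this patch introduces. It is illustrative only: NR_MIGRATE and balance_some() are made-up names, standing in for sysctl_sched_nr_migrate and the capped task walk in balance_tasks(). The point is that a walk which previously ran to completion with IRQs disabled is now bounded per invocation:

    /* Illustrative sketch, not the kernel's code. */
    #define NR_MIGRATE 32U          /* mirrors sysctl_sched_nr_migrate's default */

    struct task { struct task *next; };

    /* Examine at most NR_MIGRATE tasks per IRQs-off balance run. */
    static struct task *balance_some(struct task *p)
    {
            unsigned int loops = 0;

            while (p && loops++ < NR_MIGRATE) {
                    /* ... consider migrating one task ... */
                    p = p->next;
            }
            return p;               /* where the walk stopped */
    }

Bounding the loop this way trades a little balancing thoroughness per run for a hard upper bound on the IRQ-off window.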

include/linux/sched.h
... ... @@ -1466,6 +1466,7 @@
1466 1466 extern unsigned int sysctl_sched_child_runs_first;
1467 1467 extern unsigned int sysctl_sched_features;
1468 1468 extern unsigned int sysctl_sched_migration_cost;
  1469 +extern unsigned int sysctl_sched_nr_migrate;
1469 1470  
1470 1471 int sched_nr_latency_handler(struct ctl_table *table, int write,
1471 1472 struct file *file, void __user *buffer, size_t *length,

kernel/sched.c
... ... @@ -472,6 +472,12 @@
472 472 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
473 473  
474 474 /*
  475 + * Number of tasks to iterate in a single balance run.
  476 + * Limited because this is done with IRQs disabled.
  477 + */
  478 +const_debug unsigned int sysctl_sched_nr_migrate = 32;
  479 +
  480 +/*
475 481 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
476 482 * clock constructed from sched_clock():
477 483 */
... ... @@ -2235,7 +2241,7 @@
2235 2241 enum cpu_idle_type idle, int *all_pinned,
2236 2242 int *this_best_prio, struct rq_iterator *iterator)
2237 2243 {
2238   - int pulled = 0, pinned = 0, skip_for_load;
  2244 + int loops = 0, pulled = 0, pinned = 0, skip_for_load;
2239 2245 struct task_struct *p;
2240 2246 long rem_load_move = max_load_move;
2241 2247  
... ... @@ -2249,10 +2255,10 @@
2249 2255 */
2250 2256 p = iterator->start(iterator->arg);
2251 2257 next:
2252   - if (!p)
  2258 + if (!p || loops++ > sysctl_sched_nr_migrate)
2253 2259 goto out;
2254 2260 /*
2255   - * To help distribute high priority tasks accross CPUs we don't
  2261 + * To help distribute high priority tasks across CPUs we don't
2256 2262 * skip a task if it will be the highest priority task (i.e. smallest
2257 2263 * prio value) on its new queue regardless of its load weight
2258 2264 */
... ... @@ -2269,8 +2275,7 @@
2269 2275 rem_load_move -= p->se.load.weight;
2270 2276  
2271 2277 /*
2272   - * We only want to steal up to the prescribed number of tasks
2273   - * and the prescribed amount of weighted load.
  2278 + * We only want to steal up to the prescribed amount of weighted load.
2274 2279 */
2275 2280 if (rem_load_move > 0) {
2276 2281 if (p->prio < *this_best_prio)

kernel/sysctl.c
... ... @@ -301,6 +301,14 @@
301 301 .mode = 0644,
302 302 .proc_handler = &proc_dointvec,
303 303 },
  304 + {
  305 + .ctl_name = CTL_UNNUMBERED,
  306 + .procname = "sched_nr_migrate",
  307 + .data = &sysctl_sched_nr_migrate,
  308 + .maxlen = sizeof(unsigned int),
  309 + .mode = 0644,
  310 + .proc_handler = &proc_dointvec,
  311 + },
304 312 #endif
305 313 {
306 314 .ctl_name = CTL_UNNUMBERED,
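Since the new entry lands in the kernel sysctl table, the limit is tunable at runtime through proc. A small userspace sketch (assuming the knob appears at /proc/sys/kernel/sched_nr_migrate, as the procname above suggests) that reads the current value:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/kernel/sched_nr_migrate", "r");
            unsigned int val;

            if (f && fscanf(f, "%u", &val) == 1)
                    printf("sched_nr_migrate = %u\n", val);
            if (f)
                    fclose(f);
            return 0;
    }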