kernel/sched/fair.c

  /*
   * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
   *
   *  Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   *
   *  Interactivity improvements by Mike Galbraith
   *  (C) 2007 Mike Galbraith <efault@gmx.de>
   *
   *  Various enhancements by Dmitry Adamushko.
   *  (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
   *
   *  Group scheduling enhancements by Srivatsa Vaddagiri
   *  Copyright IBM Corporation, 2007
   *  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
   *
   *  Scaled math optimizations by Thomas Gleixner
   *  Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
   *
   *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
   *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
   */
  #include <linux/latencytop.h>
  #include <linux/sched.h>
  #include <linux/cpumask.h>
  #include <linux/slab.h>
  #include <linux/profile.h>
  #include <linux/interrupt.h>
  
  #include <trace/events/sched.h>
  
  #include "sched.h"

  /*
   * Targeted preemption latency for CPU-bound tasks:
   * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
   *
   * NOTE: this latency value is not the same as the concept of
   * 'timeslice length' - timeslices in CFS are of variable length
   * and have no persistent notion like in traditional, time-slice
   * based scheduling concepts.
   *
   * (to see the precise effective timeslice length of your workload,
   *  run vmstat and monitor the context-switches (cs) field)
   */
  unsigned int sysctl_sched_latency = 6000000ULL;
  unsigned int normalized_sysctl_sched_latency = 6000000ULL;
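  /*
   * Both values above are in nanoseconds (6000000ULL == 6ms). The
   * normalized_ copy keeps the unscaled baseline; update_sysctl()
   * below recomputes the live sysctl as factor * normalized whenever
   * the scaling factor changes (see get_update_sysctl_factor()).
   */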
  
  /*
   * The initial- and re-scaling of tunables is configurable
   * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)))
   *
   * Options are:
   * SCHED_TUNABLESCALING_NONE - unscaled, always *1
   * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
   * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
   */
  enum sched_tunable_scaling sysctl_sched_tunable_scaling
  	= SCHED_TUNABLESCALING_LOG;
  
  /*
   * Minimal preemption granularity for CPU-bound tasks:
   * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
   */
  unsigned int sysctl_sched_min_granularity = 750000ULL;
  unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
  
  /*
   * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
   */
  static unsigned int sched_nr_latency = 8;
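  /*
   * With the defaults above this is 6ms / 0.75ms = 8; the proc
   * handler below recomputes it with DIV_ROUND_UP() whenever either
   * sysctl is written.
   */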
  
  /*
   * After fork, child runs first. If set to 0 (default) then
   * parent will (try to) run first.
   */
  unsigned int sysctl_sched_child_runs_first __read_mostly;
  
  /*
   * SCHED_OTHER wake-up granularity.
   * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
   *
   * This option delays the preemption effects of decoupled workloads
   * and reduces their over-scheduling. Synchronous workloads will still
   * have immediate wakeup/sleep latencies.
   */
  unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
  unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;

  const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
  /*
   * The exponential sliding  window over which load is averaged for shares
   * distribution.
   * (default: 10msec)
   */
  unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
  #ifdef CONFIG_CFS_BANDWIDTH
  /*
   * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
   * each time a cfs_rq requests quota.
   *
   * Note: in the case that the slice exceeds the runtime remaining (either due
   * to consumption or the quota being specified to be smaller than the slice)
   * we will always only issue the remaining available time.
   *
   * default: 5 msec, units: microseconds
    */
  unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
  #endif
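  /*
   * Rough example of the note above: a group with 20ms of quota per
   * period and the default 5ms slice refills its local pool in up to
   * four 5ms requests; once only 3ms of quota remains, the request is
   * trimmed to 3ms.
   */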
  /*
   * Increase the granularity value when there are more CPUs,
   * because with more CPUs the 'effective latency' as visible
   * to users decreases. But the relationship is not linear,
   * so pick a second-best guess by going with the log2 of the
   * number of CPUs.
   *
   * This idea comes from the SD scheduler of Con Kolivas:
   */
  static int get_update_sysctl_factor(void)
  {
  	unsigned int cpus = min_t(int, num_online_cpus(), 8);
  	unsigned int factor;
  
  	switch (sysctl_sched_tunable_scaling) {
  	case SCHED_TUNABLESCALING_NONE:
  		factor = 1;
  		break;
  	case SCHED_TUNABLESCALING_LINEAR:
  		factor = cpus;
  		break;
  	case SCHED_TUNABLESCALING_LOG:
  	default:
  		factor = 1 + ilog2(cpus);
  		break;
  	}
  
  	return factor;
  }
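  /*
   * Illustrative values, with num_online_cpus() clamped to 8 above:
   * on an 8-CPU (or larger) box NONE gives 1, LINEAR gives 8 and LOG
   * gives 1 + ilog2(8) = 4, so the default 6ms latency scales to 24ms
   * under LOG scaling.
   */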
  
  static void update_sysctl(void)
  {
  	unsigned int factor = get_update_sysctl_factor();
  
  #define SET_SYSCTL(name) \
  	(sysctl_##name = (factor) * normalized_sysctl_##name)
  	SET_SYSCTL(sched_min_granularity);
  	SET_SYSCTL(sched_latency);
  	SET_SYSCTL(sched_wakeup_granularity);
  #undef SET_SYSCTL
  }
  
  void sched_init_granularity(void)
  {
  	update_sysctl();
  }
  
  #if BITS_PER_LONG == 32
  # define WMULT_CONST	(~0UL)
  #else
  # define WMULT_CONST	(1UL << 32)
  #endif
  
  #define WMULT_SHIFT	32
  
  /*
   * Shift right and round:
   */
  #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
  
  /*
   * delta *= weight / lw
   */
  static unsigned long
  calc_delta_mine(unsigned long delta_exec, unsigned long weight,
  		struct load_weight *lw)
  {
  	u64 tmp;
  
  	/*
  	 * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
  	 * entities since MIN_SHARES = 2. Treat weight as 1 if less than
  	 * 2^SCHED_LOAD_RESOLUTION.
  	 */
  	if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
  		tmp = (u64)delta_exec * scale_load_down(weight);
  	else
  		tmp = (u64)delta_exec;
  
  	if (!lw->inv_weight) {
  		unsigned long w = scale_load_down(lw->weight);
  
  		if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
  			lw->inv_weight = 1;
  		else if (unlikely(!w))
  			lw->inv_weight = WMULT_CONST;
  		else
  			lw->inv_weight = WMULT_CONST / w;
  	}
  
  	/*
  	 * Check whether we'd overflow the 64-bit multiplication:
  	 */
  	if (unlikely(tmp > WMULT_CONST))
  		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
  			WMULT_SHIFT/2);
  	else
  		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
  
  	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
  }
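  /*
   * Rough worked example (ignoring the SCHED_LOAD_RESOLUTION scaling):
   * with delta_exec = 1000000, weight = 1024 and lw->weight = 2048,
   * inv_weight = 2^32 / 2048 and the result is roughly
   * 1000000 * 1024 / 2048 = 500000, i.e. delta scaled by weight / lw.
   */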
  
  
  const struct sched_class fair_sched_class;

  /**************************************************************
   * CFS operations on generic schedulable entities:
   */
  #ifdef CONFIG_FAIR_GROUP_SCHED

  /* cpu runqueue to which this cfs_rq is attached */
  static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
  {
  	return cfs_rq->rq;
  }
  /* An entity is a task if it doesn't "own" a runqueue */
  #define entity_is_task(se)	(!se->my_q)

  static inline struct task_struct *task_of(struct sched_entity *se)
  {
  #ifdef CONFIG_SCHED_DEBUG
  	WARN_ON_ONCE(!entity_is_task(se));
  #endif
  	return container_of(se, struct task_struct, se);
  }
  /* Walk up scheduling entities hierarchy */
  #define for_each_sched_entity(se) \
  		for (; se; se = se->parent)
  
  static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
  {
  	return p->se.cfs_rq;
  }
  
  /* runqueue on which this entity is (to be) queued */
  static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
  {
  	return se->cfs_rq;
  }
  
  /* runqueue "owned" by this group */
  static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
  {
  	return grp->my_q;
  }
  static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
  	if (!cfs_rq->on_list) {
  		/*
  		 * Ensure we either appear before our parent (if already
  		 * enqueued) or force our parent to appear after us when it is
  		 * enqueued.  The fact that we always enqueue bottom-up
  		 * reduces this to two cases.
  		 */
  		if (cfs_rq->tg->parent &&
  		    cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) {
  			list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
  				&rq_of(cfs_rq)->leaf_cfs_rq_list);
  		} else {
  			list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
  				&rq_of(cfs_rq)->leaf_cfs_rq_list);
  		}
  
  		cfs_rq->on_list = 1;
  	}
  }
  
  static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
  	if (cfs_rq->on_list) {
  		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
  		cfs_rq->on_list = 0;
  	}
  }
  /* Iterate thr' all leaf cfs_rq's on a runqueue */
  #define for_each_leaf_cfs_rq(rq, cfs_rq) \
  	list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
  
  /* Do the two (enqueued) entities belong to the same group ? */
  static inline int
  is_same_group(struct sched_entity *se, struct sched_entity *pse)
  {
  	if (se->cfs_rq == pse->cfs_rq)
  		return 1;
  
  	return 0;
  }
  
  static inline struct sched_entity *parent_entity(struct sched_entity *se)
  {
  	return se->parent;
  }
  /* return depth at which a sched entity is present in the hierarchy */
  static inline int depth_se(struct sched_entity *se)
  {
  	int depth = 0;
  
  	for_each_sched_entity(se)
  		depth++;
  
  	return depth;
  }
  
  static void
  find_matching_se(struct sched_entity **se, struct sched_entity **pse)
  {
  	int se_depth, pse_depth;
  
  	/*
  	 * preemption test can be made between sibling entities who are in the
  	 * same cfs_rq, i.e. who have a common parent. Walk up the hierarchy of
  	 * both tasks until we find their ancestors who are siblings of a common
  	 * parent.
  	 */
  
  	/* First walk up until both entities are at same depth */
  	se_depth = depth_se(*se);
  	pse_depth = depth_se(*pse);
  
  	while (se_depth > pse_depth) {
  		se_depth--;
  		*se = parent_entity(*se);
  	}
  
  	while (pse_depth > se_depth) {
  		pse_depth--;
  		*pse = parent_entity(*pse);
  	}
  
  	while (!is_same_group(*se, *pse)) {
  		*se = parent_entity(*se);
  		*pse = parent_entity(*pse);
  	}
  }
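  /*
   * Example: for a task t1 in group /A/B (entity chain t1->B->A) and
   * a task t2 in group /C (chain t2->C), t1's pointer is first walked
   * up to B to equalize the depths; one more joint step leaves A and
   * C, which share the root cfs_rq and are the entities the
   * preemption test is made between.
   */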
  #else	/* !CONFIG_FAIR_GROUP_SCHED */
  
  static inline struct task_struct *task_of(struct sched_entity *se)
  {
  	return container_of(se, struct task_struct, se);
  }

  static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
  {
  	return container_of(cfs_rq, struct rq, cfs);
  }
  
  #define entity_is_task(se)	1
  #define for_each_sched_entity(se) \
  		for (; se; se = NULL)

  static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
  {
  	return &task_rq(p)->cfs;
  }
  static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
  {
  	struct task_struct *p = task_of(se);
  	struct rq *rq = task_rq(p);
  
  	return &rq->cfs;
  }
  
  /* runqueue "owned" by this group */
  static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
  {
  	return NULL;
  }
  static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
  }
  
  static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
  }
  #define for_each_leaf_cfs_rq(rq, cfs_rq) \
  		for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
  
  static inline int
  is_same_group(struct sched_entity *se, struct sched_entity *pse)
  {
  	return 1;
  }
  
  static inline struct sched_entity *parent_entity(struct sched_entity *se)
  {
  	return NULL;
  }
  static inline void
  find_matching_se(struct sched_entity **se, struct sched_entity **pse)
  {
  }
  #endif	/* CONFIG_FAIR_GROUP_SCHED */
  static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
  				   unsigned long delta_exec);
  
  /**************************************************************
   * Scheduling class tree data structure manipulation methods:
   */
  static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
  {
  	s64 delta = (s64)(vruntime - min_vruntime);
  	if (delta > 0)
  		min_vruntime = vruntime;
  
  	return min_vruntime;
  }
  static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
  {
  	s64 delta = (s64)(vruntime - min_vruntime);
  	if (delta < 0)
  		min_vruntime = vruntime;
  
  	return min_vruntime;
  }
  static inline int entity_before(struct sched_entity *a,
  				struct sched_entity *b)
  {
  	return (s64)(a->vruntime - b->vruntime) < 0;
  }
  static void update_min_vruntime(struct cfs_rq *cfs_rq)
  {
  	u64 vruntime = cfs_rq->min_vruntime;
  
  	if (cfs_rq->curr)
  		vruntime = cfs_rq->curr->vruntime;
  
  	if (cfs_rq->rb_leftmost) {
  		struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
  						   struct sched_entity,
  						   run_node);
  		if (!cfs_rq->curr)
  			vruntime = se->vruntime;
  		else
  			vruntime = min_vruntime(vruntime, se->vruntime);
  	}
  
  	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
  #ifndef CONFIG_64BIT
  	smp_wmb();
  	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
  #endif
  }
  /*
   * Enqueue an entity into the rb-tree:
   */
  static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
  	struct rb_node *parent = NULL;
  	struct sched_entity *entry;
  	int leftmost = 1;
  
  	/*
  	 * Find the right place in the rbtree:
  	 */
  	while (*link) {
  		parent = *link;
  		entry = rb_entry(parent, struct sched_entity, run_node);
  		/*
  		 * We don't care about collisions. Nodes with
  		 * the same key stay together.
  		 */
  		if (entity_before(se, entry)) {
  			link = &parent->rb_left;
  		} else {
  			link = &parent->rb_right;
  			leftmost = 0;
  		}
  	}
  
  	/*
  	 * Maintain a cache of leftmost tree entries (it is frequently
  	 * used):
  	 */
  	if (leftmost)
  		cfs_rq->rb_leftmost = &se->run_node;
  
  	rb_link_node(&se->run_node, parent, link);
  	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
  }
  static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	if (cfs_rq->rb_leftmost == &se->run_node) {
  		struct rb_node *next_node;
  
  		next_node = rb_next(&se->run_node);
  		cfs_rq->rb_leftmost = next_node;
  	}

  	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
  }
  struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
  {
  	struct rb_node *left = cfs_rq->rb_leftmost;
  
  	if (!left)
  		return NULL;
  
  	return rb_entry(left, struct sched_entity, run_node);
  }
  static struct sched_entity *__pick_next_entity(struct sched_entity *se)
  {
  	struct rb_node *next = rb_next(&se->run_node);
  
  	if (!next)
  		return NULL;
  
  	return rb_entry(next, struct sched_entity, run_node);
  }
  
  #ifdef CONFIG_SCHED_DEBUG
  struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  {
  	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);

  	if (!last)
  		return NULL;
  
  	return rb_entry(last, struct sched_entity, run_node);
  }
  /**************************************************************
   * Scheduling class statistics methods:
   */
  int sched_proc_update_handler(struct ctl_table *table, int write,
  		void __user *buffer, size_t *lenp,
  		loff_t *ppos)
  {
  	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  	int factor = get_update_sysctl_factor();
  
  	if (ret || !write)
  		return ret;
  
  	sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
  					sysctl_sched_min_granularity);
  #define WRT_SYSCTL(name) \
  	(normalized_sysctl_##name = sysctl_##name / (factor))
  	WRT_SYSCTL(sched_min_granularity);
  	WRT_SYSCTL(sched_latency);
  	WRT_SYSCTL(sched_wakeup_granularity);
  #undef WRT_SYSCTL
  	return 0;
  }
  #endif
  
  /*
   * delta /= w
   */
  static inline unsigned long
  calc_delta_fair(unsigned long delta, struct sched_entity *se)
  {
  	if (unlikely(se->load.weight != NICE_0_LOAD))
  		delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
  
  	return delta;
  }
  
  /*
   * The idea is to set a period in which each task runs once.
   *
   * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
   * this period because otherwise the slices get too small.
   *
   * p = (nr <= nl) ? l : l*nr/nl
   */
  static u64 __sched_period(unsigned long nr_running)
  {
  	u64 period = sysctl_sched_latency;
  	unsigned long nr_latency = sched_nr_latency;
  
  	if (unlikely(nr_running > nr_latency)) {
  		period = sysctl_sched_min_granularity;
  		period *= nr_running;
  	}
  
  	return period;
  }
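  /*
   * Illustrative numbers with the (unscaled) defaults: 5 runnable
   * tasks fit in the 6ms latency period, so p = 6ms; with 12 tasks
   * the period is stretched to 12 * 0.75ms = 9ms so that no slice
   * drops below the minimum granularity.
   */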
  /*
   * We calculate the wall-time slice from the period by taking a part
   * proportional to the weight.
   *
   * s = p*P[w/rw]
   */
  static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);

  	for_each_sched_entity(se) {
  		struct load_weight *load;
  		struct load_weight lw;
  
  		cfs_rq = cfs_rq_of(se);
  		load = &cfs_rq->load;

  		if (unlikely(!se->on_rq)) {
  			lw = cfs_rq->load;
  
  			update_load_add(&lw, se->load.weight);
  			load = &lw;
  		}
  		slice = calc_delta_mine(slice, se->load.weight, load);
  	}
  	return slice;
  }
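  /*
   * Illustrative example: with a 6ms period and two runnable nice-0
   * entities of equal weight, each gets 6ms * w / 2w = 3ms; an entity
   * carrying twice the weight of its sibling would instead get 4ms
   * versus 2ms.
   */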
  /*
   * We calculate the vruntime slice of a to-be-inserted task
   *
   * vs = s/w
   */
  static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	return calc_delta_fair(sched_slice(cfs_rq, se), se);
  }
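  /*
   * Example: a 3ms wall-time slice for an entity at twice NICE_0_LOAD
   * corresponds to a 1.5ms vruntime slice, since calc_delta_fair()
   * scales by NICE_0_LOAD / weight.
   */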
  static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
  static void update_cfs_shares(struct cfs_rq *cfs_rq);

  /*
   * Update the current task's runtime statistics. Skip current tasks that
   * are not in our scheduling class.
   */
  static inline void
  __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
  	      unsigned long delta_exec)
  {
  	unsigned long delta_exec_weighted;

  	schedstat_set(curr->statistics.exec_max,
  		      max((u64)delta_exec, curr->statistics.exec_max));
  
  	curr->sum_exec_runtime += delta_exec;
  	schedstat_add(cfs_rq, exec_clock, delta_exec);
  	delta_exec_weighted = calc_delta_fair(delta_exec, curr);

  	curr->vruntime += delta_exec_weighted;
  	update_min_vruntime(cfs_rq);

  #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
  	cfs_rq->load_unacc_exec_time += delta_exec;
  #endif
  }
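  /*
   * Note: for a NICE_0_LOAD entity the weighted delta equals the raw
   * delta, so vruntime advances at wall-clock speed; an entity with
   * twice that weight advances its vruntime at half the rate for the
   * same delta_exec, which is what earns heavier entities a larger
   * share of CPU time.
   */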
  static void update_curr(struct cfs_rq *cfs_rq)
  {
  	struct sched_entity *curr = cfs_rq->curr;
  	u64 now = rq_of(cfs_rq)->clock_task;
  	unsigned long delta_exec;
  
  	if (unlikely(!curr))
  		return;
  
  	/*
  	 * Get the amount of time the current task was running
  	 * since the last time we changed load (this cannot
  	 * overflow on 32 bits):
  	 */
  	delta_exec = (unsigned long)(now - curr->exec_start);
  	if (!delta_exec)
  		return;

  	__update_curr(cfs_rq, curr, delta_exec);
  	curr->exec_start = now;
  
  	if (entity_is_task(curr)) {
  		struct task_struct *curtask = task_of(curr);
  		trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
  		cpuacct_charge(curtask, delta_exec);
  		account_group_exec_runtime(curtask, delta_exec);
  	}
  
  	account_cfs_rq_runtime(cfs_rq, delta_exec);
  }
  
  static inline void
  update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	schedstat_set(se->statistics.wait_start, rq_of(cfs_rq)->clock);
  }
  /*
   * Task is being enqueued - update stats:
   */
  static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	/*
  	 * Are we enqueueing a waiting task? (for current tasks
  	 * a dequeue/enqueue event is a NOP)
  	 */
  	if (se != cfs_rq->curr)
  		update_stats_wait_start(cfs_rq, se);
  }
  static void
  update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
  			rq_of(cfs_rq)->clock - se->statistics.wait_start));
  	schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
  	schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
  			rq_of(cfs_rq)->clock - se->statistics.wait_start);
  #ifdef CONFIG_SCHEDSTATS
  	if (entity_is_task(se)) {
  		trace_sched_stat_wait(task_of(se),
  			rq_of(cfs_rq)->clock - se->statistics.wait_start);
  	}
  #endif
  	schedstat_set(se->statistics.wait_start, 0);
  }
  
  static inline void
  update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	/*
  	 * Mark the end of the wait period if dequeueing a
  	 * waiting task:
  	 */
  	if (se != cfs_rq->curr)
  		update_stats_wait_end(cfs_rq, se);
  }
  
  /*
   * We are picking a new current task - update its stats:
   */
  static inline void
  update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	/*
  	 * We are starting a new run period:
  	 */
  	se->exec_start = rq_of(cfs_rq)->clock_task;
  }
  /**************************************************
   * Scheduling class queueing methods:
   */
  #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
  static void
  add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
  {
  	cfs_rq->task_weight += weight;
  }
  #else
  static inline void
  add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
  {
  }
  #endif
  static void
  account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	update_load_add(&cfs_rq->load, se->load.weight);
  	if (!parent_entity(se))
  		update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
  	if (entity_is_task(se)) {
  		add_cfs_task_weight(cfs_rq, se->load.weight);
  		list_add(&se->group_node, &cfs_rq->tasks);
  	}
  	cfs_rq->nr_running++;
  }
  
  static void
  account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	update_load_sub(&cfs_rq->load, se->load.weight);
  	if (!parent_entity(se))
  		update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
  	if (entity_is_task(se)) {
  		add_cfs_task_weight(cfs_rq, -se->load.weight);
  		list_del_init(&se->group_node);
  	}
  	cfs_rq->nr_running--;
  }
  #ifdef CONFIG_FAIR_GROUP_SCHED
  /* we need this in update_cfs_load and load-balance functions below */
  static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
  # ifdef CONFIG_SMP
  static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
  					    int global_update)
  {
  	struct task_group *tg = cfs_rq->tg;
  	long load_avg;
  
  	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
  	load_avg -= cfs_rq->load_contribution;
  
  	if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) {
  		atomic_add(load_avg, &tg->load_weight);
  		cfs_rq->load_contribution += load_avg;
  	}
  }
  
  static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
  {
  	u64 period = sysctl_sched_shares_window;
  	u64 now, delta;
  	unsigned long load = cfs_rq->load.weight;

  	if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq))
  		return;
  	now = rq_of(cfs_rq)->clock_task;
  	delta = now - cfs_rq->load_stamp;
  	/* truncate load history at 4 idle periods */
  	if (cfs_rq->load_stamp > cfs_rq->load_last &&
  	    now - cfs_rq->load_last > 4 * period) {
  		cfs_rq->load_period = 0;
  		cfs_rq->load_avg = 0;
  		delta = period - 1;
  	}
  	cfs_rq->load_stamp = now;
  	cfs_rq->load_unacc_exec_time = 0;
  	cfs_rq->load_period += delta;
  	if (load) {
  		cfs_rq->load_last = now;
  		cfs_rq->load_avg += delta * load;
  	}

  	/* consider updating load contribution on each fold or truncate */
  	if (global_update || cfs_rq->load_period > period
  	    || !cfs_rq->load_period)
  		update_cfs_rq_load_contribution(cfs_rq, global_update);
  	while (cfs_rq->load_period > period) {
  		/*
  		 * Inline assembly required to prevent the compiler
  		 * optimising this loop into a divmod call.
  		 * See __iter_div_u64_rem() for another example of this.
  		 */
  		asm("" : "+rm" (cfs_rq->load_period));
  		cfs_rq->load_period /= 2;
  		cfs_rq->load_avg /= 2;
  	}

  	if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
  		list_del_leaf_cfs_rq(cfs_rq);
  }
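  /*
   * The halving loop above gives the average roughly a geometric
   * decay with a half-life of one sysctl_sched_shares_window (10ms by
   * default); after about four idle windows the history is simply
   * truncated instead of decayed.
   */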
  static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
  {
  	long tg_weight;
  
  	/*
  	 * Use this CPU's actual weight instead of the last load_contribution
  	 * to gain a more accurate current total weight. See
  	 * update_cfs_rq_load_contribution().
  	 */
  	tg_weight = atomic_read(&tg->load_weight);
  	tg_weight -= cfs_rq->load_contribution;
  	tg_weight += cfs_rq->load.weight;
  
  	return tg_weight;
  }
  static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
  {
  	long tg_weight, load, shares;

  	tg_weight = calc_tg_weight(tg, cfs_rq);
  	load = cfs_rq->load.weight;

  	shares = (tg->shares * load);
  	if (tg_weight)
  		shares /= tg_weight;
  
  	if (shares < MIN_SHARES)
  		shares = MIN_SHARES;
  	if (shares > tg->shares)
  		shares = tg->shares;
  
  	return shares;
  }
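  /*
   * Rough worked example: with tg->shares = 1024, a local queue load
   * of 2048 and a total group weight of 8192, this cpu's entity gets
   * 1024 * 2048 / 8192 = 256, clamped to [MIN_SHARES, tg->shares].
   */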
  
  static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
  {
  	if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
  		update_cfs_load(cfs_rq, 0);
  		update_cfs_shares(cfs_rq);
  	}
  }
  # else /* CONFIG_SMP */
  static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
  {
  }
  static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
  {
  	return tg->shares;
  }
  
  static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
  {
  }
  # endif /* CONFIG_SMP */
  static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
  			    unsigned long weight)
  {
  	if (se->on_rq) {
  		/* commit outstanding execution time */
  		if (cfs_rq->curr == se)
  			update_curr(cfs_rq);
  		account_entity_dequeue(cfs_rq, se);
  	}
  
  	update_load_set(&se->load, weight);
  
  	if (se->on_rq)
  		account_entity_enqueue(cfs_rq, se);
  }
  static void update_cfs_shares(struct cfs_rq *cfs_rq)
  {
  	struct task_group *tg;
  	struct sched_entity *se;
  	long shares;

  	tg = cfs_rq->tg;
  	se = tg->se[cpu_of(rq_of(cfs_rq))];
  	if (!se || throttled_hierarchy(cfs_rq))
  		return;
  #ifndef CONFIG_SMP
  	if (likely(se->load.weight == tg->shares))
  		return;
  #endif
  	shares = calc_cfs_shares(cfs_rq, tg);
  
  	reweight_entity(cfs_rq_of(se), se, shares);
  }
  #else /* CONFIG_FAIR_GROUP_SCHED */
  static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
  {
  }
  static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
  {
  }
  
  static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
  {
  }
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  #ifdef CONFIG_SCHEDSTATS
  	struct task_struct *tsk = NULL;
  
  	if (entity_is_task(se))
  		tsk = task_of(se);
  	if (se->statistics.sleep_start) {
  		u64 delta = rq_of(cfs_rq)->clock - se->statistics.sleep_start;
  
  		if ((s64)delta < 0)
  			delta = 0;
  		if (unlikely(delta > se->statistics.sleep_max))
  			se->statistics.sleep_max = delta;

  		se->statistics.sum_sleep_runtime += delta;

  		if (tsk) {
  			account_scheduler_latency(tsk, delta >> 10, 1);
  			trace_sched_stat_sleep(tsk, delta);
  		}
  	}
  	if (se->statistics.block_start) {
  		u64 delta = rq_of(cfs_rq)->clock - se->statistics.block_start;
  
  		if ((s64)delta < 0)
  			delta = 0;
  		if (unlikely(delta > se->statistics.block_max))
  			se->statistics.block_max = delta;

  		se->statistics.sum_sleep_runtime += delta;

  		if (tsk) {
  			if (tsk->in_iowait) {
  				se->statistics.iowait_sum += delta;
  				se->statistics.iowait_count++;
  				trace_sched_stat_iowait(tsk, delta);
  			}
  			trace_sched_stat_blocked(tsk, delta);
  			/*
  			 * Blocking time is in units of nanosecs, so shift by
  			 * 20 to get a milliseconds-range estimation of the
  			 * amount of time that the task spent sleeping:
  			 */
  			if (unlikely(prof_on == SLEEP_PROFILING)) {
  				profile_hits(SLEEP_PROFILING,
  						(void *)get_wchan(tsk),
  						delta >> 20);
  			}
  			account_scheduler_latency(tsk, delta >> 10, 0);
  		}
  	}
  #endif
  }
  static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  #ifdef CONFIG_SCHED_DEBUG
  	s64 d = se->vruntime - cfs_rq->min_vruntime;
  
  	if (d < 0)
  		d = -d;
  
  	if (d > 3*sysctl_sched_latency)
  		schedstat_inc(cfs_rq, nr_spread_over);
  #endif
  }
  static void
  place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
  {
  	u64 vruntime = cfs_rq->min_vruntime;

  	/*
  	 * The 'current' period is already promised to the current tasks,
  	 * however the extra weight of the new task will slow them down a
  	 * little, place the new task so that it fits in the slot that
  	 * stays open at the end.
  	 */
  	if (initial && sched_feat(START_DEBIT))
  		vruntime += sched_vslice(cfs_rq, se);

  	/* sleeps up to a single latency don't count. */
  	if (!initial) {
  		unsigned long thresh = sysctl_sched_latency;

  		/*
  		 * Halve their sleep time's effect, to allow
  		 * for a gentler effect of sleepers:
  		 */
  		if (sched_feat(GENTLE_FAIR_SLEEPERS))
  			thresh >>= 1;

  		vruntime -= thresh;
  	}
  	/* ensure we never gain time by being placed backwards. */
  	vruntime = max_vruntime(se->vruntime, vruntime);
  	se->vruntime = vruntime;
  }
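  /*
   * Example: a newly forked task (initial && START_DEBIT) starts one
   * vslice beyond min_vruntime, so it cannot immediately preempt
   * everything already queued; a waking sleeper is credited up to one
   * latency period (halved under GENTLE_FAIR_SLEEPERS), but the final
   * max_vruntime() keeps it from gaining time it never had.
   */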
  static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
  static void
  enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
  	/*
  	 * Update the normalized vruntime before updating min_vruntime
  	 * through calling update_curr().
  	 */
  	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
  		se->vruntime += cfs_rq->min_vruntime;
  
  	/*
  	 * Update run-time statistics of the 'current'.
  	 */
  	update_curr(cfs_rq);
  	update_cfs_load(cfs_rq, 0);
  	account_entity_enqueue(cfs_rq, se);
  	update_cfs_shares(cfs_rq);

  	if (flags & ENQUEUE_WAKEUP) {
  		place_entity(cfs_rq, se, 0);
  		enqueue_sleeper(cfs_rq, se);
  	}

  	update_stats_enqueue(cfs_rq, se);
  	check_spread(cfs_rq, se);
  	if (se != cfs_rq->curr)
  		__enqueue_entity(cfs_rq, se);
  	se->on_rq = 1;

  	if (cfs_rq->nr_running == 1) {
  		list_add_leaf_cfs_rq(cfs_rq);
  		check_enqueue_throttle(cfs_rq);
  	}
  }
  static void __clear_buddies_last(struct sched_entity *se)
  {
  	for_each_sched_entity(se) {
  		struct cfs_rq *cfs_rq = cfs_rq_of(se);
  		if (cfs_rq->last == se)
  			cfs_rq->last = NULL;
  		else
  			break;
  	}
  }

  static void __clear_buddies_next(struct sched_entity *se)
  {
  	for_each_sched_entity(se) {
  		struct cfs_rq *cfs_rq = cfs_rq_of(se);
  		if (cfs_rq->next == se)
  			cfs_rq->next = NULL;
  		else
  			break;
  	}
  }
  static void __clear_buddies_skip(struct sched_entity *se)
  {
  	for_each_sched_entity(se) {
  		struct cfs_rq *cfs_rq = cfs_rq_of(se);
  		if (cfs_rq->skip == se)
  			cfs_rq->skip = NULL;
  		else
  			break;
  	}
  }
  static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	if (cfs_rq->last == se)
  		__clear_buddies_last(se);
  
  	if (cfs_rq->next == se)
  		__clear_buddies_next(se);
  
  	if (cfs_rq->skip == se)
  		__clear_buddies_skip(se);
  }
  static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
  static void
  dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
  	/*
  	 * Update run-time statistics of the 'current'.
  	 */
  	update_curr(cfs_rq);
  	update_stats_dequeue(cfs_rq, se);
  	if (flags & DEQUEUE_SLEEP) {
  #ifdef CONFIG_SCHEDSTATS
  		if (entity_is_task(se)) {
  			struct task_struct *tsk = task_of(se);
  
  			if (tsk->state & TASK_INTERRUPTIBLE)
  				se->statistics.sleep_start = rq_of(cfs_rq)->clock;
  			if (tsk->state & TASK_UNINTERRUPTIBLE)
  				se->statistics.block_start = rq_of(cfs_rq)->clock;
  		}
  #endif
  	}
  	clear_buddies(cfs_rq, se);

  	if (se != cfs_rq->curr)
  		__dequeue_entity(cfs_rq, se);
  	se->on_rq = 0;
  	update_cfs_load(cfs_rq, 0);
  	account_entity_dequeue(cfs_rq, se);
  
  	/*
  	 * Normalize the entity after updating the min_vruntime because the
  	 * update can refer to the ->curr item and we need to reflect this
  	 * movement in our normalized position.
  	 */
371fd7e7a   Peter Zijlstra   sched: Add enqueu...
1145
  	if (!(flags & DEQUEUE_SLEEP))
88ec22d3e   Peter Zijlstra   sched: Remove the...
1146
  		se->vruntime -= cfs_rq->min_vruntime;
1e8762317   Peter Zijlstra   sched: Fix ->min_...
1147

d8b4986d3   Paul Turner   sched: Return unu...
1148
1149
  	/* return excess runtime on last dequeue */
  	return_cfs_rq_runtime(cfs_rq);
1e8762317   Peter Zijlstra   sched: Fix ->min_...
1150
1151
  	update_min_vruntime(cfs_rq);
  	update_cfs_shares(cfs_rq);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1152
1153
1154
1155
1156
  }
  
  /*
   * Preempt the current task with a newly woken task if needed:
   */
static void
check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
	unsigned long ideal_runtime, delta_exec;
	struct sched_entity *se;
	s64 delta;

	ideal_runtime = sched_slice(cfs_rq, curr);
	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
	if (delta_exec > ideal_runtime) {
		resched_task(rq_of(cfs_rq)->curr);
		/*
		 * The current task ran long enough, ensure it doesn't get
		 * re-elected due to buddy favours.
		 */
		clear_buddies(cfs_rq, curr);
		return;
	}

	/*
	 * Ensure that a task that missed wakeup preemption by a
	 * narrow margin doesn't have to wait for a full slice.
	 * This also mitigates buddy induced latencies under load.
	 */
	if (delta_exec < sysctl_sched_min_granularity)
		return;

	se = __pick_first_entity(cfs_rq);
	delta = curr->vruntime - se->vruntime;

	if (delta < 0)
		return;

	if (delta > ideal_runtime)
		resched_task(rq_of(cfs_rq)->curr);
  }
static void
set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
  	/* 'current' is not kept within the tree. */
  	if (se->on_rq) {
  		/*
  		 * Any task has to be enqueued before it get to execute on
  		 * a CPU. So account for the time it spent waiting on the
  		 * runqueue.
  		 */
  		update_stats_wait_end(cfs_rq, se);
  		__dequeue_entity(cfs_rq, se);
  	}
	update_stats_curr_start(cfs_rq, se);
	cfs_rq->curr = se;
#ifdef CONFIG_SCHEDSTATS
	/*
	 * Track our maximum slice length, if the CPU's load is at
	 * least twice that of our own weight (i.e. dont track it
	 * when there are only lesser-weight tasks around):
	 */
	if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
		se->statistics.slice_max = max(se->statistics.slice_max,
			se->sum_exec_runtime - se->prev_sum_exec_runtime);
	}
#endif
	se->prev_sum_exec_runtime = se->sum_exec_runtime;
  }
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);

  /*
   * Pick the next process, keeping these things in mind, in this order:
   * 1) keep things fair between processes/task groups
   * 2) pick the "next" process, since someone really wants that to run
   * 3) pick the "last" process, for cache locality
   * 4) do not run the "skip" process, if something else is available
   */
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
	struct sched_entity *se = __pick_first_entity(cfs_rq);
	struct sched_entity *left = se;

  	/*
  	 * Avoid running the skip buddy, if running something else can
  	 * be done without getting too unfair.
  	 */
  	if (cfs_rq->skip == se) {
  		struct sched_entity *second = __pick_next_entity(se);
  		if (second && wakeup_preempt_entity(second, left) < 1)
  			se = second;
  	}

  	/*
  	 * Prefer last buddy, try to return the CPU to a preempted task.
  	 */
  	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
  		se = cfs_rq->last;
  	/*
  	 * Someone really wants this to run. If it's not unfair, run it.
  	 */
  	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
  		se = cfs_rq->next;
	clear_buddies(cfs_rq, se);

	return se;
}

static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq);

static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
  {
  	/*
  	 * If still on the runqueue then deactivate_task()
  	 * was not called and update_curr() has to be done:
  	 */
  	if (prev->on_rq)
		update_curr(cfs_rq);

	/* throttle cfs_rqs exceeding runtime */
	check_cfs_rq_runtime(cfs_rq);
	check_spread(cfs_rq, prev);
	if (prev->on_rq) {
		update_stats_wait_start(cfs_rq, prev);
		/* Put 'current' back into the tree. */
		__enqueue_entity(cfs_rq, prev);
	}
	cfs_rq->curr = NULL;
}

  static void
  entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
{
	/*
	 * Update run-time statistics of the 'current'.
	 */
	update_curr(cfs_rq);

  	/*
  	 * Update share accounting for long-running entities.
  	 */
  	update_entity_shares_tick(cfs_rq);
  #ifdef CONFIG_SCHED_HRTICK
  	/*
  	 * queued ticks are scheduled to match the slice, so don't bother
  	 * validating it and just reschedule.
  	 */
  	if (queued) {
  		resched_task(rq_of(cfs_rq)->curr);
  		return;
  	}
  	/*
  	 * don't let the period tick interfere with the hrtick preemption
  	 */
  	if (!sched_feat(DOUBLE_TICK) &&
  			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
  		return;
  #endif
	if (cfs_rq->nr_running > 1)
		check_preempt_tick(cfs_rq, curr);
}
  
  /**************************************************
   * CFS bandwidth control machinery
   */
  
  #ifdef CONFIG_CFS_BANDWIDTH
  
  #ifdef HAVE_JUMP_LABEL
  static struct jump_label_key __cfs_bandwidth_used;
  
  static inline bool cfs_bandwidth_used(void)
  {
  	return static_branch(&__cfs_bandwidth_used);
  }
  
  void account_cfs_bandwidth_used(int enabled, int was_enabled)
  {
  	/* only need to count groups transitioning between enabled/!enabled */
  	if (enabled && !was_enabled)
  		jump_label_inc(&__cfs_bandwidth_used);
  	else if (!enabled && was_enabled)
  		jump_label_dec(&__cfs_bandwidth_used);
  }
  #else /* HAVE_JUMP_LABEL */
  static bool cfs_bandwidth_used(void)
  {
  	return true;
  }
  
  void account_cfs_bandwidth_used(int enabled, int was_enabled) {}
  #endif /* HAVE_JUMP_LABEL */
  /*
   * default period for cfs group bandwidth.
   * default: 0.1s, units: nanoseconds
   */
  static inline u64 default_cfs_period(void)
  {
  	return 100000000ULL;
  }
  
  static inline u64 sched_cfs_bandwidth_slice(void)
  {
  	return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
  }
  /*
   * Replenish runtime according to assigned quota and update expiration time.
   * We use sched_clock_cpu directly instead of rq->clock to avoid adding
   * additional synchronization around rq->lock.
   *
   * requires cfs_b->lock
   */
void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
  {
  	u64 now;
  
  	if (cfs_b->quota == RUNTIME_INF)
  		return;
  
  	now = sched_clock_cpu(smp_processor_id());
  	cfs_b->runtime = cfs_b->quota;
  	cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
  }
  static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
  {
  	return &tg->cfs_bandwidth;
  }
/* returns 0 on failure to allocate runtime */
static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
	struct task_group *tg = cfs_rq->tg;
	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
  	u64 amount = 0, min_amount, expires;
  
  	/* note: this is a positive sum as runtime_remaining <= 0 */
  	min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
  
  	raw_spin_lock(&cfs_b->lock);
  	if (cfs_b->quota == RUNTIME_INF)
  		amount = min_amount;
	else {
  		/*
  		 * If the bandwidth pool has become inactive, then at least one
  		 * period must have elapsed since the last consumption.
  		 * Refresh the global state and ensure bandwidth timer becomes
  		 * active.
  		 */
  		if (!cfs_b->timer_active) {
  			__refill_cfs_bandwidth_runtime(cfs_b);
			__start_cfs_bandwidth(cfs_b);
		}
  
  		if (cfs_b->runtime > 0) {
  			amount = min(cfs_b->runtime, min_amount);
  			cfs_b->runtime -= amount;
  			cfs_b->idle = 0;
  		}
	}
	expires = cfs_b->runtime_expires;
  	raw_spin_unlock(&cfs_b->lock);
  
  	cfs_rq->runtime_remaining += amount;
  	/*
  	 * we may have advanced our local expiration to account for allowed
  	 * spread between our sched_clock and the one on which runtime was
  	 * issued.
  	 */
  	if ((s64)(expires - cfs_rq->runtime_expires) > 0)
  		cfs_rq->runtime_expires = expires;
	return cfs_rq->runtime_remaining > 0;
}

  /*
   * Note: This depends on the synchronization provided by sched_clock and the
   * fact that rq->clock snapshots this value.
   */
  static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
  	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
  	struct rq *rq = rq_of(cfs_rq);
  
  	/* if the deadline is ahead of our clock, nothing to do */
  	if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0))
		return;
  	if (cfs_rq->runtime_remaining < 0)
  		return;
  
  	/*
  	 * If the local deadline has passed we have to consider the
  	 * possibility that our sched_clock is 'fast' and the global deadline
  	 * has not truly expired.
  	 *
  	 * Fortunately we can check determine whether this the case by checking
  	 * whether the global deadline has advanced.
  	 */
  
  	if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
  		/* extend local deadline, drift is bounded above by 2 ticks */
  		cfs_rq->runtime_expires += TICK_NSEC;
  	} else {
  		/* global deadline is ahead, expiration has passed */
  		cfs_rq->runtime_remaining = 0;
  	}
  }
  
  static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
  				     unsigned long delta_exec)
  {
  	/* dock delta_exec before expiring quota (as it could span periods) */
	cfs_rq->runtime_remaining -= delta_exec;
	expire_cfs_rq_runtime(cfs_rq);

	if (likely(cfs_rq->runtime_remaining > 0))
  		return;
  	/*
  	 * if we're unable to extend our runtime we resched so that the active
  	 * hierarchy can be throttled
  	 */
  	if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
  		resched_task(rq_of(cfs_rq)->curr);
  }
  
  static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
  						   unsigned long delta_exec)
  {
	if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
  		return;
  
  	__account_cfs_rq_runtime(cfs_rq, delta_exec);
  }
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{
	return cfs_bandwidth_used() && cfs_rq->throttled;
  }
/* check whether cfs_rq, or any parent, is throttled */
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
{
  	return cfs_bandwidth_used() && cfs_rq->throttle_count;
  }
  
  /*
   * Ensure that neither of the group entities corresponding to src_cpu or
   * dest_cpu are members of a throttled hierarchy when performing group
   * load-balance operations.
   */
  static inline int throttled_lb_pair(struct task_group *tg,
  				    int src_cpu, int dest_cpu)
  {
  	struct cfs_rq *src_cfs_rq, *dest_cfs_rq;
  
  	src_cfs_rq = tg->cfs_rq[src_cpu];
  	dest_cfs_rq = tg->cfs_rq[dest_cpu];
  
  	return throttled_hierarchy(src_cfs_rq) ||
  	       throttled_hierarchy(dest_cfs_rq);
  }
  
  /* updated child weight may affect parent so we have to do this bottom up */
  static int tg_unthrottle_up(struct task_group *tg, void *data)
  {
  	struct rq *rq = data;
  	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
  
  	cfs_rq->throttle_count--;
  #ifdef CONFIG_SMP
  	if (!cfs_rq->throttle_count) {
  		u64 delta = rq->clock_task - cfs_rq->load_stamp;
  
  		/* leaving throttled state, advance shares averaging windows */
  		cfs_rq->load_stamp += delta;
  		cfs_rq->load_last += delta;
  
  		/* update entity weight now that we are on_rq again */
  		update_cfs_shares(cfs_rq);
  	}
  #endif
  
  	return 0;
  }
  
  static int tg_throttle_down(struct task_group *tg, void *data)
  {
  	struct rq *rq = data;
  	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
  
  	/* group is entering throttled state, record last load */
  	if (!cfs_rq->throttle_count)
  		update_cfs_load(cfs_rq, 0);
  	cfs_rq->throttle_count++;
  
  	return 0;
  }
  static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
  {
  	struct rq *rq = rq_of(cfs_rq);
  	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
  	struct sched_entity *se;
  	long task_delta, dequeue = 1;
  
  	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
  
  	/* account load preceding throttle */
  	rcu_read_lock();
  	walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
  	rcu_read_unlock();
  
  	task_delta = cfs_rq->h_nr_running;
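	/*
	 * Walk up the hierarchy: dequeue this group's entity at each level
	 * until a parent still has other runnable entities, and subtract
	 * the throttled tasks from every visited level's h_nr_running.
	 */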
  	for_each_sched_entity(se) {
  		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
  		/* throttled entity or throttle-on-deactivate */
  		if (!se->on_rq)
  			break;
  
  		if (dequeue)
  			dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
  		qcfs_rq->h_nr_running -= task_delta;
  
  		if (qcfs_rq->load.weight)
  			dequeue = 0;
  	}
  
  	if (!se)
  		rq->nr_running -= task_delta;
  
  	cfs_rq->throttled = 1;
	cfs_rq->throttled_timestamp = rq->clock;
  	raw_spin_lock(&cfs_b->lock);
  	list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
  	raw_spin_unlock(&cfs_b->lock);
  }
void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
  {
  	struct rq *rq = rq_of(cfs_rq);
  	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
  	struct sched_entity *se;
  	int enqueue = 1;
  	long task_delta;
  
  	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
  
  	cfs_rq->throttled = 0;
  	raw_spin_lock(&cfs_b->lock);
	cfs_b->throttled_time += rq->clock - cfs_rq->throttled_timestamp;
	list_del_rcu(&cfs_rq->throttled_list);
	raw_spin_unlock(&cfs_b->lock);
	cfs_rq->throttled_timestamp = 0;

  	update_rq_clock(rq);
  	/* update hierarchical throttle state */
  	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
  	if (!cfs_rq->load.weight)
  		return;
  
  	task_delta = cfs_rq->h_nr_running;
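	/*
	 * Re-enqueue the entities that were dequeued when the group was
	 * throttled and add the tasks back into each level's h_nr_running,
	 * stopping if a parent is itself still throttled.
	 */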
  	for_each_sched_entity(se) {
  		if (se->on_rq)
  			enqueue = 0;
  
  		cfs_rq = cfs_rq_of(se);
  		if (enqueue)
  			enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
  		cfs_rq->h_nr_running += task_delta;
  
  		if (cfs_rq_throttled(cfs_rq))
  			break;
  	}
  
  	if (!se)
  		rq->nr_running += task_delta;
  
  	/* determine whether we need to wake up potentially idle cpu */
  	if (rq->curr == rq->idle && rq->cfs.nr_running)
  		resched_task(rq->curr);
  }
  
  static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
  		u64 remaining, u64 expires)
  {
  	struct cfs_rq *cfs_rq;
  	u64 runtime = remaining;
  
  	rcu_read_lock();
  	list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
  				throttled_list) {
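		/*
		 * Give each throttled cfs_rq just enough runtime to bring
		 * runtime_remaining back above zero (hence the +1 below)
		 * and unthrottle it, until the distributed amount runs out.
		 */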
  		struct rq *rq = rq_of(cfs_rq);
  
  		raw_spin_lock(&rq->lock);
  		if (!cfs_rq_throttled(cfs_rq))
  			goto next;
  
  		runtime = -cfs_rq->runtime_remaining + 1;
  		if (runtime > remaining)
  			runtime = remaining;
  		remaining -= runtime;
  
  		cfs_rq->runtime_remaining += runtime;
  		cfs_rq->runtime_expires = expires;
  
  		/* we check whether we're throttled above */
  		if (cfs_rq->runtime_remaining > 0)
  			unthrottle_cfs_rq(cfs_rq);
  
  next:
  		raw_spin_unlock(&rq->lock);
  
  		if (!remaining)
  			break;
  	}
  	rcu_read_unlock();
  
  	return remaining;
  }
  /*
   * Responsible for refilling a task_group's bandwidth and unthrottling its
   * cfs_rqs as appropriate. If there has been no activity within the last
   * period the timer is deactivated until scheduling resumes; cfs_b->idle is
   * used to track this state.
   */
  static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
  {
	u64 runtime, runtime_expires;
	int idle = 1, throttled;
  
  	raw_spin_lock(&cfs_b->lock);
  	/* no need to continue the timer with no bandwidth constraint */
  	if (cfs_b->quota == RUNTIME_INF)
  		goto out_unlock;
	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
	/* idle depends on !throttled (for the case of a large deficit) */
	idle = cfs_b->idle && !throttled;
	cfs_b->nr_periods += overrun;

  	/* if we're going inactive then everything else can be deferred */
  	if (idle)
  		goto out_unlock;
  
  	__refill_cfs_bandwidth_runtime(cfs_b);
  	if (!throttled) {
  		/* mark as potentially idle for the upcoming period */
  		cfs_b->idle = 1;
  		goto out_unlock;
  	}
  	/* account preceding periods in which throttling occurred */
  	cfs_b->nr_throttled += overrun;
  	/*
  	 * There are throttled entities so we must first use the new bandwidth
  	 * to unthrottle them before making it generally available.  This
  	 * ensures that all existing debts will be paid before a new cfs_rq is
  	 * allowed to run.
  	 */
  	runtime = cfs_b->runtime;
  	runtime_expires = cfs_b->runtime_expires;
  	cfs_b->runtime = 0;
  
  	/*
  	 * This check is repeated as we are holding onto the new bandwidth
  	 * while we unthrottle.  This can potentially race with an unthrottled
  	 * group trying to acquire new bandwidth from the global pool.
  	 */
  	while (throttled && runtime > 0) {
  		raw_spin_unlock(&cfs_b->lock);
  		/* we can't nest cfs_b->lock while distributing bandwidth */
  		runtime = distribute_cfs_runtime(cfs_b, runtime,
  						 runtime_expires);
  		raw_spin_lock(&cfs_b->lock);
  
  		throttled = !list_empty(&cfs_b->throttled_cfs_rq);
  	}

  	/* return (any) remaining runtime */
  	cfs_b->runtime = runtime;
  	/*
  	 * While we are ensured activity in the period following an
  	 * unthrottle, this also covers the case in which the new bandwidth is
  	 * insufficient to cover the existing bandwidth deficit.  (Forcing the
  	 * timer to remain active while there are any throttled entities.)
  	 */
  	cfs_b->idle = 0;
  out_unlock:
  	if (idle)
  		cfs_b->timer_active = 0;
  	raw_spin_unlock(&cfs_b->lock);
  
  	return idle;
  }

  /* a cfs_rq won't donate quota below this amount */
  static const u64 min_cfs_rq_runtime = 1 * NSEC_PER_MSEC;
  /* minimum remaining period time to redistribute slack quota */
  static const u64 min_bandwidth_expiration = 2 * NSEC_PER_MSEC;
  /* how long we wait to gather additional slack before distributing */
  static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC;
  
  /* are we near the end of the current quota period? */
  static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire)
  {
  	struct hrtimer *refresh_timer = &cfs_b->period_timer;
  	u64 remaining;
  
  	/* if the call-back is running a quota refresh is already occurring */
  	if (hrtimer_callback_running(refresh_timer))
  		return 1;
  
  	/* is a quota refresh about to occur? */
  	remaining = ktime_to_ns(hrtimer_expires_remaining(refresh_timer));
  	if (remaining < min_expire)
  		return 1;
  
  	return 0;
  }
  
  static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
  {
  	u64 min_left = cfs_bandwidth_slack_period + min_bandwidth_expiration;
  
  	/* if there's a quota refresh soon don't bother with slack */
  	if (runtime_refresh_within(cfs_b, min_left))
  		return;
  
  	start_bandwidth_timer(&cfs_b->slack_timer,
  				ns_to_ktime(cfs_bandwidth_slack_period));
  }
  
  /* we know any runtime found here is valid as update_curr() precedes return */
  static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
  {
  	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
  	s64 slack_runtime = cfs_rq->runtime_remaining - min_cfs_rq_runtime;
  
  	if (slack_runtime <= 0)
  		return;
  
  	raw_spin_lock(&cfs_b->lock);
  	if (cfs_b->quota != RUNTIME_INF &&
  	    cfs_rq->runtime_expires == cfs_b->runtime_expires) {
  		cfs_b->runtime += slack_runtime;
  
  		/* we are under rq->lock, defer unthrottling using a timer */
  		if (cfs_b->runtime > sched_cfs_bandwidth_slice() &&
  		    !list_empty(&cfs_b->throttled_cfs_rq))
  			start_cfs_slack_bandwidth(cfs_b);
  	}
  	raw_spin_unlock(&cfs_b->lock);
  
  	/* even if it's not valid for return we don't want to try again */
  	cfs_rq->runtime_remaining -= slack_runtime;
  }
  
  static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
  {
  	if (!cfs_bandwidth_used())
  		return;
  	if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
  		return;
  
  	__return_cfs_rq_runtime(cfs_rq);
  }
  
  /*
   * This is done with a timer (instead of inline with bandwidth return) since
   * it's necessary to juggle rq->locks to unthrottle their respective cfs_rqs.
   */
  static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
  {
  	u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
  	u64 expires;
  
  	/* confirm we're still not at a refresh boundary */
  	if (runtime_refresh_within(cfs_b, min_bandwidth_expiration))
  		return;
  
  	raw_spin_lock(&cfs_b->lock);
  	if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) {
  		runtime = cfs_b->runtime;
  		cfs_b->runtime = 0;
  	}
  	expires = cfs_b->runtime_expires;
  	raw_spin_unlock(&cfs_b->lock);
  
  	if (!runtime)
  		return;
  
  	runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
  
  	raw_spin_lock(&cfs_b->lock);
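	/*
	 * Only hand the leftover runtime back to the global pool if the
	 * period timer has not refreshed the quota (and its expiration)
	 * in the meantime.
	 */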
  	if (expires == cfs_b->runtime_expires)
  		cfs_b->runtime = runtime;
  	raw_spin_unlock(&cfs_b->lock);
  }
  /*
   * When a group wakes up we want to make sure that its quota is not already
   * expired/exceeded, otherwise it may be allowed to steal additional ticks of
   * runtime as update_curr() throttling can not not trigger until it's on-rq.
   */
  static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
  {
  	if (!cfs_bandwidth_used())
  		return;
  	/* an active group must be handled by the update_curr()->put() path */
  	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
  		return;
  
  	/* ensure the group is not already throttled */
  	if (cfs_rq_throttled(cfs_rq))
  		return;
  
  	/* update runtime allocation */
  	account_cfs_rq_runtime(cfs_rq, 0);
  	if (cfs_rq->runtime_remaining <= 0)
  		throttle_cfs_rq(cfs_rq);
  }
  
  /* conditionally throttle active cfs_rq's from put_prev_entity() */
  static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
  {
  	if (!cfs_bandwidth_used())
  		return;
  	if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0))
  		return;
  
  	/*
  	 * it's possible for a throttled entity to be forced into a running
  	 * state (e.g. set_curr_task), in this case we're finished.
  	 */
  	if (cfs_rq_throttled(cfs_rq))
  		return;
  
  	throttle_cfs_rq(cfs_rq);
  }
  
  static inline u64 default_cfs_period(void);
  static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
  static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b);
  
  static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
  {
  	struct cfs_bandwidth *cfs_b =
  		container_of(timer, struct cfs_bandwidth, slack_timer);
  	do_sched_cfs_slack_timer(cfs_b);
  
  	return HRTIMER_NORESTART;
  }
  
  static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
  {
  	struct cfs_bandwidth *cfs_b =
  		container_of(timer, struct cfs_bandwidth, period_timer);
  	ktime_t now;
  	int overrun;
  	int idle = 0;
  
  	for (;;) {
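		/*
		 * hrtimer_forward() reports how many periods elapsed since
		 * the timer last fired; keep forwarding and handing the
		 * overrun count to do_sched_cfs_period_timer() until we
		 * are caught up.
		 */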
  		now = hrtimer_cb_get_time(timer);
  		overrun = hrtimer_forward(timer, now, cfs_b->period);
  
  		if (!overrun)
  			break;
  
  		idle = do_sched_cfs_period_timer(cfs_b, overrun);
  	}
  
  	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
  }
  
  void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
  {
  	raw_spin_lock_init(&cfs_b->lock);
  	cfs_b->runtime = 0;
  	cfs_b->quota = RUNTIME_INF;
  	cfs_b->period = ns_to_ktime(default_cfs_period());
  
  	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
  	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  	cfs_b->period_timer.function = sched_cfs_period_timer;
  	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  	cfs_b->slack_timer.function = sched_cfs_slack_timer;
  }
  
  static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
  {
  	cfs_rq->runtime_enabled = 0;
  	INIT_LIST_HEAD(&cfs_rq->throttled_list);
  }
  
  /* requires cfs_b->lock, may release to reprogram timer */
  void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
  {
  	/*
  	 * The timer may be active because we're trying to set a new bandwidth
  	 * period or because we're racing with the tear-down path
  	 * (timer_active==0 becomes visible before the hrtimer call-back
  	 * terminates).  In either case we ensure that it's re-programmed
  	 */
  	while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
  		raw_spin_unlock(&cfs_b->lock);
  		/* ensure cfs_b->lock is available while we wait */
  		hrtimer_cancel(&cfs_b->period_timer);
  
  		raw_spin_lock(&cfs_b->lock);
  		/* if someone else restarted the timer then we're done */
  		if (cfs_b->timer_active)
  			return;
  	}
  
  	cfs_b->timer_active = 1;
  	start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
  }
  
  static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
  {
  	hrtimer_cancel(&cfs_b->period_timer);
  	hrtimer_cancel(&cfs_b->slack_timer);
  }
  
  void unthrottle_offline_cfs_rqs(struct rq *rq)
  {
  	struct cfs_rq *cfs_rq;
  
  	for_each_leaf_cfs_rq(rq, cfs_rq) {
  		struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
  
  		if (!cfs_rq->runtime_enabled)
  			continue;
  
  		/*
  		 * clock_task is not advancing so we just need to make sure
  		 * there's some valid quota amount
  		 */
  		cfs_rq->runtime_remaining = cfs_b->quota;
  		if (cfs_rq_throttled(cfs_rq))
  			unthrottle_cfs_rq(cfs_rq);
  	}
  }
  
  #else /* CONFIG_CFS_BANDWIDTH */
  static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
  				     unsigned long delta_exec) {}
  static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
  static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
  
  static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
  {
  	return 0;
  }
  
  static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
  {
  	return 0;
  }
  
  static inline int throttled_lb_pair(struct task_group *tg,
  				    int src_cpu, int dest_cpu)
  {
  	return 0;
  }
  
  void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
#endif
  static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
  {
  	return NULL;
  }
  static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
  void unthrottle_offline_cfs_rqs(struct rq *rq) {}
  
  #endif /* CONFIG_CFS_BANDWIDTH */
  /**************************************************
   * CFS operations on tasks:
   */
  #ifdef CONFIG_SCHED_HRTICK
  static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
  {
  	struct sched_entity *se = &p->se;
  	struct cfs_rq *cfs_rq = cfs_rq_of(se);
  
  	WARN_ON(task_rq(p) != rq);
  	if (cfs_rq->nr_running > 1) {
  		u64 slice = sched_slice(cfs_rq, se);
  		u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
  		s64 delta = slice - ran;
  
  		if (delta < 0) {
  			if (rq->curr == p)
  				resched_task(p);
  			return;
  		}
  
  		/*
  		 * Don't schedule slices shorter than 10000ns, that just
  		 * doesn't make sense. Rely on vruntime for fairness.
  		 */
		if (rq->curr != p)
			delta = max_t(s64, 10000LL, delta);

		hrtick_start(rq, delta);
  	}
  }
  
  /*
   * called from enqueue/dequeue and updates the hrtick when the
   * current task is from our class and nr_running is low enough
   * to matter.
   */
  static void hrtick_update(struct rq *rq)
  {
  	struct task_struct *curr = rq->curr;
  	if (!hrtick_enabled(rq) || curr->sched_class != &fair_sched_class)
  		return;
  
  	if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
  		hrtick_start_fair(rq, curr);
  }
  #else /* !CONFIG_SCHED_HRTICK */
  static inline void
  hrtick_start_fair(struct rq *rq, struct task_struct *p)
  {
  }
  
  static inline void hrtick_update(struct rq *rq)
  {
  }
  #endif
  /*
   * The enqueue_task method is called before nr_running is
   * increased. Here we update the fair scheduling stats and
   * then put the task into the rbtree:
   */
static void
enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &p->se;

	for_each_sched_entity(se) {
		if (se->on_rq)
			break;
		cfs_rq = cfs_rq_of(se);
		enqueue_entity(cfs_rq, se, flags);
  
  		/*
  		 * end evaluation on encountering a throttled cfs_rq
  		 *
  		 * note: in the case of encountering a throttled cfs_rq we will
  		 * post the final h_nr_running increment below.
  		*/
  		if (cfs_rq_throttled(cfs_rq))
  			break;
		cfs_rq->h_nr_running++;

		flags = ENQUEUE_WAKEUP;
	}

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		cfs_rq->h_nr_running++;

		if (cfs_rq_throttled(cfs_rq))
			break;
		update_cfs_load(cfs_rq, 0);
		update_cfs_shares(cfs_rq);
	}

	if (!se)
		inc_nr_running(rq);
	hrtick_update(rq);
}

static void set_next_buddy(struct sched_entity *se);

  /*
   * The dequeue_task method is called before nr_running is
   * decreased. We remove the task from the rbtree and
   * update the fair scheduling stats:
   */
static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &p->se;
	int task_sleep = flags & DEQUEUE_SLEEP;

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		dequeue_entity(cfs_rq, se, flags);
  
  		/*
  		 * end evaluation on encountering a throttled cfs_rq
  		 *
  		 * note: in the case of encountering a throttled cfs_rq we will
  		 * post the final h_nr_running decrement below.
  		*/
  		if (cfs_rq_throttled(cfs_rq))
  			break;
		cfs_rq->h_nr_running--;

  		/* Don't dequeue parent if it has other entities besides us */
  		if (cfs_rq->load.weight) {
  			/*
  			 * Bias pick_next to pick a task from this cfs_rq, as
  			 * p is sleeping when it is within its sched_slice.
  			 */
  			if (task_sleep && parent_entity(se))
  				set_next_buddy(parent_entity(se));
  
  			/* avoid re-evaluating load for this entity */
  			se = parent_entity(se);
			break;
		}
		flags |= DEQUEUE_SLEEP;
	}

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		cfs_rq->h_nr_running--;

		if (cfs_rq_throttled(cfs_rq))
			break;
		update_cfs_load(cfs_rq, 0);
		update_cfs_shares(cfs_rq);
	}

	if (!se)
		dec_nr_running(rq);
	hrtick_update(rq);
  }
  #ifdef CONFIG_SMP
  /* Used instead of source_load when we know the type == 0 */
  static unsigned long weighted_cpuload(const int cpu)
  {
  	return cpu_rq(cpu)->load.weight;
  }
  
  /*
   * Return a low guess at the load of a migration-source cpu weighted
   * according to the scheduling class and "nice" value.
   *
   * We want to under-estimate the load of migration sources, to
   * balance conservatively.
   */
  static unsigned long source_load(int cpu, int type)
  {
  	struct rq *rq = cpu_rq(cpu);
  	unsigned long total = weighted_cpuload(cpu);
  
  	if (type == 0 || !sched_feat(LB_BIAS))
  		return total;
  
  	return min(rq->cpu_load[type-1], total);
  }
  
  /*
   * Return a high guess at the load of a migration-target cpu weighted
   * according to the scheduling class and "nice" value.
   */
  static unsigned long target_load(int cpu, int type)
  {
  	struct rq *rq = cpu_rq(cpu);
  	unsigned long total = weighted_cpuload(cpu);
  
  	if (type == 0 || !sched_feat(LB_BIAS))
  		return total;
  
  	return max(rq->cpu_load[type-1], total);
  }
  
  static unsigned long power_of(int cpu)
  {
  	return cpu_rq(cpu)->cpu_power;
  }
  
  static unsigned long cpu_avg_load_per_task(int cpu)
  {
  	struct rq *rq = cpu_rq(cpu);
  	unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
  
  	if (nr_running)
  		return rq->load.weight / nr_running;
  
  	return 0;
  }

  static void task_waking_fair(struct task_struct *p)
  {
  	struct sched_entity *se = &p->se;
  	struct cfs_rq *cfs_rq = cfs_rq_of(se);
  	u64 min_vruntime;
  
  #ifndef CONFIG_64BIT
  	u64 min_vruntime_copy;

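	/*
	 * On 32-bit, re-read min_vruntime until it matches min_vruntime_copy
	 * so we observe a consistent 64-bit value without taking rq->lock.
	 */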
  	do {
  		min_vruntime_copy = cfs_rq->min_vruntime_copy;
  		smp_rmb();
  		min_vruntime = cfs_rq->min_vruntime;
  	} while (min_vruntime != min_vruntime_copy);
  #else
  	min_vruntime = cfs_rq->min_vruntime;
  #endif

	se->vruntime -= min_vruntime;
  }
  #ifdef CONFIG_FAIR_GROUP_SCHED
  /*
   * effective_load() calculates the load change as seen from the root_task_group
   *
   * Adding load to a group doesn't make a group heavier, but can cause movement
   * of group shares between cpus. Assuming the shares were perfectly aligned one
   * can calculate the shift in shares.
   *
   * Calculate the effective load difference if @wl is added (subtracted) to @tg
   * on this @cpu and results in a total addition (subtraction) of @wg to the
   * total group weight.
   *
   * Given a runqueue weight distribution (rw_i) we can compute a shares
   * distribution (s_i) using:
   *
   *   s_i = rw_i / \Sum rw_j						(1)
   *
   * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
   * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
   * shares distribution (s_i):
   *
   *   rw_i = {   2,   4,   1,   0 }
   *   s_i  = { 2/7, 4/7, 1/7,   0 }
   *
   * As per wake_affine() we're interested in the load of two CPUs (the CPU the
   * task used to run on and the CPU the waker is running on), we need to
   * compute the effect of waking a task on either CPU and, in case of a sync
   * wakeup, compute the effect of the current task going to sleep.
   *
   * So for a change of @wl to the local @cpu with an overall group weight change
   * of @wl we can compute the new shares distribution (s'_i) using:
   *
   *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)				(2)
   *
   * Suppose we're interested in CPUs 0 and 1, and want to compute the load
   * differences in waking a task to CPU 0. The additional task changes the
   * weight and shares distributions like:
   *
   *   rw'_i = {   3,   4,   1,   0 }
   *   s'_i  = { 3/8, 4/8, 1/8,   0 }
   *
   * We can then compute the difference in effective weight by using:
   *
   *   dw_i = S * (s'_i - s_i)						(3)
   *
   * Where 'S' is the group weight as seen by its parent.
   *
   * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
   * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
   * 4/7) times the weight of the group.
   */
static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
{
	struct sched_entity *se = tg->se[cpu];

	if (!tg->parent)	/* the trivial, non-cgroup case */
		return wl;

	for_each_sched_entity(se) {
		long w, W;

		tg = se->my_q->tg;

  		/*
  		 * W = @wg + \Sum rw_j
  		 */
  		W = wg + calc_tg_weight(tg, se->my_q);

  		/*
  		 * w = rw_i + @wl
  		 */
  		w = se->my_q->load.weight + wl;

  		/*
  		 * wl = S * s'_i; see (2)
  		 */
  		if (W > 0 && w < W)
  			wl = (w * tg->shares) / W;
		else
			wl = tg->shares;

  		/*
  		 * Per the above, wl is the new se->load.weight value; since
  		 * those are clipped to [MIN_SHARES, ...) do so now. See
  		 * calc_cfs_shares().
  		 */
		if (wl < MIN_SHARES)
			wl = MIN_SHARES;
  
  		/*
  		 * wl = dw_i = S * (s'_i - s_i); see (3)
  		 */
  		wl -= se->load.weight;
  
  		/*
  		 * Recursively apply this logic to all parent groups to compute
  		 * the final effective load change on the root group. Since
  		 * only the @tg group gets extra weight, all parent groups can
  		 * only redistribute existing shares. @wl is the shift in shares
  		 * resulting from this level per the above.
  		 */
		wg = 0;
	}

	return wl;
}
#else

static inline unsigned long effective_load(struct task_group *tg, int cpu,
		unsigned long wl, unsigned long wg)
{
	return wl;
}

  #endif
static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
{
	s64 this_load, load;
	int idx, this_cpu, prev_cpu;
	unsigned long tl_per_task;
	struct task_group *tg;
	unsigned long weight;
	int balanced;

  	idx	  = sd->wake_idx;
  	this_cpu  = smp_processor_id();
  	prev_cpu  = task_cpu(p);
  	load	  = source_load(prev_cpu, idx);
  	this_load = target_load(this_cpu, idx);
  
  	/*
  	 * If sync wakeup then subtract the (maximum possible)
  	 * effect of the currently running task from the load
  	 * of the current CPU:
  	 */
	if (sync) {
		tg = task_group(current);
		weight = current->se.load.weight;

		this_load += effective_load(tg, this_cpu, -weight, -weight);
		load += effective_load(tg, prev_cpu, 0, -weight);
	}

83378269a   Peter Zijlstra   sched: correct wa...
2389
2390
  	tg = task_group(p);
  	weight = p->se.load.weight;
b3137bc8e   Mike Galbraith   sched: stop wake_...
2391

71a29aa7b   Peter Zijlstra   sched: Deal with ...
2392
2393
  	/*
  	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
c88d59108   Peter Zijlstra   sched: Merge sele...
2394
2395
2396
  	 * due to the sync cause above having dropped this_load to 0, we'll
  	 * always have an imbalance, but there's really nothing you can do
  	 * about that, so that's fine.
71a29aa7b   Peter Zijlstra   sched: Deal with ...
2397
2398
2399
2400
  	 *
  	 * Otherwise check if the two cpus are close enough in load to allow this
  	 * task to be woken on this_cpu.
  	 */
e37b6a7b2   Paul Turner   sched: Fix sign u...
2401
2402
  	if (this_load > 0) {
  		s64 this_eff_load, prev_eff_load;
e51fd5e22   Peter Zijlstra   sched: Fix wake_a...
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
  
  		this_eff_load = 100;
  		this_eff_load *= power_of(prev_cpu);
  		this_eff_load *= this_load +
  			effective_load(tg, this_cpu, weight, weight);
  
  		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
  		prev_eff_load *= power_of(this_cpu);
  		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
  
  		balanced = this_eff_load <= prev_eff_load;
  	} else
  		balanced = true;
b3137bc8e   Mike Galbraith   sched: stop wake_...
2416
2417
  
  	/*
4ae7d5cef   Ingo Molnar   sched: improve af...
2418
2419
2420
  	 * If the currently running task will sleep within
  	 * a reasonable amount of time then attract this newly
  	 * woken task:
098fb9db2   Ingo Molnar   sched: clean up w...
2421
  	 */
2fb7635c4   Peter Zijlstra   sched: sync wakeu...
2422
2423
  	if (sync && balanced)
  		return 1;
098fb9db2   Ingo Molnar   sched: clean up w...
2424

41acab885   Lucas De Marchi   sched: Implement ...
2425
  	schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
098fb9db2   Ingo Molnar   sched: clean up w...
2426
  	tl_per_task = cpu_avg_load_per_task(this_cpu);
c88d59108   Peter Zijlstra   sched: Merge sele...
2427
2428
2429
  	if (balanced ||
  	    (this_load <= load &&
  	     this_load + target_load(prev_cpu, idx) <= tl_per_task)) {
098fb9db2   Ingo Molnar   sched: clean up w...
2430
2431
2432
2433
2434
  		/*
  		 * This domain has SD_WAKE_AFFINE and
  		 * p is cache cold in this domain, and
  		 * there is no bad imbalance.
  		 */
c88d59108   Peter Zijlstra   sched: Merge sele...
2435
  		schedstat_inc(sd, ttwu_move_affine);
41acab885   Lucas De Marchi   sched: Implement ...
2436
  		schedstat_inc(p, se.statistics.nr_wakeups_affine);
098fb9db2   Ingo Molnar   sched: clean up w...
2437
2438
2439
2440
2441
  
  		return 1;
  	}
  	return 0;
  }
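  
  /*
   * Standalone illustrative sketch, not part of fair.c: the balance test
   * used by wake_affine() above, with the effective_load() adjustments
   * omitted for brevity.  The prev_cpu side is handicapped by half the
   * domain's imbalance_pct, each side is weighted by the *other* cpu's
   * power, and the wakeup counts as balanced when the this_cpu side does
   * not exceed the prev_cpu side.  The loads, powers and imbalance_pct in
   * main() are made-up demonstration values.
   */
  #include <stdio.h>
  
  static int demo_wake_affine_balanced(long this_load, long prev_load,
  				     unsigned long this_power,
  				     unsigned long prev_power,
  				     unsigned int imbalance_pct)
  {
  	long this_eff_load, prev_eff_load;
  
  	if (this_load <= 0)
  		return 1;	/* nothing running here, trivially balanced */
  
  	this_eff_load = 100;
  	this_eff_load *= prev_power;
  	this_eff_load *= this_load;
  
  	prev_eff_load = 100 + (imbalance_pct - 100) / 2;
  	prev_eff_load *= this_power;
  	prev_eff_load *= prev_load;
  
  	return this_eff_load <= prev_eff_load;
  }
  
  int main(void)
  {
  	printf("%d\n", demo_wake_affine_balanced(1000, 1100, 1024, 1024, 125));
  	printf("%d\n", demo_wake_affine_balanced(1000,  700, 1024, 1024, 125));
  	return 0;
  }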
aaee1203c   Peter Zijlstra   sched: Move sched...
2442
2443
2444
2445
2446
  /*
   * find_idlest_group finds and returns the least busy CPU group within the
   * domain.
   */
  static struct sched_group *
78e7ed53c   Peter Zijlstra   sched: Tweak wake...
2447
  find_idlest_group(struct sched_domain *sd, struct task_struct *p,
5158f4e44   Peter Zijlstra   sched: Clean up t...
2448
  		  int this_cpu, int load_idx)
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2449
  {
b3bd3de66   Andi Kleen   gcc-4.6: kernel/*...
2450
  	struct sched_group *idlest = NULL, *group = sd->groups;
aaee1203c   Peter Zijlstra   sched: Move sched...
2451
  	unsigned long min_load = ULONG_MAX, this_load = 0;
aaee1203c   Peter Zijlstra   sched: Move sched...
2452
  	int imbalance = 100 + (sd->imbalance_pct-100)/2;
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2453

aaee1203c   Peter Zijlstra   sched: Move sched...
2454
2455
2456
2457
  	do {
  		unsigned long load, avg_load;
  		int local_group;
  		int i;
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2458

aaee1203c   Peter Zijlstra   sched: Move sched...
2459
2460
  		/* Skip over this group if it has no CPUs allowed */
  		if (!cpumask_intersects(sched_group_cpus(group),
fa17b507f   Peter Zijlstra   sched: Wrap sched...
2461
  					tsk_cpus_allowed(p)))
aaee1203c   Peter Zijlstra   sched: Move sched...
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
  			continue;
  
  		local_group = cpumask_test_cpu(this_cpu,
  					       sched_group_cpus(group));
  
  		/* Tally up the load of all CPUs in the group */
  		avg_load = 0;
  
  		for_each_cpu(i, sched_group_cpus(group)) {
  			/* Bias balancing toward cpus of our domain */
  			if (local_group)
  				load = source_load(i, load_idx);
  			else
  				load = target_load(i, load_idx);
  
  			avg_load += load;
  		}
  
  		/* Adjust by relative CPU power of the group */
9c3f75cbd   Peter Zijlstra   sched: Break out ...
2481
  		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
aaee1203c   Peter Zijlstra   sched: Move sched...
2482
2483
2484
  
  		if (local_group) {
  			this_load = avg_load;
aaee1203c   Peter Zijlstra   sched: Move sched...
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
  		} else if (avg_load < min_load) {
  			min_load = avg_load;
  			idlest = group;
  		}
  	} while (group = group->next, group != sd->groups);
  
  	if (!idlest || 100*this_load < imbalance*min_load)
  		return NULL;
  	return idlest;
  }
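  
  /*
   * Standalone illustrative sketch, not part of fair.c: how find_idlest_group()
   * above forms a group's average load -- sum the per-cpu loads (the
   * source/target load bias is ignored here) and normalize by the group's
   * cpu power, so groups of different sizes and strengths compare on the
   * same scale.  DEMO_POWER_SCALE = 1024 mirrors the usual SCHED_POWER_SCALE
   * but is an assumption of this sketch, as are the loads in main().
   */
  #include <stdio.h>
  
  #define DEMO_POWER_SCALE 1024UL
  
  static unsigned long demo_group_avg_load(const unsigned long *cpu_load,
  					 int nr_cpus, unsigned long group_power)
  {
  	unsigned long sum = 0;
  	int i;
  
  	for (i = 0; i < nr_cpus; i++)
  		sum += cpu_load[i];
  
  	/* Adjust by relative CPU power of the group. */
  	return (sum * DEMO_POWER_SCALE) / group_power;
  }
  
  int main(void)
  {
  	unsigned long a[] = { 800, 200 };		/* 2 cpus, power 2048 */
  	unsigned long b[] = { 300, 300, 300, 300 };	/* 4 cpus, power 4096 */
  
  	printf("group a: %lu\n", demo_group_avg_load(a, 2, 2048));	/* 500 */
  	printf("group b: %lu\n", demo_group_avg_load(b, 4, 4096));	/* 300 */
  	return 0;
  }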
  
  /*
   * find_idlest_cpu - find the idlest cpu among the cpus in group.
   */
  static int
  find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  {
  	unsigned long load, min_load = ULONG_MAX;
  	int idlest = -1;
  	int i;
  
  	/* Traverse only the allowed CPUs */
fa17b507f   Peter Zijlstra   sched: Wrap sched...
2507
  	for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
aaee1203c   Peter Zijlstra   sched: Move sched...
2508
2509
2510
2511
2512
  		load = weighted_cpuload(i);
  
  		if (load < min_load || (load == min_load && i == this_cpu)) {
  			min_load = load;
  			idlest = i;
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2513
2514
  		}
  	}
aaee1203c   Peter Zijlstra   sched: Move sched...
2515
2516
  	return idlest;
  }
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2517

aaee1203c   Peter Zijlstra   sched: Move sched...
2518
  /*
a50bde513   Peter Zijlstra   sched: Cleanup se...
2519
2520
   * Try and locate an idle CPU in the sched_domain.
   */
99bd5e2f2   Suresh Siddha   sched: Fix select...
2521
  static int select_idle_sibling(struct task_struct *p, int target)
a50bde513   Peter Zijlstra   sched: Cleanup se...
2522
2523
2524
  {
  	int cpu = smp_processor_id();
  	int prev_cpu = task_cpu(p);
99bd5e2f2   Suresh Siddha   sched: Fix select...
2525
  	struct sched_domain *sd;
4dcfe1025   Peter Zijlstra   sched: Avoid SMT ...
2526
  	struct sched_group *sg;
77e81365e   Suresh Siddha   sched: Clean up d...
2527
  	int i;
a50bde513   Peter Zijlstra   sched: Cleanup se...
2528
2529
  
  	/*
99bd5e2f2   Suresh Siddha   sched: Fix select...
2530
2531
  	 * If the task is going to be woken-up on this cpu and if it is
  	 * already idle, then it is the right target.
a50bde513   Peter Zijlstra   sched: Cleanup se...
2532
  	 */
99bd5e2f2   Suresh Siddha   sched: Fix select...
2533
2534
2535
2536
2537
2538
2539
2540
  	if (target == cpu && idle_cpu(cpu))
  		return cpu;
  
  	/*
  	 * If the task is going to be woken-up on the cpu where it previously
  	 * ran and if it is currently idle, then it is the right target.
  	 */
  	if (target == prev_cpu && idle_cpu(prev_cpu))
fe3bcfe1f   Peter Zijlstra   sched: More gener...
2541
  		return prev_cpu;
a50bde513   Peter Zijlstra   sched: Cleanup se...
2542
2543
  
  	/*
99bd5e2f2   Suresh Siddha   sched: Fix select...
2544
  	 * Otherwise, iterate the domains and find an eligible idle cpu.
a50bde513   Peter Zijlstra   sched: Cleanup se...
2545
  	 */
dce840a08   Peter Zijlstra   sched: Dynamicall...
2546
  	rcu_read_lock();
99bd5e2f2   Suresh Siddha   sched: Fix select...
2547

518cd6234   Peter Zijlstra   sched: Only queue...
2548
  	sd = rcu_dereference(per_cpu(sd_llc, target));
77e81365e   Suresh Siddha   sched: Clean up d...
2549
  	for_each_lower_domain(sd) {
4dcfe1025   Peter Zijlstra   sched: Avoid SMT ...
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
  		sg = sd->groups;
  		do {
  			if (!cpumask_intersects(sched_group_cpus(sg),
  						tsk_cpus_allowed(p)))
  				goto next;
  
  			for_each_cpu(i, sched_group_cpus(sg)) {
  				if (!idle_cpu(i))
  					goto next;
  			}
  
  			target = cpumask_first_and(sched_group_cpus(sg),
  					tsk_cpus_allowed(p));
  			goto done;
  next:
  			sg = sg->next;
  		} while (sg != sd->groups);
a50bde513   Peter Zijlstra   sched: Cleanup se...
2567
  	}
4dcfe1025   Peter Zijlstra   sched: Avoid SMT ...
2568
  done:
dce840a08   Peter Zijlstra   sched: Dynamicall...
2569
  	rcu_read_unlock();
a50bde513   Peter Zijlstra   sched: Cleanup se...
2570
2571
2572
2573
2574
  
  	return target;
  }
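  
  /*
   * Standalone illustrative sketch, not part of fair.c: the core of the
   * group scan in select_idle_sibling() above -- walk the candidate groups,
   * reject a group as soon as one of its cpus is busy, and return the first
   * allowed cpu of the first fully idle group.  The plain-array group and
   * cpumask representation is a simplification for demonstration.
   */
  #include <stdio.h>
  
  struct demo_group {
  	const int *cpus;
  	int nr;
  };
  
  static int demo_find_idle_group_cpu(const struct demo_group *groups, int nr_groups,
  				    const int *cpu_idle, const int *cpu_allowed)
  {
  	int g, i;
  
  	for (g = 0; g < nr_groups; g++) {
  		int first_allowed = -1;
  
  		for (i = 0; i < groups[g].nr; i++) {
  			int cpu = groups[g].cpus[i];
  
  			if (!cpu_idle[cpu])
  				goto next;	/* one busy cpu disqualifies the group */
  			if (cpu_allowed[cpu] && first_allowed < 0)
  				first_allowed = cpu;
  		}
  		if (first_allowed >= 0)
  			return first_allowed;
  next:
  		;
  	}
  	return -1;	/* no fully idle group found */
  }
  
  int main(void)
  {
  	const int g0[] = { 0, 1 }, g1[] = { 2, 3 };
  	struct demo_group groups[] = { { g0, 2 }, { g1, 2 } };
  	int idle[]    = { 0, 1, 1, 1 };	/* cpu0 is busy */
  	int allowed[] = { 1, 1, 1, 1 };
  
  	printf("target = %d\n", demo_find_idle_group_cpu(groups, 2, idle, allowed));
  	return 0;
  }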
  
  /*
aaee1203c   Peter Zijlstra   sched: Move sched...
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
   * select_task_rq_fair: balance the current task (running on cpu) in domains
   * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
   * SD_BALANCE_EXEC.
   *
   * Balance, ie. select the least loaded group.
   *
   * Returns the target CPU number, or the same CPU if no balancing is needed.
   *
   * preempt must be disabled.
   */
0017d7350   Peter Zijlstra   sched: Fix TASK_W...
2585
  static int
7608dec2c   Peter Zijlstra   sched: Drop the r...
2586
  select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
aaee1203c   Peter Zijlstra   sched: Move sched...
2587
  {
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
2588
  	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
c88d59108   Peter Zijlstra   sched: Merge sele...
2589
2590
2591
  	int cpu = smp_processor_id();
  	int prev_cpu = task_cpu(p);
  	int new_cpu = cpu;
99bd5e2f2   Suresh Siddha   sched: Fix select...
2592
  	int want_affine = 0;
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
2593
  	int want_sd = 1;
5158f4e44   Peter Zijlstra   sched: Clean up t...
2594
  	int sync = wake_flags & WF_SYNC;
c88d59108   Peter Zijlstra   sched: Merge sele...
2595

76854c7e8   Mike Galbraith   sched: Use rt.nr_...
2596
2597
  	if (p->rt.nr_cpus_allowed == 1)
  		return prev_cpu;
0763a660a   Peter Zijlstra   sched: Rename sel...
2598
  	if (sd_flag & SD_BALANCE_WAKE) {
fa17b507f   Peter Zijlstra   sched: Wrap sched...
2599
  		if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
c88d59108   Peter Zijlstra   sched: Merge sele...
2600
2601
2602
  			want_affine = 1;
  		new_cpu = prev_cpu;
  	}
aaee1203c   Peter Zijlstra   sched: Move sched...
2603

dce840a08   Peter Zijlstra   sched: Dynamicall...
2604
  	rcu_read_lock();
aaee1203c   Peter Zijlstra   sched: Move sched...
2605
  	for_each_domain(cpu, tmp) {
e4f428884   Peter Zijlstra   sched: Select_tas...
2606
2607
  		if (!(tmp->flags & SD_LOAD_BALANCE))
  			continue;
aaee1203c   Peter Zijlstra   sched: Move sched...
2608
  		/*
ae154be1f   Peter Zijlstra   sched: Weaken SD_...
2609
2610
  		 * If power savings logic is enabled for a domain, see if we
  		 * are not overloaded; if so, don't balance wider.
aaee1203c   Peter Zijlstra   sched: Move sched...
2611
  		 */
59abf0264   Peter Zijlstra   sched: Add SD_PRE...
2612
  		if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) {
ae154be1f   Peter Zijlstra   sched: Weaken SD_...
2613
2614
2615
2616
2617
2618
2619
2620
2621
  			unsigned long power = 0;
  			unsigned long nr_running = 0;
  			unsigned long capacity;
  			int i;
  
  			for_each_cpu(i, sched_domain_span(tmp)) {
  				power += power_of(i);
  				nr_running += cpu_rq(i)->cfs.nr_running;
  			}
1399fa780   Nikhil Rao   sched: Introduce ...
2622
  			capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
ae154be1f   Peter Zijlstra   sched: Weaken SD_...
2623

59abf0264   Peter Zijlstra   sched: Add SD_PRE...
2624
2625
2626
2627
  			if (tmp->flags & SD_POWERSAVINGS_BALANCE)
  				nr_running /= 2;
  
  			if (nr_running < capacity)
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
2628
  				want_sd = 0;
ae154be1f   Peter Zijlstra   sched: Weaken SD_...
2629
  		}
aaee1203c   Peter Zijlstra   sched: Move sched...
2630

fe3bcfe1f   Peter Zijlstra   sched: More gener...
2631
  		/*
99bd5e2f2   Suresh Siddha   sched: Fix select...
2632
2633
  		 * If both cpu and prev_cpu are part of this domain,
  		 * cpu is a valid SD_WAKE_AFFINE target.
fe3bcfe1f   Peter Zijlstra   sched: More gener...
2634
  		 */
99bd5e2f2   Suresh Siddha   sched: Fix select...
2635
2636
2637
2638
  		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
  		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
  			affine_sd = tmp;
  			want_affine = 0;
c88d59108   Peter Zijlstra   sched: Merge sele...
2639
  		}
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
2640
2641
  		if (!want_sd && !want_affine)
  			break;
0763a660a   Peter Zijlstra   sched: Rename sel...
2642
  		if (!(tmp->flags & sd_flag))
c88d59108   Peter Zijlstra   sched: Merge sele...
2643
  			continue;
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
2644
2645
2646
  		if (want_sd)
  			sd = tmp;
  	}
8b911acdf   Mike Galbraith   sched: Fix select...
2647
  	if (affine_sd) {
99bd5e2f2   Suresh Siddha   sched: Fix select...
2648
  		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
dce840a08   Peter Zijlstra   sched: Dynamicall...
2649
2650
2651
2652
  			prev_cpu = cpu;
  
  		new_cpu = select_idle_sibling(p, prev_cpu);
  		goto unlock;
8b911acdf   Mike Galbraith   sched: Fix select...
2653
  	}
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2654

aaee1203c   Peter Zijlstra   sched: Move sched...
2655
  	while (sd) {
5158f4e44   Peter Zijlstra   sched: Clean up t...
2656
  		int load_idx = sd->forkexec_idx;
aaee1203c   Peter Zijlstra   sched: Move sched...
2657
  		struct sched_group *group;
c88d59108   Peter Zijlstra   sched: Merge sele...
2658
  		int weight;
098fb9db2   Ingo Molnar   sched: clean up w...
2659

0763a660a   Peter Zijlstra   sched: Rename sel...
2660
  		if (!(sd->flags & sd_flag)) {
aaee1203c   Peter Zijlstra   sched: Move sched...
2661
2662
2663
  			sd = sd->child;
  			continue;
  		}
098fb9db2   Ingo Molnar   sched: clean up w...
2664

5158f4e44   Peter Zijlstra   sched: Clean up t...
2665
2666
  		if (sd_flag & SD_BALANCE_WAKE)
  			load_idx = sd->wake_idx;
098fb9db2   Ingo Molnar   sched: clean up w...
2667

5158f4e44   Peter Zijlstra   sched: Clean up t...
2668
  		group = find_idlest_group(sd, p, cpu, load_idx);
aaee1203c   Peter Zijlstra   sched: Move sched...
2669
2670
2671
2672
  		if (!group) {
  			sd = sd->child;
  			continue;
  		}
4ae7d5cef   Ingo Molnar   sched: improve af...
2673

d7c33c493   Peter Zijlstra   sched: Fix task a...
2674
  		new_cpu = find_idlest_cpu(group, p, cpu);
aaee1203c   Peter Zijlstra   sched: Move sched...
2675
2676
2677
2678
  		if (new_cpu == -1 || new_cpu == cpu) {
  			/* Now try balancing at a lower domain level of cpu */
  			sd = sd->child;
  			continue;
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2679
  		}
aaee1203c   Peter Zijlstra   sched: Move sched...
2680
2681
2682
  
  		/* Now try balancing at a lower domain level of new_cpu */
  		cpu = new_cpu;
669c55e9f   Peter Zijlstra   sched: Pre-comput...
2683
  		weight = sd->span_weight;
aaee1203c   Peter Zijlstra   sched: Move sched...
2684
2685
  		sd = NULL;
  		for_each_domain(cpu, tmp) {
669c55e9f   Peter Zijlstra   sched: Pre-comput...
2686
  			if (weight <= tmp->span_weight)
aaee1203c   Peter Zijlstra   sched: Move sched...
2687
  				break;
0763a660a   Peter Zijlstra   sched: Rename sel...
2688
  			if (tmp->flags & sd_flag)
aaee1203c   Peter Zijlstra   sched: Move sched...
2689
2690
2691
  				sd = tmp;
  		}
  		/* while loop will break here if sd == NULL */
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2692
  	}
dce840a08   Peter Zijlstra   sched: Dynamicall...
2693
2694
  unlock:
  	rcu_read_unlock();
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2695

c88d59108   Peter Zijlstra   sched: Merge sele...
2696
  	return new_cpu;
e7693a362   Gregory Haskins   sched: de-SCHED_O...
2697
2698
  }
  #endif /* CONFIG_SMP */
e52fb7c09   Peter Zijlstra   sched: prefer wakers
2699
2700
  static unsigned long
  wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
0bbd3336e   Peter Zijlstra   sched: fix wakeup...
2701
2702
2703
2704
  {
  	unsigned long gran = sysctl_sched_wakeup_granularity;
  
  	/*
e52fb7c09   Peter Zijlstra   sched: prefer wakers
2705
2706
  	 * Since it's curr that is running now, convert the gran from real-time
  	 * to virtual-time in its units.
13814d42e   Mike Galbraith   sched: Remove ASY...
2707
2708
2709
2710
2711
2712
2713
2714
2715
  	 *
  	 * By using 'se' instead of 'curr' we penalize light tasks, so
  	 * they get preempted more easily. That is, if 'se' < 'curr' then
  	 * the resulting gran will be larger, therefore penalizing the
  	 * lighter task; if, on the other hand, 'se' > 'curr' then the
  	 * resulting gran will be smaller, again penalizing the lighter task.
  	 *
  	 * This is especially important for buddies when the leftmost
  	 * task is higher priority than the buddy.
0bbd3336e   Peter Zijlstra   sched: fix wakeup...
2716
  	 */
f4ad9bd20   Shaohua Li   sched: Eliminate ...
2717
  	return calc_delta_fair(gran, se);
0bbd3336e   Peter Zijlstra   sched: fix wakeup...
2718
2719
2720
  }
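  
  /*
   * Standalone illustrative sketch, not part of fair.c: the real-time to
   * virtual-time conversion done by calc_delta_fair() above.  The wakeup
   * granularity is scaled by NICE_0_LOAD / weight, so a lighter 'se' gets a
   * larger virtual granularity and is therefore penalized, as the comment
   * explains.  The 1024 nice-0 load and the example weights (roughly nice
   * +5 and -5) are assumptions of this sketch.
   */
  #include <stdio.h>
  
  #define DEMO_NICE_0_LOAD 1024ULL
  
  static unsigned long long demo_calc_delta_fair(unsigned long long gran_ns,
  					       unsigned long long se_weight)
  {
  	return gran_ns * DEMO_NICE_0_LOAD / se_weight;
  }
  
  int main(void)
  {
  	unsigned long long gran = 1000000ULL;	/* 1ms wakeup granularity */
  
  	printf("light (weight  335): %llu ns\n", demo_calc_delta_fair(gran, 335));
  	printf("nice0 (weight 1024): %llu ns\n", demo_calc_delta_fair(gran, 1024));
  	printf("heavy (weight 3121): %llu ns\n", demo_calc_delta_fair(gran, 3121));
  	return 0;
  }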
  
  /*
464b75273   Peter Zijlstra   sched: re-instate...
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
   * Should 'se' preempt 'curr'?
   *
   *             |s1
   *        |s2
   *   |s3
   *         g
   *      |<--->|c
   *
   *  w(c, s1) = -1
   *  w(c, s2) =  0
   *  w(c, s3) =  1
   *
   */
  static int
  wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
  {
  	s64 gran, vdiff = curr->vruntime - se->vruntime;
  
  	if (vdiff <= 0)
  		return -1;
e52fb7c09   Peter Zijlstra   sched: prefer wakers
2741
  	gran = wakeup_gran(curr, se);
464b75273   Peter Zijlstra   sched: re-instate...
2742
2743
2744
2745
2746
  	if (vdiff > gran)
  		return 1;
  
  	return 0;
  }
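  
  /*
   * Standalone illustrative sketch, not part of fair.c: the three-way
   * decision drawn in the diagram above.  With c = curr->vruntime and
   * s = se->vruntime, the result is -1 when se is not ahead of curr (s1),
   * 0 when it is ahead but within one wakeup granularity (s2), and 1 when
   * it is ahead by more than the granularity (s3).  The granularity is
   * passed in directly instead of being derived via wakeup_gran().
   */
  #include <stdio.h>
  
  static int demo_wakeup_preempt(long long curr_vruntime, long long se_vruntime,
  			       long long gran)
  {
  	long long vdiff = curr_vruntime - se_vruntime;
  
  	if (vdiff <= 0)
  		return -1;
  	if (vdiff > gran)
  		return 1;
  	return 0;
  }
  
  int main(void)
  {
  	long long c = 1000, g = 100;
  
  	printf("s1: %d\n", demo_wakeup_preempt(c, 1200, g));	/* behind curr -> -1 */
  	printf("s2: %d\n", demo_wakeup_preempt(c,  950, g));	/* within gran ->  0 */
  	printf("s3: %d\n", demo_wakeup_preempt(c,  800, g));	/* past gran   ->  1 */
  	return 0;
  }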
02479099c   Peter Zijlstra   sched: fix buddie...
2747
2748
  static void set_last_buddy(struct sched_entity *se)
  {
69c80f3e9   Venkatesh Pallipadi   sched: Make set_*...
2749
2750
2751
2752
2753
  	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
  		return;
  
  	for_each_sched_entity(se)
  		cfs_rq_of(se)->last = se;
02479099c   Peter Zijlstra   sched: fix buddie...
2754
2755
2756
2757
  }
  
  static void set_next_buddy(struct sched_entity *se)
  {
69c80f3e9   Venkatesh Pallipadi   sched: Make set_*...
2758
2759
2760
2761
2762
  	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
  		return;
  
  	for_each_sched_entity(se)
  		cfs_rq_of(se)->next = se;
02479099c   Peter Zijlstra   sched: fix buddie...
2763
  }
ac53db596   Rik van Riel   sched: Use a budd...
2764
2765
  static void set_skip_buddy(struct sched_entity *se)
  {
69c80f3e9   Venkatesh Pallipadi   sched: Make set_*...
2766
2767
  	for_each_sched_entity(se)
  		cfs_rq_of(se)->skip = se;
ac53db596   Rik van Riel   sched: Use a budd...
2768
  }
464b75273   Peter Zijlstra   sched: re-instate...
2769
  /*
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2770
2771
   * Preempt the current task with a newly woken task if needed:
   */
5a9b86f64   Peter Zijlstra   sched: Rename fla...
2772
  static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2773
2774
  {
  	struct task_struct *curr = rq->curr;
8651a86c3   Srivatsa Vaddagiri   sched: group sche...
2775
  	struct sched_entity *se = &curr->se, *pse = &p->se;
03e89e457   Mike Galbraith   sched: fix wakeup...
2776
  	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
f685ceaca   Mike Galbraith   sched: Strengthen...
2777
  	int scale = cfs_rq->nr_running >= sched_nr_latency;
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
2778
  	int next_buddy_marked = 0;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2779

4ae7d5cef   Ingo Molnar   sched: improve af...
2780
2781
  	if (unlikely(se == pse))
  		return;
5238cdd38   Paul Turner   sched: Prevent bu...
2782
2783
2784
2785
2786
2787
2788
2789
  	/*
  	 * This is possible from callers such as pull_task(), in which we
  	 * unconditionally check_preempt_curr() after an enqueue (which may have
  	 * led to a throttle).  This both saves work and prevents false
  	 * next-buddy nomination below.
  	 */
  	if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
  		return;
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
2790
  	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
3cb63d527   Mike Galbraith   sched: Complete b...
2791
  		set_next_buddy(pse);
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
2792
2793
  		next_buddy_marked = 1;
  	}
57fdc26d4   Peter Zijlstra   sched: fixup budd...
2794

aec0a5142   Bharata B Rao   sched: call resch...
2795
2796
2797
  	/*
  	 * We can come here with TIF_NEED_RESCHED already set from new task
  	 * wake up path.
5238cdd38   Paul Turner   sched: Prevent bu...
2798
2799
2800
2801
2802
2803
  	 *
  	 * Note: this also catches the edge-case of curr being in a throttled
  	 * group (e.g. via set_curr_task), since update_curr() (in the
  	 * enqueue of curr) will have resulted in resched being set.  This
  	 * prevents us from potentially nominating it as a false LAST_BUDDY
  	 * below.
aec0a5142   Bharata B Rao   sched: call resch...
2804
2805
2806
  	 */
  	if (test_tsk_need_resched(curr))
  		return;
a2f5c9ab7   Darren Hart   sched: Allow SCHE...
2807
2808
2809
2810
  	/* Idle tasks are by definition preempted by non-idle tasks. */
  	if (unlikely(curr->policy == SCHED_IDLE) &&
  	    likely(p->policy != SCHED_IDLE))
  		goto preempt;
91c234b4e   Ingo Molnar   sched: do not wak...
2811
  	/*
a2f5c9ab7   Darren Hart   sched: Allow SCHE...
2812
2813
  	 * Batch and idle tasks do not preempt non-idle tasks (their preemption
  	 * is driven by the tick):
91c234b4e   Ingo Molnar   sched: do not wak...
2814
  	 */
6bc912b71   Peter Zijlstra   sched: SCHED_OTHE...
2815
  	if (unlikely(p->policy != SCHED_NORMAL))
91c234b4e   Ingo Molnar   sched: do not wak...
2816
  		return;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2817

464b75273   Peter Zijlstra   sched: re-instate...
2818
  	find_matching_se(&se, &pse);
9bbd73743   Paul Turner   sched: update cor...
2819
  	update_curr(cfs_rq_of(se));
002f128b4   Paul Turner   sched: remove red...
2820
  	BUG_ON(!pse);
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
2821
2822
2823
2824
2825
2826
2827
  	if (wakeup_preempt_entity(se, pse) == 1) {
  		/*
  		 * Bias pick_next to pick the sched entity that is
  		 * triggering this preemption.
  		 */
  		if (!next_buddy_marked)
  			set_next_buddy(pse);
3a7e73a2e   Peter Zijlstra   sched: Clean up c...
2828
  		goto preempt;
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
2829
  	}
464b75273   Peter Zijlstra   sched: re-instate...
2830

3a7e73a2e   Peter Zijlstra   sched: Clean up c...
2831
  	return;
a65ac745e   Jupyung Lee   sched: Move updat...
2832

3a7e73a2e   Peter Zijlstra   sched: Clean up c...
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
  preempt:
  	resched_task(curr);
  	/*
  	 * Only set the backward buddy when the current task is still
  	 * on the rq. This can happen when a wakeup gets interleaved
  	 * with schedule on the ->pre_schedule() or idle_balance()
  	 * point, either of which can drop the rq lock.
  	 *
  	 * Also, during early boot the idle thread is in the fair class,
  	 * for obvious reasons it's a bad idea to schedule back to it.
  	 */
  	if (unlikely(!se->on_rq || curr == rq->idle))
  		return;
  
  	if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
  		set_last_buddy(se);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2849
  }
fb8d47240   Ingo Molnar   sched: remove the...
2850
  static struct task_struct *pick_next_task_fair(struct rq *rq)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2851
  {
8f4d37ec0   Peter Zijlstra   sched: high-res p...
2852
  	struct task_struct *p;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2853
2854
  	struct cfs_rq *cfs_rq = &rq->cfs;
  	struct sched_entity *se;
36ace27e3   Tim Blechmann   sched: Optimize b...
2855
  	if (!cfs_rq->nr_running)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2856
2857
2858
  		return NULL;
  
  	do {
9948f4b2a   Ingo Molnar   sched: remove the...
2859
  		se = pick_next_entity(cfs_rq);
f4b6755fb   Peter Zijlstra   sched: cleanup fa...
2860
  		set_next_entity(cfs_rq, se);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2861
2862
  		cfs_rq = group_cfs_rq(se);
  	} while (cfs_rq);
8f4d37ec0   Peter Zijlstra   sched: high-res p...
2863
  	p = task_of(se);
b39e66eaf   Mike Galbraith   sched: Save some ...
2864
2865
  	if (hrtick_enabled(rq))
  		hrtick_start_fair(rq, p);
8f4d37ec0   Peter Zijlstra   sched: high-res p...
2866
2867
  
  	return p;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2868
2869
2870
2871
2872
  }
  
  /*
   * Account for a descheduled task:
   */
31ee529cc   Ingo Molnar   sched: remove the...
2873
  static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2874
2875
2876
2877
2878
2879
  {
  	struct sched_entity *se = &prev->se;
  	struct cfs_rq *cfs_rq;
  
  	for_each_sched_entity(se) {
  		cfs_rq = cfs_rq_of(se);
ab6cde269   Ingo Molnar   sched: remove the...
2880
  		put_prev_entity(cfs_rq, se);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2881
2882
  	}
  }
ac53db596   Rik van Riel   sched: Use a budd...
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
  /*
   * sched_yield() is very simple
   *
   * The magic of dealing with the ->skip buddy is in pick_next_entity.
   */
  static void yield_task_fair(struct rq *rq)
  {
  	struct task_struct *curr = rq->curr;
  	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
  	struct sched_entity *se = &curr->se;
  
  	/*
  	 * Are we the only task in the tree?
  	 */
  	if (unlikely(rq->nr_running == 1))
  		return;
  
  	clear_buddies(cfs_rq, se);
  
  	if (curr->policy != SCHED_BATCH) {
  		update_rq_clock(rq);
  		/*
  		 * Update run-time statistics of the 'current'.
  		 */
  		update_curr(cfs_rq);
916671c08   Mike Galbraith   sched: Set skip_c...
2908
2909
2910
2911
2912
2913
  		/*
  		 * Tell update_rq_clock() that we've just updated,
  		 * so we don't do a microscopic update in schedule()
  		 * and double the fastpath cost.
  		 */
  		 rq->skip_clock_update = 1;
ac53db596   Rik van Riel   sched: Use a budd...
2914
2915
2916
2917
  	}
  
  	set_skip_buddy(se);
  }
d95f41220   Mike Galbraith   sched: Add yield_...
2918
2919
2920
  static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
  {
  	struct sched_entity *se = &p->se;
5238cdd38   Paul Turner   sched: Prevent bu...
2921
2922
  	/* throttled hierarchies are not runnable */
  	if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se)))
d95f41220   Mike Galbraith   sched: Add yield_...
2923
2924
2925
2926
  		return false;
  
  	/* Tell the scheduler that we'd really like pse to run next. */
  	set_next_buddy(se);
d95f41220   Mike Galbraith   sched: Add yield_...
2927
2928
2929
2930
  	yield_task_fair(rq);
  
  	return true;
  }
681f3e685   Peter Williams   sched: isolate SM...
2931
  #ifdef CONFIG_SMP
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
2932
2933
2934
  /**************************************************
   * Fair scheduling class load-balancing methods:
   */
1e3c88bde   Peter Zijlstra   sched: Move load ...
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
  /*
   * pull_task - move a task from a remote runqueue to the local runqueue.
   * Both runqueues must be locked.
   */
  static void pull_task(struct rq *src_rq, struct task_struct *p,
  		      struct rq *this_rq, int this_cpu)
  {
  	deactivate_task(src_rq, p, 0);
  	set_task_cpu(p, this_cpu);
  	activate_task(this_rq, p, 0);
  	check_preempt_curr(this_rq, p, 0);
  }
  
  /*
029632fbb   Peter Zijlstra   sched: Make separ...
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
   * Is this task likely cache-hot:
   */
  static int
  task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
  {
  	s64 delta;
  
  	if (p->sched_class != &fair_sched_class)
  		return 0;
  
  	if (unlikely(p->policy == SCHED_IDLE))
  		return 0;
  
  	/*
  	 * Buddy candidates are cache hot:
  	 */
  	if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
  			(&p->se == cfs_rq_of(&p->se)->next ||
  			 &p->se == cfs_rq_of(&p->se)->last))
  		return 1;
  
  	if (sysctl_sched_migration_cost == -1)
  		return 1;
  	if (sysctl_sched_migration_cost == 0)
  		return 0;
  
  	delta = now - p->se.exec_start;
  
  	return delta < (s64)sysctl_sched_migration_cost;
  }
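  
  /*
   * Standalone illustrative sketch, not part of fair.c: the ageing part of
   * the cache-hot test above, with the buddy check dropped.  A migration
   * cost of -1 means "always hot", 0 means "never hot", and otherwise a
   * task is hot exactly when it last ran within sysctl_sched_migration_cost
   * nanoseconds.  The 500000 ns value in main() is the usual default but an
   * assumption of this sketch.
   */
  #include <stdio.h>
  
  typedef long long demo_s64;
  
  static int demo_task_hot(demo_s64 now_ns, demo_s64 exec_start_ns,
  			 demo_s64 migration_cost_ns)
  {
  	demo_s64 delta;
  
  	if (migration_cost_ns == -1)
  		return 1;
  	if (migration_cost_ns == 0)
  		return 0;
  
  	delta = now_ns - exec_start_ns;
  	return delta < migration_cost_ns;
  }
  
  int main(void)
  {
  	demo_s64 cost = 500000;	/* 0.5ms */
  
  	printf("%d\n", demo_task_hot(1000000, 900000, cost));	/* ran 0.1ms ago: hot  */
  	printf("%d\n", demo_task_hot(1000000, 100000, cost));	/* ran 0.9ms ago: cold */
  	return 0;
  }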
5b54b56be   Peter Zijlstra   sched: Replace al...
2979
  #define LBF_ALL_PINNED	0x01
bced76aea   Peter Zijlstra   sched: Fix lockup...
2980
2981
2982
2983
  #define LBF_NEED_BREAK	0x02	/* clears into HAD_BREAK */
  #define LBF_HAD_BREAK	0x04
  #define LBF_HAD_BREAKS	0x0C	/* count HAD_BREAKs overflows into ABORT */
  #define LBF_ABORT	0x10
5b54b56be   Peter Zijlstra   sched: Replace al...
2984

029632fbb   Peter Zijlstra   sched: Make separ...
2985
  /*
1e3c88bde   Peter Zijlstra   sched: Move load ...
2986
2987
2988
2989
2990
   * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
   */
  static
  int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
  		     struct sched_domain *sd, enum cpu_idle_type idle,
5b54b56be   Peter Zijlstra   sched: Replace al...
2991
  		     int *lb_flags)
1e3c88bde   Peter Zijlstra   sched: Move load ...
2992
2993
2994
2995
2996
2997
2998
2999
  {
  	int tsk_cache_hot = 0;
  	/*
  	 * We do not migrate tasks that are:
  	 * 1) running (obviously), or
  	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
  	 * 3) are cache-hot on their current CPU.
  	 */
fa17b507f   Peter Zijlstra   sched: Wrap sched...
3000
  	if (!cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p))) {
41acab885   Lucas De Marchi   sched: Implement ...
3001
  		schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3002
3003
  		return 0;
  	}
5b54b56be   Peter Zijlstra   sched: Replace al...
3004
  	*lb_flags &= ~LBF_ALL_PINNED;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3005
3006
  
  	if (task_running(rq, p)) {
41acab885   Lucas De Marchi   sched: Implement ...
3007
  		schedstat_inc(p, se.statistics.nr_failed_migrations_running);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3008
3009
3010
3011
3012
3013
3014
3015
  		return 0;
  	}
  
  	/*
  	 * Aggressive migration if:
  	 * 1) task is cache cold, or
  	 * 2) too many balance attempts have failed.
  	 */
305e6835e   Venkatesh Pallipadi   sched: Do not acc...
3016
  	tsk_cache_hot = task_hot(p, rq->clock_task, sd);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3017
3018
3019
3020
3021
  	if (!tsk_cache_hot ||
  		sd->nr_balance_failed > sd->cache_nice_tries) {
  #ifdef CONFIG_SCHEDSTATS
  		if (tsk_cache_hot) {
  			schedstat_inc(sd, lb_hot_gained[idle]);
41acab885   Lucas De Marchi   sched: Implement ...
3022
  			schedstat_inc(p, se.statistics.nr_forced_migrations);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3023
3024
3025
3026
3027
3028
  		}
  #endif
  		return 1;
  	}
  
  	if (tsk_cache_hot) {
41acab885   Lucas De Marchi   sched: Implement ...
3029
  		schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3030
3031
3032
3033
  		return 0;
  	}
  	return 1;
  }
897c395f4   Peter Zijlstra   sched: Remove rq_...
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
  /*
   * move_one_task tries to move exactly one task from busiest to this_rq, as
   * part of active balancing operations within "domain".
   * Returns 1 if successful and 0 otherwise.
   *
   * Called with both runqueues locked.
   */
  static int
  move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
  	      struct sched_domain *sd, enum cpu_idle_type idle)
  {
  	struct task_struct *p, *n;
  	struct cfs_rq *cfs_rq;
  	int pinned = 0;
  
  	for_each_leaf_cfs_rq(busiest, cfs_rq) {
  		list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
64660c864   Paul Turner   sched: Prevent in...
3051
3052
3053
  			if (throttled_lb_pair(task_group(p),
  					      busiest->cpu, this_cpu))
  				break;
897c395f4   Peter Zijlstra   sched: Remove rq_...
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
  
  			if (!can_migrate_task(p, busiest, this_cpu,
  						sd, idle, &pinned))
  				continue;
  
  			pull_task(busiest, p, this_rq, this_cpu);
  			/*
  			 * Right now, this is only the second place pull_task()
  			 * is called, so we can safely collect pull_task()
  			 * stats here rather than inside pull_task().
  			 */
  			schedstat_inc(sd, lb_gained[idle]);
  			return 1;
  		}
  	}
  
  	return 0;
  }
1e3c88bde   Peter Zijlstra   sched: Move load ...
3072
3073
3074
  static unsigned long
  balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
  	      unsigned long max_load_move, struct sched_domain *sd,
5b54b56be   Peter Zijlstra   sched: Replace al...
3075
  	      enum cpu_idle_type idle, int *lb_flags,
931aeeda0   Vladimir Davydov   sched: Remove unu...
3076
  	      struct cfs_rq *busiest_cfs_rq)
1e3c88bde   Peter Zijlstra   sched: Move load ...
3077
  {
b30aef17f   Ken Chen   sched: Fix errone...
3078
  	int loops = 0, pulled = 0;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3079
  	long rem_load_move = max_load_move;
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
3080
  	struct task_struct *p, *n;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3081
3082
3083
  
  	if (max_load_move == 0)
  		goto out;
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
3084
  	list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) {
a195f004e   Peter Zijlstra   sched: Fix load-b...
3085
3086
  		if (loops++ > sysctl_sched_nr_migrate) {
  			*lb_flags |= LBF_NEED_BREAK;
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
3087
  			break;
a195f004e   Peter Zijlstra   sched: Fix load-b...
3088
  		}
1e3c88bde   Peter Zijlstra   sched: Move load ...
3089

ee00e66ff   Peter Zijlstra   sched: Remove rq_...
3090
  		if ((p->se.load.weight >> 1) > rem_load_move ||
b30aef17f   Ken Chen   sched: Fix errone...
3091
  		    !can_migrate_task(p, busiest, this_cpu, sd, idle,
5b54b56be   Peter Zijlstra   sched: Replace al...
3092
  				      lb_flags))
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
3093
  			continue;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3094

ee00e66ff   Peter Zijlstra   sched: Remove rq_...
3095
3096
3097
  		pull_task(busiest, p, this_rq, this_cpu);
  		pulled++;
  		rem_load_move -= p->se.load.weight;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3098
3099
  
  #ifdef CONFIG_PREEMPT
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
3100
3101
3102
3103
3104
  		/*
  		 * NEWIDLE balancing is a source of latency, so preemptible
  		 * kernels will stop after the first task is pulled to minimize
  		 * the critical section.
  		 */
a195f004e   Peter Zijlstra   sched: Fix load-b...
3105
3106
  		if (idle == CPU_NEWLY_IDLE) {
  			*lb_flags |= LBF_ABORT;
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
3107
  			break;
a195f004e   Peter Zijlstra   sched: Fix load-b...
3108
  		}
1e3c88bde   Peter Zijlstra   sched: Move load ...
3109
  #endif
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
3110
3111
3112
3113
3114
3115
  		/*
  		 * We only want to steal up to the prescribed amount of
  		 * weighted load.
  		 */
  		if (rem_load_move <= 0)
  			break;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3116
3117
3118
3119
3120
3121
3122
3123
  	}
  out:
  	/*
  	 * Right now, this is one of only two places pull_task() is called,
  	 * so we can safely collect pull_task() stats here rather than
  	 * inside pull_task().
  	 */
  	schedstat_add(sd, lb_gained[idle], pulled);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3124
3125
  	return max_load_move - rem_load_move;
  }
230059de7   Peter Zijlstra   sched: Remove fro...
3126
  #ifdef CONFIG_FAIR_GROUP_SCHED
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
3127
3128
3129
  /*
   * update tg->load_weight by folding this cpu's load_avg
   */
67e86250f   Paul Turner   sched: Introduce ...
3130
  static int update_shares_cpu(struct task_group *tg, int cpu)
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
3131
3132
3133
3134
  {
  	struct cfs_rq *cfs_rq;
  	unsigned long flags;
  	struct rq *rq;
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
  
  	if (!tg->se[cpu])
  		return 0;
  
  	rq = cpu_rq(cpu);
  	cfs_rq = tg->cfs_rq[cpu];
  
  	raw_spin_lock_irqsave(&rq->lock, flags);
  
  	update_rq_clock(rq);
d6b559182   Paul Turner   sched: Allow upda...
3145
  	update_cfs_load(cfs_rq, 1);
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
3146
3147
3148
3149
3150
  
  	/*
  	 * We need to update shares after updating tg->load_weight in
  	 * order to adjust the weight of groups with long running tasks.
  	 */
6d5ab2932   Paul Turner   sched: Simplify u...
3151
  	update_cfs_shares(cfs_rq);
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
  
  	raw_spin_unlock_irqrestore(&rq->lock, flags);
  
  	return 0;
  }
  
  static void update_shares(int cpu)
  {
  	struct cfs_rq *cfs_rq;
  	struct rq *rq = cpu_rq(cpu);
  
  	rcu_read_lock();
9763b67fb   Peter Zijlstra   sched, cgroup: Op...
3164
3165
3166
3167
  	/*
  	 * Iterates the task_group tree in a bottom-up fashion; see
  	 * list_add_leaf_cfs_rq() for details.
  	 */
64660c864   Paul Turner   sched: Prevent in...
3168
3169
3170
3171
  	for_each_leaf_cfs_rq(rq, cfs_rq) {
  		/* throttled entities do not contribute to load */
  		if (throttled_hierarchy(cfs_rq))
  			continue;
67e86250f   Paul Turner   sched: Introduce ...
3172
  		update_shares_cpu(cfs_rq->tg, cpu);
64660c864   Paul Turner   sched: Prevent in...
3173
  	}
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
3174
3175
  	rcu_read_unlock();
  }
9763b67fb   Peter Zijlstra   sched, cgroup: Op...
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
  /*
   * Compute the cpu's hierarchical load factor for each task group.
   * This needs to be done in a top-down fashion because the load of a child
   * group is a fraction of its parent's load.
   */
  static int tg_load_down(struct task_group *tg, void *data)
  {
  	unsigned long load;
  	long cpu = (long)data;
  
  	if (!tg->parent) {
  		load = cpu_rq(cpu)->load.weight;
  	} else {
  		load = tg->parent->cfs_rq[cpu]->h_load;
  		load *= tg->se[cpu]->load.weight;
  		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
  	}
  
  	tg->cfs_rq[cpu]->h_load = load;
  
  	return 0;
  }
  
  static void update_h_load(long cpu)
  {
  	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
  }
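  
  /*
   * Standalone illustrative sketch, not part of fair.c: the top-down h_load
   * computation performed by tg_load_down() above.  The root group's h_load
   * is the runqueue weight; each child's h_load is its parent's h_load
   * scaled by the child se weight over the parent cfs_rq weight (+1 to
   * avoid dividing by zero).  The two-level hierarchy in main() is made up
   * for demonstration.
   */
  #include <stdio.h>
  
  static unsigned long demo_child_h_load(unsigned long parent_h_load,
  				       unsigned long child_se_weight,
  				       unsigned long parent_cfs_weight)
  {
  	unsigned long load = parent_h_load;
  
  	load *= child_se_weight;
  	load /= parent_cfs_weight + 1;	/* +1 guards against an empty parent */
  	return load;
  }
  
  int main(void)
  {
  	unsigned long root_h_load = 3072;	/* rq->load.weight at the root */
  	unsigned long child = demo_child_h_load(root_h_load, 1024, 3072);
  	unsigned long grandchild = demo_child_h_load(child, 512, 2048);
  
  	printf("child h_load      = %lu\n", child);
  	printf("grandchild h_load = %lu\n", grandchild);
  	return 0;
  }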
230059de7   Peter Zijlstra   sched: Remove fro...
3203
3204
3205
3206
  static unsigned long
  load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
  		  unsigned long max_load_move,
  		  struct sched_domain *sd, enum cpu_idle_type idle,
5b54b56be   Peter Zijlstra   sched: Replace al...
3207
  		  int *lb_flags)
230059de7   Peter Zijlstra   sched: Remove fro...
3208
3209
  {
  	long rem_load_move = max_load_move;
9763b67fb   Peter Zijlstra   sched, cgroup: Op...
3210
  	struct cfs_rq *busiest_cfs_rq;
230059de7   Peter Zijlstra   sched: Remove fro...
3211
3212
  
  	rcu_read_lock();
9763b67fb   Peter Zijlstra   sched, cgroup: Op...
3213
  	update_h_load(cpu_of(busiest));
230059de7   Peter Zijlstra   sched: Remove fro...
3214

9763b67fb   Peter Zijlstra   sched, cgroup: Op...
3215
  	for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
230059de7   Peter Zijlstra   sched: Remove fro...
3216
3217
3218
  		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
  		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
  		u64 rem_load, moved_load;
a195f004e   Peter Zijlstra   sched: Fix load-b...
3219
3220
  		if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT))
  			break;
230059de7   Peter Zijlstra   sched: Remove fro...
3221
  		/*
64660c864   Paul Turner   sched: Prevent in...
3222
  		 * empty group or part of a throttled hierarchy
230059de7   Peter Zijlstra   sched: Remove fro...
3223
  		 */
64660c864   Paul Turner   sched: Prevent in...
3224
3225
  		if (!busiest_cfs_rq->task_weight ||
  		    throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu))
230059de7   Peter Zijlstra   sched: Remove fro...
3226
3227
3228
3229
3230
3231
  			continue;
  
  		rem_load = (u64)rem_load_move * busiest_weight;
  		rem_load = div_u64(rem_load, busiest_h_load + 1);
  
  		moved_load = balance_tasks(this_rq, this_cpu, busiest,
5b54b56be   Peter Zijlstra   sched: Replace al...
3232
  				rem_load, sd, idle, lb_flags,
230059de7   Peter Zijlstra   sched: Remove fro...
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
  				busiest_cfs_rq);
  
  		if (!moved_load)
  			continue;
  
  		moved_load *= busiest_h_load;
  		moved_load = div_u64(moved_load, busiest_weight + 1);
  
  		rem_load_move -= moved_load;
  		if (rem_load_move < 0)
  			break;
  	}
  	rcu_read_unlock();
  
  	return max_load_move - rem_load_move;
  }
  #else
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
3250
3251
3252
  static inline void update_shares(int cpu)
  {
  }
230059de7   Peter Zijlstra   sched: Remove fro...
3253
3254
3255
3256
  static unsigned long
  load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
  		  unsigned long max_load_move,
  		  struct sched_domain *sd, enum cpu_idle_type idle,
5b54b56be   Peter Zijlstra   sched: Replace al...
3257
  		  int *lb_flags)
230059de7   Peter Zijlstra   sched: Remove fro...
3258
3259
  {
  	return balance_tasks(this_rq, this_cpu, busiest,
5b54b56be   Peter Zijlstra   sched: Replace al...
3260
  			max_load_move, sd, idle, lb_flags,
931aeeda0   Vladimir Davydov   sched: Remove unu...
3261
  			&busiest->cfs);
230059de7   Peter Zijlstra   sched: Remove fro...
3262
3263
  }
  #endif
1e3c88bde   Peter Zijlstra   sched: Move load ...
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
  /*
   * move_tasks tries to move up to max_load_move weighted load from busiest to
   * this_rq, as part of a balancing operation within domain "sd".
   * Returns 1 if successful and 0 otherwise.
   *
   * Called with both runqueues locked.
   */
  static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
  		      unsigned long max_load_move,
  		      struct sched_domain *sd, enum cpu_idle_type idle,
5b54b56be   Peter Zijlstra   sched: Replace al...
3274
  		      int *lb_flags)
1e3c88bde   Peter Zijlstra   sched: Move load ...
3275
  {
3d45fd804   Peter Zijlstra   sched: Remove the...
3276
  	unsigned long total_load_moved = 0, load_moved;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3277
3278
  
  	do {
3d45fd804   Peter Zijlstra   sched: Remove the...
3279
  		load_moved = load_balance_fair(this_rq, this_cpu, busiest,
1e3c88bde   Peter Zijlstra   sched: Move load ...
3280
  				max_load_move - total_load_moved,
5b54b56be   Peter Zijlstra   sched: Replace al...
3281
  				sd, idle, lb_flags);
3d45fd804   Peter Zijlstra   sched: Remove the...
3282
3283
  
  		total_load_moved += load_moved;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3284

a195f004e   Peter Zijlstra   sched: Fix load-b...
3285
3286
  		if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT))
  			break;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3287
3288
3289
3290
3291
3292
  #ifdef CONFIG_PREEMPT
  		/*
  		 * NEWIDLE balancing is a source of latency, so preemptible
  		 * kernels will stop after the first task is pulled to minimize
  		 * the critical section.
  		 */
a195f004e   Peter Zijlstra   sched: Fix load-b...
3293
3294
  		if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) {
  			*lb_flags |= LBF_ABORT;
baa8c1102   Peter Zijlstra   sched: Add a lock...
3295
  			break;
a195f004e   Peter Zijlstra   sched: Fix load-b...
3296
  		}
1e3c88bde   Peter Zijlstra   sched: Move load ...
3297
  #endif
3d45fd804   Peter Zijlstra   sched: Remove the...
3298
  	} while (load_moved && max_load_move > total_load_moved);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3299
3300
3301
  
  	return total_load_moved > 0;
  }
1e3c88bde   Peter Zijlstra   sched: Move load ...
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
  /********** Helpers for find_busiest_group ************************/
  /*
   * sd_lb_stats - Structure to store the statistics of a sched_domain
   * 		during load balancing.
   */
  struct sd_lb_stats {
  	struct sched_group *busiest; /* Busiest group in this sd */
  	struct sched_group *this;  /* Local group in this sd */
  	unsigned long total_load;  /* Total load of all groups in sd */
  	unsigned long total_pwr;   /*	Total power of all groups in sd */
  	unsigned long avg_load;	   /* Average load across all groups in sd */
  
  	/** Statistics of this group */
  	unsigned long this_load;
  	unsigned long this_load_per_task;
  	unsigned long this_nr_running;
fab476228   Nikhil Rao   sched: Force bala...
3318
  	unsigned long this_has_capacity;
aae6d3ddd   Suresh Siddha   sched: Use group ...
3319
  	unsigned int  this_idle_cpus;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3320
3321
  
  	/* Statistics of the busiest group */
aae6d3ddd   Suresh Siddha   sched: Use group ...
3322
  	unsigned int  busiest_idle_cpus;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3323
3324
3325
  	unsigned long max_load;
  	unsigned long busiest_load_per_task;
  	unsigned long busiest_nr_running;
dd5feea14   Suresh Siddha   sched: Fix SCHED_...
3326
  	unsigned long busiest_group_capacity;
fab476228   Nikhil Rao   sched: Force bala...
3327
  	unsigned long busiest_has_capacity;
aae6d3ddd   Suresh Siddha   sched: Use group ...
3328
  	unsigned int  busiest_group_weight;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
  
  	int group_imb; /* Is there imbalance in this sd */
  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
  	int power_savings_balance; /* Is powersave balance needed for this sd */
  	struct sched_group *group_min; /* Least loaded group in sd */
  	struct sched_group *group_leader; /* Group which relieves group_min */
  	unsigned long min_load_per_task; /* load_per_task in group_min */
  	unsigned long leader_nr_running; /* Nr running of group_leader */
  	unsigned long min_nr_running; /* Nr running of group_min */
  #endif
  };
  
  /*
   * sg_lb_stats - stats of a sched_group required for load_balancing
   */
  struct sg_lb_stats {
  	unsigned long avg_load; /*Avg load across the CPUs of the group */
  	unsigned long group_load; /* Total load over the CPUs of the group */
  	unsigned long sum_nr_running; /* Nr tasks running in the group */
  	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
  	unsigned long group_capacity;
aae6d3ddd   Suresh Siddha   sched: Use group ...
3350
3351
  	unsigned long idle_cpus;
  	unsigned long group_weight;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3352
  	int group_imb; /* Is there an imbalance in the group? */
fab476228   Nikhil Rao   sched: Force bala...
3353
  	int group_has_capacity; /* Is there extra capacity in the group? */
1e3c88bde   Peter Zijlstra   sched: Move load ...
3354
3355
3356
  };
  
  /**
1e3c88bde   Peter Zijlstra   sched: Move load ...
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
   * get_sd_load_idx - Obtain the load index for a given sched domain.
   * @sd: The sched_domain whose load_idx is to be obtained.
   * @idle: The idle status of the CPU for whose sd load_idx is obtained.
   */
  static inline int get_sd_load_idx(struct sched_domain *sd,
  					enum cpu_idle_type idle)
  {
  	int load_idx;
  
  	switch (idle) {
  	case CPU_NOT_IDLE:
  		load_idx = sd->busy_idx;
  		break;
  
  	case CPU_NEWLY_IDLE:
  		load_idx = sd->newidle_idx;
  		break;
  	default:
  		load_idx = sd->idle_idx;
  		break;
  	}
  
  	return load_idx;
  }
  
  
  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
  /**
   * init_sd_power_savings_stats - Initialize power savings statistics for
   * the given sched_domain, during load balancing.
   *
   * @sd: Sched domain whose power-savings statistics are to be initialized.
   * @sds: Variable containing the statistics for sd.
   * @idle: Idle status of the CPU at which we're performing load-balancing.
   */
  static inline void init_sd_power_savings_stats(struct sched_domain *sd,
  	struct sd_lb_stats *sds, enum cpu_idle_type idle)
  {
  	/*
  	 * Busy processors will not participate in power savings
  	 * balance.
  	 */
  	if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
  		sds->power_savings_balance = 0;
  	else {
  		sds->power_savings_balance = 1;
  		sds->min_nr_running = ULONG_MAX;
  		sds->leader_nr_running = 0;
  	}
  }
  
  /**
   * update_sd_power_savings_stats - Update the power saving stats for a
   * sched_domain while performing load balancing.
   *
   * @group: sched_group belonging to the sched_domain under consideration.
   * @sds: Variable containing the statistics of the sched_domain
   * @local_group: Does group contain the CPU for which we're performing
   * 		load balancing?
   * @sgs: Variable containing the statistics of the group.
   */
  static inline void update_sd_power_savings_stats(struct sched_group *group,
  	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
  {
  
  	if (!sds->power_savings_balance)
  		return;
  
  	/*
  	 * If the local group is idle or completely loaded,
  	 * no need to do power savings balance at this domain
  	 */
  	if (local_group && (sds->this_nr_running >= sgs->group_capacity ||
  				!sds->this_nr_running))
  		sds->power_savings_balance = 0;
  
  	/*
  	 * If a group is already running at full capacity or idle,
  	 * don't include that group in power savings calculations
  	 */
  	if (!sds->power_savings_balance ||
  		sgs->sum_nr_running >= sgs->group_capacity ||
  		!sgs->sum_nr_running)
  		return;
  
  	/*
  	 * Calculate the group which has the least non-idle load.
  	 * This is the group from where we need to pick up the load
  	 * for saving power
  	 */
  	if ((sgs->sum_nr_running < sds->min_nr_running) ||
  	    (sgs->sum_nr_running == sds->min_nr_running &&
  	     group_first_cpu(group) > group_first_cpu(sds->group_min))) {
  		sds->group_min = group;
  		sds->min_nr_running = sgs->sum_nr_running;
  		sds->min_load_per_task = sgs->sum_weighted_load /
  						sgs->sum_nr_running;
  	}
  
  	/*
  	 * Calculate the group which is nearly at its
  	 * capacity but still has some space to pick up some load
  	 * from another group and save more power
  	 */
  	if (sgs->sum_nr_running + 1 > sgs->group_capacity)
  		return;
  
  	if (sgs->sum_nr_running > sds->leader_nr_running ||
  	    (sgs->sum_nr_running == sds->leader_nr_running &&
  	     group_first_cpu(group) < group_first_cpu(sds->group_leader))) {
  		sds->group_leader = group;
  		sds->leader_nr_running = sgs->sum_nr_running;
  	}
  }
  
  /**
   * check_power_save_busiest_group - see if there is potential for some power-savings balance
   * @sds: Variable containing the statistics of the sched_domain
   *	under consideration.
   * @this_cpu: Cpu at which we're currently performing load-balancing.
   * @imbalance: Variable to store the imbalance.
   *
   * Description:
   * Check if we have potential to perform some power-savings balance.
   * If yes, set the busiest group to be the least loaded group in the
   * sched_domain, so that its CPUs can be put to idle.
   *
   * Returns 1 if there is potential to perform power-savings balance.
   * Else returns 0.
   */
  static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
  					int this_cpu, unsigned long *imbalance)
  {
  	if (!sds->power_savings_balance)
  		return 0;
  
  	if (sds->this != sds->group_leader ||
  			sds->group_leader == sds->group_min)
  		return 0;
  
  	*imbalance = sds->min_load_per_task;
  	sds->busiest = sds->group_min;
  
  	return 1;
  
  }
  #else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
  static inline void init_sd_power_savings_stats(struct sched_domain *sd,
  	struct sd_lb_stats *sds, enum cpu_idle_type idle)
  {
  	return;
  }
  
  static inline void update_sd_power_savings_stats(struct sched_group *group,
  	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
  {
  	return;
  }
  
  static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
  					int this_cpu, unsigned long *imbalance)
  {
  	return 0;
  }
  #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
  
  
  unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
  {
1399fa780   Nikhil Rao   sched: Introduce ...
3526
  	return SCHED_POWER_SCALE;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3527
3528
3529
3530
3531
3532
3533
3534
3535
  }
  
  unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
  {
  	return default_scale_freq_power(sd, cpu);
  }
  
  unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
  {
669c55e9f   Peter Zijlstra   sched: Pre-comput...
3536
  	unsigned long weight = sd->span_weight;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
  	unsigned long smt_gain = sd->smt_gain;
  
  	smt_gain /= weight;
  
  	return smt_gain;
  }
  
  unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
  {
  	return default_scale_smt_power(sd, cpu);
  }
  
  unsigned long scale_rt_power(int cpu)
  {
  	struct rq *rq = cpu_rq(cpu);
  	u64 total, available;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3553
  	total = sched_avg_period() + (rq->clock - rq->age_stamp);
aa4838085   Venkatesh Pallipadi   sched: Remove irq...
3554
3555
3556
3557
3558
3559
3560
  
  	if (unlikely(total < rq->rt_avg)) {
  		/* Ensures that power won't end up being negative */
  		available = 0;
  	} else {
  		available = total - rq->rt_avg;
  	}
1e3c88bde   Peter Zijlstra   sched: Move load ...
3561

1399fa780   Nikhil Rao   sched: Introduce ...
3562
3563
  	if (unlikely((s64)total < SCHED_POWER_SCALE))
  		total = SCHED_POWER_SCALE;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3564

1399fa780   Nikhil Rao   sched: Introduce ...
3565
  	total >>= SCHED_POWER_SHIFT;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3566
3567
3568
3569
3570
3571
  
  	return div_u64(available, total);
  }
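  
  /*
   * Standalone illustrative sketch, not part of fair.c: the fraction
   * computed by scale_rt_power() above.  The power left for CFS is
   * (total - rt_avg) / total, with total clamped to at least the power
   * scale and pre-shifted so the quotient lands in the 0..1024 range.
   * The 1024 scale and shift of 10 mirror the usual SCHED_POWER_SCALE /
   * SCHED_POWER_SHIFT values but are assumptions of this sketch.
   */
  #include <stdio.h>
  #include <stdint.h>
  
  #define DEMO_POWER_SCALE 1024ULL
  #define DEMO_POWER_SHIFT 10
  
  static uint64_t demo_scale_rt_power(uint64_t total, uint64_t rt_avg)
  {
  	uint64_t available = (total < rt_avg) ? 0 : total - rt_avg;
  
  	if (total < DEMO_POWER_SCALE)
  		total = DEMO_POWER_SCALE;
  
  	total >>= DEMO_POWER_SHIFT;
  
  	return available / total;	/* div_u64() in the kernel */
  }
  
  int main(void)
  {
  	/* 25% of the period spent in rt/irq leaves ~75% of 1024 = 768. */
  	printf("%llu\n",
  	       (unsigned long long)demo_scale_rt_power(1000000, 250000));
  	return 0;
  }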
  
  static void update_cpu_power(struct sched_domain *sd, int cpu)
  {
669c55e9f   Peter Zijlstra   sched: Pre-comput...
3572
  	unsigned long weight = sd->span_weight;
  	unsigned long power = SCHED_POWER_SCALE;
  	struct sched_group *sdg = sd->groups;
  	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
  		if (sched_feat(ARCH_POWER))
  			power *= arch_scale_smt_power(sd, cpu);
  		else
  			power *= default_scale_smt_power(sd, cpu);
  		power >>= SCHED_POWER_SHIFT;
  	}
  	sdg->sgp->power_orig = power;
  
  	if (sched_feat(ARCH_POWER))
  		power *= arch_scale_freq_power(sd, cpu);
  	else
  		power *= default_scale_freq_power(sd, cpu);
  	power >>= SCHED_POWER_SHIFT;

  	power *= scale_rt_power(cpu);
  	power >>= SCHED_POWER_SHIFT;
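  	/*
  	 * Worked example: starting from SCHED_POWER_SCALE (1024), an SMT
  	 * factor of 589, a frequency factor of 1024 and an rt factor of 768
  	 * leave 1024 * 589/1024 * 1024/1024 * 768/1024 ~= 441 units.
  	 */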
  
  	if (!power)
  		power = 1;
  	cpu_rq(cpu)->cpu_power = power;
  	sdg->sgp->power = power;
  }
  void update_group_power(struct sched_domain *sd, int cpu)
  {
  	struct sched_domain *child = sd->child;
  	struct sched_group *group, *sdg = sd->groups;
  	unsigned long power;
  
  	if (!child) {
  		update_cpu_power(sd, cpu);
  		return;
  	}
  
  	power = 0;
  
  	group = child->groups;
  	do {
  		power += group->sgp->power;
  		group = group->next;
  	} while (group != child->groups);
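  	/*
  	 * A domain's group power is simply the sum of its children's group
  	 * powers, e.g. two SMT siblings of 589 each give the parent 1178.
  	 */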
  	sdg->sgp->power = power;
  }
  /*
   * Try and fix up capacity for tiny siblings; this is needed when
   * things like SD_ASYM_PACKING need f_b_g to select another sibling
   * which on its own isn't powerful enough.
   *
   * See update_sd_pick_busiest() and check_asym_packing().
   */
  static inline int
  fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  {
  	/*
  	 * Only siblings can have significantly less than SCHED_POWER_SCALE
  	 */
  	if (!(sd->flags & SD_SHARE_CPUPOWER))
  		return 0;
  
  	/*
  	 * If ~90% of the cpu_power is still there, we're good.
  	 */
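  	/* (29/32 ~= 90.6%, which is where the ~90% figure above comes from.) */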
  	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
  		return 1;
  
  	return 0;
  }
  /**
   * update_sg_lb_stats - Update sched_group's statistics for load balancing.
   * @sd: The sched_domain whose statistics are to be updated.
   * @group: sched_group whose statistics are to be updated.
   * @this_cpu: Cpu for which load balance is currently performed.
   * @idle: Idle status of this_cpu
   * @load_idx: Load index of sched_domain of this_cpu for load calc.
   * @local_group: Does group contain this_cpu.
   * @cpus: Set of cpus considered for load balancing.
   * @balance: Should we balance.
   * @sgs: variable to hold the statistics for this group.
   */
  static inline void update_sg_lb_stats(struct sched_domain *sd,
  			struct sched_group *group, int this_cpu,
  			enum cpu_idle_type idle, int load_idx,
  			int local_group, const struct cpumask *cpus,
  			int *balance, struct sg_lb_stats *sgs)
  {
  	unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
  	int i;
  	unsigned int balance_cpu = -1, first_idle_cpu = 0;
  	unsigned long avg_load_per_task = 0;

  	if (local_group)
  		balance_cpu = group_first_cpu(group);
  
  	/* Tally up the load of all CPUs in the group */
  	max_cpu_load = 0;
  	min_cpu_load = ~0UL;
  	max_nr_running = 0;
  
  	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
  		struct rq *rq = cpu_rq(i);
  		/* Bias balancing toward cpus of our domain */
  		if (local_group) {
  			if (idle_cpu(i) && !first_idle_cpu) {
  				first_idle_cpu = 1;
  				balance_cpu = i;
  			}
  
  			load = target_load(i, load_idx);
  		} else {
  			load = source_load(i, load_idx);
  			if (load > max_cpu_load) {
  				max_cpu_load = load;
  				max_nr_running = rq->nr_running;
  			}
  			if (min_cpu_load > load)
  				min_cpu_load = load;
  		}
  
  		sgs->group_load += load;
  		sgs->sum_nr_running += rq->nr_running;
  		sgs->sum_weighted_load += weighted_cpuload(i);
  		if (idle_cpu(i))
  			sgs->idle_cpus++;
  	}
  
  	/*
  	 * First idle cpu or the first cpu (busiest) in this sched group
  	 * is eligible for doing load balancing at this and above
  	 * domains. In the newly idle case, we will allow all the cpus
  	 * to do the newly idle load balance.
  	 */
  	if (idle != CPU_NEWLY_IDLE && local_group) {
  		if (balance_cpu != this_cpu) {
  			*balance = 0;
  			return;
  		}
  		update_group_power(sd, this_cpu);
  	}
  
  	/* Adjust by relative CPU power of the group */
  	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
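  	/*
  	 * avg_load is load per unit of cpu_power: a group carrying a weighted
  	 * load of 2048 on 1024 units of power reports 2048, just like a group
  	 * carrying 4096 on 2048 units.
  	 */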

  	/*
  	 * Consider the group unbalanced when the imbalance is larger
  	 * than the average weight of a task.
  	 *
  	 * APZ: with cgroup the avg task weight can vary wildly and
  	 *      might not be a suitable number - should we keep a
  	 *      normalized nr_running number somewhere that negates
  	 *      the hierarchy?
  	 */
  	if (sgs->sum_nr_running)
  		avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;

  	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
  		sgs->group_imb = 1;
  	sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
  						SCHED_POWER_SCALE);
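  	/*
  	 * e.g. a lone SMT-2 core with a combined power of ~1178 rounds to a
  	 * capacity of one task, while a power of 2048 rounds to two.
  	 */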
  	if (!sgs->group_capacity)
  		sgs->group_capacity = fix_small_capacity(sd, group);
  	sgs->group_weight = group->group_weight;
  
  	if (sgs->group_capacity > sgs->sum_nr_running)
  		sgs->group_has_capacity = 1;
  }
  
  /**
   * update_sd_pick_busiest - return 1 on busiest group
   * @sd: sched_domain whose statistics are to be checked
   * @sds: sched_domain statistics
   * @sg: sched_group candidate to be checked for being the busiest
   * @sgs: sched_group statistics
   * @this_cpu: the current cpu
   *
   * Determine if @sg is a busier group than the previously selected
   * busiest group.
   */
  static bool update_sd_pick_busiest(struct sched_domain *sd,
  				   struct sd_lb_stats *sds,
  				   struct sched_group *sg,
  				   struct sg_lb_stats *sgs,
  				   int this_cpu)
  {
  	if (sgs->avg_load <= sds->max_load)
  		return false;
  
  	if (sgs->sum_nr_running > sgs->group_capacity)
  		return true;
  
  	if (sgs->group_imb)
  		return true;
  
  	/*
  	 * ASYM_PACKING needs to move all the work to the lowest
  	 * numbered CPUs in the group, therefore mark all groups
  	 * higher than ourself as busy.
  	 */
  	if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
  	    this_cpu < group_first_cpu(sg)) {
  		if (!sds->busiest)
  			return true;
  
  		if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
  			return true;
  	}
  
  	return false;
  }
  
  /**
   * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
   * @sd: sched_domain whose statistics are to be updated.
   * @this_cpu: Cpu for which load balance is currently performed.
   * @idle: Idle status of this_cpu
   * @cpus: Set of cpus considered for load balancing.
   * @balance: Should we balance.
   * @sds: variable to hold the statistics for this sched_domain.
   */
  static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
  			enum cpu_idle_type idle, const struct cpumask *cpus,
  			int *balance, struct sd_lb_stats *sds)
  {
  	struct sched_domain *child = sd->child;
  	struct sched_group *sg = sd->groups;
  	struct sg_lb_stats sgs;
  	int load_idx, prefer_sibling = 0;
  
  	if (child && child->flags & SD_PREFER_SIBLING)
  		prefer_sibling = 1;
  
  	init_sd_power_savings_stats(sd, sds, idle);
  	load_idx = get_sd_load_idx(sd, idle);
  
  	do {
  		int local_group;
  		local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
  		memset(&sgs, 0, sizeof(sgs));
  		update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx,
  				local_group, cpus, balance, &sgs);
  		if (local_group && !(*balance))
  			return;
  
  		sds->total_load += sgs.group_load;
  		sds->total_pwr += sg->sgp->power;
  
  		/*
  		 * In case the child domain prefers tasks go to siblings
  		 * first, lower the sg capacity to one so that we'll try
  		 * and move all the excess tasks away. We lower the capacity
  		 * of a group only if the local group has the capacity to fit
  		 * these excess tasks, i.e. nr_running < group_capacity. The
  		 * extra check prevents the case where you always pull from the
  		 * heaviest group when it is already under-utilized (possible
  		 * when a large weight task outweighs the tasks on the system).
  		 */
  		if (prefer_sibling && !local_group && sds->this_has_capacity)
  			sgs.group_capacity = min(sgs.group_capacity, 1UL);
  
  		if (local_group) {
  			sds->this_load = sgs.avg_load;
  			sds->this = sg;
  			sds->this_nr_running = sgs.sum_nr_running;
  			sds->this_load_per_task = sgs.sum_weighted_load;
  			sds->this_has_capacity = sgs.group_has_capacity;
  			sds->this_idle_cpus = sgs.idle_cpus;
  		} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
  			sds->max_load = sgs.avg_load;
  			sds->busiest = sg;
  			sds->busiest_nr_running = sgs.sum_nr_running;
  			sds->busiest_idle_cpus = sgs.idle_cpus;
  			sds->busiest_group_capacity = sgs.group_capacity;
  			sds->busiest_load_per_task = sgs.sum_weighted_load;
  			sds->busiest_has_capacity = sgs.group_has_capacity;
  			sds->busiest_group_weight = sgs.group_weight;
  			sds->group_imb = sgs.group_imb;
  		}
  		update_sd_power_savings_stats(sg, sds, local_group, &sgs);
  		sg = sg->next;
  	} while (sg != sd->groups);
  }
  /**
   * check_asym_packing - Check to see if the group is packed into the
   *			sched domain.
   *
   * This is primarily intended to be used at the sibling level.  Some
   * cores like POWER7 prefer to use lower numbered SMT threads.  In the
   * case of POWER7, it can move to lower SMT modes only when higher
   * threads are idle.  When in lower SMT modes, the threads will
   * perform better since they share less core resources.  Hence when we
   * have idle threads, we want them to be the higher ones.
   *
   * This packing function is run on idle threads.  It checks to see if
   * the busiest CPU in this domain (core in the P7 case) has a higher
   * CPU number than the packing function is being run on.  Here we are
   * assuming a lower CPU number is equivalent to a lower SMT thread
   * number.
   *
   * Returns 1 when packing is required and a task should be moved to
   * this CPU.  The amount of the imbalance is returned in *imbalance.
   *
   * @sd: The sched_domain whose packing is to be checked.
   * @sds: Statistics of the sched_domain which is to be packed
   * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
   * @imbalance: returns amount of imbalanced due to packing.
   */
  static int check_asym_packing(struct sched_domain *sd,
  			      struct sd_lb_stats *sds,
  			      int this_cpu, unsigned long *imbalance)
  {
  	int busiest_cpu;
  
  	if (!(sd->flags & SD_ASYM_PACKING))
  		return 0;
  
  	if (!sds->busiest)
  		return 0;
  
  	busiest_cpu = group_first_cpu(sds->busiest);
  	if (this_cpu > busiest_cpu)
  		return 0;
  	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
  				       SCHED_POWER_SCALE);
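  	/*
  	 * This converts the busiest group's per-power load (max_load) back
  	 * into plain weighted task load, which is what gets handed to the
  	 * move_tasks() machinery later.
  	 */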
  	return 1;
  }
  
  /**
   * fix_small_imbalance - Calculate the minor imbalance that exists
   *			amongst the groups of a sched_domain, during
   *			load balancing.
   * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
   * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
   * @imbalance: Variable to store the imbalance.
   */
  static inline void fix_small_imbalance(struct sd_lb_stats *sds,
  				int this_cpu, unsigned long *imbalance)
  {
  	unsigned long tmp, pwr_now = 0, pwr_move = 0;
  	unsigned int imbn = 2;
  	unsigned long scaled_busy_load_per_task;
  
  	if (sds->this_nr_running) {
  		sds->this_load_per_task /= sds->this_nr_running;
  		if (sds->busiest_load_per_task >
  				sds->this_load_per_task)
  			imbn = 1;
  	} else
  		sds->this_load_per_task =
  			cpu_avg_load_per_task(this_cpu);
  	scaled_busy_load_per_task = sds->busiest_load_per_task
  					 * SCHED_POWER_SCALE;
  	scaled_busy_load_per_task /= sds->busiest->sgp->power;
  
  	if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
  			(scaled_busy_load_per_task * imbn)) {
  		*imbalance = sds->busiest_load_per_task;
  		return;
  	}
  
  	/*
  	 * OK, we don't have enough imbalance to justify moving tasks,
  	 * however we may be able to increase total CPU power used by
  	 * moving them.
  	 */
  	pwr_now += sds->busiest->sgp->power *
  			min(sds->busiest_load_per_task, sds->max_load);
  	pwr_now += sds->this->sgp->power *
  			min(sds->this_load_per_task, sds->this_load);
  	pwr_now /= SCHED_POWER_SCALE;
  
  	/* Amount of load we'd subtract */
  	tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
  		sds->busiest->sgp->power;
  	if (sds->max_load > tmp)
  		pwr_move += sds->busiest->sgp->power *
  			min(sds->busiest_load_per_task, sds->max_load - tmp);
  
  	/* Amount of load we'd add */
  	if (sds->max_load * sds->busiest->sgp->power <
  		sds->busiest_load_per_task * SCHED_POWER_SCALE)
  		tmp = (sds->max_load * sds->busiest->sgp->power) /
  			sds->this->sgp->power;
  	else
  		tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
  			sds->this->sgp->power;
  	pwr_move += sds->this->sgp->power *
  			min(sds->this_load_per_task, sds->this_load + tmp);
  	pwr_move /= SCHED_POWER_SCALE;
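  	/*
  	 * pwr_now and pwr_move estimate the useful work the two groups get
  	 * done before and after moving one busiest_load_per_task worth of
  	 * load; only their relative size matters for the comparison below.
  	 */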
  
  	/* Move if we gain throughput */
  	if (pwr_move > pwr_now)
  		*imbalance = sds->busiest_load_per_task;
  }
  
  /**
   * calculate_imbalance - Calculate the amount of imbalance present within the
   *			 groups of a given sched_domain during load balance.
   * @sds: statistics of the sched_domain whose imbalance is to be calculated.
   * @this_cpu: Cpu for which currently load balance is being performed.
   * @imbalance: The variable to store the imbalance.
   */
  static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
  		unsigned long *imbalance)
  {
  	unsigned long max_pull, load_above_capacity = ~0UL;
  
  	sds->busiest_load_per_task /= sds->busiest_nr_running;
  	if (sds->group_imb) {
  		sds->busiest_load_per_task =
  			min(sds->busiest_load_per_task, sds->avg_load);
  	}
  	/*
  	 * In the presence of smp nice balancing, certain scenarios can have
  	 * max load less than avg load (as we skip the groups at or below
  	 * its cpu_power, while calculating max_load..)
  	 */
  	if (sds->max_load < sds->avg_load) {
  		*imbalance = 0;
  		return fix_small_imbalance(sds, this_cpu, imbalance);
  	}
  	if (!sds->group_imb) {
  		/*
  		 * Don't want to pull so many tasks that a group would go idle.
  		 */
  		load_above_capacity = (sds->busiest_nr_running -
  						sds->busiest_group_capacity);
  		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);

  		load_above_capacity /= sds->busiest->sgp->power;
  	}
  
  	/*
  	 * We're trying to get all the cpus to the average_load, so we don't
  	 * want to push ourselves above the average load, nor do we wish to
  	 * reduce the max loaded cpu below the average load. At the same time,
  	 * we also don't want to reduce the group load below the group capacity
  	 * (so that we can implement power-savings policies etc). Thus we look
  	 * for the minimum possible imbalance.
  	 * Be careful of negative numbers as they'll appear as very large values
  	 * with unsigned longs.
  	 */
  	max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
  
  	/* How much load to actually move to equalise the imbalance */
  	*imbalance = min(max_pull * sds->busiest->sgp->power,
  		(sds->avg_load - sds->this_load) * sds->this->sgp->power)
  			/ SCHED_POWER_SCALE;
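  	/*
  	 * Both terms are converted back from per-power load to plain weighted
  	 * load; taking the min avoids pulling more than the busiest group's
  	 * surplus or more than the local group's headroom below the average.
  	 */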
  
  	/*
  	 * if *imbalance is less than the average load per runnable task
  	 * there is no guarantee that any tasks will be moved so we'll have
  	 * a think about bumping its value to force at least one task to be
  	 * moved
  	 */
  	if (*imbalance < sds->busiest_load_per_task)
  		return fix_small_imbalance(sds, this_cpu, imbalance);
  
  }

  /******* find_busiest_group() helpers end here *********************/
  
  /**
   * find_busiest_group - Returns the busiest group within the sched_domain
   * if there is an imbalance. If there isn't an imbalance, and
   * the user has opted for power-savings, it returns a group whose
   * CPUs can be put to idle by rebalancing those tasks elsewhere, if
   * such a group exists.
   *
   * Also calculates the amount of weighted load which should be moved
   * to restore balance.
   *
   * @sd: The sched_domain whose busiest group is to be returned.
   * @this_cpu: The cpu for which load balancing is currently being performed.
   * @imbalance: Variable which stores amount of weighted load which should
   *		be moved to restore balance/put a group to idle.
   * @idle: The idle status of this_cpu.
   * @cpus: The set of CPUs under consideration for load-balancing.
   * @balance: Pointer to a variable indicating if this_cpu
   *	is the appropriate cpu to perform load balancing at this_level.
   *
   * Returns:	- the busiest group if imbalance exists.
   *		- If no imbalance and user has opted for power-savings balance,
   *		   return the least loaded group whose CPUs can be
   *		   put to idle by rebalancing its tasks onto our group.
   */
  static struct sched_group *
  find_busiest_group(struct sched_domain *sd, int this_cpu,
  		   unsigned long *imbalance, enum cpu_idle_type idle,
  		   const struct cpumask *cpus, int *balance)
  {
  	struct sd_lb_stats sds;
  
  	memset(&sds, 0, sizeof(sds));
  
  	/*
  	 * Compute the various statistics relevant for load balancing at
  	 * this level.
  	 */
  	update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);

  	/*
  	 * this_cpu is not the appropriate cpu to perform load balancing at
  	 * this level.
  	 */
  	if (!(*balance))
  		goto ret;
  	if ((idle == CPU_IDLE || idle == CPU_NEWLY_IDLE) &&
  	    check_asym_packing(sd, &sds, this_cpu, imbalance))
  		return sds.busiest;
  	/* There is no busy sibling group to pull tasks from */
  	if (!sds.busiest || sds.busiest_nr_running == 0)
  		goto out_balanced;
  	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr;

  	/*
  	 * If the busiest group is imbalanced the below checks don't
  	 * work because they assume all things are equal, which typically
  	 * isn't true due to cpus_allowed constraints and the like.
  	 */
  	if (sds.group_imb)
  		goto force_balance;
  	/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
  	if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
  			!sds.busiest_has_capacity)
  		goto force_balance;
  	/*
  	 * If the local group is more busy than the selected busiest group
  	 * don't try and pull any tasks.
  	 */
  	if (sds.this_load >= sds.max_load)
  		goto out_balanced;
  	/*
  	 * Don't pull any tasks if this group is already above the domain
  	 * average load.
  	 */
  	if (sds.this_load >= sds.avg_load)
  		goto out_balanced;
  	if (idle == CPU_IDLE) {
  		/*
  		 * This cpu is idle. If the busiest group doesn't have
  		 * more tasks than the number of available cpus and
  		 * there is no imbalance between this and the busiest group
  		 * wrt idle cpus, it is balanced.
  		 */
  		if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
  		    sds.busiest_nr_running <= sds.busiest_group_weight)
  			goto out_balanced;
  	} else {
  		/*
  		 * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use
  		 * imbalance_pct to be conservative.
  		 */
  		if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
  			goto out_balanced;
  	}

  force_balance:
  	/* Looks like there is an imbalance. Compute it */
  	calculate_imbalance(&sds, this_cpu, imbalance);
  	return sds.busiest;
  
  out_balanced:
  	/*
  	 * There is no obvious imbalance. But check if we can do some balancing
  	 * to save power.
  	 */
  	if (check_power_save_busiest_group(&sds, this_cpu, imbalance))
  		return sds.busiest;
  ret:
  	*imbalance = 0;
  	return NULL;
  }
  
  /*
   * find_busiest_queue - find the busiest runqueue among the cpus in group.
   */
  static struct rq *
  find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
  		   enum cpu_idle_type idle, unsigned long imbalance,
  		   const struct cpumask *cpus)
  {
  	struct rq *busiest = NULL, *rq;
  	unsigned long max_load = 0;
  	int i;
  
  	for_each_cpu(i, sched_group_cpus(group)) {
  		unsigned long power = power_of(i);
  		unsigned long capacity = DIV_ROUND_CLOSEST(power,
  							   SCHED_POWER_SCALE);
  		unsigned long wl;
  		if (!capacity)
  			capacity = fix_small_capacity(sd, group);
  		if (!cpumask_test_cpu(i, cpus))
  			continue;
  
  		rq = cpu_rq(i);
  		wl = weighted_cpuload(i);

  		/*
  		 * When comparing with imbalance, use weighted_cpuload()
  		 * which is not scaled with the cpu power.
  		 */
  		if (capacity && rq->nr_running == 1 && wl > imbalance)
  			continue;
  		/*
  		 * For the load comparisons with the other cpu's, consider
  		 * the weighted_cpuload() scaled with the cpu power, so that
  		 * the load can be moved away from the cpu that is potentially
  		 * running at a lower capacity.
  		 */
  		wl = (wl * SCHED_POWER_SCALE) / power;
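  		/*
  		 * e.g. a weighted load of 2048 on a cpu with power 512 scales
  		 * to 4096, so a weak but busy cpu ranks as busier than a strong
  		 * one carrying the same raw load.
  		 */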

  		if (wl > max_load) {
  			max_load = wl;
  			busiest = rq;
  		}
  	}
  
  	return busiest;
  }
  
  /*
   * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but
   * so long as it is large enough.
   */
  #define MAX_PINNED_INTERVAL	512
  
  /* Working cpumask for load_balance and load_balance_newidle. */
  DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);

  static int need_active_balance(struct sched_domain *sd, int idle,
  			       int busiest_cpu, int this_cpu)
  {
  	if (idle == CPU_NEWLY_IDLE) {
  
  		/*
  		 * ASYM_PACKING needs to force migrate tasks from busy but
  		 * higher numbered CPUs in order to pack all tasks in the
  		 * lowest numbered CPUs.
  		 */
  		if ((sd->flags & SD_ASYM_PACKING) && busiest_cpu > this_cpu)
  			return 1;
  		/*
  		 * The only task running in a non-idle cpu can be moved to this
  		 * cpu in an attempt to completely free up the other CPU
  		 * package.
  		 *
  		 * The package power saving logic comes from
  		 * find_busiest_group(). If there are no imbalance, then
  		 * f_b_g() will return NULL. However when sched_mc={1,2} then
  		 * f_b_g() will select a group from which a running task may be
  		 * pulled to this cpu in order to make the other package idle.
  		 * If there is no opportunity to make a package idle and if
  		 * there are no imbalance, then f_b_g() will return NULL and no
  		 * action will be taken in load_balance_newidle().
  		 *
  		 * Under normal task pull operation due to imbalance, there
  		 * will be more than one task in the source run queue and
  		 * move_tasks() will succeed.  ld_moved will be true and this
  		 * active balance code will not be triggered.
  		 */
  		if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
  			return 0;
  	}
  
  	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
  }
  static int active_load_balance_cpu_stop(void *data);
  /*
   * Check this_cpu to ensure it is balanced within domain. Attempt to move
   * tasks if there is an imbalance.
   */
  static int load_balance(int this_cpu, struct rq *this_rq,
  			struct sched_domain *sd, enum cpu_idle_type idle,
  			int *balance)
  {
  	int ld_moved, lb_flags = 0, active_balance = 0;
  	struct sched_group *group;
  	unsigned long imbalance;
  	struct rq *busiest;
  	unsigned long flags;
  	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
  	cpumask_copy(cpus, cpu_active_mask);
  	schedstat_inc(sd, lb_count[idle]);
  
  redo:
  	group = find_busiest_group(sd, this_cpu, &imbalance, idle,
  				   cpus, balance);
  
  	if (*balance == 0)
  		goto out_balanced;
  
  	if (!group) {
  		schedstat_inc(sd, lb_nobusyg[idle]);
  		goto out_balanced;
  	}
  	busiest = find_busiest_queue(sd, group, idle, imbalance, cpus);
  	if (!busiest) {
  		schedstat_inc(sd, lb_nobusyq[idle]);
  		goto out_balanced;
  	}
  
  	BUG_ON(busiest == this_rq);
  
  	schedstat_add(sd, lb_imbalance[idle], imbalance);
  
  	ld_moved = 0;
  	if (busiest->nr_running > 1) {
  		/*
  		 * Attempt to move tasks. If find_busiest_group has found
  		 * an imbalance but busiest->nr_running <= 1, the group is
  		 * still unbalanced. ld_moved simply stays zero, so it is
  		 * correctly treated as an imbalance.
  		 */
  		lb_flags |= LBF_ALL_PINNED;
  		local_irq_save(flags);
  		double_rq_lock(this_rq, busiest);
  		ld_moved = move_tasks(this_rq, this_cpu, busiest,
  				      imbalance, sd, idle, &lb_flags);
  		double_rq_unlock(this_rq, busiest);
  		local_irq_restore(flags);
  
  		/*
  		 * some other cpu did the load balance for us.
  		 */
  		if (ld_moved && this_cpu != smp_processor_id())
  			resched_cpu(this_cpu);
  		if (lb_flags & LBF_ABORT)
  			goto out_balanced;
  
  		if (lb_flags & LBF_NEED_BREAK) {
  			lb_flags += LBF_HAD_BREAK - LBF_NEED_BREAK;
  			if (lb_flags & LBF_ABORT)
  				goto out_balanced;
  			goto redo;
  		}
  		/* All tasks on this runqueue were pinned by CPU affinity */
  		if (unlikely(lb_flags & LBF_ALL_PINNED)) {
  			cpumask_clear_cpu(cpu_of(busiest), cpus);
  			if (!cpumask_empty(cpus))
  				goto redo;
  			goto out_balanced;
  		}
  	}
  
  	if (!ld_moved) {
  		schedstat_inc(sd, lb_failed[idle]);
  		/*
  		 * Increment the failure counter only on periodic balance.
  		 * We do not want newidle balance, which can be very
  		 * frequent, pollute the failure counter causing
  		 * excessive cache_hot migrations and active balances.
  		 */
  		if (idle != CPU_NEWLY_IDLE)
  			sd->nr_balance_failed++;

  		if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) {
  			raw_spin_lock_irqsave(&busiest->lock, flags);
  			/* don't kick the active_load_balance_cpu_stop,
  			 * if the curr task on busiest cpu can't be
  			 * moved to this_cpu
  			 */
  			if (!cpumask_test_cpu(this_cpu,
  					tsk_cpus_allowed(busiest->curr))) {
  				raw_spin_unlock_irqrestore(&busiest->lock,
  							    flags);
  				lb_flags |= LBF_ALL_PINNED;
  				goto out_one_pinned;
  			}
  			/*
  			 * ->active_balance synchronizes accesses to
  			 * ->active_balance_work.  Once set, it's cleared
  			 * only after active load balance is finished.
  			 */
  			if (!busiest->active_balance) {
  				busiest->active_balance = 1;
  				busiest->push_cpu = this_cpu;
  				active_balance = 1;
  			}
  			raw_spin_unlock_irqrestore(&busiest->lock, flags);

  			if (active_balance)
  				stop_one_cpu_nowait(cpu_of(busiest),
  					active_load_balance_cpu_stop, busiest,
  					&busiest->active_balance_work);
  
  			/*
  			 * We've kicked active balancing, reset the failure
  			 * counter.
  			 */
  			sd->nr_balance_failed = sd->cache_nice_tries+1;
  		}
  	} else
  		sd->nr_balance_failed = 0;
  
  	if (likely(!active_balance)) {
  		/* We were unbalanced, so reset the balancing interval */
  		sd->balance_interval = sd->min_interval;
  	} else {
  		/*
  		 * If we've begun active balancing, start to back off. This
  		 * case may not be covered by the all_pinned logic if there
  		 * is only 1 task on the busy runqueue (because we don't call
  		 * move_tasks).
  		 */
  		if (sd->balance_interval < sd->max_interval)
  			sd->balance_interval *= 2;
  	}
  	goto out;
  
  out_balanced:
  	schedstat_inc(sd, lb_balanced[idle]);
  
  	sd->nr_balance_failed = 0;
  
  out_one_pinned:
  	/* tune up the balancing interval */
  	if (((lb_flags & LBF_ALL_PINNED) &&
  			sd->balance_interval < MAX_PINNED_INTERVAL) ||
  			(sd->balance_interval < sd->max_interval))
  		sd->balance_interval *= 2;
  	ld_moved = 0;
  out:
  	return ld_moved;
  }
  
  /*
   * idle_balance is called by schedule() if this_cpu is about to become
   * idle. Attempts to pull tasks from other CPUs.
   */
  void idle_balance(int this_cpu, struct rq *this_rq)
  {
  	struct sched_domain *sd;
  	int pulled_task = 0;
  	unsigned long next_balance = jiffies + HZ;
  
  	this_rq->idle_stamp = this_rq->clock;
  
  	if (this_rq->avg_idle < sysctl_sched_migration_cost)
  		return;
  	/*
  	 * Drop the rq->lock, but keep IRQ/preempt disabled.
  	 */
  	raw_spin_unlock(&this_rq->lock);
  	update_shares(this_cpu);
  	rcu_read_lock();
  	for_each_domain(this_cpu, sd) {
  		unsigned long interval;
  		int balance = 1;
  
  		if (!(sd->flags & SD_LOAD_BALANCE))
  			continue;
  		if (sd->flags & SD_BALANCE_NEWIDLE) {
  			/* If we've pulled tasks over stop searching: */
  			pulled_task = load_balance(this_cpu, this_rq,
  						   sd, CPU_NEWLY_IDLE, &balance);
  		}
  
  		interval = msecs_to_jiffies(sd->balance_interval);
  		if (time_after(next_balance, sd->last_balance + interval))
  			next_balance = sd->last_balance + interval;
  		if (pulled_task) {
  			this_rq->idle_stamp = 0;
  			break;
  		}
  	}
  	rcu_read_unlock();
  
  	raw_spin_lock(&this_rq->lock);
  	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
  		/*
  		 * We are going idle. next_balance may be set based on
  		 * a busy processor. So reset next_balance.
  		 */
  		this_rq->next_balance = next_balance;
  	}
  }
  
  /*
   * active_load_balance_cpu_stop is run by cpu stopper. It pushes
   * running tasks off the busiest CPU onto idle CPUs. It requires at
   * least 1 task to be running on each physical CPU where possible, and
   * avoids physical / logical imbalances.
   */
  static int active_load_balance_cpu_stop(void *data)
  {
  	struct rq *busiest_rq = data;
  	int busiest_cpu = cpu_of(busiest_rq);
  	int target_cpu = busiest_rq->push_cpu;
  	struct rq *target_rq = cpu_rq(target_cpu);
  	struct sched_domain *sd;
  
  	raw_spin_lock_irq(&busiest_rq->lock);
  
  	/* make sure the requested cpu hasn't gone down in the meantime */
  	if (unlikely(busiest_cpu != smp_processor_id() ||
  		     !busiest_rq->active_balance))
  		goto out_unlock;
  
  	/* Is there any task to move? */
  	if (busiest_rq->nr_running <= 1)
  		goto out_unlock;
  
  	/*
  	 * This condition is "impossible", if it occurs
  	 * we need to fix it. Originally reported by
  	 * Bjorn Helgaas on a 128-cpu setup.
  	 */
  	BUG_ON(busiest_rq == target_rq);
  
  	/* move a task from busiest_rq to target_rq */
  	double_lock_balance(busiest_rq, target_rq);
  
  	/* Search for an sd spanning us and the target CPU. */
  	rcu_read_lock();
  	for_each_domain(target_cpu, sd) {
  		if ((sd->flags & SD_LOAD_BALANCE) &&
  		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
  				break;
  	}
  
  	if (likely(sd)) {
  		schedstat_inc(sd, alb_count);
  
  		if (move_one_task(target_rq, target_cpu, busiest_rq,
  				  sd, CPU_IDLE))
  			schedstat_inc(sd, alb_pushed);
  		else
  			schedstat_inc(sd, alb_failed);
  	}
  	rcu_read_unlock();
  	double_unlock_balance(busiest_rq, target_rq);
  out_unlock:
  	busiest_rq->active_balance = 0;
  	raw_spin_unlock_irq(&busiest_rq->lock);
  	return 0;
  }
  
  #ifdef CONFIG_NO_HZ
  /*
   * idle load balancing details
   * - When one of the busy CPUs notices that idle rebalancing may be
   *   needed, it will kick the idle load balancer, which then does idle
   *   load balancing for all the idle CPUs.
   */
  static struct {
  	cpumask_var_t idle_cpus_mask;
  	atomic_t nr_cpus;
  	unsigned long next_balance;     /* in jiffy units */
  } nohz ____cacheline_aligned;
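  /*
   * idle_cpus_mask records the cpus whose tick is stopped, nr_cpus keeps an
   * atomic count of them, and next_balance caches the next balance time for
   * the whole set, in jiffies.
   */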

  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
  /**
   * lowest_flag_domain - Return lowest sched_domain containing flag.
   * @cpu:	The cpu whose lowest level of sched domain is to
   *		be returned.
   * @flag:	The flag to check for the lowest sched_domain
   *		for the given cpu.
   *
   * Returns the lowest sched_domain of a cpu which contains the given flag.
   */
  static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
  {
  	struct sched_domain *sd;
  
  	for_each_domain(cpu, sd)
  		if (sd->flags & flag)
  			break;
  
  	return sd;
  }
  
  /**
   * for_each_flag_domain - Iterates over sched_domains containing the flag.
   * @cpu:	The cpu whose domains we're iterating over.
   * @sd:		variable holding the value of the power_savings_sd
   *		for cpu.
   * @flag:	The flag to filter the sched_domains to be iterated.
   *
   * Iterates over all the scheduler domains for a given cpu that has the 'flag'
   * set, starting from the lowest sched_domain to the highest.
   */
  #define for_each_flag_domain(cpu, sd, flag) \
  	for (sd = lowest_flag_domain(cpu, flag); \
  		(sd && (sd->flags & flag)); sd = sd->parent)
  
  /**
   * find_new_ilb - Finds the optimum idle load balancer for nomination.
   * @cpu:	The cpu which is nominating a new idle_load_balancer.
   *
   * Returns:	the id of the idle load balancer if it exists,
   *		Else, returns >= nr_cpu_ids.
   *
   * This algorithm picks the idle load balancer such that it belongs to a
   * semi-idle powersavings sched_domain. The idea is to try and avoid
   * completely idle packages/cores just for the purpose of idle load balancing
   * when there are other idle cpu's which are better suited for that job.
   */
  static int find_new_ilb(int cpu)
  {
  	int ilb = cpumask_first(nohz.idle_cpus_mask);
  	struct sched_group *ilbg;
  	struct sched_domain *sd;
  
  	/*
  	 * Have idle load balancer selection from semi-idle packages only
  	 * when power-aware load balancing is enabled
  	 */
  	if (!(sched_smt_power_savings || sched_mc_power_savings))
  		goto out_done;
  
  	/*
  	 * Optimize for the case when we have no idle CPUs or only one
  	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
  	 */
  	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
  		goto out_done;
  	rcu_read_lock();
  	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
  		ilbg = sd->groups;
  
  		do {
  			if (ilbg->group_weight !=
  				atomic_read(&ilbg->sgp->nr_busy_cpus)) {
  				ilb = cpumask_first_and(nohz.idle_cpus_mask,
  							sched_group_cpus(ilbg));
  				goto unlock;
  			}

  			ilbg = ilbg->next;

  		} while (ilbg != sd->groups);
  	}
  unlock:
  	rcu_read_unlock();
  
  out_done:
  	if (ilb < nr_cpu_ids && idle_cpu(ilb))
  		return ilb;
  
  	return nr_cpu_ids;
  }
  #else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
  static inline int find_new_ilb(int call_cpu)
  {
  	return nr_cpu_ids;
  }
  #endif
  
  /*
   * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
   * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
   * CPU (if there is one).
   */
  static void nohz_balancer_kick(int cpu)
  {
  	int ilb_cpu;
  
  	nohz.next_balance++;
  	ilb_cpu = find_new_ilb(cpu);

  	if (ilb_cpu >= nr_cpu_ids)
  		return;

  	if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu)))
  		return;
  	/*
  	 * Use smp_send_reschedule() instead of resched_cpu().
  	 * This way we generate a sched IPI on the target cpu which
  	 * is idle. And the softirq performing nohz idle load balance
  	 * will be run before returning from the IPI.
  	 */
  	smp_send_reschedule(ilb_cpu);
  	return;
  }
  static inline void set_cpu_sd_state_busy(void)
  {
  	struct sched_domain *sd;
  	int cpu = smp_processor_id();
  
  	if (!test_bit(NOHZ_IDLE, nohz_flags(cpu)))
  		return;
  	clear_bit(NOHZ_IDLE, nohz_flags(cpu));
  
  	rcu_read_lock();
  	for_each_domain(cpu, sd)
  		atomic_inc(&sd->groups->sgp->nr_busy_cpus);
  	rcu_read_unlock();
  }
  
  void set_cpu_sd_state_idle(void)
  {
  	struct sched_domain *sd;
  	int cpu = smp_processor_id();
  
  	if (test_bit(NOHZ_IDLE, nohz_flags(cpu)))
  		return;
  	set_bit(NOHZ_IDLE, nohz_flags(cpu));
  
  	rcu_read_lock();
  	for_each_domain(cpu, sd)
  		atomic_dec(&sd->groups->sgp->nr_busy_cpus);
  	rcu_read_unlock();
  }
  
  /*
   * This routine will record that this cpu is going idle with tick stopped.
   * This info will be used in performing idle load balancing in the future.
   */
  void select_nohz_load_balancer(int stop_tick)
  {
  	int cpu = smp_processor_id();

  	if (stop_tick) {
  		if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
  			return;

  		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
  		atomic_inc(&nohz.nr_cpus);
  		set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
  	}
  	return;
  }
  #endif
  
  static DEFINE_SPINLOCK(balancing);
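  /*
   * The 'balancing' lock serializes load balancing of sched domains flagged
   * SD_SERIALIZE; rebalance_domains() below takes it with spin_trylock() and
   * simply skips such a domain for this round when the lock is contended.
   */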
  
  static unsigned long __read_mostly max_load_balance_interval = HZ/10;
  
  /*
   * Scale the max load_balance interval with the number of CPUs in the system.
   * This trades load-balance latency on larger machines for less cross talk.
   */
  void update_max_interval(void)
  {
  	max_load_balance_interval = HZ*num_online_cpus()/10;
  }
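  
  /*
   * Example: with 8 cpus online, max_load_balance_interval is 8 * HZ / 10
   * jiffies, i.e. 0.8 seconds regardless of HZ; rebalance_domains() below
   * clamps every domain's effective balance interval to this value.
   */
  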
  /*
   * rebalance_domains() checks each scheduling domain to see if it is due to
   * be balanced, and initiates a balancing operation if so.
   *
   * Balancing parameters are set up in arch_init_sched_domains.
   */
  static void rebalance_domains(int cpu, enum cpu_idle_type idle)
  {
  	int balance = 1;
  	struct rq *rq = cpu_rq(cpu);
  	unsigned long interval;
  	struct sched_domain *sd;
  	/* Earliest time when we have to do rebalance again */
  	unsigned long next_balance = jiffies + 60*HZ;
  	int update_next_balance = 0;
  	int need_serialize;
  	update_shares(cpu);
  	rcu_read_lock();
  	for_each_domain(cpu, sd) {
  		if (!(sd->flags & SD_LOAD_BALANCE))
  			continue;

  		interval = sd->balance_interval;
  		if (idle != CPU_IDLE)
  			interval *= sd->busy_factor;

  		/* scale ms to jiffies */
  		interval = msecs_to_jiffies(interval);
  		interval = clamp(interval, 1UL, max_load_balance_interval);

  		need_serialize = sd->flags & SD_SERIALIZE;

  		if (need_serialize) {
  			if (!spin_trylock(&balancing))
  				goto out;
  		}

  		if (time_after_eq(jiffies, sd->last_balance + interval)) {
  			if (load_balance(cpu, rq, sd, idle, &balance)) {
  				/*
  				 * We've pulled tasks over, so we're no
  				 * longer idle.
  				 */
  				idle = CPU_NOT_IDLE;
  			}
  			sd->last_balance = jiffies;
  		}
  		if (need_serialize)
  			spin_unlock(&balancing);
  out:
  		if (time_after(next_balance, sd->last_balance + interval)) {
  			next_balance = sd->last_balance + interval;
  			update_next_balance = 1;
  		}

  		/*
  		 * Stop the load balance at this level. There is another
  		 * CPU in our sched group which is doing load balancing more
  		 * actively.
  		 */
  		if (!balance)
  			break;
  	}
  	rcu_read_unlock();

  	/*
  	 * next_balance will be updated only when there is a need.
  	 * When the cpu is attached to a null domain, for example, it will
  	 * not be updated.
  	 */
  	if (likely(update_next_balance))
  		rq->next_balance = next_balance;
  }
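  
  /*
   * Note that a busy cpu scales each domain's balance_interval by
   * sd->busy_factor above, so busy cpus rebalance far less often than idle
   * ones; the resulting interval is clamped to
   * [1, max_load_balance_interval] jiffies either way.
   */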
  
  #ifdef CONFIG_NO_HZ
  /*
   * In the CONFIG_NO_HZ case, the idle balance kickee will do the
   * rebalancing for all the cpus for which scheduler ticks are stopped.
   */
  static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
  {
  	struct rq *this_rq = cpu_rq(this_cpu);
  	struct rq *rq;
  	int balance_cpu;

  	if (idle != CPU_IDLE ||
  	    !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
  		goto end;

  	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
  		if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
  			continue;

  		/*
  		 * If this cpu gets work to do, stop the load balancing
  		 * work being done for other cpus. Next load
  		 * balancing owner will pick it up.
  		 */
  		if (need_resched())
  			break;

  		raw_spin_lock_irq(&this_rq->lock);
  		update_rq_clock(this_rq);
  		update_cpu_load(this_rq);
  		raw_spin_unlock_irq(&this_rq->lock);

  		rebalance_domains(balance_cpu, CPU_IDLE);

  		rq = cpu_rq(balance_cpu);
  		if (time_after(this_rq->next_balance, rq->next_balance))
  			this_rq->next_balance = rq->next_balance;
  	}
  	nohz.next_balance = this_rq->next_balance;
  end:
  	clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu));
  }
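  
  /*
   * NOHZ_BALANCE_KICK is set on the chosen idle cpu by nohz_balancer_kick()
   * and cleared above once that cpu has balanced on behalf of the other
   * tickless cpus (or bailed out because it got work of its own).
   */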
  
  /*
   * Current heuristic for kicking the idle load balancer in the presence
   * of an idle cpu in the system.
   *   - This rq has more than one task.
   *   - At any scheduler domain level, this cpu's scheduler group has multiple
   *     busy cpus exceeding the group's power.
   *   - For SD_ASYM_PACKING, if the lower numbered cpus in the scheduler
   *     domain span are idle.
   */
  static inline int nohz_kick_needed(struct rq *rq, int cpu)
  {
  	unsigned long now = jiffies;
  	struct sched_domain *sd;

  	if (unlikely(idle_cpu(cpu)))
  		return 0;

  	/*
  	 * We may recently have been idle, with the tick either running or
  	 * stopped. At the first busy tick after returning from idle, we
  	 * will update the busy stats.
  	 */
  	set_cpu_sd_state_busy();
  	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
  		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
  		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
  		atomic_dec(&nohz.nr_cpus);
  	}

  	/*
  	 * None are in tickless mode and hence no need for NOHZ idle load
  	 * balancing.
  	 */
  	if (likely(!atomic_read(&nohz.nr_cpus)))
  		return 0;

  	if (time_before(now, nohz.next_balance))
  		return 0;

  	if (rq->nr_running >= 2)
  		goto need_kick;

  	rcu_read_lock();
  	for_each_domain(cpu, sd) {
  		struct sched_group *sg = sd->groups;
  		struct sched_group_power *sgp = sg->sgp;
  		int nr_busy = atomic_read(&sgp->nr_busy_cpus);

  		if (sd->flags & SD_SHARE_PKG_RESOURCES && nr_busy > 1)
  			goto need_kick_unlock;

  		if (sd->flags & SD_ASYM_PACKING && nr_busy != sg->group_weight
  		    && (cpumask_first_and(nohz.idle_cpus_mask,
  					  sched_domain_span(sd)) < cpu))
  			goto need_kick_unlock;

  		if (!(sd->flags & (SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING)))
  			break;
  	}
  	rcu_read_unlock();
  	return 0;

  need_kick_unlock:
  	rcu_read_unlock();
  need_kick:
  	return 1;
  }
  #else
  static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
  #endif

  /*
   * run_rebalance_domains is triggered when needed from the scheduler tick.
   * Also triggered for nohz idle balancing (with NOHZ_BALANCE_KICK set).
   */
  static void run_rebalance_domains(struct softirq_action *h)
  {
  	int this_cpu = smp_processor_id();
  	struct rq *this_rq = cpu_rq(this_cpu);
  	enum cpu_idle_type idle = this_rq->idle_balance ?
  						CPU_IDLE : CPU_NOT_IDLE;

  	rebalance_domains(this_cpu, idle);

  	/*
  	 * If this cpu has a pending nohz_balance_kick, then do the
  	 * balancing on behalf of the other idle cpus whose ticks are
  	 * stopped.
  	 */
  	nohz_idle_balance(this_cpu, idle);
  }

  static inline int on_null_domain(int cpu)
  {
  	return !rcu_dereference_sched(cpu_rq(cpu)->sd);
  }

  /*
   * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
   */
  void trigger_load_balance(struct rq *rq, int cpu)
  {
  	/* Don't need to rebalance while attached to NULL domain */
  	if (time_after_eq(jiffies, rq->next_balance) &&
  	    likely(!on_null_domain(cpu)))
  		raise_softirq(SCHED_SOFTIRQ);
  #ifdef CONFIG_NO_HZ
  	if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
  		nohz_balancer_kick(cpu);
  #endif
  }
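  
  /*
   * The SCHED_SOFTIRQ raised above is what eventually runs
   * run_rebalance_domains(); it is wired up in init_sched_fair_class() at
   * the bottom of this file.
   */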
  static void rq_online_fair(struct rq *rq)
  {
  	update_sysctl();
  }
  
  static void rq_offline_fair(struct rq *rq)
  {
  	update_sysctl();
  }
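  
  /*
   * rq_online_fair()/rq_offline_fair() re-run update_sysctl() so that the
   * cpu-count based scaling of the scheduler tunables tracks hotplug events.
   */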
  
  #endif /* CONFIG_SMP */

  /*
   * scheduler tick hitting a task of our scheduling class:
   */
  static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
  {
  	struct cfs_rq *cfs_rq;
  	struct sched_entity *se = &curr->se;

  	for_each_sched_entity(se) {
  		cfs_rq = cfs_rq_of(se);
  		entity_tick(cfs_rq, se, queued);
  	}
  }

  /*
   * called on fork with the child task as argument from the parent's context
   *  - child not yet on the tasklist
   *  - preemption disabled
   */
  static void task_fork_fair(struct task_struct *p)
  {
  	struct cfs_rq *cfs_rq;
  	struct sched_entity *se = &p->se, *curr;
  	int this_cpu = smp_processor_id();
  	struct rq *rq = this_rq();
  	unsigned long flags;

  	raw_spin_lock_irqsave(&rq->lock, flags);

  	update_rq_clock(rq);

  	cfs_rq = task_cfs_rq(current);
  	curr = cfs_rq->curr;

  	if (unlikely(task_cpu(p) != this_cpu)) {
  		rcu_read_lock();
  		__set_task_cpu(p, this_cpu);
  		rcu_read_unlock();
  	}

  	update_curr(cfs_rq);

  	if (curr)
  		se->vruntime = curr->vruntime;
  	place_entity(cfs_rq, se, 1);

  	if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
  		/*
  		 * Upon rescheduling, sched_class::put_prev_task() will place
  		 * 'current' within the tree based on its new key value.
  		 */
  		swap(curr->vruntime, se->vruntime);
  		resched_task(rq->curr);
  	}

  	se->vruntime -= cfs_rq->min_vruntime;

  	raw_spin_unlock_irqrestore(&rq->lock, flags);
  }
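  
  /*
   * The child leaves task_fork_fair() with a *relative* vruntime (the
   * parent's cfs_rq min_vruntime was subtracted above); the destination
   * runqueue's min_vruntime is added back when the child is enqueued, so
   * the value stays meaningful even if the child first runs on another cpu.
   */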
  
  /*
   * Priority of the task has changed. Check to see if we preempt
   * the current task.
   */
  static void
  prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
  {
  	if (!p->se.on_rq)
  		return;

  	/*
  	 * Reschedule if we are currently running on this runqueue and
  	 * our priority decreased, or if we are not currently running on
  	 * this runqueue and our priority is higher than the current's.
  	 */
  	if (rq->curr == p) {
  		if (p->prio > oldprio)
  			resched_task(rq->curr);
  	} else
  		check_preempt_curr(rq, p, 0);
  }
  
  static void switched_from_fair(struct rq *rq, struct task_struct *p)
  {
  	struct sched_entity *se = &p->se;
  	struct cfs_rq *cfs_rq = cfs_rq_of(se);
  
  	/*
  	 * Ensure the task's vruntime is normalized, so that when its
  	 * switched back to the fair class the enqueue_entity(.flags=0) will
  	 * do the right thing.
  	 *
  	 * If it was on_rq, then the dequeue_entity(.flags=0) will already
  	 * have normalized the vruntime, if it was !on_rq, then only when
  	 * the task is sleeping will it still have non-normalized vruntime.
  	 */
  	if (!se->on_rq && p->state != TASK_RUNNING) {
  		/*
  		 * Fix up our vruntime so that the current sleep doesn't
  		 * cause 'unlimited' sleep bonus.
  		 */
  		place_entity(cfs_rq, se, 0);
  		se->vruntime -= cfs_rq->min_vruntime;
  	}
  }
  
  /*
   * We switched to the sched_fair class.
   */
  static void switched_to_fair(struct rq *rq, struct task_struct *p)
  {
  	if (!p->se.on_rq)
  		return;

  	/*
  	 * We were most likely switched from sched_rt, so
  	 * kick off the schedule if running, otherwise just see
  	 * if we can still preempt the current task.
  	 */
  	if (rq->curr == p)
  		resched_task(rq->curr);
  	else
  		check_preempt_curr(rq, p, 0);
  }
  
  /* Account for a task changing its policy or group.
   *
   * This routine is mostly called to set cfs_rq->curr field when a task
   * migrates between groups/classes.
   */
  static void set_curr_task_fair(struct rq *rq)
  {
  	struct sched_entity *se = &rq->curr->se;

  	for_each_sched_entity(se) {
  		struct cfs_rq *cfs_rq = cfs_rq_of(se);

  		set_next_entity(cfs_rq, se);
  		/* ensure bandwidth has been allocated on our new cfs_rq */
  		account_cfs_rq_runtime(cfs_rq, 0);
  	}
  }
  
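  /*
   * init_cfs_rq() seeds min_vruntime with -(1 << 20) so that the vruntime
   * wrap-around handling gets exercised early; on 32-bit kernels
   * min_vruntime_copy shadows the value for lockless readers.
   */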
  void init_cfs_rq(struct cfs_rq *cfs_rq)
  {
  	cfs_rq->tasks_timeline = RB_ROOT;
  	INIT_LIST_HEAD(&cfs_rq->tasks);
  	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
  #ifndef CONFIG_64BIT
  	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
  #endif
  }
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  static void task_move_group_fair(struct task_struct *p, int on_rq)
  {
  	/*
  	 * If the task was not on the rq at the time of this cgroup movement
  	 * it must have been asleep, sleeping tasks keep their ->vruntime
  	 * absolute on their old rq until wakeup (needed for the fair sleeper
  	 * bonus in place_entity()).
  	 *
  	 * If it was on the rq, we've just 'preempted' it, which does convert
  	 * ->vruntime to a relative base.
  	 *
  	 * Make sure both cases convert their relative position when migrating
  	 * to another cgroup's rq. This does somewhat interfere with the
  	 * fair sleeper stuff for the first placement, but who cares.
  	 */
  	/*
  	 * When !on_rq, vruntime of the task has usually NOT been normalized.
  	 * But there are some cases where it has already been normalized:
  	 *
  	 * - Moving a forked child which is waiting to be woken up by
  	 *   wake_up_new_task().
  	 * - Moving a task which has been woken up by try_to_wake_up() and
  	 *   is waiting to actually be woken up by sched_ttwu_pending().
  	 *
  	 * To prevent boost or penalty in the new cfs_rq caused by delta
  	 * min_vruntime between the two cfs_rqs, we skip vruntime adjustment.
  	 */
  	if (!on_rq && (!p->se.sum_exec_runtime || p->state == TASK_WAKING))
  		on_rq = 1;

  	if (!on_rq)
  		p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
  	set_task_rq(p, task_cpu(p));
  	if (!on_rq)
  		p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
  }
  
  void free_fair_sched_group(struct task_group *tg)
  {
  	int i;
  
  	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
  
  	for_each_possible_cpu(i) {
  		if (tg->cfs_rq)
  			kfree(tg->cfs_rq[i]);
  		if (tg->se)
  			kfree(tg->se[i]);
  	}
  
  	kfree(tg->cfs_rq);
  	kfree(tg->se);
  }
  
  int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
  {
  	struct cfs_rq *cfs_rq;
  	struct sched_entity *se;
  	int i;
  
  	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
  	if (!tg->cfs_rq)
  		goto err;
  	tg->se = kzalloc(sizeof(se) * nr_cpu_ids, GFP_KERNEL);
  	if (!tg->se)
  		goto err;
  
  	tg->shares = NICE_0_LOAD;
  
  	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
  
  	for_each_possible_cpu(i) {
  		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
  				      GFP_KERNEL, cpu_to_node(i));
  		if (!cfs_rq)
  			goto err;
  
  		se = kzalloc_node(sizeof(struct sched_entity),
  				  GFP_KERNEL, cpu_to_node(i));
  		if (!se)
  			goto err_free_rq;
  
  		init_cfs_rq(cfs_rq);
  		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
  	}
  
  	return 1;
  
  err_free_rq:
  	kfree(cfs_rq);
  err:
  	return 0;
  }
  
  void unregister_fair_sched_group(struct task_group *tg, int cpu)
  {
  	struct rq *rq = cpu_rq(cpu);
  	unsigned long flags;
  
  	/*
  	 * Only empty task groups can be destroyed; so we can speculatively
  	 * check on_list without danger of it being re-added.
  	 */
  	if (!tg->cfs_rq[cpu]->on_list)
  		return;
  
  	raw_spin_lock_irqsave(&rq->lock, flags);
  	list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
  	raw_spin_unlock_irqrestore(&rq->lock, flags);
  }
  
  void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
  			struct sched_entity *se, int cpu,
  			struct sched_entity *parent)
  {
  	struct rq *rq = cpu_rq(cpu);
  
  	cfs_rq->tg = tg;
  	cfs_rq->rq = rq;
  #ifdef CONFIG_SMP
  	/* allow initial update_cfs_load() to truncate */
  	cfs_rq->load_stamp = 1;
  #endif
  	init_cfs_rq_runtime(cfs_rq);
  
  	tg->cfs_rq[cpu] = cfs_rq;
  	tg->se[cpu] = se;
  
  	/* se could be NULL for root_task_group */
  	if (!se)
  		return;
  
  	if (!parent)
  		se->cfs_rq = &rq->cfs;
  	else
  		se->cfs_rq = parent->my_q;
  
  	se->my_q = cfs_rq;
  	update_load_set(&se->load, 0);
  	se->parent = parent;
  }
  
  static DEFINE_MUTEX(shares_mutex);
  
  int sched_group_set_shares(struct task_group *tg, unsigned long shares)
  {
  	int i;
  	unsigned long flags;
  
  	/*
  	 * We can't change the weight of the root cgroup.
  	 */
  	if (!tg->se[0])
  		return -EINVAL;
  
  	shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
  
  	mutex_lock(&shares_mutex);
  	if (tg->shares == shares)
  		goto done;
  
  	tg->shares = shares;
  	for_each_possible_cpu(i) {
  		struct rq *rq = cpu_rq(i);
  		struct sched_entity *se;
  
  		se = tg->se[i];
  		/* Propagate contribution to hierarchy */
  		raw_spin_lock_irqsave(&rq->lock, flags);
  		for_each_sched_entity(se)
  			update_cfs_shares(group_cfs_rq(se));
  		raw_spin_unlock_irqrestore(&rq->lock, flags);
  	}
  
  done:
  	mutex_unlock(&shares_mutex);
  	return 0;
  }
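  
  /*
   * sched_group_set_shares() clamps the requested weight to the
   * [MIN_SHARES, MAX_SHARES] range (via scale_load()) and then, for each
   * cpu, walks the group's entity hierarchy under rq->lock so that
   * update_cfs_shares() sees the new tg->shares value at every level.
   */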
  #else /* CONFIG_FAIR_GROUP_SCHED */
  
  void free_fair_sched_group(struct task_group *tg) { }
  
  int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
  {
  	return 1;
  }
  
  void unregister_fair_sched_group(struct task_group *tg, int cpu) { }
  
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  
  static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
  {
  	struct sched_entity *se = &task->se;
  	unsigned int rr_interval = 0;

  	/*
  	 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
  	 * idle runqueue:
  	 */
  	if (rq->cfs.load.weight)
  		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));

  	return rr_interval;
  }
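  
  /*
   * get_rr_interval_fair() reports the task's current CFS slice, converted
   * to jiffies, for sched_rr_get_interval(); it is 0 on an otherwise idle
   * runqueue, as noted above.
   */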
  
  /*
   * All the scheduling class methods:
   */
  const struct sched_class fair_sched_class = {
  	.next			= &idle_sched_class,
  	.enqueue_task		= enqueue_task_fair,
  	.dequeue_task		= dequeue_task_fair,
  	.yield_task		= yield_task_fair,
  	.yield_to_task		= yield_to_task_fair,

  	.check_preempt_curr	= check_preempt_wakeup,

  	.pick_next_task		= pick_next_task_fair,
  	.put_prev_task		= put_prev_task_fair,
  #ifdef CONFIG_SMP
  	.select_task_rq		= select_task_rq_fair,
  	.rq_online		= rq_online_fair,
  	.rq_offline		= rq_offline_fair,

  	.task_waking		= task_waking_fair,
  #endif

  	.set_curr_task          = set_curr_task_fair,
  	.task_tick		= task_tick_fair,
  	.task_fork		= task_fork_fair,

  	.prio_changed		= prio_changed_fair,
  	.switched_from		= switched_from_fair,
  	.switched_to		= switched_to_fair,

  	.get_rr_interval	= get_rr_interval_fair,
  #ifdef CONFIG_FAIR_GROUP_SCHED
  	.task_move_group	= task_move_group_fair,
  #endif
  };
  
  #ifdef CONFIG_SCHED_DEBUG
  void print_cfs_stats(struct seq_file *m, int cpu)
  {
  	struct cfs_rq *cfs_rq;

  	rcu_read_lock();
  	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
  		print_cfs_rq(m, cpu, cfs_rq);
  	rcu_read_unlock();
  }
  #endif
  
  __init void init_sched_fair_class(void)
  {
  #ifdef CONFIG_SMP
  	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);

  #ifdef CONFIG_NO_HZ
  	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
  #endif
  #endif /* SMP */
  }