kernel/sched_fair.c

  /*
   * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
   *
   *  Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   *
   *  Interactivity improvements by Mike Galbraith
   *  (C) 2007 Mike Galbraith <efault@gmx.de>
   *
   *  Various enhancements by Dmitry Adamushko.
   *  (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
   *
   *  Group scheduling enhancements by Srivatsa Vaddagiri
   *  Copyright IBM Corporation, 2007
   *  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
   *
   *  Scaled math optimizations by Thomas Gleixner
   *  Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
   *
   *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
   *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
   */
  #include <linux/latencytop.h>
  #include <linux/sched.h>
  #include <linux/cpumask.h>

  /*
   * Targeted preemption latency for CPU-bound tasks:
   * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
   *
   * NOTE: this latency value is not the same as the concept of
   * 'timeslice length' - timeslices in CFS are of variable length
   * and have no persistent notion like in traditional, time-slice
   * based scheduling concepts.
   *
   * (to see the precise effective timeslice length of your workload,
   *  run vmstat and monitor the context-switches (cs) field)
   */
  unsigned int sysctl_sched_latency = 6000000ULL;
  unsigned int normalized_sysctl_sched_latency = 6000000ULL;
  
  /*
 * The initial- and re-scaling of tunables is configurable
 * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)))
 *
 * Options are:
 * SCHED_TUNABLESCALING_NONE - unscaled, always *1
 * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *(1+ilog(ncpus))
 * SCHED_TUNABLESCALING_LINEAR - scaled linearly, *ncpus
 */
  enum sched_tunable_scaling sysctl_sched_tunable_scaling
  	= SCHED_TUNABLESCALING_LOG;
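
/*
 * Illustrative example (not from the original source): on an 8-CPU machine
 * the default LOG scaling gives a factor of 1 + ilog(8) = 4, so the
 * effective defaults become roughly 24ms latency, 3ms minimum granularity
 * and 4ms wakeup granularity.
 */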
  
  /*
   * Minimal preemption granularity for CPU-bound tasks:
   * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
   */
  unsigned int sysctl_sched_min_granularity = 750000ULL;
  unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
  
  /*
 * sched_nr_latency is kept at sysctl_sched_latency / sysctl_sched_min_granularity
   */
  static unsigned int sched_nr_latency = 8;
  
  /*
   * After fork, child runs first. If set to 0 (default) then
   * parent will (try to) run first.
   */
  unsigned int sysctl_sched_child_runs_first __read_mostly;
  
  /*
   * SCHED_OTHER wake-up granularity.
   * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
   *
   * This option delays the preemption effects of decoupled workloads
   * and reduces their over-scheduling. Synchronous workloads will still
   * have immediate wakeup/sleep latencies.
   */
  unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
  unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;

  const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
  /*
   * The exponential sliding  window over which load is averaged for shares
   * distribution.
   * (default: 10msec)
   */
  unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
  static const struct sched_class fair_sched_class;
  /**************************************************************
   * CFS operations on generic schedulable entities:
   */
  #ifdef CONFIG_FAIR_GROUP_SCHED

  /* cpu runqueue to which this cfs_rq is attached */
  static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
  {
  	return cfs_rq->rq;
  }
  /* An entity is a task if it doesn't "own" a runqueue */
  #define entity_is_task(se)	(!se->my_q)

  static inline struct task_struct *task_of(struct sched_entity *se)
  {
  #ifdef CONFIG_SCHED_DEBUG
  	WARN_ON_ONCE(!entity_is_task(se));
  #endif
  	return container_of(se, struct task_struct, se);
  }
  /* Walk up scheduling entities hierarchy */
  #define for_each_sched_entity(se) \
  		for (; se; se = se->parent)
  
  static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
  {
  	return p->se.cfs_rq;
  }
  
  /* runqueue on which this entity is (to be) queued */
  static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
  {
  	return se->cfs_rq;
  }
  
  /* runqueue "owned" by this group */
  static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
  {
  	return grp->my_q;
  }
  static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
  	if (!cfs_rq->on_list) {
  		/*
  		 * Ensure we either appear before our parent (if already
  		 * enqueued) or force our parent to appear after us when it is
  		 * enqueued.  The fact that we always enqueue bottom-up
  		 * reduces this to two cases.
  		 */
  		if (cfs_rq->tg->parent &&
  		    cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) {
  			list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
  				&rq_of(cfs_rq)->leaf_cfs_rq_list);
  		} else {
  			list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
  				&rq_of(cfs_rq)->leaf_cfs_rq_list);
  		}
  
  		cfs_rq->on_list = 1;
  	}
  }
  
  static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
  	if (cfs_rq->on_list) {
  		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
  		cfs_rq->on_list = 0;
  	}
  }
/* Iterate through all leaf cfs_rq's on a runqueue */
  #define for_each_leaf_cfs_rq(rq, cfs_rq) \
  	list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
  
  /* Do the two (enqueued) entities belong to the same group ? */
  static inline int
  is_same_group(struct sched_entity *se, struct sched_entity *pse)
  {
  	if (se->cfs_rq == pse->cfs_rq)
  		return 1;
  
  	return 0;
  }
  
  static inline struct sched_entity *parent_entity(struct sched_entity *se)
  {
  	return se->parent;
  }
  /* return depth at which a sched entity is present in the hierarchy */
  static inline int depth_se(struct sched_entity *se)
  {
  	int depth = 0;
  
  	for_each_sched_entity(se)
  		depth++;
  
  	return depth;
  }
  
  static void
  find_matching_se(struct sched_entity **se, struct sched_entity **pse)
  {
  	int se_depth, pse_depth;
  
  	/*
  	 * preemption test can be made between sibling entities who are in the
  	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
  	 * both tasks until we find their ancestors who are siblings of common
  	 * parent.
  	 */
  
  	/* First walk up until both entities are at same depth */
  	se_depth = depth_se(*se);
  	pse_depth = depth_se(*pse);
  
  	while (se_depth > pse_depth) {
  		se_depth--;
  		*se = parent_entity(*se);
  	}
  
  	while (pse_depth > se_depth) {
  		pse_depth--;
  		*pse = parent_entity(*pse);
  	}
  
  	while (!is_same_group(*se, *pse)) {
  		*se = parent_entity(*se);
  		*pse = parent_entity(*pse);
  	}
  }
  #else	/* !CONFIG_FAIR_GROUP_SCHED */
  
  static inline struct task_struct *task_of(struct sched_entity *se)
  {
  	return container_of(se, struct task_struct, se);
  }

  static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
  {
  	return container_of(cfs_rq, struct rq, cfs);
  }
  
  #define entity_is_task(se)	1
  #define for_each_sched_entity(se) \
  		for (; se; se = NULL)

  static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
  {
  	return &task_rq(p)->cfs;
  }
  static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
  {
  	struct task_struct *p = task_of(se);
  	struct rq *rq = task_rq(p);
  
  	return &rq->cfs;
  }
  
  /* runqueue "owned" by this group */
  static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
  {
  	return NULL;
  }
  static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
  }
  
  static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
  {
  }
  #define for_each_leaf_cfs_rq(rq, cfs_rq) \
  		for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
  
  static inline int
  is_same_group(struct sched_entity *se, struct sched_entity *pse)
  {
  	return 1;
  }
  
  static inline struct sched_entity *parent_entity(struct sched_entity *se)
  {
  	return NULL;
  }
  static inline void
  find_matching_se(struct sched_entity **se, struct sched_entity **pse)
  {
  }
  #endif	/* CONFIG_FAIR_GROUP_SCHED */
  
  /**************************************************************
   * Scheduling class tree data structure manipulation methods:
   */
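
/*
 * Note: the min/max vruntime helpers below compare via a signed delta so
 * that the ordering stays correct even after the u64 vruntime values wrap.
 */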
  static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
  {
  	s64 delta = (s64)(vruntime - min_vruntime);
  	if (delta > 0)
  		min_vruntime = vruntime;
  
  	return min_vruntime;
  }
  static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
  {
  	s64 delta = (s64)(vruntime - min_vruntime);
  	if (delta < 0)
  		min_vruntime = vruntime;
  
  	return min_vruntime;
  }
  static inline int entity_before(struct sched_entity *a,
  				struct sched_entity *b)
  {
  	return (s64)(a->vruntime - b->vruntime) < 0;
  }
  static void update_min_vruntime(struct cfs_rq *cfs_rq)
  {
  	u64 vruntime = cfs_rq->min_vruntime;
  
  	if (cfs_rq->curr)
  		vruntime = cfs_rq->curr->vruntime;
  
  	if (cfs_rq->rb_leftmost) {
  		struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
  						   struct sched_entity,
  						   run_node);
  		if (!cfs_rq->curr)
  			vruntime = se->vruntime;
  		else
  			vruntime = min_vruntime(vruntime, se->vruntime);
  	}
  
  	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
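	/*
	 * Added note (an assumption about the intent, not original text): on
	 * 32-bit, min_vruntime cannot be loaded atomically, so a copy is
	 * published behind a write barrier; lockless readers pair it with a
	 * read barrier to detect a torn update.
	 */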
  #ifndef CONFIG_64BIT
  	smp_wmb();
  	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
  #endif
  }
  /*
   * Enqueue an entity into the rb-tree:
   */
  static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
  	struct rb_node *parent = NULL;
  	struct sched_entity *entry;
  	int leftmost = 1;
  
  	/*
  	 * Find the right place in the rbtree:
  	 */
  	while (*link) {
  		parent = *link;
  		entry = rb_entry(parent, struct sched_entity, run_node);
  		/*
		 * We don't care about collisions. Nodes with
  		 * the same key stay together.
  		 */
  		if (entity_before(se, entry)) {
  			link = &parent->rb_left;
  		} else {
  			link = &parent->rb_right;
  			leftmost = 0;
  		}
  	}
  
  	/*
  	 * Maintain a cache of leftmost tree entries (it is frequently
  	 * used):
  	 */
  	if (leftmost)
  		cfs_rq->rb_leftmost = &se->run_node;
  
  	rb_link_node(&se->run_node, parent, link);
  	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
  }
  static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	if (cfs_rq->rb_leftmost == &se->run_node) {
  		struct rb_node *next_node;
  
  		next_node = rb_next(&se->run_node);
  		cfs_rq->rb_leftmost = next_node;
  	}

  	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
  }
  static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
  {
  	struct rb_node *left = cfs_rq->rb_leftmost;
  
  	if (!left)
  		return NULL;
  
  	return rb_entry(left, struct sched_entity, run_node);
  }
  static struct sched_entity *__pick_next_entity(struct sched_entity *se)
  {
  	struct rb_node *next = rb_next(&se->run_node);
  
  	if (!next)
  		return NULL;
  
  	return rb_entry(next, struct sched_entity, run_node);
  }
  
  #ifdef CONFIG_SCHED_DEBUG
  static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  {
  	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);

  	if (!last)
  		return NULL;
  
  	return rb_entry(last, struct sched_entity, run_node);
  }
  /**************************************************************
   * Scheduling class statistics methods:
   */
  int sched_proc_update_handler(struct ctl_table *table, int write,
  		void __user *buffer, size_t *lenp,
  		loff_t *ppos)
  {
  	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  	int factor = get_update_sysctl_factor();
  
  	if (ret || !write)
  		return ret;
  
  	sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
  					sysctl_sched_min_granularity);
  #define WRT_SYSCTL(name) \
  	(normalized_sysctl_##name = sysctl_##name / (factor))
  	WRT_SYSCTL(sched_min_granularity);
  	WRT_SYSCTL(sched_latency);
  	WRT_SYSCTL(sched_wakeup_granularity);
  #undef WRT_SYSCTL
  	return 0;
  }
  #endif
  
  /*
   * delta /= w
   */
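/*
 * Concretely: delta is scaled by NICE_0_LOAD/weight, so a nice-0 entity
 * accrues vruntime 1:1 with real time, while an entity of twice that
 * weight accrues it at half the rate.
 */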
  static inline unsigned long
  calc_delta_fair(unsigned long delta, struct sched_entity *se)
  {
  	if (unlikely(se->load.weight != NICE_0_LOAD))
  		delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
  
  	return delta;
  }
  
  /*
   * The idea is to set a period in which each task runs once.
   *
 * When there are too many tasks (more than sched_nr_latency) we have to stretch
   * this period because otherwise the slices get too small.
   *
   * p = (nr <= nl) ? l : l*nr/nl
   */
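/*
 * Worked example with the defaults (l = 6ms, nl = 8, min granularity 0.75ms):
 * 4 runnable tasks keep the period at 6ms, while 16 tasks stretch it to
 * 16 * 0.75ms = 12ms, so every slice stays at least one granularity long.
 */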
  static u64 __sched_period(unsigned long nr_running)
  {
  	u64 period = sysctl_sched_latency;
  	unsigned long nr_latency = sched_nr_latency;
  
  	if (unlikely(nr_running > nr_latency)) {
  		period = sysctl_sched_min_granularity;
  		period *= nr_running;
  	}
  
  	return period;
  }
  /*
   * We calculate the wall-time slice from the period by taking a part
   * proportional to the weight.
   *
   * s = p*P[w/rw]
   */
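/*
 * Illustrative example (assuming NICE_0_LOAD == 1024): two runnable nice-0
 * tasks on an otherwise idle, non-grouped rq each get
 * 6ms * 1024/2048 = 3ms of wall time per period.
 */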
  static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);

  	for_each_sched_entity(se) {
  		struct load_weight *load;
  		struct load_weight lw;
  
  		cfs_rq = cfs_rq_of(se);
  		load = &cfs_rq->load;

  		if (unlikely(!se->on_rq)) {
  			lw = cfs_rq->load;
  
  			update_load_add(&lw, se->load.weight);
  			load = &lw;
  		}
  		slice = calc_delta_mine(slice, se->load.weight, load);
  	}
  	return slice;
  }
  /*
 * We calculate the vruntime slice of a to-be-inserted task
   *
   * vs = s/w
   */
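/*
 * For a nice-0 entity the vruntime slice equals its wall-time slice;
 * heavier entities receive a proportionally smaller vruntime advance.
 */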
  static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	return calc_delta_fair(sched_slice(cfs_rq, se), se);
  }
  static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
  static void update_cfs_shares(struct cfs_rq *cfs_rq);

  /*
   * Update the current task's runtime statistics. Skip current tasks that
   * are not in our scheduling class.
   */
  static inline void
  __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
  	      unsigned long delta_exec)
  {
  	unsigned long delta_exec_weighted;

  	schedstat_set(curr->statistics.exec_max,
  		      max((u64)delta_exec, curr->statistics.exec_max));
  
  	curr->sum_exec_runtime += delta_exec;
  	schedstat_add(cfs_rq, exec_clock, delta_exec);
  	delta_exec_weighted = calc_delta_fair(delta_exec, curr);

  	curr->vruntime += delta_exec_weighted;
  	update_min_vruntime(cfs_rq);

  #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
  	cfs_rq->load_unacc_exec_time += delta_exec;
  #endif
  }
  static void update_curr(struct cfs_rq *cfs_rq)
  {
  	struct sched_entity *curr = cfs_rq->curr;
  	u64 now = rq_of(cfs_rq)->clock_task;
  	unsigned long delta_exec;
  
  	if (unlikely(!curr))
  		return;
  
  	/*
  	 * Get the amount of time the current task was running
  	 * since the last time we changed load (this cannot
  	 * overflow on 32 bits):
  	 */
  	delta_exec = (unsigned long)(now - curr->exec_start);
  	if (!delta_exec)
  		return;

  	__update_curr(cfs_rq, curr, delta_exec);
  	curr->exec_start = now;
  
  	if (entity_is_task(curr)) {
  		struct task_struct *curtask = task_of(curr);
  		trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
  		cpuacct_charge(curtask, delta_exec);
  		account_group_exec_runtime(curtask, delta_exec);
  	}
  }
  
  static inline void
  update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	schedstat_set(se->statistics.wait_start, rq_of(cfs_rq)->clock);
  }
  /*
   * Task is being enqueued - update stats:
   */
  static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	/*
  	 * Are we enqueueing a waiting task? (for current tasks
  	 * a dequeue/enqueue event is a NOP)
  	 */
  	if (se != cfs_rq->curr)
  		update_stats_wait_start(cfs_rq, se);
  }
  static void
  update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
  			rq_of(cfs_rq)->clock - se->statistics.wait_start));
  	schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
  	schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
  			rq_of(cfs_rq)->clock - se->statistics.wait_start);
  #ifdef CONFIG_SCHEDSTATS
  	if (entity_is_task(se)) {
  		trace_sched_stat_wait(task_of(se),
  			rq_of(cfs_rq)->clock - se->statistics.wait_start);
  	}
  #endif
  	schedstat_set(se->statistics.wait_start, 0);
  }
  
  static inline void
  update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	/*
  	 * Mark the end of the wait period if dequeueing a
  	 * waiting task:
  	 */
  	if (se != cfs_rq->curr)
  		update_stats_wait_end(cfs_rq, se);
  }
  
  /*
   * We are picking a new current task - update its stats:
   */
  static inline void
  update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	/*
  	 * We are starting a new run period:
  	 */
  	se->exec_start = rq_of(cfs_rq)->clock_task;
  }
  /**************************************************
   * Scheduling class queueing methods:
   */
  #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
  static void
  add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
  {
  	cfs_rq->task_weight += weight;
  }
  #else
  static inline void
  add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
  {
  }
  #endif
  static void
  account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	update_load_add(&cfs_rq->load, se->load.weight);
  	if (!parent_entity(se))
  		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
  	if (entity_is_task(se)) {
  		add_cfs_task_weight(cfs_rq, se->load.weight);
  		list_add(&se->group_node, &cfs_rq->tasks);
  	}
  	cfs_rq->nr_running++;
  }
  
  static void
  account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	update_load_sub(&cfs_rq->load, se->load.weight);
  	if (!parent_entity(se))
  		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
  	if (entity_is_task(se)) {
  		add_cfs_task_weight(cfs_rq, -se->load.weight);
  		list_del_init(&se->group_node);
  	}
  	cfs_rq->nr_running--;
  }
  #ifdef CONFIG_FAIR_GROUP_SCHED
  # ifdef CONFIG_SMP
  static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
  					    int global_update)
  {
  	struct task_group *tg = cfs_rq->tg;
  	long load_avg;
  
  	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
  	load_avg -= cfs_rq->load_contribution;
  
  	if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) {
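		/*
		 * Only reached on a global update or once our contribution has
		 * drifted by more than 1/8th; batching like this is assumed to
		 * limit cross-CPU traffic on the shared tg->load_weight counter.
		 */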
  		atomic_add(load_avg, &tg->load_weight);
  		cfs_rq->load_contribution += load_avg;
  	}
  }
  
  static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
  {
  	u64 period = sysctl_sched_shares_window;
  	u64 now, delta;
  	unsigned long load = cfs_rq->load.weight;

  	if (cfs_rq->tg == &root_task_group)
  		return;
  	now = rq_of(cfs_rq)->clock_task;
  	delta = now - cfs_rq->load_stamp;
  	/* truncate load history at 4 idle periods */
  	if (cfs_rq->load_stamp > cfs_rq->load_last &&
  	    now - cfs_rq->load_last > 4 * period) {
  		cfs_rq->load_period = 0;
  		cfs_rq->load_avg = 0;
  		delta = period - 1;
  	}
  	cfs_rq->load_stamp = now;
  	cfs_rq->load_unacc_exec_time = 0;
  	cfs_rq->load_period += delta;
  	if (load) {
  		cfs_rq->load_last = now;
  		cfs_rq->load_avg += delta * load;
  	}

  	/* consider updating load contribution on each fold or truncate */
  	if (global_update || cfs_rq->load_period > period
  	    || !cfs_rq->load_period)
  		update_cfs_rq_load_contribution(cfs_rq, global_update);
  	while (cfs_rq->load_period > period) {
  		/*
  		 * Inline assembly required to prevent the compiler
  		 * optimising this loop into a divmod call.
  		 * See __iter_div_u64_rem() for another example of this.
  		 */
  		asm("" : "+rm" (cfs_rq->load_period));
  		cfs_rq->load_period /= 2;
  		cfs_rq->load_avg /= 2;
  	}

  	if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
  		list_del_leaf_cfs_rq(cfs_rq);
  }
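
/*
 * This cpu's share of the group: roughly tg->shares scaled by this
 * cfs_rq's load relative to the group total (with our possibly stale
 * load_contribution replaced by the current load), clamped to
 * [MIN_SHARES, tg->shares].
 */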
  static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
  {
  	long load_weight, load, shares;
  	load = cfs_rq->load.weight;
  
  	load_weight = atomic_read(&tg->load_weight);
  	load_weight += load;
  	load_weight -= cfs_rq->load_contribution;
  
  	shares = (tg->shares * load);
  	if (load_weight)
  		shares /= load_weight;
  
  	if (shares < MIN_SHARES)
  		shares = MIN_SHARES;
  	if (shares > tg->shares)
  		shares = tg->shares;
  
  	return shares;
  }
  
  static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
  {
  	if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
  		update_cfs_load(cfs_rq, 0);
  		update_cfs_shares(cfs_rq);
  	}
  }
  # else /* CONFIG_SMP */
  static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
  {
  }
  static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
  {
  	return tg->shares;
  }
  
  static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
  {
  }
  # endif /* CONFIG_SMP */
  static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
  			    unsigned long weight)
  {
  	if (se->on_rq) {
  		/* commit outstanding execution time */
  		if (cfs_rq->curr == se)
  			update_curr(cfs_rq);
  		account_entity_dequeue(cfs_rq, se);
  	}
  
  	update_load_set(&se->load, weight);
  
  	if (se->on_rq)
  		account_entity_enqueue(cfs_rq, se);
  }
  static void update_cfs_shares(struct cfs_rq *cfs_rq)
  {
  	struct task_group *tg;
  	struct sched_entity *se;
  	long shares;

  	tg = cfs_rq->tg;
  	se = tg->se[cpu_of(rq_of(cfs_rq))];
  	if (!se)
  		return;
  #ifndef CONFIG_SMP
  	if (likely(se->load.weight == tg->shares))
  		return;
  #endif
  	shares = calc_cfs_shares(cfs_rq, tg);
  
  	reweight_entity(cfs_rq_of(se), se, shares);
  }
  #else /* CONFIG_FAIR_GROUP_SCHED */
  static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
  {
  }
  static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
  {
  }
  
  static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
  {
  }
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  #ifdef CONFIG_SCHEDSTATS
  	struct task_struct *tsk = NULL;
  
  	if (entity_is_task(se))
  		tsk = task_of(se);
  	if (se->statistics.sleep_start) {
  		u64 delta = rq_of(cfs_rq)->clock - se->statistics.sleep_start;
  
  		if ((s64)delta < 0)
  			delta = 0;
  		if (unlikely(delta > se->statistics.sleep_max))
  			se->statistics.sleep_max = delta;

  		se->statistics.sleep_start = 0;
  		se->statistics.sum_sleep_runtime += delta;

  		if (tsk) {
  			account_scheduler_latency(tsk, delta >> 10, 1);
  			trace_sched_stat_sleep(tsk, delta);
  		}
  	}
  	if (se->statistics.block_start) {
  		u64 delta = rq_of(cfs_rq)->clock - se->statistics.block_start;
  
  		if ((s64)delta < 0)
  			delta = 0;
  		if (unlikely(delta > se->statistics.block_max))
  			se->statistics.block_max = delta;

  		se->statistics.block_start = 0;
  		se->statistics.sum_sleep_runtime += delta;

  		if (tsk) {
  			if (tsk->in_iowait) {
  				se->statistics.iowait_sum += delta;
  				se->statistics.iowait_count++;
  				trace_sched_stat_iowait(tsk, delta);
  			}
  			/*
  			 * Blocking time is in units of nanosecs, so shift by
  			 * 20 to get a milliseconds-range estimation of the
  			 * amount of time that the task spent sleeping:
  			 */
  			if (unlikely(prof_on == SLEEP_PROFILING)) {
  				profile_hits(SLEEP_PROFILING,
  						(void *)get_wchan(tsk),
  						delta >> 20);
  			}
  			account_scheduler_latency(tsk, delta >> 10, 0);
  		}
  	}
  #endif
  }
  static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  #ifdef CONFIG_SCHED_DEBUG
  	s64 d = se->vruntime - cfs_rq->min_vruntime;
  
  	if (d < 0)
  		d = -d;
  
  	if (d > 3*sysctl_sched_latency)
  		schedstat_inc(cfs_rq, nr_spread_over);
  #endif
  }
  static void
  place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
  {
  	u64 vruntime = cfs_rq->min_vruntime;

  	/*
	 * The 'current' period is already promised to the current tasks;
	 * however, the extra weight of the new task will slow them down a
	 * little. Place the new task so that it fits in the slot that
  	 * stays open at the end.
  	 */
  	if (initial && sched_feat(START_DEBIT))
  		vruntime += sched_vslice(cfs_rq, se);

  	/* sleeps up to a single latency don't count. */
  	if (!initial) {
  		unsigned long thresh = sysctl_sched_latency;

  		/*
  		 * Halve their sleep time's effect, to allow
  		 * for a gentler effect of sleepers:
  		 */
  		if (sched_feat(GENTLE_FAIR_SLEEPERS))
  			thresh >>= 1;

  		vruntime -= thresh;
  	}
  	/* ensure we never gain time by being placed backwards. */
  	vruntime = max_vruntime(se->vruntime, vruntime);
  	se->vruntime = vruntime;
  }
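
/*
 * Net effect with the default features and tunables (an illustration, not
 * original text): a newly forked task (START_DEBIT) starts one vslice after
 * min_vruntime, while a waking sleeper is credited up to half a latency
 * period (3ms) before it, never gaining on its previous vruntime.
 */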
  
  static void
  enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
  	/*
  	 * Update the normalized vruntime before updating min_vruntime
	 * through calling update_curr().
  	 */
  	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
  		se->vruntime += cfs_rq->min_vruntime;
  
  	/*
  	 * Update run-time statistics of the 'current'.
  	 */
  	update_curr(cfs_rq);
  	update_cfs_load(cfs_rq, 0);
  	account_entity_enqueue(cfs_rq, se);
  	update_cfs_shares(cfs_rq);

  	if (flags & ENQUEUE_WAKEUP) {
  		place_entity(cfs_rq, se, 0);
  		enqueue_sleeper(cfs_rq, se);
  	}

  	update_stats_enqueue(cfs_rq, se);
  	check_spread(cfs_rq, se);
  	if (se != cfs_rq->curr)
  		__enqueue_entity(cfs_rq, se);
  	se->on_rq = 1;
  
  	if (cfs_rq->nr_running == 1)
  		list_add_leaf_cfs_rq(cfs_rq);
  }
  static void __clear_buddies_last(struct sched_entity *se)
  {
  	for_each_sched_entity(se) {
  		struct cfs_rq *cfs_rq = cfs_rq_of(se);
  		if (cfs_rq->last == se)
  			cfs_rq->last = NULL;
  		else
  			break;
  	}
  }

  static void __clear_buddies_next(struct sched_entity *se)
  {
  	for_each_sched_entity(se) {
  		struct cfs_rq *cfs_rq = cfs_rq_of(se);
  		if (cfs_rq->next == se)
  			cfs_rq->next = NULL;
  		else
  			break;
  	}
  }
  static void __clear_buddies_skip(struct sched_entity *se)
  {
  	for_each_sched_entity(se) {
  		struct cfs_rq *cfs_rq = cfs_rq_of(se);
  		if (cfs_rq->skip == se)
  			cfs_rq->skip = NULL;
  		else
  			break;
  	}
  }
  static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	if (cfs_rq->last == se)
  		__clear_buddies_last(se);
  
  	if (cfs_rq->next == se)
  		__clear_buddies_next(se);
  
  	if (cfs_rq->skip == se)
  		__clear_buddies_skip(se);
  }
  static void
  dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
  	/*
  	 * Update run-time statistics of the 'current'.
  	 */
  	update_curr(cfs_rq);
  	update_stats_dequeue(cfs_rq, se);
  	if (flags & DEQUEUE_SLEEP) {
  #ifdef CONFIG_SCHEDSTATS
  		if (entity_is_task(se)) {
  			struct task_struct *tsk = task_of(se);
  
  			if (tsk->state & TASK_INTERRUPTIBLE)
  				se->statistics.sleep_start = rq_of(cfs_rq)->clock;
  			if (tsk->state & TASK_UNINTERRUPTIBLE)
  				se->statistics.block_start = rq_of(cfs_rq)->clock;
  		}
  #endif
  	}
  	clear_buddies(cfs_rq, se);

  	if (se != cfs_rq->curr)
  		__dequeue_entity(cfs_rq, se);
  	se->on_rq = 0;
  	update_cfs_load(cfs_rq, 0);
  	account_entity_dequeue(cfs_rq, se);
  
  	/*
  	 * Normalize the entity after updating the min_vruntime because the
  	 * update can refer to the ->curr item and we need to reflect this
  	 * movement in our normalized position.
  	 */
  	if (!(flags & DEQUEUE_SLEEP))
  		se->vruntime -= cfs_rq->min_vruntime;
  
  	update_min_vruntime(cfs_rq);
  	update_cfs_shares(cfs_rq);
  }
  
  /*
   * Preempt the current task with a newly woken task if needed:
   */
  static void
  check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  {
  	unsigned long ideal_runtime, delta_exec;
  	ideal_runtime = sched_slice(cfs_rq, curr);
  	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
  	if (delta_exec > ideal_runtime) {
  		resched_task(rq_of(cfs_rq)->curr);
  		/*
  		 * The current task ran long enough, ensure it doesn't get
  		 * re-elected due to buddy favours.
  		 */
  		clear_buddies(cfs_rq, curr);
  		return;
  	}
  
  	/*
  	 * Ensure that a task that missed wakeup preemption by a
  	 * narrow margin doesn't have to wait for a full slice.
  	 * This also mitigates buddy induced latencies under load.
  	 */
  	if (!sched_feat(WAKEUP_PREEMPT))
  		return;
  
  	if (delta_exec < sysctl_sched_min_granularity)
  		return;
  
  	if (cfs_rq->nr_running > 1) {
  		struct sched_entity *se = __pick_first_entity(cfs_rq);
  		s64 delta = curr->vruntime - se->vruntime;
  		if (delta < 0)
  			return;
  		if (delta > ideal_runtime)
  			resched_task(rq_of(cfs_rq)->curr);
  	}
  }
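
/*
 * In short: curr is preempted once it has consumed its ideal_runtime, and
 * (with WAKEUP_PREEMPT, after at least one minimum granularity) also as soon
 * as its vruntime leads the leftmost entity's by more than ideal_runtime.
 */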
  static void
  set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
  	/* 'current' is not kept within the tree. */
  	if (se->on_rq) {
  		/*
  		 * Any task has to be enqueued before it get to execute on
  		 * a CPU. So account for the time it spent waiting on the
  		 * runqueue.
  		 */
  		update_stats_wait_end(cfs_rq, se);
  		__dequeue_entity(cfs_rq, se);
  	}
  	update_stats_curr_start(cfs_rq, se);
  	cfs_rq->curr = se;
  #ifdef CONFIG_SCHEDSTATS
  	/*
  	 * Track our maximum slice length, if the CPU's load is at
	 * least twice that of our own weight (i.e. don't track it
  	 * when there are only lesser-weight tasks around):
  	 */
  	if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
  		se->statistics.slice_max = max(se->statistics.slice_max,
  			se->sum_exec_runtime - se->prev_sum_exec_runtime);
  	}
  #endif
  	se->prev_sum_exec_runtime = se->sum_exec_runtime;
  }
  static int
  wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
  /*
   * Pick the next process, keeping these things in mind, in this order:
   * 1) keep things fair between processes/task groups
   * 2) pick the "next" process, since someone really wants that to run
   * 3) pick the "last" process, for cache locality
   * 4) do not run the "skip" process, if something else is available
   */
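/*
 * "Too unfair" is judged by wakeup_preempt_entity(): a buddy is only picked
 * when its vruntime does not exceed the leftmost entity's by more than one
 * weight-scaled wakeup granularity (an interpretive note, not original text).
 */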
  static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
  {
  	struct sched_entity *se = __pick_first_entity(cfs_rq);
  	struct sched_entity *left = se;

  	/*
  	 * Avoid running the skip buddy, if running something else can
  	 * be done without getting too unfair.
  	 */
  	if (cfs_rq->skip == se) {
  		struct sched_entity *second = __pick_next_entity(se);
  		if (second && wakeup_preempt_entity(second, left) < 1)
  			se = second;
  	}

f685ceaca   Mike Galbraith   sched: Strengthen...
1098
1099
1100
1101
1102
  	/*
  	 * Prefer last buddy, try to return the CPU to a preempted task.
  	 */
  	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
  		se = cfs_rq->last;
ac53db596   Rik van Riel   sched: Use a budd...
1103
1104
1105
1106
1107
  	/*
  	 * Someone really wants this to run. If it's not unfair, run it.
  	 */
  	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
  		se = cfs_rq->next;
f685ceaca   Mike Galbraith   sched: Strengthen...
1108
  	clear_buddies(cfs_rq, se);
4793241be   Peter Zijlstra   sched: backward l...
1109
1110
  
  	return se;
aa2ac2522   Peter Zijlstra   sched: fix overlo...
1111
  }
ab6cde269   Ingo Molnar   sched: remove the...
1112
  static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1113
1114
1115
1116
1117
1118
  {
  	/*
  	 * If still on the runqueue then deactivate_task()
  	 * was not called and update_curr() has to be done:
  	 */
  	if (prev->on_rq)
b7cc08965   Ingo Molnar   sched: remove the...
1119
  		update_curr(cfs_rq);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1120

ddc972975   Peter Zijlstra   sched debug: chec...
1121
  	check_spread(cfs_rq, prev);
30cfdcfc5   Dmitry Adamushko   sched: do not kee...
1122
  	if (prev->on_rq) {
5870db5b8   Ingo Molnar   sched: remove the...
1123
  		update_stats_wait_start(cfs_rq, prev);
30cfdcfc5   Dmitry Adamushko   sched: do not kee...
1124
1125
1126
  		/* Put 'current' back into the tree. */
  		__enqueue_entity(cfs_rq, prev);
  	}
429d43bcc   Ingo Molnar   sched: cleanup: s...
1127
  	cfs_rq->curr = NULL;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1128
  }
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1129
1130
  static void
  entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1131
  {
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1132
  	/*
30cfdcfc5   Dmitry Adamushko   sched: do not kee...
1133
  	 * Update run-time statistics of the 'current'.
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1134
  	 */
30cfdcfc5   Dmitry Adamushko   sched: do not kee...
1135
  	update_curr(cfs_rq);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1136

43365bd7f   Paul Turner   sched: Move perio...
1137
1138
1139
1140
  	/*
  	 * Update share accounting for long-running entities.
  	 */
  	update_entity_shares_tick(cfs_rq);
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1141
1142
1143
1144
1145
  #ifdef CONFIG_SCHED_HRTICK
  	/*
  	 * queued ticks are scheduled to match the slice, so don't bother
  	 * validating it and just reschedule.
  	 */
983ed7a66   Harvey Harrison   sched: add static...
1146
1147
1148
1149
  	if (queued) {
  		resched_task(rq_of(cfs_rq)->curr);
  		return;
  	}
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1150
1151
1152
1153
1154
1155
1156
  	/*
  	 * don't let the period tick interfere with the hrtick preemption
  	 */
  	if (!sched_feat(DOUBLE_TICK) &&
  			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
  		return;
  #endif
ce6c13113   Peter Zijlstra   sched: disable fo...
1157
  	if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
2e09bf556   Ingo Molnar   sched: wakeup gra...
1158
  		check_preempt_tick(cfs_rq, curr);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1159
1160
1161
1162
1163
  }
  
  /**************************************************
   * CFS operations on tasks:
   */
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1164
1165
1166
  #ifdef CONFIG_SCHED_HRTICK
  static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
  {
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
  	struct sched_entity *se = &p->se;
  	struct cfs_rq *cfs_rq = cfs_rq_of(se);
  
  	WARN_ON(task_rq(p) != rq);
  
  	if (hrtick_enabled(rq) && cfs_rq->nr_running > 1) {
  		u64 slice = sched_slice(cfs_rq, se);
  		u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
  		s64 delta = slice - ran;
  
  		if (delta < 0) {
  			if (rq->curr == p)
  				resched_task(p);
  			return;
  		}
  
  		/*
  		 * Don't schedule slices shorter than 10000ns; that just
  		 * doesn't make sense. Rely on vruntime for fairness.
  		 */
31656519e   Peter Zijlstra   sched, x86: clean...
1187
  		if (rq->curr != p)
157124c11   Peter Zijlstra   sched: fix warnin...
1188
  			delta = max_t(s64, 10000LL, delta);
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1189

31656519e   Peter Zijlstra   sched, x86: clean...
1190
  		hrtick_start(rq, delta);
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1191
1192
  	}
  }
a4c2f00f5   Peter Zijlstra   sched: fair sched...
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
  
  /*
   * called from enqueue/dequeue and updates the hrtick when the
   * current task is from our class and nr_running is low enough
   * to matter.
   */
  static void hrtick_update(struct rq *rq)
  {
  	struct task_struct *curr = rq->curr;
  
  	if (curr->sched_class != &fair_sched_class)
  		return;
  
  	if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
  		hrtick_start_fair(rq, curr);
  }
55e12e5e7   Dhaval Giani   sched: make sched...
1209
  #else /* !CONFIG_SCHED_HRTICK */
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1210
1211
1212
1213
  static inline void
  hrtick_start_fair(struct rq *rq, struct task_struct *p)
  {
  }
a4c2f00f5   Peter Zijlstra   sched: fair sched...
1214
1215
1216
1217
  
  static inline void hrtick_update(struct rq *rq)
  {
  }
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1218
  #endif
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1219
1220
1221
1222
1223
  /*
   * The enqueue_task method is called before nr_running is
   * increased. Here we update the fair scheduling stats and
   * then put the task into the rbtree:
   */
ea87bb785   Thomas Gleixner   sched: Extend enq...
1224
  static void
371fd7e7a   Peter Zijlstra   sched: Add enqueu...
1225
  enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1226
1227
  {
  	struct cfs_rq *cfs_rq;
62fb18513   Peter Zijlstra   sched: revert loa...
1228
  	struct sched_entity *se = &p->se;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1229
1230
  
  	for_each_sched_entity(se) {
62fb18513   Peter Zijlstra   sched: revert loa...
1231
  		if (se->on_rq)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1232
1233
  			break;
  		cfs_rq = cfs_rq_of(se);
88ec22d3e   Peter Zijlstra   sched: Remove the...
1234
1235
  		enqueue_entity(cfs_rq, se, flags);
  		flags = ENQUEUE_WAKEUP;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1236
  	}
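  	/*
  	 * The loop above stopped at the first ancestor that was already
  	 * queued; keep walking up so load and shares are refreshed for the
  	 * remaining group entities as well.
  	 */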
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1237

2069dd75c   Peter Zijlstra   sched: Rewrite tg...
1238
  	for_each_sched_entity(se) {
0f3171438   Lin Ming   sched: Cleanup du...
1239
  		cfs_rq = cfs_rq_of(se);
2069dd75c   Peter Zijlstra   sched: Rewrite tg...
1240

d6b559182   Paul Turner   sched: Allow upda...
1241
  		update_cfs_load(cfs_rq, 0);
6d5ab2932   Paul Turner   sched: Simplify u...
1242
  		update_cfs_shares(cfs_rq);
2069dd75c   Peter Zijlstra   sched: Rewrite tg...
1243
  	}
a4c2f00f5   Peter Zijlstra   sched: fair sched...
1244
  	hrtick_update(rq);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1245
  }
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
1246
  static void set_next_buddy(struct sched_entity *se);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1247
1248
1249
1250
1251
  /*
   * The dequeue_task method is called before nr_running is
   * decreased. We remove the task from the rbtree and
   * update the fair scheduling stats:
   */
371fd7e7a   Peter Zijlstra   sched: Add enqueu...
1252
  static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1253
1254
  {
  	struct cfs_rq *cfs_rq;
62fb18513   Peter Zijlstra   sched: revert loa...
1255
  	struct sched_entity *se = &p->se;
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
1256
  	int task_sleep = flags & DEQUEUE_SLEEP;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1257
1258
1259
  
  	for_each_sched_entity(se) {
  		cfs_rq = cfs_rq_of(se);
371fd7e7a   Peter Zijlstra   sched: Add enqueu...
1260
  		dequeue_entity(cfs_rq, se, flags);
2069dd75c   Peter Zijlstra   sched: Rewrite tg...
1261

bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1262
  		/* Don't dequeue parent if it has other entities besides us */
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
1263
1264
1265
1266
1267
1268
1269
  		if (cfs_rq->load.weight) {
  			/*
  			 * Bias pick_next to pick a task from this cfs_rq, as
  			 * p is sleeping when it is within its sched_slice.
  			 */
  			if (task_sleep && parent_entity(se))
  				set_next_buddy(parent_entity(se));
9598c82dc   Paul Turner   sched: Don't upda...
1270
1271
1272
  
  			/* avoid re-evaluating load for this entity */
  			se = parent_entity(se);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1273
  			break;
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
1274
  		}
371fd7e7a   Peter Zijlstra   sched: Add enqueu...
1275
  		flags |= DEQUEUE_SLEEP;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1276
  	}
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1277

2069dd75c   Peter Zijlstra   sched: Rewrite tg...
1278
  	for_each_sched_entity(se) {
0f3171438   Lin Ming   sched: Cleanup du...
1279
  		cfs_rq = cfs_rq_of(se);
2069dd75c   Peter Zijlstra   sched: Rewrite tg...
1280

d6b559182   Paul Turner   sched: Allow upda...
1281
  		update_cfs_load(cfs_rq, 0);
6d5ab2932   Paul Turner   sched: Simplify u...
1282
  		update_cfs_shares(cfs_rq);
2069dd75c   Peter Zijlstra   sched: Rewrite tg...
1283
  	}
a4c2f00f5   Peter Zijlstra   sched: fair sched...
1284
  	hrtick_update(rq);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1285
  }
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1286
  #ifdef CONFIG_SMP
098fb9db2   Ingo Molnar   sched: clean up w...
1287

74f8e4b23   Peter Zijlstra   sched: Remove rq ...
1288
  static void task_waking_fair(struct task_struct *p)
88ec22d3e   Peter Zijlstra   sched: Remove the...
1289
1290
1291
  {
  	struct sched_entity *se = &p->se;
  	struct cfs_rq *cfs_rq = cfs_rq_of(se);
3fe1698b7   Peter Zijlstra   sched: Deal with ...
1292
1293
1294
1295
  	u64 min_vruntime;
  
  #ifndef CONFIG_64BIT
  	u64 min_vruntime_copy;
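  	/*
  	 * min_vruntime is 64 bits and cannot be read atomically on 32-bit:
  	 * spin below until the value and its copy agree, which yields a
  	 * consistent snapshot without taking the rq lock.
  	 */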
88ec22d3e   Peter Zijlstra   sched: Remove the...
1296

3fe1698b7   Peter Zijlstra   sched: Deal with ...
1297
1298
1299
1300
1301
1302
1303
1304
  	do {
  		min_vruntime_copy = cfs_rq->min_vruntime_copy;
  		smp_rmb();
  		min_vruntime = cfs_rq->min_vruntime;
  	} while (min_vruntime != min_vruntime_copy);
  #else
  	min_vruntime = cfs_rq->min_vruntime;
  #endif
88ec22d3e   Peter Zijlstra   sched: Remove the...
1305

3fe1698b7   Peter Zijlstra   sched: Deal with ...
1306
  	se->vruntime -= min_vruntime;
88ec22d3e   Peter Zijlstra   sched: Remove the...
1307
  }
bb3469ac9   Peter Zijlstra   sched: hierarchic...
1308
  #ifdef CONFIG_FAIR_GROUP_SCHED
f5bfb7d9f   Peter Zijlstra   sched: bias effec...
1309
1310
1311
1312
1313
1314
  /*
   * effective_load() calculates the load change as seen from the root_task_group
   *
   * Adding load to a group doesn't make a group heavier, but can cause movement
   * of group shares between cpus. Assuming the shares were perfectly aligned one
   * can calculate the shift in shares.
f5bfb7d9f   Peter Zijlstra   sched: bias effec...
1315
   */
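  /*
   * For illustration only: with tg->shares == 1024, if the group's updated
   * total weight works out to lw == 2048 and its queue on this cpu would
   * hold wl == 1024 of it, the group entity's weight here becomes
   * 1024 * 1024 / 2048 == 512, and the change reported one level up is
   * that value minus the entity's current load.weight.
   */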
2069dd75c   Peter Zijlstra   sched: Rewrite tg...
1316
  static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
bb3469ac9   Peter Zijlstra   sched: hierarchic...
1317
  {
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1318
  	struct sched_entity *se = tg->se[cpu];
f1d239f73   Peter Zijlstra   sched: incrementa...
1319
1320
1321
  
  	if (!tg->parent)
  		return wl;
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1322
  	for_each_sched_entity(se) {
977dda7c9   Paul Turner   sched: Update eff...
1323
  		long lw, w;
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1324

977dda7c9   Paul Turner   sched: Update eff...
1325
1326
  		tg = se->my_q->tg;
  		w = se->my_q->load.weight;
bb3469ac9   Peter Zijlstra   sched: hierarchic...
1327

977dda7c9   Paul Turner   sched: Update eff...
1328
1329
1330
1331
  		/* use this cpu's instantaneous contribution */
  		lw = atomic_read(&tg->load_weight);
  		lw -= se->my_q->load_contribution;
  		lw += w + wg;
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1332

977dda7c9   Paul Turner   sched: Update eff...
1333
  		wl += w;
940959e93   Peter Zijlstra   sched: fixlet for...
1334

977dda7c9   Paul Turner   sched: Update eff...
1335
1336
1337
1338
  		if (lw > 0 && wl < lw)
  			wl = (wl * tg->shares) / lw;
  		else
  			wl = tg->shares;
940959e93   Peter Zijlstra   sched: fixlet for...
1339

977dda7c9   Paul Turner   sched: Update eff...
1340
1341
1342
1343
  		/* zero point is MIN_SHARES */
  		if (wl < MIN_SHARES)
  			wl = MIN_SHARES;
  		wl -= se->load.weight;
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1344
  		wg = 0;
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1345
  	}
bb3469ac9   Peter Zijlstra   sched: hierarchic...
1346

4be9daaa1   Peter Zijlstra   sched: fix task_h...
1347
  	return wl;
bb3469ac9   Peter Zijlstra   sched: hierarchic...
1348
  }
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1349

bb3469ac9   Peter Zijlstra   sched: hierarchic...
1350
  #else
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1351

83378269a   Peter Zijlstra   sched: correct wa...
1352
1353
  static inline unsigned long effective_load(struct task_group *tg, int cpu,
  		unsigned long wl, unsigned long wg)
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1354
  {
83378269a   Peter Zijlstra   sched: correct wa...
1355
  	return wl;
bb3469ac9   Peter Zijlstra   sched: hierarchic...
1356
  }
4be9daaa1   Peter Zijlstra   sched: fix task_h...
1357

bb3469ac9   Peter Zijlstra   sched: hierarchic...
1358
  #endif
c88d59108   Peter Zijlstra   sched: Merge sele...
1359
  static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
098fb9db2   Ingo Molnar   sched: clean up w...
1360
  {
e37b6a7b2   Paul Turner   sched: Fix sign u...
1361
  	s64 this_load, load;
c88d59108   Peter Zijlstra   sched: Merge sele...
1362
  	int idx, this_cpu, prev_cpu;
098fb9db2   Ingo Molnar   sched: clean up w...
1363
  	unsigned long tl_per_task;
c88d59108   Peter Zijlstra   sched: Merge sele...
1364
  	struct task_group *tg;
83378269a   Peter Zijlstra   sched: correct wa...
1365
  	unsigned long weight;
b3137bc8e   Mike Galbraith   sched: stop wake_...
1366
  	int balanced;
098fb9db2   Ingo Molnar   sched: clean up w...
1367

c88d59108   Peter Zijlstra   sched: Merge sele...
1368
1369
1370
1371
1372
  	idx	  = sd->wake_idx;
  	this_cpu  = smp_processor_id();
  	prev_cpu  = task_cpu(p);
  	load	  = source_load(prev_cpu, idx);
  	this_load = target_load(this_cpu, idx);
098fb9db2   Ingo Molnar   sched: clean up w...
1373
1374
  
  	/*
b3137bc8e   Mike Galbraith   sched: stop wake_...
1375
1376
1377
1378
  	 * If sync wakeup then subtract the (maximum possible)
  	 * effect of the currently running task from the load
  	 * of the current CPU:
  	 */
83378269a   Peter Zijlstra   sched: correct wa...
1379
1380
1381
  	if (sync) {
  		tg = task_group(current);
  		weight = current->se.load.weight;
c88d59108   Peter Zijlstra   sched: Merge sele...
1382
  		this_load += effective_load(tg, this_cpu, -weight, -weight);
83378269a   Peter Zijlstra   sched: correct wa...
1383
1384
  		load += effective_load(tg, prev_cpu, 0, -weight);
  	}
b3137bc8e   Mike Galbraith   sched: stop wake_...
1385

83378269a   Peter Zijlstra   sched: correct wa...
1386
1387
  	tg = task_group(p);
  	weight = p->se.load.weight;
b3137bc8e   Mike Galbraith   sched: stop wake_...
1388

71a29aa7b   Peter Zijlstra   sched: Deal with ...
1389
1390
  	/*
  	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
c88d59108   Peter Zijlstra   sched: Merge sele...
1391
1392
1393
  	 * due to the sync cause above having dropped this_load to 0, we'll
  	 * always have an imbalance, but there's really nothing you can do
  	 * about that, so that's good too.
71a29aa7b   Peter Zijlstra   sched: Deal with ...
1394
1395
1396
1397
  	 *
  	 * Otherwise check if either cpu is near enough in load to allow this
  	 * task to be woken on this_cpu.
  	 */
e37b6a7b2   Paul Turner   sched: Fix sign u...
1398
1399
  	if (this_load > 0) {
  		s64 this_eff_load, prev_eff_load;
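  		/*
  		 * Compare load/power ratios without dividing: each side is
  		 * scaled by the other CPU's power, and the prev side gets a
  		 * margin of half the imbalance percentage, so this_cpu may be
  		 * slightly busier and still count as balanced.
  		 */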
e51fd5e22   Peter Zijlstra   sched: Fix wake_a...
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
  
  		this_eff_load = 100;
  		this_eff_load *= power_of(prev_cpu);
  		this_eff_load *= this_load +
  			effective_load(tg, this_cpu, weight, weight);
  
  		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
  		prev_eff_load *= power_of(this_cpu);
  		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
  
  		balanced = this_eff_load <= prev_eff_load;
  	} else
  		balanced = true;
b3137bc8e   Mike Galbraith   sched: stop wake_...
1413
1414
  
  	/*
4ae7d5cef   Ingo Molnar   sched: improve af...
1415
1416
1417
  	 * If the currently running task will sleep within
  	 * a reasonable amount of time then attract this newly
  	 * woken task:
098fb9db2   Ingo Molnar   sched: clean up w...
1418
  	 */
2fb7635c4   Peter Zijlstra   sched: sync wakeu...
1419
1420
  	if (sync && balanced)
  		return 1;
098fb9db2   Ingo Molnar   sched: clean up w...
1421

41acab885   Lucas De Marchi   sched: Implement ...
1422
  	schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
098fb9db2   Ingo Molnar   sched: clean up w...
1423
  	tl_per_task = cpu_avg_load_per_task(this_cpu);
c88d59108   Peter Zijlstra   sched: Merge sele...
1424
1425
1426
  	if (balanced ||
  	    (this_load <= load &&
  	     this_load + target_load(prev_cpu, idx) <= tl_per_task)) {
098fb9db2   Ingo Molnar   sched: clean up w...
1427
1428
1429
1430
1431
  		/*
  		 * This domain has SD_WAKE_AFFINE and
  		 * p is cache cold in this domain, and
  		 * there is no bad imbalance.
  		 */
c88d59108   Peter Zijlstra   sched: Merge sele...
1432
  		schedstat_inc(sd, ttwu_move_affine);
41acab885   Lucas De Marchi   sched: Implement ...
1433
  		schedstat_inc(p, se.statistics.nr_wakeups_affine);
098fb9db2   Ingo Molnar   sched: clean up w...
1434
1435
1436
1437
1438
  
  		return 1;
  	}
  	return 0;
  }
aaee1203c   Peter Zijlstra   sched: Move sched...
1439
1440
1441
1442
1443
  /*
   * find_idlest_group finds and returns the least busy CPU group within the
   * domain.
   */
  static struct sched_group *
78e7ed53c   Peter Zijlstra   sched: Tweak wake...
1444
  find_idlest_group(struct sched_domain *sd, struct task_struct *p,
5158f4e44   Peter Zijlstra   sched: Clean up t...
1445
  		  int this_cpu, int load_idx)
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1446
  {
b3bd3de66   Andi Kleen   gcc-4.6: kernel/*...
1447
  	struct sched_group *idlest = NULL, *group = sd->groups;
aaee1203c   Peter Zijlstra   sched: Move sched...
1448
  	unsigned long min_load = ULONG_MAX, this_load = 0;
aaee1203c   Peter Zijlstra   sched: Move sched...
1449
  	int imbalance = 100 + (sd->imbalance_pct-100)/2;
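  	/*
  	 * Half the configured imbalance, as a percentage: e.g. with an
  	 * imbalance_pct of 125 this is 112, so a remote group must be more
  	 * than ~12% less loaded than the local one (checked below as
  	 * 100*this_load < imbalance*min_load) before it is preferred.
  	 */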
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1450

aaee1203c   Peter Zijlstra   sched: Move sched...
1451
1452
1453
1454
  	do {
  		unsigned long load, avg_load;
  		int local_group;
  		int i;
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1455

aaee1203c   Peter Zijlstra   sched: Move sched...
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
  		/* Skip over this group if it has no CPUs allowed */
  		if (!cpumask_intersects(sched_group_cpus(group),
  					&p->cpus_allowed))
  			continue;
  
  		local_group = cpumask_test_cpu(this_cpu,
  					       sched_group_cpus(group));
  
  		/* Tally up the load of all CPUs in the group */
  		avg_load = 0;
  
  		for_each_cpu(i, sched_group_cpus(group)) {
  			/* Bias balancing toward cpus of our domain */
  			if (local_group)
  				load = source_load(i, load_idx);
  			else
  				load = target_load(i, load_idx);
  
  			avg_load += load;
  		}
  
  		/* Adjust by relative CPU power of the group */
9c3f75cbd   Peter Zijlstra   sched: Break out ...
1478
  		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
aaee1203c   Peter Zijlstra   sched: Move sched...
1479
1480
1481
  
  		if (local_group) {
  			this_load = avg_load;
aaee1203c   Peter Zijlstra   sched: Move sched...
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
  		} else if (avg_load < min_load) {
  			min_load = avg_load;
  			idlest = group;
  		}
  	} while (group = group->next, group != sd->groups);
  
  	if (!idlest || 100*this_load < imbalance*min_load)
  		return NULL;
  	return idlest;
  }
  
  /*
   * find_idlest_cpu - find the idlest cpu among the cpus in group.
   */
  static int
  find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  {
  	unsigned long load, min_load = ULONG_MAX;
  	int idlest = -1;
  	int i;
  
  	/* Traverse only the allowed CPUs */
  	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
  		load = weighted_cpuload(i);
  
  		if (load < min_load || (load == min_load && i == this_cpu)) {
  			min_load = load;
  			idlest = i;
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1510
1511
  		}
  	}
aaee1203c   Peter Zijlstra   sched: Move sched...
1512
1513
  	return idlest;
  }
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1514

aaee1203c   Peter Zijlstra   sched: Move sched...
1515
  /*
a50bde513   Peter Zijlstra   sched: Cleanup se...
1516
1517
   * Try and locate an idle CPU in the sched_domain.
   */
99bd5e2f2   Suresh Siddha   sched: Fix select...
1518
  static int select_idle_sibling(struct task_struct *p, int target)
a50bde513   Peter Zijlstra   sched: Cleanup se...
1519
1520
1521
  {
  	int cpu = smp_processor_id();
  	int prev_cpu = task_cpu(p);
99bd5e2f2   Suresh Siddha   sched: Fix select...
1522
  	struct sched_domain *sd;
a50bde513   Peter Zijlstra   sched: Cleanup se...
1523
1524
1525
  	int i;
  
  	/*
99bd5e2f2   Suresh Siddha   sched: Fix select...
1526
1527
  	 * If the task is going to be woken-up on this cpu and if it is
  	 * already idle, then it is the right target.
a50bde513   Peter Zijlstra   sched: Cleanup se...
1528
  	 */
99bd5e2f2   Suresh Siddha   sched: Fix select...
1529
1530
1531
1532
1533
1534
1535
1536
  	if (target == cpu && idle_cpu(cpu))
  		return cpu;
  
  	/*
  	 * If the task is going to be woken-up on the cpu where it previously
  	 * ran and if it is currently idle, then it is the right target.
  	 */
  	if (target == prev_cpu && idle_cpu(prev_cpu))
fe3bcfe1f   Peter Zijlstra   sched: More gener...
1537
  		return prev_cpu;
a50bde513   Peter Zijlstra   sched: Cleanup se...
1538
1539
  
  	/*
99bd5e2f2   Suresh Siddha   sched: Fix select...
1540
  	 * Otherwise, iterate the domains and find an eligible idle cpu.
a50bde513   Peter Zijlstra   sched: Cleanup se...
1541
  	 */
dce840a08   Peter Zijlstra   sched: Dynamicall...
1542
  	rcu_read_lock();
99bd5e2f2   Suresh Siddha   sched: Fix select...
1543
1544
  	for_each_domain(target, sd) {
  		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
fe3bcfe1f   Peter Zijlstra   sched: More gener...
1545
  			break;
99bd5e2f2   Suresh Siddha   sched: Fix select...
1546
1547
1548
1549
1550
1551
  
  		for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
  			if (idle_cpu(i)) {
  				target = i;
  				break;
  			}
a50bde513   Peter Zijlstra   sched: Cleanup se...
1552
  		}
99bd5e2f2   Suresh Siddha   sched: Fix select...
1553
1554
1555
1556
1557
1558
1559
1560
  
  		/*
  		 * Lets stop looking for an idle sibling when we reached
  		 * the domain that spans the current cpu and prev_cpu.
  		 */
  		if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
  		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
  			break;
a50bde513   Peter Zijlstra   sched: Cleanup se...
1561
  	}
dce840a08   Peter Zijlstra   sched: Dynamicall...
1562
  	rcu_read_unlock();
a50bde513   Peter Zijlstra   sched: Cleanup se...
1563
1564
1565
1566
1567
  
  	return target;
  }
  
  /*
aaee1203c   Peter Zijlstra   sched: Move sched...
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
   * sched_balance_self: balance the current task (running on cpu) in domains
   * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
   * SD_BALANCE_EXEC.
   *
   * Balance, ie. select the least loaded group.
   *
   * Returns the target CPU number, or the same CPU if no balancing is needed.
   *
   * preempt must be disabled.
   */
0017d7350   Peter Zijlstra   sched: Fix TASK_W...
1578
  static int
7608dec2c   Peter Zijlstra   sched: Drop the r...
1579
  select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
aaee1203c   Peter Zijlstra   sched: Move sched...
1580
  {
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
1581
  	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
c88d59108   Peter Zijlstra   sched: Merge sele...
1582
1583
1584
  	int cpu = smp_processor_id();
  	int prev_cpu = task_cpu(p);
  	int new_cpu = cpu;
99bd5e2f2   Suresh Siddha   sched: Fix select...
1585
  	int want_affine = 0;
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
1586
  	int want_sd = 1;
5158f4e44   Peter Zijlstra   sched: Clean up t...
1587
  	int sync = wake_flags & WF_SYNC;
c88d59108   Peter Zijlstra   sched: Merge sele...
1588

0763a660a   Peter Zijlstra   sched: Rename sel...
1589
  	if (sd_flag & SD_BALANCE_WAKE) {
beac4c7e4   Mike Galbraith   sched: Remove AFF...
1590
  		if (cpumask_test_cpu(cpu, &p->cpus_allowed))
c88d59108   Peter Zijlstra   sched: Merge sele...
1591
1592
1593
  			want_affine = 1;
  		new_cpu = prev_cpu;
  	}
aaee1203c   Peter Zijlstra   sched: Move sched...
1594

dce840a08   Peter Zijlstra   sched: Dynamicall...
1595
  	rcu_read_lock();
aaee1203c   Peter Zijlstra   sched: Move sched...
1596
  	for_each_domain(cpu, tmp) {
e4f428884   Peter Zijlstra   sched: Select_tas...
1597
1598
  		if (!(tmp->flags & SD_LOAD_BALANCE))
  			continue;
aaee1203c   Peter Zijlstra   sched: Move sched...
1599
  		/*
ae154be1f   Peter Zijlstra   sched: Weaken SD_...
1600
1601
  		 * If power savings logic is enabled for a domain, see if we
  		 * are not overloaded; if so, don't balance wider.
aaee1203c   Peter Zijlstra   sched: Move sched...
1602
  		 */
59abf0264   Peter Zijlstra   sched: Add SD_PRE...
1603
  		if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) {
ae154be1f   Peter Zijlstra   sched: Weaken SD_...
1604
1605
1606
1607
1608
1609
1610
1611
1612
  			unsigned long power = 0;
  			unsigned long nr_running = 0;
  			unsigned long capacity;
  			int i;
  
  			for_each_cpu(i, sched_domain_span(tmp)) {
  				power += power_of(i);
  				nr_running += cpu_rq(i)->cfs.nr_running;
  			}
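  			/*
  			 * power is in SCHED_POWER_SCALE (1024) units per cpu,
  			 * so rounding gives the number of full-CPU capacities
  			 * available in this domain.
  			 */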
1399fa780   Nikhil Rao   sched: Introduce ...
1613
  			capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
ae154be1f   Peter Zijlstra   sched: Weaken SD_...
1614

59abf0264   Peter Zijlstra   sched: Add SD_PRE...
1615
1616
1617
1618
  			if (tmp->flags & SD_POWERSAVINGS_BALANCE)
  				nr_running /= 2;
  
  			if (nr_running < capacity)
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
1619
  				want_sd = 0;
ae154be1f   Peter Zijlstra   sched: Weaken SD_...
1620
  		}
aaee1203c   Peter Zijlstra   sched: Move sched...
1621

fe3bcfe1f   Peter Zijlstra   sched: More gener...
1622
  		/*
99bd5e2f2   Suresh Siddha   sched: Fix select...
1623
1624
  		 * If both cpu and prev_cpu are part of this domain,
  		 * cpu is a valid SD_WAKE_AFFINE target.
fe3bcfe1f   Peter Zijlstra   sched: More gener...
1625
  		 */
99bd5e2f2   Suresh Siddha   sched: Fix select...
1626
1627
1628
1629
  		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
  		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
  			affine_sd = tmp;
  			want_affine = 0;
c88d59108   Peter Zijlstra   sched: Merge sele...
1630
  		}
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
1631
1632
  		if (!want_sd && !want_affine)
  			break;
0763a660a   Peter Zijlstra   sched: Rename sel...
1633
  		if (!(tmp->flags & sd_flag))
c88d59108   Peter Zijlstra   sched: Merge sele...
1634
  			continue;
29cd8bae3   Peter Zijlstra   sched: Fix SD_POW...
1635
1636
1637
  		if (want_sd)
  			sd = tmp;
  	}
8b911acdf   Mike Galbraith   sched: Fix select...
1638
  	if (affine_sd) {
99bd5e2f2   Suresh Siddha   sched: Fix select...
1639
  		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
dce840a08   Peter Zijlstra   sched: Dynamicall...
1640
1641
1642
1643
  			prev_cpu = cpu;
  
  		new_cpu = select_idle_sibling(p, prev_cpu);
  		goto unlock;
8b911acdf   Mike Galbraith   sched: Fix select...
1644
  	}
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1645

aaee1203c   Peter Zijlstra   sched: Move sched...
1646
  	while (sd) {
5158f4e44   Peter Zijlstra   sched: Clean up t...
1647
  		int load_idx = sd->forkexec_idx;
aaee1203c   Peter Zijlstra   sched: Move sched...
1648
  		struct sched_group *group;
c88d59108   Peter Zijlstra   sched: Merge sele...
1649
  		int weight;
098fb9db2   Ingo Molnar   sched: clean up w...
1650

0763a660a   Peter Zijlstra   sched: Rename sel...
1651
  		if (!(sd->flags & sd_flag)) {
aaee1203c   Peter Zijlstra   sched: Move sched...
1652
1653
1654
  			sd = sd->child;
  			continue;
  		}
098fb9db2   Ingo Molnar   sched: clean up w...
1655

5158f4e44   Peter Zijlstra   sched: Clean up t...
1656
1657
  		if (sd_flag & SD_BALANCE_WAKE)
  			load_idx = sd->wake_idx;
098fb9db2   Ingo Molnar   sched: clean up w...
1658

5158f4e44   Peter Zijlstra   sched: Clean up t...
1659
  		group = find_idlest_group(sd, p, cpu, load_idx);
aaee1203c   Peter Zijlstra   sched: Move sched...
1660
1661
1662
1663
  		if (!group) {
  			sd = sd->child;
  			continue;
  		}
4ae7d5cef   Ingo Molnar   sched: improve af...
1664

d7c33c493   Peter Zijlstra   sched: Fix task a...
1665
  		new_cpu = find_idlest_cpu(group, p, cpu);
aaee1203c   Peter Zijlstra   sched: Move sched...
1666
1667
1668
1669
  		if (new_cpu == -1 || new_cpu == cpu) {
  			/* Now try balancing at a lower domain level of cpu */
  			sd = sd->child;
  			continue;
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1670
  		}
aaee1203c   Peter Zijlstra   sched: Move sched...
1671
1672
1673
  
  		/* Now try balancing at a lower domain level of new_cpu */
  		cpu = new_cpu;
669c55e9f   Peter Zijlstra   sched: Pre-comput...
1674
  		weight = sd->span_weight;
aaee1203c   Peter Zijlstra   sched: Move sched...
1675
1676
  		sd = NULL;
  		for_each_domain(cpu, tmp) {
669c55e9f   Peter Zijlstra   sched: Pre-comput...
1677
  			if (weight <= tmp->span_weight)
aaee1203c   Peter Zijlstra   sched: Move sched...
1678
  				break;
0763a660a   Peter Zijlstra   sched: Rename sel...
1679
  			if (tmp->flags & sd_flag)
aaee1203c   Peter Zijlstra   sched: Move sched...
1680
1681
1682
  				sd = tmp;
  		}
  		/* while loop will break here if sd == NULL */
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1683
  	}
dce840a08   Peter Zijlstra   sched: Dynamicall...
1684
1685
  unlock:
  	rcu_read_unlock();
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1686

c88d59108   Peter Zijlstra   sched: Merge sele...
1687
  	return new_cpu;
e7693a362   Gregory Haskins   sched: de-SCHED_O...
1688
1689
  }
  #endif /* CONFIG_SMP */
e52fb7c09   Peter Zijlstra   sched: prefer wakers
1690
1691
  static unsigned long
  wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
0bbd3336e   Peter Zijlstra   sched: fix wakeup...
1692
1693
1694
1695
  {
  	unsigned long gran = sysctl_sched_wakeup_granularity;
  
  	/*
e52fb7c09   Peter Zijlstra   sched: prefer wakers
1696
1697
  	 * Since it's curr that is running now, convert the gran from
  	 * real-time to virtual-time in its units.
13814d42e   Mike Galbraith   sched: Remove ASY...
1698
1699
1700
1701
1702
1703
1704
1705
1706
  	 *
  	 * By using 'se' instead of 'curr' we penalize light tasks, so
  	 * they get preempted easier. That is, if 'se' < 'curr' then
  	 * the resulting gran will be larger, therefore penalizing the
  	 * lighter task; if, on the other hand, 'se' > 'curr' then the
  	 * resulting gran will be smaller, again penalizing the lighter task.
  	 *
  	 * This is especially important for buddies when the leftmost
  	 * task is higher priority than the buddy.
0bbd3336e   Peter Zijlstra   sched: fix wakeup...
1707
  	 */
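  	/*
  	 * calc_delta_fair() scales gran by NICE_0_LOAD/se->load.weight, so a
  	 * heavier 'se' sees a smaller virtual granularity and a lighter one
  	 * a larger one.
  	 */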
f4ad9bd20   Shaohua Li   sched: Eliminate ...
1708
  	return calc_delta_fair(gran, se);
0bbd3336e   Peter Zijlstra   sched: fix wakeup...
1709
1710
1711
  }
  
  /*
464b75273   Peter Zijlstra   sched: re-instate...
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
   * Should 'se' preempt 'curr'.
   *
   *             |s1
   *        |s2
   *   |s3
   *         g
   *      |<--->|c
   *
   *  w(c, s1) = -1
   *  w(c, s2) =  0
   *  w(c, s3) =  1
   *
   */
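  /*
   * Return 1 when 'se' leads 'curr' in vruntime by more than one wakeup
   * granularity (preempt), 0 when it leads but stays within the granularity,
   * and -1 when 'curr' is still ahead.
   */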
  static int
  wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
  {
  	s64 gran, vdiff = curr->vruntime - se->vruntime;
  
  	if (vdiff <= 0)
  		return -1;
e52fb7c09   Peter Zijlstra   sched: prefer wakers
1732
  	gran = wakeup_gran(curr, se);
464b75273   Peter Zijlstra   sched: re-instate...
1733
1734
1735
1736
1737
  	if (vdiff > gran)
  		return 1;
  
  	return 0;
  }
02479099c   Peter Zijlstra   sched: fix buddie...
1738
1739
  static void set_last_buddy(struct sched_entity *se)
  {
69c80f3e9   Venkatesh Pallipadi   sched: Make set_*...
1740
1741
1742
1743
1744
  	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
  		return;
  
  	for_each_sched_entity(se)
  		cfs_rq_of(se)->last = se;
02479099c   Peter Zijlstra   sched: fix buddie...
1745
1746
1747
1748
  }
  
  static void set_next_buddy(struct sched_entity *se)
  {
69c80f3e9   Venkatesh Pallipadi   sched: Make set_*...
1749
1750
1751
1752
1753
  	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
  		return;
  
  	for_each_sched_entity(se)
  		cfs_rq_of(se)->next = se;
02479099c   Peter Zijlstra   sched: fix buddie...
1754
  }
ac53db596   Rik van Riel   sched: Use a budd...
1755
1756
  static void set_skip_buddy(struct sched_entity *se)
  {
69c80f3e9   Venkatesh Pallipadi   sched: Make set_*...
1757
1758
  	for_each_sched_entity(se)
  		cfs_rq_of(se)->skip = se;
ac53db596   Rik van Riel   sched: Use a budd...
1759
  }
464b75273   Peter Zijlstra   sched: re-instate...
1760
  /*
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1761
1762
   * Preempt the current task with a newly woken task if needed:
   */
5a9b86f64   Peter Zijlstra   sched: Rename fla...
1763
  static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1764
1765
  {
  	struct task_struct *curr = rq->curr;
8651a86c3   Srivatsa Vaddagiri   sched: group sche...
1766
  	struct sched_entity *se = &curr->se, *pse = &p->se;
03e89e457   Mike Galbraith   sched: fix wakeup...
1767
  	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
f685ceaca   Mike Galbraith   sched: Strengthen...
1768
  	int scale = cfs_rq->nr_running >= sched_nr_latency;
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
1769
  	int next_buddy_marked = 0;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1770

4ae7d5cef   Ingo Molnar   sched: improve af...
1771
1772
  	if (unlikely(se == pse))
  		return;
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
1773
  	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
3cb63d527   Mike Galbraith   sched: Complete b...
1774
  		set_next_buddy(pse);
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
1775
1776
  		next_buddy_marked = 1;
  	}
57fdc26d4   Peter Zijlstra   sched: fixup budd...
1777

aec0a5142   Bharata B Rao   sched: call resch...
1778
1779
1780
1781
1782
1783
  	/*
  	 * We can come here with TIF_NEED_RESCHED already set from the
  	 * new-task wakeup path.
  	 */
  	if (test_tsk_need_resched(curr))
  		return;
a2f5c9ab7   Darren Hart   sched: Allow SCHE...
1784
1785
1786
1787
  	/* Idle tasks are by definition preempted by non-idle tasks. */
  	if (unlikely(curr->policy == SCHED_IDLE) &&
  	    likely(p->policy != SCHED_IDLE))
  		goto preempt;
91c234b4e   Ingo Molnar   sched: do not wak...
1788
  	/*
a2f5c9ab7   Darren Hart   sched: Allow SCHE...
1789
1790
  	 * Batch and idle tasks do not preempt non-idle tasks (their preemption
  	 * is driven by the tick):
91c234b4e   Ingo Molnar   sched: do not wak...
1791
  	 */
6bc912b71   Peter Zijlstra   sched: SCHED_OTHE...
1792
  	if (unlikely(p->policy != SCHED_NORMAL))
91c234b4e   Ingo Molnar   sched: do not wak...
1793
  		return;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1794

bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1795

ad4b78bbc   Peter Zijlstra   sched: Add new wa...
1796
1797
  	if (!sched_feat(WAKEUP_PREEMPT))
  		return;
464b75273   Peter Zijlstra   sched: re-instate...
1798
  	find_matching_se(&se, &pse);
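  	/*
  	 * se and pse now refer to entities in a common cfs_rq, so their
  	 * vruntimes can be compared directly below.
  	 */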
9bbd73743   Paul Turner   sched: update cor...
1799
  	update_curr(cfs_rq_of(se));
002f128b4   Paul Turner   sched: remove red...
1800
  	BUG_ON(!pse);
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
1801
1802
1803
1804
1805
1806
1807
  	if (wakeup_preempt_entity(se, pse) == 1) {
  		/*
  		 * Bias pick_next to pick the sched entity that is
  		 * triggering this preemption.
  		 */
  		if (!next_buddy_marked)
  			set_next_buddy(pse);
3a7e73a2e   Peter Zijlstra   sched: Clean up c...
1808
  		goto preempt;
2f36825b1   Venkatesh Pallipadi   sched: Next buddy...
1809
  	}
464b75273   Peter Zijlstra   sched: re-instate...
1810

3a7e73a2e   Peter Zijlstra   sched: Clean up c...
1811
  	return;
a65ac745e   Jupyung Lee   sched: Move updat...
1812

3a7e73a2e   Peter Zijlstra   sched: Clean up c...
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
  preempt:
  	resched_task(curr);
  	/*
  	 * Only set the backward buddy when the current task is still
  	 * on the rq. This can happen when a wakeup gets interleaved
  	 * with schedule on the ->pre_schedule() or idle_balance()
  	 * point, either of which can drop the rq lock.
  	 *
  	 * Also, during early boot the idle thread is in the fair class,
  	 * for obvious reasons it's a bad idea to schedule back to it.
  	 */
  	if (unlikely(!se->on_rq || curr == rq->idle))
  		return;
  
  	if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
  		set_last_buddy(se);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1829
  }
fb8d47240   Ingo Molnar   sched: remove the...
1830
  static struct task_struct *pick_next_task_fair(struct rq *rq)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1831
  {
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1832
  	struct task_struct *p;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1833
1834
  	struct cfs_rq *cfs_rq = &rq->cfs;
  	struct sched_entity *se;
36ace27e3   Tim Blechmann   sched: Optimize b...
1835
  	if (!cfs_rq->nr_running)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1836
1837
1838
  		return NULL;
  
  	do {
9948f4b2a   Ingo Molnar   sched: remove the...
1839
  		se = pick_next_entity(cfs_rq);
f4b6755fb   Peter Zijlstra   sched: cleanup fa...
1840
  		set_next_entity(cfs_rq, se);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1841
1842
  		cfs_rq = group_cfs_rq(se);
  	} while (cfs_rq);
8f4d37ec0   Peter Zijlstra   sched: high-res p...
1843
1844
1845
1846
  	p = task_of(se);
  	hrtick_start_fair(rq, p);
  
  	return p;
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1847
1848
1849
1850
1851
  }
  
  /*
   * Account for a descheduled task:
   */
31ee529cc   Ingo Molnar   sched: remove the...
1852
  static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1853
1854
1855
1856
1857
1858
  {
  	struct sched_entity *se = &prev->se;
  	struct cfs_rq *cfs_rq;
  
  	for_each_sched_entity(se) {
  		cfs_rq = cfs_rq_of(se);
ab6cde269   Ingo Molnar   sched: remove the...
1859
  		put_prev_entity(cfs_rq, se);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1860
1861
  	}
  }
ac53db596   Rik van Riel   sched: Use a budd...
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
  /*
   * sched_yield() is very simple
   *
   * The magic of dealing with the ->skip buddy is in pick_next_entity.
   */
  static void yield_task_fair(struct rq *rq)
  {
  	struct task_struct *curr = rq->curr;
  	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
  	struct sched_entity *se = &curr->se;
  
  	/*
  	 * Are we the only task in the tree?
  	 */
  	if (unlikely(rq->nr_running == 1))
  		return;
  
  	clear_buddies(cfs_rq, se);
  
  	if (curr->policy != SCHED_BATCH) {
  		update_rq_clock(rq);
  		/*
  		 * Update run-time statistics of the 'current'.
  		 */
  		update_curr(cfs_rq);
  	}
  
  	set_skip_buddy(se);
  }
d95f41220   Mike Galbraith   sched: Add yield_...
1891
1892
1893
1894
1895
1896
1897
1898
1899
  static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
  {
  	struct sched_entity *se = &p->se;
  
  	if (!se->on_rq)
  		return false;
  
  	/* Tell the scheduler that we'd really like pse to run next. */
  	set_next_buddy(se);
d95f41220   Mike Galbraith   sched: Add yield_...
1900
1901
1902
1903
  	yield_task_fair(rq);
  
  	return true;
  }
681f3e685   Peter Williams   sched: isolate SM...
1904
  #ifdef CONFIG_SMP
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
1905
1906
1907
  /**************************************************
   * Fair scheduling class load-balancing methods:
   */
1e3c88bde   Peter Zijlstra   sched: Move load ...
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
  /*
   * pull_task - move a task from a remote runqueue to the local runqueue.
   * Both runqueues must be locked.
   */
  static void pull_task(struct rq *src_rq, struct task_struct *p,
  		      struct rq *this_rq, int this_cpu)
  {
  	deactivate_task(src_rq, p, 0);
  	set_task_cpu(p, this_cpu);
  	activate_task(this_rq, p, 0);
  	check_preempt_curr(this_rq, p, 0);
  }
  
  /*
   * can_migrate_task - may task p be migrated from runqueue rq to this_cpu?
   */
  static
  int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
  		     struct sched_domain *sd, enum cpu_idle_type idle,
  		     int *all_pinned)
  {
  	int tsk_cache_hot = 0;
  	/*
  	 * We do not migrate tasks that are:
  	 * 1) running (obviously), or
  	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
  	 * 3) are cache-hot on their current CPU.
  	 */
  	if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
41acab885   Lucas De Marchi   sched: Implement ...
1937
  		schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
1e3c88bde   Peter Zijlstra   sched: Move load ...
1938
1939
1940
1941
1942
  		return 0;
  	}
  	*all_pinned = 0;
  
  	if (task_running(rq, p)) {
41acab885   Lucas De Marchi   sched: Implement ...
1943
  		schedstat_inc(p, se.statistics.nr_failed_migrations_running);
1e3c88bde   Peter Zijlstra   sched: Move load ...
1944
1945
1946
1947
1948
1949
1950
1951
  		return 0;
  	}
  
  	/*
  	 * Aggressive migration if:
  	 * 1) task is cache cold, or
  	 * 2) too many balance attempts have failed.
  	 */
305e6835e   Venkatesh Pallipadi   sched: Do not acc...
1952
  	tsk_cache_hot = task_hot(p, rq->clock_task, sd);
1e3c88bde   Peter Zijlstra   sched: Move load ...
1953
1954
1955
1956
1957
  	if (!tsk_cache_hot ||
  		sd->nr_balance_failed > sd->cache_nice_tries) {
  #ifdef CONFIG_SCHEDSTATS
  		if (tsk_cache_hot) {
  			schedstat_inc(sd, lb_hot_gained[idle]);
41acab885   Lucas De Marchi   sched: Implement ...
1958
  			schedstat_inc(p, se.statistics.nr_forced_migrations);
1e3c88bde   Peter Zijlstra   sched: Move load ...
1959
1960
1961
1962
1963
1964
  		}
  #endif
  		return 1;
  	}
  
  	if (tsk_cache_hot) {
41acab885   Lucas De Marchi   sched: Implement ...
1965
  		schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
1e3c88bde   Peter Zijlstra   sched: Move load ...
1966
1967
1968
1969
  		return 0;
  	}
  	return 1;
  }
897c395f4   Peter Zijlstra   sched: Remove rq_...
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
  /*
   * move_one_task tries to move exactly one task from busiest to this_rq, as
   * part of active balancing operations within "domain".
   * Returns 1 if successful and 0 otherwise.
   *
   * Called with both runqueues locked.
   */
  static int
  move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
  	      struct sched_domain *sd, enum cpu_idle_type idle)
  {
  	struct task_struct *p, *n;
  	struct cfs_rq *cfs_rq;
  	int pinned = 0;
  
  	for_each_leaf_cfs_rq(busiest, cfs_rq) {
  		list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
  
  			if (!can_migrate_task(p, busiest, this_cpu,
  						sd, idle, &pinned))
  				continue;
  
  			pull_task(busiest, p, this_rq, this_cpu);
  			/*
  			 * Right now, this is only the second place pull_task()
  			 * is called, so we can safely collect pull_task()
  			 * stats here rather than inside pull_task().
  			 */
  			schedstat_inc(sd, lb_gained[idle]);
  			return 1;
  		}
  	}
  
  	return 0;
  }
1e3c88bde   Peter Zijlstra   sched: Move load ...
2005
2006
2007
2008
  static unsigned long
  balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
  	      unsigned long max_load_move, struct sched_domain *sd,
  	      enum cpu_idle_type idle, int *all_pinned,
931aeeda0   Vladimir Davydov   sched: Remove unu...
2009
  	      struct cfs_rq *busiest_cfs_rq)
1e3c88bde   Peter Zijlstra   sched: Move load ...
2010
  {
b30aef17f   Ken Chen   sched: Fix errone...
2011
  	int loops = 0, pulled = 0;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2012
  	long rem_load_move = max_load_move;
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
2013
  	struct task_struct *p, *n;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2014
2015
2016
  
  	if (max_load_move == 0)
  		goto out;
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
2017
2018
2019
  	list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) {
  		if (loops++ > sysctl_sched_nr_migrate)
  			break;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2020

ee00e66ff   Peter Zijlstra   sched: Remove rq_...
2021
  		if ((p->se.load.weight >> 1) > rem_load_move ||
b30aef17f   Ken Chen   sched: Fix errone...
2022
2023
  		    !can_migrate_task(p, busiest, this_cpu, sd, idle,
  				      all_pinned))
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
2024
  			continue;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2025

ee00e66ff   Peter Zijlstra   sched: Remove rq_...
2026
2027
2028
  		pull_task(busiest, p, this_rq, this_cpu);
  		pulled++;
  		rem_load_move -= p->se.load.weight;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2029
2030
  
  #ifdef CONFIG_PREEMPT
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
2031
2032
2033
2034
2035
2036
2037
  		/*
  		 * NEWIDLE balancing is a source of latency, so preemptible
  		 * kernels will stop after the first task is pulled to minimize
  		 * the critical section.
  		 */
  		if (idle == CPU_NEWLY_IDLE)
  			break;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2038
  #endif
ee00e66ff   Peter Zijlstra   sched: Remove rq_...
2039
2040
2041
2042
2043
2044
  		/*
  		 * We only want to steal up to the prescribed amount of
  		 * weighted load.
  		 */
  		if (rem_load_move <= 0)
  			break;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2045
2046
2047
2048
2049
2050
2051
2052
  	}
  out:
  	/*
  	 * Right now, this is one of only two places pull_task() is called,
  	 * so we can safely collect pull_task() stats here rather than
  	 * inside pull_task().
  	 */
  	schedstat_add(sd, lb_gained[idle], pulled);
1e3c88bde   Peter Zijlstra   sched: Move load ...
2053
2054
  	return max_load_move - rem_load_move;
  }
230059de7   Peter Zijlstra   sched: Remove fro...
2055
  #ifdef CONFIG_FAIR_GROUP_SCHED
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
2056
2057
2058
  /*
   * update tg->load_weight by folding this cpu's load_avg
   */
67e86250f   Paul Turner   sched: Introduce ...
2059
  static int update_shares_cpu(struct task_group *tg, int cpu)
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
2060
2061
2062
2063
  {
  	struct cfs_rq *cfs_rq;
  	unsigned long flags;
  	struct rq *rq;
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
  
  	if (!tg->se[cpu])
  		return 0;
  
  	rq = cpu_rq(cpu);
  	cfs_rq = tg->cfs_rq[cpu];
  
  	raw_spin_lock_irqsave(&rq->lock, flags);
  
  	update_rq_clock(rq);
d6b559182   Paul Turner   sched: Allow upda...
2074
  	update_cfs_load(cfs_rq, 1);
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
2075
2076
2077
2078
2079
  
  	/*
  	 * We need to update shares after updating tg->load_weight in
  	 * order to adjust the weight of groups with long running tasks.
  	 */
6d5ab2932   Paul Turner   sched: Simplify u...
2080
  	update_cfs_shares(cfs_rq);
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
  
  	raw_spin_unlock_irqrestore(&rq->lock, flags);
  
  	return 0;
  }
  
  static void update_shares(int cpu)
  {
  	struct cfs_rq *cfs_rq;
  	struct rq *rq = cpu_rq(cpu);
  
  	rcu_read_lock();
9763b67fb   Peter Zijlstra   sched, cgroup: Op...
2093
2094
2095
2096
  	/*
  	 * Iterates the task_group tree in a bottom up fashion, see
  	 * list_add_leaf_cfs_rq() for details.
  	 */
67e86250f   Paul Turner   sched: Introduce ...
2097
2098
  	for_each_leaf_cfs_rq(rq, cfs_rq)
  		update_shares_cpu(cfs_rq->tg, cpu);
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
2099
2100
  	rcu_read_unlock();
  }
9763b67fb   Peter Zijlstra   sched, cgroup: Op...
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
  /*
   * Compute the cpu's hierarchical load factor for each task group.
   * This needs to be done in a top-down fashion because the load of a child
   * group is a fraction of its parent's load.
   */
  static int tg_load_down(struct task_group *tg, void *data)
  {
  	unsigned long load;
  	long cpu = (long)data;
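  	/*
  	 * h_load(tg) = h_load(parent) * weight(tg's se on this cpu)
  	 *                            / (weight(parent's cfs_rq) + 1)
  	 * where the +1 only guards against a division by zero.
  	 */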
  
  	if (!tg->parent) {
  		load = cpu_rq(cpu)->load.weight;
  	} else {
  		load = tg->parent->cfs_rq[cpu]->h_load;
  		load *= tg->se[cpu]->load.weight;
  		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
  	}
  
  	tg->cfs_rq[cpu]->h_load = load;
  
  	return 0;
  }
  
  static void update_h_load(long cpu)
  {
  	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
  }
230059de7   Peter Zijlstra   sched: Remove fro...
2128
2129
2130
2131
  static unsigned long
  load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
  		  unsigned long max_load_move,
  		  struct sched_domain *sd, enum cpu_idle_type idle,
931aeeda0   Vladimir Davydov   sched: Remove unu...
2132
  		  int *all_pinned)
230059de7   Peter Zijlstra   sched: Remove fro...
2133
2134
  {
  	long rem_load_move = max_load_move;
9763b67fb   Peter Zijlstra   sched, cgroup: Op...
2135
  	struct cfs_rq *busiest_cfs_rq;
230059de7   Peter Zijlstra   sched: Remove fro...
2136
2137
  
  	rcu_read_lock();
9763b67fb   Peter Zijlstra   sched, cgroup: Op...
2138
  	update_h_load(cpu_of(busiest));
230059de7   Peter Zijlstra   sched: Remove fro...
2139

9763b67fb   Peter Zijlstra   sched, cgroup: Op...
2140
  	for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
230059de7   Peter Zijlstra   sched: Remove fro...
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
  		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
  		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
  		u64 rem_load, moved_load;
  
  		/*
  		 * empty group
  		 */
  		if (!busiest_cfs_rq->task_weight)
  			continue;
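  		/*
  		 * rem_load_move is expressed in root (hierarchical) load;
  		 * scale it by load.weight/h_load to get this cfs_rq's local
  		 * weight, and scale the moved amount back the other way below.
  		 */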
  
  		rem_load = (u64)rem_load_move * busiest_weight;
  		rem_load = div_u64(rem_load, busiest_h_load + 1);
  
  		moved_load = balance_tasks(this_rq, this_cpu, busiest,
931aeeda0   Vladimir Davydov   sched: Remove unu...
2155
  				rem_load, sd, idle, all_pinned,
230059de7   Peter Zijlstra   sched: Remove fro...
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
  				busiest_cfs_rq);
  
  		if (!moved_load)
  			continue;
  
  		moved_load *= busiest_h_load;
  		moved_load = div_u64(moved_load, busiest_weight + 1);
  
  		rem_load_move -= moved_load;
  		if (rem_load_move < 0)
  			break;
  	}
  	rcu_read_unlock();
  
  	return max_load_move - rem_load_move;
  }
  #else
9e3081ca6   Peter Zijlstra   sched: Make tg_sh...
2173
2174
2175
  static inline void update_shares(int cpu)
  {
  }
230059de7   Peter Zijlstra   sched: Remove fro...
2176
2177
2178
2179
  static unsigned long
  load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
  		  unsigned long max_load_move,
  		  struct sched_domain *sd, enum cpu_idle_type idle,
931aeeda0   Vladimir Davydov   sched: Remove unu...
2180
  		  int *all_pinned)
230059de7   Peter Zijlstra   sched: Remove fro...
2181
2182
2183
  {
  	return balance_tasks(this_rq, this_cpu, busiest,
  			max_load_move, sd, idle, all_pinned,
931aeeda0   Vladimir Davydov   sched: Remove unu...
2184
  			&busiest->cfs);
230059de7   Peter Zijlstra   sched: Remove fro...
2185
2186
  }
  #endif
1e3c88bde   Peter Zijlstra   sched: Move load ...
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
  /*
   * move_tasks tries to move up to max_load_move weighted load from busiest to
   * this_rq, as part of a balancing operation within domain "sd".
   * Returns 1 if successful and 0 otherwise.
   *
   * Called with both runqueues locked.
   */
  static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
  		      unsigned long max_load_move,
  		      struct sched_domain *sd, enum cpu_idle_type idle,
  		      int *all_pinned)
  {
3d45fd804   Peter Zijlstra   sched: Remove the...
2199
  	unsigned long total_load_moved = 0, load_moved;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2200
2201
  
  	do {
3d45fd804   Peter Zijlstra   sched: Remove the...
2202
  		load_moved = load_balance_fair(this_rq, this_cpu, busiest,
1e3c88bde   Peter Zijlstra   sched: Move load ...
2203
  				max_load_move - total_load_moved,
931aeeda0   Vladimir Davydov   sched: Remove unu...
2204
  				sd, idle, all_pinned);
3d45fd804   Peter Zijlstra   sched: Remove the...
2205
2206
  
  		total_load_moved += load_moved;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2207
2208
2209
2210
2211
2212
2213
2214
2215
  
  #ifdef CONFIG_PREEMPT
  		/*
  		 * NEWIDLE balancing is a source of latency, so preemptible
  		 * kernels will stop after the first task is pulled to minimize
  		 * the critical section.
  		 */
  		if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
  			break;
baa8c1102   Peter Zijlstra   sched: Add a lock...
2216
2217
2218
2219
  
  		if (raw_spin_is_contended(&this_rq->lock) ||
  				raw_spin_is_contended(&busiest->lock))
  			break;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2220
  #endif
3d45fd804   Peter Zijlstra   sched: Remove the...
2221
  	} while (load_moved && max_load_move > total_load_moved);
1e3c88bde   Peter Zijlstra   sched: Move load ...
2222
2223
2224
  
  	return total_load_moved > 0;
  }
1e3c88bde   Peter Zijlstra   sched: Move load ...
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
  /********** Helpers for find_busiest_group ************************/
  /*
   * sd_lb_stats - Structure to store the statistics of a sched_domain
   * 		during load balancing.
   */
  struct sd_lb_stats {
  	struct sched_group *busiest; /* Busiest group in this sd */
  	struct sched_group *this;  /* Local group in this sd */
  	unsigned long total_load;  /* Total load of all groups in sd */
  	unsigned long total_pwr;   /*	Total power of all groups in sd */
  	unsigned long avg_load;	   /* Average load across all groups in sd */
  
  	/** Statistics of this group */
  	unsigned long this_load;
  	unsigned long this_load_per_task;
  	unsigned long this_nr_running;
fab476228   Nikhil Rao   sched: Force bala...
2241
  	unsigned long this_has_capacity;
aae6d3ddd   Suresh Siddha   sched: Use group ...
2242
  	unsigned int  this_idle_cpus;
1e3c88bde   Peter Zijlstra   sched: Move load ...
2243
2244
  
  	/* Statistics of the busiest group */
  	unsigned int  busiest_idle_cpus;
  	unsigned long max_load;
  	unsigned long busiest_load_per_task;
  	unsigned long busiest_nr_running;
  	unsigned long busiest_group_capacity;
  	unsigned long busiest_has_capacity;
  	unsigned int  busiest_group_weight;
  
  	int group_imb; /* Is there imbalance in this sd */
  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
  	int power_savings_balance; /* Is powersave balance needed for this sd */
  	struct sched_group *group_min; /* Least loaded group in sd */
  	struct sched_group *group_leader; /* Group which relieves group_min */
  	unsigned long min_load_per_task; /* load_per_task in group_min */
  	unsigned long leader_nr_running; /* Nr running of group_leader */
  	unsigned long min_nr_running; /* Nr running of group_min */
  #endif
  };
  
  /*
   * sg_lb_stats - stats of a sched_group required for load_balancing
   */
  struct sg_lb_stats {
  	unsigned long avg_load; /*Avg load across the CPUs of the group */
  	unsigned long group_load; /* Total load over the CPUs of the group */
  	unsigned long sum_nr_running; /* Nr tasks running in the group */
  	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
  	unsigned long group_capacity;
  	unsigned long idle_cpus;
  	unsigned long group_weight;
  	int group_imb; /* Is there an imbalance in the group ? */
  	int group_has_capacity; /* Is there extra capacity in the group? */
  };
  
  /**
   * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
   * @group: The group whose first cpu is to be returned.
   */
  static inline unsigned int group_first_cpu(struct sched_group *group)
  {
  	return cpumask_first(sched_group_cpus(group));
  }
  
  /**
   * get_sd_load_idx - Obtain the load index for a given sched domain.
   * @sd: The sched_domain whose load_idx is to be obtained.
   * @idle: The Idle status of the CPU for whose sd load_idx is obtained.
   */
  static inline int get_sd_load_idx(struct sched_domain *sd,
  					enum cpu_idle_type idle)
  {
  	int load_idx;
  
  	switch (idle) {
  	case CPU_NOT_IDLE:
  		load_idx = sd->busy_idx;
  		break;
  
  	case CPU_NEWLY_IDLE:
  		load_idx = sd->newidle_idx;
  		break;
  	default:
  		load_idx = sd->idle_idx;
  		break;
  	}
  
  	return load_idx;
  }
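  /*
   * Illustrative note: the index returned here selects one of the
   * rq->cpu_load[] entries consulted by source_load()/target_load().
   * Index 0 is the instantaneous weighted load and higher indexes are
   * progressively more damped averages, so busy balancing typically
   * looks at smoother history than newly-idle balancing.
   */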
  
  
  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
  /**
   * init_sd_power_savings_stats - Initialize power savings statistics for
   * the given sched_domain, during load balancing.
   *
   * @sd: Sched domain whose power-savings statistics are to be initialized.
   * @sds: Variable containing the statistics for sd.
   * @idle: Idle status of the CPU at which we're performing load-balancing.
   */
  static inline void init_sd_power_savings_stats(struct sched_domain *sd,
  	struct sd_lb_stats *sds, enum cpu_idle_type idle)
  {
  	/*
  	 * Busy processors will not participate in power savings
  	 * balance.
  	 */
  	if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
  		sds->power_savings_balance = 0;
  	else {
  		sds->power_savings_balance = 1;
  		sds->min_nr_running = ULONG_MAX;
  		sds->leader_nr_running = 0;
  	}
  }
  
  /**
   * update_sd_power_savings_stats - Update the power saving stats for a
   * sched_domain while performing load balancing.
   *
   * @group: sched_group belonging to the sched_domain under consideration.
   * @sds: Variable containing the statistics of the sched_domain
   * @local_group: Does group contain the CPU for which we're performing
   * 		load balancing ?
   * @sgs: Variable containing the statistics of the group.
   */
  static inline void update_sd_power_savings_stats(struct sched_group *group,
  	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
  {
  
  	if (!sds->power_savings_balance)
  		return;
  
  	/*
  	 * If the local group is idle or completely loaded
  	 * no need to do power savings balance at this domain
  	 */
  	if (local_group && (sds->this_nr_running >= sgs->group_capacity ||
  				!sds->this_nr_running))
  		sds->power_savings_balance = 0;
  
  	/*
  	 * If a group is already running at full capacity or idle,
  	 * don't include that group in power savings calculations
  	 */
  	if (!sds->power_savings_balance ||
  		sgs->sum_nr_running >= sgs->group_capacity ||
  		!sgs->sum_nr_running)
  		return;
  
  	/*
  	 * Calculate the group which has the least non-idle load.
  	 * This is the group from where we need to pick up the load
  	 * for saving power
  	 */
  	if ((sgs->sum_nr_running < sds->min_nr_running) ||
  	    (sgs->sum_nr_running == sds->min_nr_running &&
  	     group_first_cpu(group) > group_first_cpu(sds->group_min))) {
  		sds->group_min = group;
  		sds->min_nr_running = sgs->sum_nr_running;
  		sds->min_load_per_task = sgs->sum_weighted_load /
  						sgs->sum_nr_running;
  	}
  
  	/*
  	 * Calculate the group which is almost near its
  	 * capacity but still has some space to pick up some load
  	 * from other group and save more power
  	 */
  	if (sgs->sum_nr_running + 1 > sgs->group_capacity)
  		return;
  
  	if (sgs->sum_nr_running > sds->leader_nr_running ||
  	    (sgs->sum_nr_running == sds->leader_nr_running &&
  	     group_first_cpu(group) < group_first_cpu(sds->group_leader))) {
  		sds->group_leader = group;
  		sds->leader_nr_running = sgs->sum_nr_running;
  	}
  }
  
  /**
   * check_power_save_busiest_group - see if there is potential for some power-savings balance
   * @sds: Variable containing the statistics of the sched_domain
   *	under consideration.
   * @this_cpu: Cpu at which we're currently performing load-balancing.
   * @imbalance: Variable to store the imbalance.
   *
   * Description:
   * Check if we have potential to perform some power-savings balance.
   * If yes, set the busiest group to be the least loaded group in the
   * sched_domain, so that it's CPUs can be put to idle.
   *
   * Returns 1 if there is potential to perform power-savings balance.
   * Else returns 0.
   */
  static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
  					int this_cpu, unsigned long *imbalance)
  {
  	if (!sds->power_savings_balance)
  		return 0;
  
  	if (sds->this != sds->group_leader ||
  			sds->group_leader == sds->group_min)
  		return 0;
  
  	*imbalance = sds->min_load_per_task;
  	sds->busiest = sds->group_min;
  
  	return 1;
  
  }
  #else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
  static inline void init_sd_power_savings_stats(struct sched_domain *sd,
  	struct sd_lb_stats *sds, enum cpu_idle_type idle)
  {
  	return;
  }
  
  static inline void update_sd_power_savings_stats(struct sched_group *group,
  	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
  {
  	return;
  }
  
  static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
  					int this_cpu, unsigned long *imbalance)
  {
  	return 0;
  }
  #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
  
  
  unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
  {
  	return SCHED_POWER_SCALE;
  }
  
  unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
  {
  	return default_scale_freq_power(sd, cpu);
  }
  
  unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
  {
  	unsigned long weight = sd->span_weight;
  	unsigned long smt_gain = sd->smt_gain;
  
  	smt_gain /= weight;
  
  	return smt_gain;
  }
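  /*
   * Illustrative example: with SCHED_POWER_SCALE == 1024 and a two-thread
   * SMT sibling domain (weight == 2), sd->smt_gain is divided between the
   * siblings, so each hardware thread reports roughly half of one full
   * CPU's power rather than a full 1024 units.
   */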
  
  unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
  {
  	return default_scale_smt_power(sd, cpu);
  }
  
  unsigned long scale_rt_power(int cpu)
  {
  	struct rq *rq = cpu_rq(cpu);
  	u64 total, available;
  	total = sched_avg_period() + (rq->clock - rq->age_stamp);
  
  	if (unlikely(total < rq->rt_avg)) {
  		/* Ensures that power won't end up being negative */
  		available = 0;
  	} else {
  		available = total - rq->rt_avg;
  	}

  	if (unlikely((s64)total < SCHED_POWER_SCALE))
  		total = SCHED_POWER_SCALE;

  	total >>= SCHED_POWER_SHIFT;
  
  	return div_u64(available, total);
  }
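  /*
   * Illustrative example: the value returned above is roughly
   * SCHED_POWER_SCALE * (1 - rt_avg/total).  If half of the averaging
   * window went to RT (and, where accounted, IRQ) time, the remaining
   * CFS capacity scales to about 512 out of 1024.
   */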
  
  static void update_cpu_power(struct sched_domain *sd, int cpu)
  {
  	unsigned long weight = sd->span_weight;
  	unsigned long power = SCHED_POWER_SCALE;
  	struct sched_group *sdg = sd->groups;
  	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
  		if (sched_feat(ARCH_POWER))
  			power *= arch_scale_smt_power(sd, cpu);
  		else
  			power *= default_scale_smt_power(sd, cpu);
  		power >>= SCHED_POWER_SHIFT;
  	}
  	sdg->sgp->power_orig = power;
  
  	if (sched_feat(ARCH_POWER))
  		power *= arch_scale_freq_power(sd, cpu);
  	else
  		power *= default_scale_freq_power(sd, cpu);
  	power >>= SCHED_POWER_SHIFT;

  	power *= scale_rt_power(cpu);
  	power >>= SCHED_POWER_SHIFT;
  
  	if (!power)
  		power = 1;
  	cpu_rq(cpu)->cpu_power = power;
  	sdg->sgp->power = power;
  }
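  /*
   * Illustrative example of the cumulative scaling above: starting from
   * SCHED_POWER_SCALE (1024), an SMT factor of ~0.5, a frequency factor
   * of 1.0 and an RT/IRQ factor of ~0.75 leaves a cpu_power of roughly
   * 384, i.e. this CPU attracts correspondingly less load than a full,
   * unshared, otherwise idle CPU.
   */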
  
  static void update_group_power(struct sched_domain *sd, int cpu)
  {
  	struct sched_domain *child = sd->child;
  	struct sched_group *group, *sdg = sd->groups;
  	unsigned long power;
  
  	if (!child) {
  		update_cpu_power(sd, cpu);
  		return;
  	}
  
  	power = 0;
  
  	group = child->groups;
  	do {
  		power += group->sgp->power;
  		group = group->next;
  	} while (group != child->groups);
  	sdg->sgp->power = power;
  }
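  /*
   * Illustrative note: a parent group's power is simply the sum over its
   * child groups, e.g. a package-level group made up of two core groups
   * of 1024 each ends up with a power of 2048.
   */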
  /*
   * Try and fix up capacity for tiny siblings, this is needed when
   * things like SD_ASYM_PACKING need f_b_g to select another sibling
   * which on its own isn't powerful enough.
   *
   * See update_sd_pick_busiest() and check_asym_packing().
   */
  static inline int
  fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  {
  	/*
  	 * Only siblings can have significantly less than SCHED_POWER_SCALE
  	 */
  	if (!(sd->flags & SD_SHARE_CPUPOWER))
  		return 0;
  
  	/*
  	 * If ~90% of the cpu_power is still there, we're good.
  	 */
  	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
  		return 1;
  
  	return 0;
  }
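  /*
   * Illustrative note on the 32/29 test above: 29/32 is about 90.6%, so
   * a sibling whose current power is still at least ~90% of power_orig
   * is counted as one full unit of capacity even though the
   * DIV_ROUND_CLOSEST() based calculation rounded it down to zero.
   */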
  /**
   * update_sg_lb_stats - Update sched_group's statistics for load balancing.
   * @sd: The sched_domain whose statistics are to be updated.
   * @group: sched_group whose statistics are to be updated.
   * @this_cpu: Cpu for which load balance is currently performed.
   * @idle: Idle status of this_cpu
   * @load_idx: Load index of sched_domain of this_cpu for load calc.
   * @local_group: Does group contain this_cpu.
   * @cpus: Set of cpus considered for load balancing.
   * @balance: Should we balance.
   * @sgs: variable to hold the statistics for this group.
   */
  static inline void update_sg_lb_stats(struct sched_domain *sd,
  			struct sched_group *group, int this_cpu,
  			enum cpu_idle_type idle, int load_idx,
  			int local_group, const struct cpumask *cpus,
  			int *balance, struct sg_lb_stats *sgs)
  {
  	unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
  	int i;
  	unsigned int balance_cpu = -1, first_idle_cpu = 0;
  	unsigned long avg_load_per_task = 0;

  	if (local_group)
  		balance_cpu = group_first_cpu(group);
  
  	/* Tally up the load of all CPUs in the group */
  	max_cpu_load = 0;
  	min_cpu_load = ~0UL;
  	max_nr_running = 0;
  
  	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
  		struct rq *rq = cpu_rq(i);
  		/* Bias balancing toward cpus of our domain */
  		if (local_group) {
  			if (idle_cpu(i) && !first_idle_cpu) {
  				first_idle_cpu = 1;
  				balance_cpu = i;
  			}
  
  			load = target_load(i, load_idx);
  		} else {
  			load = source_load(i, load_idx);
  			if (load > max_cpu_load) {
  				max_cpu_load = load;
  				max_nr_running = rq->nr_running;
  			}
  			if (min_cpu_load > load)
  				min_cpu_load = load;
  		}
  
  		sgs->group_load += load;
  		sgs->sum_nr_running += rq->nr_running;
  		sgs->sum_weighted_load += weighted_cpuload(i);
  		if (idle_cpu(i))
  			sgs->idle_cpus++;
  	}
  
  	/*
  	 * First idle cpu or the first cpu(busiest) in this sched group
  	 * is eligible for doing load balancing at this and above
  	 * domains. In the newly idle case, we will allow all the cpu's
  	 * to do the newly idle load balance.
  	 */
  	if (idle != CPU_NEWLY_IDLE && local_group) {
  		if (balance_cpu != this_cpu) {
  			*balance = 0;
  			return;
  		}
  		update_group_power(sd, this_cpu);
  	}
  
  	/* Adjust by relative CPU power of the group */
  	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;

  	/*
  	 * Consider the group unbalanced when the imbalance is larger
  	 * than the average weight of a task.
  	 *
  	 * APZ: with cgroup the avg task weight can vary wildly and
  	 *      might not be a suitable number - should we keep a
  	 *      normalized nr_running number somewhere that negates
  	 *      the hierarchy?
  	 */
  	if (sgs->sum_nr_running)
  		avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;

  	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
  		sgs->group_imb = 1;
  	sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
  						SCHED_POWER_SCALE);
  	if (!sgs->group_capacity)
  		sgs->group_capacity = fix_small_capacity(sd, group);
  	sgs->group_weight = group->group_weight;
  
  	if (sgs->group_capacity > sgs->sum_nr_running)
  		sgs->group_has_capacity = 1;
  }
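  /*
   * Illustrative example for the capacity math above: group_capacity is
   * DIV_ROUND_CLOSEST(group power, SCHED_POWER_SCALE), so a group with a
   * combined power of 4096 gets capacity 4, one with ~600 rounds to 1,
   * and one below 512 rounds to 0 and falls back to fix_small_capacity().
   */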
  
  /**
   * update_sd_pick_busiest - return 1 on busiest group
   * @sd: sched_domain whose statistics are to be checked
   * @sds: sched_domain statistics
   * @sg: sched_group candidate to be checked for being the busiest
   * @sgs: sched_group statistics
   * @this_cpu: the current cpu
   *
   * Determine if @sg is a busier group than the previously selected
   * busiest group.
   */
  static bool update_sd_pick_busiest(struct sched_domain *sd,
  				   struct sd_lb_stats *sds,
  				   struct sched_group *sg,
  				   struct sg_lb_stats *sgs,
  				   int this_cpu)
  {
  	if (sgs->avg_load <= sds->max_load)
  		return false;
  
  	if (sgs->sum_nr_running > sgs->group_capacity)
  		return true;
  
  	if (sgs->group_imb)
  		return true;
  
  	/*
  	 * ASYM_PACKING needs to move all the work to the lowest
  	 * numbered CPUs in the group, therefore mark all groups
  	 * higher than ourself as busy.
  	 */
  	if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
  	    this_cpu < group_first_cpu(sg)) {
  		if (!sds->busiest)
  			return true;
  
  		if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
  			return true;
  	}
  
  	return false;
  }
  
  /**
   * update_sd_lb_stats - Update sched_group's statistics for load balancing.
   * @sd: sched_domain whose statistics are to be updated.
   * @this_cpu: Cpu for which load balance is currently performed.
   * @idle: Idle status of this_cpu
   * @cpus: Set of cpus considered for load balancing.
   * @balance: Should we balance.
   * @sds: variable to hold the statistics for this sched_domain.
   */
  static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
  			enum cpu_idle_type idle, const struct cpumask *cpus,
  			int *balance, struct sd_lb_stats *sds)
  {
  	struct sched_domain *child = sd->child;
  	struct sched_group *sg = sd->groups;
  	struct sg_lb_stats sgs;
  	int load_idx, prefer_sibling = 0;
  
  	if (child && child->flags & SD_PREFER_SIBLING)
  		prefer_sibling = 1;
  
  	init_sd_power_savings_stats(sd, sds, idle);
  	load_idx = get_sd_load_idx(sd, idle);
  
  	do {
  		int local_group;
  		local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
  		memset(&sgs, 0, sizeof(sgs));
  		update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx,
  				local_group, cpus, balance, &sgs);
  		if (local_group && !(*balance))
  			return;
  
  		sds->total_load += sgs.group_load;
  		sds->total_pwr += sg->sgp->power;
  
  		/*
  		 * In case the child domain prefers tasks go to siblings
  		 * first, lower the sg capacity to one so that we'll try
  		 * and move all the excess tasks away. We lower the capacity
  		 * of a group only if the local group has the capacity to fit
  		 * these excess tasks, i.e. nr_running < group_capacity. The
  		 * extra check prevents the case where you always pull from the
  		 * heaviest group when it is already under-utilized (possible
  		 * with a large weight task outweighing the tasks on the system).
  		 */
  		if (prefer_sibling && !local_group && sds->this_has_capacity)
  			sgs.group_capacity = min(sgs.group_capacity, 1UL);
  
  		if (local_group) {
  			sds->this_load = sgs.avg_load;
  			sds->this = sg;
  			sds->this_nr_running = sgs.sum_nr_running;
  			sds->this_load_per_task = sgs.sum_weighted_load;
  			sds->this_has_capacity = sgs.group_has_capacity;
  			sds->this_idle_cpus = sgs.idle_cpus;
  		} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
  			sds->max_load = sgs.avg_load;
  			sds->busiest = sg;
  			sds->busiest_nr_running = sgs.sum_nr_running;
  			sds->busiest_idle_cpus = sgs.idle_cpus;
  			sds->busiest_group_capacity = sgs.group_capacity;
  			sds->busiest_load_per_task = sgs.sum_weighted_load;
  			sds->busiest_has_capacity = sgs.group_has_capacity;
  			sds->busiest_group_weight = sgs.group_weight;
  			sds->group_imb = sgs.group_imb;
  		}
  		update_sd_power_savings_stats(sg, sds, local_group, &sgs);
  		sg = sg->next;
  	} while (sg != sd->groups);
  }
  int __weak arch_sd_sibling_asym_packing(void)
  {
         return 0*SD_ASYM_PACKING;
  }
  
  /**
   * check_asym_packing - Check to see if the group is packed into the
   *			sched domain.
   *
   * This is primarily intended to be used at the sibling level.  Some
   * cores like POWER7 prefer to use lower numbered SMT threads.  In the
   * case of POWER7, it can move to lower SMT modes only when higher
   * threads are idle.  When in lower SMT modes, the threads will
   * perform better since they share less core resources.  Hence when we
   * have idle threads, we want them to be the higher ones.
   *
   * This packing function is run on idle threads.  It checks to see if
   * the busiest CPU in this domain (core in the P7 case) has a higher
   * CPU number than the packing function is being run on.  Here we are
   * assuming a lower CPU number will be equivalent to a lower SMT thread
   * number.
   *
   * Returns 1 when packing is required and a task should be moved to
   * this CPU.  The amount of the imbalance is returned in *imbalance.
   *
   * @sd: The sched_domain whose packing is to be checked.
   * @sds: Statistics of the sched_domain which is to be packed
   * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
   * @imbalance: returns amount of imbalanced due to packing.
   */
  static int check_asym_packing(struct sched_domain *sd,
  			      struct sd_lb_stats *sds,
  			      int this_cpu, unsigned long *imbalance)
  {
  	int busiest_cpu;
  
  	if (!(sd->flags & SD_ASYM_PACKING))
  		return 0;
  
  	if (!sds->busiest)
  		return 0;
  
  	busiest_cpu = group_first_cpu(sds->busiest);
  	if (this_cpu > busiest_cpu)
  		return 0;
  	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
  				       SCHED_POWER_SCALE);
  	return 1;
  }
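  /*
   * Illustrative note: the DIV_ROUND_CLOSEST() above converts the
   * power-scaled max_load back into plain weighted-load units, e.g. a
   * max_load of 2048 on a busiest group whose power is 512 becomes an
   * imbalance of 1024.
   */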
  
  /**
   * fix_small_imbalance - Calculate the minor imbalance that exists
   *			amongst the groups of a sched_domain, during
   *			load balancing.
   * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
   * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
   * @imbalance: Variable to store the imbalance.
   */
  static inline void fix_small_imbalance(struct sd_lb_stats *sds,
  				int this_cpu, unsigned long *imbalance)
  {
  	unsigned long tmp, pwr_now = 0, pwr_move = 0;
  	unsigned int imbn = 2;
  	unsigned long scaled_busy_load_per_task;
  
  	if (sds->this_nr_running) {
  		sds->this_load_per_task /= sds->this_nr_running;
  		if (sds->busiest_load_per_task >
  				sds->this_load_per_task)
  			imbn = 1;
  	} else
  		sds->this_load_per_task =
  			cpu_avg_load_per_task(this_cpu);
  	scaled_busy_load_per_task = sds->busiest_load_per_task
  					 * SCHED_POWER_SCALE;
  	scaled_busy_load_per_task /= sds->busiest->sgp->power;
  
  	if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
  			(scaled_busy_load_per_task * imbn)) {
  		*imbalance = sds->busiest_load_per_task;
  		return;
  	}
  
  	/*
  	 * OK, we don't have enough imbalance to justify moving tasks,
  	 * however we may be able to increase total CPU power used by
  	 * moving them.
  	 */
  	pwr_now += sds->busiest->sgp->power *
  			min(sds->busiest_load_per_task, sds->max_load);
  	pwr_now += sds->this->sgp->power *
  			min(sds->this_load_per_task, sds->this_load);
  	pwr_now /= SCHED_POWER_SCALE;
  
  	/* Amount of load we'd subtract */
  	tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
  		sds->busiest->sgp->power;
  	if (sds->max_load > tmp)
  		pwr_move += sds->busiest->sgp->power *
  			min(sds->busiest_load_per_task, sds->max_load - tmp);
  
  	/* Amount of load we'd add */
  	if (sds->max_load * sds->busiest->sgp->power <
  		sds->busiest_load_per_task * SCHED_POWER_SCALE)
  		tmp = (sds->max_load * sds->busiest->sgp->power) /
  			sds->this->sgp->power;
  	else
  		tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
  			sds->this->sgp->power;
  	pwr_move += sds->this->sgp->power *
  			min(sds->this_load_per_task, sds->this_load + tmp);
  	pwr_move /= SCHED_POWER_SCALE;
  
  	/* Move if we gain throughput */
  	if (pwr_move > pwr_now)
  		*imbalance = sds->busiest_load_per_task;
  }
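  /*
   * Illustrative note: pwr_now and pwr_move above are both expressed at
   * SCHED_POWER_SCALE resolution; the single busiest task is only deemed
   * worth moving (*imbalance set to one task's load) when the estimated
   * post-move amount of usefully busy power exceeds the current one.
   */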
  
  /**
   * calculate_imbalance - Calculate the amount of imbalance present within the
   *			 groups of a given sched_domain during load balance.
   * @sds: statistics of the sched_domain whose imbalance is to be calculated.
   * @this_cpu: Cpu for which currently load balance is being performed.
   * @imbalance: The variable to store the imbalance.
   */
  static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
  		unsigned long *imbalance)
  {
  	unsigned long max_pull, load_above_capacity = ~0UL;
  
  	sds->busiest_load_per_task /= sds->busiest_nr_running;
  	if (sds->group_imb) {
  		sds->busiest_load_per_task =
  			min(sds->busiest_load_per_task, sds->avg_load);
  	}
  	/*
  	 * In the presence of smp nice balancing, certain scenarios can have
  	 * max load less than avg load (as we skip the groups at or below
  	 * its cpu_power, while calculating max_load).
  	 */
  	if (sds->max_load < sds->avg_load) {
  		*imbalance = 0;
  		return fix_small_imbalance(sds, this_cpu, imbalance);
  	}
  	if (!sds->group_imb) {
  		/*
  		 * Don't want to pull so many tasks that a group would go idle.
  		 */
  		load_above_capacity = (sds->busiest_nr_running -
  						sds->busiest_group_capacity);
  		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);

  		load_above_capacity /= sds->busiest->sgp->power;
  	}
  
  	/*
  	 * We're trying to get all the cpus to the average_load, so we don't
  	 * want to push ourselves above the average load, nor do we wish to
  	 * reduce the max loaded cpu below the average load. At the same time,
  	 * we also don't want to reduce the group load below the group capacity
  	 * (so that we can implement power-savings policies etc). Thus we look
  	 * for the minimum possible imbalance.
  	 * Be careful of negative numbers as they'll appear as very large values
  	 * with unsigned longs.
  	 */
  	max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
  
  	/* How much load to actually move to equalise the imbalance */
  	*imbalance = min(max_pull * sds->busiest->sgp->power,
  		(sds->avg_load - sds->this_load) * sds->this->sgp->power)
  			/ SCHED_POWER_SCALE;
  
  	/*
  	 * if *imbalance is less than the average load per runnable task
  	 * there is no guarantee that any tasks will be moved so we'll have
  	 * a think about bumping its value to force at least one task to be
  	 * moved
  	 */
  	if (*imbalance < sds->busiest_load_per_task)
  		return fix_small_imbalance(sds, this_cpu, imbalance);
  
  }
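  /*
   * Illustrative example, ignoring the load_above_capacity clamp: with
   * max_load 3072, avg_load 2048, this_load 1024 and both groups at a
   * power of 1024, max_pull is 1024 and *imbalance works out to
   * min(1024 * 1024, 1024 * 1024) / 1024 = 1024 weighted units.
   */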

  /******* find_busiest_group() helpers end here *********************/
  
  /**
   * find_busiest_group - Returns the busiest group within the sched_domain
   * if there is an imbalance. If there isn't an imbalance, and
   * the user has opted for power-savings, it returns a group whose
   * CPUs can be put to idle by rebalancing those tasks elsewhere, if
   * such a group exists.
   *
   * Also calculates the amount of weighted load which should be moved
   * to restore balance.
   *
   * @sd: The sched_domain whose busiest group is to be returned.
   * @this_cpu: The cpu for which load balancing is currently being performed.
   * @imbalance: Variable which stores amount of weighted load which should
   *		be moved to restore balance/put a group to idle.
   * @idle: The idle status of this_cpu.
   * @cpus: The set of CPUs under consideration for load-balancing.
   * @balance: Pointer to a variable indicating if this_cpu
   *	is the appropriate cpu to perform load balancing at this_level.
   *
   * Returns:	- the busiest group if imbalance exists.
   *		- If no imbalance and user has opted for power-savings balance,
   *		   return the least loaded group whose CPUs can be
   *		   put to idle by rebalancing its tasks onto our group.
   */
  static struct sched_group *
  find_busiest_group(struct sched_domain *sd, int this_cpu,
  		   unsigned long *imbalance, enum cpu_idle_type idle,
  		   const struct cpumask *cpus, int *balance)
  {
  	struct sd_lb_stats sds;
  
  	memset(&sds, 0, sizeof(sds));
  
  	/*
  	 * Compute the various statistics relevant for load balancing at
  	 * this level.
  	 */
  	update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);

  	/*
  	 * this_cpu is not the appropriate cpu to perform load balancing at
  	 * this level.
  	 */
  	if (!(*balance))
  		goto ret;
  	if ((idle == CPU_IDLE || idle == CPU_NEWLY_IDLE) &&
  	    check_asym_packing(sd, &sds, this_cpu, imbalance))
  		return sds.busiest;
  	/* There is no busy sibling group to pull tasks from */
  	if (!sds.busiest || sds.busiest_nr_running == 0)
  		goto out_balanced;
  	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr;

  	/*
  	 * If the busiest group is imbalanced the below checks don't
  	 * work because they assume all things are equal, which typically
  	 * isn't true due to cpus_allowed constraints and the like.
  	 */
  	if (sds.group_imb)
  		goto force_balance;
  	/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
  	if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
  			!sds.busiest_has_capacity)
  		goto force_balance;
  	/*
  	 * If the local group is more busy than the selected busiest group
  	 * don't try and pull any tasks.
  	 */
  	if (sds.this_load >= sds.max_load)
  		goto out_balanced;
  	/*
  	 * Don't pull any tasks if this group is already above the domain
  	 * average load.
  	 */
  	if (sds.this_load >= sds.avg_load)
  		goto out_balanced;
  	if (idle == CPU_IDLE) {
  		/*
  		 * This cpu is idle. If the busiest group load doesn't
  		 * have more tasks than the number of available cpu's and
  		 * there is no imbalance between this and busiest group
  		 * wrt idle cpus, it is balanced.
  		 */
  		if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
  		    sds.busiest_nr_running <= sds.busiest_group_weight)
  			goto out_balanced;
  	} else {
  		/*
  		 * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use
  		 * imbalance_pct to be conservative.
  		 */
  		if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
  			goto out_balanced;
  	}

  force_balance:
  	/* Looks like there is an imbalance. Compute it */
  	calculate_imbalance(&sds, this_cpu, imbalance);
  	return sds.busiest;
  
  out_balanced:
  	/*
  	 * There is no obvious imbalance. But check if we can do some balancing
  	 * to save power.
  	 */
  	if (check_power_save_busiest_group(&sds, this_cpu, imbalance))
  		return sds.busiest;
  ret:
  	*imbalance = 0;
  	return NULL;
  }
  
  /*
   * find_busiest_queue - find the busiest runqueue among the cpus in group.
   */
  static struct rq *
  find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
  		   enum cpu_idle_type idle, unsigned long imbalance,
  		   const struct cpumask *cpus)
  {
  	struct rq *busiest = NULL, *rq;
  	unsigned long max_load = 0;
  	int i;
  
  	for_each_cpu(i, sched_group_cpus(group)) {
  		unsigned long power = power_of(i);
  		unsigned long capacity = DIV_ROUND_CLOSEST(power,
  							   SCHED_POWER_SCALE);
  		unsigned long wl;
  		if (!capacity)
  			capacity = fix_small_capacity(sd, group);
  		if (!cpumask_test_cpu(i, cpus))
  			continue;
  
  		rq = cpu_rq(i);
  		wl = weighted_cpuload(i);

  		/*
  		 * When comparing with imbalance, use weighted_cpuload()
  		 * which is not scaled with the cpu power.
  		 */
  		if (capacity && rq->nr_running == 1 && wl > imbalance)
  			continue;
  		/*
  		 * For the load comparisons with the other cpu's, consider
  		 * the weighted_cpuload() scaled with the cpu power, so that
  		 * the load can be moved away from the cpu that is potentially
  		 * running at a lower capacity.
  		 */
  		wl = (wl * SCHED_POWER_SCALE) / power;

  		if (wl > max_load) {
  			max_load = wl;
  			busiest = rq;
  		}
  	}
  
  	return busiest;
  }
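  /*
   * Illustrative note: because wl is rescaled as wl * SCHED_POWER_SCALE /
   * power before the comparison, a CPU running at half power with the
   * same raw weighted load as a full-power CPU looks twice as loaded and
   * is therefore preferred as the busiest runqueue.
   */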
  
  /*
   * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but
   * so long as it is large enough.
   */
  #define MAX_PINNED_INTERVAL	512
  
  /* Working cpumask for load_balance and load_balance_newidle. */
  static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
  static int need_active_balance(struct sched_domain *sd, int idle,
  			       int busiest_cpu, int this_cpu)
  {
  	if (idle == CPU_NEWLY_IDLE) {
  
  		/*
  		 * ASYM_PACKING needs to force migrate tasks from busy but
  		 * higher numbered CPUs in order to pack all tasks in the
  		 * lowest numbered CPUs.
  		 */
  		if ((sd->flags & SD_ASYM_PACKING) && busiest_cpu > this_cpu)
  			return 1;
  		/*
  		 * The only task running in a non-idle cpu can be moved to this
  		 * cpu in an attempt to completely free up the other CPU
  		 * package.
  		 *
  		 * The package power saving logic comes from
  		 * find_busiest_group(). If there is no imbalance, then
  		 * f_b_g() will return NULL. However when sched_mc={1,2} then
  		 * f_b_g() will select a group from which a running task may be
  		 * pulled to this cpu in order to make the other package idle.
  		 * If there is no opportunity to make a package idle and if
  		 * there is no imbalance, then f_b_g() will return NULL and no
  		 * action will be taken in load_balance_newidle().
  		 *
  		 * Under normal task pull operation due to imbalance, there
  		 * will be more than one task in the source run queue and
  		 * move_tasks() will succeed.  ld_moved will be true and this
  		 * active balance code will not be triggered.
  		 */
  		if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
  			return 0;
  	}
  
  	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
  }
  static int active_load_balance_cpu_stop(void *data);
  /*
   * Check this_cpu to ensure it is balanced within domain. Attempt to move
   * tasks if there is an imbalance.
   */
  static int load_balance(int this_cpu, struct rq *this_rq,
  			struct sched_domain *sd, enum cpu_idle_type idle,
  			int *balance)
  {
  	int ld_moved, all_pinned = 0, active_balance = 0;
  	struct sched_group *group;
  	unsigned long imbalance;
  	struct rq *busiest;
  	unsigned long flags;
  	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
  	cpumask_copy(cpus, cpu_active_mask);
  	schedstat_inc(sd, lb_count[idle]);
  
  redo:
  	group = find_busiest_group(sd, this_cpu, &imbalance, idle,
  				   cpus, balance);
  
  	if (*balance == 0)
  		goto out_balanced;
  
  	if (!group) {
  		schedstat_inc(sd, lb_nobusyg[idle]);
  		goto out_balanced;
  	}
  	busiest = find_busiest_queue(sd, group, idle, imbalance, cpus);
  	if (!busiest) {
  		schedstat_inc(sd, lb_nobusyq[idle]);
  		goto out_balanced;
  	}
  
  	BUG_ON(busiest == this_rq);
  
  	schedstat_add(sd, lb_imbalance[idle], imbalance);
  
  	ld_moved = 0;
  	if (busiest->nr_running > 1) {
  		/*
  		 * Attempt to move tasks. If find_busiest_group has found
  		 * an imbalance but busiest->nr_running <= 1, the group is
  		 * still unbalanced. ld_moved simply stays zero, so it is
  		 * correctly treated as an imbalance.
  		 */
  		all_pinned = 1;
  		local_irq_save(flags);
  		double_rq_lock(this_rq, busiest);
  		ld_moved = move_tasks(this_rq, this_cpu, busiest,
  				      imbalance, sd, idle, &all_pinned);
  		double_rq_unlock(this_rq, busiest);
  		local_irq_restore(flags);
  
  		/*
  		 * some other cpu did the load balance for us.
  		 */
  		if (ld_moved && this_cpu != smp_processor_id())
  			resched_cpu(this_cpu);
  
  		/* All tasks on this runqueue were pinned by CPU affinity */
  		if (unlikely(all_pinned)) {
  			cpumask_clear_cpu(cpu_of(busiest), cpus);
  			if (!cpumask_empty(cpus))
  				goto redo;
  			goto out_balanced;
  		}
  	}
  
  	if (!ld_moved) {
  		schedstat_inc(sd, lb_failed[idle]);
  		/*
  		 * Increment the failure counter only on periodic balance.
  		 * We do not want newidle balance, which can be very
  		 * frequent, pollute the failure counter causing
  		 * excessive cache_hot migrations and active balances.
  		 */
  		if (idle != CPU_NEWLY_IDLE)
  			sd->nr_balance_failed++;

  		if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) {
  			raw_spin_lock_irqsave(&busiest->lock, flags);
  			/* don't kick the active_load_balance_cpu_stop,
  			 * if the curr task on busiest cpu can't be
  			 * moved to this_cpu
  			 */
  			if (!cpumask_test_cpu(this_cpu,
  					      &busiest->curr->cpus_allowed)) {
  				raw_spin_unlock_irqrestore(&busiest->lock,
  							    flags);
  				all_pinned = 1;
  				goto out_one_pinned;
  			}
  			/*
  			 * ->active_balance synchronizes accesses to
  			 * ->active_balance_work.  Once set, it's cleared
  			 * only after active load balance is finished.
  			 */
  			if (!busiest->active_balance) {
  				busiest->active_balance = 1;
  				busiest->push_cpu = this_cpu;
  				active_balance = 1;
  			}
  			raw_spin_unlock_irqrestore(&busiest->lock, flags);

  			if (active_balance)
  				stop_one_cpu_nowait(cpu_of(busiest),
  					active_load_balance_cpu_stop, busiest,
  					&busiest->active_balance_work);
  
  			/*
  			 * We've kicked active balancing, reset the failure
  			 * counter.
  			 */
  			sd->nr_balance_failed = sd->cache_nice_tries+1;
  		}
  	} else
  		sd->nr_balance_failed = 0;
  
  	if (likely(!active_balance)) {
  		/* We were unbalanced, so reset the balancing interval */
  		sd->balance_interval = sd->min_interval;
  	} else {
  		/*
  		 * If we've begun active balancing, start to back off. This
  		 * case may not be covered by the all_pinned logic if there
  		 * is only 1 task on the busy runqueue (because we don't call
  		 * move_tasks).
  		 */
  		if (sd->balance_interval < sd->max_interval)
  			sd->balance_interval *= 2;
  	}
  	goto out;
  
  out_balanced:
  	schedstat_inc(sd, lb_balanced[idle]);
  
  	sd->nr_balance_failed = 0;
  
  out_one_pinned:
  	/* tune up the balancing interval */
  	if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) ||
  			(sd->balance_interval < sd->max_interval))
  		sd->balance_interval *= 2;
  	ld_moved = 0;
  out:
  	return ld_moved;
  }
  
  /*
   * idle_balance is called by schedule() if this_cpu is about to become
   * idle. Attempts to pull tasks from other CPUs.
   */
  static void idle_balance(int this_cpu, struct rq *this_rq)
  {
  	struct sched_domain *sd;
  	int pulled_task = 0;
  	unsigned long next_balance = jiffies + HZ;
  
  	this_rq->idle_stamp = this_rq->clock;
  
  	if (this_rq->avg_idle < sysctl_sched_migration_cost)
  		return;
  	/*
  	 * Drop the rq->lock, but keep IRQ/preempt disabled.
  	 */
  	raw_spin_unlock(&this_rq->lock);
  	update_shares(this_cpu);
  	rcu_read_lock();
  	for_each_domain(this_cpu, sd) {
  		unsigned long interval;
  		int balance = 1;
  
  		if (!(sd->flags & SD_LOAD_BALANCE))
  			continue;
  		if (sd->flags & SD_BALANCE_NEWIDLE) {
  			/* If we've pulled tasks over stop searching: */
  			pulled_task = load_balance(this_cpu, this_rq,
  						   sd, CPU_NEWLY_IDLE, &balance);
  		}
  
  		interval = msecs_to_jiffies(sd->balance_interval);
  		if (time_after(next_balance, sd->last_balance + interval))
  			next_balance = sd->last_balance + interval;
  		if (pulled_task) {
  			this_rq->idle_stamp = 0;
  			break;
  		}
  	}
  	rcu_read_unlock();
  
  	raw_spin_lock(&this_rq->lock);
  	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
  		/*
  		 * We are going idle. next_balance may be set based on
  		 * a busy processor. So reset next_balance.
  		 */
  		this_rq->next_balance = next_balance;
  	}
  }
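  /*
   * Illustrative note: the avg_idle check at the top of idle_balance()
   * skips newly-idle balancing entirely when this CPU's average idle
   * period is shorter than sysctl_sched_migration_cost (0.5ms by
   * default), on the assumption that a new task will arrive before a
   * pulled one could pay off.
   */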
  
  /*
   * active_load_balance_cpu_stop is run by cpu stopper. It pushes
   * running tasks off the busiest CPU onto idle CPUs. It requires at
   * least 1 task to be running on each physical CPU where possible, and
   * avoids physical / logical imbalances.
   */
  static int active_load_balance_cpu_stop(void *data)
  {
  	struct rq *busiest_rq = data;
  	int busiest_cpu = cpu_of(busiest_rq);
  	int target_cpu = busiest_rq->push_cpu;
  	struct rq *target_rq = cpu_rq(target_cpu);
  	struct sched_domain *sd;
969c79215   Tejun Heo   sched: replace mi...
  
  	raw_spin_lock_irq(&busiest_rq->lock);
  
  	/* make sure the requested cpu hasn't gone down in the meantime */
  	if (unlikely(busiest_cpu != smp_processor_id() ||
  		     !busiest_rq->active_balance))
  		goto out_unlock;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3401
3402
3403
  
  	/* Is there any task to move? */
  	if (busiest_rq->nr_running <= 1)
969c79215   Tejun Heo   sched: replace mi...
3404
  		goto out_unlock;
1e3c88bde   Peter Zijlstra   sched: Move load ...
  
  	/*
  	 * This condition is "impossible", if it occurs
  	 * we need to fix it. Originally reported by
  	 * Bjorn Helgaas on a 128-cpu setup.
  	 */
  	BUG_ON(busiest_rq == target_rq);
  
  	/* move a task from busiest_rq to target_rq */
  	double_lock_balance(busiest_rq, target_rq);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3415
3416
  
  	/* Search for an sd spanning us and the target CPU. */
dce840a08   Peter Zijlstra   sched: Dynamicall...
3417
  	rcu_read_lock();
1e3c88bde   Peter Zijlstra   sched: Move load ...
  	for_each_domain(target_cpu, sd) {
  		if ((sd->flags & SD_LOAD_BALANCE) &&
  		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
  				break;
  	}
  
  	if (likely(sd)) {
  		schedstat_inc(sd, alb_count);
  
  		if (move_one_task(target_rq, target_cpu, busiest_rq,
  				  sd, CPU_IDLE))
  			schedstat_inc(sd, alb_pushed);
  		else
  			schedstat_inc(sd, alb_failed);
  	}
dce840a08   Peter Zijlstra   sched: Dynamicall...
3433
  	rcu_read_unlock();
1e3c88bde   Peter Zijlstra   sched: Move load ...
3434
  	double_unlock_balance(busiest_rq, target_rq);
969c79215   Tejun Heo   sched: replace mi...
  out_unlock:
  	busiest_rq->active_balance = 0;
  	raw_spin_unlock_irq(&busiest_rq->lock);
  	return 0;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3439
3440
3441
  }
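
  /*
   * For orientation: the requesting side of this handshake lives in
   * load_balance() earlier in this file and looks roughly like the sketch
   * below.  Locals such as 'busiest', 'this_cpu' and 'active_balance'
   * belong to that function, which is why the fragment is kept under
   * "#if 0"; it is a sketch, not wired-in code.
   */
  #if 0
  	raw_spin_lock_irqsave(&busiest->lock, flags);
  	if (!busiest->active_balance) {
  		busiest->active_balance = 1;
  		busiest->push_cpu = this_cpu;
  		active_balance = 1;
  	}
  	raw_spin_unlock_irqrestore(&busiest->lock, flags);
  
  	if (active_balance)
  		stop_one_cpu_nowait(cpu_of(busiest),
  				    active_load_balance_cpu_stop, busiest,
  				    &busiest->active_balance_work);
  #endif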
  
  #ifdef CONFIG_NO_HZ
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
  
  static DEFINE_PER_CPU(struct call_single_data, remote_sched_softirq_cb);
  
  static void trigger_sched_softirq(void *data)
  {
  	raise_softirq_irqoff(SCHED_SOFTIRQ);
  }
  
  static inline void init_sched_softirq_csd(struct call_single_data *csd)
  {
  	csd->func = trigger_sched_softirq;
  	csd->info = NULL;
  	csd->flags = 0;
  	csd->priv = 0;
  }
  
  /*
   * idle load balancing details
   * - One of the idle CPUs nominates itself as idle load_balancer, while
   *   entering idle.
   * - This idle load balancer CPU will also go into tickless mode when
   *   it is idle, just like all other idle CPUs
   * - When one of the busy CPUs notices that idle rebalancing may be
   *   needed, it kicks the idle load balancer, which then does idle
   *   load balancing on behalf of all the idle CPUs.
   */
1e3c88bde   Peter Zijlstra   sched: Move load ...
3468
3469
  static struct {
  	atomic_t load_balancer;
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
  	atomic_t first_pick_cpu;
  	atomic_t second_pick_cpu;
  	cpumask_var_t idle_cpus_mask;
  	cpumask_var_t grp_idle_mask;
  	unsigned long next_balance;     /* in jiffy units */
  } nohz ____cacheline_aligned;
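
  /*
   * The nomination logic in select_nohz_load_balancer() below reduces to a
   * single atomic claim on nohz.load_balancer, where nr_cpu_ids means
   * "vacant".  The two helpers here are an illustrative sketch of that
   * claim/release pattern (the names are ours, not kernel API), unused by
   * the rest of the file:
   */
  static inline int nohz_ilb_try_claim(int cpu)
  {
  	/* Succeeds only if the slot was vacant. */
  	return atomic_cmpxchg(&nohz.load_balancer, nr_cpu_ids, cpu) == nr_cpu_ids;
  }
  
  static inline void nohz_ilb_release(int cpu)
  {
  	/* Only the current owner can vacate the slot. */
  	atomic_cmpxchg(&nohz.load_balancer, cpu, nr_cpu_ids);
  }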
1e3c88bde   Peter Zijlstra   sched: Move load ...
  
  int get_nohz_load_balancer(void)
  {
  	return atomic_read(&nohz.load_balancer);
  }
  
  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
  /**
   * lowest_flag_domain - Return lowest sched_domain containing flag.
   * @cpu:	The cpu whose lowest level of sched domain is to
   *		be returned.
   * @flag:	The flag to check for the lowest sched_domain
   *		for the given cpu.
   *
   * Returns the lowest sched_domain of a cpu which contains the given flag.
   */
  static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
  {
  	struct sched_domain *sd;
  
  	for_each_domain(cpu, sd)
  		if (sd && (sd->flags & flag))
  			break;
  
  	return sd;
  }
  
  /**
   * for_each_flag_domain - Iterates over sched_domains containing the flag.
   * @cpu:	The cpu whose domains we're iterating over.
   * @sd:		variable holding the value of the power_savings_sd
   *		for cpu.
   * @flag:	The flag to filter the sched_domains to be iterated.
   *
   * Iterates over all the scheduler domains for a given cpu that has the 'flag'
   * set, starting from the lowest sched_domain to the highest.
   */
  #define for_each_flag_domain(cpu, sd, flag) \
  	for (sd = lowest_flag_domain(cpu, flag); \
  		(sd && (sd->flags & flag)); sd = sd->parent)
  
  /**
   * is_semi_idle_group - Checks if the given sched_group is semi-idle.
   * @ilb_group:	group to be checked for semi-idleness
   *
   * Returns:	1 if the group is semi-idle. 0 otherwise.
   *
   * We define a sched_group to be semi-idle if it has at least one idle CPU
   * and at least one non-idle CPU. This helper function checks if the given
   * sched_group is semi-idle or not.
   */
  static inline int is_semi_idle_group(struct sched_group *ilb_group)
  {
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3529
  	cpumask_and(nohz.grp_idle_mask, nohz.idle_cpus_mask,
1e3c88bde   Peter Zijlstra   sched: Move load ...
  					sched_group_cpus(ilb_group));
  
  	/*
  	 * A sched_group is semi-idle when it has at least one busy cpu
  	 * and at least one idle cpu.
  	 */
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3536
  	if (cpumask_empty(nohz.grp_idle_mask))
1e3c88bde   Peter Zijlstra   sched: Move load ...
3537
  		return 0;
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3538
  	if (cpumask_equal(nohz.grp_idle_mask, sched_group_cpus(ilb_group)))
1e3c88bde   Peter Zijlstra   sched: Move load ...
  		return 0;
  
  	return 1;
  }
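
  /*
   * Worked example: for a sched_group spanning CPUs {0,1,2,3} with
   * nohz.idle_cpus_mask containing {2,5}, the intersection stored in
   * nohz.grp_idle_mask is {2}, which is neither empty nor the whole group,
   * so the group is semi-idle and CPU 2 is a candidate ilb.  An all-busy
   * ({}) or all-idle ({0,1,2,3}) intersection makes the group
   * non-semi-idle.
   */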
  /**
   * find_new_ilb - Finds the optimum idle load balancer for nomination.
   * @cpu:	The cpu which is nominating a new idle_load_balancer.
   *
   * Returns:	the id of the idle load balancer if it exists,
   *		Else, returns >= nr_cpu_ids.
   *
   * This algorithm picks the idle load balancer such that it belongs to a
   * semi-idle powersavings sched_domain. The idea is to avoid waking up a
   * completely idle package/core just for the purpose of idle load balancing
   * when there are other idle CPUs which are better suited for that job.
   */
  static int find_new_ilb(int cpu)
  {
  	struct sched_domain *sd;
  	struct sched_group *ilb_group;
dce840a08   Peter Zijlstra   sched: Dynamicall...
3559
  	int ilb = nr_cpu_ids;
1e3c88bde   Peter Zijlstra   sched: Move load ...
  
  	/*
  	 * Have idle load balancer selection from semi-idle packages only
  	 * when power-aware load balancing is enabled
  	 */
  	if (!(sched_smt_power_savings || sched_mc_power_savings))
  		goto out_done;
  
  	/*
  	 * Optimize for the case when we have no idle CPUs or only one
  	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
  	 */
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3572
  	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
1e3c88bde   Peter Zijlstra   sched: Move load ...
3573
  		goto out_done;
dce840a08   Peter Zijlstra   sched: Dynamicall...
3574
  	rcu_read_lock();
1e3c88bde   Peter Zijlstra   sched: Move load ...
  	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
  		ilb_group = sd->groups;
  
  		do {
dce840a08   Peter Zijlstra   sched: Dynamicall...
  			if (is_semi_idle_group(ilb_group)) {
  				ilb = cpumask_first(nohz.grp_idle_mask);
  				goto unlock;
  			}
1e3c88bde   Peter Zijlstra   sched: Move load ...
  
  			ilb_group = ilb_group->next;
  
  		} while (ilb_group != sd->groups);
  	}
dce840a08   Peter Zijlstra   sched: Dynamicall...
3588
3589
  unlock:
  	rcu_read_unlock();
1e3c88bde   Peter Zijlstra   sched: Move load ...
3590
3591
  
  out_done:
dce840a08   Peter Zijlstra   sched: Dynamicall...
3592
  	return ilb;
1e3c88bde   Peter Zijlstra   sched: Move load ...
  }
  #else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
  static inline int find_new_ilb(int call_cpu)
  {
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3597
  	return nr_cpu_ids;
1e3c88bde   Peter Zijlstra   sched: Move load ...
  }
  #endif
  
  /*
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
   * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
   * nohz_load_balancer CPU (if there is one), otherwise fall back to any idle
   * CPU (if there is one).
   */
  static void nohz_balancer_kick(int cpu)
  {
  	int ilb_cpu;
  
  	nohz.next_balance++;
  
  	ilb_cpu = get_nohz_load_balancer();
  
  	if (ilb_cpu >= nr_cpu_ids) {
  		ilb_cpu = cpumask_first(nohz.idle_cpus_mask);
  		if (ilb_cpu >= nr_cpu_ids)
  			return;
  	}
  
  	if (!cpu_rq(ilb_cpu)->nohz_balance_kick) {
  		struct call_single_data *cp;
  
  		cpu_rq(ilb_cpu)->nohz_balance_kick = 1;
  		cp = &per_cpu(remote_sched_softirq_cb, cpu);
  		__smp_call_function_single(ilb_cpu, cp, 0);
  	}
  	return;
  }
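
  /*
   * Worked sequence for a kick (all steps are in this file):
   *  1. nohz_kick_needed() on a busy CPU decides a kick is due.
   *  2. nohz_balancer_kick() sets rq->nohz_balance_kick on the ilb CPU and
   *     sends it remote_sched_softirq_cb via __smp_call_function_single().
   *  3. trigger_sched_softirq() runs on the ilb CPU and raises SCHED_SOFTIRQ.
   *  4. run_rebalance_domains() calls nohz_idle_balance(), which rebalances
   *     on behalf of every CPU in nohz.idle_cpus_mask.
   */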
  
  /*
1e3c88bde   Peter Zijlstra   sched: Move load ...
3631
3632
   * This routine will try to nominate the ilb (idle load balancing)
   * owner among the cpus whose ticks are stopped. The ilb owner will do the idle
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3633
   * load balancing on behalf of all those cpus.
1e3c88bde   Peter Zijlstra   sched: Move load ...
3634
   *
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3635
3636
3637
   * When the ilb owner becomes busy, we will not have a new ilb owner until some
   * idle CPU wakes up and goes back to idle or some busy CPU tries to kick
   * idle load balancing by kicking one of the idle CPUs.
1e3c88bde   Peter Zijlstra   sched: Move load ...
3638
   *
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3639
3640
3641
   * Ticks are stopped for the ilb owner as well, with a busy CPU kicking this
   * ilb owner CPU in the future (when there is a need for idle load balancing on
   * behalf of all idle CPUs).
1e3c88bde   Peter Zijlstra   sched: Move load ...
3642
   */
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3643
  void select_nohz_load_balancer(int stop_tick)
1e3c88bde   Peter Zijlstra   sched: Move load ...
  {
  	int cpu = smp_processor_id();
  
  	if (stop_tick) {
1e3c88bde   Peter Zijlstra   sched: Move load ...
3648
3649
  		if (!cpu_active(cpu)) {
  			if (atomic_read(&nohz.load_balancer) != cpu)
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3650
  				return;
1e3c88bde   Peter Zijlstra   sched: Move load ...
  
  			/*
  			 * If we are going offline and still the leader,
  			 * give up!
  			 */
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3656
3657
  			if (atomic_cmpxchg(&nohz.load_balancer, cpu,
  					   nr_cpu_ids) != cpu)
1e3c88bde   Peter Zijlstra   sched: Move load ...
3658
  				BUG();
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3659
  			return;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3660
  		}
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3661
  		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3662

83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
  		if (atomic_read(&nohz.first_pick_cpu) == cpu)
  			atomic_cmpxchg(&nohz.first_pick_cpu, cpu, nr_cpu_ids);
  		if (atomic_read(&nohz.second_pick_cpu) == cpu)
  			atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3667

83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3668
  		if (atomic_read(&nohz.load_balancer) >= nr_cpu_ids) {
1e3c88bde   Peter Zijlstra   sched: Move load ...
3669
  			int new_ilb;
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
  			/* make me the ilb owner */
  			if (atomic_cmpxchg(&nohz.load_balancer, nr_cpu_ids,
  					   cpu) != nr_cpu_ids)
  				return;
1e3c88bde   Peter Zijlstra   sched: Move load ...
  			/*
  			 * Check to see if there is a more power-efficient
  			 * ilb.
  			 */
  			new_ilb = find_new_ilb(cpu);
  			if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3680
  				atomic_set(&nohz.load_balancer, nr_cpu_ids);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3681
  				resched_cpu(new_ilb);
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3682
  				return;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3683
  			}
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3684
  			return;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3685
3686
  		}
  	} else {
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3687
3688
  		if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
  			return;
1e3c88bde   Peter Zijlstra   sched: Move load ...
3689

83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3690
  		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3691
3692
  
  		if (atomic_read(&nohz.load_balancer) == cpu)
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3693
3694
  			if (atomic_cmpxchg(&nohz.load_balancer, cpu,
  					   nr_cpu_ids) != cpu)
1e3c88bde   Peter Zijlstra   sched: Move load ...
3695
3696
  				BUG();
  	}
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3697
  	return;
1e3c88bde   Peter Zijlstra   sched: Move load ...
  }
  #endif
  
  static DEFINE_SPINLOCK(balancing);
49c022e65   Peter Zijlstra   sched: Clean up r...
  static unsigned long __read_mostly max_load_balance_interval = HZ/10;
  
  /*
   * Scale the max load_balance interval with the number of CPUs in the system.
   * This trades load-balance latency on larger machines for less cross talk.
   */
  static void update_max_interval(void)
  {
  	max_load_balance_interval = HZ*num_online_cpus()/10;
  }
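
  /*
   * Worked example: with HZ=1000 and 8 CPUs online the cap becomes
   * 1000 * 8 / 10 = 800 jiffies (800ms); the initial HZ/10 value above
   * corresponds to the single-CPU case.
   */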
1e3c88bde   Peter Zijlstra   sched: Move load ...
  /*
   * It checks each scheduling domain to see if it is due to be balanced,
   * and initiates a balancing operation if so.
   *
   * Balancing parameters are set up in arch_init_sched_domains.
   */
  static void rebalance_domains(int cpu, enum cpu_idle_type idle)
  {
  	int balance = 1;
  	struct rq *rq = cpu_rq(cpu);
  	unsigned long interval;
  	struct sched_domain *sd;
  	/* Earliest time when we have to do rebalance again */
  	unsigned long next_balance = jiffies + 60*HZ;
  	int update_next_balance = 0;
  	int need_serialize;
2069dd75c   Peter Zijlstra   sched: Rewrite tg...
3728
  	update_shares(cpu);
dce840a08   Peter Zijlstra   sched: Dynamicall...
3729
  	rcu_read_lock();
1e3c88bde   Peter Zijlstra   sched: Move load ...
  	for_each_domain(cpu, sd) {
  		if (!(sd->flags & SD_LOAD_BALANCE))
  			continue;
  
  		interval = sd->balance_interval;
  		if (idle != CPU_IDLE)
  			interval *= sd->busy_factor;
  
  		/* scale ms to jiffies */
  		interval = msecs_to_jiffies(interval);
49c022e65   Peter Zijlstra   sched: Clean up r...
3740
  		interval = clamp(interval, 1UL, max_load_balance_interval);
1e3c88bde   Peter Zijlstra   sched: Move load ...
  
  		need_serialize = sd->flags & SD_SERIALIZE;
  
  		if (need_serialize) {
  			if (!spin_trylock(&balancing))
  				goto out;
  		}
  
  		if (time_after_eq(jiffies, sd->last_balance + interval)) {
  			if (load_balance(cpu, rq, sd, idle, &balance)) {
  				/*
  				 * We've pulled tasks over so we're no
c186fafe9   Peter Zijlstra   sched: Clean up r...
3753
  				 * longer idle.
1e3c88bde   Peter Zijlstra   sched: Move load ...
  				 */
  				idle = CPU_NOT_IDLE;
  			}
  			sd->last_balance = jiffies;
  		}
  		if (need_serialize)
  			spin_unlock(&balancing);
  out:
  		if (time_after(next_balance, sd->last_balance + interval)) {
  			next_balance = sd->last_balance + interval;
  			update_next_balance = 1;
  		}
  
  		/*
  		 * Stop the load balance at this level. There is another
  		 * CPU in our sched group which is doing load balancing more
  		 * actively.
  		 */
  		if (!balance)
  			break;
  	}
dce840a08   Peter Zijlstra   sched: Dynamicall...
3775
  	rcu_read_unlock();
1e3c88bde   Peter Zijlstra   sched: Move load ...
  
  	/*
  	 * next_balance will be updated only when there is a need.
  	 * When the cpu is attached to null domain for ex, it will not be
  	 * updated.
  	 */
  	if (likely(update_next_balance))
  		rq->next_balance = next_balance;
  }
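
  /*
   * The per-domain interval computation above, pulled out as a
   * self-contained helper purely for illustration (it is not used by this
   * file).  For example, a balance_interval of 8ms with a busy_factor of
   * 32 yields 256ms on a busy CPU, which is then clamped to
   * max_load_balance_interval.
   */
  static inline unsigned long sd_balance_interval(struct sched_domain *sd,
  						enum cpu_idle_type idle)
  {
  	unsigned long interval = sd->balance_interval;
  
  	if (idle != CPU_IDLE)
  		interval *= sd->busy_factor;
  
  	/* scale ms to jiffies and bound the result */
  	return clamp(msecs_to_jiffies(interval), 1UL, max_load_balance_interval);
  }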
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3785
  #ifdef CONFIG_NO_HZ
1e3c88bde   Peter Zijlstra   sched: Move load ...
3786
  /*
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3787
   * In CONFIG_NO_HZ case, the idle balance kickee will do the
1e3c88bde   Peter Zijlstra   sched: Move load ...
3788
3789
   * rebalancing for all the cpus for whom scheduler ticks are stopped.
   */
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
  static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
  {
  	struct rq *this_rq = cpu_rq(this_cpu);
  	struct rq *rq;
  	int balance_cpu;
  
  	if (idle != CPU_IDLE || !this_rq->nohz_balance_kick)
  		return;
  
  	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
  		if (balance_cpu == this_cpu)
  			continue;
  
  		/*
  		 * If this cpu gets work to do, stop the load balancing
  		 * work being done for other cpus. The next load
  		 * balancing owner will pick it up.
  		 */
  		if (need_resched()) {
  			this_rq->nohz_balance_kick = 0;
  			break;
  		}
  
  		raw_spin_lock_irq(&this_rq->lock);
5343bdb8f   Suresh Siddha   sched: Update rq-...
3814
  		update_rq_clock(this_rq);
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
  		update_cpu_load(this_rq);
  		raw_spin_unlock_irq(&this_rq->lock);
  
  		rebalance_domains(balance_cpu, CPU_IDLE);
  
  		rq = cpu_rq(balance_cpu);
  		if (time_after(this_rq->next_balance, rq->next_balance))
  			this_rq->next_balance = rq->next_balance;
  	}
  	nohz.next_balance = this_rq->next_balance;
  	this_rq->nohz_balance_kick = 0;
  }
  
  /*
   * Current heuristic for kicking the idle load balancer
   * - first_pick_cpu is one of the busy CPUs. It kicks the
   *   idle load balancer when it has more than one process active. This
   *   eliminates the need for idle load balancing altogether when we have
   *   only one running process in the system (the common case).
   * - If there is more than one busy CPU, the idle load balancer may have
   *   to run for active_load_balance to happen (i.e., two busy CPUs are
   *   SMT or core siblings and would run better if they moved to different
   *   physical CPUs). So second_pick_cpu is the second of the busy CPUs,
   *   which kicks the idle load balancer as soon as it has any load.
   */
  static inline int nohz_kick_needed(struct rq *rq, int cpu)
  {
  	unsigned long now = jiffies;
  	int ret;
  	int first_pick_cpu, second_pick_cpu;
  
  	if (time_before(now, nohz.next_balance))
  		return 0;
f6c3f1686   Suresh Siddha   sched: Fix nohz b...
3848
  	if (rq->idle_at_tick)
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
  		return 0;
  
  	first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
  	second_pick_cpu = atomic_read(&nohz.second_pick_cpu);
  
  	if (first_pick_cpu < nr_cpu_ids && first_pick_cpu != cpu &&
  	    second_pick_cpu < nr_cpu_ids && second_pick_cpu != cpu)
  		return 0;
  
  	ret = atomic_cmpxchg(&nohz.first_pick_cpu, nr_cpu_ids, cpu);
  	if (ret == nr_cpu_ids || ret == cpu) {
  		atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
  		if (rq->nr_running > 1)
  			return 1;
  	} else {
  		ret = atomic_cmpxchg(&nohz.second_pick_cpu, nr_cpu_ids, cpu);
  		if (ret == nr_cpu_ids || ret == cpu) {
  			if (rq->nr_running)
  				return 1;
  		}
  	}
  	return 0;
  }
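
  /*
   * Worked example of the first_pick/second_pick protocol above, with two
   * busy CPUs A and B and both slots initially vacant (nr_cpu_ids):
   *  - A's tick: the cmpxchg on first_pick_cpu succeeds, so A kicks only
   *    if it has more than one runnable task.
   *  - B's tick: first_pick_cpu now holds A, so B claims second_pick_cpu
   *    instead and kicks as soon as it has any load, letting the ilb try
   *    active balancing between the two.
   *  - A third busy CPU finds both slots held by others and returns 0
   *    without generating further kicks.
   */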
  #else
  static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
  #endif
  
  /*
   * run_rebalance_domains is triggered when needed from the scheduler tick.
   * Also triggered for nohz idle balancing (with nohz_balancing_kick set).
   */
1e3c88bde   Peter Zijlstra   sched: Move load ...
  static void run_rebalance_domains(struct softirq_action *h)
  {
  	int this_cpu = smp_processor_id();
  	struct rq *this_rq = cpu_rq(this_cpu);
  	enum cpu_idle_type idle = this_rq->idle_at_tick ?
  						CPU_IDLE : CPU_NOT_IDLE;
  
  	rebalance_domains(this_cpu, idle);
1e3c88bde   Peter Zijlstra   sched: Move load ...
3888
  	/*
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3889
  	 * If this cpu has a pending nohz_balance_kick, then do the
1e3c88bde   Peter Zijlstra   sched: Move load ...
3890
3891
3892
  	 * balancing on behalf of the other idle cpus whose ticks are
  	 * stopped.
  	 */
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
3893
  	nohz_idle_balance(this_cpu, idle);
1e3c88bde   Peter Zijlstra   sched: Move load ...
  }
  
  static inline int on_null_domain(int cpu)
  {
90a6501f9   Paul E. McKenney   sched, rcu: Fix r...
3898
  	return !rcu_dereference_sched(cpu_rq(cpu)->sd);
1e3c88bde   Peter Zijlstra   sched: Move load ...
  }
  
  /*
   * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
1e3c88bde   Peter Zijlstra   sched: Move load ...
3903
3904
3905
   */
  static inline void trigger_load_balance(struct rq *rq, int cpu)
  {
1e3c88bde   Peter Zijlstra   sched: Move load ...
  	/* Don't need to rebalance while attached to NULL domain */
  	if (time_after_eq(jiffies, rq->next_balance) &&
  	    likely(!on_null_domain(cpu)))
  		raise_softirq(SCHED_SOFTIRQ);
83cd4fe27   Venkatesh Pallipadi   sched: Change noh...
  #ifdef CONFIG_NO_HZ
  	else if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
  		nohz_balancer_kick(cpu);
  #endif
1e3c88bde   Peter Zijlstra   sched: Move load ...
3914
  }
0bcdcf28c   Christian Ehrhardt   sched: Fix missin...
  static void rq_online_fair(struct rq *rq)
  {
  	update_sysctl();
  }
  
  static void rq_offline_fair(struct rq *rq)
  {
  	update_sysctl();
  }
1e3c88bde   Peter Zijlstra   sched: Move load ...
  #else	/* CONFIG_SMP */
  
  /*
   * on UP we do not need to balance between CPUs:
   */
  static inline void idle_balance(int cpu, struct rq *rq)
  {
  }
55e12e5e7   Dhaval Giani   sched: make sched...
3932
  #endif /* CONFIG_SMP */
e1d1484f7   Peter Williams   sched: reduce bal...
3933

bf0f6f24a   Ingo Molnar   sched: cfs core, ...
3934
3935
3936
  /*
   * scheduler tick hitting a task of our scheduling class:
   */
8f4d37ec0   Peter Zijlstra   sched: high-res p...
3937
  static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
  {
  	struct cfs_rq *cfs_rq;
  	struct sched_entity *se = &curr->se;
  
  	for_each_sched_entity(se) {
  		cfs_rq = cfs_rq_of(se);
8f4d37ec0   Peter Zijlstra   sched: high-res p...
3944
  		entity_tick(cfs_rq, se, queued);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
  	}
  }
  
  /*
cd29fe6f2   Peter Zijlstra   sched: Sanitize f...
3949
3950
3951
   * called on fork with the child task as argument from the parent's context
   *  - child not yet on the tasklist
   *  - preemption disabled
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
3952
   */
cd29fe6f2   Peter Zijlstra   sched: Sanitize f...
3953
  static void task_fork_fair(struct task_struct *p)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
3954
  {
cd29fe6f2   Peter Zijlstra   sched: Sanitize f...
3955
  	struct cfs_rq *cfs_rq = task_cfs_rq(current);
429d43bcc   Ingo Molnar   sched: cleanup: s...
3956
  	struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
00bf7bfc2   Ingo Molnar   sched: fix: move ...
3957
  	int this_cpu = smp_processor_id();
cd29fe6f2   Peter Zijlstra   sched: Sanitize f...
3958
3959
  	struct rq *rq = this_rq();
  	unsigned long flags;
05fa785cf   Thomas Gleixner   sched: Convert rq...
3960
  	raw_spin_lock_irqsave(&rq->lock, flags);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
3961

861d034ee   Peter Zijlstra   sched: Fix rq->cl...
3962
  	update_rq_clock(rq);
b0a0f667a   Paul E. McKenney   sched: suppress R...
3963
3964
  	if (unlikely(task_cpu(p) != this_cpu)) {
  		rcu_read_lock();
cd29fe6f2   Peter Zijlstra   sched: Sanitize f...
3965
  		__set_task_cpu(p, this_cpu);
b0a0f667a   Paul E. McKenney   sched: suppress R...
3966
3967
  		rcu_read_unlock();
  	}
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
3968

7109c4429   Ting Yang   sched: call updat...
3969
  	update_curr(cfs_rq);
cd29fe6f2   Peter Zijlstra   sched: Sanitize f...
3970

b5d9d734a   Mike Galbraith   sched: Ensure tha...
3971
3972
  	if (curr)
  		se->vruntime = curr->vruntime;
aeb73b040   Peter Zijlstra   sched: clean up n...
3973
  	place_entity(cfs_rq, se, 1);
4d78e7b65   Peter Zijlstra   sched: new task p...
3974

cd29fe6f2   Peter Zijlstra   sched: Sanitize f...
3975
  	if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
87fefa381   Dmitry Adamushko   sched: optimize t...
3976
  		/*
edcb60a30   Ingo Molnar   sched: kernel/sch...
3977
3978
3979
  		 * Upon rescheduling, sched_class::put_prev_task() will place
  		 * 'current' within the tree based on its new key value.
  		 */
4d78e7b65   Peter Zijlstra   sched: new task p...
3980
  		swap(curr->vruntime, se->vruntime);
aec0a5142   Bharata B Rao   sched: call resch...
3981
  		resched_task(rq->curr);
4d78e7b65   Peter Zijlstra   sched: new task p...
3982
  	}
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
3983

88ec22d3e   Peter Zijlstra   sched: Remove the...
3984
  	se->vruntime -= cfs_rq->min_vruntime;
05fa785cf   Thomas Gleixner   sched: Convert rq...
3985
  	raw_spin_unlock_irqrestore(&rq->lock, flags);
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
3986
  }
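
  /*
   * Worked example of the placement above: if curr->vruntime is 1000 and
   * place_entity() gives the child 1040, then with
   * sysctl_sched_child_runs_first set the two values are swapped, so the
   * child (now at 1000) sorts to the left of its parent and runs first.
   * The final subtraction of cfs_rq->min_vruntime stores a relative
   * vruntime; enqueue_entity() adds the destination runqueue's
   * min_vruntime back when the child is actually enqueued.
   */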
cb4698450   Steven Rostedt   sched: RT-balance...
  /*
   * Priority of the task has changed. Check to see if we preempt
   * the current task.
   */
da7a735e5   Peter Zijlstra   sched: Fix switch...
3991
3992
  static void
  prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
cb4698450   Steven Rostedt   sched: RT-balance...
3993
  {
da7a735e5   Peter Zijlstra   sched: Fix switch...
3994
3995
  	if (!p->se.on_rq)
  		return;
cb4698450   Steven Rostedt   sched: RT-balance...
  	/*
  	 * Reschedule if we are currently running on this runqueue and
  	 * our priority decreased, or if we are not currently running on
  	 * this runqueue and our priority is higher than the current's
  	 */
da7a735e5   Peter Zijlstra   sched: Fix switch...
4001
  	if (rq->curr == p) {
cb4698450   Steven Rostedt   sched: RT-balance...
4002
4003
4004
  		if (p->prio > oldprio)
  			resched_task(rq->curr);
  	} else
15afe09bf   Peter Zijlstra   sched: wakeup pre...
4005
  		check_preempt_curr(rq, p, 0);
cb4698450   Steven Rostedt   sched: RT-balance...
4006
  }
da7a735e5   Peter Zijlstra   sched: Fix switch...
  static void switched_from_fair(struct rq *rq, struct task_struct *p)
  {
  	struct sched_entity *se = &p->se;
  	struct cfs_rq *cfs_rq = cfs_rq_of(se);
  
  	/*
  	 * Ensure the task's vruntime is normalized, so that when its
  	 * switched back to the fair class the enqueue_entity(.flags=0) will
  	 * do the right thing.
  	 *
  	 * If it was on_rq, then the dequeue_entity(.flags=0) will already
  	 * have normalized the vruntime, if it was !on_rq, then only when
  	 * the task is sleeping will it still have non-normalized vruntime.
  	 */
  	if (!se->on_rq && p->state != TASK_RUNNING) {
  		/*
  		 * Fix up our vruntime so that the current sleep doesn't
  		 * cause 'unlimited' sleep bonus.
  		 */
  		place_entity(cfs_rq, se, 0);
  		se->vruntime -= cfs_rq->min_vruntime;
  	}
  }
cb4698450   Steven Rostedt   sched: RT-balance...
4030
4031
4032
  /*
   * We switched to the sched_fair class.
   */
da7a735e5   Peter Zijlstra   sched: Fix switch...
4033
  static void switched_to_fair(struct rq *rq, struct task_struct *p)
cb4698450   Steven Rostedt   sched: RT-balance...
4034
  {
da7a735e5   Peter Zijlstra   sched: Fix switch...
4035
4036
  	if (!p->se.on_rq)
  		return;
cb4698450   Steven Rostedt   sched: RT-balance...
  	/*
  	 * We were most likely switched from sched_rt, so
  	 * kick off the schedule if running, otherwise just see
  	 * if we can still preempt the current task.
  	 */
da7a735e5   Peter Zijlstra   sched: Fix switch...
4042
  	if (rq->curr == p)
cb4698450   Steven Rostedt   sched: RT-balance...
4043
4044
  		resched_task(rq->curr);
  	else
15afe09bf   Peter Zijlstra   sched: wakeup pre...
4045
  		check_preempt_curr(rq, p, 0);
cb4698450   Steven Rostedt   sched: RT-balance...
4046
  }
83b699ed2   Srivatsa Vaddagiri   sched: revert rec...
  /*
   * Account for a task changing its policy or group.
   *
   * This routine is mostly called to set the cfs_rq->curr field when a task
   * migrates between groups/classes.
   */
  static void set_curr_task_fair(struct rq *rq)
  {
  	struct sched_entity *se = &rq->curr->se;
  
  	for_each_sched_entity(se)
  		set_next_entity(cfs_rq_of(se), se);
  }
810b38179   Peter Zijlstra   sched: retain vru...
4059
  #ifdef CONFIG_FAIR_GROUP_SCHED
b2b5ce022   Peter Zijlstra   sched, cgroup: Fi...
4060
  static void task_move_group_fair(struct task_struct *p, int on_rq)
810b38179   Peter Zijlstra   sched: retain vru...
4061
  {
b2b5ce022   Peter Zijlstra   sched, cgroup: Fi...
  	/*
  	 * If the task was not on the rq at the time of this cgroup movement,
  	 * it must have been asleep; sleeping tasks keep their ->vruntime
  	 * absolute on their old rq until wakeup (needed for the fair sleeper
  	 * bonus in place_entity()).
  	 *
  	 * If it was on the rq, we've just 'preempted' it, which does convert
  	 * ->vruntime to a relative base.
  	 *
  	 * Make sure both cases convert their relative position when migrating
  	 * to another cgroup's rq. This does somewhat interfere with the
  	 * fair sleeper stuff for the first placement, but who cares.
  	 */
  	if (!on_rq)
  		p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
  	set_task_rq(p, task_cpu(p));
88ec22d3e   Peter Zijlstra   sched: Remove the...
4078
  	if (!on_rq)
b2b5ce022   Peter Zijlstra   sched, cgroup: Fi...
4079
  		p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
810b38179   Peter Zijlstra   sched: retain vru...
4080
4081
  }
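
  /*
   * Worked example for the sleeping (!on_rq) case above: if the old
   * cfs_rq's min_vruntime is 5000 and the task went to sleep at vruntime
   * 5200, the move carries over the relative value 200; after
   * set_task_rq() the new cfs_rq's min_vruntime (say 9000) is added back,
   * giving 9200, so the task keeps its +200 offset instead of an absolute
   * value taken from the old group's timeline.
   */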
  #endif
6d686f456   H Hartley Sweeten   sched: Don't expo...
4082
  static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
0d721cead   Peter Williams   sched: Simplify s...
4083
4084
  {
  	struct sched_entity *se = &task->se;
0d721cead   Peter Williams   sched: Simplify s...
  	unsigned int rr_interval = 0;
  
  	/*
  	 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
  	 * idle runqueue:
  	 */
0d721cead   Peter Williams   sched: Simplify s...
4091
4092
  	if (rq->cfs.load.weight)
  		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
0d721cead   Peter Williams   sched: Simplify s...
4093
4094
4095
  
  	return rr_interval;
  }
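
  /*
   * Worked example: with two equally weighted SCHED_OTHER tasks on the
   * runqueue and a 6ms latency target (ignoring the ilog(ncpus) scaling
   * of sysctl_sched_latency), sched_slice() is roughly 3ms, so the
   * reported interval is 3 jiffies at HZ=1000 and rounds down to 0 at
   * HZ=100.
   */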
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4096
4097
4098
  /*
   * All the scheduling class methods:
   */
5522d5d5f   Ingo Molnar   sched: mark sched...
4099
4100
  static const struct sched_class fair_sched_class = {
  	.next			= &idle_sched_class,
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4101
4102
4103
  	.enqueue_task		= enqueue_task_fair,
  	.dequeue_task		= dequeue_task_fair,
  	.yield_task		= yield_task_fair,
d95f41220   Mike Galbraith   sched: Add yield_...
4104
  	.yield_to_task		= yield_to_task_fair,
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4105

2e09bf556   Ingo Molnar   sched: wakeup gra...
4106
  	.check_preempt_curr	= check_preempt_wakeup,
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4107
4108
4109
  
  	.pick_next_task		= pick_next_task_fair,
  	.put_prev_task		= put_prev_task_fair,
681f3e685   Peter Williams   sched: isolate SM...
4110
  #ifdef CONFIG_SMP
4ce72a2c0   Li Zefan   sched: add CONFIG...
4111
  	.select_task_rq		= select_task_rq_fair,
0bcdcf28c   Christian Ehrhardt   sched: Fix missin...
4112
4113
  	.rq_online		= rq_online_fair,
  	.rq_offline		= rq_offline_fair,
88ec22d3e   Peter Zijlstra   sched: Remove the...
4114
4115
  
  	.task_waking		= task_waking_fair,
681f3e685   Peter Williams   sched: isolate SM...
4116
  #endif
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4117

83b699ed2   Srivatsa Vaddagiri   sched: revert rec...
4118
  	.set_curr_task          = set_curr_task_fair,
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4119
  	.task_tick		= task_tick_fair,
cd29fe6f2   Peter Zijlstra   sched: Sanitize f...
4120
  	.task_fork		= task_fork_fair,
cb4698450   Steven Rostedt   sched: RT-balance...
4121
4122
  
  	.prio_changed		= prio_changed_fair,
da7a735e5   Peter Zijlstra   sched: Fix switch...
4123
  	.switched_from		= switched_from_fair,
cb4698450   Steven Rostedt   sched: RT-balance...
4124
  	.switched_to		= switched_to_fair,
810b38179   Peter Zijlstra   sched: retain vru...
4125

0d721cead   Peter Williams   sched: Simplify s...
4126
  	.get_rr_interval	= get_rr_interval_fair,
810b38179   Peter Zijlstra   sched: retain vru...
4127
  #ifdef CONFIG_FAIR_GROUP_SCHED
b2b5ce022   Peter Zijlstra   sched, cgroup: Fi...
4128
  	.task_move_group	= task_move_group_fair,
810b38179   Peter Zijlstra   sched: retain vru...
4129
  #endif
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4130
4131
4132
  };
  
  #ifdef CONFIG_SCHED_DEBUG
5cef9eca3   Ingo Molnar   sched: remove the...
4133
  static void print_cfs_stats(struct seq_file *m, int cpu)
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4134
  {
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4135
  	struct cfs_rq *cfs_rq;
5973e5b95   Peter Zijlstra   sched: fix: don't...
4136
  	rcu_read_lock();
c3b64f1e4   Ingo Molnar   sched: clean up s...
4137
  	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
5cef9eca3   Ingo Molnar   sched: remove the...
4138
  		print_cfs_rq(m, cpu, cfs_rq);
5973e5b95   Peter Zijlstra   sched: fix: don't...
4139
  	rcu_read_unlock();
bf0f6f24a   Ingo Molnar   sched: cfs core, ...
4140
4141
  }
  #endif