Commit 1158ddb55416855fd17abe3214298f736f00426a

Authored by Kirill Tkhai
Committed by Ingo Molnar
Parent: a59f4e079d

sched/rt: Add reschedule check to switched_from_rt()

Reschedule rq->curr if the first RT task has just been
pulled to the rq.

Signed-off-by: Kirill V Tkhai <tkhai@yandex.ru>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tkhai Kirill <tkhai@yandex.ru>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/118761353614535@web28f.yandex.ru
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Showing 1 changed file with 5 additions and 2 deletions
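The changed hunk itself lives in switched_from_rt(), further down the file than the context lines reproduced below, so it is not visible in this excerpt. As a rough sketch of the behaviour the commit message describes, not the literal patch text, the check could look roughly like the following; treating pull_rt_task() as returning nonzero when it pulled at least one RT task, and resched_task() as marking the current task for reschedule, are assumptions of this sketch:

/*
 * Sketch only: illustrates "reschedule rq->curr if the first RT task
 * has just been pulled to the rq", per the commit message above.
 */
static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
        /*
         * Only bother when p was the last queued RT task on this rq;
         * otherwise the remaining RT tasks keep the rq busy anyway.
         */
        if (!p->on_rq || rq->rt.rt_nr_running)
                return;

        /*
         * If the pull brought in an RT task, the current (now non-RT)
         * task should give up the CPU: mark it for reschedule.
         */
        if (pull_rt_task(rq))
                resched_task(rq->curr);
}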

/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

#include "sched.h"

#include <linux/slab.h>

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

struct rt_bandwidth def_rt_bandwidth;

static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
        struct rt_bandwidth *rt_b =
                container_of(timer, struct rt_bandwidth, rt_period_timer);
        ktime_t now;
        int overrun;
        int idle = 0;

        for (;;) {
                now = hrtimer_cb_get_time(timer);
                overrun = hrtimer_forward(timer, now, rt_b->rt_period);

                if (!overrun)
                        break;

                idle = do_sched_rt_period_timer(rt_b, overrun);
        }

        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
        rt_b->rt_period = ns_to_ktime(period);
        rt_b->rt_runtime = runtime;

        raw_spin_lock_init(&rt_b->rt_runtime_lock);

        hrtimer_init(&rt_b->rt_period_timer,
                        CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        rt_b->rt_period_timer.function = sched_rt_period_timer;
}

static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
        if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
                return;

        if (hrtimer_active(&rt_b->rt_period_timer))
                return;

        raw_spin_lock(&rt_b->rt_runtime_lock);
        start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
        raw_spin_unlock(&rt_b->rt_runtime_lock);
}

void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
{
        struct rt_prio_array *array;
        int i;

        array = &rt_rq->active;
        for (i = 0; i < MAX_RT_PRIO; i++) {
                INIT_LIST_HEAD(array->queue + i);
                __clear_bit(i, array->bitmap);
        }
        /* delimiter for bitsearch: */
        __set_bit(MAX_RT_PRIO, array->bitmap);

#if defined CONFIG_SMP
        rt_rq->highest_prio.curr = MAX_RT_PRIO;
        rt_rq->highest_prio.next = MAX_RT_PRIO;
        rt_rq->rt_nr_migratory = 0;
        rt_rq->overloaded = 0;
        plist_head_init(&rt_rq->pushable_tasks);
#endif

        rt_rq->rt_time = 0;
        rt_rq->rt_throttled = 0;
        rt_rq->rt_runtime = 0;
        raw_spin_lock_init(&rt_rq->rt_runtime_lock);
}

#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
        hrtimer_cancel(&rt_b->rt_period_timer);
}

#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_SCHED_DEBUG
        WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
        return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
        return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
        return rt_se->rt_rq;
}

void free_rt_sched_group(struct task_group *tg)
{
        int i;

        if (tg->rt_se)
                destroy_rt_bandwidth(&tg->rt_bandwidth);

        for_each_possible_cpu(i) {
                if (tg->rt_rq)
                        kfree(tg->rt_rq[i]);
                if (tg->rt_se)
                        kfree(tg->rt_se[i]);
        }

        kfree(tg->rt_rq);
        kfree(tg->rt_se);
}

void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
                struct sched_rt_entity *rt_se, int cpu,
                struct sched_rt_entity *parent)
{
        struct rq *rq = cpu_rq(cpu);

        rt_rq->highest_prio.curr = MAX_RT_PRIO;
        rt_rq->rt_nr_boosted = 0;
        rt_rq->rq = rq;
        rt_rq->tg = tg;

        tg->rt_rq[cpu] = rt_rq;
        tg->rt_se[cpu] = rt_se;

        if (!rt_se)
                return;

        if (!parent)
                rt_se->rt_rq = &rq->rt;
        else
                rt_se->rt_rq = parent->my_q;

        rt_se->my_q = rt_rq;
        rt_se->parent = parent;
        INIT_LIST_HEAD(&rt_se->run_list);
}

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
        struct rt_rq *rt_rq;
        struct sched_rt_entity *rt_se;
        int i;

        tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
        if (!tg->rt_rq)
                goto err;
        tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
        if (!tg->rt_se)
                goto err;

        init_rt_bandwidth(&tg->rt_bandwidth,
                        ktime_to_ns(def_rt_bandwidth.rt_period), 0);

        for_each_possible_cpu(i) {
                rt_rq = kzalloc_node(sizeof(struct rt_rq),
                                     GFP_KERNEL, cpu_to_node(i));
                if (!rt_rq)
                        goto err;

                rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
                                     GFP_KERNEL, cpu_to_node(i));
                if (!rt_se)
                        goto err_free_rq;

                init_rt_rq(rt_rq, cpu_rq(i));
                rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
                init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
        }

        return 1;

err_free_rq:
        kfree(rt_rq);
err:
        return 0;
}

#else /* CONFIG_RT_GROUP_SCHED */

#define rt_entity_is_task(rt_se) (1)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
        return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
        return container_of(rt_rq, struct rq, rt);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
        struct task_struct *p = rt_task_of(rt_se);
        struct rq *rq = task_rq(p);

        return &rq->rt;
}

void free_rt_sched_group(struct task_group *tg) { }

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
        return 1;
}
#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static inline int rt_overloaded(struct rq *rq)
{
        return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
        if (!rq->online)
                return;

        cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
        /*
         * Make sure the mask is visible before we set
         * the overload count. That is checked to determine
         * if we should look at the mask. It would be a shame
         * if we looked at the mask, but the mask was not
         * updated yet.
         */
        wmb();
        atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
        if (!rq->online)
                return;

        /* the order here really doesn't matter */
        atomic_dec(&rq->rd->rto_count);
        cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}

static void update_rt_migration(struct rt_rq *rt_rq)
{
        if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
                if (!rt_rq->overloaded) {
                        rt_set_overload(rq_of_rt_rq(rt_rq));
                        rt_rq->overloaded = 1;
                }
        } else if (rt_rq->overloaded) {
                rt_clear_overload(rq_of_rt_rq(rt_rq));
                rt_rq->overloaded = 0;
        }
}

static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        struct task_struct *p;

        if (!rt_entity_is_task(rt_se))
                return;

        p = rt_task_of(rt_se);
        rt_rq = &rq_of_rt_rq(rt_rq)->rt;

        rt_rq->rt_nr_total++;
        if (p->nr_cpus_allowed > 1)
                rt_rq->rt_nr_migratory++;

        update_rt_migration(rt_rq);
}

static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        struct task_struct *p;

        if (!rt_entity_is_task(rt_se))
                return;

        p = rt_task_of(rt_se);
        rt_rq = &rq_of_rt_rq(rt_rq)->rt;

        rt_rq->rt_nr_total--;
        if (p->nr_cpus_allowed > 1)
                rt_rq->rt_nr_migratory--;

        update_rt_migration(rt_rq);
}

static inline int has_pushable_tasks(struct rq *rq)
{
        return !plist_head_empty(&rq->rt.pushable_tasks);
}

static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
        plist_node_init(&p->pushable_tasks, p->prio);
        plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);

        /* Update the highest prio pushable task */
        if (p->prio < rq->rt.highest_prio.next)
                rq->rt.highest_prio.next = p->prio;
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);

        /* Update the new highest prio pushable task */
        if (has_pushable_tasks(rq)) {
                p = plist_first_entry(&rq->rt.pushable_tasks,
                                      struct task_struct, pushable_tasks);
                rq->rt.highest_prio.next = p->prio;
        } else
                rq->rt.highest_prio.next = MAX_RT_PRIO;
}

#else

static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

#endif /* CONFIG_SMP */

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
        return !list_empty(&rt_se->run_list);
}

#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
        if (!rt_rq->tg)
                return RUNTIME_INF;

        return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
        return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

typedef struct task_group *rt_rq_iter_t;

static inline struct task_group *next_task_group(struct task_group *tg)
{
        do {
                tg = list_entry_rcu(tg->list.next,
                        typeof(struct task_group), list);
        } while (&tg->list != &task_groups && task_group_is_autogroup(tg));

        if (&tg->list == &task_groups)
                tg = NULL;

        return tg;
}

#define for_each_rt_rq(rt_rq, iter, rq)                                 \
        for (iter = container_of(&task_groups, typeof(*iter), list);   \
                (iter = next_task_group(iter)) &&                       \
                (rt_rq = iter->rt_rq[cpu_of(rq)]);)

static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
        list_add_rcu(&rt_rq->leaf_rt_rq_list,
                        &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
}

static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
        list_del_rcu(&rt_rq->leaf_rt_rq_list);
}

#define for_each_leaf_rt_rq(rt_rq, rq) \
        list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)

#define for_each_sched_rt_entity(rt_se) \
        for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
        return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);

static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
        struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
        struct sched_rt_entity *rt_se;

        int cpu = cpu_of(rq_of_rt_rq(rt_rq));

        rt_se = rt_rq->tg->rt_se[cpu];

        if (rt_rq->rt_nr_running) {
                if (rt_se && !on_rt_rq(rt_se))
                        enqueue_rt_entity(rt_se, false);
                if (rt_rq->highest_prio.curr < curr->prio)
                        resched_task(curr);
        }
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
        struct sched_rt_entity *rt_se;
        int cpu = cpu_of(rq_of_rt_rq(rt_rq));

        rt_se = rt_rq->tg->rt_se[cpu];

        if (rt_se && on_rt_rq(rt_se))
                dequeue_rt_entity(rt_se);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
        return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
        struct rt_rq *rt_rq = group_rt_rq(rt_se);
        struct task_struct *p;

        if (rt_rq)
                return !!rt_rq->rt_nr_boosted;

        p = rt_task_of(rt_se);
        return p->prio != p->normal_prio;
}

#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
        return cpu_rq(smp_processor_id())->rd->span;
}
#else
static inline const struct cpumask *sched_rt_period_mask(void)
{
        return cpu_online_mask;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
        return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
        return &rt_rq->tg->rt_bandwidth;
}

#else /* !CONFIG_RT_GROUP_SCHED */

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
        return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
        return ktime_to_ns(def_rt_bandwidth.rt_period);
}

typedef struct rt_rq *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq) \
        for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
}

static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
}

#define for_each_leaf_rt_rq(rt_rq, rq) \
        for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

#define for_each_sched_rt_entity(rt_se) \
        for (; rt_se; rt_se = NULL)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
        return NULL;
}

static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
        if (rt_rq->rt_nr_running)
                resched_task(rq_of_rt_rq(rt_rq)->curr);
}

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
        return rt_rq->rt_throttled;
}

static inline const struct cpumask *sched_rt_period_mask(void)
{
        return cpu_online_mask;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
        return &cpu_rq(cpu)->rt;
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
        return &def_rt_bandwidth;
}

#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP
/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 */
static int do_balance_runtime(struct rt_rq *rt_rq)
{
        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
        struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
        int i, weight, more = 0;
        u64 rt_period;

        weight = cpumask_weight(rd->span);

        raw_spin_lock(&rt_b->rt_runtime_lock);
        rt_period = ktime_to_ns(rt_b->rt_period);
        for_each_cpu(i, rd->span) {
                struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                s64 diff;

                if (iter == rt_rq)
                        continue;

                raw_spin_lock(&iter->rt_runtime_lock);
                /*
                 * Either all rqs have inf runtime and there's nothing to steal
                 * or __disable_runtime() below sets a specific rq to inf to
                 * indicate its been disabled and disalow stealing.
                 */
                if (iter->rt_runtime == RUNTIME_INF)
                        goto next;

                /*
                 * From runqueues with spare time, take 1/n part of their
                 * spare time, but no more than our period.
                 */
                diff = iter->rt_runtime - iter->rt_time;
                if (diff > 0) {
                        diff = div_u64((u64)diff, weight);
                        if (rt_rq->rt_runtime + diff > rt_period)
                                diff = rt_period - rt_rq->rt_runtime;
                        iter->rt_runtime -= diff;
                        rt_rq->rt_runtime += diff;
                        more = 1;
                        if (rt_rq->rt_runtime == rt_period) {
                                raw_spin_unlock(&iter->rt_runtime_lock);
                                break;
                        }
                }
next:
                raw_spin_unlock(&iter->rt_runtime_lock);
        }
        raw_spin_unlock(&rt_b->rt_runtime_lock);

        return more;
}

/*
 * Ensure this RQ takes back all the runtime it lend to its neighbours.
 */
static void __disable_runtime(struct rq *rq)
{
        struct root_domain *rd = rq->rd;
        rt_rq_iter_t iter;
        struct rt_rq *rt_rq;

        if (unlikely(!scheduler_running))
                return;

        for_each_rt_rq(rt_rq, iter, rq) {
                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
                s64 want;
                int i;

                raw_spin_lock(&rt_b->rt_runtime_lock);
                raw_spin_lock(&rt_rq->rt_runtime_lock);
                /*
                 * Either we're all inf and nobody needs to borrow, or we're
                 * already disabled and thus have nothing to do, or we have
                 * exactly the right amount of runtime to take out.
                 */
                if (rt_rq->rt_runtime == RUNTIME_INF ||
                                rt_rq->rt_runtime == rt_b->rt_runtime)
                        goto balanced;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);

                /*
                 * Calculate the difference between what we started out with
                 * and what we current have, that's the amount of runtime
                 * we lend and now have to reclaim.
                 */
                want = rt_b->rt_runtime - rt_rq->rt_runtime;

                /*
                 * Greedy reclaim, take back as much as we can.
                 */
                for_each_cpu(i, rd->span) {
                        struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                        s64 diff;

                        /*
                         * Can't reclaim from ourselves or disabled runqueues.
                         */
                        if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
                                continue;

                        raw_spin_lock(&iter->rt_runtime_lock);
                        if (want > 0) {
                                diff = min_t(s64, iter->rt_runtime, want);
                                iter->rt_runtime -= diff;
                                want -= diff;
                        } else {
                                iter->rt_runtime -= want;
                                want -= want;
                        }
                        raw_spin_unlock(&iter->rt_runtime_lock);

                        if (!want)
                                break;
                }

                raw_spin_lock(&rt_rq->rt_runtime_lock);
                /*
                 * We cannot be left wanting - that would mean some runtime
                 * leaked out of the system.
                 */
                BUG_ON(want);
balanced:
                /*
                 * Disable all the borrow logic by pretending we have inf
                 * runtime - in which case borrowing doesn't make sense.
                 */
                rt_rq->rt_runtime = RUNTIME_INF;
                rt_rq->rt_throttled = 0;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                raw_spin_unlock(&rt_b->rt_runtime_lock);
        }
}

static void disable_runtime(struct rq *rq)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&rq->lock, flags);
        __disable_runtime(rq);
        raw_spin_unlock_irqrestore(&rq->lock, flags);
}

static void __enable_runtime(struct rq *rq)
{
        rt_rq_iter_t iter;
        struct rt_rq *rt_rq;

        if (unlikely(!scheduler_running))
                return;

        /*
         * Reset each runqueue's bandwidth settings
         */
        for_each_rt_rq(rt_rq, iter, rq) {
                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

                raw_spin_lock(&rt_b->rt_runtime_lock);
                raw_spin_lock(&rt_rq->rt_runtime_lock);
                rt_rq->rt_runtime = rt_b->rt_runtime;
                rt_rq->rt_time = 0;
                rt_rq->rt_throttled = 0;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                raw_spin_unlock(&rt_b->rt_runtime_lock);
        }
}

static void enable_runtime(struct rq *rq)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&rq->lock, flags);
        __enable_runtime(rq);
        raw_spin_unlock_irqrestore(&rq->lock, flags);
}

int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
        int cpu = (int)(long)hcpu;

        switch (action) {
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                disable_runtime(cpu_rq(cpu));
                return NOTIFY_OK;

        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                enable_runtime(cpu_rq(cpu));
                return NOTIFY_OK;

        default:
                return NOTIFY_DONE;
        }
}

static int balance_runtime(struct rt_rq *rt_rq)
{
        int more = 0;

        if (!sched_feat(RT_RUNTIME_SHARE))
                return more;

        if (rt_rq->rt_time > rt_rq->rt_runtime) {
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                more = do_balance_runtime(rt_rq);
                raw_spin_lock(&rt_rq->rt_runtime_lock);
        }

        return more;
}
#else /* !CONFIG_SMP */
static inline int balance_runtime(struct rt_rq *rt_rq)
{
        return 0;
}
#endif /* CONFIG_SMP */

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
        int i, idle = 1, throttled = 0;
        const struct cpumask *span;

        span = sched_rt_period_mask();
#ifdef CONFIG_RT_GROUP_SCHED
        /*
         * FIXME: isolated CPUs should really leave the root task group,
         * whether they are isolcpus or were isolated via cpusets, lest
         * the timer run on a CPU which does not service all runqueues,
         * potentially leaving other CPUs indefinitely throttled. If
         * isolation is really required, the user will turn the throttle
         * off to kill the perturbations it causes anyway. Meanwhile,
         * this maintains functionality for boot and/or troubleshooting.
         */
        if (rt_b == &root_task_group.rt_bandwidth)
                span = cpu_online_mask;
#endif
        for_each_cpu(i, span) {
                int enqueue = 0;
                struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
                struct rq *rq = rq_of_rt_rq(rt_rq);

                raw_spin_lock(&rq->lock);
                if (rt_rq->rt_time) {
                        u64 runtime;

                        raw_spin_lock(&rt_rq->rt_runtime_lock);
                        if (rt_rq->rt_throttled)
                                balance_runtime(rt_rq);
                        runtime = rt_rq->rt_runtime;
                        rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
                        if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
                                rt_rq->rt_throttled = 0;
                                enqueue = 1;

                                /*
                                 * Force a clock update if the CPU was idle,
                                 * lest wakeup -> unthrottle time accumulate.
                                 */
                                if (rt_rq->rt_nr_running && rq->curr == rq->idle)
                                        rq->skip_clock_update = -1;
                        }
                        if (rt_rq->rt_time || rt_rq->rt_nr_running)
                                idle = 0;
                        raw_spin_unlock(&rt_rq->rt_runtime_lock);
                } else if (rt_rq->rt_nr_running) {
                        idle = 0;
                        if (!rt_rq_throttled(rt_rq))
                                enqueue = 1;
                }
                if (rt_rq->rt_throttled)
                        throttled = 1;

                if (enqueue)
                        sched_rt_rq_enqueue(rt_rq);
                raw_spin_unlock(&rq->lock);
        }

        if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
                return 1;

        return idle;
}

static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
        struct rt_rq *rt_rq = group_rt_rq(rt_se);

        if (rt_rq)
                return rt_rq->highest_prio.curr;
#endif

        return rt_task_of(rt_se)->prio;
}

static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
        u64 runtime = sched_rt_runtime(rt_rq);

        if (rt_rq->rt_throttled)
                return rt_rq_throttled(rt_rq);

        if (runtime >= sched_rt_period(rt_rq))
                return 0;

        balance_runtime(rt_rq);
        runtime = sched_rt_runtime(rt_rq);
        if (runtime == RUNTIME_INF)
                return 0;

        if (rt_rq->rt_time > runtime) {
                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

                /*
                 * Don't actually throttle groups that have no runtime assigned
                 * but accrue some time due to boosting.
                 */
                if (likely(rt_b->rt_runtime)) {
                        static bool once = false;

                        rt_rq->rt_throttled = 1;

                        if (!once) {
                                once = true;
                                printk_sched("sched: RT throttling activated\n");
                        }
                } else {
                        /*
                         * In case we did anyway, make it go away,
                         * replenishment is a joke, since it will replenish us
                         * with exactly 0 ns.
                         */
                        rt_rq->rt_time = 0;
                }

                if (rt_rq_throttled(rt_rq)) {
                        sched_rt_rq_dequeue(rt_rq);
                        return 1;
                }
        }

        return 0;
}

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
        struct task_struct *curr = rq->curr;
        struct sched_rt_entity *rt_se = &curr->rt;
        struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
        u64 delta_exec;

        if (curr->sched_class != &rt_sched_class)
                return;

        delta_exec = rq->clock_task - curr->se.exec_start;
        if (unlikely((s64)delta_exec < 0))
                delta_exec = 0;

        schedstat_set(curr->se.statistics.exec_max,
                      max(curr->se.statistics.exec_max, delta_exec));

        curr->se.sum_exec_runtime += delta_exec;
        account_group_exec_runtime(curr, delta_exec);

        curr->se.exec_start = rq->clock_task;
        cpuacct_charge(curr, delta_exec);

        sched_rt_avg_update(rq, delta_exec);

        if (!rt_bandwidth_enabled())
                return;

        for_each_sched_rt_entity(rt_se) {
                rt_rq = rt_rq_of_se(rt_se);

                if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
                        raw_spin_lock(&rt_rq->rt_runtime_lock);
                        rt_rq->rt_time += delta_exec;
                        if (sched_rt_runtime_exceeded(rt_rq))
                                resched_task(curr);
                        raw_spin_unlock(&rt_rq->rt_runtime_lock);
                }
        }
}

958 #if defined CONFIG_SMP 958 #if defined CONFIG_SMP
959 959
960 static void 960 static void
961 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) 961 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
962 { 962 {
963 struct rq *rq = rq_of_rt_rq(rt_rq); 963 struct rq *rq = rq_of_rt_rq(rt_rq);
964 964
965 if (rq->online && prio < prev_prio) 965 if (rq->online && prio < prev_prio)
966 cpupri_set(&rq->rd->cpupri, rq->cpu, prio); 966 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
967 } 967 }
968 968
969 static void 969 static void
970 dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) 970 dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
971 { 971 {
972 struct rq *rq = rq_of_rt_rq(rt_rq); 972 struct rq *rq = rq_of_rt_rq(rt_rq);
973 973
974 if (rq->online && rt_rq->highest_prio.curr != prev_prio) 974 if (rq->online && rt_rq->highest_prio.curr != prev_prio)
975 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); 975 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
976 } 976 }
977 977
978 #else /* CONFIG_SMP */ 978 #else /* CONFIG_SMP */
979 979
980 static inline 980 static inline
981 void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} 981 void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
982 static inline 982 static inline
983 void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} 983 void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
984 984
985 #endif /* CONFIG_SMP */ 985 #endif /* CONFIG_SMP */
986 986
987 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED 987 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
988 static void 988 static void
989 inc_rt_prio(struct rt_rq *rt_rq, int prio) 989 inc_rt_prio(struct rt_rq *rt_rq, int prio)
990 { 990 {
991 int prev_prio = rt_rq->highest_prio.curr; 991 int prev_prio = rt_rq->highest_prio.curr;
992 992
993 if (prio < prev_prio) 993 if (prio < prev_prio)
994 rt_rq->highest_prio.curr = prio; 994 rt_rq->highest_prio.curr = prio;
995 995
996 inc_rt_prio_smp(rt_rq, prio, prev_prio); 996 inc_rt_prio_smp(rt_rq, prio, prev_prio);
997 } 997 }
998 998
999 static void 999 static void
1000 dec_rt_prio(struct rt_rq *rt_rq, int prio) 1000 dec_rt_prio(struct rt_rq *rt_rq, int prio)
1001 { 1001 {
1002 int prev_prio = rt_rq->highest_prio.curr; 1002 int prev_prio = rt_rq->highest_prio.curr;
1003 1003
1004 if (rt_rq->rt_nr_running) { 1004 if (rt_rq->rt_nr_running) {
1005 1005
1006 WARN_ON(prio < prev_prio); 1006 WARN_ON(prio < prev_prio);
1007 1007
1008 /* 1008 /*
1009 * This may have been our highest task, and therefore 1009 * This may have been our highest task, and therefore
1010 * we may have some recomputation to do 1010 * we may have some recomputation to do
1011 */ 1011 */
1012 if (prio == prev_prio) { 1012 if (prio == prev_prio) {
1013 struct rt_prio_array *array = &rt_rq->active; 1013 struct rt_prio_array *array = &rt_rq->active;
1014 1014
1015 rt_rq->highest_prio.curr = 1015 rt_rq->highest_prio.curr =
1016 sched_find_first_bit(array->bitmap); 1016 sched_find_first_bit(array->bitmap);
1017 } 1017 }
1018 1018
1019 } else 1019 } else
1020 rt_rq->highest_prio.curr = MAX_RT_PRIO; 1020 rt_rq->highest_prio.curr = MAX_RT_PRIO;
1021 1021
1022 dec_rt_prio_smp(rt_rq, prio, prev_prio); 1022 dec_rt_prio_smp(rt_rq, prio, prev_prio);
1023 } 1023 }
1024 1024
1025 #else 1025 #else
1026 1026
1027 static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {} 1027 static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
1028 static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {} 1028 static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
1029 1029
1030 #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */ 1030 #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
1031 1031
1032 #ifdef CONFIG_RT_GROUP_SCHED 1032 #ifdef CONFIG_RT_GROUP_SCHED
1033 1033
1034 static void 1034 static void
1035 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1035 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1036 { 1036 {
1037 if (rt_se_boosted(rt_se)) 1037 if (rt_se_boosted(rt_se))
1038 rt_rq->rt_nr_boosted++; 1038 rt_rq->rt_nr_boosted++;
1039 1039
1040 if (rt_rq->tg) 1040 if (rt_rq->tg)
1041 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); 1041 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
1042 } 1042 }
1043 1043
1044 static void 1044 static void
1045 dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1045 dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1046 { 1046 {
1047 if (rt_se_boosted(rt_se)) 1047 if (rt_se_boosted(rt_se))
1048 rt_rq->rt_nr_boosted--; 1048 rt_rq->rt_nr_boosted--;
1049 1049
1050 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); 1050 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
1051 } 1051 }
1052 1052
1053 #else /* CONFIG_RT_GROUP_SCHED */ 1053 #else /* CONFIG_RT_GROUP_SCHED */
1054 1054
1055 static void 1055 static void
1056 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1056 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1057 { 1057 {
1058 start_rt_bandwidth(&def_rt_bandwidth); 1058 start_rt_bandwidth(&def_rt_bandwidth);
1059 } 1059 }
1060 1060
1061 static inline 1061 static inline
1062 void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} 1062 void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
1063 1063
1064 #endif /* CONFIG_RT_GROUP_SCHED */ 1064 #endif /* CONFIG_RT_GROUP_SCHED */
1065 1065
1066 static inline 1066 static inline
1067 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1067 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1068 { 1068 {
1069 int prio = rt_se_prio(rt_se); 1069 int prio = rt_se_prio(rt_se);
1070 1070
1071 WARN_ON(!rt_prio(prio)); 1071 WARN_ON(!rt_prio(prio));
1072 rt_rq->rt_nr_running++; 1072 rt_rq->rt_nr_running++;
1073 1073
1074 inc_rt_prio(rt_rq, prio); 1074 inc_rt_prio(rt_rq, prio);
1075 inc_rt_migration(rt_se, rt_rq); 1075 inc_rt_migration(rt_se, rt_rq);
1076 inc_rt_group(rt_se, rt_rq); 1076 inc_rt_group(rt_se, rt_rq);
1077 } 1077 }
1078 1078
1079 static inline 1079 static inline
1080 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1080 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1081 { 1081 {
1082 WARN_ON(!rt_prio(rt_se_prio(rt_se))); 1082 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
1083 WARN_ON(!rt_rq->rt_nr_running); 1083 WARN_ON(!rt_rq->rt_nr_running);
1084 rt_rq->rt_nr_running--; 1084 rt_rq->rt_nr_running--;
1085 1085
1086 dec_rt_prio(rt_rq, rt_se_prio(rt_se)); 1086 dec_rt_prio(rt_rq, rt_se_prio(rt_se));
1087 dec_rt_migration(rt_se, rt_rq); 1087 dec_rt_migration(rt_se, rt_rq);
1088 dec_rt_group(rt_se, rt_rq); 1088 dec_rt_group(rt_se, rt_rq);
1089 } 1089 }
1090 1090
1091 static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) 1091 static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
1092 { 1092 {
1093 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 1093 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1094 struct rt_prio_array *array = &rt_rq->active; 1094 struct rt_prio_array *array = &rt_rq->active;
1095 struct rt_rq *group_rq = group_rt_rq(rt_se); 1095 struct rt_rq *group_rq = group_rt_rq(rt_se);
1096 struct list_head *queue = array->queue + rt_se_prio(rt_se); 1096 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1097 1097
1098 /* 1098 /*
1099 * Don't enqueue the group if it's throttled, or when empty. 1099 * Don't enqueue the group if it's throttled, or when empty.
1100 * The latter is a consequence of the former when a child group 1100 * The latter is a consequence of the former when a child group
1101 * gets throttled and the current group doesn't have any other 1101 * gets throttled and the current group doesn't have any other
1102 * active members. 1102 * active members.
1103 */ 1103 */
1104 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) 1104 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
1105 return; 1105 return;
1106 1106
1107 if (!rt_rq->rt_nr_running) 1107 if (!rt_rq->rt_nr_running)
1108 list_add_leaf_rt_rq(rt_rq); 1108 list_add_leaf_rt_rq(rt_rq);
1109 1109
1110 if (head) 1110 if (head)
1111 list_add(&rt_se->run_list, queue); 1111 list_add(&rt_se->run_list, queue);
1112 else 1112 else
1113 list_add_tail(&rt_se->run_list, queue); 1113 list_add_tail(&rt_se->run_list, queue);
1114 __set_bit(rt_se_prio(rt_se), array->bitmap); 1114 __set_bit(rt_se_prio(rt_se), array->bitmap);
1115 1115
1116 inc_rt_tasks(rt_se, rt_rq); 1116 inc_rt_tasks(rt_se, rt_rq);
1117 } 1117 }
1118 1118
1119 static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) 1119 static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
1120 { 1120 {
1121 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 1121 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1122 struct rt_prio_array *array = &rt_rq->active; 1122 struct rt_prio_array *array = &rt_rq->active;
1123 1123
1124 list_del_init(&rt_se->run_list); 1124 list_del_init(&rt_se->run_list);
1125 if (list_empty(array->queue + rt_se_prio(rt_se))) 1125 if (list_empty(array->queue + rt_se_prio(rt_se)))
1126 __clear_bit(rt_se_prio(rt_se), array->bitmap); 1126 __clear_bit(rt_se_prio(rt_se), array->bitmap);
1127 1127
1128 dec_rt_tasks(rt_se, rt_rq); 1128 dec_rt_tasks(rt_se, rt_rq);
1129 if (!rt_rq->rt_nr_running) 1129 if (!rt_rq->rt_nr_running)
1130 list_del_leaf_rt_rq(rt_rq); 1130 list_del_leaf_rt_rq(rt_rq);
1131 } 1131 }
1132 1132
1133 /* 1133 /*
1134 * Because the prio of an upper entry depends on the lower 1134 * Because the prio of an upper entry depends on the lower
1135 * entries, we must remove entries top-down. 1135 * entries, we must remove entries top-down.
1136 */ 1136 */
1137 static void dequeue_rt_stack(struct sched_rt_entity *rt_se) 1137 static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
1138 { 1138 {
1139 struct sched_rt_entity *back = NULL; 1139 struct sched_rt_entity *back = NULL;
1140 1140
1141 for_each_sched_rt_entity(rt_se) { 1141 for_each_sched_rt_entity(rt_se) {
1142 rt_se->back = back; 1142 rt_se->back = back;
1143 back = rt_se; 1143 back = rt_se;
1144 } 1144 }
1145 1145
1146 for (rt_se = back; rt_se; rt_se = rt_se->back) { 1146 for (rt_se = back; rt_se; rt_se = rt_se->back) {
1147 if (on_rt_rq(rt_se)) 1147 if (on_rt_rq(rt_se))
1148 __dequeue_rt_entity(rt_se); 1148 __dequeue_rt_entity(rt_se);
1149 } 1149 }
1150 } 1150 }
1151 1151
1152 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) 1152 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
1153 { 1153 {
1154 dequeue_rt_stack(rt_se); 1154 dequeue_rt_stack(rt_se);
1155 for_each_sched_rt_entity(rt_se) 1155 for_each_sched_rt_entity(rt_se)
1156 __enqueue_rt_entity(rt_se, head); 1156 __enqueue_rt_entity(rt_se, head);
1157 } 1157 }
1158 1158
1159 static void dequeue_rt_entity(struct sched_rt_entity *rt_se) 1159 static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
1160 { 1160 {
1161 dequeue_rt_stack(rt_se); 1161 dequeue_rt_stack(rt_se);
1162 1162
1163 for_each_sched_rt_entity(rt_se) { 1163 for_each_sched_rt_entity(rt_se) {
1164 struct rt_rq *rt_rq = group_rt_rq(rt_se); 1164 struct rt_rq *rt_rq = group_rt_rq(rt_se);
1165 1165
1166 if (rt_rq && rt_rq->rt_nr_running) 1166 if (rt_rq && rt_rq->rt_nr_running)
1167 __enqueue_rt_entity(rt_se, false); 1167 __enqueue_rt_entity(rt_se, false);
1168 } 1168 }
1169 } 1169 }
1170 1170
1171 /* 1171 /*
1172 * Adding/removing a task to/from a priority array: 1172 * Adding/removing a task to/from a priority array:
1173 */ 1173 */
1174 static void 1174 static void
1175 enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) 1175 enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1176 { 1176 {
1177 struct sched_rt_entity *rt_se = &p->rt; 1177 struct sched_rt_entity *rt_se = &p->rt;
1178 1178
1179 if (flags & ENQUEUE_WAKEUP) 1179 if (flags & ENQUEUE_WAKEUP)
1180 rt_se->timeout = 0; 1180 rt_se->timeout = 0;
1181 1181
1182 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); 1182 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
1183 1183
1184 if (!task_current(rq, p) && p->nr_cpus_allowed > 1) 1184 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1185 enqueue_pushable_task(rq, p); 1185 enqueue_pushable_task(rq, p);
1186 1186
1187 inc_nr_running(rq); 1187 inc_nr_running(rq);
1188 } 1188 }
1189 1189
1190 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) 1190 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1191 { 1191 {
1192 struct sched_rt_entity *rt_se = &p->rt; 1192 struct sched_rt_entity *rt_se = &p->rt;
1193 1193
1194 update_curr_rt(rq); 1194 update_curr_rt(rq);
1195 dequeue_rt_entity(rt_se); 1195 dequeue_rt_entity(rt_se);
1196 1196
1197 dequeue_pushable_task(rq, p); 1197 dequeue_pushable_task(rq, p);
1198 1198
1199 dec_nr_running(rq); 1199 dec_nr_running(rq);
1200 } 1200 }
1201 1201
1202 /* 1202 /*
1203 * Put task to the head or the end of the run list without the overhead of 1203 * Put task to the head or the end of the run list without the overhead of
1204 * dequeue followed by enqueue. 1204 * dequeue followed by enqueue.
1205 */ 1205 */
1206 static void 1206 static void
1207 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) 1207 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
1208 { 1208 {
1209 if (on_rt_rq(rt_se)) { 1209 if (on_rt_rq(rt_se)) {
1210 struct rt_prio_array *array = &rt_rq->active; 1210 struct rt_prio_array *array = &rt_rq->active;
1211 struct list_head *queue = array->queue + rt_se_prio(rt_se); 1211 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1212 1212
1213 if (head) 1213 if (head)
1214 list_move(&rt_se->run_list, queue); 1214 list_move(&rt_se->run_list, queue);
1215 else 1215 else
1216 list_move_tail(&rt_se->run_list, queue); 1216 list_move_tail(&rt_se->run_list, queue);
1217 } 1217 }
1218 } 1218 }
1219 1219
1220 static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head) 1220 static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
1221 { 1221 {
1222 struct sched_rt_entity *rt_se = &p->rt; 1222 struct sched_rt_entity *rt_se = &p->rt;
1223 struct rt_rq *rt_rq; 1223 struct rt_rq *rt_rq;
1224 1224
1225 for_each_sched_rt_entity(rt_se) { 1225 for_each_sched_rt_entity(rt_se) {
1226 rt_rq = rt_rq_of_se(rt_se); 1226 rt_rq = rt_rq_of_se(rt_se);
1227 requeue_rt_entity(rt_rq, rt_se, head); 1227 requeue_rt_entity(rt_rq, rt_se, head);
1228 } 1228 }
1229 } 1229 }
1230 1230
1231 static void yield_task_rt(struct rq *rq) 1231 static void yield_task_rt(struct rq *rq)
1232 { 1232 {
1233 requeue_task_rt(rq, rq->curr, 0); 1233 requeue_task_rt(rq, rq->curr, 0);
1234 } 1234 }
1235 1235
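/*
 * Illustrative userspace sketch (not kernel code): sched_yield() called from
 * a SCHED_RR task reaches yield_task_rt() above, which requeues the caller to
 * the tail of its priority list. Assumes the process may set an RT policy
 * (CAP_SYS_NICE or root); priority 10 is an arbitrary example value.
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };

	if (sched_setscheduler(0, SCHED_RR, &sp) == -1) {
		perror("sched_setscheduler");
		return 1;
	}

	/* Let other tasks queued at the same RT priority run first. */
	sched_yield();

	printf("resumed after yielding within SCHED_RR priority 10\n");
	return 0;
}
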
1236 #ifdef CONFIG_SMP 1236 #ifdef CONFIG_SMP
1237 static int find_lowest_rq(struct task_struct *task); 1237 static int find_lowest_rq(struct task_struct *task);
1238 1238
1239 static int 1239 static int
1240 select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) 1240 select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
1241 { 1241 {
1242 struct task_struct *curr; 1242 struct task_struct *curr;
1243 struct rq *rq; 1243 struct rq *rq;
1244 int cpu; 1244 int cpu;
1245 1245
1246 cpu = task_cpu(p); 1246 cpu = task_cpu(p);
1247 1247
1248 if (p->nr_cpus_allowed == 1) 1248 if (p->nr_cpus_allowed == 1)
1249 goto out; 1249 goto out;
1250 1250
1251 /* For anything but wake ups, just return the task_cpu */ 1251 /* For anything but wake ups, just return the task_cpu */
1252 if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) 1252 if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
1253 goto out; 1253 goto out;
1254 1254
1255 rq = cpu_rq(cpu); 1255 rq = cpu_rq(cpu);
1256 1256
1257 rcu_read_lock(); 1257 rcu_read_lock();
1258 curr = ACCESS_ONCE(rq->curr); /* unlocked access */ 1258 curr = ACCESS_ONCE(rq->curr); /* unlocked access */
1259 1259
1260 /* 1260 /*
1261 * If the current task on @p's runqueue is an RT task, then 1261 * If the current task on @p's runqueue is an RT task, then
1262 * try to see if we can wake this RT task up on another 1262 * try to see if we can wake this RT task up on another
1263 * runqueue. Otherwise simply start this RT task 1263 * runqueue. Otherwise simply start this RT task
1264 * on its current runqueue. 1264 * on its current runqueue.
1265 * 1265 *
1266 * We want to avoid overloading runqueues. If the woken 1266 * We want to avoid overloading runqueues. If the woken
1267 * task is of higher priority, then it will stay on this CPU 1267 * task is of higher priority, then it will stay on this CPU
1268 * and the lower prio task should be moved to another CPU. 1268 * and the lower prio task should be moved to another CPU.
1269 * Even though this will probably make the lower prio task 1269 * Even though this will probably make the lower prio task
1270 * lose its cache, we do not want to bounce a higher prio task 1270 * lose its cache, we do not want to bounce a higher prio task
1271 * around just because it gave up its CPU, perhaps for a 1271 * around just because it gave up its CPU, perhaps for a
1272 * lock? 1272 * lock?
1273 * 1273 *
1274 * For equal prio tasks, we just let the scheduler sort it out. 1274 * For equal prio tasks, we just let the scheduler sort it out.
1275 * 1275 *
1276 * Otherwise, just let it ride on the affined RQ and the 1276 * Otherwise, just let it ride on the affined RQ and the
1277 * post-schedule router will push the preempted task away 1277 * post-schedule router will push the preempted task away
1278 * 1278 *
1279 * This test is optimistic, if we get it wrong the load-balancer 1279 * This test is optimistic, if we get it wrong the load-balancer
1280 * will have to sort it out. 1280 * will have to sort it out.
1281 */ 1281 */
1282 if (curr && unlikely(rt_task(curr)) && 1282 if (curr && unlikely(rt_task(curr)) &&
1283 (curr->nr_cpus_allowed < 2 || 1283 (curr->nr_cpus_allowed < 2 ||
1284 curr->prio <= p->prio) && 1284 curr->prio <= p->prio) &&
1285 (p->nr_cpus_allowed > 1)) { 1285 (p->nr_cpus_allowed > 1)) {
1286 int target = find_lowest_rq(p); 1286 int target = find_lowest_rq(p);
1287 1287
1288 if (target != -1) 1288 if (target != -1)
1289 cpu = target; 1289 cpu = target;
1290 } 1290 }
1291 rcu_read_unlock(); 1291 rcu_read_unlock();
1292 1292
1293 out: 1293 out:
1294 return cpu; 1294 return cpu;
1295 } 1295 }
1296 1296
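/*
 * Illustrative sketch (not kernel code): the wakeup-placement test described
 * in the comment above, reduced to a standalone predicate. Lower numeric
 * values mean higher priority, as in the kernel; all names here are made up.
 */
#include <stdio.h>

static int try_to_wake_elsewhere(int curr_is_rt, int curr_cpus_allowed,
				 int curr_prio, int p_prio, int p_cpus_allowed)
{
	/*
	 * Look for another CPU only if current is RT and would not lose out:
	 * current is pinned or at least as important as the wakee, and the
	 * wakee itself is allowed to run somewhere else.
	 */
	return curr_is_rt &&
	       (curr_cpus_allowed < 2 || curr_prio <= p_prio) &&
	       p_cpus_allowed > 1;
}

int main(void)
{
	/* current: RT, pinned, prio 20; wakee: prio 30, may run on 4 CPUs */
	printf("%d\n", try_to_wake_elsewhere(1, 1, 20, 30, 4)); /* -> 1 */
	return 0;
}
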
1297 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 1297 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1298 { 1298 {
1299 if (rq->curr->nr_cpus_allowed == 1) 1299 if (rq->curr->nr_cpus_allowed == 1)
1300 return; 1300 return;
1301 1301
1302 if (p->nr_cpus_allowed != 1 1302 if (p->nr_cpus_allowed != 1
1303 && cpupri_find(&rq->rd->cpupri, p, NULL)) 1303 && cpupri_find(&rq->rd->cpupri, p, NULL))
1304 return; 1304 return;
1305 1305
1306 if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) 1306 if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
1307 return; 1307 return;
1308 1308
1309 /* 1309 /*
1310 * There appear to be other cpus that can accept 1310 * There appear to be other cpus that can accept
1311 * current and none to run 'p', so let's reschedule 1311 * current and none to run 'p', so let's reschedule
1312 * to try and push current away: 1312 * to try and push current away:
1313 */ 1313 */
1314 requeue_task_rt(rq, p, 1); 1314 requeue_task_rt(rq, p, 1);
1315 resched_task(rq->curr); 1315 resched_task(rq->curr);
1316 } 1316 }
1317 1317
1318 #endif /* CONFIG_SMP */ 1318 #endif /* CONFIG_SMP */
1319 1319
1320 /* 1320 /*
1321 * Preempt the current task with a newly woken task if needed: 1321 * Preempt the current task with a newly woken task if needed:
1322 */ 1322 */
1323 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) 1323 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1324 { 1324 {
1325 if (p->prio < rq->curr->prio) { 1325 if (p->prio < rq->curr->prio) {
1326 resched_task(rq->curr); 1326 resched_task(rq->curr);
1327 return; 1327 return;
1328 } 1328 }
1329 1329
1330 #ifdef CONFIG_SMP 1330 #ifdef CONFIG_SMP
1331 /* 1331 /*
1332 * If: 1332 * If:
1333 * 1333 *
1334 * - the newly woken task is of equal priority to the current task 1334 * - the newly woken task is of equal priority to the current task
1335 * - the newly woken task is non-migratable while current is migratable 1335 * - the newly woken task is non-migratable while current is migratable
1336 * - current will be preempted on the next reschedule 1336 * - current will be preempted on the next reschedule
1337 * 1337 *
1338 * we should check to see if current can readily move to a different 1338 * we should check to see if current can readily move to a different
1339 * cpu. If so, we will reschedule to allow the push logic to try 1339 * cpu. If so, we will reschedule to allow the push logic to try
1340 * to move current somewhere else, making room for our non-migratable 1340 * to move current somewhere else, making room for our non-migratable
1341 * task. 1341 * task.
1342 */ 1342 */
1343 if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr)) 1343 if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
1344 check_preempt_equal_prio(rq, p); 1344 check_preempt_equal_prio(rq, p);
1345 #endif 1345 #endif
1346 } 1346 }
1347 1347
1348 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, 1348 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
1349 struct rt_rq *rt_rq) 1349 struct rt_rq *rt_rq)
1350 { 1350 {
1351 struct rt_prio_array *array = &rt_rq->active; 1351 struct rt_prio_array *array = &rt_rq->active;
1352 struct sched_rt_entity *next = NULL; 1352 struct sched_rt_entity *next = NULL;
1353 struct list_head *queue; 1353 struct list_head *queue;
1354 int idx; 1354 int idx;
1355 1355
1356 idx = sched_find_first_bit(array->bitmap); 1356 idx = sched_find_first_bit(array->bitmap);
1357 BUG_ON(idx >= MAX_RT_PRIO); 1357 BUG_ON(idx >= MAX_RT_PRIO);
1358 1358
1359 queue = array->queue + idx; 1359 queue = array->queue + idx;
1360 next = list_entry(queue->next, struct sched_rt_entity, run_list); 1360 next = list_entry(queue->next, struct sched_rt_entity, run_list);
1361 1361
1362 return next; 1362 return next;
1363 } 1363 }
1364 1364
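/*
 * Illustrative userspace sketch (not kernel code): the O(1) priority-array
 * lookup used by pick_next_rt_entity() above, modeled with a plain bitmap.
 * __builtin_ctzl() stands in for sched_find_first_bit(); the single pointer
 * per priority stands in for the per-priority run lists. Names are made up.
 */
#include <stdio.h>

#define NR_PRIO 64

static unsigned long bitmap;		/* bit p set => queue[p] is non-empty */
static const char *queue[NR_PRIO];

static void enqueue(int prio, const char *name)
{
	queue[prio] = name;
	bitmap |= 1UL << prio;
}

static const char *pick_highest(void)
{
	if (!bitmap)
		return NULL;
	return queue[__builtin_ctzl(bitmap)];	/* lowest set bit = highest prio */
}

int main(void)
{
	enqueue(10, "prio-10 task");
	enqueue(3, "prio-3 task");
	printf("next: %s\n", pick_highest());	/* -> "prio-3 task" */
	return 0;
}
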
1365 static struct task_struct *_pick_next_task_rt(struct rq *rq) 1365 static struct task_struct *_pick_next_task_rt(struct rq *rq)
1366 { 1366 {
1367 struct sched_rt_entity *rt_se; 1367 struct sched_rt_entity *rt_se;
1368 struct task_struct *p; 1368 struct task_struct *p;
1369 struct rt_rq *rt_rq; 1369 struct rt_rq *rt_rq;
1370 1370
1371 rt_rq = &rq->rt; 1371 rt_rq = &rq->rt;
1372 1372
1373 if (!rt_rq->rt_nr_running) 1373 if (!rt_rq->rt_nr_running)
1374 return NULL; 1374 return NULL;
1375 1375
1376 if (rt_rq_throttled(rt_rq)) 1376 if (rt_rq_throttled(rt_rq))
1377 return NULL; 1377 return NULL;
1378 1378
1379 do { 1379 do {
1380 rt_se = pick_next_rt_entity(rq, rt_rq); 1380 rt_se = pick_next_rt_entity(rq, rt_rq);
1381 BUG_ON(!rt_se); 1381 BUG_ON(!rt_se);
1382 rt_rq = group_rt_rq(rt_se); 1382 rt_rq = group_rt_rq(rt_se);
1383 } while (rt_rq); 1383 } while (rt_rq);
1384 1384
1385 p = rt_task_of(rt_se); 1385 p = rt_task_of(rt_se);
1386 p->se.exec_start = rq->clock_task; 1386 p->se.exec_start = rq->clock_task;
1387 1387
1388 return p; 1388 return p;
1389 } 1389 }
1390 1390
1391 static struct task_struct *pick_next_task_rt(struct rq *rq) 1391 static struct task_struct *pick_next_task_rt(struct rq *rq)
1392 { 1392 {
1393 struct task_struct *p = _pick_next_task_rt(rq); 1393 struct task_struct *p = _pick_next_task_rt(rq);
1394 1394
1395 /* The running task is never eligible for pushing */ 1395 /* The running task is never eligible for pushing */
1396 if (p) 1396 if (p)
1397 dequeue_pushable_task(rq, p); 1397 dequeue_pushable_task(rq, p);
1398 1398
1399 #ifdef CONFIG_SMP 1399 #ifdef CONFIG_SMP
1400 /* 1400 /*
1401 * We detect this state here so that we can avoid taking the RQ 1401 * We detect this state here so that we can avoid taking the RQ
1402 * lock again later if there is no need to push 1402 * lock again later if there is no need to push
1403 */ 1403 */
1404 rq->post_schedule = has_pushable_tasks(rq); 1404 rq->post_schedule = has_pushable_tasks(rq);
1405 #endif 1405 #endif
1406 1406
1407 return p; 1407 return p;
1408 } 1408 }
1409 1409
1410 static void put_prev_task_rt(struct rq *rq, struct task_struct *p) 1410 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1411 { 1411 {
1412 update_curr_rt(rq); 1412 update_curr_rt(rq);
1413 1413
1414 /* 1414 /*
1415 * The previous task needs to be made eligible for pushing 1415 * The previous task needs to be made eligible for pushing
1416 * if it is still active 1416 * if it is still active
1417 */ 1417 */
1418 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) 1418 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
1419 enqueue_pushable_task(rq, p); 1419 enqueue_pushable_task(rq, p);
1420 } 1420 }
1421 1421
1422 #ifdef CONFIG_SMP 1422 #ifdef CONFIG_SMP
1423 1423
1424 /* Only try algorithms three times */ 1424 /* Only try algorithms three times */
1425 #define RT_MAX_TRIES 3 1425 #define RT_MAX_TRIES 3
1426 1426
1427 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 1427 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1428 { 1428 {
1429 if (!task_running(rq, p) && 1429 if (!task_running(rq, p) &&
1430 (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && 1430 (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
1431 (p->nr_cpus_allowed > 1)) 1431 (p->nr_cpus_allowed > 1))
1432 return 1; 1432 return 1;
1433 return 0; 1433 return 0;
1434 } 1434 }
1435 1435
1436 /* Return the second highest RT task, NULL otherwise */ 1436 /* Return the second highest RT task, NULL otherwise */
1437 static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) 1437 static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
1438 { 1438 {
1439 struct task_struct *next = NULL; 1439 struct task_struct *next = NULL;
1440 struct sched_rt_entity *rt_se; 1440 struct sched_rt_entity *rt_se;
1441 struct rt_prio_array *array; 1441 struct rt_prio_array *array;
1442 struct rt_rq *rt_rq; 1442 struct rt_rq *rt_rq;
1443 int idx; 1443 int idx;
1444 1444
1445 for_each_leaf_rt_rq(rt_rq, rq) { 1445 for_each_leaf_rt_rq(rt_rq, rq) {
1446 array = &rt_rq->active; 1446 array = &rt_rq->active;
1447 idx = sched_find_first_bit(array->bitmap); 1447 idx = sched_find_first_bit(array->bitmap);
1448 next_idx: 1448 next_idx:
1449 if (idx >= MAX_RT_PRIO) 1449 if (idx >= MAX_RT_PRIO)
1450 continue; 1450 continue;
1451 if (next && next->prio <= idx) 1451 if (next && next->prio <= idx)
1452 continue; 1452 continue;
1453 list_for_each_entry(rt_se, array->queue + idx, run_list) { 1453 list_for_each_entry(rt_se, array->queue + idx, run_list) {
1454 struct task_struct *p; 1454 struct task_struct *p;
1455 1455
1456 if (!rt_entity_is_task(rt_se)) 1456 if (!rt_entity_is_task(rt_se))
1457 continue; 1457 continue;
1458 1458
1459 p = rt_task_of(rt_se); 1459 p = rt_task_of(rt_se);
1460 if (pick_rt_task(rq, p, cpu)) { 1460 if (pick_rt_task(rq, p, cpu)) {
1461 next = p; 1461 next = p;
1462 break; 1462 break;
1463 } 1463 }
1464 } 1464 }
1465 if (!next) { 1465 if (!next) {
1466 idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); 1466 idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
1467 goto next_idx; 1467 goto next_idx;
1468 } 1468 }
1469 } 1469 }
1470 1470
1471 return next; 1471 return next;
1472 } 1472 }
1473 1473
1474 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); 1474 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
1475 1475
1476 static int find_lowest_rq(struct task_struct *task) 1476 static int find_lowest_rq(struct task_struct *task)
1477 { 1477 {
1478 struct sched_domain *sd; 1478 struct sched_domain *sd;
1479 struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); 1479 struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
1480 int this_cpu = smp_processor_id(); 1480 int this_cpu = smp_processor_id();
1481 int cpu = task_cpu(task); 1481 int cpu = task_cpu(task);
1482 1482
1483 /* Make sure the mask is initialized first */ 1483 /* Make sure the mask is initialized first */
1484 if (unlikely(!lowest_mask)) 1484 if (unlikely(!lowest_mask))
1485 return -1; 1485 return -1;
1486 1486
1487 if (task->nr_cpus_allowed == 1) 1487 if (task->nr_cpus_allowed == 1)
1488 return -1; /* No other targets possible */ 1488 return -1; /* No other targets possible */
1489 1489
1490 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) 1490 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
1491 return -1; /* No targets found */ 1491 return -1; /* No targets found */
1492 1492
1493 /* 1493 /*
1494 * At this point we have built a mask of cpus representing the 1494 * At this point we have built a mask of cpus representing the
1495 * lowest priority tasks in the system. Now we want to elect 1495 * lowest priority tasks in the system. Now we want to elect
1496 * the best one based on our affinity and topology. 1496 * the best one based on our affinity and topology.
1497 * 1497 *
1498 * We prioritize the last cpu that the task executed on since 1498 * We prioritize the last cpu that the task executed on since
1499 * it is most likely cache-hot in that location. 1499 * it is most likely cache-hot in that location.
1500 */ 1500 */
1501 if (cpumask_test_cpu(cpu, lowest_mask)) 1501 if (cpumask_test_cpu(cpu, lowest_mask))
1502 return cpu; 1502 return cpu;
1503 1503
1504 /* 1504 /*
1505 * Otherwise, we consult the sched_domains span maps to figure 1505 * Otherwise, we consult the sched_domains span maps to figure
1506 * out which cpu is logically closest to our hot cache data. 1506 * out which cpu is logically closest to our hot cache data.
1507 */ 1507 */
1508 if (!cpumask_test_cpu(this_cpu, lowest_mask)) 1508 if (!cpumask_test_cpu(this_cpu, lowest_mask))
1509 this_cpu = -1; /* Skip this_cpu opt if not among lowest */ 1509 this_cpu = -1; /* Skip this_cpu opt if not among lowest */
1510 1510
1511 rcu_read_lock(); 1511 rcu_read_lock();
1512 for_each_domain(cpu, sd) { 1512 for_each_domain(cpu, sd) {
1513 if (sd->flags & SD_WAKE_AFFINE) { 1513 if (sd->flags & SD_WAKE_AFFINE) {
1514 int best_cpu; 1514 int best_cpu;
1515 1515
1516 /* 1516 /*
1517 * "this_cpu" is cheaper to preempt than a 1517 * "this_cpu" is cheaper to preempt than a
1518 * remote processor. 1518 * remote processor.
1519 */ 1519 */
1520 if (this_cpu != -1 && 1520 if (this_cpu != -1 &&
1521 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { 1521 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
1522 rcu_read_unlock(); 1522 rcu_read_unlock();
1523 return this_cpu; 1523 return this_cpu;
1524 } 1524 }
1525 1525
1526 best_cpu = cpumask_first_and(lowest_mask, 1526 best_cpu = cpumask_first_and(lowest_mask,
1527 sched_domain_span(sd)); 1527 sched_domain_span(sd));
1528 if (best_cpu < nr_cpu_ids) { 1528 if (best_cpu < nr_cpu_ids) {
1529 rcu_read_unlock(); 1529 rcu_read_unlock();
1530 return best_cpu; 1530 return best_cpu;
1531 } 1531 }
1532 } 1532 }
1533 } 1533 }
1534 rcu_read_unlock(); 1534 rcu_read_unlock();
1535 1535
1536 /* 1536 /*
1537 * And finally, if there were no matches within the domains, 1537 * And finally, if there were no matches within the domains,
1538 * just give the caller *something* to work with from the compatible 1538 * just give the caller *something* to work with from the compatible
1539 * locations. 1539 * locations.
1540 */ 1540 */
1541 if (this_cpu != -1) 1541 if (this_cpu != -1)
1542 return this_cpu; 1542 return this_cpu;
1543 1543
1544 cpu = cpumask_any(lowest_mask); 1544 cpu = cpumask_any(lowest_mask);
1545 if (cpu < nr_cpu_ids) 1545 if (cpu < nr_cpu_ids)
1546 return cpu; 1546 return cpu;
1547 return -1; 1547 return -1;
1548 } 1548 }
1549 1549
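/*
 * Illustrative userspace sketch (not kernel code): the selection cascade of
 * find_lowest_rq() above, simplified to a single sched_domain and plain
 * 64-bit CPU masks. "lowest" stands in for the cpupri result, "domain" for
 * one domain span; the function and variable names are made up.
 */
#include <stdint.h>
#include <stdio.h>

static int pick_cpu(uint64_t lowest, int task_cpu, int this_cpu, uint64_t domain)
{
	if (lowest & (1ULL << task_cpu))	/* 1. cache-hot: last CPU used */
		return task_cpu;
	if ((lowest & (1ULL << this_cpu)) &&
	    (domain & (1ULL << this_cpu)))	/* 2. local CPU is cheap to preempt */
		return this_cpu;
	if (lowest & domain)			/* 3. first candidate in the domain */
		return __builtin_ctzll(lowest & domain);
	if (lowest)				/* 4. anything compatible at all */
		return __builtin_ctzll(lowest);
	return -1;				/* 5. no target */
}

int main(void)
{
	/* CPUs 2 and 5 run the lowest-priority work; the task last ran on CPU 3 */
	printf("%d\n", pick_cpu(0x24, 3, 0, 0x0f));	/* -> 2, first in the domain */
	return 0;
}
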
1550 /* Will lock the rq it finds */ 1550 /* Will lock the rq it finds */
1551 static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) 1551 static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1552 { 1552 {
1553 struct rq *lowest_rq = NULL; 1553 struct rq *lowest_rq = NULL;
1554 int tries; 1554 int tries;
1555 int cpu; 1555 int cpu;
1556 1556
1557 for (tries = 0; tries < RT_MAX_TRIES; tries++) { 1557 for (tries = 0; tries < RT_MAX_TRIES; tries++) {
1558 cpu = find_lowest_rq(task); 1558 cpu = find_lowest_rq(task);
1559 1559
1560 if ((cpu == -1) || (cpu == rq->cpu)) 1560 if ((cpu == -1) || (cpu == rq->cpu))
1561 break; 1561 break;
1562 1562
1563 lowest_rq = cpu_rq(cpu); 1563 lowest_rq = cpu_rq(cpu);
1564 1564
1565 /* if the prio of this runqueue changed, try again */ 1565 /* if the prio of this runqueue changed, try again */
1566 if (double_lock_balance(rq, lowest_rq)) { 1566 if (double_lock_balance(rq, lowest_rq)) {
1567 /* 1567 /*
1568 * We had to unlock the run queue. In the 1568 * We had to unlock the run queue. In the
1569 * meantime, the task could have already 1569 * meantime, the task could have already
1570 * migrated or had its affinity changed. 1570 * migrated or had its affinity changed.
1571 * Also make sure that it wasn't scheduled on its rq. 1571 * Also make sure that it wasn't scheduled on its rq.
1572 */ 1572 */
1573 if (unlikely(task_rq(task) != rq || 1573 if (unlikely(task_rq(task) != rq ||
1574 !cpumask_test_cpu(lowest_rq->cpu, 1574 !cpumask_test_cpu(lowest_rq->cpu,
1575 tsk_cpus_allowed(task)) || 1575 tsk_cpus_allowed(task)) ||
1576 task_running(rq, task) || 1576 task_running(rq, task) ||
1577 !task->on_rq)) { 1577 !task->on_rq)) {
1578 1578
1579 double_unlock_balance(rq, lowest_rq); 1579 double_unlock_balance(rq, lowest_rq);
1580 lowest_rq = NULL; 1580 lowest_rq = NULL;
1581 break; 1581 break;
1582 } 1582 }
1583 } 1583 }
1584 1584
1585 /* If this rq is still suitable use it. */ 1585 /* If this rq is still suitable use it. */
1586 if (lowest_rq->rt.highest_prio.curr > task->prio) 1586 if (lowest_rq->rt.highest_prio.curr > task->prio)
1587 break; 1587 break;
1588 1588
1589 /* try again */ 1589 /* try again */
1590 double_unlock_balance(rq, lowest_rq); 1590 double_unlock_balance(rq, lowest_rq);
1591 lowest_rq = NULL; 1591 lowest_rq = NULL;
1592 } 1592 }
1593 1593
1594 return lowest_rq; 1594 return lowest_rq;
1595 } 1595 }
1596 1596
1597 static struct task_struct *pick_next_pushable_task(struct rq *rq) 1597 static struct task_struct *pick_next_pushable_task(struct rq *rq)
1598 { 1598 {
1599 struct task_struct *p; 1599 struct task_struct *p;
1600 1600
1601 if (!has_pushable_tasks(rq)) 1601 if (!has_pushable_tasks(rq))
1602 return NULL; 1602 return NULL;
1603 1603
1604 p = plist_first_entry(&rq->rt.pushable_tasks, 1604 p = plist_first_entry(&rq->rt.pushable_tasks,
1605 struct task_struct, pushable_tasks); 1605 struct task_struct, pushable_tasks);
1606 1606
1607 BUG_ON(rq->cpu != task_cpu(p)); 1607 BUG_ON(rq->cpu != task_cpu(p));
1608 BUG_ON(task_current(rq, p)); 1608 BUG_ON(task_current(rq, p));
1609 BUG_ON(p->nr_cpus_allowed <= 1); 1609 BUG_ON(p->nr_cpus_allowed <= 1);
1610 1610
1611 BUG_ON(!p->on_rq); 1611 BUG_ON(!p->on_rq);
1612 BUG_ON(!rt_task(p)); 1612 BUG_ON(!rt_task(p));
1613 1613
1614 return p; 1614 return p;
1615 } 1615 }
1616 1616
1617 /* 1617 /*
1618 * If the current CPU has more than one RT task, see if the 1618 * If the current CPU has more than one RT task, see if the
1619 * non-running task can migrate over to a CPU that is running a 1619 * non-running task can migrate over to a CPU that is running a
1620 * task of lesser priority. 1620 * task of lesser priority.
1621 */ 1621 */
1622 static int push_rt_task(struct rq *rq) 1622 static int push_rt_task(struct rq *rq)
1623 { 1623 {
1624 struct task_struct *next_task; 1624 struct task_struct *next_task;
1625 struct rq *lowest_rq; 1625 struct rq *lowest_rq;
1626 int ret = 0; 1626 int ret = 0;
1627 1627
1628 if (!rq->rt.overloaded) 1628 if (!rq->rt.overloaded)
1629 return 0; 1629 return 0;
1630 1630
1631 next_task = pick_next_pushable_task(rq); 1631 next_task = pick_next_pushable_task(rq);
1632 if (!next_task) 1632 if (!next_task)
1633 return 0; 1633 return 0;
1634 1634
1635 retry: 1635 retry:
1636 if (unlikely(next_task == rq->curr)) { 1636 if (unlikely(next_task == rq->curr)) {
1637 WARN_ON(1); 1637 WARN_ON(1);
1638 return 0; 1638 return 0;
1639 } 1639 }
1640 1640
1641 /* 1641 /*
1642 * It's possible that the next_task slipped in with a 1642 * It's possible that the next_task slipped in with a
1643 * higher priority than current. If that's the case, 1643 * higher priority than current. If that's the case,
1644 * just reschedule current. 1644 * just reschedule current.
1645 */ 1645 */
1646 if (unlikely(next_task->prio < rq->curr->prio)) { 1646 if (unlikely(next_task->prio < rq->curr->prio)) {
1647 resched_task(rq->curr); 1647 resched_task(rq->curr);
1648 return 0; 1648 return 0;
1649 } 1649 }
1650 1650
1651 /* We might release rq lock */ 1651 /* We might release rq lock */
1652 get_task_struct(next_task); 1652 get_task_struct(next_task);
1653 1653
1654 /* find_lock_lowest_rq locks the rq if found */ 1654 /* find_lock_lowest_rq locks the rq if found */
1655 lowest_rq = find_lock_lowest_rq(next_task, rq); 1655 lowest_rq = find_lock_lowest_rq(next_task, rq);
1656 if (!lowest_rq) { 1656 if (!lowest_rq) {
1657 struct task_struct *task; 1657 struct task_struct *task;
1658 /* 1658 /*
1659 * find_lock_lowest_rq releases rq->lock 1659 * find_lock_lowest_rq releases rq->lock
1660 * so it is possible that next_task has migrated. 1660 * so it is possible that next_task has migrated.
1661 * 1661 *
1662 * We need to make sure that the task is still on the same 1662 * We need to make sure that the task is still on the same
1663 * run-queue and is also still the next task eligible for 1663 * run-queue and is also still the next task eligible for
1664 * pushing. 1664 * pushing.
1665 */ 1665 */
1666 task = pick_next_pushable_task(rq); 1666 task = pick_next_pushable_task(rq);
1667 if (task_cpu(next_task) == rq->cpu && task == next_task) { 1667 if (task_cpu(next_task) == rq->cpu && task == next_task) {
1668 /* 1668 /*
1669 * The task hasn't migrated, and is still the next 1669 * The task hasn't migrated, and is still the next
1670 * eligible task, but we failed to find a run-queue 1670 * eligible task, but we failed to find a run-queue
1671 * to push it to. Do not retry in this case, since 1671 * to push it to. Do not retry in this case, since
1672 * other cpus will pull from us when ready. 1672 * other cpus will pull from us when ready.
1673 */ 1673 */
1674 goto out; 1674 goto out;
1675 } 1675 }
1676 1676
1677 if (!task) 1677 if (!task)
1678 /* No more tasks, just exit */ 1678 /* No more tasks, just exit */
1679 goto out; 1679 goto out;
1680 1680
1681 /* 1681 /*
1682 * Something has shifted, try again. 1682 * Something has shifted, try again.
1683 */ 1683 */
1684 put_task_struct(next_task); 1684 put_task_struct(next_task);
1685 next_task = task; 1685 next_task = task;
1686 goto retry; 1686 goto retry;
1687 } 1687 }
1688 1688
1689 deactivate_task(rq, next_task, 0); 1689 deactivate_task(rq, next_task, 0);
1690 set_task_cpu(next_task, lowest_rq->cpu); 1690 set_task_cpu(next_task, lowest_rq->cpu);
1691 activate_task(lowest_rq, next_task, 0); 1691 activate_task(lowest_rq, next_task, 0);
1692 ret = 1; 1692 ret = 1;
1693 1693
1694 resched_task(lowest_rq->curr); 1694 resched_task(lowest_rq->curr);
1695 1695
1696 double_unlock_balance(rq, lowest_rq); 1696 double_unlock_balance(rq, lowest_rq);
1697 1697
1698 out: 1698 out:
1699 put_task_struct(next_task); 1699 put_task_struct(next_task);
1700 1700
1701 return ret; 1701 return ret;
1702 } 1702 }
1703 1703
1704 static void push_rt_tasks(struct rq *rq) 1704 static void push_rt_tasks(struct rq *rq)
1705 { 1705 {
1706 /* push_rt_task will return true if it moved an RT */ 1706 /* push_rt_task will return true if it moved an RT */
1707 while (push_rt_task(rq)) 1707 while (push_rt_task(rq))
1708 ; 1708 ;
1709 } 1709 }
1710 1710
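/*
 * Toy model (not kernel code) of the decision push_rt_task() makes on each
 * iteration of the loop above: move a queued RT task to the runqueue whose
 * current task has the lowest priority, provided that priority is lower than
 * the task being pushed. Lower numeric value means higher priority; the
 * arrays and names below are invented for illustration.
 */
#include <stdio.h>

#define NCPU 4

static int waiting[NCPU] = {  5, 99, 99, 99 };	/* best pushable prio per CPU */
static int running[NCPU] = {  3, 50, 70, 20 };	/* prio of each CPU's current task */

static int push_target(int src)
{
	int best = -1, cpu;

	for (cpu = 0; cpu < NCPU; cpu++)
		if (cpu != src && running[cpu] > waiting[src] &&
		    (best < 0 || running[cpu] > running[best]))
			best = cpu;
	return best;
}

int main(void)
{
	/* -> CPU 2, whose current task (prio 70) is the least important */
	printf("push CPU0's prio-%d task to CPU%d\n", waiting[0], push_target(0));
	return 0;
}
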
1711 static int pull_rt_task(struct rq *this_rq) 1711 static int pull_rt_task(struct rq *this_rq)
1712 { 1712 {
1713 int this_cpu = this_rq->cpu, ret = 0, cpu; 1713 int this_cpu = this_rq->cpu, ret = 0, cpu;
1714 struct task_struct *p; 1714 struct task_struct *p;
1715 struct rq *src_rq; 1715 struct rq *src_rq;
1716 1716
1717 if (likely(!rt_overloaded(this_rq))) 1717 if (likely(!rt_overloaded(this_rq)))
1718 return 0; 1718 return 0;
1719 1719
1720 for_each_cpu(cpu, this_rq->rd->rto_mask) { 1720 for_each_cpu(cpu, this_rq->rd->rto_mask) {
1721 if (this_cpu == cpu) 1721 if (this_cpu == cpu)
1722 continue; 1722 continue;
1723 1723
1724 src_rq = cpu_rq(cpu); 1724 src_rq = cpu_rq(cpu);
1725 1725
1726 /* 1726 /*
1727 * Don't bother taking the src_rq->lock if the next highest 1727 * Don't bother taking the src_rq->lock if the next highest
1728 * task is known to be lower-priority than our current task. 1728 * task is known to be lower-priority than our current task.
1729 * This may look racy, but if this value is about to go 1729 * This may look racy, but if this value is about to go
1730 * logically higher, the src_rq will push this task away. 1730 * logically higher, the src_rq will push this task away.
1731 * And if it's going logically lower, we do not care. 1731 * And if it's going logically lower, we do not care.
1732 */ 1732 */
1733 if (src_rq->rt.highest_prio.next >= 1733 if (src_rq->rt.highest_prio.next >=
1734 this_rq->rt.highest_prio.curr) 1734 this_rq->rt.highest_prio.curr)
1735 continue; 1735 continue;
1736 1736
1737 /* 1737 /*
1738 * We can potentially drop this_rq's lock in 1738 * We can potentially drop this_rq's lock in
1739 * double_lock_balance, and another CPU could 1739 * double_lock_balance, and another CPU could
1740 * alter this_rq 1740 * alter this_rq
1741 */ 1741 */
1742 double_lock_balance(this_rq, src_rq); 1742 double_lock_balance(this_rq, src_rq);
1743 1743
1744 /* 1744 /*
1745 * Are there still pullable RT tasks? 1745 * Are there still pullable RT tasks?
1746 */ 1746 */
1747 if (src_rq->rt.rt_nr_running <= 1) 1747 if (src_rq->rt.rt_nr_running <= 1)
1748 goto skip; 1748 goto skip;
1749 1749
1750 p = pick_next_highest_task_rt(src_rq, this_cpu); 1750 p = pick_next_highest_task_rt(src_rq, this_cpu);
1751 1751
1752 /* 1752 /*
1753 * Do we have an RT task that preempts 1753 * Do we have an RT task that preempts
1754 * the to-be-scheduled task? 1754 * the to-be-scheduled task?
1755 */ 1755 */
1756 if (p && (p->prio < this_rq->rt.highest_prio.curr)) { 1756 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
1757 WARN_ON(p == src_rq->curr); 1757 WARN_ON(p == src_rq->curr);
1758 WARN_ON(!p->on_rq); 1758 WARN_ON(!p->on_rq);
1759 1759
1760 /* 1760 /*
1761 * There's a chance that p is higher in priority 1761 * There's a chance that p is higher in priority
1762 * than what's currently running on its cpu. 1762 * than what's currently running on its cpu.
1763 * This is just because p is waking up and hasn't 1763 * This is just because p is waking up and hasn't
1764 * had a chance to schedule. We only pull 1764 * had a chance to schedule. We only pull
1765 * p if it is lower in priority than the 1765 * p if it is lower in priority than the
1766 * current task on the run queue 1766 * current task on the run queue
1767 */ 1767 */
1768 if (p->prio < src_rq->curr->prio) 1768 if (p->prio < src_rq->curr->prio)
1769 goto skip; 1769 goto skip;
1770 1770
1771 ret = 1; 1771 ret = 1;
1772 1772
1773 deactivate_task(src_rq, p, 0); 1773 deactivate_task(src_rq, p, 0);
1774 set_task_cpu(p, this_cpu); 1774 set_task_cpu(p, this_cpu);
1775 activate_task(this_rq, p, 0); 1775 activate_task(this_rq, p, 0);
1776 /* 1776 /*
1777 * We continue with the search, just in 1777 * We continue with the search, just in
1778 * case there's an even higher prio task 1778 * case there's an even higher prio task
1779 * in another runqueue. (low likelihood 1779 * in another runqueue. (low likelihood
1780 * but possible) 1780 * but possible)
1781 */ 1781 */
1782 } 1782 }
1783 skip: 1783 skip:
1784 double_unlock_balance(this_rq, src_rq); 1784 double_unlock_balance(this_rq, src_rq);
1785 } 1785 }
1786 1786
1787 return ret; 1787 return ret;
1788 } 1788 }
1789 1789
1790 static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) 1790 static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
1791 { 1791 {
1792 /* Try to pull RT tasks here if we lower this rq's prio */ 1792 /* Try to pull RT tasks here if we lower this rq's prio */
1793 if (rq->rt.highest_prio.curr > prev->prio) 1793 if (rq->rt.highest_prio.curr > prev->prio)
1794 pull_rt_task(rq); 1794 pull_rt_task(rq);
1795 } 1795 }
1796 1796
1797 static void post_schedule_rt(struct rq *rq) 1797 static void post_schedule_rt(struct rq *rq)
1798 { 1798 {
1799 push_rt_tasks(rq); 1799 push_rt_tasks(rq);
1800 } 1800 }
1801 1801
1802 /* 1802 /*
1803 * If we are not running and we are not going to reschedule soon, we should 1803 * If we are not running and we are not going to reschedule soon, we should
1804 * try to push tasks away now 1804 * try to push tasks away now
1805 */ 1805 */
1806 static void task_woken_rt(struct rq *rq, struct task_struct *p) 1806 static void task_woken_rt(struct rq *rq, struct task_struct *p)
1807 { 1807 {
1808 if (!task_running(rq, p) && 1808 if (!task_running(rq, p) &&
1809 !test_tsk_need_resched(rq->curr) && 1809 !test_tsk_need_resched(rq->curr) &&
1810 has_pushable_tasks(rq) && 1810 has_pushable_tasks(rq) &&
1811 p->nr_cpus_allowed > 1 && 1811 p->nr_cpus_allowed > 1 &&
1812 rt_task(rq->curr) && 1812 rt_task(rq->curr) &&
1813 (rq->curr->nr_cpus_allowed < 2 || 1813 (rq->curr->nr_cpus_allowed < 2 ||
1814 rq->curr->prio <= p->prio)) 1814 rq->curr->prio <= p->prio))
1815 push_rt_tasks(rq); 1815 push_rt_tasks(rq);
1816 } 1816 }
1817 1817
1818 static void set_cpus_allowed_rt(struct task_struct *p, 1818 static void set_cpus_allowed_rt(struct task_struct *p,
1819 const struct cpumask *new_mask) 1819 const struct cpumask *new_mask)
1820 { 1820 {
1821 struct rq *rq; 1821 struct rq *rq;
1822 int weight; 1822 int weight;
1823 1823
1824 BUG_ON(!rt_task(p)); 1824 BUG_ON(!rt_task(p));
1825 1825
1826 if (!p->on_rq) 1826 if (!p->on_rq)
1827 return; 1827 return;
1828 1828
1829 weight = cpumask_weight(new_mask); 1829 weight = cpumask_weight(new_mask);
1830 1830
1831 /* 1831 /*
1832 * Only update if the process changes whether or not it 1832 * Only update if the process changes whether or not it
1833 * can migrate. 1833 * can migrate.
1834 */ 1834 */
1835 if ((p->nr_cpus_allowed > 1) == (weight > 1)) 1835 if ((p->nr_cpus_allowed > 1) == (weight > 1))
1836 return; 1836 return;
1837 1837
1838 rq = task_rq(p); 1838 rq = task_rq(p);
1839 1839
1840 /* 1840 /*
1841 * The process used to be able to migrate OR it can now migrate 1841 * The process used to be able to migrate OR it can now migrate
1842 */ 1842 */
1843 if (weight <= 1) { 1843 if (weight <= 1) {
1844 if (!task_current(rq, p)) 1844 if (!task_current(rq, p))
1845 dequeue_pushable_task(rq, p); 1845 dequeue_pushable_task(rq, p);
1846 BUG_ON(!rq->rt.rt_nr_migratory); 1846 BUG_ON(!rq->rt.rt_nr_migratory);
1847 rq->rt.rt_nr_migratory--; 1847 rq->rt.rt_nr_migratory--;
1848 } else { 1848 } else {
1849 if (!task_current(rq, p)) 1849 if (!task_current(rq, p))
1850 enqueue_pushable_task(rq, p); 1850 enqueue_pushable_task(rq, p);
1851 rq->rt.rt_nr_migratory++; 1851 rq->rt.rt_nr_migratory++;
1852 } 1852 }
1853 1853
1854 update_rt_migration(&rq->rt); 1854 update_rt_migration(&rq->rt);
1855 } 1855 }
1856 1856
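/*
 * Illustrative userspace sketch (not kernel code): sched_setaffinity() is one
 * path by which a new allowed-CPU mask reaches set_cpus_allowed_rt() above.
 * Pinning an RT task to a single CPU makes it non-migratable, so it drops out
 * of the pushable/migratory accounting. CPU 0 is an arbitrary example.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);	/* nr_cpus_allowed becomes 1 */

	if (sched_setaffinity(0, sizeof(set), &set) == -1) {
		perror("sched_setaffinity");
		return 1;
	}
	return 0;
}
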
1857 /* Assumes rq->lock is held */ 1857 /* Assumes rq->lock is held */
1858 static void rq_online_rt(struct rq *rq) 1858 static void rq_online_rt(struct rq *rq)
1859 { 1859 {
1860 if (rq->rt.overloaded) 1860 if (rq->rt.overloaded)
1861 rt_set_overload(rq); 1861 rt_set_overload(rq);
1862 1862
1863 __enable_runtime(rq); 1863 __enable_runtime(rq);
1864 1864
1865 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr); 1865 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
1866 } 1866 }
1867 1867
1868 /* Assumes rq->lock is held */ 1868 /* Assumes rq->lock is held */
1869 static void rq_offline_rt(struct rq *rq) 1869 static void rq_offline_rt(struct rq *rq)
1870 { 1870 {
1871 if (rq->rt.overloaded) 1871 if (rq->rt.overloaded)
1872 rt_clear_overload(rq); 1872 rt_clear_overload(rq);
1873 1873
1874 __disable_runtime(rq); 1874 __disable_runtime(rq);
1875 1875
1876 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); 1876 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
1877 } 1877 }
1878 1878
1879 /* 1879 /*
1880 * When switching from the rt queue, we bring ourselves to a position 1880 * When switching from the rt queue, we bring ourselves to a position
1881 * where we might want to pull RT tasks from other runqueues. 1881 * where we might want to pull RT tasks from other runqueues.
1882 */ 1882 */
1883 static void switched_from_rt(struct rq *rq, struct task_struct *p) 1883 static void switched_from_rt(struct rq *rq, struct task_struct *p)
1884 { 1884 {
1885 /* 1885 /*
1886 * If there are other RT tasks then we will reschedule 1886 * If there are other RT tasks then we will reschedule
1887 * and the scheduling of the other RT tasks will handle 1887 * and the scheduling of the other RT tasks will handle
1888 * the balancing. But if we are the last RT task 1888 * the balancing. But if we are the last RT task
1889 * we may need to handle the pulling of RT tasks 1889 * we may need to handle the pulling of RT tasks
1890 * now. 1890 * now.
1891 */ 1891 */
1892 if (p->on_rq && !rq->rt.rt_nr_running) 1892 if (!p->on_rq || rq->rt.rt_nr_running)
1893 pull_rt_task(rq); 1893 return;
1894
1895 if (pull_rt_task(rq))
1896 resched_task(rq->curr);
1894 } 1897 }
1895 1898
1896 void init_sched_rt_class(void) 1899 void init_sched_rt_class(void)
1897 { 1900 {
1898 unsigned int i; 1901 unsigned int i;
1899 1902
1900 for_each_possible_cpu(i) { 1903 for_each_possible_cpu(i) {
1901 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), 1904 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
1902 GFP_KERNEL, cpu_to_node(i)); 1905 GFP_KERNEL, cpu_to_node(i));
1903 } 1906 }
1904 } 1907 }
1905 #endif /* CONFIG_SMP */ 1908 #endif /* CONFIG_SMP */
1906 1909
1907 /* 1910 /*
1908 * When switching a task to RT, we may overload the runqueue 1911 * When switching a task to RT, we may overload the runqueue
1909 * with RT tasks. In this case we try to push them off to 1912 * with RT tasks. In this case we try to push them off to
1910 * other runqueues. 1913 * other runqueues.
1911 */ 1914 */
1912 static void switched_to_rt(struct rq *rq, struct task_struct *p) 1915 static void switched_to_rt(struct rq *rq, struct task_struct *p)
1913 { 1916 {
1914 int check_resched = 1; 1917 int check_resched = 1;
1915 1918
1916 /* 1919 /*
1917 * If we are already running, then there's nothing 1920 * If we are already running, then there's nothing
1918 * that needs to be done. But if we are not running, 1921 * that needs to be done. But if we are not running,
1919 * we may need to preempt the currently running task. 1922 * we may need to preempt the currently running task.
1920 * If that currently running task is also an RT task, 1923 * If that currently running task is also an RT task,
1921 * then see if we can move to another run queue. 1924 * then see if we can move to another run queue.
1922 */ 1925 */
1923 if (p->on_rq && rq->curr != p) { 1926 if (p->on_rq && rq->curr != p) {
1924 #ifdef CONFIG_SMP 1927 #ifdef CONFIG_SMP
1925 if (rq->rt.overloaded && push_rt_task(rq) && 1928 if (rq->rt.overloaded && push_rt_task(rq) &&
1926 /* Don't resched if we changed runqueues */ 1929 /* Don't resched if we changed runqueues */
1927 rq != task_rq(p)) 1930 rq != task_rq(p))
1928 check_resched = 0; 1931 check_resched = 0;
1929 #endif /* CONFIG_SMP */ 1932 #endif /* CONFIG_SMP */
1930 if (check_resched && p->prio < rq->curr->prio) 1933 if (check_resched && p->prio < rq->curr->prio)
1931 resched_task(rq->curr); 1934 resched_task(rq->curr);
1932 } 1935 }
1933 } 1936 }
1934 1937
1935 /* 1938 /*
1936 * Priority of the task has changed. This may cause 1939 * Priority of the task has changed. This may cause
1937 * us to initiate a push or pull. 1940 * us to initiate a push or pull.
1938 */ 1941 */
1939 static void 1942 static void
1940 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) 1943 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
1941 { 1944 {
1942 if (!p->on_rq) 1945 if (!p->on_rq)
1943 return; 1946 return;
1944 1947
1945 if (rq->curr == p) { 1948 if (rq->curr == p) {
1946 #ifdef CONFIG_SMP 1949 #ifdef CONFIG_SMP
1947 /* 1950 /*
1948 * If our priority decreases while running, we 1951 * If our priority decreases while running, we
1949 * may need to pull tasks to this runqueue. 1952 * may need to pull tasks to this runqueue.
1950 */ 1953 */
1951 if (oldprio < p->prio) 1954 if (oldprio < p->prio)
1952 pull_rt_task(rq); 1955 pull_rt_task(rq);
1953 /* 1956 /*
1954 * If there's a higher priority task waiting to run 1957 * If there's a higher priority task waiting to run
1955 * then reschedule. Note, the above pull_rt_task 1958 * then reschedule. Note, the above pull_rt_task
1956 * can release the rq lock and p could migrate. 1959 * can release the rq lock and p could migrate.
1957 * Only reschedule if p is still on the same runqueue. 1960 * Only reschedule if p is still on the same runqueue.
1958 */ 1961 */
1959 if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) 1962 if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
1960 resched_task(p); 1963 resched_task(p);
1961 #else 1964 #else
1962 /* For UP simply resched on drop of prio */ 1965 /* For UP simply resched on drop of prio */
1963 if (oldprio < p->prio) 1966 if (oldprio < p->prio)
1964 resched_task(p); 1967 resched_task(p);
1965 #endif /* CONFIG_SMP */ 1968 #endif /* CONFIG_SMP */
1966 } else { 1969 } else {
1967 /* 1970 /*
1968 * This task is not running, but if it is 1971 * This task is not running, but if it is
1969 * greater than the current running task 1972 * greater than the current running task
1970 * then reschedule. 1973 * then reschedule.
1971 */ 1974 */
1972 if (p->prio < rq->curr->prio) 1975 if (p->prio < rq->curr->prio)
1973 resched_task(rq->curr); 1976 resched_task(rq->curr);
1974 } 1977 }
1975 } 1978 }
1976 1979
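/*
 * Illustrative userspace sketch (not kernel code): changing an RT task's
 * priority with sched_setparam() reaches prio_changed_rt() above via the
 * scheduler core, which then decides whether to pull or reschedule. Assumes
 * the caller is already SCHED_FIFO/SCHED_RR and may change its priority;
 * priority 30 is an arbitrary example value.
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 30 };

	if (sched_setparam(0, &sp) == -1) {	/* 0 = calling process */
		perror("sched_setparam");
		return 1;
	}
	return 0;
}
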
1977 static void watchdog(struct rq *rq, struct task_struct *p) 1980 static void watchdog(struct rq *rq, struct task_struct *p)
1978 { 1981 {
1979 unsigned long soft, hard; 1982 unsigned long soft, hard;
1980 1983
1981 /* max may change after cur was read; this will be fixed next tick */ 1984 /* max may change after cur was read; this will be fixed next tick */
1982 soft = task_rlimit(p, RLIMIT_RTTIME); 1985 soft = task_rlimit(p, RLIMIT_RTTIME);
1983 hard = task_rlimit_max(p, RLIMIT_RTTIME); 1986 hard = task_rlimit_max(p, RLIMIT_RTTIME);
1984 1987
1985 if (soft != RLIM_INFINITY) { 1988 if (soft != RLIM_INFINITY) {
1986 unsigned long next; 1989 unsigned long next;
1987 1990
1988 p->rt.timeout++; 1991 p->rt.timeout++;
1989 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); 1992 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
1990 if (p->rt.timeout > next) 1993 if (p->rt.timeout > next)
1991 p->cputime_expires.sched_exp = p->se.sum_exec_runtime; 1994 p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
1992 } 1995 }
1993 } 1996 }
1994 1997
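/*
 * Illustrative userspace sketch (not kernel code): the watchdog() above is
 * what ultimately delivers SIGXCPU to an RT task that spins past its
 * RLIMIT_RTTIME soft limit without blocking. Assumes the process may set
 * SCHED_FIFO (CAP_SYS_NICE or root); the limits and priority are arbitrary
 * example values.
 */
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/resource.h>

static volatile sig_atomic_t got_xcpu;

static void on_xcpu(int sig)
{
	(void)sig;
	got_xcpu = 1;
}

int main(void)
{
	struct rlimit rl = { .rlim_cur = 500000, .rlim_max = 2000000 }; /* usec */
	struct sched_param sp = { .sched_priority = 10 };

	signal(SIGXCPU, on_xcpu);
	if (setrlimit(RLIMIT_RTTIME, &rl) == -1) {
		perror("setrlimit");
		return 1;
	}
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		perror("sched_setscheduler");
		return 1;
	}

	while (!got_xcpu)
		;	/* burn CPU without blocking until the soft limit is hit */

	printf("SIGXCPU received after exceeding the RLIMIT_RTTIME soft limit\n");
	return 0;
}
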
1995 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) 1998 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1996 { 1999 {
1997 struct sched_rt_entity *rt_se = &p->rt; 2000 struct sched_rt_entity *rt_se = &p->rt;
1998 2001
1999 update_curr_rt(rq); 2002 update_curr_rt(rq);
2000 2003
2001 watchdog(rq, p); 2004 watchdog(rq, p);
2002 2005
2003 /* 2006 /*
2004 * RR tasks need a special form of timeslice management. 2007 * RR tasks need a special form of timeslice management.
2005 * FIFO tasks have no timeslices. 2008 * FIFO tasks have no timeslices.
2006 */ 2009 */
2007 if (p->policy != SCHED_RR) 2010 if (p->policy != SCHED_RR)
2008 return; 2011 return;
2009 2012
2010 if (--p->rt.time_slice) 2013 if (--p->rt.time_slice)
2011 return; 2014 return;
2012 2015
2013 p->rt.time_slice = RR_TIMESLICE; 2016 p->rt.time_slice = RR_TIMESLICE;
2014 2017
2015 /* 2018 /*
2016 * Requeue to the end of the queue if we (and all of our ancestors) are the 2019 * Requeue to the end of the queue if we (and all of our ancestors) are the
2017 * only element on the queue 2020 * only element on the queue
2018 */ 2021 */
2019 for_each_sched_rt_entity(rt_se) { 2022 for_each_sched_rt_entity(rt_se) {
2020 if (rt_se->run_list.prev != rt_se->run_list.next) { 2023 if (rt_se->run_list.prev != rt_se->run_list.next) {
2021 requeue_task_rt(rq, p, 0); 2024 requeue_task_rt(rq, p, 0);
2022 set_tsk_need_resched(p); 2025 set_tsk_need_resched(p);
2023 return; 2026 return;
2024 } 2027 }
2025 } 2028 }
2026 } 2029 }
2027 2030
2028 static void set_curr_task_rt(struct rq *rq) 2031 static void set_curr_task_rt(struct rq *rq)
2029 { 2032 {
2030 struct task_struct *p = rq->curr; 2033 struct task_struct *p = rq->curr;
2031 2034
2032 p->se.exec_start = rq->clock_task; 2035 p->se.exec_start = rq->clock_task;
2033 2036
2034 /* The running task is never eligible for pushing */ 2037 /* The running task is never eligible for pushing */
2035 dequeue_pushable_task(rq, p); 2038 dequeue_pushable_task(rq, p);
2036 } 2039 }
2037 2040
2038 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) 2041 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
2039 { 2042 {
2040 /* 2043 /*
2041 * Time slice is 0 for SCHED_FIFO tasks 2044 * Time slice is 0 for SCHED_FIFO tasks
2042 */ 2045 */
2043 if (task->policy == SCHED_RR) 2046 if (task->policy == SCHED_RR)
2044 return RR_TIMESLICE; 2047 return RR_TIMESLICE;
2045 else 2048 else
2046 return 0; 2049 return 0;
2047 } 2050 }
2048 2051
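/*
 * Illustrative userspace sketch (not kernel code): for a task in this class,
 * sched_rr_get_interval() reports the value computed by get_rr_interval_rt()
 * above: the round-robin timeslice under SCHED_RR, zero under SCHED_FIFO.
 * Pid 0 means the calling process.
 */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (sched_rr_get_interval(0, &ts) == -1) {
		perror("sched_rr_get_interval");
		return 1;
	}
	printf("round-robin timeslice: %ld.%09ld s\n",
	       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
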
2049 const struct sched_class rt_sched_class = { 2052 const struct sched_class rt_sched_class = {
2050 .next = &fair_sched_class, 2053 .next = &fair_sched_class,
2051 .enqueue_task = enqueue_task_rt, 2054 .enqueue_task = enqueue_task_rt,
2052 .dequeue_task = dequeue_task_rt, 2055 .dequeue_task = dequeue_task_rt,
2053 .yield_task = yield_task_rt, 2056 .yield_task = yield_task_rt,
2054 2057
2055 .check_preempt_curr = check_preempt_curr_rt, 2058 .check_preempt_curr = check_preempt_curr_rt,
2056 2059
2057 .pick_next_task = pick_next_task_rt, 2060 .pick_next_task = pick_next_task_rt,
2058 .put_prev_task = put_prev_task_rt, 2061 .put_prev_task = put_prev_task_rt,
2059 2062
2060 #ifdef CONFIG_SMP 2063 #ifdef CONFIG_SMP
2061 .select_task_rq = select_task_rq_rt, 2064 .select_task_rq = select_task_rq_rt,
2062 2065
2063 .set_cpus_allowed = set_cpus_allowed_rt, 2066 .set_cpus_allowed = set_cpus_allowed_rt,
2064 .rq_online = rq_online_rt, 2067 .rq_online = rq_online_rt,
2065 .rq_offline = rq_offline_rt, 2068 .rq_offline = rq_offline_rt,
2066 .pre_schedule = pre_schedule_rt, 2069 .pre_schedule = pre_schedule_rt,
2067 .post_schedule = post_schedule_rt, 2070 .post_schedule = post_schedule_rt,
2068 .task_woken = task_woken_rt, 2071 .task_woken = task_woken_rt,
2069 .switched_from = switched_from_rt, 2072 .switched_from = switched_from_rt,
2070 #endif 2073 #endif
2071 2074
2072 .set_curr_task = set_curr_task_rt, 2075 .set_curr_task = set_curr_task_rt,
2073 .task_tick = task_tick_rt, 2076 .task_tick = task_tick_rt,
2074 2077
2075 .get_rr_interval = get_rr_interval_rt, 2078 .get_rr_interval = get_rr_interval_rt,
2076 2079
2077 .prio_changed = prio_changed_rt, 2080 .prio_changed = prio_changed_rt,
2078 .switched_to = switched_to_rt, 2081 .switched_to = switched_to_rt,
2079 }; 2082 };
2080 2083
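/*
 * Illustrative userspace sketch (not kernel code): sched_setscheduler() with
 * SCHED_FIFO or SCHED_RR is how a task comes to be handled by the
 * rt_sched_class operations above. Assumes CAP_SYS_NICE or root; priority 50
 * is an arbitrary example value.
 */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 50 };

	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		perror("sched_setscheduler");
		return 1;
	}
	printf("now scheduled by the RT class (SCHED_FIFO, priority 50)\n");
	return 0;
}
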
2081 #ifdef CONFIG_SCHED_DEBUG 2084 #ifdef CONFIG_SCHED_DEBUG
2082 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); 2085 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
2083 2086
2084 void print_rt_stats(struct seq_file *m, int cpu) 2087 void print_rt_stats(struct seq_file *m, int cpu)
2085 { 2088 {
2086 rt_rq_iter_t iter; 2089 rt_rq_iter_t iter;
2087 struct rt_rq *rt_rq; 2090 struct rt_rq *rt_rq;
2088 2091
2089 rcu_read_lock(); 2092 rcu_read_lock();
2090 for_each_rt_rq(rt_rq, iter, cpu_rq(cpu)) 2093 for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
2091 print_rt_rq(m, cpu, rt_rq); 2094 print_rt_rq(m, cpu, rt_rq);
2092 rcu_read_unlock(); 2095 rcu_read_unlock();
2093 } 2096 }
2094 #endif /* CONFIG_SCHED_DEBUG */ 2097 #endif /* CONFIG_SCHED_DEBUG */
2095 2098