Commit 0f397f2c90ce68821ee864c2c53baafe78de765d
Committed by: Ingo Molnar
1 parent: b14ed2c273
Exists in ti-lsk-linux-4.1.y and in 12 other branches
sched/dl: Fix race in dl_task_timer()
A throttled task is still on the rq, and it may be moved to another CPU if the
user is playing with sched_setaffinity(). Therefore, the unlocked task_rq()
access creates a race.

Juri Lelli reports he hit this race when dl_bandwidth_enabled() was not set.

Another case, pointed out by Peter Zijlstra:

   "Now I suppose the problem can still actually happen when
   you change the root domain and trigger a effective affinity change
   that way".

To fix that, we do the same as __task_rq_lock() does. We do not use
__task_rq_lock() itself, because it has a useful lockdep check, which is not
correct in the case of dl_task_timer(). We do not need pi_lock locked here.
This case is an exception (PeterZ):

   "The only reason we don't strictly need ->pi_lock now is because
   we're guaranteed to have p->state == TASK_RUNNING here and are thus
   free of ttwu races".

Signed-off-by: Kirill Tkhai <tkhai@yandex.ru>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # v3.14+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/3056991400578422@web14g.yandex.ru
Signed-off-by: Ingo Molnar <mingo@kernel.org>
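To illustrate the pattern the patch adds to dl_task_timer() (read task_rq(), take that rq's lock, then recheck and retry if the task migrated in the meantime), here is a minimal userspace sketch. It is an assumption-laden model, not kernel code: pthread mutexes stand in for raw_spin_lock(), the struct rq / struct task / lock_task_rq() / migrator names are hypothetical stand-ins, and the migrator thread models sched_setaffinity() moving the task while holding the source rq lock. Like the patch, it omits pi_lock and the lockdep check that make __task_rq_lock() itself unsuitable here.

/*
 * Userspace model (assumption, not kernel code) of the lock-and-recheck
 * loop used by the fix: a "task" records which "rq" it is queued on, and
 * a locker must verify the task did not migrate between reading task->rq
 * and acquiring that rq's lock.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct rq {
	pthread_mutex_t lock;
	int cpu;
};

struct task {
	_Atomic(struct rq *) rq;	/* runqueue the task currently lives on */
};

static struct rq rqs[2] = {
	{ PTHREAD_MUTEX_INITIALIZER, 0 },
	{ PTHREAD_MUTEX_INITIALIZER, 1 },
};
static struct task t = { &rqs[0] };

/* Mirrors the retry loop added to dl_task_timer(): no pi_lock, no lockdep. */
static struct rq *lock_task_rq(struct task *p)
{
	struct rq *rq;

	for (;;) {
		rq = atomic_load(&p->rq);
		pthread_mutex_lock(&rq->lock);
		if (rq == atomic_load(&p->rq))
			return rq;		/* still on the rq we locked */
		/* Task was moved, retrying. */
		pthread_mutex_unlock(&rq->lock);
	}
}

/* Models an affinity change: migration holds the source rq lock. */
static void *migrator(void *arg)
{
	for (int i = 0; i < 100000; i++) {
		struct rq *from = atomic_load(&t.rq);
		struct rq *to = &rqs[i & 1];

		pthread_mutex_lock(&from->lock);
		atomic_store(&t.rq, to);
		pthread_mutex_unlock(&from->lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t thr;

	pthread_create(&thr, NULL, migrator, NULL);

	for (int i = 0; i < 100000; i++) {
		struct rq *rq = lock_task_rq(&t);
		/* ... timer work against a stable task<->rq association ... */
		pthread_mutex_unlock(&rq->lock);
	}

	pthread_join(thr, NULL);
	puts("done");
	return 0;
}

Because migration (in the model and in the kernel) happens under the source rq lock, once lock_task_rq() returns the task cannot move until the caller unlocks; skipping ->pi_lock is safe here only for the reason the commit message quotes, namely that p->state == TASK_RUNNING rules out ttwu races.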
Showing 1 changed file with 9 additions and 1 deletion. Inline Diff
kernel/sched/deadline.c
1 | /* | 1 | /* |
2 | * Deadline Scheduling Class (SCHED_DEADLINE) | 2 | * Deadline Scheduling Class (SCHED_DEADLINE) |
3 | * | 3 | * |
4 | * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS). | 4 | * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS). |
5 | * | 5 | * |
6 | * Tasks that periodically executes their instances for less than their | 6 | * Tasks that periodically executes their instances for less than their |
7 | * runtime won't miss any of their deadlines. | 7 | * runtime won't miss any of their deadlines. |
8 | * Tasks that are not periodic or sporadic or that tries to execute more | 8 | * Tasks that are not periodic or sporadic or that tries to execute more |
9 | * than their reserved bandwidth will be slowed down (and may potentially | 9 | * than their reserved bandwidth will be slowed down (and may potentially |
10 | * miss some of their deadlines), and won't affect any other task. | 10 | * miss some of their deadlines), and won't affect any other task. |
11 | * | 11 | * |
12 | * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>, | 12 | * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>, |
13 | * Juri Lelli <juri.lelli@gmail.com>, | 13 | * Juri Lelli <juri.lelli@gmail.com>, |
14 | * Michael Trimarchi <michael@amarulasolutions.com>, | 14 | * Michael Trimarchi <michael@amarulasolutions.com>, |
15 | * Fabio Checconi <fchecconi@gmail.com> | 15 | * Fabio Checconi <fchecconi@gmail.com> |
16 | */ | 16 | */ |
17 | #include "sched.h" | 17 | #include "sched.h" |
18 | 18 | ||
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | 20 | ||
21 | struct dl_bandwidth def_dl_bandwidth; | 21 | struct dl_bandwidth def_dl_bandwidth; |
22 | 22 | ||
23 | static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) | 23 | static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) |
24 | { | 24 | { |
25 | return container_of(dl_se, struct task_struct, dl); | 25 | return container_of(dl_se, struct task_struct, dl); |
26 | } | 26 | } |
27 | 27 | ||
28 | static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq) | 28 | static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq) |
29 | { | 29 | { |
30 | return container_of(dl_rq, struct rq, dl); | 30 | return container_of(dl_rq, struct rq, dl); |
31 | } | 31 | } |
32 | 32 | ||
33 | static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se) | 33 | static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se) |
34 | { | 34 | { |
35 | struct task_struct *p = dl_task_of(dl_se); | 35 | struct task_struct *p = dl_task_of(dl_se); |
36 | struct rq *rq = task_rq(p); | 36 | struct rq *rq = task_rq(p); |
37 | 37 | ||
38 | return &rq->dl; | 38 | return &rq->dl; |
39 | } | 39 | } |
40 | 40 | ||
41 | static inline int on_dl_rq(struct sched_dl_entity *dl_se) | 41 | static inline int on_dl_rq(struct sched_dl_entity *dl_se) |
42 | { | 42 | { |
43 | return !RB_EMPTY_NODE(&dl_se->rb_node); | 43 | return !RB_EMPTY_NODE(&dl_se->rb_node); |
44 | } | 44 | } |
45 | 45 | ||
46 | static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq) | 46 | static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq) |
47 | { | 47 | { |
48 | struct sched_dl_entity *dl_se = &p->dl; | 48 | struct sched_dl_entity *dl_se = &p->dl; |
49 | 49 | ||
50 | return dl_rq->rb_leftmost == &dl_se->rb_node; | 50 | return dl_rq->rb_leftmost == &dl_se->rb_node; |
51 | } | 51 | } |
52 | 52 | ||
53 | void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime) | 53 | void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime) |
54 | { | 54 | { |
55 | raw_spin_lock_init(&dl_b->dl_runtime_lock); | 55 | raw_spin_lock_init(&dl_b->dl_runtime_lock); |
56 | dl_b->dl_period = period; | 56 | dl_b->dl_period = period; |
57 | dl_b->dl_runtime = runtime; | 57 | dl_b->dl_runtime = runtime; |
58 | } | 58 | } |
59 | 59 | ||
60 | extern unsigned long to_ratio(u64 period, u64 runtime); | 60 | extern unsigned long to_ratio(u64 period, u64 runtime); |
61 | 61 | ||
62 | void init_dl_bw(struct dl_bw *dl_b) | 62 | void init_dl_bw(struct dl_bw *dl_b) |
63 | { | 63 | { |
64 | raw_spin_lock_init(&dl_b->lock); | 64 | raw_spin_lock_init(&dl_b->lock); |
65 | raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock); | 65 | raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock); |
66 | if (global_rt_runtime() == RUNTIME_INF) | 66 | if (global_rt_runtime() == RUNTIME_INF) |
67 | dl_b->bw = -1; | 67 | dl_b->bw = -1; |
68 | else | 68 | else |
69 | dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime()); | 69 | dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime()); |
70 | raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock); | 70 | raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock); |
71 | dl_b->total_bw = 0; | 71 | dl_b->total_bw = 0; |
72 | } | 72 | } |
73 | 73 | ||
74 | void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq) | 74 | void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq) |
75 | { | 75 | { |
76 | dl_rq->rb_root = RB_ROOT; | 76 | dl_rq->rb_root = RB_ROOT; |
77 | 77 | ||
78 | #ifdef CONFIG_SMP | 78 | #ifdef CONFIG_SMP |
79 | /* zero means no -deadline tasks */ | 79 | /* zero means no -deadline tasks */ |
80 | dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0; | 80 | dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0; |
81 | 81 | ||
82 | dl_rq->dl_nr_migratory = 0; | 82 | dl_rq->dl_nr_migratory = 0; |
83 | dl_rq->overloaded = 0; | 83 | dl_rq->overloaded = 0; |
84 | dl_rq->pushable_dl_tasks_root = RB_ROOT; | 84 | dl_rq->pushable_dl_tasks_root = RB_ROOT; |
85 | #else | 85 | #else |
86 | init_dl_bw(&dl_rq->dl_bw); | 86 | init_dl_bw(&dl_rq->dl_bw); |
87 | #endif | 87 | #endif |
88 | } | 88 | } |
89 | 89 | ||
90 | #ifdef CONFIG_SMP | 90 | #ifdef CONFIG_SMP |
91 | 91 | ||
92 | static inline int dl_overloaded(struct rq *rq) | 92 | static inline int dl_overloaded(struct rq *rq) |
93 | { | 93 | { |
94 | return atomic_read(&rq->rd->dlo_count); | 94 | return atomic_read(&rq->rd->dlo_count); |
95 | } | 95 | } |
96 | 96 | ||
97 | static inline void dl_set_overload(struct rq *rq) | 97 | static inline void dl_set_overload(struct rq *rq) |
98 | { | 98 | { |
99 | if (!rq->online) | 99 | if (!rq->online) |
100 | return; | 100 | return; |
101 | 101 | ||
102 | cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask); | 102 | cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask); |
103 | /* | 103 | /* |
104 | * Must be visible before the overload count is | 104 | * Must be visible before the overload count is |
105 | * set (as in sched_rt.c). | 105 | * set (as in sched_rt.c). |
106 | * | 106 | * |
107 | * Matched by the barrier in pull_dl_task(). | 107 | * Matched by the barrier in pull_dl_task(). |
108 | */ | 108 | */ |
109 | smp_wmb(); | 109 | smp_wmb(); |
110 | atomic_inc(&rq->rd->dlo_count); | 110 | atomic_inc(&rq->rd->dlo_count); |
111 | } | 111 | } |
112 | 112 | ||
113 | static inline void dl_clear_overload(struct rq *rq) | 113 | static inline void dl_clear_overload(struct rq *rq) |
114 | { | 114 | { |
115 | if (!rq->online) | 115 | if (!rq->online) |
116 | return; | 116 | return; |
117 | 117 | ||
118 | atomic_dec(&rq->rd->dlo_count); | 118 | atomic_dec(&rq->rd->dlo_count); |
119 | cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask); | 119 | cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask); |
120 | } | 120 | } |
121 | 121 | ||
122 | static void update_dl_migration(struct dl_rq *dl_rq) | 122 | static void update_dl_migration(struct dl_rq *dl_rq) |
123 | { | 123 | { |
124 | if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) { | 124 | if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) { |
125 | if (!dl_rq->overloaded) { | 125 | if (!dl_rq->overloaded) { |
126 | dl_set_overload(rq_of_dl_rq(dl_rq)); | 126 | dl_set_overload(rq_of_dl_rq(dl_rq)); |
127 | dl_rq->overloaded = 1; | 127 | dl_rq->overloaded = 1; |
128 | } | 128 | } |
129 | } else if (dl_rq->overloaded) { | 129 | } else if (dl_rq->overloaded) { |
130 | dl_clear_overload(rq_of_dl_rq(dl_rq)); | 130 | dl_clear_overload(rq_of_dl_rq(dl_rq)); |
131 | dl_rq->overloaded = 0; | 131 | dl_rq->overloaded = 0; |
132 | } | 132 | } |
133 | } | 133 | } |
134 | 134 | ||
135 | static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | 135 | static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) |
136 | { | 136 | { |
137 | struct task_struct *p = dl_task_of(dl_se); | 137 | struct task_struct *p = dl_task_of(dl_se); |
138 | 138 | ||
139 | if (p->nr_cpus_allowed > 1) | 139 | if (p->nr_cpus_allowed > 1) |
140 | dl_rq->dl_nr_migratory++; | 140 | dl_rq->dl_nr_migratory++; |
141 | 141 | ||
142 | update_dl_migration(dl_rq); | 142 | update_dl_migration(dl_rq); |
143 | } | 143 | } |
144 | 144 | ||
145 | static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | 145 | static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) |
146 | { | 146 | { |
147 | struct task_struct *p = dl_task_of(dl_se); | 147 | struct task_struct *p = dl_task_of(dl_se); |
148 | 148 | ||
149 | if (p->nr_cpus_allowed > 1) | 149 | if (p->nr_cpus_allowed > 1) |
150 | dl_rq->dl_nr_migratory--; | 150 | dl_rq->dl_nr_migratory--; |
151 | 151 | ||
152 | update_dl_migration(dl_rq); | 152 | update_dl_migration(dl_rq); |
153 | } | 153 | } |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * The list of pushable -deadline task is not a plist, like in | 156 | * The list of pushable -deadline task is not a plist, like in |
157 | * sched_rt.c, it is an rb-tree with tasks ordered by deadline. | 157 | * sched_rt.c, it is an rb-tree with tasks ordered by deadline. |
158 | */ | 158 | */ |
159 | static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) | 159 | static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) |
160 | { | 160 | { |
161 | struct dl_rq *dl_rq = &rq->dl; | 161 | struct dl_rq *dl_rq = &rq->dl; |
162 | struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node; | 162 | struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node; |
163 | struct rb_node *parent = NULL; | 163 | struct rb_node *parent = NULL; |
164 | struct task_struct *entry; | 164 | struct task_struct *entry; |
165 | int leftmost = 1; | 165 | int leftmost = 1; |
166 | 166 | ||
167 | BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); | 167 | BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); |
168 | 168 | ||
169 | while (*link) { | 169 | while (*link) { |
170 | parent = *link; | 170 | parent = *link; |
171 | entry = rb_entry(parent, struct task_struct, | 171 | entry = rb_entry(parent, struct task_struct, |
172 | pushable_dl_tasks); | 172 | pushable_dl_tasks); |
173 | if (dl_entity_preempt(&p->dl, &entry->dl)) | 173 | if (dl_entity_preempt(&p->dl, &entry->dl)) |
174 | link = &parent->rb_left; | 174 | link = &parent->rb_left; |
175 | else { | 175 | else { |
176 | link = &parent->rb_right; | 176 | link = &parent->rb_right; |
177 | leftmost = 0; | 177 | leftmost = 0; |
178 | } | 178 | } |
179 | } | 179 | } |
180 | 180 | ||
181 | if (leftmost) | 181 | if (leftmost) |
182 | dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks; | 182 | dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks; |
183 | 183 | ||
184 | rb_link_node(&p->pushable_dl_tasks, parent, link); | 184 | rb_link_node(&p->pushable_dl_tasks, parent, link); |
185 | rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); | 185 | rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); |
186 | } | 186 | } |
187 | 187 | ||
188 | static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) | 188 | static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) |
189 | { | 189 | { |
190 | struct dl_rq *dl_rq = &rq->dl; | 190 | struct dl_rq *dl_rq = &rq->dl; |
191 | 191 | ||
192 | if (RB_EMPTY_NODE(&p->pushable_dl_tasks)) | 192 | if (RB_EMPTY_NODE(&p->pushable_dl_tasks)) |
193 | return; | 193 | return; |
194 | 194 | ||
195 | if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) { | 195 | if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) { |
196 | struct rb_node *next_node; | 196 | struct rb_node *next_node; |
197 | 197 | ||
198 | next_node = rb_next(&p->pushable_dl_tasks); | 198 | next_node = rb_next(&p->pushable_dl_tasks); |
199 | dl_rq->pushable_dl_tasks_leftmost = next_node; | 199 | dl_rq->pushable_dl_tasks_leftmost = next_node; |
200 | } | 200 | } |
201 | 201 | ||
202 | rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); | 202 | rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); |
203 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | 203 | RB_CLEAR_NODE(&p->pushable_dl_tasks); |
204 | } | 204 | } |
205 | 205 | ||
206 | static inline int has_pushable_dl_tasks(struct rq *rq) | 206 | static inline int has_pushable_dl_tasks(struct rq *rq) |
207 | { | 207 | { |
208 | return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root); | 208 | return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root); |
209 | } | 209 | } |
210 | 210 | ||
211 | static int push_dl_task(struct rq *rq); | 211 | static int push_dl_task(struct rq *rq); |
212 | 212 | ||
213 | static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) | 213 | static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) |
214 | { | 214 | { |
215 | return dl_task(prev); | 215 | return dl_task(prev); |
216 | } | 216 | } |
217 | 217 | ||
218 | static inline void set_post_schedule(struct rq *rq) | 218 | static inline void set_post_schedule(struct rq *rq) |
219 | { | 219 | { |
220 | rq->post_schedule = has_pushable_dl_tasks(rq); | 220 | rq->post_schedule = has_pushable_dl_tasks(rq); |
221 | } | 221 | } |
222 | 222 | ||
223 | #else | 223 | #else |
224 | 224 | ||
225 | static inline | 225 | static inline |
226 | void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) | 226 | void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) |
227 | { | 227 | { |
228 | } | 228 | } |
229 | 229 | ||
230 | static inline | 230 | static inline |
231 | void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) | 231 | void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) |
232 | { | 232 | { |
233 | } | 233 | } |
234 | 234 | ||
235 | static inline | 235 | static inline |
236 | void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | 236 | void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) |
237 | { | 237 | { |
238 | } | 238 | } |
239 | 239 | ||
240 | static inline | 240 | static inline |
241 | void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | 241 | void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) |
242 | { | 242 | { |
243 | } | 243 | } |
244 | 244 | ||
245 | static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) | 245 | static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) |
246 | { | 246 | { |
247 | return false; | 247 | return false; |
248 | } | 248 | } |
249 | 249 | ||
250 | static inline int pull_dl_task(struct rq *rq) | 250 | static inline int pull_dl_task(struct rq *rq) |
251 | { | 251 | { |
252 | return 0; | 252 | return 0; |
253 | } | 253 | } |
254 | 254 | ||
255 | static inline void set_post_schedule(struct rq *rq) | 255 | static inline void set_post_schedule(struct rq *rq) |
256 | { | 256 | { |
257 | } | 257 | } |
258 | #endif /* CONFIG_SMP */ | 258 | #endif /* CONFIG_SMP */ |
259 | 259 | ||
260 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); | 260 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); |
261 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags); | 261 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags); |
262 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, | 262 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, |
263 | int flags); | 263 | int flags); |
264 | 264 | ||
265 | /* | 265 | /* |
266 | * We are being explicitly informed that a new instance is starting, | 266 | * We are being explicitly informed that a new instance is starting, |
267 | * and this means that: | 267 | * and this means that: |
268 | * - the absolute deadline of the entity has to be placed at | 268 | * - the absolute deadline of the entity has to be placed at |
269 | * current time + relative deadline; | 269 | * current time + relative deadline; |
270 | * - the runtime of the entity has to be set to the maximum value. | 270 | * - the runtime of the entity has to be set to the maximum value. |
271 | * | 271 | * |
272 | * The capability of specifying such event is useful whenever a -deadline | 272 | * The capability of specifying such event is useful whenever a -deadline |
273 | * entity wants to (try to!) synchronize its behaviour with the scheduler's | 273 | * entity wants to (try to!) synchronize its behaviour with the scheduler's |
274 | * one, and to (try to!) reconcile itself with its own scheduling | 274 | * one, and to (try to!) reconcile itself with its own scheduling |
275 | * parameters. | 275 | * parameters. |
276 | */ | 276 | */ |
277 | static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se, | 277 | static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se, |
278 | struct sched_dl_entity *pi_se) | 278 | struct sched_dl_entity *pi_se) |
279 | { | 279 | { |
280 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | 280 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); |
281 | struct rq *rq = rq_of_dl_rq(dl_rq); | 281 | struct rq *rq = rq_of_dl_rq(dl_rq); |
282 | 282 | ||
283 | WARN_ON(!dl_se->dl_new || dl_se->dl_throttled); | 283 | WARN_ON(!dl_se->dl_new || dl_se->dl_throttled); |
284 | 284 | ||
285 | /* | 285 | /* |
286 | * We use the regular wall clock time to set deadlines in the | 286 | * We use the regular wall clock time to set deadlines in the |
287 | * future; in fact, we must consider execution overheads (time | 287 | * future; in fact, we must consider execution overheads (time |
288 | * spent on hardirq context, etc.). | 288 | * spent on hardirq context, etc.). |
289 | */ | 289 | */ |
290 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; | 290 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; |
291 | dl_se->runtime = pi_se->dl_runtime; | 291 | dl_se->runtime = pi_se->dl_runtime; |
292 | dl_se->dl_new = 0; | 292 | dl_se->dl_new = 0; |
293 | } | 293 | } |
294 | 294 | ||
295 | /* | 295 | /* |
296 | * Pure Earliest Deadline First (EDF) scheduling does not deal with the | 296 | * Pure Earliest Deadline First (EDF) scheduling does not deal with the |
297 | * possibility of a entity lasting more than what it declared, and thus | 297 | * possibility of a entity lasting more than what it declared, and thus |
298 | * exhausting its runtime. | 298 | * exhausting its runtime. |
299 | * | 299 | * |
300 | * Here we are interested in making runtime overrun possible, but we do | 300 | * Here we are interested in making runtime overrun possible, but we do |
301 | * not want a entity which is misbehaving to affect the scheduling of all | 301 | * not want a entity which is misbehaving to affect the scheduling of all |
302 | * other entities. | 302 | * other entities. |
303 | * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS) | 303 | * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS) |
304 | * is used, in order to confine each entity within its own bandwidth. | 304 | * is used, in order to confine each entity within its own bandwidth. |
305 | * | 305 | * |
306 | * This function deals exactly with that, and ensures that when the runtime | 306 | * This function deals exactly with that, and ensures that when the runtime |
307 | * of a entity is replenished, its deadline is also postponed. That ensures | 307 | * of a entity is replenished, its deadline is also postponed. That ensures |
308 | * the overrunning entity can't interfere with other entity in the system and | 308 | * the overrunning entity can't interfere with other entity in the system and |
309 | * can't make them miss their deadlines. Reasons why this kind of overruns | 309 | * can't make them miss their deadlines. Reasons why this kind of overruns |
310 | * could happen are, typically, a entity voluntarily trying to overcome its | 310 | * could happen are, typically, a entity voluntarily trying to overcome its |
311 | * runtime, or it just underestimated it during sched_setscheduler_ex(). | 311 | * runtime, or it just underestimated it during sched_setscheduler_ex(). |
312 | */ | 312 | */ |
313 | static void replenish_dl_entity(struct sched_dl_entity *dl_se, | 313 | static void replenish_dl_entity(struct sched_dl_entity *dl_se, |
314 | struct sched_dl_entity *pi_se) | 314 | struct sched_dl_entity *pi_se) |
315 | { | 315 | { |
316 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | 316 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); |
317 | struct rq *rq = rq_of_dl_rq(dl_rq); | 317 | struct rq *rq = rq_of_dl_rq(dl_rq); |
318 | 318 | ||
319 | BUG_ON(pi_se->dl_runtime <= 0); | 319 | BUG_ON(pi_se->dl_runtime <= 0); |
320 | 320 | ||
321 | /* | 321 | /* |
322 | * This could be the case for a !-dl task that is boosted. | 322 | * This could be the case for a !-dl task that is boosted. |
323 | * Just go with full inherited parameters. | 323 | * Just go with full inherited parameters. |
324 | */ | 324 | */ |
325 | if (dl_se->dl_deadline == 0) { | 325 | if (dl_se->dl_deadline == 0) { |
326 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; | 326 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; |
327 | dl_se->runtime = pi_se->dl_runtime; | 327 | dl_se->runtime = pi_se->dl_runtime; |
328 | } | 328 | } |
329 | 329 | ||
330 | /* | 330 | /* |
331 | * We keep moving the deadline away until we get some | 331 | * We keep moving the deadline away until we get some |
332 | * available runtime for the entity. This ensures correct | 332 | * available runtime for the entity. This ensures correct |
333 | * handling of situations where the runtime overrun is | 333 | * handling of situations where the runtime overrun is |
334 | * arbitrary large. | 334 | * arbitrary large. |
335 | */ | 335 | */ |
336 | while (dl_se->runtime <= 0) { | 336 | while (dl_se->runtime <= 0) { |
337 | dl_se->deadline += pi_se->dl_period; | 337 | dl_se->deadline += pi_se->dl_period; |
338 | dl_se->runtime += pi_se->dl_runtime; | 338 | dl_se->runtime += pi_se->dl_runtime; |
339 | } | 339 | } |
340 | 340 | ||
341 | /* | 341 | /* |
342 | * At this point, the deadline really should be "in | 342 | * At this point, the deadline really should be "in |
343 | * the future" with respect to rq->clock. If it's | 343 | * the future" with respect to rq->clock. If it's |
344 | * not, we are, for some reason, lagging too much! | 344 | * not, we are, for some reason, lagging too much! |
345 | * Anyway, after having warn userspace abut that, | 345 | * Anyway, after having warn userspace abut that, |
346 | * we still try to keep the things running by | 346 | * we still try to keep the things running by |
347 | * resetting the deadline and the budget of the | 347 | * resetting the deadline and the budget of the |
348 | * entity. | 348 | * entity. |
349 | */ | 349 | */ |
350 | if (dl_time_before(dl_se->deadline, rq_clock(rq))) { | 350 | if (dl_time_before(dl_se->deadline, rq_clock(rq))) { |
351 | static bool lag_once = false; | 351 | static bool lag_once = false; |
352 | 352 | ||
353 | if (!lag_once) { | 353 | if (!lag_once) { |
354 | lag_once = true; | 354 | lag_once = true; |
355 | printk_sched("sched: DL replenish lagged to much\n"); | 355 | printk_sched("sched: DL replenish lagged to much\n"); |
356 | } | 356 | } |
357 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; | 357 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; |
358 | dl_se->runtime = pi_se->dl_runtime; | 358 | dl_se->runtime = pi_se->dl_runtime; |
359 | } | 359 | } |
360 | } | 360 | } |
361 | 361 | ||
362 | /* | 362 | /* |
363 | * Here we check if --at time t-- an entity (which is probably being | 363 | * Here we check if --at time t-- an entity (which is probably being |
364 | * [re]activated or, in general, enqueued) can use its remaining runtime | 364 | * [re]activated or, in general, enqueued) can use its remaining runtime |
365 | * and its current deadline _without_ exceeding the bandwidth it is | 365 | * and its current deadline _without_ exceeding the bandwidth it is |
366 | * assigned (function returns true if it can't). We are in fact applying | 366 | * assigned (function returns true if it can't). We are in fact applying |
367 | * one of the CBS rules: when a task wakes up, if the residual runtime | 367 | * one of the CBS rules: when a task wakes up, if the residual runtime |
368 | * over residual deadline fits within the allocated bandwidth, then we | 368 | * over residual deadline fits within the allocated bandwidth, then we |
369 | * can keep the current (absolute) deadline and residual budget without | 369 | * can keep the current (absolute) deadline and residual budget without |
370 | * disrupting the schedulability of the system. Otherwise, we should | 370 | * disrupting the schedulability of the system. Otherwise, we should |
371 | * refill the runtime and set the deadline a period in the future, | 371 | * refill the runtime and set the deadline a period in the future, |
372 | * because keeping the current (absolute) deadline of the task would | 372 | * because keeping the current (absolute) deadline of the task would |
373 | * result in breaking guarantees promised to other tasks (refer to | 373 | * result in breaking guarantees promised to other tasks (refer to |
374 | * Documentation/scheduler/sched-deadline.txt for more informations). | 374 | * Documentation/scheduler/sched-deadline.txt for more informations). |
375 | * | 375 | * |
376 | * This function returns true if: | 376 | * This function returns true if: |
377 | * | 377 | * |
378 | * runtime / (deadline - t) > dl_runtime / dl_period , | 378 | * runtime / (deadline - t) > dl_runtime / dl_period , |
379 | * | 379 | * |
380 | * IOW we can't recycle current parameters. | 380 | * IOW we can't recycle current parameters. |
381 | * | 381 | * |
382 | * Notice that the bandwidth check is done against the period. For | 382 | * Notice that the bandwidth check is done against the period. For |
383 | * task with deadline equal to period this is the same of using | 383 | * task with deadline equal to period this is the same of using |
384 | * dl_deadline instead of dl_period in the equation above. | 384 | * dl_deadline instead of dl_period in the equation above. |
385 | */ | 385 | */ |
386 | static bool dl_entity_overflow(struct sched_dl_entity *dl_se, | 386 | static bool dl_entity_overflow(struct sched_dl_entity *dl_se, |
387 | struct sched_dl_entity *pi_se, u64 t) | 387 | struct sched_dl_entity *pi_se, u64 t) |
388 | { | 388 | { |
389 | u64 left, right; | 389 | u64 left, right; |
390 | 390 | ||
391 | /* | 391 | /* |
392 | * left and right are the two sides of the equation above, | 392 | * left and right are the two sides of the equation above, |
393 | * after a bit of shuffling to use multiplications instead | 393 | * after a bit of shuffling to use multiplications instead |
394 | * of divisions. | 394 | * of divisions. |
395 | * | 395 | * |
396 | * Note that none of the time values involved in the two | 396 | * Note that none of the time values involved in the two |
397 | * multiplications are absolute: dl_deadline and dl_runtime | 397 | * multiplications are absolute: dl_deadline and dl_runtime |
398 | * are the relative deadline and the maximum runtime of each | 398 | * are the relative deadline and the maximum runtime of each |
399 | * instance, runtime is the runtime left for the last instance | 399 | * instance, runtime is the runtime left for the last instance |
400 | * and (deadline - t), since t is rq->clock, is the time left | 400 | * and (deadline - t), since t is rq->clock, is the time left |
401 | * to the (absolute) deadline. Even if overflowing the u64 type | 401 | * to the (absolute) deadline. Even if overflowing the u64 type |
402 | * is very unlikely to occur in both cases, here we scale down | 402 | * is very unlikely to occur in both cases, here we scale down |
403 | * as we want to avoid that risk at all. Scaling down by 10 | 403 | * as we want to avoid that risk at all. Scaling down by 10 |
404 | * means that we reduce granularity to 1us. We are fine with it, | 404 | * means that we reduce granularity to 1us. We are fine with it, |
405 | * since this is only a true/false check and, anyway, thinking | 405 | * since this is only a true/false check and, anyway, thinking |
406 | * of anything below microseconds resolution is actually fiction | 406 | * of anything below microseconds resolution is actually fiction |
407 | * (but still we want to give the user that illusion >;). | 407 | * (but still we want to give the user that illusion >;). |
408 | */ | 408 | */ |
409 | left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); | 409 | left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); |
410 | right = ((dl_se->deadline - t) >> DL_SCALE) * | 410 | right = ((dl_se->deadline - t) >> DL_SCALE) * |
411 | (pi_se->dl_runtime >> DL_SCALE); | 411 | (pi_se->dl_runtime >> DL_SCALE); |
412 | 412 | ||
413 | return dl_time_before(right, left); | 413 | return dl_time_before(right, left); |
414 | } | 414 | } |
415 | 415 | ||
416 | /* | 416 | /* |
417 | * When a -deadline entity is queued back on the runqueue, its runtime and | 417 | * When a -deadline entity is queued back on the runqueue, its runtime and |
418 | * deadline might need updating. | 418 | * deadline might need updating. |
419 | * | 419 | * |
420 | * The policy here is that we update the deadline of the entity only if: | 420 | * The policy here is that we update the deadline of the entity only if: |
421 | * - the current deadline is in the past, | 421 | * - the current deadline is in the past, |
422 | * - using the remaining runtime with the current deadline would make | 422 | * - using the remaining runtime with the current deadline would make |
423 | * the entity exceed its bandwidth. | 423 | * the entity exceed its bandwidth. |
424 | */ | 424 | */ |
425 | static void update_dl_entity(struct sched_dl_entity *dl_se, | 425 | static void update_dl_entity(struct sched_dl_entity *dl_se, |
426 | struct sched_dl_entity *pi_se) | 426 | struct sched_dl_entity *pi_se) |
427 | { | 427 | { |
428 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | 428 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); |
429 | struct rq *rq = rq_of_dl_rq(dl_rq); | 429 | struct rq *rq = rq_of_dl_rq(dl_rq); |
430 | 430 | ||
431 | /* | 431 | /* |
432 | * The arrival of a new instance needs special treatment, i.e., | 432 | * The arrival of a new instance needs special treatment, i.e., |
433 | * the actual scheduling parameters have to be "renewed". | 433 | * the actual scheduling parameters have to be "renewed". |
434 | */ | 434 | */ |
435 | if (dl_se->dl_new) { | 435 | if (dl_se->dl_new) { |
436 | setup_new_dl_entity(dl_se, pi_se); | 436 | setup_new_dl_entity(dl_se, pi_se); |
437 | return; | 437 | return; |
438 | } | 438 | } |
439 | 439 | ||
440 | if (dl_time_before(dl_se->deadline, rq_clock(rq)) || | 440 | if (dl_time_before(dl_se->deadline, rq_clock(rq)) || |
441 | dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) { | 441 | dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) { |
442 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; | 442 | dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; |
443 | dl_se->runtime = pi_se->dl_runtime; | 443 | dl_se->runtime = pi_se->dl_runtime; |
444 | } | 444 | } |
445 | } | 445 | } |
446 | 446 | ||
447 | /* | 447 | /* |
448 | * If the entity depleted all its runtime, and if we want it to sleep | 448 | * If the entity depleted all its runtime, and if we want it to sleep |
449 | * while waiting for some new execution time to become available, we | 449 | * while waiting for some new execution time to become available, we |
450 | * set the bandwidth enforcement timer to the replenishment instant | 450 | * set the bandwidth enforcement timer to the replenishment instant |
451 | * and try to activate it. | 451 | * and try to activate it. |
452 | * | 452 | * |
453 | * Notice that it is important for the caller to know if the timer | 453 | * Notice that it is important for the caller to know if the timer |
454 | * actually started or not (i.e., the replenishment instant is in | 454 | * actually started or not (i.e., the replenishment instant is in |
455 | * the future or in the past). | 455 | * the future or in the past). |
456 | */ | 456 | */ |
457 | static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted) | 457 | static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted) |
458 | { | 458 | { |
459 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | 459 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); |
460 | struct rq *rq = rq_of_dl_rq(dl_rq); | 460 | struct rq *rq = rq_of_dl_rq(dl_rq); |
461 | ktime_t now, act; | 461 | ktime_t now, act; |
462 | ktime_t soft, hard; | 462 | ktime_t soft, hard; |
463 | unsigned long range; | 463 | unsigned long range; |
464 | s64 delta; | 464 | s64 delta; |
465 | 465 | ||
466 | if (boosted) | 466 | if (boosted) |
467 | return 0; | 467 | return 0; |
468 | /* | 468 | /* |
469 | * We want the timer to fire at the deadline, but considering | 469 | * We want the timer to fire at the deadline, but considering |
470 | * that it is actually coming from rq->clock and not from | 470 | * that it is actually coming from rq->clock and not from |
471 | * hrtimer's time base reading. | 471 | * hrtimer's time base reading. |
472 | */ | 472 | */ |
473 | act = ns_to_ktime(dl_se->deadline); | 473 | act = ns_to_ktime(dl_se->deadline); |
474 | now = hrtimer_cb_get_time(&dl_se->dl_timer); | 474 | now = hrtimer_cb_get_time(&dl_se->dl_timer); |
475 | delta = ktime_to_ns(now) - rq_clock(rq); | 475 | delta = ktime_to_ns(now) - rq_clock(rq); |
476 | act = ktime_add_ns(act, delta); | 476 | act = ktime_add_ns(act, delta); |
477 | 477 | ||
478 | /* | 478 | /* |
479 | * If the expiry time already passed, e.g., because the value | 479 | * If the expiry time already passed, e.g., because the value |
480 | * chosen as the deadline is too small, don't even try to | 480 | * chosen as the deadline is too small, don't even try to |
481 | * start the timer in the past! | 481 | * start the timer in the past! |
482 | */ | 482 | */ |
483 | if (ktime_us_delta(act, now) < 0) | 483 | if (ktime_us_delta(act, now) < 0) |
484 | return 0; | 484 | return 0; |
485 | 485 | ||
486 | hrtimer_set_expires(&dl_se->dl_timer, act); | 486 | hrtimer_set_expires(&dl_se->dl_timer, act); |
487 | 487 | ||
488 | soft = hrtimer_get_softexpires(&dl_se->dl_timer); | 488 | soft = hrtimer_get_softexpires(&dl_se->dl_timer); |
489 | hard = hrtimer_get_expires(&dl_se->dl_timer); | 489 | hard = hrtimer_get_expires(&dl_se->dl_timer); |
490 | range = ktime_to_ns(ktime_sub(hard, soft)); | 490 | range = ktime_to_ns(ktime_sub(hard, soft)); |
491 | __hrtimer_start_range_ns(&dl_se->dl_timer, soft, | 491 | __hrtimer_start_range_ns(&dl_se->dl_timer, soft, |
492 | range, HRTIMER_MODE_ABS, 0); | 492 | range, HRTIMER_MODE_ABS, 0); |
493 | 493 | ||
494 | return hrtimer_active(&dl_se->dl_timer); | 494 | return hrtimer_active(&dl_se->dl_timer); |
495 | } | 495 | } |
496 | 496 | ||
497 | /* | 497 | /* |
498 | * This is the bandwidth enforcement timer callback. If here, we know | 498 | * This is the bandwidth enforcement timer callback. If here, we know |
499 | * a task is not on its dl_rq, since the fact that the timer was running | 499 | * a task is not on its dl_rq, since the fact that the timer was running |
500 | * means the task is throttled and needs a runtime replenishment. | 500 | * means the task is throttled and needs a runtime replenishment. |
501 | * | 501 | * |
502 | * However, what we actually do depends on the fact the task is active, | 502 | * However, what we actually do depends on the fact the task is active, |
503 | * (it is on its rq) or has been removed from there by a call to | 503 | * (it is on its rq) or has been removed from there by a call to |
504 | * dequeue_task_dl(). In the former case we must issue the runtime | 504 | * dequeue_task_dl(). In the former case we must issue the runtime |
505 | * replenishment and add the task back to the dl_rq; in the latter, we just | 505 | * replenishment and add the task back to the dl_rq; in the latter, we just |
506 | * do nothing but clearing dl_throttled, so that runtime and deadline | 506 | * do nothing but clearing dl_throttled, so that runtime and deadline |
507 | * updating (and the queueing back to dl_rq) will be done by the | 507 | * updating (and the queueing back to dl_rq) will be done by the |
508 | * next call to enqueue_task_dl(). | 508 | * next call to enqueue_task_dl(). |
509 | */ | 509 | */ |
510 | static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | 510 | static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) |
511 | { | 511 | { |
512 | struct sched_dl_entity *dl_se = container_of(timer, | 512 | struct sched_dl_entity *dl_se = container_of(timer, |
513 | struct sched_dl_entity, | 513 | struct sched_dl_entity, |
514 | dl_timer); | 514 | dl_timer); |
515 | struct task_struct *p = dl_task_of(dl_se); | 515 | struct task_struct *p = dl_task_of(dl_se); |
516 | struct rq *rq = task_rq(p); | 516 | struct rq *rq; |
517 | again: | ||
518 | rq = task_rq(p); | ||
517 | raw_spin_lock(&rq->lock); | 519 | raw_spin_lock(&rq->lock); |
520 | |||
521 | if (rq != task_rq(p)) { | ||
522 | /* Task was moved, retrying. */ | ||
523 | raw_spin_unlock(&rq->lock); | ||
524 | goto again; | ||
525 | } | ||
518 | 526 | ||
519 | /* | 527 | /* |
520 | * We need to take care of a possible races here. In fact, the | 528 | * We need to take care of a possible races here. In fact, the |
521 | * task might have changed its scheduling policy to something | 529 | * task might have changed its scheduling policy to something |
522 | * different from SCHED_DEADLINE or changed its reservation | 530 | * different from SCHED_DEADLINE or changed its reservation |
523 | * parameters (through sched_setscheduler()). | 531 | * parameters (through sched_setscheduler()). |
524 | */ | 532 | */ |
525 | if (!dl_task(p) || dl_se->dl_new) | 533 | if (!dl_task(p) || dl_se->dl_new) |
526 | goto unlock; | 534 | goto unlock; |
527 | 535 | ||
528 | sched_clock_tick(); | 536 | sched_clock_tick(); |
529 | update_rq_clock(rq); | 537 | update_rq_clock(rq); |
530 | dl_se->dl_throttled = 0; | 538 | dl_se->dl_throttled = 0; |
531 | dl_se->dl_yielded = 0; | 539 | dl_se->dl_yielded = 0; |
532 | if (p->on_rq) { | 540 | if (p->on_rq) { |
533 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | 541 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); |
534 | if (task_has_dl_policy(rq->curr)) | 542 | if (task_has_dl_policy(rq->curr)) |
535 | check_preempt_curr_dl(rq, p, 0); | 543 | check_preempt_curr_dl(rq, p, 0); |
536 | else | 544 | else |
537 | resched_task(rq->curr); | 545 | resched_task(rq->curr); |
538 | #ifdef CONFIG_SMP | 546 | #ifdef CONFIG_SMP |
539 | /* | 547 | /* |
540 | * Queueing this task back might have overloaded rq, | 548 | * Queueing this task back might have overloaded rq, |
541 | * check if we need to kick someone away. | 549 | * check if we need to kick someone away. |
542 | */ | 550 | */ |
543 | if (has_pushable_dl_tasks(rq)) | 551 | if (has_pushable_dl_tasks(rq)) |
544 | push_dl_task(rq); | 552 | push_dl_task(rq); |
545 | #endif | 553 | #endif |
546 | } | 554 | } |
547 | unlock: | 555 | unlock: |
548 | raw_spin_unlock(&rq->lock); | 556 | raw_spin_unlock(&rq->lock); |
549 | 557 | ||
550 | return HRTIMER_NORESTART; | 558 | return HRTIMER_NORESTART; |
551 | } | 559 | } |
552 | 560 | ||
553 | void init_dl_task_timer(struct sched_dl_entity *dl_se) | 561 | void init_dl_task_timer(struct sched_dl_entity *dl_se) |
554 | { | 562 | { |
555 | struct hrtimer *timer = &dl_se->dl_timer; | 563 | struct hrtimer *timer = &dl_se->dl_timer; |
556 | 564 | ||
557 | if (hrtimer_active(timer)) { | 565 | if (hrtimer_active(timer)) { |
558 | hrtimer_try_to_cancel(timer); | 566 | hrtimer_try_to_cancel(timer); |
559 | return; | 567 | return; |
560 | } | 568 | } |
561 | 569 | ||
562 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 570 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
563 | timer->function = dl_task_timer; | 571 | timer->function = dl_task_timer; |
564 | } | 572 | } |
565 | 573 | ||
566 | static | 574 | static |
567 | int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) | 575 | int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) |
568 | { | 576 | { |
569 | int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq)); | 577 | int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq)); |
570 | int rorun = dl_se->runtime <= 0; | 578 | int rorun = dl_se->runtime <= 0; |
571 | 579 | ||
572 | if (!rorun && !dmiss) | 580 | if (!rorun && !dmiss) |
573 | return 0; | 581 | return 0; |
574 | 582 | ||
575 | /* | 583 | /* |
576 | * If we are beyond our current deadline and we are still | 584 | * If we are beyond our current deadline and we are still |
577 | * executing, then we have already used some of the runtime of | 585 | * executing, then we have already used some of the runtime of |
578 | * the next instance. Thus, if we do not account that, we are | 586 | * the next instance. Thus, if we do not account that, we are |
579 | * stealing bandwidth from the system at each deadline miss! | 587 | * stealing bandwidth from the system at each deadline miss! |
580 | */ | 588 | */ |
581 | if (dmiss) { | 589 | if (dmiss) { |
582 | dl_se->runtime = rorun ? dl_se->runtime : 0; | 590 | dl_se->runtime = rorun ? dl_se->runtime : 0; |
583 | dl_se->runtime -= rq_clock(rq) - dl_se->deadline; | 591 | dl_se->runtime -= rq_clock(rq) - dl_se->deadline; |
584 | } | 592 | } |
585 | 593 | ||
586 | return 1; | 594 | return 1; |
587 | } | 595 | } |
588 | 596 | ||
589 | extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); | 597 | extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); |
590 | 598 | ||
591 | /* | 599 | /* |
592 | * Update the current task's runtime statistics (provided it is still | 600 | * Update the current task's runtime statistics (provided it is still |
593 | * a -deadline task and has not been removed from the dl_rq). | 601 | * a -deadline task and has not been removed from the dl_rq). |
594 | */ | 602 | */ |
595 | static void update_curr_dl(struct rq *rq) | 603 | static void update_curr_dl(struct rq *rq) |
596 | { | 604 | { |
597 | struct task_struct *curr = rq->curr; | 605 | struct task_struct *curr = rq->curr; |
598 | struct sched_dl_entity *dl_se = &curr->dl; | 606 | struct sched_dl_entity *dl_se = &curr->dl; |
599 | u64 delta_exec; | 607 | u64 delta_exec; |
600 | 608 | ||
601 | if (!dl_task(curr) || !on_dl_rq(dl_se)) | 609 | if (!dl_task(curr) || !on_dl_rq(dl_se)) |
602 | return; | 610 | return; |
603 | 611 | ||
604 | /* | 612 | /* |
605 | * Consumed budget is computed considering the time as | 613 | * Consumed budget is computed considering the time as |
606 | * observed by schedulable tasks (excluding time spent | 614 | * observed by schedulable tasks (excluding time spent |
607 | * in hardirq context, etc.). Deadlines are instead | 615 | * in hardirq context, etc.). Deadlines are instead |
608 | * computed using hard walltime. This seems to be the more | 616 | * computed using hard walltime. This seems to be the more |
609 | * natural solution, but the full ramifications of this | 617 | * natural solution, but the full ramifications of this |
610 | * approach need further study. | 618 | * approach need further study. |
611 | */ | 619 | */ |
612 | delta_exec = rq_clock_task(rq) - curr->se.exec_start; | 620 | delta_exec = rq_clock_task(rq) - curr->se.exec_start; |
613 | if (unlikely((s64)delta_exec <= 0)) | 621 | if (unlikely((s64)delta_exec <= 0)) |
614 | return; | 622 | return; |
615 | 623 | ||
616 | schedstat_set(curr->se.statistics.exec_max, | 624 | schedstat_set(curr->se.statistics.exec_max, |
617 | max(curr->se.statistics.exec_max, delta_exec)); | 625 | max(curr->se.statistics.exec_max, delta_exec)); |
618 | 626 | ||
619 | curr->se.sum_exec_runtime += delta_exec; | 627 | curr->se.sum_exec_runtime += delta_exec; |
620 | account_group_exec_runtime(curr, delta_exec); | 628 | account_group_exec_runtime(curr, delta_exec); |
621 | 629 | ||
622 | curr->se.exec_start = rq_clock_task(rq); | 630 | curr->se.exec_start = rq_clock_task(rq); |
623 | cpuacct_charge(curr, delta_exec); | 631 | cpuacct_charge(curr, delta_exec); |
624 | 632 | ||
625 | sched_rt_avg_update(rq, delta_exec); | 633 | sched_rt_avg_update(rq, delta_exec); |
626 | 634 | ||
627 | dl_se->runtime -= delta_exec; | 635 | dl_se->runtime -= delta_exec; |
628 | if (dl_runtime_exceeded(rq, dl_se)) { | 636 | if (dl_runtime_exceeded(rq, dl_se)) { |
629 | __dequeue_task_dl(rq, curr, 0); | 637 | __dequeue_task_dl(rq, curr, 0); |
630 | if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted))) | 638 | if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted))) |
631 | dl_se->dl_throttled = 1; | 639 | dl_se->dl_throttled = 1; |
632 | else | 640 | else |
633 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); | 641 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); |
634 | 642 | ||
635 | if (!is_leftmost(curr, &rq->dl)) | 643 | if (!is_leftmost(curr, &rq->dl)) |
636 | resched_task(curr); | 644 | resched_task(curr); |
637 | } | 645 | } |
638 | 646 | ||
639 | /* | 647 | /* |
640 | * Because -- for now -- we share the rt bandwidth, we need to | 648 | * Because -- for now -- we share the rt bandwidth, we need to |
641 | * account our runtime there too, otherwise actual rt tasks | 649 | * account our runtime there too, otherwise actual rt tasks |
642 | * would be able to exceed the shared quota. | 650 | * would be able to exceed the shared quota. |
643 | * | 651 | * |
644 | * Account to the root rt group for now. | 652 | * Account to the root rt group for now. |
645 | * | 653 | * |
646 | * The solution we're working towards is having the RT groups scheduled | 654 | * The solution we're working towards is having the RT groups scheduled |
647 | * using deadline servers -- however there's a few nasties to figure | 655 | * using deadline servers -- however there's a few nasties to figure |
648 | * out before that can happen. | 656 | * out before that can happen. |
649 | */ | 657 | */ |
650 | if (rt_bandwidth_enabled()) { | 658 | if (rt_bandwidth_enabled()) { |
651 | struct rt_rq *rt_rq = &rq->rt; | 659 | struct rt_rq *rt_rq = &rq->rt; |
652 | 660 | ||
653 | raw_spin_lock(&rt_rq->rt_runtime_lock); | 661 | raw_spin_lock(&rt_rq->rt_runtime_lock); |
654 | /* | 662 | /* |
655 | * We'll let actual RT tasks worry about the overflow here, we | 663 | * We'll let actual RT tasks worry about the overflow here, we |
656 | * have our own CBS to keep us inline; only account when RT | 664 | * have our own CBS to keep us inline; only account when RT |
657 | * bandwidth is relevant. | 665 | * bandwidth is relevant. |
658 | */ | 666 | */ |
659 | if (sched_rt_bandwidth_account(rt_rq)) | 667 | if (sched_rt_bandwidth_account(rt_rq)) |
660 | rt_rq->rt_time += delta_exec; | 668 | rt_rq->rt_time += delta_exec; |
661 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 669 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
662 | } | 670 | } |
663 | } | 671 | } |
664 | 672 | ||
665 | #ifdef CONFIG_SMP | 673 | #ifdef CONFIG_SMP |
666 | 674 | ||
667 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu); | 675 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu); |
668 | 676 | ||
669 | static inline u64 next_deadline(struct rq *rq) | 677 | static inline u64 next_deadline(struct rq *rq) |
670 | { | 678 | { |
671 | struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu); | 679 | struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu); |
672 | 680 | ||
673 | if (next && dl_prio(next->prio)) | 681 | if (next && dl_prio(next->prio)) |
674 | return next->dl.deadline; | 682 | return next->dl.deadline; |
675 | else | 683 | else |
676 | return 0; | 684 | return 0; |
677 | } | 685 | } |
678 | 686 | ||
679 | static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) | 687 | static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) |
680 | { | 688 | { |
681 | struct rq *rq = rq_of_dl_rq(dl_rq); | 689 | struct rq *rq = rq_of_dl_rq(dl_rq); |
682 | 690 | ||
683 | if (dl_rq->earliest_dl.curr == 0 || | 691 | if (dl_rq->earliest_dl.curr == 0 || |
684 | dl_time_before(deadline, dl_rq->earliest_dl.curr)) { | 692 | dl_time_before(deadline, dl_rq->earliest_dl.curr)) { |
685 | /* | 693 | /* |
686 | * If the dl_rq had no -deadline tasks, or if the new task | 694 | * If the dl_rq had no -deadline tasks, or if the new task |
687 | * has shorter deadline than the current one on dl_rq, we | 695 | * has shorter deadline than the current one on dl_rq, we |
688 | * know that the previous earliest becomes our next earliest, | 696 | * know that the previous earliest becomes our next earliest, |
689 | * as the new task becomes the earliest itself. | 697 | * as the new task becomes the earliest itself. |
690 | */ | 698 | */ |
691 | dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr; | 699 | dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr; |
692 | dl_rq->earliest_dl.curr = deadline; | 700 | dl_rq->earliest_dl.curr = deadline; |
693 | cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1); | 701 | cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1); |
694 | } else if (dl_rq->earliest_dl.next == 0 || | 702 | } else if (dl_rq->earliest_dl.next == 0 || |
695 | dl_time_before(deadline, dl_rq->earliest_dl.next)) { | 703 | dl_time_before(deadline, dl_rq->earliest_dl.next)) { |
696 | /* | 704 | /* |
697 | * On the other hand, if the new -deadline task has a | 705 | * On the other hand, if the new -deadline task has a |
698 | * a later deadline than the earliest one on dl_rq, but | 706 | * a later deadline than the earliest one on dl_rq, but |
699 | * it is earlier than the next (if any), we must | 707 | * it is earlier than the next (if any), we must |
700 | * recompute the next-earliest. | 708 | * recompute the next-earliest. |
701 | */ | 709 | */ |
702 | dl_rq->earliest_dl.next = next_deadline(rq); | 710 | dl_rq->earliest_dl.next = next_deadline(rq); |
703 | } | 711 | } |
704 | } | 712 | } |
705 | 713 | ||
706 | static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) | 714 | static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) |
707 | { | 715 | { |
708 | struct rq *rq = rq_of_dl_rq(dl_rq); | 716 | struct rq *rq = rq_of_dl_rq(dl_rq); |
709 | 717 | ||
710 | /* | 718 | /* |
711 | * Since we may have removed our earliest (and/or next earliest) | 719 | * Since we may have removed our earliest (and/or next earliest) |
712 | * task we must recompute them. | 720 | * task we must recompute them. |
713 | */ | 721 | */ |
714 | if (!dl_rq->dl_nr_running) { | 722 | if (!dl_rq->dl_nr_running) { |
715 | dl_rq->earliest_dl.curr = 0; | 723 | dl_rq->earliest_dl.curr = 0; |
716 | dl_rq->earliest_dl.next = 0; | 724 | dl_rq->earliest_dl.next = 0; |
717 | cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); | 725 | cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); |
718 | } else { | 726 | } else { |
719 | struct rb_node *leftmost = dl_rq->rb_leftmost; | 727 | struct rb_node *leftmost = dl_rq->rb_leftmost; |
720 | struct sched_dl_entity *entry; | 728 | struct sched_dl_entity *entry; |
721 | 729 | ||
722 | entry = rb_entry(leftmost, struct sched_dl_entity, rb_node); | 730 | entry = rb_entry(leftmost, struct sched_dl_entity, rb_node); |
723 | dl_rq->earliest_dl.curr = entry->deadline; | 731 | dl_rq->earliest_dl.curr = entry->deadline; |
724 | dl_rq->earliest_dl.next = next_deadline(rq); | 732 | dl_rq->earliest_dl.next = next_deadline(rq); |
725 | cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1); | 733 | cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1); |
726 | } | 734 | } |
727 | } | 735 | } |
728 | 736 | ||
729 | #else | 737 | #else |
730 | 738 | ||
731 | static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} | 739 | static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} |
732 | static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} | 740 | static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} |
733 | 741 | ||
734 | #endif /* CONFIG_SMP */ | 742 | #endif /* CONFIG_SMP */ |
735 | 743 | ||
736 | static inline | 744 | static inline |
737 | void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | 745 | void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) |
738 | { | 746 | { |
739 | int prio = dl_task_of(dl_se)->prio; | 747 | int prio = dl_task_of(dl_se)->prio; |
740 | u64 deadline = dl_se->deadline; | 748 | u64 deadline = dl_se->deadline; |
741 | 749 | ||
742 | WARN_ON(!dl_prio(prio)); | 750 | WARN_ON(!dl_prio(prio)); |
743 | dl_rq->dl_nr_running++; | 751 | dl_rq->dl_nr_running++; |
744 | inc_nr_running(rq_of_dl_rq(dl_rq)); | 752 | inc_nr_running(rq_of_dl_rq(dl_rq)); |
745 | 753 | ||
746 | inc_dl_deadline(dl_rq, deadline); | 754 | inc_dl_deadline(dl_rq, deadline); |
747 | inc_dl_migration(dl_se, dl_rq); | 755 | inc_dl_migration(dl_se, dl_rq); |
748 | } | 756 | } |
749 | 757 | ||
750 | static inline | 758 | static inline |
751 | void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | 759 | void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) |
752 | { | 760 | { |
753 | int prio = dl_task_of(dl_se)->prio; | 761 | int prio = dl_task_of(dl_se)->prio; |
754 | 762 | ||
755 | WARN_ON(!dl_prio(prio)); | 763 | WARN_ON(!dl_prio(prio)); |
756 | WARN_ON(!dl_rq->dl_nr_running); | 764 | WARN_ON(!dl_rq->dl_nr_running); |
757 | dl_rq->dl_nr_running--; | 765 | dl_rq->dl_nr_running--; |
758 | dec_nr_running(rq_of_dl_rq(dl_rq)); | 766 | dec_nr_running(rq_of_dl_rq(dl_rq)); |
759 | 767 | ||
760 | dec_dl_deadline(dl_rq, dl_se->deadline); | 768 | dec_dl_deadline(dl_rq, dl_se->deadline); |
761 | dec_dl_migration(dl_se, dl_rq); | 769 | dec_dl_migration(dl_se, dl_rq); |
762 | } | 770 | } |
763 | 771 | ||
764 | static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) | 772 | static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) |
765 | { | 773 | { |
766 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | 774 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); |
767 | struct rb_node **link = &dl_rq->rb_root.rb_node; | 775 | struct rb_node **link = &dl_rq->rb_root.rb_node; |
768 | struct rb_node *parent = NULL; | 776 | struct rb_node *parent = NULL; |
769 | struct sched_dl_entity *entry; | 777 | struct sched_dl_entity *entry; |
770 | int leftmost = 1; | 778 | int leftmost = 1; |
771 | 779 | ||
772 | BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node)); | 780 | BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node)); |
773 | 781 | ||
774 | while (*link) { | 782 | while (*link) { |
775 | parent = *link; | 783 | parent = *link; |
776 | entry = rb_entry(parent, struct sched_dl_entity, rb_node); | 784 | entry = rb_entry(parent, struct sched_dl_entity, rb_node); |
777 | if (dl_time_before(dl_se->deadline, entry->deadline)) | 785 | if (dl_time_before(dl_se->deadline, entry->deadline)) |
778 | link = &parent->rb_left; | 786 | link = &parent->rb_left; |
779 | else { | 787 | else { |
780 | link = &parent->rb_right; | 788 | link = &parent->rb_right; |
781 | leftmost = 0; | 789 | leftmost = 0; |
782 | } | 790 | } |
783 | } | 791 | } |
784 | 792 | ||
785 | if (leftmost) | 793 | if (leftmost) |
786 | dl_rq->rb_leftmost = &dl_se->rb_node; | 794 | dl_rq->rb_leftmost = &dl_se->rb_node; |
787 | 795 | ||
788 | rb_link_node(&dl_se->rb_node, parent, link); | 796 | rb_link_node(&dl_se->rb_node, parent, link); |
789 | rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root); | 797 | rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root); |
790 | 798 | ||
791 | inc_dl_tasks(dl_se, dl_rq); | 799 | inc_dl_tasks(dl_se, dl_rq); |
792 | } | 800 | } |
793 | 801 | ||
794 | static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) | 802 | static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) |
795 | { | 803 | { |
796 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | 804 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); |
797 | 805 | ||
798 | if (RB_EMPTY_NODE(&dl_se->rb_node)) | 806 | if (RB_EMPTY_NODE(&dl_se->rb_node)) |
799 | return; | 807 | return; |
800 | 808 | ||
801 | if (dl_rq->rb_leftmost == &dl_se->rb_node) { | 809 | if (dl_rq->rb_leftmost == &dl_se->rb_node) { |
802 | struct rb_node *next_node; | 810 | struct rb_node *next_node; |
803 | 811 | ||
804 | next_node = rb_next(&dl_se->rb_node); | 812 | next_node = rb_next(&dl_se->rb_node); |
805 | dl_rq->rb_leftmost = next_node; | 813 | dl_rq->rb_leftmost = next_node; |
806 | } | 814 | } |
807 | 815 | ||
808 | rb_erase(&dl_se->rb_node, &dl_rq->rb_root); | 816 | rb_erase(&dl_se->rb_node, &dl_rq->rb_root); |
809 | RB_CLEAR_NODE(&dl_se->rb_node); | 817 | RB_CLEAR_NODE(&dl_se->rb_node); |
810 | 818 | ||
811 | dec_dl_tasks(dl_se, dl_rq); | 819 | dec_dl_tasks(dl_se, dl_rq); |
812 | } | 820 | } |
813 | 821 | ||
814 | static void | 822 | static void |
815 | enqueue_dl_entity(struct sched_dl_entity *dl_se, | 823 | enqueue_dl_entity(struct sched_dl_entity *dl_se, |
816 | struct sched_dl_entity *pi_se, int flags) | 824 | struct sched_dl_entity *pi_se, int flags) |
817 | { | 825 | { |
818 | BUG_ON(on_dl_rq(dl_se)); | 826 | BUG_ON(on_dl_rq(dl_se)); |
819 | 827 | ||
820 | /* | 828 | /* |
821 | * If this is a wakeup or a new instance, the scheduling | 829 | * If this is a wakeup or a new instance, the scheduling |
822 | * parameters of the task might need updating. Otherwise, | 830 | * parameters of the task might need updating. Otherwise, |
823 | * we want a replenishment of its runtime. | 831 | * we want a replenishment of its runtime. |
824 | */ | 832 | */ |
825 | if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH) | 833 | if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH) |
826 | replenish_dl_entity(dl_se, pi_se); | 834 | replenish_dl_entity(dl_se, pi_se); |
827 | else | 835 | else |
828 | update_dl_entity(dl_se, pi_se); | 836 | update_dl_entity(dl_se, pi_se); |
829 | 837 | ||
830 | __enqueue_dl_entity(dl_se); | 838 | __enqueue_dl_entity(dl_se); |
831 | } | 839 | } |
832 | 840 | ||
833 | static void dequeue_dl_entity(struct sched_dl_entity *dl_se) | 841 | static void dequeue_dl_entity(struct sched_dl_entity *dl_se) |
834 | { | 842 | { |
835 | __dequeue_dl_entity(dl_se); | 843 | __dequeue_dl_entity(dl_se); |
836 | } | 844 | } |
837 | 845 | ||
838 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | 846 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) |
839 | { | 847 | { |
840 | struct task_struct *pi_task = rt_mutex_get_top_task(p); | 848 | struct task_struct *pi_task = rt_mutex_get_top_task(p); |
841 | struct sched_dl_entity *pi_se = &p->dl; | 849 | struct sched_dl_entity *pi_se = &p->dl; |
842 | 850 | ||
843 | /* | 851 | /* |
844 | * Use the scheduling parameters of the top pi-waiter | 852 | * Use the scheduling parameters of the top pi-waiter |
845 | * task if we have one and its (relative) deadline is | 853 | * task if we have one and its (relative) deadline is |
846 | * smaller than our one... OTW we keep our runtime and | 854 | * smaller than our one... OTW we keep our runtime and |
847 | * deadline. | 855 | * deadline. |
848 | */ | 856 | */ |
849 | if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) | 857 | if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) |
850 | pi_se = &pi_task->dl; | 858 | pi_se = &pi_task->dl; |
851 | 859 | ||
852 | /* | 860 | /* |
853 | * If p is throttled, we do nothing. In fact, if it exhausted | 861 | * If p is throttled, we do nothing. In fact, if it exhausted |
854 | * its budget it needs a replenishment and, since it now is on | 862 | * its budget it needs a replenishment and, since it now is on |
855 | * its rq, the bandwidth timer callback (which clearly has not | 863 | * its rq, the bandwidth timer callback (which clearly has not |
856 | * run yet) will take care of this. | 864 | * run yet) will take care of this. |
857 | */ | 865 | */ |
858 | if (p->dl.dl_throttled) | 866 | if (p->dl.dl_throttled) |
859 | return; | 867 | return; |
860 | 868 | ||
861 | enqueue_dl_entity(&p->dl, pi_se, flags); | 869 | enqueue_dl_entity(&p->dl, pi_se, flags); |
862 | 870 | ||
863 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) | 871 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) |
864 | enqueue_pushable_dl_task(rq, p); | 872 | enqueue_pushable_dl_task(rq, p); |
865 | } | 873 | } |
866 | 874 | ||
867 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) | 875 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) |
868 | { | 876 | { |
869 | dequeue_dl_entity(&p->dl); | 877 | dequeue_dl_entity(&p->dl); |
870 | dequeue_pushable_dl_task(rq, p); | 878 | dequeue_pushable_dl_task(rq, p); |
871 | } | 879 | } |
872 | 880 | ||
873 | static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) | 881 | static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) |
874 | { | 882 | { |
875 | update_curr_dl(rq); | 883 | update_curr_dl(rq); |
876 | __dequeue_task_dl(rq, p, flags); | 884 | __dequeue_task_dl(rq, p, flags); |
877 | } | 885 | } |
878 | 886 | ||
879 | /* | 887 | /* |
880 | * Yield task semantic for -deadline tasks is: | 888 | * Yield task semantic for -deadline tasks is: |
881 | * | 889 | * |
882 | * get off from the CPU until our next instance, with | 890 | * get off from the CPU until our next instance, with |
883 | * a new runtime. This is of little use now, since we | 891 | * a new runtime. This is of little use now, since we |
884 | * don't have a bandwidth reclaiming mechanism. Anyway, | 892 | * don't have a bandwidth reclaiming mechanism. Anyway, |
885 | * bandwidth reclaiming is planned for the future, and | 893 | * bandwidth reclaiming is planned for the future, and |
886 | * yield_task_dl will indicate that some spare budget | 894 | * yield_task_dl will indicate that some spare budget |
887 | * is available for other task instances to use it. | 895 | * is available for other task instances to use it. |
888 | */ | 896 | */ |
889 | static void yield_task_dl(struct rq *rq) | 897 | static void yield_task_dl(struct rq *rq) |
890 | { | 898 | { |
891 | struct task_struct *p = rq->curr; | 899 | struct task_struct *p = rq->curr; |
892 | 900 | ||
893 | /* | 901 | /* |
894 | * We make the task go to sleep until its current deadline by | 902 | * We make the task go to sleep until its current deadline by |
895 | * forcing its runtime to zero. This way, update_curr_dl() stops | 903 | * forcing its runtime to zero. This way, update_curr_dl() stops |
896 | * it and the bandwidth timer will wake it up and will give it | 904 | * it and the bandwidth timer will wake it up and will give it |
897 | * new scheduling parameters (thanks to dl_yielded=1). | 905 | * new scheduling parameters (thanks to dl_yielded=1). |
898 | */ | 906 | */ |
899 | if (p->dl.runtime > 0) { | 907 | if (p->dl.runtime > 0) { |
900 | rq->curr->dl.dl_yielded = 1; | 908 | rq->curr->dl.dl_yielded = 1; |
901 | p->dl.runtime = 0; | 909 | p->dl.runtime = 0; |
902 | } | 910 | } |
903 | update_curr_dl(rq); | 911 | update_curr_dl(rq); |
904 | } | 912 | } |
905 | 913 | ||
906 | #ifdef CONFIG_SMP | 914 | #ifdef CONFIG_SMP |
907 | 915 | ||
908 | static int find_later_rq(struct task_struct *task); | 916 | static int find_later_rq(struct task_struct *task); |
909 | 917 | ||
910 | static int | 918 | static int |
911 | select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) | 919 | select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) |
912 | { | 920 | { |
913 | struct task_struct *curr; | 921 | struct task_struct *curr; |
914 | struct rq *rq; | 922 | struct rq *rq; |
915 | 923 | ||
916 | if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) | 924 | if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) |
917 | goto out; | 925 | goto out; |
918 | 926 | ||
919 | rq = cpu_rq(cpu); | 927 | rq = cpu_rq(cpu); |
920 | 928 | ||
921 | rcu_read_lock(); | 929 | rcu_read_lock(); |
922 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ | 930 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ |
923 | 931 | ||
924 | /* | 932 | /* |
925 | * If we are dealing with a -deadline task, we must | 933 | * If we are dealing with a -deadline task, we must |
926 | * decide where to wake it up. | 934 | * decide where to wake it up. |
927 | * If it has a later deadline and the current task | 935 | * If it has a later deadline and the current task |
928 | * on this rq can't move (provided the waking task | 936 | * on this rq can't move (provided the waking task |
929 | * can!) we prefer to send it somewhere else. On the | 937 | * can!) we prefer to send it somewhere else. On the |
930 | * other hand, if it has a shorter deadline, we | 938 | * other hand, if it has a shorter deadline, we |
931 | * try to make it stay here, it might be important. | 939 | * try to make it stay here, it might be important. |
932 | */ | 940 | */ |
933 | if (unlikely(dl_task(curr)) && | 941 | if (unlikely(dl_task(curr)) && |
934 | (curr->nr_cpus_allowed < 2 || | 942 | (curr->nr_cpus_allowed < 2 || |
935 | !dl_entity_preempt(&p->dl, &curr->dl)) && | 943 | !dl_entity_preempt(&p->dl, &curr->dl)) && |
936 | (p->nr_cpus_allowed > 1)) { | 944 | (p->nr_cpus_allowed > 1)) { |
937 | int target = find_later_rq(p); | 945 | int target = find_later_rq(p); |
938 | 946 | ||
939 | if (target != -1) | 947 | if (target != -1) |
940 | cpu = target; | 948 | cpu = target; |
941 | } | 949 | } |
942 | rcu_read_unlock(); | 950 | rcu_read_unlock(); |
943 | 951 | ||
944 | out: | 952 | out: |
945 | return cpu; | 953 | return cpu; |
946 | } | 954 | } |
947 | 955 | ||
948 | static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) | 956 | static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) |
949 | { | 957 | { |
950 | /* | 958 | /* |
951 | * Current can't be migrated, useless to reschedule, | 959 | * Current can't be migrated, useless to reschedule, |
952 | * let's hope p can move out. | 960 | * let's hope p can move out. |
953 | */ | 961 | */ |
954 | if (rq->curr->nr_cpus_allowed == 1 || | 962 | if (rq->curr->nr_cpus_allowed == 1 || |
955 | cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1) | 963 | cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1) |
956 | return; | 964 | return; |
957 | 965 | ||
958 | /* | 966 | /* |
959 | * p is migratable, so let's not schedule it and | 967 | * p is migratable, so let's not schedule it and |
960 | * see if it is pushed or pulled somewhere else. | 968 | * see if it is pushed or pulled somewhere else. |
961 | */ | 969 | */ |
962 | if (p->nr_cpus_allowed != 1 && | 970 | if (p->nr_cpus_allowed != 1 && |
963 | cpudl_find(&rq->rd->cpudl, p, NULL) != -1) | 971 | cpudl_find(&rq->rd->cpudl, p, NULL) != -1) |
964 | return; | 972 | return; |
965 | 973 | ||
966 | resched_task(rq->curr); | 974 | resched_task(rq->curr); |
967 | } | 975 | } |
968 | 976 | ||
969 | static int pull_dl_task(struct rq *this_rq); | 977 | static int pull_dl_task(struct rq *this_rq); |
970 | 978 | ||
971 | #endif /* CONFIG_SMP */ | 979 | #endif /* CONFIG_SMP */ |
972 | 980 | ||
973 | /* | 981 | /* |
974 | * Only called when both the current and waking task are -deadline | 982 | * Only called when both the current and waking task are -deadline |
975 | * tasks. | 983 | * tasks. |
976 | */ | 984 | */ |
977 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, | 985 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, |
978 | int flags) | 986 | int flags) |
979 | { | 987 | { |
980 | if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { | 988 | if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { |
981 | resched_task(rq->curr); | 989 | resched_task(rq->curr); |
982 | return; | 990 | return; |
983 | } | 991 | } |
984 | 992 | ||
985 | #ifdef CONFIG_SMP | 993 | #ifdef CONFIG_SMP |
986 | /* | 994 | /* |
987 | * In the unlikely case current and p have the same deadline | 995 | * In the unlikely case current and p have the same deadline |
988 | * let us try to decide what's the best thing to do... | 996 | * let us try to decide what's the best thing to do... |
989 | */ | 997 | */ |
990 | if ((p->dl.deadline == rq->curr->dl.deadline) && | 998 | if ((p->dl.deadline == rq->curr->dl.deadline) && |
991 | !test_tsk_need_resched(rq->curr)) | 999 | !test_tsk_need_resched(rq->curr)) |
992 | check_preempt_equal_dl(rq, p); | 1000 | check_preempt_equal_dl(rq, p); |
993 | #endif /* CONFIG_SMP */ | 1001 | #endif /* CONFIG_SMP */ |
994 | } | 1002 | } |
995 | 1003 | ||
996 | #ifdef CONFIG_SCHED_HRTICK | 1004 | #ifdef CONFIG_SCHED_HRTICK |
997 | static void start_hrtick_dl(struct rq *rq, struct task_struct *p) | 1005 | static void start_hrtick_dl(struct rq *rq, struct task_struct *p) |
998 | { | 1006 | { |
999 | s64 delta = p->dl.dl_runtime - p->dl.runtime; | 1007 | s64 delta = p->dl.dl_runtime - p->dl.runtime; |
1000 | 1008 | ||
1001 | if (delta > 10000) | 1009 | if (delta > 10000) |
1002 | hrtick_start(rq, p->dl.runtime); | 1010 | hrtick_start(rq, p->dl.runtime); |
1003 | } | 1011 | } |
1004 | #endif | 1012 | #endif |
1005 | 1013 | ||
1006 | static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, | 1014 | static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, |
1007 | struct dl_rq *dl_rq) | 1015 | struct dl_rq *dl_rq) |
1008 | { | 1016 | { |
1009 | struct rb_node *left = dl_rq->rb_leftmost; | 1017 | struct rb_node *left = dl_rq->rb_leftmost; |
1010 | 1018 | ||
1011 | if (!left) | 1019 | if (!left) |
1012 | return NULL; | 1020 | return NULL; |
1013 | 1021 | ||
1014 | return rb_entry(left, struct sched_dl_entity, rb_node); | 1022 | return rb_entry(left, struct sched_dl_entity, rb_node); |
1015 | } | 1023 | } |
1016 | 1024 | ||
1017 | struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) | 1025 | struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) |
1018 | { | 1026 | { |
1019 | struct sched_dl_entity *dl_se; | 1027 | struct sched_dl_entity *dl_se; |
1020 | struct task_struct *p; | 1028 | struct task_struct *p; |
1021 | struct dl_rq *dl_rq; | 1029 | struct dl_rq *dl_rq; |
1022 | 1030 | ||
1023 | dl_rq = &rq->dl; | 1031 | dl_rq = &rq->dl; |
1024 | 1032 | ||
1025 | if (need_pull_dl_task(rq, prev)) { | 1033 | if (need_pull_dl_task(rq, prev)) { |
1026 | pull_dl_task(rq); | 1034 | pull_dl_task(rq); |
1027 | /* | 1035 | /* |
1028 | * pull_dl_task() can drop (and re-acquire) rq->lock; this | 1036 | * pull_dl_task() can drop (and re-acquire) rq->lock; this |
1029 | * means a stop task can slip in, in which case we need to | 1037 | * means a stop task can slip in, in which case we need to |
1030 | * re-start task selection. | 1038 | * re-start task selection. |
1031 | */ | 1039 | */ |
1032 | if (rq->stop && rq->stop->on_rq) | 1040 | if (rq->stop && rq->stop->on_rq) |
1033 | return RETRY_TASK; | 1041 | return RETRY_TASK; |
1034 | } | 1042 | } |
1035 | 1043 | ||
1036 | /* | 1044 | /* |
1037 | * When prev is DL, we may throttle it in put_prev_task(). | 1045 | * When prev is DL, we may throttle it in put_prev_task(). |
1038 | * So, we update time before we check for dl_nr_running. | 1046 | * So, we update time before we check for dl_nr_running. |
1039 | */ | 1047 | */ |
1040 | if (prev->sched_class == &dl_sched_class) | 1048 | if (prev->sched_class == &dl_sched_class) |
1041 | update_curr_dl(rq); | 1049 | update_curr_dl(rq); |
1042 | 1050 | ||
1043 | if (unlikely(!dl_rq->dl_nr_running)) | 1051 | if (unlikely(!dl_rq->dl_nr_running)) |
1044 | return NULL; | 1052 | return NULL; |
1045 | 1053 | ||
1046 | put_prev_task(rq, prev); | 1054 | put_prev_task(rq, prev); |
1047 | 1055 | ||
1048 | dl_se = pick_next_dl_entity(rq, dl_rq); | 1056 | dl_se = pick_next_dl_entity(rq, dl_rq); |
1049 | BUG_ON(!dl_se); | 1057 | BUG_ON(!dl_se); |
1050 | 1058 | ||
1051 | p = dl_task_of(dl_se); | 1059 | p = dl_task_of(dl_se); |
1052 | p->se.exec_start = rq_clock_task(rq); | 1060 | p->se.exec_start = rq_clock_task(rq); |
1053 | 1061 | ||
1054 | /* Running task will never be pushed. */ | 1062 | /* Running task will never be pushed. */ |
1055 | dequeue_pushable_dl_task(rq, p); | 1063 | dequeue_pushable_dl_task(rq, p); |
1056 | 1064 | ||
1057 | #ifdef CONFIG_SCHED_HRTICK | 1065 | #ifdef CONFIG_SCHED_HRTICK |
1058 | if (hrtick_enabled(rq)) | 1066 | if (hrtick_enabled(rq)) |
1059 | start_hrtick_dl(rq, p); | 1067 | start_hrtick_dl(rq, p); |
1060 | #endif | 1068 | #endif |
1061 | 1069 | ||
1062 | set_post_schedule(rq); | 1070 | set_post_schedule(rq); |
1063 | 1071 | ||
1064 | return p; | 1072 | return p; |
1065 | } | 1073 | } |
1066 | 1074 | ||
1067 | static void put_prev_task_dl(struct rq *rq, struct task_struct *p) | 1075 | static void put_prev_task_dl(struct rq *rq, struct task_struct *p) |
1068 | { | 1076 | { |
1069 | update_curr_dl(rq); | 1077 | update_curr_dl(rq); |
1070 | 1078 | ||
1071 | if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) | 1079 | if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) |
1072 | enqueue_pushable_dl_task(rq, p); | 1080 | enqueue_pushable_dl_task(rq, p); |
1073 | } | 1081 | } |
1074 | 1082 | ||
1075 | static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) | 1083 | static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) |
1076 | { | 1084 | { |
1077 | update_curr_dl(rq); | 1085 | update_curr_dl(rq); |
1078 | 1086 | ||
1079 | #ifdef CONFIG_SCHED_HRTICK | 1087 | #ifdef CONFIG_SCHED_HRTICK |
1080 | if (hrtick_enabled(rq) && queued && p->dl.runtime > 0) | 1088 | if (hrtick_enabled(rq) && queued && p->dl.runtime > 0) |
1081 | start_hrtick_dl(rq, p); | 1089 | start_hrtick_dl(rq, p); |
1082 | #endif | 1090 | #endif |
1083 | } | 1091 | } |
1084 | 1092 | ||
1085 | static void task_fork_dl(struct task_struct *p) | 1093 | static void task_fork_dl(struct task_struct *p) |
1086 | { | 1094 | { |
1087 | /* | 1095 | /* |
1088 | * SCHED_DEADLINE tasks cannot fork and this is achieved through | 1096 | * SCHED_DEADLINE tasks cannot fork and this is achieved through |
1089 | * sched_fork() | 1097 | * sched_fork() |
1090 | */ | 1098 | */ |
1091 | } | 1099 | } |
1092 | 1100 | ||
1093 | static void task_dead_dl(struct task_struct *p) | 1101 | static void task_dead_dl(struct task_struct *p) |
1094 | { | 1102 | { |
1095 | struct hrtimer *timer = &p->dl.dl_timer; | 1103 | struct hrtimer *timer = &p->dl.dl_timer; |
1096 | struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); | 1104 | struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); |
1097 | 1105 | ||
1098 | /* | 1106 | /* |
1099 | * Since we are TASK_DEAD we won't slip out of the domain! | 1107 | * Since we are TASK_DEAD we won't slip out of the domain! |
1100 | */ | 1108 | */ |
1101 | raw_spin_lock_irq(&dl_b->lock); | 1109 | raw_spin_lock_irq(&dl_b->lock); |
1102 | dl_b->total_bw -= p->dl.dl_bw; | 1110 | dl_b->total_bw -= p->dl.dl_bw; |
1103 | raw_spin_unlock_irq(&dl_b->lock); | 1111 | raw_spin_unlock_irq(&dl_b->lock); |
1104 | 1112 | ||
1105 | hrtimer_cancel(timer); | 1113 | hrtimer_cancel(timer); |
1106 | } | 1114 | } |
1107 | 1115 | ||
1108 | static void set_curr_task_dl(struct rq *rq) | 1116 | static void set_curr_task_dl(struct rq *rq) |
1109 | { | 1117 | { |
1110 | struct task_struct *p = rq->curr; | 1118 | struct task_struct *p = rq->curr; |
1111 | 1119 | ||
1112 | p->se.exec_start = rq_clock_task(rq); | 1120 | p->se.exec_start = rq_clock_task(rq); |
1113 | 1121 | ||
1114 | /* You can't push away the running task */ | 1122 | /* You can't push away the running task */ |
1115 | dequeue_pushable_dl_task(rq, p); | 1123 | dequeue_pushable_dl_task(rq, p); |
1116 | } | 1124 | } |
1117 | 1125 | ||
1118 | #ifdef CONFIG_SMP | 1126 | #ifdef CONFIG_SMP |
1119 | 1127 | ||
1120 | /* Only try algorithms three times */ | 1128 | /* Only try algorithms three times */ |
1121 | #define DL_MAX_TRIES 3 | 1129 | #define DL_MAX_TRIES 3 |
1122 | 1130 | ||
1123 | static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) | 1131 | static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) |
1124 | { | 1132 | { |
1125 | if (!task_running(rq, p) && | 1133 | if (!task_running(rq, p) && |
1126 | (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && | 1134 | (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && |
1127 | (p->nr_cpus_allowed > 1)) | 1135 | (p->nr_cpus_allowed > 1)) |
1128 | return 1; | 1136 | return 1; |
1129 | 1137 | ||
1130 | return 0; | 1138 | return 0; |
1131 | } | 1139 | } |
1132 | 1140 | ||
1133 | /* Returns the second earliest -deadline task, NULL otherwise */ | 1141 | /* Returns the second earliest -deadline task, NULL otherwise */ |
1134 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu) | 1142 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu) |
1135 | { | 1143 | { |
1136 | struct rb_node *next_node = rq->dl.rb_leftmost; | 1144 | struct rb_node *next_node = rq->dl.rb_leftmost; |
1137 | struct sched_dl_entity *dl_se; | 1145 | struct sched_dl_entity *dl_se; |
1138 | struct task_struct *p = NULL; | 1146 | struct task_struct *p = NULL; |
1139 | 1147 | ||
1140 | next_node: | 1148 | next_node: |
1141 | next_node = rb_next(next_node); | 1149 | next_node = rb_next(next_node); |
1142 | if (next_node) { | 1150 | if (next_node) { |
1143 | dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node); | 1151 | dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node); |
1144 | p = dl_task_of(dl_se); | 1152 | p = dl_task_of(dl_se); |
1145 | 1153 | ||
1146 | if (pick_dl_task(rq, p, cpu)) | 1154 | if (pick_dl_task(rq, p, cpu)) |
1147 | return p; | 1155 | return p; |
1148 | 1156 | ||
1149 | goto next_node; | 1157 | goto next_node; |
1150 | } | 1158 | } |
1151 | 1159 | ||
1152 | return NULL; | 1160 | return NULL; |
1153 | } | 1161 | } |
1154 | 1162 | ||
1155 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); | 1163 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); |
1156 | 1164 | ||
1157 | static int find_later_rq(struct task_struct *task) | 1165 | static int find_later_rq(struct task_struct *task) |
1158 | { | 1166 | { |
1159 | struct sched_domain *sd; | 1167 | struct sched_domain *sd; |
1160 | struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); | 1168 | struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); |
1161 | int this_cpu = smp_processor_id(); | 1169 | int this_cpu = smp_processor_id(); |
1162 | int best_cpu, cpu = task_cpu(task); | 1170 | int best_cpu, cpu = task_cpu(task); |
1163 | 1171 | ||
1164 | /* Make sure the mask is initialized first */ | 1172 | /* Make sure the mask is initialized first */ |
1165 | if (unlikely(!later_mask)) | 1173 | if (unlikely(!later_mask)) |
1166 | return -1; | 1174 | return -1; |
1167 | 1175 | ||
1168 | if (task->nr_cpus_allowed == 1) | 1176 | if (task->nr_cpus_allowed == 1) |
1169 | return -1; | 1177 | return -1; |
1170 | 1178 | ||
1171 | best_cpu = cpudl_find(&task_rq(task)->rd->cpudl, | 1179 | best_cpu = cpudl_find(&task_rq(task)->rd->cpudl, |
1172 | task, later_mask); | 1180 | task, later_mask); |
1173 | if (best_cpu == -1) | 1181 | if (best_cpu == -1) |
1174 | return -1; | 1182 | return -1; |
1175 | 1183 | ||
1176 | /* | 1184 | /* |
1177 | * If we are here, some target has been found, | 1185 | * If we are here, some target has been found, |
1178 | * the most suitable of which is cached in best_cpu. | 1186 | * the most suitable of which is cached in best_cpu. |
1179 | * This is, among the runqueues where the current tasks | 1187 | * This is, among the runqueues where the current tasks |
1180 | * have later deadlines than the task's one, the rq | 1188 | * have later deadlines than the task's one, the rq |
1181 | * with the latest possible one. | 1189 | * with the latest possible one. |
1182 | * | 1190 | * |
1183 | * Now we check how well this matches with task's | 1191 | * Now we check how well this matches with task's |
1184 | * affinity and system topology. | 1192 | * affinity and system topology. |
1185 | * | 1193 | * |
1186 | * The last cpu where the task ran is our first | 1194 | * The last cpu where the task ran is our first |
1187 | * guess, since it is most likely cache-hot there. | 1195 | * guess, since it is most likely cache-hot there. |
1188 | */ | 1196 | */ |
1189 | if (cpumask_test_cpu(cpu, later_mask)) | 1197 | if (cpumask_test_cpu(cpu, later_mask)) |
1190 | return cpu; | 1198 | return cpu; |
1191 | /* | 1199 | /* |
1192 | * Check if this_cpu is to be skipped (i.e., it is | 1200 | * Check if this_cpu is to be skipped (i.e., it is |
1193 | * not in the mask) or not. | 1201 | * not in the mask) or not. |
1194 | */ | 1202 | */ |
1195 | if (!cpumask_test_cpu(this_cpu, later_mask)) | 1203 | if (!cpumask_test_cpu(this_cpu, later_mask)) |
1196 | this_cpu = -1; | 1204 | this_cpu = -1; |
1197 | 1205 | ||
1198 | rcu_read_lock(); | 1206 | rcu_read_lock(); |
1199 | for_each_domain(cpu, sd) { | 1207 | for_each_domain(cpu, sd) { |
1200 | if (sd->flags & SD_WAKE_AFFINE) { | 1208 | if (sd->flags & SD_WAKE_AFFINE) { |
1201 | 1209 | ||
1202 | /* | 1210 | /* |
1203 | * If possible, preempting this_cpu is | 1211 | * If possible, preempting this_cpu is |
1204 | * cheaper than migrating. | 1212 | * cheaper than migrating. |
1205 | */ | 1213 | */ |
1206 | if (this_cpu != -1 && | 1214 | if (this_cpu != -1 && |
1207 | cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { | 1215 | cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { |
1208 | rcu_read_unlock(); | 1216 | rcu_read_unlock(); |
1209 | return this_cpu; | 1217 | return this_cpu; |
1210 | } | 1218 | } |
1211 | 1219 | ||
1212 | /* | 1220 | /* |
1213 | * Last chance: if best_cpu is valid and is | 1221 | * Last chance: if best_cpu is valid and is |
1214 | * in the mask, that becomes our choice. | 1222 | * in the mask, that becomes our choice. |
1215 | */ | 1223 | */ |
1216 | if (best_cpu < nr_cpu_ids && | 1224 | if (best_cpu < nr_cpu_ids && |
1217 | cpumask_test_cpu(best_cpu, sched_domain_span(sd))) { | 1225 | cpumask_test_cpu(best_cpu, sched_domain_span(sd))) { |
1218 | rcu_read_unlock(); | 1226 | rcu_read_unlock(); |
1219 | return best_cpu; | 1227 | return best_cpu; |
1220 | } | 1228 | } |
1221 | } | 1229 | } |
1222 | } | 1230 | } |
1223 | rcu_read_unlock(); | 1231 | rcu_read_unlock(); |
1224 | 1232 | ||
1225 | /* | 1233 | /* |
1226 | * At this point, all our guesses failed, we just return | 1234 | * At this point, all our guesses failed, we just return |
1227 | * 'something', and let the caller sort the things out. | 1235 | * 'something', and let the caller sort the things out. |
1228 | */ | 1236 | */ |
1229 | if (this_cpu != -1) | 1237 | if (this_cpu != -1) |
1230 | return this_cpu; | 1238 | return this_cpu; |
1231 | 1239 | ||
1232 | cpu = cpumask_any(later_mask); | 1240 | cpu = cpumask_any(later_mask); |
1233 | if (cpu < nr_cpu_ids) | 1241 | if (cpu < nr_cpu_ids) |
1234 | return cpu; | 1242 | return cpu; |
1235 | 1243 | ||
1236 | return -1; | 1244 | return -1; |
1237 | } | 1245 | } |
1238 | 1246 | ||
1239 | /* Locks the rq it finds */ | 1247 | /* Locks the rq it finds */ |
1240 | static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) | 1248 | static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) |
1241 | { | 1249 | { |
1242 | struct rq *later_rq = NULL; | 1250 | struct rq *later_rq = NULL; |
1243 | int tries; | 1251 | int tries; |
1244 | int cpu; | 1252 | int cpu; |
1245 | 1253 | ||
1246 | for (tries = 0; tries < DL_MAX_TRIES; tries++) { | 1254 | for (tries = 0; tries < DL_MAX_TRIES; tries++) { |
1247 | cpu = find_later_rq(task); | 1255 | cpu = find_later_rq(task); |
1248 | 1256 | ||
1249 | if ((cpu == -1) || (cpu == rq->cpu)) | 1257 | if ((cpu == -1) || (cpu == rq->cpu)) |
1250 | break; | 1258 | break; |
1251 | 1259 | ||
1252 | later_rq = cpu_rq(cpu); | 1260 | later_rq = cpu_rq(cpu); |
1253 | 1261 | ||
1254 | /* Retry if something changed. */ | 1262 | /* Retry if something changed. */ |
1255 | if (double_lock_balance(rq, later_rq)) { | 1263 | if (double_lock_balance(rq, later_rq)) { |
1256 | if (unlikely(task_rq(task) != rq || | 1264 | if (unlikely(task_rq(task) != rq || |
1257 | !cpumask_test_cpu(later_rq->cpu, | 1265 | !cpumask_test_cpu(later_rq->cpu, |
1258 | &task->cpus_allowed) || | 1266 | &task->cpus_allowed) || |
1259 | task_running(rq, task) || !task->on_rq)) { | 1267 | task_running(rq, task) || !task->on_rq)) { |
1260 | double_unlock_balance(rq, later_rq); | 1268 | double_unlock_balance(rq, later_rq); |
1261 | later_rq = NULL; | 1269 | later_rq = NULL; |
1262 | break; | 1270 | break; |
1263 | } | 1271 | } |
1264 | } | 1272 | } |
1265 | 1273 | ||
1266 | /* | 1274 | /* |
1267 | * If the rq we found has no -deadline task, or | 1275 | * If the rq we found has no -deadline task, or |
1268 | * its earliest one has a later deadline than our | 1276 | * its earliest one has a later deadline than our |
1269 | * task, the rq is a good one. | 1277 | * task, the rq is a good one. |
1270 | */ | 1278 | */ |
1271 | if (!later_rq->dl.dl_nr_running || | 1279 | if (!later_rq->dl.dl_nr_running || |
1272 | dl_time_before(task->dl.deadline, | 1280 | dl_time_before(task->dl.deadline, |
1273 | later_rq->dl.earliest_dl.curr)) | 1281 | later_rq->dl.earliest_dl.curr)) |
1274 | break; | 1282 | break; |
1275 | 1283 | ||
1276 | /* Otherwise we try again. */ | 1284 | /* Otherwise we try again. */ |
1277 | double_unlock_balance(rq, later_rq); | 1285 | double_unlock_balance(rq, later_rq); |
1278 | later_rq = NULL; | 1286 | later_rq = NULL; |
1279 | } | 1287 | } |
1280 | 1288 | ||
1281 | return later_rq; | 1289 | return later_rq; |
1282 | } | 1290 | } |
1283 | 1291 | ||
1284 | static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) | 1292 | static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) |
1285 | { | 1293 | { |
1286 | struct task_struct *p; | 1294 | struct task_struct *p; |
1287 | 1295 | ||
1288 | if (!has_pushable_dl_tasks(rq)) | 1296 | if (!has_pushable_dl_tasks(rq)) |
1289 | return NULL; | 1297 | return NULL; |
1290 | 1298 | ||
1291 | p = rb_entry(rq->dl.pushable_dl_tasks_leftmost, | 1299 | p = rb_entry(rq->dl.pushable_dl_tasks_leftmost, |
1292 | struct task_struct, pushable_dl_tasks); | 1300 | struct task_struct, pushable_dl_tasks); |
1293 | 1301 | ||
1294 | BUG_ON(rq->cpu != task_cpu(p)); | 1302 | BUG_ON(rq->cpu != task_cpu(p)); |
1295 | BUG_ON(task_current(rq, p)); | 1303 | BUG_ON(task_current(rq, p)); |
1296 | BUG_ON(p->nr_cpus_allowed <= 1); | 1304 | BUG_ON(p->nr_cpus_allowed <= 1); |
1297 | 1305 | ||
1298 | BUG_ON(!p->on_rq); | 1306 | BUG_ON(!p->on_rq); |
1299 | BUG_ON(!dl_task(p)); | 1307 | BUG_ON(!dl_task(p)); |
1300 | 1308 | ||
1301 | return p; | 1309 | return p; |
1302 | } | 1310 | } |
1303 | 1311 | ||
1304 | /* | 1312 | /* |
1305 | * See if the non running -deadline tasks on this rq | 1313 | * See if the non running -deadline tasks on this rq |
1306 | * can be sent to some other CPU where they can preempt | 1314 | * can be sent to some other CPU where they can preempt |
1307 | * and start executing. | 1315 | * and start executing. |
1308 | */ | 1316 | */ |
1309 | static int push_dl_task(struct rq *rq) | 1317 | static int push_dl_task(struct rq *rq) |
1310 | { | 1318 | { |
1311 | struct task_struct *next_task; | 1319 | struct task_struct *next_task; |
1312 | struct rq *later_rq; | 1320 | struct rq *later_rq; |
1313 | 1321 | ||
1314 | if (!rq->dl.overloaded) | 1322 | if (!rq->dl.overloaded) |
1315 | return 0; | 1323 | return 0; |
1316 | 1324 | ||
1317 | next_task = pick_next_pushable_dl_task(rq); | 1325 | next_task = pick_next_pushable_dl_task(rq); |
1318 | if (!next_task) | 1326 | if (!next_task) |
1319 | return 0; | 1327 | return 0; |
1320 | 1328 | ||
1321 | retry: | 1329 | retry: |
1322 | if (unlikely(next_task == rq->curr)) { | 1330 | if (unlikely(next_task == rq->curr)) { |
1323 | WARN_ON(1); | 1331 | WARN_ON(1); |
1324 | return 0; | 1332 | return 0; |
1325 | } | 1333 | } |
1326 | 1334 | ||
1327 | /* | 1335 | /* |
1328 | * If next_task preempts rq->curr, and rq->curr | 1336 | * If next_task preempts rq->curr, and rq->curr |
1329 | * can move away, it makes sense to just reschedule | 1337 | * can move away, it makes sense to just reschedule |
1330 | * without going further in pushing next_task. | 1338 | * without going further in pushing next_task. |
1331 | */ | 1339 | */ |
1332 | if (dl_task(rq->curr) && | 1340 | if (dl_task(rq->curr) && |
1333 | dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && | 1341 | dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && |
1334 | rq->curr->nr_cpus_allowed > 1) { | 1342 | rq->curr->nr_cpus_allowed > 1) { |
1335 | resched_task(rq->curr); | 1343 | resched_task(rq->curr); |
1336 | return 0; | 1344 | return 0; |
1337 | } | 1345 | } |
1338 | 1346 | ||
1339 | /* We might release rq lock */ | 1347 | /* We might release rq lock */ |
1340 | get_task_struct(next_task); | 1348 | get_task_struct(next_task); |
1341 | 1349 | ||
1342 | /* Will lock the rq it'll find */ | 1350 | /* Will lock the rq it'll find */ |
1343 | later_rq = find_lock_later_rq(next_task, rq); | 1351 | later_rq = find_lock_later_rq(next_task, rq); |
1344 | if (!later_rq) { | 1352 | if (!later_rq) { |
1345 | struct task_struct *task; | 1353 | struct task_struct *task; |
1346 | 1354 | ||
1347 | /* | 1355 | /* |
1348 | * We must check all this again, since | 1356 | * We must check all this again, since |
1349 | * find_lock_later_rq releases rq->lock and it is | 1357 | * find_lock_later_rq releases rq->lock and it is |
1350 | * then possible that next_task has migrated. | 1358 | * then possible that next_task has migrated. |
1351 | */ | 1359 | */ |
1352 | task = pick_next_pushable_dl_task(rq); | 1360 | task = pick_next_pushable_dl_task(rq); |
1353 | if (task_cpu(next_task) == rq->cpu && task == next_task) { | 1361 | if (task_cpu(next_task) == rq->cpu && task == next_task) { |
1354 | /* | 1362 | /* |
1355 | * The task is still there. We don't try | 1363 | * The task is still there. We don't try |
1356 | * again, some other cpu will pull it when ready. | 1364 | * again, some other cpu will pull it when ready. |
1357 | */ | 1365 | */ |
1358 | dequeue_pushable_dl_task(rq, next_task); | 1366 | dequeue_pushable_dl_task(rq, next_task); |
1359 | goto out; | 1367 | goto out; |
1360 | } | 1368 | } |
1361 | 1369 | ||
1362 | if (!task) | 1370 | if (!task) |
1363 | /* No more tasks */ | 1371 | /* No more tasks */ |
1364 | goto out; | 1372 | goto out; |
1365 | 1373 | ||
1366 | put_task_struct(next_task); | 1374 | put_task_struct(next_task); |
1367 | next_task = task; | 1375 | next_task = task; |
1368 | goto retry; | 1376 | goto retry; |
1369 | } | 1377 | } |
1370 | 1378 | ||
1371 | deactivate_task(rq, next_task, 0); | 1379 | deactivate_task(rq, next_task, 0); |
1372 | set_task_cpu(next_task, later_rq->cpu); | 1380 | set_task_cpu(next_task, later_rq->cpu); |
1373 | activate_task(later_rq, next_task, 0); | 1381 | activate_task(later_rq, next_task, 0); |
1374 | 1382 | ||
1375 | resched_task(later_rq->curr); | 1383 | resched_task(later_rq->curr); |
1376 | 1384 | ||
1377 | double_unlock_balance(rq, later_rq); | 1385 | double_unlock_balance(rq, later_rq); |
1378 | 1386 | ||
1379 | out: | 1387 | out: |
1380 | put_task_struct(next_task); | 1388 | put_task_struct(next_task); |
1381 | 1389 | ||
1382 | return 1; | 1390 | return 1; |
1383 | } | 1391 | } |
1384 | 1392 | ||
1385 | static void push_dl_tasks(struct rq *rq) | 1393 | static void push_dl_tasks(struct rq *rq) |
1386 | { | 1394 | { |
1387 | /* Terminates as it moves a -deadline task */ | 1395 | /* Terminates as it moves a -deadline task */ |
1388 | while (push_dl_task(rq)) | 1396 | while (push_dl_task(rq)) |
1389 | ; | 1397 | ; |
1390 | } | 1398 | } |
1391 | 1399 | ||
1392 | static int pull_dl_task(struct rq *this_rq) | 1400 | static int pull_dl_task(struct rq *this_rq) |
1393 | { | 1401 | { |
1394 | int this_cpu = this_rq->cpu, ret = 0, cpu; | 1402 | int this_cpu = this_rq->cpu, ret = 0, cpu; |
1395 | struct task_struct *p; | 1403 | struct task_struct *p; |
1396 | struct rq *src_rq; | 1404 | struct rq *src_rq; |
1397 | u64 dmin = LONG_MAX; | 1405 | u64 dmin = LONG_MAX; |
1398 | 1406 | ||
1399 | if (likely(!dl_overloaded(this_rq))) | 1407 | if (likely(!dl_overloaded(this_rq))) |
1400 | return 0; | 1408 | return 0; |
1401 | 1409 | ||
1402 | /* | 1410 | /* |
1403 | * Match the barrier from dl_set_overloaded; this guarantees that if we | 1411 | * Match the barrier from dl_set_overloaded; this guarantees that if we |
1404 | * see overloaded we must also see the dlo_mask bit. | 1412 | * see overloaded we must also see the dlo_mask bit. |
1405 | */ | 1413 | */ |
1406 | smp_rmb(); | 1414 | smp_rmb(); |
1407 | 1415 | ||
1408 | for_each_cpu(cpu, this_rq->rd->dlo_mask) { | 1416 | for_each_cpu(cpu, this_rq->rd->dlo_mask) { |
1409 | if (this_cpu == cpu) | 1417 | if (this_cpu == cpu) |
1410 | continue; | 1418 | continue; |
1411 | 1419 | ||
1412 | src_rq = cpu_rq(cpu); | 1420 | src_rq = cpu_rq(cpu); |
1413 | 1421 | ||
1414 | /* | 1422 | /* |
1415 | * It looks racy, and it is! However, as in sched_rt.c, | 1423 | * It looks racy, and it is! However, as in sched_rt.c, |
1416 | * we are fine with this. | 1424 | * we are fine with this. |
1417 | */ | 1425 | */ |
1418 | if (this_rq->dl.dl_nr_running && | 1426 | if (this_rq->dl.dl_nr_running && |
1419 | dl_time_before(this_rq->dl.earliest_dl.curr, | 1427 | dl_time_before(this_rq->dl.earliest_dl.curr, |
1420 | src_rq->dl.earliest_dl.next)) | 1428 | src_rq->dl.earliest_dl.next)) |
1421 | continue; | 1429 | continue; |
1422 | 1430 | ||
1423 | /* Might drop this_rq->lock */ | 1431 | /* Might drop this_rq->lock */ |
1424 | double_lock_balance(this_rq, src_rq); | 1432 | double_lock_balance(this_rq, src_rq); |
1425 | 1433 | ||
1426 | /* | 1434 | /* |
1427 | * If there are no more pullable tasks on the | 1435 | * If there are no more pullable tasks on the |
1428 | * rq, we're done with it. | 1436 | * rq, we're done with it. |
1429 | */ | 1437 | */ |
1430 | if (src_rq->dl.dl_nr_running <= 1) | 1438 | if (src_rq->dl.dl_nr_running <= 1) |
1431 | goto skip; | 1439 | goto skip; |
1432 | 1440 | ||
1433 | p = pick_next_earliest_dl_task(src_rq, this_cpu); | 1441 | p = pick_next_earliest_dl_task(src_rq, this_cpu); |
1434 | 1442 | ||
1435 | /* | 1443 | /* |
1436 | * We found a task to be pulled if: | 1444 | * We found a task to be pulled if: |
1437 | * - it preempts our current (if there's one), | 1445 | * - it preempts our current (if there's one), |
1438 | * - it will preempt the last one we pulled (if any). | 1446 | * - it will preempt the last one we pulled (if any). |
1439 | */ | 1447 | */ |
1440 | if (p && dl_time_before(p->dl.deadline, dmin) && | 1448 | if (p && dl_time_before(p->dl.deadline, dmin) && |
1441 | (!this_rq->dl.dl_nr_running || | 1449 | (!this_rq->dl.dl_nr_running || |
1442 | dl_time_before(p->dl.deadline, | 1450 | dl_time_before(p->dl.deadline, |
1443 | this_rq->dl.earliest_dl.curr))) { | 1451 | this_rq->dl.earliest_dl.curr))) { |
1444 | WARN_ON(p == src_rq->curr); | 1452 | WARN_ON(p == src_rq->curr); |
1445 | WARN_ON(!p->on_rq); | 1453 | WARN_ON(!p->on_rq); |
1446 | 1454 | ||
1447 | /* | 1455 | /* |
1448 | * Then we pull iff p has actually an earlier | 1456 | * Then we pull iff p has actually an earlier |
1449 | * deadline than the current task of its runqueue. | 1457 | * deadline than the current task of its runqueue. |
1450 | */ | 1458 | */ |
1451 | if (dl_time_before(p->dl.deadline, | 1459 | if (dl_time_before(p->dl.deadline, |
1452 | src_rq->curr->dl.deadline)) | 1460 | src_rq->curr->dl.deadline)) |
1453 | goto skip; | 1461 | goto skip; |
1454 | 1462 | ||
1455 | ret = 1; | 1463 | ret = 1; |
1456 | 1464 | ||
1457 | deactivate_task(src_rq, p, 0); | 1465 | deactivate_task(src_rq, p, 0); |
1458 | set_task_cpu(p, this_cpu); | 1466 | set_task_cpu(p, this_cpu); |
1459 | activate_task(this_rq, p, 0); | 1467 | activate_task(this_rq, p, 0); |
1460 | dmin = p->dl.deadline; | 1468 | dmin = p->dl.deadline; |
1461 | 1469 | ||
1462 | /* Is there any other task even earlier? */ | 1470 | /* Is there any other task even earlier? */ |
1463 | } | 1471 | } |
1464 | skip: | 1472 | skip: |
1465 | double_unlock_balance(this_rq, src_rq); | 1473 | double_unlock_balance(this_rq, src_rq); |
1466 | } | 1474 | } |
1467 | 1475 | ||
1468 | return ret; | 1476 | return ret; |
1469 | } | 1477 | } |
1470 | 1478 | ||
1471 | static void post_schedule_dl(struct rq *rq) | 1479 | static void post_schedule_dl(struct rq *rq) |
1472 | { | 1480 | { |
1473 | push_dl_tasks(rq); | 1481 | push_dl_tasks(rq); |
1474 | } | 1482 | } |
1475 | 1483 | ||
1476 | /* | 1484 | /* |
1477 | * Since the task is not running and a reschedule is not going to happen | 1485 | * Since the task is not running and a reschedule is not going to happen |
1478 | * anytime soon on its runqueue, we try pushing it away now. | 1486 | * anytime soon on its runqueue, we try pushing it away now. |
1479 | */ | 1487 | */ |
1480 | static void task_woken_dl(struct rq *rq, struct task_struct *p) | 1488 | static void task_woken_dl(struct rq *rq, struct task_struct *p) |
1481 | { | 1489 | { |
1482 | if (!task_running(rq, p) && | 1490 | if (!task_running(rq, p) && |
1483 | !test_tsk_need_resched(rq->curr) && | 1491 | !test_tsk_need_resched(rq->curr) && |
1484 | has_pushable_dl_tasks(rq) && | 1492 | has_pushable_dl_tasks(rq) && |
1485 | p->nr_cpus_allowed > 1 && | 1493 | p->nr_cpus_allowed > 1 && |
1486 | dl_task(rq->curr) && | 1494 | dl_task(rq->curr) && |
1487 | (rq->curr->nr_cpus_allowed < 2 || | 1495 | (rq->curr->nr_cpus_allowed < 2 || |
1488 | dl_entity_preempt(&rq->curr->dl, &p->dl))) { | 1496 | dl_entity_preempt(&rq->curr->dl, &p->dl))) { |
1489 | push_dl_tasks(rq); | 1497 | push_dl_tasks(rq); |
1490 | } | 1498 | } |
1491 | } | 1499 | } |
1492 | 1500 | ||
1493 | static void set_cpus_allowed_dl(struct task_struct *p, | 1501 | static void set_cpus_allowed_dl(struct task_struct *p, |
1494 | const struct cpumask *new_mask) | 1502 | const struct cpumask *new_mask) |
1495 | { | 1503 | { |
1496 | struct rq *rq; | 1504 | struct rq *rq; |
1497 | int weight; | 1505 | int weight; |
1498 | 1506 | ||
1499 | BUG_ON(!dl_task(p)); | 1507 | BUG_ON(!dl_task(p)); |
1500 | 1508 | ||
1501 | /* | 1509 | /* |
1502 | * Update only if the task is actually running (i.e., | 1510 | * Update only if the task is actually running (i.e., |
1503 | * it is on the rq AND it is not throttled). | 1511 | * it is on the rq AND it is not throttled). |
1504 | */ | 1512 | */ |
1505 | if (!on_dl_rq(&p->dl)) | 1513 | if (!on_dl_rq(&p->dl)) |
1506 | return; | 1514 | return; |
1507 | 1515 | ||
1508 | weight = cpumask_weight(new_mask); | 1516 | weight = cpumask_weight(new_mask); |
1509 | 1517 | ||
1510 | /* | 1518 | /* |
1511 | * Only update if the process changes its state from whether it | 1519 | * Only update if the process changes its state from whether it |
1512 | * can migrate or not. | 1520 | * can migrate or not. |
1513 | */ | 1521 | */ |
1514 | if ((p->nr_cpus_allowed > 1) == (weight > 1)) | 1522 | if ((p->nr_cpus_allowed > 1) == (weight > 1)) |
1515 | return; | 1523 | return; |
1516 | 1524 | ||
1517 | rq = task_rq(p); | 1525 | rq = task_rq(p); |
1518 | 1526 | ||
1519 | /* | 1527 | /* |
1520 | * The process used to be able to migrate OR it can now migrate | 1528 | * The process used to be able to migrate OR it can now migrate |
1521 | */ | 1529 | */ |
1522 | if (weight <= 1) { | 1530 | if (weight <= 1) { |
1523 | if (!task_current(rq, p)) | 1531 | if (!task_current(rq, p)) |
1524 | dequeue_pushable_dl_task(rq, p); | 1532 | dequeue_pushable_dl_task(rq, p); |
1525 | BUG_ON(!rq->dl.dl_nr_migratory); | 1533 | BUG_ON(!rq->dl.dl_nr_migratory); |
1526 | rq->dl.dl_nr_migratory--; | 1534 | rq->dl.dl_nr_migratory--; |
1527 | } else { | 1535 | } else { |
1528 | if (!task_current(rq, p)) | 1536 | if (!task_current(rq, p)) |
1529 | enqueue_pushable_dl_task(rq, p); | 1537 | enqueue_pushable_dl_task(rq, p); |
1530 | rq->dl.dl_nr_migratory++; | 1538 | rq->dl.dl_nr_migratory++; |
1531 | } | 1539 | } |
1532 | 1540 | ||
1533 | update_dl_migration(&rq->dl); | 1541 | update_dl_migration(&rq->dl); |
1534 | } | 1542 | } |
1535 | 1543 | ||
1536 | /* Assumes rq->lock is held */ | 1544 | /* Assumes rq->lock is held */ |
1537 | static void rq_online_dl(struct rq *rq) | 1545 | static void rq_online_dl(struct rq *rq) |
1538 | { | 1546 | { |
1539 | if (rq->dl.overloaded) | 1547 | if (rq->dl.overloaded) |
1540 | dl_set_overload(rq); | 1548 | dl_set_overload(rq); |
1541 | 1549 | ||
1542 | if (rq->dl.dl_nr_running > 0) | 1550 | if (rq->dl.dl_nr_running > 0) |
1543 | cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1); | 1551 | cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1); |
1544 | } | 1552 | } |
1545 | 1553 | ||
1546 | /* Assumes rq->lock is held */ | 1554 | /* Assumes rq->lock is held */ |
1547 | static void rq_offline_dl(struct rq *rq) | 1555 | static void rq_offline_dl(struct rq *rq) |
1548 | { | 1556 | { |
1549 | if (rq->dl.overloaded) | 1557 | if (rq->dl.overloaded) |
1550 | dl_clear_overload(rq); | 1558 | dl_clear_overload(rq); |
1551 | 1559 | ||
1552 | cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); | 1560 | cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); |
1553 | } | 1561 | } |
1554 | 1562 | ||
1555 | void init_sched_dl_class(void) | 1563 | void init_sched_dl_class(void) |
1556 | { | 1564 | { |
1557 | unsigned int i; | 1565 | unsigned int i; |
1558 | 1566 | ||
1559 | for_each_possible_cpu(i) | 1567 | for_each_possible_cpu(i) |
1560 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i), | 1568 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i), |
1561 | GFP_KERNEL, cpu_to_node(i)); | 1569 | GFP_KERNEL, cpu_to_node(i)); |
1562 | } | 1570 | } |
1563 | 1571 | ||
1564 | #endif /* CONFIG_SMP */ | 1572 | #endif /* CONFIG_SMP */ |
1565 | 1573 | ||
1566 | static void switched_from_dl(struct rq *rq, struct task_struct *p) | 1574 | static void switched_from_dl(struct rq *rq, struct task_struct *p) |
1567 | { | 1575 | { |
1568 | if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy)) | 1576 | if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy)) |
1569 | hrtimer_try_to_cancel(&p->dl.dl_timer); | 1577 | hrtimer_try_to_cancel(&p->dl.dl_timer); |
1570 | 1578 | ||
1571 | #ifdef CONFIG_SMP | 1579 | #ifdef CONFIG_SMP |
1572 | /* | 1580 | /* |
1573 | * Since this might be the only -deadline task on the rq, | 1581 | * Since this might be the only -deadline task on the rq, |
1574 | * this is the right place to try to pull some other one | 1582 | * this is the right place to try to pull some other one |
1575 | * from an overloaded cpu, if any. | 1583 | * from an overloaded cpu, if any. |
1576 | */ | 1584 | */ |
1577 | if (!rq->dl.dl_nr_running) | 1585 | if (!rq->dl.dl_nr_running) |
1578 | pull_dl_task(rq); | 1586 | pull_dl_task(rq); |
1579 | #endif | 1587 | #endif |
1580 | } | 1588 | } |
1581 | 1589 | ||
1582 | /* | 1590 | /* |
1583 | * When switching to -deadline, we may overload the rq, then | 1591 | * When switching to -deadline, we may overload the rq, then |
1584 | * we try to push someone off, if possible. | 1592 | * we try to push someone off, if possible. |
1585 | */ | 1593 | */ |
1586 | static void switched_to_dl(struct rq *rq, struct task_struct *p) | 1594 | static void switched_to_dl(struct rq *rq, struct task_struct *p) |
1587 | { | 1595 | { |
1588 | int check_resched = 1; | 1596 | int check_resched = 1; |
1589 | 1597 | ||
1590 | /* | 1598 | /* |
1591 | * If p is throttled, don't consider the possibility | 1599 | * If p is throttled, don't consider the possibility |
1592 | * of preempting rq->curr, the check will be done right | 1600 | * of preempting rq->curr, the check will be done right |
1593 | * after its runtime will get replenished. | 1601 | * after its runtime will get replenished. |
1594 | */ | 1602 | */ |
1595 | if (unlikely(p->dl.dl_throttled)) | 1603 | if (unlikely(p->dl.dl_throttled)) |
1596 | return; | 1604 | return; |
1597 | 1605 | ||
1598 | if (p->on_rq && rq->curr != p) { | 1606 | if (p->on_rq && rq->curr != p) { |
1599 | #ifdef CONFIG_SMP | 1607 | #ifdef CONFIG_SMP |
1600 | if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p)) | 1608 | if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p)) |
1601 | /* Only reschedule if pushing failed */ | 1609 | /* Only reschedule if pushing failed */ |
1602 | check_resched = 0; | 1610 | check_resched = 0; |
1603 | #endif /* CONFIG_SMP */ | 1611 | #endif /* CONFIG_SMP */ |
1604 | if (check_resched && task_has_dl_policy(rq->curr)) | 1612 | if (check_resched && task_has_dl_policy(rq->curr)) |
1605 | check_preempt_curr_dl(rq, p, 0); | 1613 | check_preempt_curr_dl(rq, p, 0); |
1606 | } | 1614 | } |
1607 | } | 1615 | } |
1608 | 1616 | ||
1609 | /* | 1617 | /* |
1610 | * If the scheduling parameters of a -deadline task changed, | 1618 | * If the scheduling parameters of a -deadline task changed, |
1611 | * a push or pull operation might be needed. | 1619 | * a push or pull operation might be needed. |
1612 | */ | 1620 | */ |
1613 | static void prio_changed_dl(struct rq *rq, struct task_struct *p, | 1621 | static void prio_changed_dl(struct rq *rq, struct task_struct *p, |
1614 | int oldprio) | 1622 | int oldprio) |
1615 | { | 1623 | { |
1616 | if (p->on_rq || rq->curr == p) { | 1624 | if (p->on_rq || rq->curr == p) { |
1617 | #ifdef CONFIG_SMP | 1625 | #ifdef CONFIG_SMP |
1618 | /* | 1626 | /* |
1619 | * This might be too much, but unfortunately | 1627 | * This might be too much, but unfortunately |
1620 | * we don't have the old deadline value, and | 1628 | * we don't have the old deadline value, and |
1621 | * we can't argue if the task is increasing | 1629 | * we can't argue if the task is increasing |
1622 | * or lowering its prio, so... | 1630 | * or lowering its prio, so... |
1623 | */ | 1631 | */ |
1624 | if (!rq->dl.overloaded) | 1632 | if (!rq->dl.overloaded) |
1625 | pull_dl_task(rq); | 1633 | pull_dl_task(rq); |
1626 | 1634 | ||
1627 | /* | 1635 | /* |
1628 | * If we now have an earlier deadline task than p, | 1636 | * If we now have an earlier deadline task than p, |
1629 | * then reschedule, provided p is still on this | 1637 | * then reschedule, provided p is still on this |
1630 | * runqueue. | 1638 | * runqueue. |
1631 | */ | 1639 | */ |
1632 | if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) && | 1640 | if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) && |
1633 | rq->curr == p) | 1641 | rq->curr == p) |
1634 | resched_task(p); | 1642 | resched_task(p); |
1635 | #else | 1643 | #else |
1636 | /* | 1644 | /* |
1637 | * Again, we don't know if p has an earlier | 1645 | * Again, we don't know if p has an earlier |
1638 | * or later deadline, so let's blindly set a | 1646 | * or later deadline, so let's blindly set a |
1639 | * (maybe not needed) rescheduling point. | 1647 | * (maybe not needed) rescheduling point. |
1640 | */ | 1648 | */ |
1641 | resched_task(p); | 1649 | resched_task(p); |
1642 | #endif /* CONFIG_SMP */ | 1650 | #endif /* CONFIG_SMP */ |
1643 | } else | 1651 | } else |
1644 | switched_to_dl(rq, p); | 1652 | switched_to_dl(rq, p); |
1645 | } | 1653 | } |
1646 | 1654 | ||
1647 | const struct sched_class dl_sched_class = { | 1655 | const struct sched_class dl_sched_class = { |
1648 | .next = &rt_sched_class, | 1656 | .next = &rt_sched_class, |
1649 | .enqueue_task = enqueue_task_dl, | 1657 | .enqueue_task = enqueue_task_dl, |
1650 | .dequeue_task = dequeue_task_dl, | 1658 | .dequeue_task = dequeue_task_dl, |
1651 | .yield_task = yield_task_dl, | 1659 | .yield_task = yield_task_dl, |
1652 | 1660 | ||
1653 | .check_preempt_curr = check_preempt_curr_dl, | 1661 | .check_preempt_curr = check_preempt_curr_dl, |
1654 | 1662 | ||
1655 | .pick_next_task = pick_next_task_dl, | 1663 | .pick_next_task = pick_next_task_dl, |
1656 | .put_prev_task = put_prev_task_dl, | 1664 | .put_prev_task = put_prev_task_dl, |
1657 | 1665 | ||
1658 | #ifdef CONFIG_SMP | 1666 | #ifdef CONFIG_SMP |
1659 | .select_task_rq = select_task_rq_dl, | 1667 | .select_task_rq = select_task_rq_dl, |
1660 | .set_cpus_allowed = set_cpus_allowed_dl, | 1668 | .set_cpus_allowed = set_cpus_allowed_dl, |
1661 | .rq_online = rq_online_dl, | 1669 | .rq_online = rq_online_dl, |
1662 | .rq_offline = rq_offline_dl, | 1670 | .rq_offline = rq_offline_dl, |
1663 | .post_schedule = post_schedule_dl, | 1671 | .post_schedule = post_schedule_dl, |
1664 | .task_woken = task_woken_dl, | 1672 | .task_woken = task_woken_dl, |
1665 | #endif | 1673 | #endif |
1666 | 1674 | ||
1667 | .set_curr_task = set_curr_task_dl, | 1675 | .set_curr_task = set_curr_task_dl, |
1668 | .task_tick = task_tick_dl, | 1676 | .task_tick = task_tick_dl, |
1669 | .task_fork = task_fork_dl, | 1677 | .task_fork = task_fork_dl, |
1670 | .task_dead = task_dead_dl, | 1678 | .task_dead = task_dead_dl, |
1671 | 1679 | ||
1672 | .prio_changed = prio_changed_dl, | 1680 | .prio_changed = prio_changed_dl, |
1673 | .switched_from = switched_from_dl, | 1681 | .switched_from = switched_from_dl, |
1674 | .switched_to = switched_to_dl, | 1682 | .switched_to = switched_to_dl, |
1675 | }; | 1683 | }; |
1676 | 1684 |
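For readers skimming the unchanged hunks above, the ordering that __enqueue_dl_entity() builds and pick_next_dl_entity() consumes is plain earliest-deadline-first: entities are keyed by absolute deadline in the rb-tree, and the leftmost node is always the next task to run. The following is a minimal user-space sketch of that comparison, not part of this commit or of kernel/sched/deadline.c; the body given for dl_time_before() is an assumption about a helper defined outside this hunk.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* Assumed semantics of dl_time_before(): "a" is earlier than "b",
 * written as a signed comparison so it stays meaningful across u64
 * wraparound. The real helper lives outside the hunk shown above. */
static int dl_time_before(u64 a, u64 b)
{
	return (int64_t)(a - b) < 0;
}

int main(void)
{
	/* Three hypothetical absolute deadlines, in nanoseconds. */
	u64 deadline[] = { 300000, 150000, 450000 };
	int i, earliest = 0;

	/* Same comparison __enqueue_dl_entity() applies while walking the
	 * rb-tree: the entity with the earliest deadline ends up leftmost,
	 * and pick_next_dl_entity() simply returns that leftmost node. */
	for (i = 1; i < 3; i++)
		if (dl_time_before(deadline[i], deadline[earliest]))
			earliest = i;

	printf("earliest deadline: %llu ns\n",
	       (unsigned long long)deadline[earliest]);
	return 0;
}

Compiled with any C compiler, the sketch prints 150000 ns, i.e. the same "leftmost wins" choice the diff encodes via rb_leftmost.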
mentioned in commit 8ac38a
mentioned in commit 3960c8