Commit 46ac22bab42cc868b9c1d0e915ddbc8e8065a44d
Committed by: Ingo Molnar
Parent: 2087a1ad82
Exists in: master and 39 other branches
sched: fix accounting in task delay accounting & migration
On Thu, Jun 19, 2008 at 12:27:14PM +0200, Peter Zijlstra wrote:

> On Thu, 2008-06-05 at 10:50 +0530, Ankita Garg wrote:
>
> > Thanks Peter for the explanation...
> >
> > I agree with the above and that is the reason why I did not see weird
> > values with cpu_time. But, run_delay still would suffer skews as the end
> > points for delta could be taken on different cpus due to migration (more
> > so on RT kernel due to the push-pull operations). With the below patch,
> > I could not reproduce the issue I had seen earlier. After every dequeue,
> > we take the delta and start wait measurements from zero when moved to a
> > different rq.
>
> OK, so task delay accounting is broken because it doesn't take
> migration into account.
>
> What you've done is make it symmetric wrt enqueue, and account it like
>
>   cpu0          cpu1
>
>   enqueue
>    <wait-d1>
>   dequeue
>                 enqueue
>                  <wait-d2>
>                 run
>
> Where you add both d1 and d2 to the run_delay, right?

Thanks for reviewing the patch. The above is exactly what I have done.

> This seems like a good fix, however it looks like the patch will break
> compilation in !CONFIG_SCHEDSTATS && !CONFIG_TASK_DELAY_ACCT, due to it
> failing to provide a stub for sched_info_dequeue() in that case.

Fixed. Please find the new patch below.

Signed-off-by: Ankita Garg <ankita@in.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Gregory Haskins <ghaskins@novell.com>
Cc: rostedt@goodmis.org
Cc: suresh.b.siddha@intel.com
Cc: aneesh.kumar@linux.vnet.ibm.com
Cc: dhaval@linux.vnet.ibm.com
Cc: vatsa@linux.vnet.ibm.com
Cc: David Bahi <DBahi@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
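The accounting Peter sketches above can be modelled in a few lines of user-space C. This is only an illustrative sketch with invented names and clock values, not kernel code: d1 is opened and closed on cpu0's clock and d2 on cpu1's clock, so a constant skew between the two clocks never enters the accumulated run_delay.

#include <stdio.h>

/* Toy model of per-task delay accounting across a migration.
 * All names and numbers are illustrative, not the kernel's. */
struct task_stats {
        unsigned long long last_queued; /* timestamp on the queueing cpu */
        unsigned long long run_delay;   /* total time spent waiting to run */
};

/* enqueue: remember when we started waiting, on the local rq clock */
static void toy_enqueue(struct task_stats *t, unsigned long long rq_clock)
{
        t->last_queued = rq_clock;
}

/* dequeue or arrival: close the interval on the *same* cpu's clock */
static void toy_close_wait(struct task_stats *t, unsigned long long rq_clock)
{
        if (t->last_queued)
                t->run_delay += rq_clock - t->last_queued;
        t->last_queued = 0;
}

int main(void)
{
        struct task_stats t = { 0, 0 };
        unsigned long long skew = 1000; /* cpu1's clock runs ahead of cpu0's */

        /* cpu0: enqueue at 100, dequeue for migration at 150 -> d1 = 50 */
        toy_enqueue(&t, 100);
        toy_close_wait(&t, 150);

        /* cpu1: enqueue at 200+skew, starts running at 230+skew -> d2 = 30 */
        toy_enqueue(&t, 200 + skew);
        toy_close_wait(&t, 230 + skew);

        printf("run_delay = %llu\n", t.run_delay); /* 80 = d1 + d2 */
        return 0;
}

Running it prints run_delay = 80 for any value of skew, which is the point of taking each delta on a single cpu's clock.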
Showing 2 changed files with 34 additions and 9 deletions
kernel/sched.c
kernel/sched_stats.h
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -118,6 +118,13 @@
 	if (rq)
 		rq->rq_sched_info.cpu_time += delta;
 }
+
+static inline void
+rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
+{
+	if (rq)
+		rq->rq_sched_info.run_delay += delta;
+}
 # define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
 # define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
 # define schedstat_set(var, val)	do { var = (val); } while (0)
@@ -126,6 +133,9 @@
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
 {}
 static inline void
+rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
+{}
+static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
 # define schedstat_inc(rq, field)	do { } while (0)
@@ -134,6 +144,11 @@
 #endif
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+static inline void sched_info_reset_dequeued(struct task_struct *t)
+{
+	t->sched_info.last_queued = 0;
+}
+
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu. We should note that with the exception of interactive
@@ -143,15 +158,22 @@
  * active queue, thus delaying tasks in the expired queue from running;
  * see scheduler_tick()).
  *
- * This function is only called from sched_info_arrive(), rather than
- * dequeue_task(). Even though a task may be queued and dequeued multiple
- * times as it is shuffled about, we're really interested in knowing how
- * long it was from the *first* time it was queued to the time that it
- * finally hit a cpu.
+ * Though we are interested in knowing how long it was from the *first* time a
+ * task was queued to the time that it finally hit a cpu, we call this routine
+ * from dequeue_task() to account for possible rq->clock skew across cpus. The
+ * delta taken on each cpu would annul the skew.
  */
 static inline void sched_info_dequeued(struct task_struct *t)
 {
-	t->sched_info.last_queued = 0;
+	unsigned long long now = task_rq(t)->clock, delta = 0;
+
+	if (unlikely(sched_info_on()))
+		if (t->sched_info.last_queued)
+			delta = now - t->sched_info.last_queued;
+	sched_info_reset_dequeued(t);
+	t->sched_info.run_delay += delta;
+
+	rq_sched_info_dequeued(task_rq(t), delta);
 }
 
 /*
@@ -165,7 +187,7 @@
 
 	if (t->sched_info.last_queued)
 		delta = now - t->sched_info.last_queued;
-	sched_info_dequeued(t);
+	sched_info_reset_dequeued(t);
 	t->sched_info.run_delay += delta;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcount++;
@@ -242,7 +264,9 @@
 	__sched_info_switch(prev, next);
 }
 #else
-#define sched_info_queued(t)		do { } while (0)
-#define sched_info_switch(t, next)	do { } while (0)
+#define sched_info_queued(t)		do { } while (0)
+#define sched_info_reset_dequeued(t)	do { } while (0)
+#define sched_info_dequeued(t)		do { } while (0)
+#define sched_info_switch(t, next)	do { } while (0)
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
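The #else block in the last hunk is what fixes the compile break discussed in the commit message: with both CONFIG_SCHEDSTATS and CONFIG_TASK_DELAY_ACCT disabled, every helper a call site uses must still exist as a no-op. Below is a minimal stand-alone sketch of that stub idiom; the MY_STATS switch and the demo_* names are invented for illustration and are not part of the kernel.

#include <stdio.h>

/* Build with -DMY_STATS to mimic the "stats configured in" case;
 * call sites must compile unchanged in both variants. */
struct task_info {
        unsigned long long last_queued;
};

#ifdef MY_STATS
/* real helper, only present when the feature is configured in */
static inline void demo_reset_dequeued(struct task_info *t)
{
        t->last_queued = 0;
}
#else
/* no-op stub: do { } while (0) swallows the caller's trailing
 * semicolon, so it behaves like a single statement after an unbraced if */
#define demo_reset_dequeued(t)	do { } while (0)
#endif

int main(void)
{
        struct task_info t = { 42 };

        demo_reset_dequeued(&t);        /* compiles in both configurations */
        printf("last_queued = %llu\n", t.last_queued);
        return 0;
}

This is the same pattern the patch applies to sched_info_reset_dequeued() and sched_info_dequeued() for the !CONFIG_SCHEDSTATS && !CONFIG_TASK_DELAY_ACCT build.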