kernel/sched_stats.h
#ifdef CONFIG_SCHEDSTATS
/*
 * bump this up when changing the output format or the meaning of an existing
 * format, so that tools can adapt (or abort)
 */
#define SCHEDSTAT_VERSION 15
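/*
 * Illustrative sketch (not part of the original file): under version 15 the
 * output emitted by show_schedstat() below is line oriented, roughly
 *
 *      version 15
 *      timestamp <jiffies>
 *      cpu<N> <9 runqueue counters, per the "cpu%d ..." format string below>
 *      domain<M> <cpumask> <8 counters per cpu_idle_type, then 12 more>
 *
 * with one cpu<N> line per online CPU and, on SMP, one domain<M> line per
 * sched_domain of that CPU.  Field values here are placeholders.
 */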
static int show_schedstat(struct seq_file *seq, void *v)
{
        int cpu;
        int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
        char *mask_str = kmalloc(mask_len, GFP_KERNEL);

        if (mask_str == NULL)
                return -ENOMEM;

        seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
        seq_printf(seq, "timestamp %lu\n", jiffies);
        for_each_online_cpu(cpu) {
                struct rq *rq = cpu_rq(cpu);
#ifdef CONFIG_SMP
                struct sched_domain *sd;
                int dcount = 0;
#endif

                /* runqueue-specific stats */
                seq_printf(seq,
                    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
                    cpu, rq->yld_count,
                    rq->sched_switch, rq->sched_count, rq->sched_goidle,
                    rq->ttwu_count, rq->ttwu_local,
                    rq->rq_cpu_time,
                    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);

                seq_printf(seq, "\n");

#ifdef CONFIG_SMP
                /* domain-specific stats */
                preempt_disable();
                for_each_domain(cpu, sd) {
                        enum cpu_idle_type itype;

                        cpumask_scnprintf(mask_str, mask_len,
                                          sched_domain_span(sd));
                        seq_printf(seq, "domain%d %s", dcount++, mask_str);
                        for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
                                        itype++) {
                                seq_printf(seq, " %u %u %u %u %u %u %u %u",
                                    sd->lb_count[itype],
                                    sd->lb_balanced[itype],
                                    sd->lb_failed[itype],
                                    sd->lb_imbalance[itype],
                                    sd->lb_gained[itype],
                                    sd->lb_hot_gained[itype],
                                    sd->lb_nobusyq[itype],
                                    sd->lb_nobusyg[itype]);
                        }
                        seq_printf(seq,
                                   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
                            sd->alb_count, sd->alb_failed, sd->alb_pushed,
                            sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
                            sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
                            sd->ttwu_wake_remote, sd->ttwu_move_affine,
                            sd->ttwu_move_balance);
                }
                preempt_enable();
#endif
        }
        kfree(mask_str);
        return 0;
}

static int schedstat_open(struct inode *inode, struct file *file)
{
        unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
        char *buf = kmalloc(size, GFP_KERNEL);
        struct seq_file *m;
        int res;

        if (!buf)
                return -ENOMEM;
        res = single_open(file, show_schedstat, NULL);
        if (!res) {
                m = file->private_data;
                m->buf = buf;
                m->size = size;
        } else
                kfree(buf);
        return res;
}
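/*
 * Usage sketch (illustrative only, not part of the original file): a minimal
 * userspace consumer of the /proc/schedstat file registered by
 * proc_schedstat_init() below could simply stream it, e.g.
 *
 *      #include <stdio.h>
 *
 *      int main(void)
 *      {
 *              char line[4096];
 *              FILE *f = fopen("/proc/schedstat", "r");
 *
 *              if (!f)
 *                      return 1;
 *              while (fgets(line, sizeof(line), f))
 *                      fputs(line, stdout);
 *              fclose(f);
 *              return 0;
 *      }
 *
 * Tools should check the "version" line first and bail out on an unknown
 * format, as the comment above SCHEDSTAT_VERSION suggests.
 */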
static const struct file_operations proc_schedstat_operations = {
        .open    = schedstat_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};

static int __init proc_schedstat_init(void)
{
        proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
        return 0;
}
module_init(proc_schedstat_init);

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
{
        if (rq) {
                rq->rq_sched_info.run_delay += delta;
                rq->rq_sched_info.pcount++;
        }
}

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{
        if (rq)
                rq->rq_cpu_time += delta;
}

static inline void
rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
{
        if (rq)
                rq->rq_sched_info.run_delay += delta;
}
# define schedstat_inc(rq, field)       do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)  do { (rq)->field += (amt); } while (0)
# define schedstat_set(var, val)        do { var = (val); } while (0)
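/*
 * Usage sketch (illustrative, not part of the original file): the scheduler
 * core bumps its counters through these wrappers so the accounting compiles
 * away when CONFIG_SCHEDSTATS is off, along the lines of
 *
 *      schedstat_inc(rq, yld_count);                     - a runqueue counter
 *      schedstat_add(sd, lb_imbalance[idle], imbalance); - a domain counter
 *      schedstat_set(se.wait_max, delta);                - record a value
 *
 * The exact call sites live in the scheduler code, not in this file; the
 * field names above are only examples of statistics fields kept on
 * struct rq, struct sched_domain and struct sched_entity.
 */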
#else /* !CONFIG_SCHEDSTATS */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
{}
static inline void
rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
{}
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{}
# define schedstat_inc(rq, field)       do { } while (0)
# define schedstat_add(rq, field, amt)  do { } while (0)
# define schedstat_set(var, val)        do { } while (0)
#endif
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
static inline void sched_info_reset_dequeued(struct task_struct *t)
{
        t->sched_info.last_queued = 0;
}

/*
 * Called when a process is dequeued from the active array and given
 * the cpu.  We should note that with the exception of interactive
 * tasks, the expired queue will become the active queue after the active
 * queue is empty, without explicitly dequeuing and requeuing tasks in the
 * expired queue.  (Interactive tasks may be requeued directly to the
 * active queue, thus delaying tasks in the expired queue from running;
 * see scheduler_tick()).
 *
 * Though we are interested in knowing how long it was from the *first* time a
 * task was queued to the time that it finally hit a cpu, we call this routine
 * from dequeue_task() to account for possible rq->clock skew across cpus. The
 * delta taken on each cpu would annul the skew.
 */
static inline void sched_info_dequeued(struct task_struct *t)
{
        unsigned long long now = task_rq(t)->clock, delta = 0;

        if (unlikely(sched_info_on()))
                if (t->sched_info.last_queued)
                        delta = now - t->sched_info.last_queued;
        sched_info_reset_dequeued(t);
        t->sched_info.run_delay += delta;

        rq_sched_info_dequeued(task_rq(t), delta);
}

/*
 * Called when a task finally hits the cpu.  We can now calculate how
 * long it was waiting to run.  We also note when it began so that we
 * can keep stats on how long its timeslice is.
 */
static void sched_info_arrive(struct task_struct *t)
{
        unsigned long long now = task_rq(t)->clock, delta = 0;

        if (t->sched_info.last_queued)
                delta = now - t->sched_info.last_queued;
        sched_info_reset_dequeued(t);
        t->sched_info.run_delay += delta;
        t->sched_info.last_arrival = now;
        t->sched_info.pcount++;

        rq_sched_info_arrive(task_rq(t), delta);
}

/*
 * Called when a process is queued into either the active or expired
 * array.  The time is noted and later used to determine how long the
 * task had to wait until it reached the cpu.  Since the expired queue
 * will become the active queue after the active queue is empty, without
 * dequeuing and requeuing any tasks, we are interested in queuing to
 * either.  It is unusual but not impossible for tasks to be dequeued and
 * immediately requeued in the same or another array: this can happen in
 * sched_yield(), set_user_nice(), and even load_balance() as it moves
 * tasks from runqueue to runqueue.
 *
 * This function is only called from enqueue_task(), but also only updates
 * the timestamp if it is not already set.  It's assumed that
 * sched_info_dequeued() will clear that stamp when appropriate.
 */
static inline void sched_info_queued(struct task_struct *t)
{
        if (unlikely(sched_info_on()))
                if (!t->sched_info.last_queued)
                        t->sched_info.last_queued = task_rq(t)->clock;
}

/*
 * Called when a process ceases being the active-running process, either
 * voluntarily or involuntarily.  Now we can calculate how long we ran.
 * Also, if the process is still in the TASK_RUNNING state, call
 * sched_info_queued() to mark that it has now again started waiting on
 * the runqueue.
 */
static inline void sched_info_depart(struct task_struct *t)
{
        unsigned long long delta = task_rq(t)->clock -
                                        t->sched_info.last_arrival;

        rq_sched_info_depart(task_rq(t), delta);

        if (t->state == TASK_RUNNING)
                sched_info_queued(t);
}

/*
 * Called when tasks are switched involuntarily due, typically, to expiring
 * their time slice.  (This may also be called when switching to or from
 * the idle task.)  We are only called when prev != next.
 */
static inline void
__sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
        struct rq *rq = task_rq(prev);

        /*
         * prev now departs the cpu.  It's not interesting to record
         * stats about how efficient we were at scheduling the idle
         * process, however.
         */
        if (prev != rq->idle)
                sched_info_depart(prev);

        if (next != rq->idle)
                sched_info_arrive(next);
}
static inline void
sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
        if (unlikely(sched_info_on()))
                __sched_info_switch(prev, next);
}
#else
#define sched_info_queued(t)                    do { } while (0)
#define sched_info_reset_dequeued(t)            do { } while (0)
#define sched_info_dequeued(t)                  do { } while (0)
#define sched_info_switch(t, next)              do { } while (0)
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
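/*
 * Call-flow sketch (illustrative, pieced together from the comments above
 * rather than quoted from the scheduler core): the sched_info_* hooks above
 * are expected to be driven roughly as
 *
 *      enqueue_task()  -> sched_info_queued(p)     note ->last_queued
 *      dequeue_task()  -> sched_info_dequeued(p)   fold the wait into run_delay
 *      context switch  -> sched_info_switch(prev, next)
 *                           -> sched_info_depart(prev)   charge CPU time used
 *                           -> sched_info_arrive(next)   charge time spent waiting
 *
 * so that sched_info.run_delay accumulates time spent runnable but not
 * running, and sched_info.pcount counts how often the task got the CPU.
 */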

/*
 * The following are functions that support scheduler-internal time accounting.
 * These functions are generally called at the timer tick.  None of this depends
 * on CONFIG_SCHEDSTATS.
 */

/**
 * account_group_user_time - Maintain utime for a thread group.
 *
 * @tsk:        Pointer to task structure.
 * @cputime:    Time value by which to increment the utime field of the
 *              thread_group_cputime structure.
 *
 * If thread group time is being maintained, get the structure for the
 * running CPU and update the utime field there.
 */
static inline void account_group_user_time(struct task_struct *tsk,
                                           cputime_t cputime)
{
        struct thread_group_cputimer *cputimer;

        /* tsk == current, ensure it is safe to use ->signal */
        if (unlikely(tsk->exit_state))
                return;

        cputimer = &tsk->signal->cputimer;

        if (!cputimer->running)
                return;

        spin_lock(&cputimer->lock);
        cputimer->cputime.utime =
                cputime_add(cputimer->cputime.utime, cputime);
        spin_unlock(&cputimer->lock);
}

/**
 * account_group_system_time - Maintain stime for a thread group.
 *
 * @tsk:        Pointer to task structure.
 * @cputime:    Time value by which to increment the stime field of the
 *              thread_group_cputime structure.
 *
 * If thread group time is being maintained, get the structure for the
 * running CPU and update the stime field there.
 */
static inline void account_group_system_time(struct task_struct *tsk,
                                             cputime_t cputime)
{
        struct thread_group_cputimer *cputimer;

        /* tsk == current, ensure it is safe to use ->signal */
        if (unlikely(tsk->exit_state))
                return;

        cputimer = &tsk->signal->cputimer;

        if (!cputimer->running)
                return;

        spin_lock(&cputimer->lock);
        cputimer->cputime.stime =
                cputime_add(cputimer->cputime.stime, cputime);
        spin_unlock(&cputimer->lock);
}

/**
 * account_group_exec_runtime - Maintain exec runtime for a thread group.
 *
 * @tsk:        Pointer to task structure.
 * @ns:         Time value by which to increment the sum_exec_runtime field
 *              of the thread_group_cputime structure.
 *
 * If thread group time is being maintained, get the structure for the
 * running CPU and update the sum_exec_runtime field there.
 */
static inline void account_group_exec_runtime(struct task_struct *tsk,
                                              unsigned long long ns)
{
        struct thread_group_cputimer *cputimer;
        struct signal_struct *sig;

        sig = tsk->signal;
        /* see __exit_signal()->task_rq_unlock_wait() */
        barrier();
        if (unlikely(!sig))
                return;

        cputimer = &sig->cputimer;

        if (!cputimer->running)
                return;

        spin_lock(&cputimer->lock);
        cputimer->cputime.sum_exec_runtime += ns;
        spin_unlock(&cputimer->lock);
}
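/*
 * Caller sketch (illustrative; the real call sites live in the time
 * accounting code, not in this file): the three helpers above are meant to
 * be invoked from the per-task accounting paths, along the lines of
 *
 *      account_user_time(p, cputime)  -> account_group_user_time(p, cputime)
 *      account_system_time(p, ...)    -> account_group_system_time(p, cputime)
 *      update_curr(cfs_rq)            -> account_group_exec_runtime(curr, delta_exec)
 *
 * so that the thread-group totals in signal->cputimer stay in step with the
 * per-task utime/stime/sum_exec_runtime used by POSIX CPU timers.
 */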