Commit 297c5d92634c809cef23d73e7b2556f2528ff7e2
Committed by
Linus Torvalds
1 parent
0c18d7a5df
Exists in
master
and in
4 other branches
task IO accounting: provide distinct tgid/tid I/O statistics
Report per-thread I/O statistics in /proc/pid/task/tid/io and aggregate parent I/O statistics in /proc/pid/io. This approach follows the same model used to account per-process and per-thread CPU times. As a practical application, this makes it possible, for example, to quickly find the top I/O consumer when a process spawns many child threads that perform the actual I/O work, because the aggregated I/O statistics can always be found in /proc/pid/io. [ Oleg Nesterov points out that we should check that the task is still alive before we iterate over the threads, but also says that we can do that fixup on top of this later. - Linus ] Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Andrea Righi <righi.andrea@gmail.com> Cc: Matt Heaton <matt@hostmonster.com> Cc: Shailabh Nagar <nagar@watson.ibm.com> Acked-by-with-comments: Oleg Nesterov <oleg@tv-sign.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 4 changed files with 108 additions and 15 deletions Side-by-side Diff
fs/proc/base.c
... | ... | @@ -2376,30 +2376,83 @@ |
2376 | 2376 | } |
2377 | 2377 | |
2378 | 2378 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
2379 | -static int proc_pid_io_accounting(struct task_struct *task, char *buffer) | |
2379 | +static int do_io_accounting(struct task_struct *task, char *buffer, int whole) | |
2380 | 2380 | { |
2381 | + u64 rchar, wchar, syscr, syscw; | |
2382 | + struct task_io_accounting ioac; | |
2383 | + | |
2384 | + if (!whole) { | |
2385 | + rchar = task->rchar; | |
2386 | + wchar = task->wchar; | |
2387 | + syscr = task->syscr; | |
2388 | + syscw = task->syscw; | |
2389 | + memcpy(&ioac, &task->ioac, sizeof(ioac)); | |
2390 | + } else { | |
2391 | + unsigned long flags; | |
2392 | + struct task_struct *t = task; | |
2393 | + rchar = wchar = syscr = syscw = 0; | |
2394 | + memset(&ioac, 0, sizeof(ioac)); | |
2395 | + | |
2396 | + rcu_read_lock(); | |
2397 | + do { | |
2398 | + rchar += t->rchar; | |
2399 | + wchar += t->wchar; | |
2400 | + syscr += t->syscr; | |
2401 | + syscw += t->syscw; | |
2402 | + | |
2403 | + ioac.read_bytes += t->ioac.read_bytes; | |
2404 | + ioac.write_bytes += t->ioac.write_bytes; | |
2405 | + ioac.cancelled_write_bytes += | |
2406 | + t->ioac.cancelled_write_bytes; | |
2407 | + t = next_thread(t); | |
2408 | + } while (t != task); | |
2409 | + rcu_read_unlock(); | |
2410 | + | |
2411 | + if (lock_task_sighand(task, &flags)) { | |
2412 | + struct signal_struct *sig = task->signal; | |
2413 | + | |
2414 | + rchar += sig->rchar; | |
2415 | + wchar += sig->wchar; | |
2416 | + syscr += sig->syscr; | |
2417 | + syscw += sig->syscw; | |
2418 | + | |
2419 | + ioac.read_bytes += sig->ioac.read_bytes; | |
2420 | + ioac.write_bytes += sig->ioac.write_bytes; | |
2421 | + ioac.cancelled_write_bytes += | |
2422 | + sig->ioac.cancelled_write_bytes; | |
2423 | + | |
2424 | + unlock_task_sighand(task, &flags); | |
2425 | + } | |
2426 | + } | |
2427 | + | |
2381 | 2428 | return sprintf(buffer, |
2382 | -#ifdef CONFIG_TASK_XACCT | |
2383 | 2429 | "rchar: %llu\n" |
2384 | 2430 | "wchar: %llu\n" |
2385 | 2431 | "syscr: %llu\n" |
2386 | 2432 | "syscw: %llu\n" |
2387 | -#endif | |
2388 | 2433 | "read_bytes: %llu\n" |
2389 | 2434 | "write_bytes: %llu\n" |
2390 | 2435 | "cancelled_write_bytes: %llu\n", |
2391 | -#ifdef CONFIG_TASK_XACCT | |
2392 | - (unsigned long long)task->rchar, | |
2393 | - (unsigned long long)task->wchar, | |
2394 | - (unsigned long long)task->syscr, | |
2395 | - (unsigned long long)task->syscw, | |
2396 | -#endif | |
2397 | - (unsigned long long)task->ioac.read_bytes, | |
2398 | - (unsigned long long)task->ioac.write_bytes, | |
2399 | - (unsigned long long)task->ioac.cancelled_write_bytes); | |
2436 | + (unsigned long long)rchar, | |
2437 | + (unsigned long long)wchar, | |
2438 | + (unsigned long long)syscr, | |
2439 | + (unsigned long long)syscw, | |
2440 | + (unsigned long long)ioac.read_bytes, | |
2441 | + (unsigned long long)ioac.write_bytes, | |
2442 | + (unsigned long long)ioac.cancelled_write_bytes); | |
2400 | 2443 | } |
2401 | -#endif | |
2402 | 2444 | |
2445 | +static int proc_tid_io_accounting(struct task_struct *task, char *buffer) | |
2446 | +{ | |
2447 | + return do_io_accounting(task, buffer, 0); | |
2448 | +} | |
2449 | + | |
2450 | +static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) | |
2451 | +{ | |
2452 | + return do_io_accounting(task, buffer, 1); | |
2453 | +} | |
2454 | +#endif /* CONFIG_TASK_IO_ACCOUNTING */ | |
2455 | + | |
2403 | 2456 | /* |
2404 | 2457 | * Thread groups |
2405 | 2458 | */ |
... | ... | @@ -2470,7 +2523,7 @@ |
2470 | 2523 | REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), |
2471 | 2524 | #endif |
2472 | 2525 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
2473 | - INF("io", S_IRUGO, pid_io_accounting), | |
2526 | + INF("io", S_IRUGO, tgid_io_accounting), | |
2474 | 2527 | #endif |
2475 | 2528 | }; |
2476 | 2529 | |
... | ... | @@ -2796,6 +2849,9 @@ |
2796 | 2849 | #endif |
2797 | 2850 | #ifdef CONFIG_FAULT_INJECTION |
2798 | 2851 | REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), |
2852 | +#endif | |
2853 | +#ifdef CONFIG_TASK_IO_ACCOUNTING | |
2854 | + INF("io", S_IRUGO, tid_io_accounting), | |
2799 | 2855 | #endif |
2800 | 2856 | }; |
2801 | 2857 |
include/linux/sched.h
... | ... | @@ -506,6 +506,10 @@ |
506 | 506 | unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; |
507 | 507 | unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; |
508 | 508 | unsigned long inblock, oublock, cinblock, coublock; |
509 | +#ifdef CONFIG_TASK_XACCT | |
510 | + u64 rchar, wchar, syscr, syscw; | |
511 | +#endif | |
512 | + struct task_io_accounting ioac; | |
509 | 513 | |
510 | 514 | /* |
511 | 515 | * Cumulative ns of scheduled CPU time for dead threads in the |
kernel/exit.c
... | ... | @@ -120,6 +120,18 @@ |
120 | 120 | sig->nivcsw += tsk->nivcsw; |
121 | 121 | sig->inblock += task_io_get_inblock(tsk); |
122 | 122 | sig->oublock += task_io_get_oublock(tsk); |
123 | +#ifdef CONFIG_TASK_XACCT | |
124 | + sig->rchar += tsk->rchar; | |
125 | + sig->wchar += tsk->wchar; | |
126 | + sig->syscr += tsk->syscr; | |
127 | + sig->syscw += tsk->syscw; | |
128 | +#endif /* CONFIG_TASK_XACCT */ | |
129 | +#ifdef CONFIG_TASK_IO_ACCOUNTING | |
130 | + sig->ioac.read_bytes += tsk->ioac.read_bytes; | |
131 | + sig->ioac.write_bytes += tsk->ioac.write_bytes; | |
132 | + sig->ioac.cancelled_write_bytes += | |
133 | + tsk->ioac.cancelled_write_bytes; | |
134 | +#endif /* CONFIG_TASK_IO_ACCOUNTING */ | |
123 | 135 | sig->sum_sched_runtime += tsk->se.sum_exec_runtime; |
124 | 136 | sig = NULL; /* Marker for below. */ |
125 | 137 | } |
... | ... | @@ -1366,6 +1378,21 @@ |
1366 | 1378 | psig->coublock += |
1367 | 1379 | task_io_get_oublock(p) + |
1368 | 1380 | sig->oublock + sig->coublock; |
1381 | +#ifdef CONFIG_TASK_XACCT | |
1382 | + psig->rchar += p->rchar + sig->rchar; | |
1383 | + psig->wchar += p->wchar + sig->wchar; | |
1384 | + psig->syscr += p->syscr + sig->syscr; | |
1385 | + psig->syscw += p->syscw + sig->syscw; | |
1386 | +#endif /* CONFIG_TASK_XACCT */ | |
1387 | +#ifdef CONFIG_TASK_IO_ACCOUNTING | |
1388 | + psig->ioac.read_bytes += | |
1389 | + p->ioac.read_bytes + sig->ioac.read_bytes; | |
1390 | + psig->ioac.write_bytes += | |
1391 | + p->ioac.write_bytes + sig->ioac.write_bytes; | |
1392 | + psig->ioac.cancelled_write_bytes += | |
1393 | + p->ioac.cancelled_write_bytes + | |
1394 | + sig->ioac.cancelled_write_bytes; | |
1395 | +#endif /* CONFIG_TASK_IO_ACCOUNTING */ | |
1369 | 1396 | spin_unlock_irq(&p->parent->sighand->siglock); |
1370 | 1397 | } |
1371 | 1398 |
kernel/fork.c
... | ... | @@ -812,6 +812,12 @@ |
812 | 812 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; |
813 | 813 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; |
814 | 814 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; |
815 | +#ifdef CONFIG_TASK_XACCT | |
816 | + sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0; | |
817 | +#endif | |
818 | +#ifdef CONFIG_TASK_IO_ACCOUNTING | |
819 | + memset(&sig->ioac, 0, sizeof(sig->ioac)); | |
820 | +#endif | |
815 | 821 | sig->sum_sched_runtime = 0; |
816 | 822 | INIT_LIST_HEAD(&sig->cpu_timers[0]); |
817 | 823 | INIT_LIST_HEAD(&sig->cpu_timers[1]); |