Commit 297c5d92634c809cef23d73e7b2556f2528ff7e2

Authored by Andrea Righi
Committed by Linus Torvalds
1 parent 0c18d7a5df

task IO accounting: provide distinct tgid/tid I/O statistics

Report per-thread I/O statistics in /proc/pid/task/tid/io and aggregate
parent I/O statistics in /proc/pid/io.  This approach follows the same
model used to account per-process and per-thread CPU times.

As a practical application, this makes it possible, for example, to quickly
find the top I/O consumer when a process spawns many child threads that
perform the actual I/O work, because the aggregated I/O statistics can
always be found in /proc/pid/io.

[ Oleg Nesterov points out that we should check that the task is still
  alive before we iterate over the threads, but also says that we can do
  that fixup on top of this later.  - Linus ]

Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Cc: Matt Heaton <matt@hostmonster.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Acked-by-with-comments: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 108 additions and 15 deletions Side-by-side Diff

... ... @@ -2376,30 +2376,83 @@
2376 2376 }
2377 2377  
2378 2378 #ifdef CONFIG_TASK_IO_ACCOUNTING
2379   -static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
  2379 +static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2380 2380 {
  2381 + u64 rchar, wchar, syscr, syscw;
  2382 + struct task_io_accounting ioac;
  2383 +
  2384 + if (!whole) {
  2385 + rchar = task->rchar;
  2386 + wchar = task->wchar;
  2387 + syscr = task->syscr;
  2388 + syscw = task->syscw;
  2389 + memcpy(&ioac, &task->ioac, sizeof(ioac));
  2390 + } else {
  2391 + unsigned long flags;
  2392 + struct task_struct *t = task;
  2393 + rchar = wchar = syscr = syscw = 0;
  2394 + memset(&ioac, 0, sizeof(ioac));
  2395 +
  2396 + rcu_read_lock();
  2397 + do {
  2398 + rchar += t->rchar;
  2399 + wchar += t->wchar;
  2400 + syscr += t->syscr;
  2401 + syscw += t->syscw;
  2402 +
  2403 + ioac.read_bytes += t->ioac.read_bytes;
  2404 + ioac.write_bytes += t->ioac.write_bytes;
  2405 + ioac.cancelled_write_bytes +=
  2406 + t->ioac.cancelled_write_bytes;
  2407 + t = next_thread(t);
  2408 + } while (t != task);
  2409 + rcu_read_unlock();
  2410 +
  2411 + if (lock_task_sighand(task, &flags)) {
  2412 + struct signal_struct *sig = task->signal;
  2413 +
  2414 + rchar += sig->rchar;
  2415 + wchar += sig->wchar;
  2416 + syscr += sig->syscr;
  2417 + syscw += sig->syscw;
  2418 +
  2419 + ioac.read_bytes += sig->ioac.read_bytes;
  2420 + ioac.write_bytes += sig->ioac.write_bytes;
  2421 + ioac.cancelled_write_bytes +=
  2422 + sig->ioac.cancelled_write_bytes;
  2423 +
  2424 + unlock_task_sighand(task, &flags);
  2425 + }
  2426 + }
  2427 +
2381 2428 return sprintf(buffer,
2382   -#ifdef CONFIG_TASK_XACCT
2383 2429 "rchar: %llu\n"
2384 2430 "wchar: %llu\n"
2385 2431 "syscr: %llu\n"
2386 2432 "syscw: %llu\n"
2387   -#endif
2388 2433 "read_bytes: %llu\n"
2389 2434 "write_bytes: %llu\n"
2390 2435 "cancelled_write_bytes: %llu\n",
2391   -#ifdef CONFIG_TASK_XACCT
2392   - (unsigned long long)task->rchar,
2393   - (unsigned long long)task->wchar,
2394   - (unsigned long long)task->syscr,
2395   - (unsigned long long)task->syscw,
2396   -#endif
2397   - (unsigned long long)task->ioac.read_bytes,
2398   - (unsigned long long)task->ioac.write_bytes,
2399   - (unsigned long long)task->ioac.cancelled_write_bytes);
  2436 + (unsigned long long)rchar,
  2437 + (unsigned long long)wchar,
  2438 + (unsigned long long)syscr,
  2439 + (unsigned long long)syscw,
  2440 + (unsigned long long)ioac.read_bytes,
  2441 + (unsigned long long)ioac.write_bytes,
  2442 + (unsigned long long)ioac.cancelled_write_bytes);
2400 2443 }
2401   -#endif
2402 2444  
  2445 +static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
  2446 +{
  2447 + return do_io_accounting(task, buffer, 0);
  2448 +}
  2449 +
  2450 +static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
  2451 +{
  2452 + return do_io_accounting(task, buffer, 1);
  2453 +}
  2454 +#endif /* CONFIG_TASK_IO_ACCOUNTING */
  2455 +
2403 2456 /*
2404 2457 * Thread groups
2405 2458 */
... ... @@ -2470,7 +2523,7 @@
2470 2523 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2471 2524 #endif
2472 2525 #ifdef CONFIG_TASK_IO_ACCOUNTING
2473   - INF("io", S_IRUGO, pid_io_accounting),
  2526 + INF("io", S_IRUGO, tgid_io_accounting),
2474 2527 #endif
2475 2528 };
2476 2529  
... ... @@ -2796,6 +2849,9 @@
2796 2849 #endif
2797 2850 #ifdef CONFIG_FAULT_INJECTION
2798 2851 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
  2852 +#endif
  2853 +#ifdef CONFIG_TASK_IO_ACCOUNTING
  2854 + INF("io", S_IRUGO, tid_io_accounting),
2799 2855 #endif
2800 2856 };
2801 2857  
include/linux/sched.h
... ... @@ -506,6 +506,10 @@
506 506 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
507 507 unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
508 508 unsigned long inblock, oublock, cinblock, coublock;
  509 +#ifdef CONFIG_TASK_XACCT
  510 + u64 rchar, wchar, syscr, syscw;
  511 +#endif
  512 + struct task_io_accounting ioac;
509 513  
510 514 /*
511 515 * Cumulative ns of scheduled CPU time for dead threads in the
... ... @@ -120,6 +120,18 @@
120 120 sig->nivcsw += tsk->nivcsw;
121 121 sig->inblock += task_io_get_inblock(tsk);
122 122 sig->oublock += task_io_get_oublock(tsk);
  123 +#ifdef CONFIG_TASK_XACCT
  124 + sig->rchar += tsk->rchar;
  125 + sig->wchar += tsk->wchar;
  126 + sig->syscr += tsk->syscr;
  127 + sig->syscw += tsk->syscw;
  128 +#endif /* CONFIG_TASK_XACCT */
  129 +#ifdef CONFIG_TASK_IO_ACCOUNTING
  130 + sig->ioac.read_bytes += tsk->ioac.read_bytes;
  131 + sig->ioac.write_bytes += tsk->ioac.write_bytes;
  132 + sig->ioac.cancelled_write_bytes +=
  133 + tsk->ioac.cancelled_write_bytes;
  134 +#endif /* CONFIG_TASK_IO_ACCOUNTING */
123 135 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
124 136 sig = NULL; /* Marker for below. */
125 137 }
... ... @@ -1366,6 +1378,21 @@
1366 1378 psig->coublock +=
1367 1379 task_io_get_oublock(p) +
1368 1380 sig->oublock + sig->coublock;
  1381 +#ifdef CONFIG_TASK_XACCT
  1382 + psig->rchar += p->rchar + sig->rchar;
  1383 + psig->wchar += p->wchar + sig->wchar;
  1384 + psig->syscr += p->syscr + sig->syscr;
  1385 + psig->syscw += p->syscw + sig->syscw;
  1386 +#endif /* CONFIG_TASK_XACCT */
  1387 +#ifdef CONFIG_TASK_IO_ACCOUNTING
  1388 + psig->ioac.read_bytes +=
  1389 + p->ioac.read_bytes + sig->ioac.read_bytes;
  1390 + psig->ioac.write_bytes +=
  1391 + p->ioac.write_bytes + sig->ioac.write_bytes;
  1392 + psig->ioac.cancelled_write_bytes +=
  1393 + p->ioac.cancelled_write_bytes +
  1394 + sig->ioac.cancelled_write_bytes;
  1395 +#endif /* CONFIG_TASK_IO_ACCOUNTING */
1369 1396 spin_unlock_irq(&p->parent->sighand->siglock);
1370 1397 }
1371 1398  
... ... @@ -812,6 +812,12 @@
812 812 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
813 813 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
814 814 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
  815 +#ifdef CONFIG_TASK_XACCT
  816 + sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
  817 +#endif
  818 +#ifdef CONFIG_TASK_IO_ACCOUNTING
  819 + memset(&sig->ioac, 0, sizeof(sig->ioac));
  820 +#endif
815 821 sig->sum_sched_runtime = 0;
816 822 INIT_LIST_HEAD(&sig->cpu_timers[0]);
817 823 INIT_LIST_HEAD(&sig->cpu_timers[1]);