Commit db5fed26b2e0beed939b773dd5896077a1794d65

Authored by Jay Lan
Committed by Linus Torvalds
1 parent 8f0ab51479

[PATCH] csa accounting taskstats update

ChangeLog:
   Feedback from Andrew Morton:
   - define TS_COMM_LEN to 32
   - change acct_stimexpd field of task_struct to be of
     cputime_t, which is to be used to save the tsk->stime
     of last timer interrupt update.
   - a new Documentation/accounting/taskstats-struct.txt
     to describe fields of taskstats struct.

   Feedback from Balbir Singh:
   - keep the stime of a task to be zero when both stime
     and utime are zero as recorded in task_struct.

   Misc:
   - convert accumulated RSS/VM from platform dependent
     pages-ticks to MBytes-usecs in the kernel

Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Jes Sorensen <jes@sgi.com>
Cc: Chris Sturtivant <csturtiv@sgi.com>
Cc: Tony Ernst <tee@sgi.com>
Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 4 changed files with 207 additions and 21 deletions Side-by-side Diff

Documentation/accounting/taskstats-struct.txt
  1 +The struct taskstats
  2 +--------------------
  3 +
  4 +This document contains an explanation of the struct taskstats fields.
  5 +
  6 +There are three different groups of fields in the struct taskstats:
  7 +
  8 +1) Common and basic accounting fields
  9 + If CONFIG_TASKSTATS is set, the taskstats interface is enabled and
  10 + the common fields and basic accounting fields are collected for
  11 + delivery at do_exit() of a task.
  12 +2) Delay accounting fields
  13 + These fields are placed between
  14 + /* Delay accounting fields start */
  15 + and
  16 + /* Delay accounting fields end */
  17 + Their values are collected if CONFIG_TASK_DELAY_ACCT is set.
  18 +3) Extended accounting fields
  19 + These fields are placed between
  20 + /* Extended accounting fields start */
  21 + and
  22 + /* Extended accounting fields end */
  23 + Their values are collected if CONFIG_TASK_XACCT is set.
  24 +
  25 +Future extension should add fields to the end of the taskstats struct, and
  26 +should not change the relative position of each field within the struct.
  27 +
  28 +
  29 +struct taskstats {
  30 +
  31 +1) Common and basic accounting fields:
  32 + /* The version number of this struct. This field is always set to
  33 + * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>.
  34 + * Each time the struct is changed, the value should be incremented.
  35 + */
  36 + __u16 version;
  37 +
  38 + /* The exit code of a task. */
  39 + __u32 ac_exitcode; /* Exit status */
  40 +
  41 + /* The accounting flags of a task as defined in <linux/acct.h>
  42 + * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
  43 + */
  44 + __u8 ac_flag; /* Record flags */
  45 +
  46 + /* The value of task_nice() of a task. */
  47 + __u8 ac_nice; /* task_nice */
  48 +
  49 + /* The name of the command that started this task. */
  50 + char ac_comm[TS_COMM_LEN]; /* Command name */
  51 +
  52 + /* The scheduling discipline as set in task->policy field. */
  53 + __u8 ac_sched; /* Scheduling discipline */
  54 +
  55 + __u8 ac_pad[3];
  56 + __u32 ac_uid; /* User ID */
  57 + __u32 ac_gid; /* Group ID */
  58 + __u32 ac_pid; /* Process ID */
  59 + __u32 ac_ppid; /* Parent process ID */
  60 +
  61 + /* The time when a task begins, in [secs] since 1970. */
  62 + __u32 ac_btime; /* Begin time [sec since 1970] */
  63 +
  64 + /* The elapsed time of a task, in [usec]. */
  65 + __u64 ac_etime; /* Elapsed time [usec] */
  66 +
  67 + /* The user CPU time of a task, in [usec]. */
  68 + __u64 ac_utime; /* User CPU time [usec] */
  69 +
  70 + /* The system CPU time of a task, in [usec]. */
  71 + __u64 ac_stime; /* System CPU time [usec] */
  72 +
  73 + /* The minor page fault count of a task, as set in task->min_flt. */
  74 + __u64 ac_minflt; /* Minor Page Fault Count */
  75 +
  76 + /* The major page fault count of a task, as set in task->maj_flt. */
  77 + __u64 ac_majflt; /* Major Page Fault Count */
  78 +
  79 +
  80 +2) Delay accounting fields:
  81 + /* Delay accounting fields start
  82 + *
  83 + * All values, until the comment "Delay accounting fields end" are
  84 + * available only if delay accounting is enabled, even though the last
  85 + * few fields are not delays
  86 + *
  87 + * xxx_count is the number of delay values recorded
  88 + * xxx_delay_total is the corresponding cumulative delay in nanoseconds
  89 + *
  90 + * xxx_delay_total wraps around to zero on overflow
  91 + * xxx_count incremented regardless of overflow
  92 + */
  93 +
  94 + /* Delay waiting for cpu, while runnable
  95 + * count, delay_total NOT updated atomically
  96 + */
  97 + __u64 cpu_count;
  98 + __u64 cpu_delay_total;
  99 +
  100 + /* Following four fields atomically updated using task->delays->lock */
  101 +
  102 + /* Delay waiting for synchronous block I/O to complete
  103 + * does not account for delays in I/O submission
  104 + */
  105 + __u64 blkio_count;
  106 + __u64 blkio_delay_total;
  107 +
  108 + /* Delay waiting for page fault I/O (swap in only) */
  109 + __u64 swapin_count;
  110 + __u64 swapin_delay_total;
  111 +
  112 + /* cpu "wall-clock" running time
  113 + * On some architectures, value will adjust for cpu time stolen
  114 + * from the kernel in involuntary waits due to virtualization.
  115 + * Value is cumulative, in nanoseconds, without a corresponding count
  116 + * and wraps around to zero silently on overflow
  117 + */
  118 + __u64 cpu_run_real_total;
  119 +
  120 + /* cpu "virtual" running time
  121 + * Uses time intervals seen by the kernel i.e. no adjustment
  122 + * for kernel's involuntary waits due to virtualization.
  123 + * Value is cumulative, in nanoseconds, without a corresponding count
  124 + * and wraps around to zero silently on overflow
  125 + */
  126 + __u64 cpu_run_virtual_total;
  127 + /* Delay accounting fields end */
  128 + /* version 1 ends here */
  129 +
  130 +
  131 +3) Extended accounting fields
  132 + /* Extended accounting fields start */
  133 +
  134 + /* Accumulated RSS usage in duration of a task, in MBytes-usecs.
  135 + * The current rss usage is added to this counter every time
  136 + * a tick is charged to a task's system time. So, at the end we
  137 + * will have memory usage multiplied by system time. Thus an
  138 + * average usage per system time unit can be calculated.
  139 + */
  140 + __u64 coremem; /* accumulated RSS usage in MB-usec */
  141 +
  142 + /* Accumulated virtual memory usage in duration of a task.
  143 + * Same as coremem above except that we keep track of VM usage.
  144 + */
  145 + __u64 virtmem; /* accumulated VM usage in MB-usec */
  146 +
  147 + /* High watermark of RSS usage in duration of a task, in KBytes. */
  148 + __u64 hiwater_rss; /* High-watermark of RSS usage */
  149 +
  150 + /* High watermark of VM usage in duration of a task, in KBytes. */
  151 + __u64 hiwater_vm; /* High-water virtual memory usage */
  152 +
  153 + /* The following four fields are I/O statistics of a task. */
  154 + __u64 read_char; /* bytes read */
  155 + __u64 write_char; /* bytes written */
  156 + __u64 read_syscalls; /* read syscalls */
  157 + __u64 write_syscalls; /* write syscalls */
  158 +
  159 + /* Extended accounting fields end */
  160 +
  161 +}
include/linux/sched.h
... ... @@ -984,7 +984,7 @@
984 984 #if defined(CONFIG_TASK_XACCT)
985 985 u64 acct_rss_mem1; /* accumulated rss usage */
986 986 u64 acct_vm_mem1; /* accumulated virtual memory usage */
987   - clock_t acct_stimexpd; /* clock_t-converted stime since last update */
  987 + cputime_t acct_stimexpd;/* stime since last update */
988 988 #endif
989 989 #ifdef CONFIG_NUMA
990 990 struct mempolicy *mempolicy;
include/linux/taskstats.h
... ... @@ -32,14 +32,21 @@
32 32  
33 33  
34 34 #define TASKSTATS_VERSION 2
35   -#define TS_COMM_LEN 16 /* should sync up with TASK_COMM_LEN
  35 +#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
36 36 * in linux/sched.h */
37 37  
38 38 struct taskstats {
39 39  
40   - /* Version 1 */
  40 + /* The version number of this struct. This field is always set to
  41 + * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>.
  42 + * Each time the struct is changed, the value should be incremented.
  43 + */
41 44 __u16 version;
42   - __u32 ac_exitcode; /* Exit status */
  45 + __u32 ac_exitcode; /* Exit status */
  46 +
  47 + /* The accounting flags of a task as defined in <linux/acct.h>
  48 + * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
  49 + */
43 50 __u8 ac_flag; /* Record flags */
44 51 __u8 ac_nice; /* task_nice */
45 52  
46 53  
... ... @@ -104,15 +111,30 @@
104 111 __u64 ac_etime; /* Elapsed time [usec] */
105 112 __u64 ac_utime; /* User CPU time [usec] */
106 113 __u64 ac_stime; /* SYstem CPU time [usec] */
107   - __u64 ac_minflt; /* Minor Page Fault */
108   - __u64 ac_majflt; /* Major Page Fault */
  114 + __u64 ac_minflt; /* Minor Page Fault Count */
  115 + __u64 ac_majflt; /* Major Page Fault Count */
109 116 /* Basic Accounting Fields end */
110 117  
111 118 /* Extended accounting fields start */
112   - __u64 acct_rss_mem1; /* accumulated rss usage */
113   - __u64 acct_vm_mem1; /* accumulated virtual memory usage */
114   - __u64 hiwater_rss; /* High-watermark of RSS usage */
115   - __u64 hiwater_vm; /* High-water virtual memory usage */
  119 + /* Accumulated RSS usage in duration of a task, in MBytes-usecs.
  120 + * The current rss usage is added to this counter every time
  121 + * a tick is charged to a task's system time. So, at the end we
  122 + * will have memory usage multiplied by system time. Thus an
  123 + * average usage per system time unit can be calculated.
  124 + */
  125 + __u64 coremem; /* accumulated RSS usage in MB-usec */
  126 + /* Accumulated virtual memory usage in duration of a task.
  127 + * Same as coremem above except that we keep track of VM usage.
  128 + */
  129 + __u64 virtmem; /* accumulated VM usage in MB-usec */
  130 +
  131 + /* High watermark of RSS and virtual memory usage in duration of
  132 + * a task, in KBytes.
  133 + */
  134 + __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */
  135 + __u64 hiwater_vm; /* High-water VM usage, in KB */
  136 +
  137 + /* The following four fields are I/O statistics of a task. */
116 138 __u64 read_char; /* bytes read */
117 139 __u64 write_char; /* bytes written */
118 140 __u64 read_syscalls; /* read syscalls */
... ... @@ -20,6 +20,7 @@
20 20 #include <linux/sched.h>
21 21 #include <linux/tsacct_kern.h>
22 22 #include <linux/acct.h>
  23 +#include <linux/jiffies.h>
23 24  
24 25  
25 26 #define USEC_PER_TICK (USEC_PER_SEC/HZ)
26 27  
27 28  
28 29  
29 30  
30 31  
... ... @@ -62,34 +63,36 @@
62 63 stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
63 64 stats->ac_minflt = tsk->min_flt;
64 65 stats->ac_majflt = tsk->maj_flt;
65   - /* Each process gets a minimum of one usec cpu time */
66   - if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) {
67   - stats->ac_stime = 1;
68   - }
69 66  
70 67 strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm));
71 68 }
72 69  
73 70  
74 71 #ifdef CONFIG_TASK_XACCT
  72 +
  73 +#define KB 1024
  74 +#define MB (1024*KB)
75 75 /*
76 76 * fill in extended accounting fields
77 77 */
78 78 void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
79 79 {
80   - stats->acct_rss_mem1 = p->acct_rss_mem1;
81   - stats->acct_vm_mem1 = p->acct_vm_mem1;
  80 + /* convert pages-jiffies to Mbyte-usec */
  81 + stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
  82 + stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
82 83 if (p->mm) {
83   - stats->hiwater_rss = p->mm->hiwater_rss;
84   - stats->hiwater_vm = p->mm->hiwater_vm;
  84 + /* adjust to KB unit */
  85 + stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB;
  86 + stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB;
85 87 }
86 88 stats->read_char = p->rchar;
87 89 stats->write_char = p->wchar;
88 90 stats->read_syscalls = p->syscr;
89 91 stats->write_syscalls = p->syscw;
90 92 }
  93 +#undef KB
  94 +#undef MB
91 95  
92   -
93 96 /**
94 97 * acct_update_integrals - update mm integral fields in task_struct
95 98 * @tsk: task_struct for accounting
... ... @@ -97,8 +100,8 @@
97 100 void acct_update_integrals(struct task_struct *tsk)
98 101 {
99 102 if (likely(tsk->mm)) {
100   - long delta =
101   - cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
  103 + long delta = cputime_to_jiffies(
  104 + cputime_sub(tsk->stime, tsk->acct_stimexpd));
102 105  
103 106 if (delta == 0)
104 107 return;