Commit db5fed26b2e0beed939b773dd5896077a1794d65
Committed by
Linus Torvalds
1 parent
8f0ab51479
Exists in
master
and in
4 other branches
[PATCH] csa accounting taskstats update
ChangeLog: Feedback from Andrew Morton: - define TS_COMM_LEN to 32 - change acct_stimexpd field of task_struct to be of cputime_t, which is to be used to save the tsk->stime of last timer interrupt update. - a new Documentation/accounting/taskstats-struct.txt to describe fields of taskstats struct. Feedback from Balbir Singh: - keep the stime of a task to be zero when both stime and utime are zero as recorded in task_struct. Misc: - convert accumulated RSS/VM from platform dependent pages-ticks to MBytes-usecs in the kernel Cc: Shailabh Nagar <nagar@watson.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Chris Sturtivant <csturtiv@sgi.com> Cc: Tony Ernst <tee@sgi.com> Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 4 changed files with 207 additions and 21 deletions Side-by-side Diff
Documentation/accounting/taskstats-struct.txt
1 | +The struct taskstats | |
2 | +-------------------- | |
3 | + | |
4 | +This document contains an explanation of the struct taskstats fields. | |
5 | + | |
6 | +There are three different groups of fields in the struct taskstats: | |
7 | + | |
8 | +1) Common and basic accounting fields | |
9 | + If CONFIG_TASKSTATS is set, the taskstats interface is enabled and | |
10 | + the common fields and basic accounting fields are collected for | |
11 | + delivery at do_exit() of a task. | |
12 | +2) Delay accounting fields | |
13 | + These fields are placed between | |
14 | + /* Delay accounting fields start */ | |
15 | + and | |
16 | + /* Delay accounting fields end */ | |
17 | + Their values are collected if CONFIG_TASK_DELAY_ACCT is set. | |
18 | +3) Extended accounting fields | |
19 | + These fields are placed between | |
20 | + /* Extended accounting fields start */ | |
21 | + and | |
22 | + /* Extended accounting fields end */ | |
23 | + Their values are collected if CONFIG_TASK_XACCT is set. | |
24 | + | |
25 | +Future extension should add fields to the end of the taskstats struct, and | |
26 | +should not change the relative position of each field within the struct. | |
27 | + | |
28 | + | |
29 | +struct taskstats { | |
30 | + | |
31 | +1) Common and basic accounting fields: | |
32 | + /* The version number of this struct. This field is always set to | |
33 | + * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>. | |
34 | + * Each time the struct is changed, the value should be incremented. | |
35 | + */ | |
36 | + __u16 version; | |
37 | + | |
38 | + /* The exit code of a task. */ | |
39 | + __u32 ac_exitcode; /* Exit status */ | |
40 | + | |
41 | + /* The accounting flags of a task as defined in <linux/acct.h> | |
42 | + * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. | |
43 | + */ | |
44 | + __u8 ac_flag; /* Record flags */ | |
45 | + | |
46 | + /* The value of task_nice() of a task. */ | |
47 | + __u8 ac_nice; /* task_nice */ | |
48 | + | |
49 | + /* The name of the command that started this task. */ | |
50 | + char ac_comm[TS_COMM_LEN]; /* Command name */ | |
51 | + | |
52 | + /* The scheduling discipline as set in task->policy field. */ | |
53 | + __u8 ac_sched; /* Scheduling discipline */ | |
54 | + | |
55 | + __u8 ac_pad[3]; | |
56 | + __u32 ac_uid; /* User ID */ | |
57 | + __u32 ac_gid; /* Group ID */ | |
58 | + __u32 ac_pid; /* Process ID */ | |
59 | + __u32 ac_ppid; /* Parent process ID */ | |
60 | + | |
61 | + /* The time when a task begins, in [secs] since 1970. */ | |
62 | + __u32 ac_btime; /* Begin time [sec since 1970] */ | |
63 | + | |
64 | + /* The elapsed time of a task, in [usec]. */ | |
65 | + __u64 ac_etime; /* Elapsed time [usec] */ | |
66 | + | |
67 | + /* The user CPU time of a task, in [usec]. */ | |
68 | + __u64 ac_utime; /* User CPU time [usec] */ | |
69 | + | |
70 | + /* The system CPU time of a task, in [usec]. */ | |
71 | + __u64 ac_stime; /* System CPU time [usec] */ | |
72 | + | |
73 | + /* The minor page fault count of a task, as set in task->min_flt. */ | |
74 | + __u64 ac_minflt; /* Minor Page Fault Count */ | |
75 | + | |
76 | + /* The major page fault count of a task, as set in task->maj_flt. */ | |
77 | + __u64 ac_majflt; /* Major Page Fault Count */ | |
78 | + | |
79 | + | |
80 | +2) Delay accounting fields: | |
81 | + /* Delay accounting fields start | |
82 | + * | |
83 | + * All values, until the comment "Delay accounting fields end" are | |
84 | + * available only if delay accounting is enabled, even though the last | |
85 | + * few fields are not delays | |
86 | + * | |
87 | + * xxx_count is the number of delay values recorded | |
88 | + * xxx_delay_total is the corresponding cumulative delay in nanoseconds | |
89 | + * | |
90 | + * xxx_delay_total wraps around to zero on overflow | |
91 | + * xxx_count incremented regardless of overflow | |
92 | + */ | |
93 | + | |
94 | + /* Delay waiting for cpu, while runnable | |
95 | + * count, delay_total NOT updated atomically | |
96 | + */ | |
97 | + __u64 cpu_count; | |
98 | + __u64 cpu_delay_total; | |
99 | + | |
100 | + /* Following four fields atomically updated using task->delays->lock */ | |
101 | + | |
102 | + /* Delay waiting for synchronous block I/O to complete | |
103 | + * does not account for delays in I/O submission | |
104 | + */ | |
105 | + __u64 blkio_count; | |
106 | + __u64 blkio_delay_total; | |
107 | + | |
108 | + /* Delay waiting for page fault I/O (swap in only) */ | |
109 | + __u64 swapin_count; | |
110 | + __u64 swapin_delay_total; | |
111 | + | |
112 | + /* cpu "wall-clock" running time | |
113 | + * On some architectures, value will adjust for cpu time stolen | |
114 | + * from the kernel in involuntary waits due to virtualization. | |
115 | + * Value is cumulative, in nanoseconds, without a corresponding count | |
116 | + * and wraps around to zero silently on overflow | |
117 | + */ | |
118 | + __u64 cpu_run_real_total; | |
119 | + | |
120 | + /* cpu "virtual" running time | |
121 | + * Uses time intervals seen by the kernel i.e. no adjustment | |
122 | + * for kernel's involuntary waits due to virtualization. | |
123 | + * Value is cumulative, in nanoseconds, without a corresponding count | |
124 | + * and wraps around to zero silently on overflow | |
125 | + */ | |
126 | + __u64 cpu_run_virtual_total; | |
127 | + /* Delay accounting fields end */ | |
128 | + /* version 1 ends here */ | |
129 | + | |
130 | + | |
131 | +3) Extended accounting fields | |
132 | + /* Extended accounting fields start */ | |
133 | + | |
134 | + /* Accumulated RSS usage in duration of a task, in MBytes-usecs. | |
135 | + * The current rss usage is added to this counter every time | |
136 | + * a tick is charged to a task's system time. So, at the end we | |
137 | + * will have memory usage multiplied by system time. Thus an | |
138 | + * average usage per system time unit can be calculated. | |
139 | + */ | |
140 | + __u64 coremem; /* accumulated RSS usage in MB-usec */ | |
141 | + | |
142 | + /* Accumulated virtual memory usage in duration of a task. | |
143 | + * Same as coremem above except that we keep track of VM usage. | |
144 | + */ | |
145 | + __u64 virtmem; /* accumulated VM usage in MB-usec */ | |
146 | + | |
147 | + /* High watermark of RSS usage in duration of a task, in KBytes. */ | |
148 | + __u64 hiwater_rss; /* High-watermark of RSS usage */ | |
149 | + | |
150 | + /* High watermark of VM usage in duration of a task, in KBytes. */ | |
151 | + __u64 hiwater_vm; /* High-water virtual memory usage */ | |
152 | + | |
153 | + /* The following four fields are I/O statistics of a task. */ | |
154 | + __u64 read_char; /* bytes read */ | |
155 | + __u64 write_char; /* bytes written */ | |
156 | + __u64 read_syscalls; /* read syscalls */ | |
157 | + __u64 write_syscalls; /* write syscalls */ | |
158 | + | |
159 | + /* Extended accounting fields end */ | |
160 | + | |
161 | +} |
include/linux/sched.h
... | ... | @@ -984,7 +984,7 @@ |
984 | 984 | #if defined(CONFIG_TASK_XACCT) |
985 | 985 | u64 acct_rss_mem1; /* accumulated rss usage */ |
986 | 986 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ |
987 | - clock_t acct_stimexpd; /* clock_t-converted stime since last update */ | |
987 | + cputime_t acct_stimexpd;/* stime since last update */ | |
988 | 988 | #endif |
989 | 989 | #ifdef CONFIG_NUMA |
990 | 990 | struct mempolicy *mempolicy; |
include/linux/taskstats.h
... | ... | @@ -32,14 +32,21 @@ |
32 | 32 | |
33 | 33 | |
34 | 34 | #define TASKSTATS_VERSION 2 |
35 | -#define TS_COMM_LEN 16 /* should sync up with TASK_COMM_LEN | |
35 | +#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN | |
36 | 36 | * in linux/sched.h */ |
37 | 37 | |
38 | 38 | struct taskstats { |
39 | 39 | |
40 | - /* Version 1 */ | |
40 | + /* The version number of this struct. This field is always set to | |
41 | + * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>. | |
42 | + * Each time the struct is changed, the value should be incremented. | |
43 | + */ | |
41 | 44 | __u16 version; |
42 | - __u32 ac_exitcode; /* Exit status */ | |
45 | + __u32 ac_exitcode; /* Exit status */ | |
46 | + | |
47 | + /* The accounting flags of a task as defined in <linux/acct.h> | |
48 | + * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. | |
49 | + */ | |
43 | 50 | __u8 ac_flag; /* Record flags */ |
44 | 51 | __u8 ac_nice; /* task_nice */ |
45 | 52 | |
46 | 53 | |
... | ... | @@ -104,15 +111,30 @@ |
104 | 111 | __u64 ac_etime; /* Elapsed time [usec] */ |
105 | 112 | __u64 ac_utime; /* User CPU time [usec] */ |
106 | 113 | __u64 ac_stime; /* SYstem CPU time [usec] */ |
107 | - __u64 ac_minflt; /* Minor Page Fault */ | |
108 | - __u64 ac_majflt; /* Major Page Fault */ | |
114 | + __u64 ac_minflt; /* Minor Page Fault Count */ | |
115 | + __u64 ac_majflt; /* Major Page Fault Count */ | |
109 | 116 | /* Basic Accounting Fields end */ |
110 | 117 | |
111 | 118 | /* Extended accounting fields start */ |
112 | - __u64 acct_rss_mem1; /* accumulated rss usage */ | |
113 | - __u64 acct_vm_mem1; /* accumulated virtual memory usage */ | |
114 | - __u64 hiwater_rss; /* High-watermark of RSS usage */ | |
115 | - __u64 hiwater_vm; /* High-water virtual memory usage */ | |
119 | + /* Accumulated RSS usage in duration of a task, in MBytes-usecs. | |
120 | + * The current rss usage is added to this counter every time | |
121 | + * a tick is charged to a task's system time. So, at the end we | |
122 | + * will have memory usage multiplied by system time. Thus an | |
123 | + * average usage per system time unit can be calculated. | |
124 | + */ | |
125 | + __u64 coremem; /* accumulated RSS usage in MB-usec */ | |
126 | + /* Accumulated virtual memory usage in duration of a task. | |
127 | + * Same as coremem above except that we keep track of VM usage. | |
128 | + */ | |
129 | + __u64 virtmem; /* accumulated VM usage in MB-usec */ | |
130 | + | |
131 | + /* High watermark of RSS and virtual memory usage in duration of | |
132 | + * a task, in KBytes. | |
133 | + */ | |
134 | + __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ | |
135 | + __u64 hiwater_vm; /* High-water VM usage, in KB */ | |
136 | + | |
137 | + /* The following four fields are I/O statistics of a task. */ | |
116 | 138 | __u64 read_char; /* bytes read */ |
117 | 139 | __u64 write_char; /* bytes written */ |
118 | 140 | __u64 read_syscalls; /* read syscalls */ |
kernel/tsacct.c
... | ... | @@ -20,6 +20,7 @@ |
20 | 20 | #include <linux/sched.h> |
21 | 21 | #include <linux/tsacct_kern.h> |
22 | 22 | #include <linux/acct.h> |
23 | +#include <linux/jiffies.h> | |
23 | 24 | |
24 | 25 | |
25 | 26 | #define USEC_PER_TICK (USEC_PER_SEC/HZ) |
26 | 27 | |
27 | 28 | |
28 | 29 | |
29 | 30 | |
30 | 31 | |
... | ... | @@ -62,34 +63,36 @@ |
62 | 63 | stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; |
63 | 64 | stats->ac_minflt = tsk->min_flt; |
64 | 65 | stats->ac_majflt = tsk->maj_flt; |
65 | - /* Each process gets a minimum of one usec cpu time */ | |
66 | - if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) { | |
67 | - stats->ac_stime = 1; | |
68 | - } | |
69 | 66 | |
70 | 67 | strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); |
71 | 68 | } |
72 | 69 | |
73 | 70 | |
74 | 71 | #ifdef CONFIG_TASK_XACCT |
72 | + | |
73 | +#define KB 1024 | |
74 | +#define MB (1024*KB) | |
75 | 75 | /* |
76 | 76 | * fill in extended accounting fields |
77 | 77 | */ |
78 | 78 | void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) |
79 | 79 | { |
80 | - stats->acct_rss_mem1 = p->acct_rss_mem1; | |
81 | - stats->acct_vm_mem1 = p->acct_vm_mem1; | |
80 | + /* convert pages-jiffies to Mbyte-usec */ | |
81 | + stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; | |
82 | + stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; | |
82 | 83 | if (p->mm) { |
83 | - stats->hiwater_rss = p->mm->hiwater_rss; | |
84 | - stats->hiwater_vm = p->mm->hiwater_vm; | |
84 | + /* adjust to KB unit */ | |
85 | + stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB; | |
86 | + stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB; | |
85 | 87 | } |
86 | 88 | stats->read_char = p->rchar; |
87 | 89 | stats->write_char = p->wchar; |
88 | 90 | stats->read_syscalls = p->syscr; |
89 | 91 | stats->write_syscalls = p->syscw; |
90 | 92 | } |
93 | +#undef KB | |
94 | +#undef MB | |
91 | 95 | |
92 | - | |
93 | 96 | /** |
94 | 97 | * acct_update_integrals - update mm integral fields in task_struct |
95 | 98 | * @tsk: task_struct for accounting |
... | ... | @@ -97,8 +100,8 @@ |
97 | 100 | void acct_update_integrals(struct task_struct *tsk) |
98 | 101 | { |
99 | 102 | if (likely(tsk->mm)) { |
100 | - long delta = | |
101 | - cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; | |
103 | + long delta = cputime_to_jiffies( | |
104 | + cputime_sub(tsk->stime, tsk->acct_stimexpd)); | |
102 | 105 | |
103 | 106 | if (delta == 0) |
104 | 107 | return; |