Commit 9e06d3f9f6b14f6e3120923ed215032726246c98
Committed by
Linus Torvalds
1 parent
ad4ecbcba7
Exists in
master
and in
7 other branches
[PATCH] per task delay accounting taskstats interface: documentation fix
Change documentation and example program to reflect the flow control issues being addressed by the cpumask changes. Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 2 changed files with 365 additions and 305 deletions Side-by-side Diff
Documentation/accounting/getdelays.c
... | ... | @@ -5,6 +5,7 @@ |
5 | 5 | * |
6 | 6 | * Copyright (C) Shailabh Nagar, IBM Corp. 2005 |
7 | 7 | * Copyright (C) Balbir Singh, IBM Corp. 2006 |
8 | + * Copyright (c) Jay Lan, SGI. 2006 | |
8 | 9 | * |
9 | 10 | */ |
10 | 11 | |
11 | 12 | |
12 | 13 | |
13 | 14 | |
14 | 15 | |
15 | 16 | |
16 | 17 | |
17 | 18 | |
18 | 19 | |
19 | 20 | |
20 | 21 | |
21 | 22 | |
22 | 23 | |
23 | 24 | |
24 | 25 | |
25 | 26 | |
26 | 27 | |
27 | 28 | |
28 | 29 | |
29 | 30 | |
30 | 31 | |
31 | 32 | |
32 | 33 | |
33 | 34 | |
34 | 35 | |
35 | 36 | |
36 | 37 | |
37 | 38 | |
38 | 39 | |
39 | 40 | |
40 | 41 | |
41 | 42 | |
42 | 43 | |
43 | 44 | |
44 | 45 | |
45 | 46 | |
46 | 47 | |
47 | 48 | |
48 | 49 | |
49 | 50 | |
50 | 51 | |
51 | 52 | |
52 | 53 | |
53 | 54 | |
54 | 55 | |
55 | 56 | |
... | ... | @@ -36,342 +37,361 @@ |
36 | 37 | |
37 | 38 | #define err(code, fmt, arg...) do { printf(fmt, ##arg); exit(code); } while (0) |
38 | 39 | int done = 0; |
40 | +int rcvbufsz=0; | |
39 | 41 | |
42 | + char name[100]; | |
43 | +int dbg=0, print_delays=0; | |
44 | +__u64 stime, utime; | |
45 | +#define PRINTF(fmt, arg...) { \ | |
46 | + if (dbg) { \ | |
47 | + printf(fmt, ##arg); \ | |
48 | + } \ | |
49 | + } | |
50 | + | |
51 | +/* Maximum size of response requested or message sent */ | |
52 | +#define MAX_MSG_SIZE 256 | |
53 | +/* Maximum number of cpus expected to be specified in a cpumask */ | |
54 | +#define MAX_CPUS 32 | |
55 | +/* Maximum length of pathname to log file */ | |
56 | +#define MAX_FILENAME 256 | |
57 | + | |
58 | +struct msgtemplate { | |
59 | + struct nlmsghdr n; | |
60 | + struct genlmsghdr g; | |
61 | + char buf[MAX_MSG_SIZE]; | |
62 | +}; | |
63 | + | |
64 | +char cpumask[100+6*MAX_CPUS]; | |
65 | + | |
40 | 66 | /* |
41 | 67 | * Create a raw netlink socket and bind |
42 | 68 | */ |
43 | -static int create_nl_socket(int protocol, int groups) | |
69 | +static int create_nl_socket(int protocol) | |
44 | 70 | { |
45 | - socklen_t addr_len; | |
46 | - int fd; | |
47 | - struct sockaddr_nl local; | |
71 | + int fd; | |
72 | + struct sockaddr_nl local; | |
48 | 73 | |
49 | - fd = socket(AF_NETLINK, SOCK_RAW, protocol); | |
50 | - if (fd < 0) | |
51 | - return -1; | |
74 | + fd = socket(AF_NETLINK, SOCK_RAW, protocol); | |
75 | + if (fd < 0) | |
76 | + return -1; | |
52 | 77 | |
53 | - memset(&local, 0, sizeof(local)); | |
54 | - local.nl_family = AF_NETLINK; | |
55 | - local.nl_groups = groups; | |
78 | + if (rcvbufsz) | |
79 | + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, | |
80 | + &rcvbufsz, sizeof(rcvbufsz)) < 0) { | |
81 | + printf("Unable to set socket rcv buf size to %d\n", | |
82 | + rcvbufsz); | |
83 | + return -1; | |
84 | + } | |
56 | 85 | |
57 | - if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) | |
58 | - goto error; | |
86 | + memset(&local, 0, sizeof(local)); | |
87 | + local.nl_family = AF_NETLINK; | |
59 | 88 | |
60 | - return fd; | |
61 | - error: | |
62 | - close(fd); | |
63 | - return -1; | |
89 | + if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) | |
90 | + goto error; | |
91 | + | |
92 | + return fd; | |
93 | +error: | |
94 | + close(fd); | |
95 | + return -1; | |
64 | 96 | } |
65 | 97 | |
66 | -int sendto_fd(int s, const char *buf, int bufLen) | |
98 | + | |
99 | +int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, | |
100 | + __u8 genl_cmd, __u16 nla_type, | |
101 | + void *nla_data, int nla_len) | |
67 | 102 | { |
68 | - struct sockaddr_nl nladdr; | |
69 | - int r; | |
103 | + struct nlattr *na; | |
104 | + struct sockaddr_nl nladdr; | |
105 | + int r, buflen; | |
106 | + char *buf; | |
70 | 107 | |
71 | - memset(&nladdr, 0, sizeof(nladdr)); | |
72 | - nladdr.nl_family = AF_NETLINK; | |
108 | + struct msgtemplate msg; | |
73 | 109 | |
74 | - while ((r = sendto(s, buf, bufLen, 0, (struct sockaddr *) &nladdr, | |
75 | - sizeof(nladdr))) < bufLen) { | |
76 | - if (r > 0) { | |
77 | - buf += r; | |
78 | - bufLen -= r; | |
79 | - } else if (errno != EAGAIN) | |
80 | - return -1; | |
81 | - } | |
82 | - return 0; | |
110 | + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); | |
111 | + msg.n.nlmsg_type = nlmsg_type; | |
112 | + msg.n.nlmsg_flags = NLM_F_REQUEST; | |
113 | + msg.n.nlmsg_seq = 0; | |
114 | + msg.n.nlmsg_pid = nlmsg_pid; | |
115 | + msg.g.cmd = genl_cmd; | |
116 | + msg.g.version = 0x1; | |
117 | + na = (struct nlattr *) GENLMSG_DATA(&msg); | |
118 | + na->nla_type = nla_type; | |
119 | + na->nla_len = nla_len + 1 + NLA_HDRLEN; | |
120 | + memcpy(NLA_DATA(na), nla_data, nla_len); | |
121 | + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); | |
122 | + | |
123 | + buf = (char *) &msg; | |
124 | + buflen = msg.n.nlmsg_len ; | |
125 | + memset(&nladdr, 0, sizeof(nladdr)); | |
126 | + nladdr.nl_family = AF_NETLINK; | |
127 | + while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, | |
128 | + sizeof(nladdr))) < buflen) { | |
129 | + if (r > 0) { | |
130 | + buf += r; | |
131 | + buflen -= r; | |
132 | + } else if (errno != EAGAIN) | |
133 | + return -1; | |
134 | + } | |
135 | + return 0; | |
83 | 136 | } |
84 | 137 | |
138 | + | |
85 | 139 | /* |
86 | 140 | * Probe the controller in genetlink to find the family id |
87 | 141 | * for the TASKSTATS family |
88 | 142 | */ |
89 | 143 | int get_family_id(int sd) |
90 | 144 | { |
91 | - struct { | |
92 | - struct nlmsghdr n; | |
93 | - struct genlmsghdr g; | |
94 | - char buf[256]; | |
95 | - } family_req; | |
96 | - struct { | |
97 | - struct nlmsghdr n; | |
98 | - struct genlmsghdr g; | |
99 | - char buf[256]; | |
100 | - } ans; | |
145 | + struct { | |
146 | + struct nlmsghdr n; | |
147 | + struct genlmsghdr g; | |
148 | + char buf[256]; | |
149 | + } ans; | |
101 | 150 | |
102 | - int id; | |
103 | - struct nlattr *na; | |
104 | - int rep_len; | |
151 | + int id, rc; | |
152 | + struct nlattr *na; | |
153 | + int rep_len; | |
105 | 154 | |
106 | - /* Get family name */ | |
107 | - family_req.n.nlmsg_type = GENL_ID_CTRL; | |
108 | - family_req.n.nlmsg_flags = NLM_F_REQUEST; | |
109 | - family_req.n.nlmsg_seq = 0; | |
110 | - family_req.n.nlmsg_pid = getpid(); | |
111 | - family_req.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); | |
112 | - family_req.g.cmd = CTRL_CMD_GETFAMILY; | |
113 | - family_req.g.version = 0x1; | |
114 | - na = (struct nlattr *) GENLMSG_DATA(&family_req); | |
115 | - na->nla_type = CTRL_ATTR_FAMILY_NAME; | |
116 | - na->nla_len = strlen(TASKSTATS_GENL_NAME) + 1 + NLA_HDRLEN; | |
117 | - strcpy(NLA_DATA(na), TASKSTATS_GENL_NAME); | |
118 | - family_req.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); | |
155 | + strcpy(name, TASKSTATS_GENL_NAME); | |
156 | + rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, | |
157 | + CTRL_ATTR_FAMILY_NAME, (void *)name, | |
158 | + strlen(TASKSTATS_GENL_NAME)+1); | |
119 | 159 | |
120 | - if (sendto_fd(sd, (char *) &family_req, family_req.n.nlmsg_len) < 0) | |
121 | - err(1, "error sending message via Netlink\n"); | |
160 | + rep_len = recv(sd, &ans, sizeof(ans), 0); | |
161 | + if (ans.n.nlmsg_type == NLMSG_ERROR || | |
162 | + (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) | |
163 | + return 0; | |
122 | 164 | |
123 | - rep_len = recv(sd, &ans, sizeof(ans), 0); | |
124 | - | |
125 | - if (rep_len < 0) | |
126 | - err(1, "error receiving reply message via Netlink\n"); | |
127 | - | |
128 | - | |
129 | - /* Validate response message */ | |
130 | - if (!NLMSG_OK((&ans.n), rep_len)) | |
131 | - err(1, "invalid reply message received via Netlink\n"); | |
132 | - | |
133 | - if (ans.n.nlmsg_type == NLMSG_ERROR) { /* error */ | |
134 | - printf("error received NACK - leaving\n"); | |
135 | - exit(1); | |
136 | - } | |
137 | - | |
138 | - | |
139 | - na = (struct nlattr *) GENLMSG_DATA(&ans); | |
140 | - na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); | |
141 | - if (na->nla_type == CTRL_ATTR_FAMILY_ID) { | |
142 | - id = *(__u16 *) NLA_DATA(na); | |
143 | - } | |
144 | - return id; | |
165 | + na = (struct nlattr *) GENLMSG_DATA(&ans); | |
166 | + na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); | |
167 | + if (na->nla_type == CTRL_ATTR_FAMILY_ID) { | |
168 | + id = *(__u16 *) NLA_DATA(na); | |
169 | + } | |
170 | + return id; | |
145 | 171 | } |
146 | 172 | |
147 | -void print_taskstats(struct taskstats *t) | |
173 | +void print_delayacct(struct taskstats *t) | |
148 | 174 | { |
149 | - printf("\n\nCPU %15s%15s%15s%15s\n" | |
150 | - " %15llu%15llu%15llu%15llu\n" | |
151 | - "IO %15s%15s\n" | |
152 | - " %15llu%15llu\n" | |
153 | - "MEM %15s%15s\n" | |
154 | - " %15llu%15llu\n\n", | |
155 | - "count", "real total", "virtual total", "delay total", | |
156 | - t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, | |
157 | - t->cpu_delay_total, | |
158 | - "count", "delay total", | |
159 | - t->blkio_count, t->blkio_delay_total, | |
160 | - "count", "delay total", t->swapin_count, t->swapin_delay_total); | |
175 | + printf("\n\nCPU %15s%15s%15s%15s\n" | |
176 | + " %15llu%15llu%15llu%15llu\n" | |
177 | + "IO %15s%15s\n" | |
178 | + " %15llu%15llu\n" | |
179 | + "MEM %15s%15s\n" | |
180 | + " %15llu%15llu\n\n", | |
181 | + "count", "real total", "virtual total", "delay total", | |
182 | + t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, | |
183 | + t->cpu_delay_total, | |
184 | + "count", "delay total", | |
185 | + t->blkio_count, t->blkio_delay_total, | |
186 | + "count", "delay total", t->swapin_count, t->swapin_delay_total); | |
161 | 187 | } |
162 | 188 | |
163 | -void sigchld(int sig) | |
164 | -{ | |
165 | - done = 1; | |
166 | -} | |
167 | - | |
168 | 189 | int main(int argc, char *argv[]) |
169 | 190 | { |
170 | - int rc; | |
171 | - int sk_nl; | |
172 | - struct nlmsghdr *nlh; | |
173 | - struct genlmsghdr *genlhdr; | |
174 | - char *buf; | |
175 | - struct taskstats_cmd_param *param; | |
176 | - __u16 id; | |
177 | - struct nlattr *na; | |
191 | + int c, rc, rep_len, aggr_len, len2, cmd_type; | |
192 | + __u16 id; | |
193 | + __u32 mypid; | |
178 | 194 | |
179 | - /* For receiving */ | |
180 | - struct sockaddr_nl kern_nla, from_nla; | |
181 | - socklen_t from_nla_len; | |
182 | - int recv_len; | |
183 | - struct taskstats_reply *reply; | |
195 | + struct nlattr *na; | |
196 | + int nl_sd = -1; | |
197 | + int len = 0; | |
198 | + pid_t tid = 0; | |
199 | + pid_t rtid = 0; | |
184 | 200 | |
185 | - struct { | |
186 | - struct nlmsghdr n; | |
187 | - struct genlmsghdr g; | |
188 | - char buf[256]; | |
189 | - } req; | |
201 | + int fd = 0; | |
202 | + int count = 0; | |
203 | + int write_file = 0; | |
204 | + int maskset = 0; | |
205 | + char logfile[128]; | |
206 | + int loop = 0; | |
190 | 207 | |
191 | - struct { | |
192 | - struct nlmsghdr n; | |
193 | - struct genlmsghdr g; | |
194 | - char buf[256]; | |
195 | - } ans; | |
208 | + struct msgtemplate msg; | |
196 | 209 | |
197 | - int nl_sd = -1; | |
198 | - int rep_len; | |
199 | - int len = 0; | |
200 | - int aggr_len, len2; | |
201 | - struct sockaddr_nl nladdr; | |
202 | - pid_t tid = 0; | |
203 | - pid_t rtid = 0; | |
204 | - int cmd_type = TASKSTATS_TYPE_TGID; | |
205 | - int c, status; | |
206 | - int forking = 0; | |
207 | - struct sigaction act = { | |
208 | - .sa_handler = SIG_IGN, | |
209 | - .sa_mask = SA_NOMASK, | |
210 | - }; | |
211 | - struct sigaction tact ; | |
210 | + while (1) { | |
211 | + c = getopt(argc, argv, "dw:r:m:t:p:v:l"); | |
212 | + if (c < 0) | |
213 | + break; | |
212 | 214 | |
213 | - if (argc < 3) { | |
214 | - printf("usage %s [-t tgid][-p pid][-c cmd]\n", argv[0]); | |
215 | - exit(-1); | |
216 | - } | |
215 | + switch (c) { | |
216 | + case 'd': | |
217 | + printf("print delayacct stats ON\n"); | |
218 | + print_delays = 1; | |
219 | + break; | |
220 | + case 'w': | |
221 | + strncpy(logfile, optarg, MAX_FILENAME); | |
222 | + printf("write to file %s\n", logfile); | |
223 | + write_file = 1; | |
224 | + break; | |
225 | + case 'r': | |
226 | + rcvbufsz = atoi(optarg); | |
227 | + printf("receive buf size %d\n", rcvbufsz); | |
228 | + if (rcvbufsz < 0) | |
229 | + err(1, "Invalid rcv buf size\n"); | |
230 | + break; | |
231 | + case 'm': | |
232 | + strncpy(cpumask, optarg, sizeof(cpumask)); | |
233 | + maskset = 1; | |
234 | + printf("cpumask %s maskset %d\n", cpumask, maskset); | |
235 | + break; | |
236 | + case 't': | |
237 | + tid = atoi(optarg); | |
238 | + if (!tid) | |
239 | + err(1, "Invalid tgid\n"); | |
240 | + cmd_type = TASKSTATS_CMD_ATTR_TGID; | |
241 | + print_delays = 1; | |
242 | + break; | |
243 | + case 'p': | |
244 | + tid = atoi(optarg); | |
245 | + if (!tid) | |
246 | + err(1, "Invalid pid\n"); | |
247 | + cmd_type = TASKSTATS_CMD_ATTR_PID; | |
248 | + print_delays = 1; | |
249 | + break; | |
250 | + case 'v': | |
251 | + printf("debug on\n"); | |
252 | + dbg = 1; | |
253 | + break; | |
254 | + case 'l': | |
255 | + printf("listen forever\n"); | |
256 | + loop = 1; | |
257 | + break; | |
258 | + default: | |
259 | + printf("Unknown option %d\n", c); | |
260 | + exit(-1); | |
261 | + } | |
262 | + } | |
217 | 263 | |
218 | - tact.sa_handler = sigchld; | |
219 | - sigemptyset(&tact.sa_mask); | |
220 | - if (sigaction(SIGCHLD, &tact, NULL) < 0) | |
221 | - err(1, "sigaction failed for SIGCHLD\n"); | |
264 | + if (write_file) { | |
265 | + fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC, | |
266 | + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); | |
267 | + if (fd == -1) { | |
268 | + perror("Cannot open output file\n"); | |
269 | + exit(1); | |
270 | + } | |
271 | + } | |
222 | 272 | |
223 | - while (1) { | |
273 | + if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0) | |
274 | + err(1, "error creating Netlink socket\n"); | |
224 | 275 | |
225 | - c = getopt(argc, argv, "t:p:c:"); | |
226 | - if (c < 0) | |
227 | - break; | |
228 | 276 | |
229 | - switch (c) { | |
230 | - case 't': | |
231 | - tid = atoi(optarg); | |
232 | - if (!tid) | |
233 | - err(1, "Invalid tgid\n"); | |
234 | - cmd_type = TASKSTATS_CMD_ATTR_TGID; | |
235 | - break; | |
236 | - case 'p': | |
237 | - tid = atoi(optarg); | |
238 | - if (!tid) | |
239 | - err(1, "Invalid pid\n"); | |
240 | - cmd_type = TASKSTATS_CMD_ATTR_TGID; | |
241 | - break; | |
242 | - case 'c': | |
243 | - opterr = 0; | |
244 | - tid = fork(); | |
245 | - if (tid < 0) | |
246 | - err(1, "fork failed\n"); | |
277 | + mypid = getpid(); | |
278 | + id = get_family_id(nl_sd); | |
279 | + if (!id) { | |
280 | + printf("Error getting family id, errno %d", errno); | |
281 | + goto err; | |
282 | + } | |
283 | + PRINTF("family id %d\n", id); | |
247 | 284 | |
248 | - if (tid == 0) { /* child process */ | |
249 | - if (execvp(argv[optind - 1], &argv[optind - 1]) < 0) { | |
250 | - exit(-1); | |
285 | + if (maskset) { | |
286 | + rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, | |
287 | + TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, | |
288 | + &cpumask, sizeof(cpumask)); | |
289 | + PRINTF("Sent register cpumask, retval %d\n", rc); | |
290 | + if (rc < 0) { | |
291 | + printf("error sending register cpumask\n"); | |
292 | + goto err; | |
251 | 293 | } |
252 | - } | |
253 | - forking = 1; | |
254 | - break; | |
255 | - default: | |
256 | - printf("usage %s [-t tgid][-p pid][-c cmd]\n", argv[0]); | |
257 | - exit(-1); | |
258 | - break; | |
259 | 294 | } |
260 | - if (c == 'c') | |
261 | - break; | |
262 | - } | |
263 | 295 | |
264 | - /* Construct Netlink request message */ | |
296 | + if (tid) { | |
297 | + rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, | |
298 | + cmd_type, &tid, sizeof(__u32)); | |
299 | + PRINTF("Sent pid/tgid, retval %d\n", rc); | |
300 | + if (rc < 0) { | |
301 | + printf("error sending tid/tgid cmd\n"); | |
302 | + goto done; | |
303 | + } | |
304 | + } | |
265 | 305 | |
266 | - /* Send Netlink request message & get reply */ | |
306 | + do { | |
307 | + int i; | |
267 | 308 | |
268 | - if ((nl_sd = | |
269 | - create_nl_socket(NETLINK_GENERIC, TASKSTATS_LISTEN_GROUP)) < 0) | |
270 | - err(1, "error creating Netlink socket\n"); | |
309 | + rep_len = recv(nl_sd, &msg, sizeof(msg), 0); | |
310 | + PRINTF("received %d bytes\n", rep_len); | |
271 | 311 | |
312 | + if (rep_len < 0) { | |
313 | + printf("nonfatal reply error: errno %d\n", errno); | |
314 | + continue; | |
315 | + } | |
316 | + if (msg.n.nlmsg_type == NLMSG_ERROR || | |
317 | + !NLMSG_OK((&msg.n), rep_len)) { | |
318 | + printf("fatal reply error, errno %d\n", errno); | |
319 | + goto done; | |
320 | + } | |
272 | 321 | |
273 | - id = get_family_id(nl_sd); | |
322 | + PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n", | |
323 | + sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len); | |
274 | 324 | |
275 | - /* Send command needed */ | |
276 | - req.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); | |
277 | - req.n.nlmsg_type = id; | |
278 | - req.n.nlmsg_flags = NLM_F_REQUEST; | |
279 | - req.n.nlmsg_seq = 0; | |
280 | - req.n.nlmsg_pid = tid; | |
281 | - req.g.cmd = TASKSTATS_CMD_GET; | |
282 | - na = (struct nlattr *) GENLMSG_DATA(&req); | |
283 | - na->nla_type = cmd_type; | |
284 | - na->nla_len = sizeof(unsigned int) + NLA_HDRLEN; | |
285 | - *(__u32 *) NLA_DATA(na) = tid; | |
286 | - req.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); | |
287 | 325 | |
326 | + rep_len = GENLMSG_PAYLOAD(&msg.n); | |
288 | 327 | |
289 | - if (!forking && sendto_fd(nl_sd, (char *) &req, req.n.nlmsg_len) < 0) | |
290 | - err(1, "error sending message via Netlink\n"); | |
328 | + na = (struct nlattr *) GENLMSG_DATA(&msg); | |
329 | + len = 0; | |
330 | + i = 0; | |
331 | + while (len < rep_len) { | |
332 | + len += NLA_ALIGN(na->nla_len); | |
333 | + switch (na->nla_type) { | |
334 | + case TASKSTATS_TYPE_AGGR_TGID: | |
335 | + /* Fall through */ | |
336 | + case TASKSTATS_TYPE_AGGR_PID: | |
337 | + aggr_len = NLA_PAYLOAD(na->nla_len); | |
338 | + len2 = 0; | |
339 | + /* For nested attributes, na follows */ | |
340 | + na = (struct nlattr *) NLA_DATA(na); | |
341 | + done = 0; | |
342 | + while (len2 < aggr_len) { | |
343 | + switch (na->nla_type) { | |
344 | + case TASKSTATS_TYPE_PID: | |
345 | + rtid = *(int *) NLA_DATA(na); | |
346 | + if (print_delays) | |
347 | + printf("PID\t%d\n", rtid); | |
348 | + break; | |
349 | + case TASKSTATS_TYPE_TGID: | |
350 | + rtid = *(int *) NLA_DATA(na); | |
351 | + if (print_delays) | |
352 | + printf("TGID\t%d\n", rtid); | |
353 | + break; | |
354 | + case TASKSTATS_TYPE_STATS: | |
355 | + count++; | |
356 | + if (print_delays) | |
357 | + print_delayacct((struct taskstats *) NLA_DATA(na)); | |
358 | + if (fd) { | |
359 | + if (write(fd, NLA_DATA(na), na->nla_len) < 0) { | |
360 | + err(1,"write error\n"); | |
361 | + } | |
362 | + } | |
363 | + if (!loop) | |
364 | + goto done; | |
365 | + break; | |
366 | + default: | |
367 | + printf("Unknown nested nla_type %d\n", na->nla_type); | |
368 | + break; | |
369 | + } | |
370 | + len2 += NLA_ALIGN(na->nla_len); | |
371 | + na = (struct nlattr *) ((char *) na + len2); | |
372 | + } | |
373 | + break; | |
291 | 374 | |
292 | - act.sa_handler = SIG_IGN; | |
293 | - sigemptyset(&act.sa_mask); | |
294 | - if (sigaction(SIGINT, &act, NULL) < 0) | |
295 | - err(1, "sigaction failed for SIGINT\n"); | |
296 | - | |
297 | - do { | |
298 | - int i; | |
299 | - struct pollfd pfd; | |
300 | - int pollres; | |
301 | - | |
302 | - pfd.events = 0xffff & ~POLLOUT; | |
303 | - pfd.fd = nl_sd; | |
304 | - pollres = poll(&pfd, 1, 5000); | |
305 | - if (pollres < 0 || done) { | |
306 | - break; | |
307 | - } | |
308 | - | |
309 | - rep_len = recv(nl_sd, &ans, sizeof(ans), 0); | |
310 | - nladdr.nl_family = AF_NETLINK; | |
311 | - nladdr.nl_groups = TASKSTATS_LISTEN_GROUP; | |
312 | - | |
313 | - if (ans.n.nlmsg_type == NLMSG_ERROR) { /* error */ | |
314 | - printf("error received NACK - leaving\n"); | |
315 | - exit(1); | |
316 | - } | |
317 | - | |
318 | - if (rep_len < 0) { | |
319 | - err(1, "error receiving reply message via Netlink\n"); | |
320 | - break; | |
321 | - } | |
322 | - | |
323 | - /* Validate response message */ | |
324 | - if (!NLMSG_OK((&ans.n), rep_len)) | |
325 | - err(1, "invalid reply message received via Netlink\n"); | |
326 | - | |
327 | - rep_len = GENLMSG_PAYLOAD(&ans.n); | |
328 | - | |
329 | - na = (struct nlattr *) GENLMSG_DATA(&ans); | |
330 | - len = 0; | |
331 | - i = 0; | |
332 | - while (len < rep_len) { | |
333 | - len += NLA_ALIGN(na->nla_len); | |
334 | - switch (na->nla_type) { | |
335 | - case TASKSTATS_TYPE_AGGR_PID: | |
336 | - /* Fall through */ | |
337 | - case TASKSTATS_TYPE_AGGR_TGID: | |
338 | - aggr_len = NLA_PAYLOAD(na->nla_len); | |
339 | - len2 = 0; | |
340 | - /* For nested attributes, na follows */ | |
341 | - na = (struct nlattr *) NLA_DATA(na); | |
342 | - done = 0; | |
343 | - while (len2 < aggr_len) { | |
344 | - switch (na->nla_type) { | |
345 | - case TASKSTATS_TYPE_PID: | |
346 | - rtid = *(int *) NLA_DATA(na); | |
347 | - break; | |
348 | - case TASKSTATS_TYPE_TGID: | |
349 | - rtid = *(int *) NLA_DATA(na); | |
350 | - break; | |
351 | - case TASKSTATS_TYPE_STATS: | |
352 | - if (rtid == tid) { | |
353 | - print_taskstats((struct taskstats *) | |
354 | - NLA_DATA(na)); | |
355 | - done = 1; | |
375 | + default: | |
376 | + printf("Unknown nla_type %d\n", na->nla_type); | |
377 | + break; | |
356 | 378 | } |
357 | - break; | |
358 | - } | |
359 | - len2 += NLA_ALIGN(na->nla_len); | |
360 | - na = (struct nlattr *) ((char *) na + len2); | |
361 | - if (done) | |
362 | - break; | |
379 | + na = (struct nlattr *) (GENLMSG_DATA(&msg) + len); | |
363 | 380 | } |
364 | - } | |
365 | - na = (struct nlattr *) (GENLMSG_DATA(&ans) + len); | |
366 | - if (done) | |
367 | - break; | |
381 | + } while (loop); | |
382 | +done: | |
383 | + if (maskset) { | |
384 | + rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, | |
385 | + TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, | |
386 | + &cpumask, sizeof(cpumask)); | |
387 | + printf("Sent deregister mask, retval %d\n", rc); | |
388 | + if (rc < 0) | |
389 | + err(rc, "error sending deregister cpumask\n"); | |
368 | 390 | } |
369 | - if (done) | |
370 | - break; | |
371 | - } | |
372 | - while (1); | |
373 | - | |
374 | - close(nl_sd); | |
375 | - return 0; | |
391 | +err: | |
392 | + close(nl_sd); | |
393 | + if (fd) | |
394 | + close(fd); | |
395 | + return 0; | |
376 | 396 | } |
Documentation/accounting/taskstats.txt
... | ... | @@ -26,20 +26,28 @@ |
26 | 26 | Usage |
27 | 27 | ----- |
28 | 28 | |
29 | -To get statistics during task's lifetime, userspace opens a unicast netlink | |
29 | +To get statistics during a task's lifetime, userspace opens a unicast netlink | |
30 | 30 | socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid. |
31 | 31 | The response contains statistics for a task (if pid is specified) or the sum of |
32 | 32 | statistics for all tasks of the process (if tgid is specified). |
33 | 33 | |
34 | -To obtain statistics for tasks which are exiting, userspace opens a multicast | |
35 | -netlink socket. Each time a task exits, its per-pid statistics is always sent | |
36 | -by the kernel to each listener on the multicast socket. In addition, if it is | |
37 | -the last thread exiting its thread group, an additional record containing the | |
38 | -per-tgid stats are also sent. The latter contains the sum of per-pid stats for | |
39 | -all threads in the thread group, both past and present. | |
34 | +To obtain statistics for tasks which are exiting, the userspace listener | |
35 | +sends a register command and specifies a cpumask. Whenever a task exits on | |
36 | +one of the cpus in the cpumask, its per-pid statistics are sent to the | |
37 | +registered listener. Using cpumasks allows the data received by one listener | |
38 | +to be limited and assists in flow control over the netlink interface; this is | |
39 | +explained in more detail below. | |
40 | 40 | |
41 | +If the exiting task is the last thread exiting its thread group, | |
42 | +an additional record containing the per-tgid stats is also sent to userspace. | |
43 | +The latter contains the sum of per-pid stats for all threads in the thread | |
44 | +group, both past and present. | |
45 | + | |
41 | 46 | getdelays.c is a simple utility demonstrating usage of the taskstats interface |
42 | -for reporting delay accounting statistics. | |
47 | +for reporting delay accounting statistics. Users can register cpumasks, | |
48 | +send commands and process responses, listen for per-tid/tgid exit data, | |
49 | +write the data received to a file and do basic flow control by increasing | |
50 | +receive buffer sizes. | |
43 | 51 | |
44 | 52 | Interface |
45 | 53 | --------- |
46 | 54 | |
... | ... | @@ -66,11 +74,21 @@ |
66 | 74 | |
67 | 75 | The taskstats payload is one of the following three kinds: |
68 | 76 | |
69 | -1. Commands: Sent from user to kernel. The payload is one attribute, of type | |
70 | -TASKSTATS_CMD_ATTR_PID/TGID, containing a u32 pid or tgid in the attribute | |
71 | -payload. The pid/tgid denotes the task/process for which userspace wants | |
72 | -statistics. | |
77 | +1. Commands: Sent from user to kernel. Commands to get data on | |
78 | +a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID, | |
79 | +containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes | |
80 | +the task/process for which userspace wants statistics. | |
73 | 81 | |
82 | +Commands to register/deregister interest in exit data from a set of cpus | |
83 | +consist of one attribute, of type | |
84 | +TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the | |
85 | +attribute payload. The cpumask is specified as an ascii string of | |
86 | +comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8 | |
87 | +the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest | |
88 | +in cpus before closing the listening socket, the kernel cleans up its interest | |
89 | +set over time. However, for the sake of efficiency, an explicit deregistration | |
90 | +is advisable. | |
91 | + | |
74 | 92 | 2. Response for a command: sent from the kernel in response to a userspace |
75 | 93 | command. The payload is a series of three attributes of type: |
76 | 94 | |
... | ... | @@ -137,6 +155,28 @@ |
137 | 155 | struct too much, requiring disparate userspace accounting utilities to |
138 | 156 | unnecessarily receive large structures whose fields are of no interest, then |
139 | 157 | extending the attributes structure would be worthwhile. |
158 | + | |
159 | +Flow control for taskstats | |
160 | +-------------------------- | |
161 | + | |
162 | +When the rate of task exits becomes large, a listener may not be able to keep | |
163 | +up with the kernel's rate of sending per-tid/tgid exit data, leading to data | |
164 | +loss. This possibility gets compounded when the taskstats structure gets | |
165 | +extended and the number of cpus grows large. | |
166 | + | |
167 | +To avoid losing statistics, userspace should do one or more of the following: | |
168 | + | |
169 | +- increase the receive buffer sizes for the netlink sockets opened by | |
170 | +listeners to receive exit data. | |
171 | + | |
172 | +- create more listeners and reduce the number of cpus being listened to by | |
173 | +each listener. In the extreme case, there could be one listener for each cpu. | |
174 | +Users may also consider setting the cpu affinity of the listener to the subset | |
175 | +of cpus to which it listens, especially if they are listening to just one cpu. | |
176 | + | |
177 | +Despite these measures, if userspace receives ENOBUFS error messages | |
178 | +indicating overflow of receive buffers, it should take measures to handle the | |
179 | +loss of data. | |
140 | 180 | |
141 | 181 | ---- |