Commit 0ae646845b603e9df5711084436d389f8371ffb3
Committed by
Linus Torvalds
1 parent
17db952cd1
Exists in
master
and in
4 other branches
[PATCH] Fix taskstats size calculation (use the new genetlink utility functions)
The addition of the CSA patch pushed the size of struct taskstats to 256 bytes. This exposed a problem with prepare_reply(): we were not allocating space for the netlink and genetlink headers. It worked earlier because alloc_skb() would align the skb to SMP_CACHE_BYTES, which added some additional bytes. Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jamal Hadi <hadi@cyberus.ca> Cc: Shailabh Nagar <nagar@watson.ibm.com> Cc: Thomas Graf <tgraf@suug.ch> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 1 changed file with 1 additions and 1 deletions Inline Diff
kernel/taskstats.c
1 | /* | 1 | /* |
2 | * taskstats.c - Export per-task statistics to userland | 2 | * taskstats.c - Export per-task statistics to userland |
3 | * | 3 | * |
4 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | 4 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 |
5 | * (C) Balbir Singh, IBM Corp. 2006 | 5 | * (C) Balbir Singh, IBM Corp. 2006 |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | 8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or | 9 | * the Free Software Foundation; either version 2 of the License, or |
10 | * (at your option) any later version. | 10 | * (at your option) any later version. |
11 | * | 11 | * |
12 | * This program is distributed in the hope that it will be useful, | 12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | * GNU General Public License for more details. | 15 | * GNU General Public License for more details. |
16 | * | 16 | * |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/taskstats_kern.h> | 20 | #include <linux/taskstats_kern.h> |
21 | #include <linux/delayacct.h> | 21 | #include <linux/delayacct.h> |
22 | #include <linux/cpumask.h> | 22 | #include <linux/cpumask.h> |
23 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
24 | #include <net/genetlink.h> | 24 | #include <net/genetlink.h> |
25 | #include <asm/atomic.h> | 25 | #include <asm/atomic.h> |
26 | 26 | ||
/*
 * Upper bound, in bytes, on the cpumask string accepted in the
 * TASKSTATS_CMD_ATTR_REGISTER_CPUMASK and
 * TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK attributes; longer attributes
 * are rejected by parse() with -E2BIG.
 */
#define TASKSTATS_CPUMASK_MAXLEN	(100 + 6 * NR_CPUS)
32 | 32 | ||
33 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; | 33 | static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; |
34 | static int family_registered; | 34 | static int family_registered; |
35 | kmem_cache_t *taskstats_cache; | 35 | kmem_cache_t *taskstats_cache; |
36 | 36 | ||
37 | static struct genl_family family = { | 37 | static struct genl_family family = { |
38 | .id = GENL_ID_GENERATE, | 38 | .id = GENL_ID_GENERATE, |
39 | .name = TASKSTATS_GENL_NAME, | 39 | .name = TASKSTATS_GENL_NAME, |
40 | .version = TASKSTATS_GENL_VERSION, | 40 | .version = TASKSTATS_GENL_VERSION, |
41 | .maxattr = TASKSTATS_CMD_ATTR_MAX, | 41 | .maxattr = TASKSTATS_CMD_ATTR_MAX, |
42 | }; | 42 | }; |
43 | 43 | ||
44 | static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] | 44 | static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] |
45 | __read_mostly = { | 45 | __read_mostly = { |
46 | [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, | 46 | [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, |
47 | [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, | 47 | [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, |
48 | [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, | 48 | [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, |
49 | [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; | 49 | [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; |
50 | 50 | ||
51 | struct listener { | 51 | struct listener { |
52 | struct list_head list; | 52 | struct list_head list; |
53 | pid_t pid; | 53 | pid_t pid; |
54 | char valid; | 54 | char valid; |
55 | }; | 55 | }; |
56 | 56 | ||
57 | struct listener_list { | 57 | struct listener_list { |
58 | struct rw_semaphore sem; | 58 | struct rw_semaphore sem; |
59 | struct list_head list; | 59 | struct list_head list; |
60 | }; | 60 | }; |
61 | static DEFINE_PER_CPU(struct listener_list, listener_array); | 61 | static DEFINE_PER_CPU(struct listener_list, listener_array); |
62 | 62 | ||
/* Operation selector handed to add_del_listener() */
enum actions {
	REGISTER,
	DEREGISTER,
	CPU_DONT_CARE
};
68 | 68 | ||
69 | static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | 69 | static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, |
70 | void **replyp, size_t size) | 70 | void **replyp, size_t size) |
71 | { | 71 | { |
72 | struct sk_buff *skb; | 72 | struct sk_buff *skb; |
73 | void *reply; | 73 | void *reply; |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * If new attributes are added, please revisit this allocation | 76 | * If new attributes are added, please revisit this allocation |
77 | */ | 77 | */ |
78 | skb = nlmsg_new(size, GFP_KERNEL); | 78 | skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL); |
79 | if (!skb) | 79 | if (!skb) |
80 | return -ENOMEM; | 80 | return -ENOMEM; |
81 | 81 | ||
82 | if (!info) { | 82 | if (!info) { |
83 | int seq = get_cpu_var(taskstats_seqnum)++; | 83 | int seq = get_cpu_var(taskstats_seqnum)++; |
84 | put_cpu_var(taskstats_seqnum); | 84 | put_cpu_var(taskstats_seqnum); |
85 | 85 | ||
86 | reply = genlmsg_put(skb, 0, seq, | 86 | reply = genlmsg_put(skb, 0, seq, |
87 | family.id, 0, 0, | 87 | family.id, 0, 0, |
88 | cmd, family.version); | 88 | cmd, family.version); |
89 | } else | 89 | } else |
90 | reply = genlmsg_put(skb, info->snd_pid, info->snd_seq, | 90 | reply = genlmsg_put(skb, info->snd_pid, info->snd_seq, |
91 | family.id, 0, 0, | 91 | family.id, 0, 0, |
92 | cmd, family.version); | 92 | cmd, family.version); |
93 | if (reply == NULL) { | 93 | if (reply == NULL) { |
94 | nlmsg_free(skb); | 94 | nlmsg_free(skb); |
95 | return -EINVAL; | 95 | return -EINVAL; |
96 | } | 96 | } |
97 | 97 | ||
98 | *skbp = skb; | 98 | *skbp = skb; |
99 | *replyp = reply; | 99 | *replyp = reply; |
100 | return 0; | 100 | return 0; |
101 | } | 101 | } |
102 | 102 | ||
103 | /* | 103 | /* |
104 | * Send taskstats data in @skb to listener with nl_pid @pid | 104 | * Send taskstats data in @skb to listener with nl_pid @pid |
105 | */ | 105 | */ |
106 | static int send_reply(struct sk_buff *skb, pid_t pid) | 106 | static int send_reply(struct sk_buff *skb, pid_t pid) |
107 | { | 107 | { |
108 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | 108 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); |
109 | void *reply = genlmsg_data(genlhdr); | 109 | void *reply = genlmsg_data(genlhdr); |
110 | int rc; | 110 | int rc; |
111 | 111 | ||
112 | rc = genlmsg_end(skb, reply); | 112 | rc = genlmsg_end(skb, reply); |
113 | if (rc < 0) { | 113 | if (rc < 0) { |
114 | nlmsg_free(skb); | 114 | nlmsg_free(skb); |
115 | return rc; | 115 | return rc; |
116 | } | 116 | } |
117 | 117 | ||
118 | return genlmsg_unicast(skb, pid); | 118 | return genlmsg_unicast(skb, pid); |
119 | } | 119 | } |
120 | 120 | ||
121 | /* | 121 | /* |
122 | * Send taskstats data in @skb to listeners registered for @cpu's exit data | 122 | * Send taskstats data in @skb to listeners registered for @cpu's exit data |
123 | */ | 123 | */ |
124 | static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) | 124 | static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) |
125 | { | 125 | { |
126 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | 126 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); |
127 | struct listener_list *listeners; | 127 | struct listener_list *listeners; |
128 | struct listener *s, *tmp; | 128 | struct listener *s, *tmp; |
129 | struct sk_buff *skb_next, *skb_cur = skb; | 129 | struct sk_buff *skb_next, *skb_cur = skb; |
130 | void *reply = genlmsg_data(genlhdr); | 130 | void *reply = genlmsg_data(genlhdr); |
131 | int rc, delcount = 0; | 131 | int rc, delcount = 0; |
132 | 132 | ||
133 | rc = genlmsg_end(skb, reply); | 133 | rc = genlmsg_end(skb, reply); |
134 | if (rc < 0) { | 134 | if (rc < 0) { |
135 | nlmsg_free(skb); | 135 | nlmsg_free(skb); |
136 | return; | 136 | return; |
137 | } | 137 | } |
138 | 138 | ||
139 | rc = 0; | 139 | rc = 0; |
140 | listeners = &per_cpu(listener_array, cpu); | 140 | listeners = &per_cpu(listener_array, cpu); |
141 | down_read(&listeners->sem); | 141 | down_read(&listeners->sem); |
142 | list_for_each_entry(s, &listeners->list, list) { | 142 | list_for_each_entry(s, &listeners->list, list) { |
143 | skb_next = NULL; | 143 | skb_next = NULL; |
144 | if (!list_is_last(&s->list, &listeners->list)) { | 144 | if (!list_is_last(&s->list, &listeners->list)) { |
145 | skb_next = skb_clone(skb_cur, GFP_KERNEL); | 145 | skb_next = skb_clone(skb_cur, GFP_KERNEL); |
146 | if (!skb_next) | 146 | if (!skb_next) |
147 | break; | 147 | break; |
148 | } | 148 | } |
149 | rc = genlmsg_unicast(skb_cur, s->pid); | 149 | rc = genlmsg_unicast(skb_cur, s->pid); |
150 | if (rc == -ECONNREFUSED) { | 150 | if (rc == -ECONNREFUSED) { |
151 | s->valid = 0; | 151 | s->valid = 0; |
152 | delcount++; | 152 | delcount++; |
153 | } | 153 | } |
154 | skb_cur = skb_next; | 154 | skb_cur = skb_next; |
155 | } | 155 | } |
156 | up_read(&listeners->sem); | 156 | up_read(&listeners->sem); |
157 | 157 | ||
158 | if (skb_cur) | 158 | if (skb_cur) |
159 | nlmsg_free(skb_cur); | 159 | nlmsg_free(skb_cur); |
160 | 160 | ||
161 | if (!delcount) | 161 | if (!delcount) |
162 | return; | 162 | return; |
163 | 163 | ||
164 | /* Delete invalidated entries */ | 164 | /* Delete invalidated entries */ |
165 | down_write(&listeners->sem); | 165 | down_write(&listeners->sem); |
166 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { | 166 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { |
167 | if (!s->valid) { | 167 | if (!s->valid) { |
168 | list_del(&s->list); | 168 | list_del(&s->list); |
169 | kfree(s); | 169 | kfree(s); |
170 | } | 170 | } |
171 | } | 171 | } |
172 | up_write(&listeners->sem); | 172 | up_write(&listeners->sem); |
173 | } | 173 | } |
174 | 174 | ||
175 | static int fill_pid(pid_t pid, struct task_struct *pidtsk, | 175 | static int fill_pid(pid_t pid, struct task_struct *pidtsk, |
176 | struct taskstats *stats) | 176 | struct taskstats *stats) |
177 | { | 177 | { |
178 | int rc = 0; | 178 | int rc = 0; |
179 | struct task_struct *tsk = pidtsk; | 179 | struct task_struct *tsk = pidtsk; |
180 | 180 | ||
181 | if (!pidtsk) { | 181 | if (!pidtsk) { |
182 | read_lock(&tasklist_lock); | 182 | read_lock(&tasklist_lock); |
183 | tsk = find_task_by_pid(pid); | 183 | tsk = find_task_by_pid(pid); |
184 | if (!tsk) { | 184 | if (!tsk) { |
185 | read_unlock(&tasklist_lock); | 185 | read_unlock(&tasklist_lock); |
186 | return -ESRCH; | 186 | return -ESRCH; |
187 | } | 187 | } |
188 | get_task_struct(tsk); | 188 | get_task_struct(tsk); |
189 | read_unlock(&tasklist_lock); | 189 | read_unlock(&tasklist_lock); |
190 | } else | 190 | } else |
191 | get_task_struct(tsk); | 191 | get_task_struct(tsk); |
192 | 192 | ||
193 | /* | 193 | /* |
194 | * Each accounting subsystem adds calls to its functions to | 194 | * Each accounting subsystem adds calls to its functions to |
195 | * fill in relevant parts of struct taskstsats as follows | 195 | * fill in relevant parts of struct taskstsats as follows |
196 | * | 196 | * |
197 | * per-task-foo(stats, tsk); | 197 | * per-task-foo(stats, tsk); |
198 | */ | 198 | */ |
199 | 199 | ||
200 | delayacct_add_tsk(stats, tsk); | 200 | delayacct_add_tsk(stats, tsk); |
201 | stats->version = TASKSTATS_VERSION; | 201 | stats->version = TASKSTATS_VERSION; |
202 | 202 | ||
203 | /* Define err: label here if needed */ | 203 | /* Define err: label here if needed */ |
204 | put_task_struct(tsk); | 204 | put_task_struct(tsk); |
205 | return rc; | 205 | return rc; |
206 | 206 | ||
207 | } | 207 | } |
208 | 208 | ||
209 | static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, | 209 | static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, |
210 | struct taskstats *stats) | 210 | struct taskstats *stats) |
211 | { | 211 | { |
212 | struct task_struct *tsk, *first; | 212 | struct task_struct *tsk, *first; |
213 | unsigned long flags; | 213 | unsigned long flags; |
214 | 214 | ||
215 | /* | 215 | /* |
216 | * Add additional stats from live tasks except zombie thread group | 216 | * Add additional stats from live tasks except zombie thread group |
217 | * leaders who are already counted with the dead tasks | 217 | * leaders who are already counted with the dead tasks |
218 | */ | 218 | */ |
219 | first = tgidtsk; | 219 | first = tgidtsk; |
220 | if (!first) { | 220 | if (!first) { |
221 | read_lock(&tasklist_lock); | 221 | read_lock(&tasklist_lock); |
222 | first = find_task_by_pid(tgid); | 222 | first = find_task_by_pid(tgid); |
223 | if (!first) { | 223 | if (!first) { |
224 | read_unlock(&tasklist_lock); | 224 | read_unlock(&tasklist_lock); |
225 | return -ESRCH; | 225 | return -ESRCH; |
226 | } | 226 | } |
227 | get_task_struct(first); | 227 | get_task_struct(first); |
228 | read_unlock(&tasklist_lock); | 228 | read_unlock(&tasklist_lock); |
229 | } else | 229 | } else |
230 | get_task_struct(first); | 230 | get_task_struct(first); |
231 | 231 | ||
232 | /* Start with stats from dead tasks */ | 232 | /* Start with stats from dead tasks */ |
233 | spin_lock_irqsave(&first->signal->stats_lock, flags); | 233 | spin_lock_irqsave(&first->signal->stats_lock, flags); |
234 | if (first->signal->stats) | 234 | if (first->signal->stats) |
235 | memcpy(stats, first->signal->stats, sizeof(*stats)); | 235 | memcpy(stats, first->signal->stats, sizeof(*stats)); |
236 | spin_unlock_irqrestore(&first->signal->stats_lock, flags); | 236 | spin_unlock_irqrestore(&first->signal->stats_lock, flags); |
237 | 237 | ||
238 | tsk = first; | 238 | tsk = first; |
239 | read_lock(&tasklist_lock); | 239 | read_lock(&tasklist_lock); |
240 | do { | 240 | do { |
241 | if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk)) | 241 | if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk)) |
242 | continue; | 242 | continue; |
243 | /* | 243 | /* |
244 | * Accounting subsystem can call its functions here to | 244 | * Accounting subsystem can call its functions here to |
245 | * fill in relevant parts of struct taskstsats as follows | 245 | * fill in relevant parts of struct taskstsats as follows |
246 | * | 246 | * |
247 | * per-task-foo(stats, tsk); | 247 | * per-task-foo(stats, tsk); |
248 | */ | 248 | */ |
249 | delayacct_add_tsk(stats, tsk); | 249 | delayacct_add_tsk(stats, tsk); |
250 | 250 | ||
251 | } while_each_thread(first, tsk); | 251 | } while_each_thread(first, tsk); |
252 | read_unlock(&tasklist_lock); | 252 | read_unlock(&tasklist_lock); |
253 | stats->version = TASKSTATS_VERSION; | 253 | stats->version = TASKSTATS_VERSION; |
254 | 254 | ||
255 | /* | 255 | /* |
256 | * Accounting subsytems can also add calls here to modify | 256 | * Accounting subsytems can also add calls here to modify |
257 | * fields of taskstats. | 257 | * fields of taskstats. |
258 | */ | 258 | */ |
259 | 259 | ||
260 | return 0; | 260 | return 0; |
261 | } | 261 | } |
262 | 262 | ||
263 | 263 | ||
264 | static void fill_tgid_exit(struct task_struct *tsk) | 264 | static void fill_tgid_exit(struct task_struct *tsk) |
265 | { | 265 | { |
266 | unsigned long flags; | 266 | unsigned long flags; |
267 | 267 | ||
268 | spin_lock_irqsave(&tsk->signal->stats_lock, flags); | 268 | spin_lock_irqsave(&tsk->signal->stats_lock, flags); |
269 | if (!tsk->signal->stats) | 269 | if (!tsk->signal->stats) |
270 | goto ret; | 270 | goto ret; |
271 | 271 | ||
272 | /* | 272 | /* |
273 | * Each accounting subsystem calls its functions here to | 273 | * Each accounting subsystem calls its functions here to |
274 | * accumalate its per-task stats for tsk, into the per-tgid structure | 274 | * accumalate its per-task stats for tsk, into the per-tgid structure |
275 | * | 275 | * |
276 | * per-task-foo(tsk->signal->stats, tsk); | 276 | * per-task-foo(tsk->signal->stats, tsk); |
277 | */ | 277 | */ |
278 | delayacct_add_tsk(tsk->signal->stats, tsk); | 278 | delayacct_add_tsk(tsk->signal->stats, tsk); |
279 | ret: | 279 | ret: |
280 | spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); | 280 | spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); |
281 | return; | 281 | return; |
282 | } | 282 | } |
283 | 283 | ||
284 | static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) | 284 | static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) |
285 | { | 285 | { |
286 | struct listener_list *listeners; | 286 | struct listener_list *listeners; |
287 | struct listener *s, *tmp; | 287 | struct listener *s, *tmp; |
288 | unsigned int cpu; | 288 | unsigned int cpu; |
289 | cpumask_t mask = *maskp; | 289 | cpumask_t mask = *maskp; |
290 | 290 | ||
291 | if (!cpus_subset(mask, cpu_possible_map)) | 291 | if (!cpus_subset(mask, cpu_possible_map)) |
292 | return -EINVAL; | 292 | return -EINVAL; |
293 | 293 | ||
294 | if (isadd == REGISTER) { | 294 | if (isadd == REGISTER) { |
295 | for_each_cpu_mask(cpu, mask) { | 295 | for_each_cpu_mask(cpu, mask) { |
296 | s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, | 296 | s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, |
297 | cpu_to_node(cpu)); | 297 | cpu_to_node(cpu)); |
298 | if (!s) | 298 | if (!s) |
299 | goto cleanup; | 299 | goto cleanup; |
300 | s->pid = pid; | 300 | s->pid = pid; |
301 | INIT_LIST_HEAD(&s->list); | 301 | INIT_LIST_HEAD(&s->list); |
302 | s->valid = 1; | 302 | s->valid = 1; |
303 | 303 | ||
304 | listeners = &per_cpu(listener_array, cpu); | 304 | listeners = &per_cpu(listener_array, cpu); |
305 | down_write(&listeners->sem); | 305 | down_write(&listeners->sem); |
306 | list_add(&s->list, &listeners->list); | 306 | list_add(&s->list, &listeners->list); |
307 | up_write(&listeners->sem); | 307 | up_write(&listeners->sem); |
308 | } | 308 | } |
309 | return 0; | 309 | return 0; |
310 | } | 310 | } |
311 | 311 | ||
312 | /* Deregister or cleanup */ | 312 | /* Deregister or cleanup */ |
313 | cleanup: | 313 | cleanup: |
314 | for_each_cpu_mask(cpu, mask) { | 314 | for_each_cpu_mask(cpu, mask) { |
315 | listeners = &per_cpu(listener_array, cpu); | 315 | listeners = &per_cpu(listener_array, cpu); |
316 | down_write(&listeners->sem); | 316 | down_write(&listeners->sem); |
317 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { | 317 | list_for_each_entry_safe(s, tmp, &listeners->list, list) { |
318 | if (s->pid == pid) { | 318 | if (s->pid == pid) { |
319 | list_del(&s->list); | 319 | list_del(&s->list); |
320 | kfree(s); | 320 | kfree(s); |
321 | break; | 321 | break; |
322 | } | 322 | } |
323 | } | 323 | } |
324 | up_write(&listeners->sem); | 324 | up_write(&listeners->sem); |
325 | } | 325 | } |
326 | return 0; | 326 | return 0; |
327 | } | 327 | } |
328 | 328 | ||
329 | static int parse(struct nlattr *na, cpumask_t *mask) | 329 | static int parse(struct nlattr *na, cpumask_t *mask) |
330 | { | 330 | { |
331 | char *data; | 331 | char *data; |
332 | int len; | 332 | int len; |
333 | int ret; | 333 | int ret; |
334 | 334 | ||
335 | if (na == NULL) | 335 | if (na == NULL) |
336 | return 1; | 336 | return 1; |
337 | len = nla_len(na); | 337 | len = nla_len(na); |
338 | if (len > TASKSTATS_CPUMASK_MAXLEN) | 338 | if (len > TASKSTATS_CPUMASK_MAXLEN) |
339 | return -E2BIG; | 339 | return -E2BIG; |
340 | if (len < 1) | 340 | if (len < 1) |
341 | return -EINVAL; | 341 | return -EINVAL; |
342 | data = kmalloc(len, GFP_KERNEL); | 342 | data = kmalloc(len, GFP_KERNEL); |
343 | if (!data) | 343 | if (!data) |
344 | return -ENOMEM; | 344 | return -ENOMEM; |
345 | nla_strlcpy(data, na, len); | 345 | nla_strlcpy(data, na, len); |
346 | ret = cpulist_parse(data, *mask); | 346 | ret = cpulist_parse(data, *mask); |
347 | kfree(data); | 347 | kfree(data); |
348 | return ret; | 348 | return ret; |
349 | } | 349 | } |
350 | 350 | ||
351 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | 351 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) |
352 | { | 352 | { |
353 | int rc = 0; | 353 | int rc = 0; |
354 | struct sk_buff *rep_skb; | 354 | struct sk_buff *rep_skb; |
355 | struct taskstats stats; | 355 | struct taskstats stats; |
356 | void *reply; | 356 | void *reply; |
357 | size_t size; | 357 | size_t size; |
358 | struct nlattr *na; | 358 | struct nlattr *na; |
359 | cpumask_t mask; | 359 | cpumask_t mask; |
360 | 360 | ||
361 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); | 361 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); |
362 | if (rc < 0) | 362 | if (rc < 0) |
363 | return rc; | 363 | return rc; |
364 | if (rc == 0) | 364 | if (rc == 0) |
365 | return add_del_listener(info->snd_pid, &mask, REGISTER); | 365 | return add_del_listener(info->snd_pid, &mask, REGISTER); |
366 | 366 | ||
367 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask); | 367 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask); |
368 | if (rc < 0) | 368 | if (rc < 0) |
369 | return rc; | 369 | return rc; |
370 | if (rc == 0) | 370 | if (rc == 0) |
371 | return add_del_listener(info->snd_pid, &mask, DEREGISTER); | 371 | return add_del_listener(info->snd_pid, &mask, DEREGISTER); |
372 | 372 | ||
373 | /* | 373 | /* |
374 | * Size includes space for nested attributes | 374 | * Size includes space for nested attributes |
375 | */ | 375 | */ |
376 | size = nla_total_size(sizeof(u32)) + | 376 | size = nla_total_size(sizeof(u32)) + |
377 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | 377 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
378 | 378 | ||
379 | memset(&stats, 0, sizeof(stats)); | 379 | memset(&stats, 0, sizeof(stats)); |
380 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | 380 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); |
381 | if (rc < 0) | 381 | if (rc < 0) |
382 | return rc; | 382 | return rc; |
383 | 383 | ||
384 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | 384 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { |
385 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | 385 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); |
386 | rc = fill_pid(pid, NULL, &stats); | 386 | rc = fill_pid(pid, NULL, &stats); |
387 | if (rc < 0) | 387 | if (rc < 0) |
388 | goto err; | 388 | goto err; |
389 | 389 | ||
390 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | 390 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); |
391 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid); | 391 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid); |
392 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | 392 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, |
393 | stats); | 393 | stats); |
394 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | 394 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { |
395 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | 395 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); |
396 | rc = fill_tgid(tgid, NULL, &stats); | 396 | rc = fill_tgid(tgid, NULL, &stats); |
397 | if (rc < 0) | 397 | if (rc < 0) |
398 | goto err; | 398 | goto err; |
399 | 399 | ||
400 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | 400 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); |
401 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid); | 401 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid); |
402 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | 402 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, |
403 | stats); | 403 | stats); |
404 | } else { | 404 | } else { |
405 | rc = -EINVAL; | 405 | rc = -EINVAL; |
406 | goto err; | 406 | goto err; |
407 | } | 407 | } |
408 | 408 | ||
409 | nla_nest_end(rep_skb, na); | 409 | nla_nest_end(rep_skb, na); |
410 | 410 | ||
411 | return send_reply(rep_skb, info->snd_pid); | 411 | return send_reply(rep_skb, info->snd_pid); |
412 | 412 | ||
413 | nla_put_failure: | 413 | nla_put_failure: |
414 | return genlmsg_cancel(rep_skb, reply); | 414 | return genlmsg_cancel(rep_skb, reply); |
415 | err: | 415 | err: |
416 | nlmsg_free(rep_skb); | 416 | nlmsg_free(rep_skb); |
417 | return rc; | 417 | return rc; |
418 | } | 418 | } |
419 | 419 | ||
420 | void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) | 420 | void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) |
421 | { | 421 | { |
422 | struct listener_list *listeners; | 422 | struct listener_list *listeners; |
423 | struct taskstats *tmp; | 423 | struct taskstats *tmp; |
424 | /* | 424 | /* |
425 | * This is the cpu on which the task is exiting currently and will | 425 | * This is the cpu on which the task is exiting currently and will |
426 | * be the one for which the exit event is sent, even if the cpu | 426 | * be the one for which the exit event is sent, even if the cpu |
427 | * on which this function is running changes later. | 427 | * on which this function is running changes later. |
428 | */ | 428 | */ |
429 | *mycpu = raw_smp_processor_id(); | 429 | *mycpu = raw_smp_processor_id(); |
430 | 430 | ||
431 | *ptidstats = NULL; | 431 | *ptidstats = NULL; |
432 | tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); | 432 | tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); |
433 | if (!tmp) | 433 | if (!tmp) |
434 | return; | 434 | return; |
435 | 435 | ||
436 | listeners = &per_cpu(listener_array, *mycpu); | 436 | listeners = &per_cpu(listener_array, *mycpu); |
437 | down_read(&listeners->sem); | 437 | down_read(&listeners->sem); |
438 | if (!list_empty(&listeners->list)) { | 438 | if (!list_empty(&listeners->list)) { |
439 | *ptidstats = tmp; | 439 | *ptidstats = tmp; |
440 | tmp = NULL; | 440 | tmp = NULL; |
441 | } | 441 | } |
442 | up_read(&listeners->sem); | 442 | up_read(&listeners->sem); |
443 | kfree(tmp); | 443 | kfree(tmp); |
444 | } | 444 | } |
445 | 445 | ||
446 | /* Send pid data out on exit */ | 446 | /* Send pid data out on exit */ |
447 | void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, | 447 | void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, |
448 | int group_dead, unsigned int mycpu) | 448 | int group_dead, unsigned int mycpu) |
449 | { | 449 | { |
450 | int rc; | 450 | int rc; |
451 | struct sk_buff *rep_skb; | 451 | struct sk_buff *rep_skb; |
452 | void *reply; | 452 | void *reply; |
453 | size_t size; | 453 | size_t size; |
454 | int is_thread_group; | 454 | int is_thread_group; |
455 | struct nlattr *na; | 455 | struct nlattr *na; |
456 | unsigned long flags; | 456 | unsigned long flags; |
457 | 457 | ||
458 | if (!family_registered || !tidstats) | 458 | if (!family_registered || !tidstats) |
459 | return; | 459 | return; |
460 | 460 | ||
461 | spin_lock_irqsave(&tsk->signal->stats_lock, flags); | 461 | spin_lock_irqsave(&tsk->signal->stats_lock, flags); |
462 | is_thread_group = tsk->signal->stats ? 1 : 0; | 462 | is_thread_group = tsk->signal->stats ? 1 : 0; |
463 | spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); | 463 | spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); |
464 | 464 | ||
465 | rc = 0; | 465 | rc = 0; |
466 | /* | 466 | /* |
467 | * Size includes space for nested attributes | 467 | * Size includes space for nested attributes |
468 | */ | 468 | */ |
469 | size = nla_total_size(sizeof(u32)) + | 469 | size = nla_total_size(sizeof(u32)) + |
470 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | 470 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
471 | 471 | ||
472 | if (is_thread_group) | 472 | if (is_thread_group) |
473 | size = 2 * size; /* PID + STATS + TGID + STATS */ | 473 | size = 2 * size; /* PID + STATS + TGID + STATS */ |
474 | 474 | ||
475 | rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); | 475 | rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); |
476 | if (rc < 0) | 476 | if (rc < 0) |
477 | goto ret; | 477 | goto ret; |
478 | 478 | ||
479 | rc = fill_pid(tsk->pid, tsk, tidstats); | 479 | rc = fill_pid(tsk->pid, tsk, tidstats); |
480 | if (rc < 0) | 480 | if (rc < 0) |
481 | goto err_skb; | 481 | goto err_skb; |
482 | 482 | ||
483 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); | 483 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); |
484 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); | 484 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); |
485 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | 485 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, |
486 | *tidstats); | 486 | *tidstats); |
487 | nla_nest_end(rep_skb, na); | 487 | nla_nest_end(rep_skb, na); |
488 | 488 | ||
489 | if (!is_thread_group) | 489 | if (!is_thread_group) |
490 | goto send; | 490 | goto send; |
491 | 491 | ||
492 | /* | 492 | /* |
493 | * tsk has/had a thread group so fill the tsk->signal->stats structure | 493 | * tsk has/had a thread group so fill the tsk->signal->stats structure |
494 | * Doesn't matter if tsk is the leader or the last group member leaving | 494 | * Doesn't matter if tsk is the leader or the last group member leaving |
495 | */ | 495 | */ |
496 | 496 | ||
497 | fill_tgid_exit(tsk); | 497 | fill_tgid_exit(tsk); |
498 | if (!group_dead) | 498 | if (!group_dead) |
499 | goto send; | 499 | goto send; |
500 | 500 | ||
501 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); | 501 | na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); |
502 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); | 502 | NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); |
503 | /* No locking needed for tsk->signal->stats since group is dead */ | 503 | /* No locking needed for tsk->signal->stats since group is dead */ |
504 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, | 504 | NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, |
505 | *tsk->signal->stats); | 505 | *tsk->signal->stats); |
506 | nla_nest_end(rep_skb, na); | 506 | nla_nest_end(rep_skb, na); |
507 | 507 | ||
508 | send: | 508 | send: |
509 | send_cpu_listeners(rep_skb, mycpu); | 509 | send_cpu_listeners(rep_skb, mycpu); |
510 | return; | 510 | return; |
511 | 511 | ||
512 | nla_put_failure: | 512 | nla_put_failure: |
513 | genlmsg_cancel(rep_skb, reply); | 513 | genlmsg_cancel(rep_skb, reply); |
514 | goto ret; | 514 | goto ret; |
515 | err_skb: | 515 | err_skb: |
516 | nlmsg_free(rep_skb); | 516 | nlmsg_free(rep_skb); |
517 | ret: | 517 | ret: |
518 | return; | 518 | return; |
519 | } | 519 | } |
520 | 520 | ||
521 | static struct genl_ops taskstats_ops = { | 521 | static struct genl_ops taskstats_ops = { |
522 | .cmd = TASKSTATS_CMD_GET, | 522 | .cmd = TASKSTATS_CMD_GET, |
523 | .doit = taskstats_user_cmd, | 523 | .doit = taskstats_user_cmd, |
524 | .policy = taskstats_cmd_get_policy, | 524 | .policy = taskstats_cmd_get_policy, |
525 | }; | 525 | }; |
526 | 526 | ||
527 | /* Needed early in initialization */ | 527 | /* Needed early in initialization */ |
528 | void __init taskstats_init_early(void) | 528 | void __init taskstats_init_early(void) |
529 | { | 529 | { |
530 | unsigned int i; | 530 | unsigned int i; |
531 | 531 | ||
532 | taskstats_cache = kmem_cache_create("taskstats_cache", | 532 | taskstats_cache = kmem_cache_create("taskstats_cache", |
533 | sizeof(struct taskstats), | 533 | sizeof(struct taskstats), |
534 | 0, SLAB_PANIC, NULL, NULL); | 534 | 0, SLAB_PANIC, NULL, NULL); |
535 | for_each_possible_cpu(i) { | 535 | for_each_possible_cpu(i) { |
536 | INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); | 536 | INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); |
537 | init_rwsem(&(per_cpu(listener_array, i).sem)); | 537 | init_rwsem(&(per_cpu(listener_array, i).sem)); |
538 | } | 538 | } |
539 | } | 539 | } |
540 | 540 | ||
541 | static int __init taskstats_init(void) | 541 | static int __init taskstats_init(void) |
542 | { | 542 | { |
543 | int rc; | 543 | int rc; |
544 | 544 | ||
545 | rc = genl_register_family(&family); | 545 | rc = genl_register_family(&family); |
546 | if (rc) | 546 | if (rc) |
547 | return rc; | 547 | return rc; |
548 | 548 | ||
549 | rc = genl_register_ops(&family, &taskstats_ops); | 549 | rc = genl_register_ops(&family, &taskstats_ops); |
550 | if (rc < 0) | 550 | if (rc < 0) |
551 | goto err; | 551 | goto err; |
552 | 552 | ||
553 | family_registered = 1; | 553 | family_registered = 1; |
554 | return 0; | 554 | return 0; |
555 | err: | 555 | err: |
556 | genl_unregister_family(&family); | 556 | genl_unregister_family(&family); |
557 | return rc; | 557 | return rc; |
558 | } | 558 | } |
559 | 559 | ||
560 | /* | 560 | /* |
561 | * late initcall ensures initialization of statistics collection | 561 | * late initcall ensures initialization of statistics collection |
562 | * mechanisms precedes initialization of the taskstats interface | 562 | * mechanisms precedes initialization of the taskstats interface |
563 | */ | 563 | */ |
564 | late_initcall(taskstats_init); | 564 | late_initcall(taskstats_init); |
565 | 565 |