Commit 096b7fe012d66ed55e98bc8022405ede0cc80e96

Authored by Li Zefan
Committed by Linus Torvalds
1 parent b317c83321

cgroups: fix pid namespace bug

The bug was introduced by commit cc31edceee04a7b87f2be48f9489ebb72d264844
("cgroups: convert tasks file to use a seq_file with shared pid array").

We cache a pid array for all threads that are opening the same "tasks"
file, but the pids in the array are always from the namespace of the
last process that opened the file, so all other threads will read pids
from that namespace instead of their own namespaces.

To fix it, we maintain a list of pid arrays, which is keyed by pid_ns.
The list will be of length 1 most of the time.

Reported-by: Paul Menage <menage@google.com>
Idea-by: Paul Menage <menage@google.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: Serge Hallyn <serue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 76 additions and 31 deletions Side-by-side Diff

include/linux/cgroup.h
... ... @@ -179,14 +179,11 @@
179 179 */
180 180 struct list_head release_list;
181 181  
182   - /* pids_mutex protects the fields below */
  182 + /* pids_mutex protects pids_list and cached pid arrays. */
183 183 struct rw_semaphore pids_mutex;
184   - /* Array of process ids in the cgroup */
185   - pid_t *tasks_pids;
186   - /* How many files are using the current tasks_pids array */
187   - int pids_use_count;
188   - /* Length of the current tasks_pids array */
189   - int pids_length;
  184 +
  185 + /* Linked list of struct cgroup_pids */
  186 + struct list_head pids_list;
190 187  
191 188 /* For RCU-protected deletion */
192 189 struct rcu_head rcu_head;
kernel/cgroup.c
... ... @@ -47,6 +47,7 @@
47 47 #include <linux/hash.h>
48 48 #include <linux/namei.h>
49 49 #include <linux/smp_lock.h>
  50 +#include <linux/pid_namespace.h>
50 51  
51 52 #include <asm/atomic.h>
52 53  
... ... @@ -960,6 +961,7 @@
960 961 INIT_LIST_HEAD(&cgrp->children);
961 962 INIT_LIST_HEAD(&cgrp->css_sets);
962 963 INIT_LIST_HEAD(&cgrp->release_list);
  964 + INIT_LIST_HEAD(&cgrp->pids_list);
963 965 init_rwsem(&cgrp->pids_mutex);
964 966 }
965 967 static void init_cgroup_root(struct cgroupfs_root *root)
966 968  
... ... @@ -2201,12 +2203,30 @@
2201 2203 return ret;
2202 2204 }
2203 2205  
  2206 +/*
  2207 + * Cache pids for all threads in the same pid namespace that are
  2208 + * opening the same "tasks" file.
  2209 + */
  2210 +struct cgroup_pids {
  2211 + /* The node in cgrp->pids_list */
  2212 + struct list_head list;
  2213 + /* The cgroup those pids belong to */
  2214 + struct cgroup *cgrp;
  2215 + /* The namespace those pids belong to */
  2216 + struct pid_namespace *ns;
  2217 + /* Array of process ids in the cgroup */
  2218 + pid_t *tasks_pids;
  2219 + /* How many files are using this tasks_pids array */
  2220 + int use_count;
  2221 + /* Length of the current tasks_pids array */
  2222 + int length;
  2223 +};
  2224 +
2204 2225 static int cmppid(const void *a, const void *b)
2205 2226 {
2206 2227 return *(pid_t *)a - *(pid_t *)b;
2207 2228 }
2208 2229  
2209   -
2210 2230 /*
2211 2231 * seq_file methods for the "tasks" file. The seq_file position is the
2212 2232 * next pid to display; the seq_file iterator is a pointer to the pid
2213 2233  
2214 2234  
2215 2235  
2216 2236  
2217 2237  
2218 2238  
2219 2239  
2220 2240  
... ... @@ -2221,45 +2241,47 @@
2221 2241 * after a seek to the start). Use a binary-search to find the
2222 2242 * next pid to display, if any
2223 2243 */
2224   - struct cgroup *cgrp = s->private;
  2244 + struct cgroup_pids *cp = s->private;
  2245 + struct cgroup *cgrp = cp->cgrp;
2225 2246 int index = 0, pid = *pos;
2226 2247 int *iter;
2227 2248  
2228 2249 down_read(&cgrp->pids_mutex);
2229 2250 if (pid) {
2230   - int end = cgrp->pids_length;
  2251 + int end = cp->length;
2231 2252  
2232 2253 while (index < end) {
2233 2254 int mid = (index + end) / 2;
2234   - if (cgrp->tasks_pids[mid] == pid) {
  2255 + if (cp->tasks_pids[mid] == pid) {
2235 2256 index = mid;
2236 2257 break;
2237   - } else if (cgrp->tasks_pids[mid] <= pid)
  2258 + } else if (cp->tasks_pids[mid] <= pid)
2238 2259 index = mid + 1;
2239 2260 else
2240 2261 end = mid;
2241 2262 }
2242 2263 }
2243 2264 /* If we're off the end of the array, we're done */
2244   - if (index >= cgrp->pids_length)
  2265 + if (index >= cp->length)
2245 2266 return NULL;
2246 2267 /* Update the abstract position to be the actual pid that we found */
2247   - iter = cgrp->tasks_pids + index;
  2268 + iter = cp->tasks_pids + index;
2248 2269 *pos = *iter;
2249 2270 return iter;
2250 2271 }
2251 2272  
2252 2273 static void cgroup_tasks_stop(struct seq_file *s, void *v)
2253 2274 {
2254   - struct cgroup *cgrp = s->private;
  2275 + struct cgroup_pids *cp = s->private;
  2276 + struct cgroup *cgrp = cp->cgrp;
2255 2277 up_read(&cgrp->pids_mutex);
2256 2278 }
2257 2279  
2258 2280 static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
2259 2281 {
2260   - struct cgroup *cgrp = s->private;
  2282 + struct cgroup_pids *cp = s->private;
2261 2283 int *p = v;
2262   - int *end = cgrp->tasks_pids + cgrp->pids_length;
  2284 + int *end = cp->tasks_pids + cp->length;
2263 2285  
2264 2286 /*
2265 2287 * Advance to the next pid in the array. If this goes off the
2266 2288  
2267 2289  
2268 2290  
2269 2291  
... ... @@ -2286,26 +2308,33 @@
2286 2308 .show = cgroup_tasks_show,
2287 2309 };
2288 2310  
2289   -static void release_cgroup_pid_array(struct cgroup *cgrp)
  2311 +static void release_cgroup_pid_array(struct cgroup_pids *cp)
2290 2312 {
  2313 + struct cgroup *cgrp = cp->cgrp;
  2314 +
2291 2315 down_write(&cgrp->pids_mutex);
2292   - BUG_ON(!cgrp->pids_use_count);
2293   - if (!--cgrp->pids_use_count) {
2294   - kfree(cgrp->tasks_pids);
2295   - cgrp->tasks_pids = NULL;
2296   - cgrp->pids_length = 0;
  2316 + BUG_ON(!cp->use_count);
  2317 + if (!--cp->use_count) {
  2318 + list_del(&cp->list);
  2319 + put_pid_ns(cp->ns);
  2320 + kfree(cp->tasks_pids);
  2321 + kfree(cp);
2297 2322 }
2298 2323 up_write(&cgrp->pids_mutex);
2299 2324 }
2300 2325  
2301 2326 static int cgroup_tasks_release(struct inode *inode, struct file *file)
2302 2327 {
2303   - struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
  2328 + struct seq_file *seq;
  2329 + struct cgroup_pids *cp;
2304 2330  
2305 2331 if (!(file->f_mode & FMODE_READ))
2306 2332 return 0;
2307 2333  
2308   - release_cgroup_pid_array(cgrp);
  2334 + seq = file->private_data;
  2335 + cp = seq->private;
  2336 +
  2337 + release_cgroup_pid_array(cp);
2309 2338 return seq_release(inode, file);
2310 2339 }
2311 2340  
... ... @@ -2324,6 +2353,8 @@
2324 2353 static int cgroup_tasks_open(struct inode *unused, struct file *file)
2325 2354 {
2326 2355 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
  2356 + struct pid_namespace *ns = current->nsproxy->pid_ns;
  2357 + struct cgroup_pids *cp;
2327 2358 pid_t *pidarray;
2328 2359 int npids;
2329 2360 int retval;
2330 2361  
2331 2362  
... ... @@ -2350,20 +2381,37 @@
2350 2381 * array if necessary
2351 2382 */
2352 2383 down_write(&cgrp->pids_mutex);
2353   - kfree(cgrp->tasks_pids);
2354   - cgrp->tasks_pids = pidarray;
2355   - cgrp->pids_length = npids;
2356   - cgrp->pids_use_count++;
  2384 +
  2385 + list_for_each_entry(cp, &cgrp->pids_list, list) {
  2386 + if (ns == cp->ns)
  2387 + goto found;
  2388 + }
  2389 +
  2390 + cp = kzalloc(sizeof(*cp), GFP_KERNEL);
  2391 + if (!cp) {
  2392 + up_write(&cgrp->pids_mutex);
  2393 + kfree(pidarray);
  2394 + return -ENOMEM;
  2395 + }
  2396 + cp->cgrp = cgrp;
  2397 + cp->ns = ns;
  2398 + get_pid_ns(ns);
  2399 + list_add(&cp->list, &cgrp->pids_list);
  2400 +found:
  2401 + kfree(cp->tasks_pids);
  2402 + cp->tasks_pids = pidarray;
  2403 + cp->length = npids;
  2404 + cp->use_count++;
2357 2405 up_write(&cgrp->pids_mutex);
2358 2406  
2359 2407 file->f_op = &cgroup_tasks_operations;
2360 2408  
2361 2409 retval = seq_open(file, &cgroup_tasks_seq_operations);
2362 2410 if (retval) {
2363   - release_cgroup_pid_array(cgrp);
  2411 + release_cgroup_pid_array(cp);
2364 2412 return retval;
2365 2413 }
2366   - ((struct seq_file *)file->private_data)->private = cgrp;
  2414 + ((struct seq_file *)file->private_data)->private = cp;
2367 2415 return 0;
2368 2416 }
2369 2417