Commit 1651e14e28a2d9f446018ef522882e0709a2ce4f

Authored by Serge E. Hallyn
Committed by Linus Torvalds
1 parent 0437eb594e

[PATCH] namespaces: incorporate fs namespace into nsproxy

This moves the mount namespace into the nsproxy.  The mount namespace count
now refers to the number of nsproxies pointing to it, rather than the number
of tasks.  As a result, the unshare_namespace() function in kernel/fork.c no
longer checks whether the namespace is being shared.

Signed-off-by: Serge Hallyn <serue@us.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Andrey Savochkin <saw@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 9 changed files with 55 additions and 39 deletions Side-by-side Diff

... ... @@ -133,7 +133,7 @@
133 133  
134 134 static inline int check_mnt(struct vfsmount *mnt)
135 135 {
136   - return mnt->mnt_namespace == current->namespace;
  136 + return mnt->mnt_namespace == current->nsproxy->namespace;
137 137 }
138 138  
139 139 static void touch_namespace(struct namespace *ns)
... ... @@ -830,7 +830,7 @@
830 830 if (parent_nd) {
831 831 detach_mnt(source_mnt, parent_nd);
832 832 attach_mnt(source_mnt, nd);
833   - touch_namespace(current->namespace);
  833 + touch_namespace(current->nsproxy->namespace);
834 834 } else {
835 835 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
836 836 commit_tree(source_mnt);
... ... @@ -1441,7 +1441,7 @@
1441 1441 */
1442 1442 struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
1443 1443 {
1444   - struct namespace *namespace = tsk->namespace;
  1444 + struct namespace *namespace = tsk->nsproxy->namespace;
1445 1445 struct namespace *new_ns;
1446 1446 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
1447 1447 struct vfsmount *p, *q;
... ... @@ -1508,7 +1508,7 @@
1508 1508  
1509 1509 int copy_namespace(int flags, struct task_struct *tsk)
1510 1510 {
1511   - struct namespace *namespace = tsk->namespace;
  1511 + struct namespace *namespace = tsk->nsproxy->namespace;
1512 1512 struct namespace *new_ns;
1513 1513 int err = 0;
1514 1514  
... ... @@ -1531,7 +1531,7 @@
1531 1531 goto out;
1532 1532 }
1533 1533  
1534   - tsk->namespace = new_ns;
  1534 + tsk->nsproxy->namespace = new_ns;
1535 1535  
1536 1536 out:
1537 1537 put_namespace(namespace);
... ... @@ -1754,7 +1754,7 @@
1754 1754 detach_mnt(user_nd.mnt, &root_parent);
1755 1755 attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */
1756 1756 attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
1757   - touch_namespace(current->namespace);
  1757 + touch_namespace(current->nsproxy->namespace);
1758 1758 spin_unlock(&vfsmount_lock);
1759 1759 chroot_fs_refs(&user_nd, &new_nd);
1760 1760 security_sb_post_pivotroot(&user_nd, &new_nd);
... ... @@ -1780,7 +1780,6 @@
1780 1780 {
1781 1781 struct vfsmount *mnt;
1782 1782 struct namespace *namespace;
1783   - struct task_struct *g, *p;
1784 1783  
1785 1784 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
1786 1785 if (IS_ERR(mnt))
... ... @@ -1796,13 +1795,8 @@
1796 1795 namespace->root = mnt;
1797 1796 mnt->mnt_namespace = namespace;
1798 1797  
1799   - init_task.namespace = namespace;
1800   - read_lock(&tasklist_lock);
1801   - do_each_thread(g, p) {
1802   - get_namespace(namespace);
1803   - p->namespace = namespace;
1804   - } while_each_thread(g, p);
1805   - read_unlock(&tasklist_lock);
  1798 + init_task.nsproxy->namespace = namespace;
  1799 + get_namespace(namespace);
1806 1800  
1807 1801 set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
1808 1802 set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
... ... @@ -71,6 +71,7 @@
71 71 #include <linux/cpuset.h>
72 72 #include <linux/audit.h>
73 73 #include <linux/poll.h>
  74 +#include <linux/nsproxy.h>
74 75 #include "internal.h"
75 76  
76 77 /* NOTE:
... ... @@ -473,7 +474,7 @@
473 474  
474 475 if (task) {
475 476 task_lock(task);
476   - namespace = task->namespace;
  477 + namespace = task->nsproxy->namespace;
477 478 if (namespace)
478 479 get_namespace(namespace);
479 480 task_unlock(task);
... ... @@ -544,7 +545,7 @@
544 545  
545 546 if (task) {
546 547 task_lock(task);
547   - namespace = task->namespace;
  548 + namespace = task->nsproxy->namespace;
548 549 if (namespace)
549 550 get_namespace(namespace);
550 551 task_unlock(task);
include/linux/init_task.h
... ... @@ -72,6 +72,7 @@
72 72 #define INIT_NSPROXY(nsproxy) { \
73 73 .count = ATOMIC_INIT(1), \
74 74 .nslock = SPIN_LOCK_UNLOCKED, \
  75 + .namespace = NULL, \
75 76 }
76 77  
77 78 #define INIT_SIGHAND(sighand) { \
include/linux/namespace.h
... ... @@ -4,6 +4,7 @@
4 4  
5 5 #include <linux/mount.h>
6 6 #include <linux/sched.h>
  7 +#include <linux/nsproxy.h>
7 8  
8 9 struct namespace {
9 10 atomic_t count;
10 11  
... ... @@ -26,11 +27,8 @@
26 27  
27 28 static inline void exit_namespace(struct task_struct *p)
28 29 {
29   - struct namespace *namespace = p->namespace;
  30 + struct namespace *namespace = p->nsproxy->namespace;
30 31 if (namespace) {
31   - task_lock(p);
32   - p->namespace = NULL;
33   - task_unlock(p);
34 32 put_namespace(namespace);
35 33 }
36 34 }
include/linux/nsproxy.h
... ... @@ -4,6 +4,8 @@
4 4 #include <linux/spinlock.h>
5 5 #include <linux/sched.h>
6 6  
  7 +struct namespace;
  8 +
7 9 /*
8 10 * A structure to contain pointers to all per-process
9 11 * namespaces - fs (mount), uts, network, sysvipc, etc.
... ... @@ -19,6 +21,7 @@
19 21 struct nsproxy {
20 22 atomic_t count;
21 23 spinlock_t nslock;
  24 + struct namespace *namespace;
22 25 };
23 26 extern struct nsproxy init_nsproxy;
24 27  
include/linux/sched.h
... ... @@ -238,7 +238,6 @@
238 238 extern signed long schedule_timeout_uninterruptible(signed long timeout);
239 239 asmlinkage void schedule(void);
240 240  
241   -struct namespace;
242 241 struct nsproxy;
243 242  
244 243 /* Maximum number of active map areas.. This is a random (large) number */
... ... @@ -897,8 +896,7 @@
897 896 struct fs_struct *fs;
898 897 /* open file information */
899 898 struct files_struct *files;
900   -/* namespace */
901   - struct namespace *namespace;
  899 +/* namespaces */
902 900 struct nsproxy *nsproxy;
903 901 /* signal handlers */
904 902 struct signal_struct *signal;
... ... @@ -399,11 +399,8 @@
399 399 current->fs = fs;
400 400 atomic_inc(&fs->count);
401 401  
402   - exit_namespace(current);
403 402 exit_task_namespaces(current);
404   - current->namespace = init_task.namespace;
405 403 current->nsproxy = init_task.nsproxy;
406   - get_namespace(current->namespace);
407 404 get_task_namespaces(current);
408 405  
409 406 exit_files(current);
... ... @@ -923,7 +920,6 @@
923 920 exit_sem(tsk);
924 921 __exit_files(tsk);
925 922 __exit_fs(tsk);
926   - exit_namespace(tsk);
927 923 exit_task_namespaces(tsk);
928 924 exit_thread();
929 925 cpuset_exit(tsk);
... ... @@ -1119,11 +1119,9 @@
1119 1119 goto bad_fork_cleanup_mm;
1120 1120 if ((retval = copy_namespaces(clone_flags, p)))
1121 1121 goto bad_fork_cleanup_keys;
1122   - if ((retval = copy_namespace(clone_flags, p)))
1123   - goto bad_fork_cleanup_namespaces;
1124 1122 retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
1125 1123 if (retval)
1126   - goto bad_fork_cleanup_namespace;
  1124 + goto bad_fork_cleanup_namespaces;
1127 1125  
1128 1126 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1129 1127 /*
... ... @@ -1215,7 +1213,7 @@
1215 1213 spin_unlock(&current->sighand->siglock);
1216 1214 write_unlock_irq(&tasklist_lock);
1217 1215 retval = -ERESTARTNOINTR;
1218   - goto bad_fork_cleanup_namespace;
  1216 + goto bad_fork_cleanup_namespaces;
1219 1217 }
1220 1218  
1221 1219 if (clone_flags & CLONE_THREAD) {
... ... @@ -1263,8 +1261,6 @@
1263 1261 proc_fork_connector(p);
1264 1262 return p;
1265 1263  
1266   -bad_fork_cleanup_namespace:
1267   - exit_namespace(p);
1268 1264 bad_fork_cleanup_namespaces:
1269 1265 exit_task_namespaces(p);
1270 1266 bad_fork_cleanup_keys:
1271 1267  
... ... @@ -1519,10 +1515,9 @@
1519 1515 */
1520 1516 static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
1521 1517 {
1522   - struct namespace *ns = current->namespace;
  1518 + struct namespace *ns = current->nsproxy->namespace;
1523 1519  
1524   - if ((unshare_flags & CLONE_NEWNS) &&
1525   - (ns && atomic_read(&ns->count) > 1)) {
  1520 + if ((unshare_flags & CLONE_NEWNS) && ns) {
1526 1521 if (!capable(CAP_SYS_ADMIN))
1527 1522 return -EPERM;
1528 1523  
... ... @@ -1655,8 +1650,8 @@
1655 1650 }
1656 1651  
1657 1652 if (new_ns) {
1658   - ns = current->namespace;
1659   - current->namespace = new_ns;
  1653 + ns = current->nsproxy->namespace;
  1654 + current->nsproxy->namespace = new_ns;
1660 1655 new_ns = ns;
1661 1656 }
1662 1657  
... ... @@ -13,6 +13,7 @@
13 13 #include <linux/version.h>
14 14 #include <linux/nsproxy.h>
15 15 #include <linux/init_task.h>
  16 +#include <linux/namespace.h>
16 17  
17 18 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
18 19  
... ... @@ -55,6 +56,11 @@
55 56 {
56 57 struct nsproxy *ns = clone_namespaces(orig);
57 58  
  59 + if (ns) {
  60 + if (ns->namespace)
  61 + get_namespace(ns->namespace);
  62 + }
  63 +
58 64 return ns;
59 65 }
60 66  
61 67  
62 68  
... ... @@ -65,17 +71,41 @@
65 71 int copy_namespaces(int flags, struct task_struct *tsk)
66 72 {
67 73 struct nsproxy *old_ns = tsk->nsproxy;
  74 + struct nsproxy *new_ns;
  75 + int err = 0;
68 76  
69 77 if (!old_ns)
70 78 return 0;
71 79  
72 80 get_nsproxy(old_ns);
73 81  
74   - return 0;
  82 + if (!(flags & CLONE_NEWNS))
  83 + return 0;
  84 +
  85 + new_ns = clone_namespaces(old_ns);
  86 + if (!new_ns) {
  87 + err = -ENOMEM;
  88 + goto out;
  89 + }
  90 +
  91 + tsk->nsproxy = new_ns;
  92 +
  93 + err = copy_namespace(flags, tsk);
  94 + if (err) {
  95 + tsk->nsproxy = old_ns;
  96 + put_nsproxy(new_ns);
  97 + goto out;
  98 + }
  99 +
  100 +out:
  101 + put_nsproxy(old_ns);
  102 + return err;
75 103 }
76 104  
77 105 void free_nsproxy(struct nsproxy *ns)
78 106 {
  107 + if (ns->namespace)
  108 + put_namespace(ns->namespace);
79 109 kfree(ns);
80 110 }