Commit b90fa9ae8f51f098ee480bbaabd6867992e9fc58

Authored by Ram Pai
Committed by Linus Torvalds
1 parent 03e06e68ff

[PATCH] shared mount handling: bind and rbind

Implement handling of MS_BIND in the presence of shared mounts (see
Documentation/sharedsubtree.txt at the end of the patch series for a detailed
description).

Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 4 changed files with 204 additions and 22 deletions Side-by-side Diff

... ... @@ -28,8 +28,6 @@
28 28  
29 29 extern int __init init_rootfs(void);
30 30  
31   -#define CL_EXPIRE 0x01
32   -
33 31 #ifdef CONFIG_SYSFS
34 32 extern int __init sysfs_init(void);
35 33 #else
36 34  
37 35  
38 36  
... ... @@ -145,15 +143,45 @@
145 143 old_nd->dentry->d_mounted--;
146 144 }
147 145  
  146 +void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
  147 + struct vfsmount *child_mnt)
  148 +{
  149 + child_mnt->mnt_parent = mntget(mnt);
  150 + child_mnt->mnt_mountpoint = dget(dentry);
  151 + dentry->d_mounted++;
  152 +}
  153 +
148 154 static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
149 155 {
150   - mnt->mnt_parent = mntget(nd->mnt);
151   - mnt->mnt_mountpoint = dget(nd->dentry);
152   - list_add(&mnt->mnt_hash, mount_hashtable + hash(nd->mnt, nd->dentry));
  156 + mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
  157 + list_add_tail(&mnt->mnt_hash, mount_hashtable +
  158 + hash(nd->mnt, nd->dentry));
153 159 list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
154   - nd->dentry->d_mounted++;
155 160 }
156 161  
  162 +/*
  163 + * the caller must hold vfsmount_lock
  164 + */
  165 +static void commit_tree(struct vfsmount *mnt)
  166 +{
  167 + struct vfsmount *parent = mnt->mnt_parent;
  168 + struct vfsmount *m;
  169 + LIST_HEAD(head);
  170 + struct namespace *n = parent->mnt_namespace;
  171 +
  172 + BUG_ON(parent == mnt);
  173 +
  174 + list_add_tail(&head, &mnt->mnt_list);
  175 + list_for_each_entry(m, &head, mnt_list)
  176 + m->mnt_namespace = n;
  177 + list_splice(&head, n->list.prev);
  178 +
  179 + list_add_tail(&mnt->mnt_hash, mount_hashtable +
  180 + hash(parent, mnt->mnt_mountpoint));
  181 + list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
  182 + touch_namespace(n);
  183 +}
  184 +
157 185 static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
158 186 {
159 187 struct list_head *next = p->mnt_mounts.next;
160 188  
... ... @@ -183,8 +211,12 @@
183 211 mnt->mnt_root = dget(root);
184 212 mnt->mnt_mountpoint = mnt->mnt_root;
185 213 mnt->mnt_parent = mnt;
186   - mnt->mnt_namespace = current->namespace;
187 214  
  215 + if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
  216 + list_add(&mnt->mnt_share, &old->mnt_share);
  217 + if (flag & CL_MAKE_SHARED)
  218 + set_mnt_shared(mnt);
  219 +
188 220 /* stick the duplicate mount on the same expiry list
189 221 * as the original if that was on one */
190 222 if (flag & CL_EXPIRE) {
... ... @@ -379,7 +411,7 @@
379 411  
380 412 EXPORT_SYMBOL(may_umount);
381 413  
382   -static void release_mounts(struct list_head *head)
  414 +void release_mounts(struct list_head *head)
383 415 {
384 416 struct vfsmount *mnt;
385 417 while(!list_empty(head)) {
... ... @@ -401,7 +433,7 @@
401 433 }
402 434 }
403 435  
404   -static void umount_tree(struct vfsmount *mnt, struct list_head *kill)
  436 +void umount_tree(struct vfsmount *mnt, struct list_head *kill)
405 437 {
406 438 struct vfsmount *p;
407 439  
... ... @@ -581,7 +613,7 @@
581 613 }
582 614 }
583 615  
584   -static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
  616 +struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
585 617 int flag)
586 618 {
587 619 struct vfsmount *res, *p, *q, *r, *s;
... ... @@ -626,6 +658,67 @@
626 658 return NULL;
627 659 }
628 660  
  661 +/*
  662 + * @source_mnt : mount tree to be attached
  663 + * @nd : place where the mount tree @source_mnt is attached
  664 + *
  665 + * NOTE: the table below explains the semantics when a source mount
  666 + * of a given type is attached to a destination mount of a given type.
  667 + * ---------------------------------------------
  668 + * | BIND MOUNT OPERATION |
  669 + * |********************************************
  670 + * | source-->| shared | private |
  671 + * | dest | | |
  672 + * | | | | |
  673 + * | v | | |
  674 + * |********************************************
  675 + * | shared | shared (++) | shared (+) |
  676 + * | | | |
  677 + * |non-shared| shared (+) | private |
  678 + * *********************************************
  679 + * A bind operation clones the source mount and mounts the clone on the
  680 + * destination mount.
  681 + *
  682 + * (++) the cloned mount is propagated to all the mounts in the propagation
  683 + * tree of the destination mount and the cloned mount is added to
  684 + * the peer group of the source mount.
  685 + * (+) the cloned mount is created under the destination mount and is marked
  686 + * as shared. The cloned mount is added to the peer group of the source
  687 + * mount.
  688 + *
  689 + * if the source mount is a tree, the operations explained above are
  690 + * applied to each mount in the tree.
  691 + * Must be called without spinlocks held, since this function can sleep
  692 + * in allocations.
  693 + */
  694 +static int attach_recursive_mnt(struct vfsmount *source_mnt,
  695 + struct nameidata *nd)
  696 +{
  697 + LIST_HEAD(tree_list);
  698 + struct vfsmount *dest_mnt = nd->mnt;
  699 + struct dentry *dest_dentry = nd->dentry;
  700 + struct vfsmount *child, *p;
  701 +
  702 + if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
  703 + return -EINVAL;
  704 +
  705 + if (IS_MNT_SHARED(dest_mnt)) {
  706 + for (p = source_mnt; p; p = next_mnt(p, source_mnt))
  707 + set_mnt_shared(p);
  708 + }
  709 +
  710 + spin_lock(&vfsmount_lock);
  711 + mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
  712 + commit_tree(source_mnt);
  713 +
  714 + list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
  715 + list_del_init(&child->mnt_hash);
  716 + commit_tree(child);
  717 + }
  718 + spin_unlock(&vfsmount_lock);
  719 + return 0;
  720 +}
  721 +
629 722 static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
630 723 {
631 724 int err;
... ... @@ -646,17 +739,8 @@
646 739 goto out_unlock;
647 740  
648 741 err = -ENOENT;
649   - spin_lock(&vfsmount_lock);
650   - if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
651   - struct list_head head;
652   -
653   - attach_mnt(mnt, nd);
654   - list_add_tail(&head, &mnt->mnt_list);
655   - list_splice(&head, current->namespace->list.prev);
656   - err = 0;
657   - touch_namespace(current->namespace);
658   - }
659   - spin_unlock(&vfsmount_lock);
  742 + if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
  743 + err = attach_recursive_mnt(mnt, nd);
660 744 out_unlock:
661 745 up(&nd->dentry->d_inode->i_sem);
662 746 if (!err)
... ... @@ -20,10 +20,89 @@
20 20 void change_mnt_propagation(struct vfsmount *mnt, int type)
21 21 {
22 22 if (type == MS_SHARED) {
23   - mnt->mnt_flags |= MNT_SHARED;
  23 + set_mnt_shared(mnt);
24 24 } else {
25 25 list_del_init(&mnt->mnt_share);
26 26 mnt->mnt_flags &= ~MNT_PNODE_MASK;
27 27 }
  28 +}
  29 +
  30 +/*
  31 + * get the next mount in the propagation tree.
  32 + * @m: the mount seen last
  33 + * @origin: the original mount from where the tree walk initiated
  34 + */
  35 +static struct vfsmount *propagation_next(struct vfsmount *m,
  36 + struct vfsmount *origin)
  37 +{
  38 + m = next_peer(m);
  39 + if (m == origin)
  40 + return NULL;
  41 + return m;
  42 +}
  43 +
  44 +/*
  45 + * mount 'source_mnt' under the destination 'dest_mnt' at
  46 + * dentry 'dest_dentry'. And propagate that mount to
  47 + * all the peer and slave mounts of 'dest_mnt'.
  48 + * Link all the new mounts into a propagation tree headed at
  49 + * source_mnt. Also link all the new mounts using ->mnt_list
  50 + * headed at source_mnt's ->mnt_list
  51 + *
  52 + * @dest_mnt: destination mount.
  53 + * @dest_dentry: destination dentry.
  54 + * @source_mnt: source mount.
  55 + * @tree_list : list of heads of trees to be attached.
  56 + */
  57 +int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
  58 + struct vfsmount *source_mnt, struct list_head *tree_list)
  59 +{
  60 + struct vfsmount *m, *child;
  61 + int ret = 0;
  62 + struct vfsmount *prev_dest_mnt = dest_mnt;
  63 + struct vfsmount *prev_src_mnt = source_mnt;
  64 + LIST_HEAD(tmp_list);
  65 + LIST_HEAD(umount_list);
  66 +
  67 + for (m = propagation_next(dest_mnt, dest_mnt); m;
  68 + m = propagation_next(m, dest_mnt)) {
  69 + int type = CL_PROPAGATION;
  70 +
  71 + if (IS_MNT_NEW(m))
  72 + continue;
  73 +
  74 + if (IS_MNT_SHARED(m))
  75 + type |= CL_MAKE_SHARED;
  76 +
  77 + if (!(child = copy_tree(source_mnt, source_mnt->mnt_root,
  78 + type))) {
  79 + ret = -ENOMEM;
  80 + list_splice(tree_list, tmp_list.prev);
  81 + goto out;
  82 + }
  83 +
  84 + if (is_subdir(dest_dentry, m->mnt_root)) {
  85 + mnt_set_mountpoint(m, dest_dentry, child);
  86 + list_add_tail(&child->mnt_hash, tree_list);
  87 + } else {
  88 + /*
  89 + * This can happen if the parent mount was bind mounted
  90 + * on some subdirectory of a shared/slave mount.
  91 + */
  92 + list_add_tail(&child->mnt_hash, &tmp_list);
  93 + }
  94 + prev_dest_mnt = m;
  95 + prev_src_mnt = child;
  96 + }
  97 +out:
  98 + spin_lock(&vfsmount_lock);
  99 + while (!list_empty(&tmp_list)) {
  100 + child = list_entry(tmp_list.next, struct vfsmount, mnt_hash);
  101 + list_del_init(&child->mnt_hash);
  102 + umount_tree(child, &umount_list);
  103 + }
  104 + spin_unlock(&vfsmount_lock);
  105 + release_mounts(&umount_list);
  106 + return ret;
28 107 }
... ... @@ -12,8 +12,22 @@
12 12 #include <linux/mount.h>
13 13  
14 14 #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
  15 +#define IS_MNT_NEW(mnt) (!mnt->mnt_namespace)
15 16 #define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
16 17  
  18 +#define CL_EXPIRE 0x01
  19 +#define CL_COPY_ALL 0x04
  20 +#define CL_MAKE_SHARED 0x08
  21 +#define CL_PROPAGATION 0x10
  22 +
  23 +static inline void set_mnt_shared(struct vfsmount *mnt)
  24 +{
  25 + mnt->mnt_flags &= ~MNT_PNODE_MASK;
  26 + mnt->mnt_flags |= MNT_SHARED;
  27 +}
  28 +
17 29 void change_mnt_propagation(struct vfsmount *, int);
  30 +int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
  31 + struct list_head *);
18 32 #endif /* _LINUX_PNODE_H */
... ... @@ -1251,7 +1251,12 @@
1251 1251 extern struct vfsmount *kern_mount(struct file_system_type *);
1252 1252 extern int may_umount_tree(struct vfsmount *);
1253 1253 extern int may_umount(struct vfsmount *);
  1254 +extern void umount_tree(struct vfsmount *, struct list_head *);
  1255 +extern void release_mounts(struct list_head *);
1254 1256 extern long do_mount(char *, char *, char *, unsigned long, void *);
  1257 +extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
  1258 +extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
  1259 + struct vfsmount *);
1255 1260  
1256 1261 extern int vfs_statfs(struct super_block *, struct kstatfs *);
1257 1262