Commit b90fa9ae8f51f098ee480bbaabd6867992e9fc58
Committed by
Linus Torvalds
1 parent
03e06e68ff
Exists in
master
and in
4 other branches
[PATCH] shared mount handling: bind and rbind
Implement handling of MS_BIND in the presence of shared mounts (see Documentation/sharedsubtree.txt at the end of the patch series for a detailed description).

Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 4 changed files with 204 additions and 22 deletions Side-by-side Diff
fs/namespace.c
... | ... | @@ -28,8 +28,6 @@ |
28 | 28 | |
29 | 29 | extern int __init init_rootfs(void); |
30 | 30 | |
31 | -#define CL_EXPIRE 0x01 | |
32 | - | |
33 | 31 | #ifdef CONFIG_SYSFS |
34 | 32 | extern int __init sysfs_init(void); |
35 | 33 | #else |
36 | 34 | |
37 | 35 | |
38 | 36 | |
... | ... | @@ -145,15 +143,45 @@ |
145 | 143 | old_nd->dentry->d_mounted--; |
146 | 144 | } |
147 | 145 | |
146 | +void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, | |
147 | + struct vfsmount *child_mnt) | |
148 | +{ | |
149 | + child_mnt->mnt_parent = mntget(mnt); | |
150 | + child_mnt->mnt_mountpoint = dget(dentry); | |
151 | + dentry->d_mounted++; | |
152 | +} | |
153 | + | |
148 | 154 | static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) |
149 | 155 | { |
150 | - mnt->mnt_parent = mntget(nd->mnt); | |
151 | - mnt->mnt_mountpoint = dget(nd->dentry); | |
152 | - list_add(&mnt->mnt_hash, mount_hashtable + hash(nd->mnt, nd->dentry)); | |
156 | + mnt_set_mountpoint(nd->mnt, nd->dentry, mnt); | |
157 | + list_add_tail(&mnt->mnt_hash, mount_hashtable + | |
158 | + hash(nd->mnt, nd->dentry)); | |
153 | 159 | list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); |
154 | - nd->dentry->d_mounted++; | |
155 | 160 | } |
156 | 161 | |
162 | +/* | |
163 | + * the caller must hold vfsmount_lock | |
164 | + */ | |
165 | +static void commit_tree(struct vfsmount *mnt) | |
166 | +{ | |
167 | + struct vfsmount *parent = mnt->mnt_parent; | |
168 | + struct vfsmount *m; | |
169 | + LIST_HEAD(head); | |
170 | + struct namespace *n = parent->mnt_namespace; | |
171 | + | |
172 | + BUG_ON(parent == mnt); | |
173 | + | |
174 | + list_add_tail(&head, &mnt->mnt_list); | |
175 | + list_for_each_entry(m, &head, mnt_list) | |
176 | + m->mnt_namespace = n; | |
177 | + list_splice(&head, n->list.prev); | |
178 | + | |
179 | + list_add_tail(&mnt->mnt_hash, mount_hashtable + | |
180 | + hash(parent, mnt->mnt_mountpoint)); | |
181 | + list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); | |
182 | + touch_namespace(n); | |
183 | +} | |
184 | + | |
157 | 185 | static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root) |
158 | 186 | { |
159 | 187 | struct list_head *next = p->mnt_mounts.next; |
160 | 188 | |
... | ... | @@ -183,8 +211,12 @@ |
183 | 211 | mnt->mnt_root = dget(root); |
184 | 212 | mnt->mnt_mountpoint = mnt->mnt_root; |
185 | 213 | mnt->mnt_parent = mnt; |
186 | - mnt->mnt_namespace = current->namespace; | |
187 | 214 | |
215 | + if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old)) | |
216 | + list_add(&mnt->mnt_share, &old->mnt_share); | |
217 | + if (flag & CL_MAKE_SHARED) | |
218 | + set_mnt_shared(mnt); | |
219 | + | |
188 | 220 | /* stick the duplicate mount on the same expiry list |
189 | 221 | * as the original if that was on one */ |
190 | 222 | if (flag & CL_EXPIRE) { |
... | ... | @@ -379,7 +411,7 @@ |
379 | 411 | |
380 | 412 | EXPORT_SYMBOL(may_umount); |
381 | 413 | |
382 | -static void release_mounts(struct list_head *head) | |
414 | +void release_mounts(struct list_head *head) | |
383 | 415 | { |
384 | 416 | struct vfsmount *mnt; |
385 | 417 | while(!list_empty(head)) { |
... | ... | @@ -401,7 +433,7 @@ |
401 | 433 | } |
402 | 434 | } |
403 | 435 | |
404 | -static void umount_tree(struct vfsmount *mnt, struct list_head *kill) | |
436 | +void umount_tree(struct vfsmount *mnt, struct list_head *kill) | |
405 | 437 | { |
406 | 438 | struct vfsmount *p; |
407 | 439 | |
... | ... | @@ -581,7 +613,7 @@ |
581 | 613 | } |
582 | 614 | } |
583 | 615 | |
584 | -static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, | |
616 | +struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, | |
585 | 617 | int flag) |
586 | 618 | { |
587 | 619 | struct vfsmount *res, *p, *q, *r, *s; |
... | ... | @@ -626,6 +658,67 @@ |
626 | 658 | return NULL; |
627 | 659 | } |
628 | 660 | |
661 | +/* | |
662 | + * @source_mnt : mount tree to be attached | |
663 | + * @nd : place the mount tree @source_mnt is attached | |
664 | + * | |
665 | + * NOTE: in the table below explains the semantics when a source mount | |
666 | + * of a given type is attached to a destination mount of a given type. | |
667 | + * --------------------------------------------- | |
668 | + * | BIND MOUNT OPERATION | | |
669 | + * |******************************************** | |
670 | + * | source-->| shared | private | | |
671 | + * | dest | | | | |
672 | + * | | | | | | |
673 | + * | v | | | | |
674 | + * |******************************************** | |
675 | + * | shared | shared (++) | shared (+) | | |
676 | + * | | | | | |
677 | + * |non-shared| shared (+) | private | | |
678 | + * ********************************************* | |
679 | + * A bind operation clones the source mount and mounts the clone on the | |
680 | + * destination mount. | |
681 | + * | |
682 | + * (++) the cloned mount is propagated to all the mounts in the propagation | |
683 | + * tree of the destination mount and the cloned mount is added to | |
684 | + * the peer group of the source mount. | |
685 | + * (+) the cloned mount is created under the destination mount and is marked | |
686 | + * as shared. The cloned mount is added to the peer group of the source | |
687 | + * mount. | |
688 | + * | |
689 | + * if the source mount is a tree, the operations explained above is | |
690 | + * applied to each mount in the tree. | |
691 | + * Must be called without spinlocks held, since this function can sleep | |
692 | + * in allocations. | |
693 | + */ | |
694 | +static int attach_recursive_mnt(struct vfsmount *source_mnt, | |
695 | + struct nameidata *nd) | |
696 | +{ | |
697 | + LIST_HEAD(tree_list); | |
698 | + struct vfsmount *dest_mnt = nd->mnt; | |
699 | + struct dentry *dest_dentry = nd->dentry; | |
700 | + struct vfsmount *child, *p; | |
701 | + | |
702 | + if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) | |
703 | + return -EINVAL; | |
704 | + | |
705 | + if (IS_MNT_SHARED(dest_mnt)) { | |
706 | + for (p = source_mnt; p; p = next_mnt(p, source_mnt)) | |
707 | + set_mnt_shared(p); | |
708 | + } | |
709 | + | |
710 | + spin_lock(&vfsmount_lock); | |
711 | + mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); | |
712 | + commit_tree(source_mnt); | |
713 | + | |
714 | + list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { | |
715 | + list_del_init(&child->mnt_hash); | |
716 | + commit_tree(child); | |
717 | + } | |
718 | + spin_unlock(&vfsmount_lock); | |
719 | + return 0; | |
720 | +} | |
721 | + | |
629 | 722 | static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) |
630 | 723 | { |
631 | 724 | int err; |
... | ... | @@ -646,17 +739,8 @@ |
646 | 739 | goto out_unlock; |
647 | 740 | |
648 | 741 | err = -ENOENT; |
649 | - spin_lock(&vfsmount_lock); | |
650 | - if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) { | |
651 | - struct list_head head; | |
652 | - | |
653 | - attach_mnt(mnt, nd); | |
654 | - list_add_tail(&head, &mnt->mnt_list); | |
655 | - list_splice(&head, current->namespace->list.prev); | |
656 | - err = 0; | |
657 | - touch_namespace(current->namespace); | |
658 | - } | |
659 | - spin_unlock(&vfsmount_lock); | |
742 | + if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) | |
743 | + err = attach_recursive_mnt(mnt, nd); | |
660 | 744 | out_unlock: |
661 | 745 | up(&nd->dentry->d_inode->i_sem); |
662 | 746 | if (!err) |
fs/pnode.c
... | ... | @@ -20,10 +20,89 @@ |
20 | 20 | void change_mnt_propagation(struct vfsmount *mnt, int type) |
21 | 21 | { |
22 | 22 | if (type == MS_SHARED) { |
23 | - mnt->mnt_flags |= MNT_SHARED; | |
23 | + set_mnt_shared(mnt); | |
24 | 24 | } else { |
25 | 25 | list_del_init(&mnt->mnt_share); |
26 | 26 | mnt->mnt_flags &= ~MNT_PNODE_MASK; |
27 | 27 | } |
28 | +} | |
29 | + | |
30 | +/* | |
31 | + * get the next mount in the propagation tree. | |
32 | + * @m: the mount seen last | |
33 | + * @origin: the original mount from where the tree walk initiated | |
34 | + */ | |
35 | +static struct vfsmount *propagation_next(struct vfsmount *m, | |
36 | + struct vfsmount *origin) | |
37 | +{ | |
38 | + m = next_peer(m); | |
39 | + if (m == origin) | |
40 | + return NULL; | |
41 | + return m; | |
42 | +} | |
43 | + | |
44 | +/* | |
45 | + * mount 'source_mnt' under the destination 'dest_mnt' at | |
46 | + * dentry 'dest_dentry'. And propagate that mount to | |
47 | + * all the peer and slave mounts of 'dest_mnt'. | |
48 | + * Link all the new mounts into a propagation tree headed at | |
49 | + * source_mnt. Also link all the new mounts using ->mnt_list | |
50 | + * headed at source_mnt's ->mnt_list | |
51 | + * | |
52 | + * @dest_mnt: destination mount. | |
53 | + * @dest_dentry: destination dentry. | |
54 | + * @source_mnt: source mount. | |
55 | + * @tree_list : list of heads of trees to be attached. | |
56 | + */ | |
57 | +int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, | |
58 | + struct vfsmount *source_mnt, struct list_head *tree_list) | |
59 | +{ | |
60 | + struct vfsmount *m, *child; | |
61 | + int ret = 0; | |
62 | + struct vfsmount *prev_dest_mnt = dest_mnt; | |
63 | + struct vfsmount *prev_src_mnt = source_mnt; | |
64 | + LIST_HEAD(tmp_list); | |
65 | + LIST_HEAD(umount_list); | |
66 | + | |
67 | + for (m = propagation_next(dest_mnt, dest_mnt); m; | |
68 | + m = propagation_next(m, dest_mnt)) { | |
69 | + int type = CL_PROPAGATION; | |
70 | + | |
71 | + if (IS_MNT_NEW(m)) | |
72 | + continue; | |
73 | + | |
74 | + if (IS_MNT_SHARED(m)) | |
75 | + type |= CL_MAKE_SHARED; | |
76 | + | |
77 | + if (!(child = copy_tree(source_mnt, source_mnt->mnt_root, | |
78 | + type))) { | |
79 | + ret = -ENOMEM; | |
80 | + list_splice(tree_list, tmp_list.prev); | |
81 | + goto out; | |
82 | + } | |
83 | + | |
84 | + if (is_subdir(dest_dentry, m->mnt_root)) { | |
85 | + mnt_set_mountpoint(m, dest_dentry, child); | |
86 | + list_add_tail(&child->mnt_hash, tree_list); | |
87 | + } else { | |
88 | + /* | |
89 | + * This can happen if the parent mount was bind mounted | |
90 | + * on some subdirectory of a shared/slave mount. | |
91 | + */ | |
92 | + list_add_tail(&child->mnt_hash, &tmp_list); | |
93 | + } | |
94 | + prev_dest_mnt = m; | |
95 | + prev_src_mnt = child; | |
96 | + } | |
97 | +out: | |
98 | + spin_lock(&vfsmount_lock); | |
99 | + while (!list_empty(&tmp_list)) { | |
100 | + child = list_entry(tmp_list.next, struct vfsmount, mnt_hash); | |
101 | + list_del_init(&child->mnt_hash); | |
102 | + umount_tree(child, &umount_list); | |
103 | + } | |
104 | + spin_unlock(&vfsmount_lock); | |
105 | + release_mounts(&umount_list); | |
106 | + return ret; | |
28 | 107 | } |
fs/pnode.h
... | ... | @@ -12,8 +12,22 @@ |
12 | 12 | #include <linux/mount.h> |
13 | 13 | |
14 | 14 | #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED) |
15 | +#define IS_MNT_NEW(mnt) (!mnt->mnt_namespace) | |
15 | 16 | #define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED) |
16 | 17 | |
18 | +#define CL_EXPIRE 0x01 | |
19 | +#define CL_COPY_ALL 0x04 | |
20 | +#define CL_MAKE_SHARED 0x08 | |
21 | +#define CL_PROPAGATION 0x10 | |
22 | + | |
23 | +static inline void set_mnt_shared(struct vfsmount *mnt) | |
24 | +{ | |
25 | + mnt->mnt_flags &= ~MNT_PNODE_MASK; | |
26 | + mnt->mnt_flags |= MNT_SHARED; | |
27 | +} | |
28 | + | |
17 | 29 | void change_mnt_propagation(struct vfsmount *, int); |
30 | +int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *, | |
31 | + struct list_head *); | |
18 | 32 | #endif /* _LINUX_PNODE_H */ |
include/linux/fs.h
... | ... | @@ -1251,7 +1251,12 @@ |
1251 | 1251 | extern struct vfsmount *kern_mount(struct file_system_type *); |
1252 | 1252 | extern int may_umount_tree(struct vfsmount *); |
1253 | 1253 | extern int may_umount(struct vfsmount *); |
1254 | +extern void umount_tree(struct vfsmount *, struct list_head *); | |
1255 | +extern void release_mounts(struct list_head *); | |
1254 | 1256 | extern long do_mount(char *, char *, char *, unsigned long, void *); |
1257 | +extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); | |
1258 | +extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, | |
1259 | + struct vfsmount *); | |
1255 | 1260 | |
1256 | 1261 | extern int vfs_statfs(struct super_block *, struct kstatfs *); |
1257 | 1262 |