Commit e149ed2b805fefdccf7ccdfc19eca22fdd4514ac
1 parent
f77c80142e
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now. It's not mountable (not even registered, so it's not in /proc/filesystems, etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well. get_proc_ns() is a macro now (it's simply returning ->i_private; would have been an inline, if not for a header ordering headache). proc_ns_inode() is an ex-parrot. The interface used in procfs is ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path() if present. See ns_get_path()/ns_prune_dentry()/nsfs_evict() for details of that mechanism.
As a result, proc_ns_follow_link() has stopped poking in nd->path.mnt; it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Showing 10 changed files with 208 additions and 161 deletions Side-by-side Diff
fs/Makefile
... | ... | @@ -11,7 +11,7 @@ |
11 | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ |
12 | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
13 | 13 | pnode.o splice.o sync.o utimes.o \ |
14 | - stack.o fs_struct.o statfs.o fs_pin.o | |
14 | + stack.o fs_struct.o statfs.o fs_pin.o nsfs.o | |
15 | 15 | |
16 | 16 | ifeq ($(CONFIG_BLOCK),y) |
17 | 17 | obj-y += buffer.o block_dev.o direct-io.o mpage.o |
fs/internal.h
fs/namespace.c
... | ... | @@ -1569,8 +1569,8 @@ |
1569 | 1569 | static bool is_mnt_ns_file(struct dentry *dentry) |
1570 | 1570 | { |
1571 | 1571 | /* Is this a proxy for a mount namespace? */ |
1572 | - struct inode *inode = dentry->d_inode; | |
1573 | - return proc_ns_inode(inode) && dentry->d_fsdata == &mntns_operations; | |
1572 | + return dentry->d_op == &ns_dentry_operations && | |
1573 | + dentry->d_fsdata == &mntns_operations; | |
1574 | 1574 | } |
1575 | 1575 | |
1576 | 1576 | struct mnt_namespace *to_mnt_ns(struct ns_common *ns) |
... | ... | @@ -2016,7 +2016,10 @@ |
2016 | 2016 | if (IS_MNT_UNBINDABLE(old)) |
2017 | 2017 | goto out2; |
2018 | 2018 | |
2019 | - if (!check_mnt(parent) || !check_mnt(old)) | |
2019 | + if (!check_mnt(parent)) | |
2020 | + goto out2; | |
2021 | + | |
2022 | + if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations) | |
2020 | 2023 | goto out2; |
2021 | 2024 | |
2022 | 2025 | if (!recurse && has_locked_children(old, old_path.dentry)) |
fs/nsfs.c
1 | +#include <linux/mount.h> | |
2 | +#include <linux/file.h> | |
3 | +#include <linux/fs.h> | |
4 | +#include <linux/proc_ns.h> | |
5 | +#include <linux/magic.h> | |
6 | +#include <linux/ktime.h> | |
7 | + | |
/*
 * The kernel-internal mount of nsfs. Assigned once by nsfs_init() through
 * kern_mount(); every call to ns_get_path() does mntget() on it.
 */
8 | +static struct vfsmount *nsfs_mnt; | |
9 | + | |
/*
 * File operations installed on every inode created by ns_get_path().
 * Only ->llseek is set. proc_ns_fget() compares file->f_op against the
 * address of this table to decide whether a file refers to a namespace.
 */
10 | +static const struct file_operations ns_file_operations = { | |
11 | + .llseek = no_llseek, | |
12 | +}; | |
13 | + | |
/*
 * ->d_dname() callback for nsfs dentries.
 *
 * Formats the dentry name through dynamic_dname() as
 * "<namespace type name>:[<inode number>]". The namespace type comes from
 * dentry->d_fsdata, which ns_get_path() sets to the proc_ns_operations the
 * dentry was created for.
 */
14 | +static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | |
15 | +{ | |
16 | + struct inode *inode = dentry->d_inode; | |
17 | + const struct proc_ns_operations *ns_ops = dentry->d_fsdata; | |
18 | + | |
19 | + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", | |
20 | + ns_ops->name, inode->i_ino); | |
21 | +} | |
22 | + | |
/*
 * ->d_prune() callback for nsfs dentries.
 *
 * If the dentry still has an inode, resets the 'stashed' field of the
 * ns_common found in inode->i_private to 0, so that ns_get_path() no longer
 * sees this dentry as the cached one. A dentry without an inode is left
 * alone; ns_get_path() relies on that when it discards a dentry that lost
 * the race to be stashed.
 */
23 | +static void ns_prune_dentry(struct dentry *dentry) | |
24 | +{ | |
25 | + struct inode *inode = dentry->d_inode; | |
26 | + if (inode) { | |
27 | + struct ns_common *ns = inode->i_private; | |
28 | + atomic_long_set(&ns->stashed, 0); | |
29 | + } | |
30 | +} | |
31 | + | |
/*
 * Dentry operations used by nsfs (passed to mount_pseudo() in nsfs_mount()).
 * Not static: fs/namespace.c compares dentry->d_op against the address of
 * this table to recognise nsfs dentries.
 */
32 | +const struct dentry_operations ns_dentry_operations = | |
33 | +{ | |
34 | + .d_prune = ns_prune_dentry, | |
35 | + .d_delete = always_delete_dentry, | |
36 | + .d_dname = ns_dname, | |
37 | +}; | |
38 | + | |
/*
 * ->evict_inode() for nsfs.
 *
 * Releases the namespace reference that ns_get_path() stored in
 * inode->i_private when it created the inode. The pointer is read before
 * clear_inode() is called and the reference is put afterwards.
 */
39 | +static void nsfs_evict(struct inode *inode) | |
40 | +{ | |
41 | + struct ns_common *ns = inode->i_private; | |
42 | + clear_inode(inode); | |
43 | + ns->ops->put(ns); | |
44 | +} | |
45 | + | |
/*
 * ns_get_path - produce a <vfsmount, dentry> pair on nsfs for one of a
 * task's namespaces.
 * @path:   filled in on success. path->mnt carries the reference taken by
 *          mntget() at entry; path->dentry is either the stashed dentry
 *          (after a successful lockref_get_not_dead()) or a dentry
 *          allocated here.
 * @task:   task whose namespace is obtained through ns_ops->get().
 * @ns_ops: operations of the namespace type; stored in dentry->d_fsdata of
 *          a newly created dentry.
 *
 * Returns NULL on success, ERR_PTR(-ENOENT) if ns_ops->get() yields no
 * namespace, or ERR_PTR(-ENOMEM) if the inode or the dentry cannot be
 * allocated. The mount reference is dropped on every error return.
 */
46 | +void *ns_get_path(struct path *path, struct task_struct *task, | |
47 | + const struct proc_ns_operations *ns_ops) | |
48 | +{ | |
49 | + struct vfsmount *mnt = mntget(nsfs_mnt); | |
50 | + struct qstr qname = { .name = "", }; | |
51 | + struct dentry *dentry; | |
52 | + struct inode *inode; | |
53 | + struct ns_common *ns; | |
54 | + unsigned long d; | |
55 | + | |
/* Retry point: the namespace is looked up afresh on every attempt. */
56 | +again: | |
57 | + ns = ns_ops->get(task); | |
58 | + if (!ns) { | |
59 | + mntput(mnt); | |
60 | + return ERR_PTR(-ENOENT); | |
61 | + } | |
/*
 * Fast path: reuse the dentry stashed in ns->stashed, if there is one and a
 * reference can still be taken on it. The stashed value itself is a
 * non-counting pointer, hence the lockref_get_not_dead() under RCU.
 */
62 | + rcu_read_lock(); | |
63 | + d = atomic_long_read(&ns->stashed); | |
64 | + if (!d) | |
65 | + goto slow; | |
66 | + dentry = (struct dentry *)d; | |
67 | + if (!lockref_get_not_dead(&dentry->d_lockref)) | |
68 | + goto slow; | |
69 | + rcu_read_unlock(); | |
/*
 * The reference just obtained from ->get() is not needed: the inode behind
 * the reused dentry already holds one in i_private (see the slow path).
 */
70 | + ns_ops->put(ns); | |
71 | +got_it: | |
72 | + path->mnt = mnt; | |
73 | + path->dentry = dentry; | |
74 | + return NULL; | |
/* Slow path: nothing usable is stashed, so build a new inode and dentry. */
75 | +slow: | |
76 | + rcu_read_unlock(); | |
77 | + inode = new_inode_pseudo(mnt->mnt_sb); | |
78 | + if (!inode) { | |
79 | + ns_ops->put(ns); | |
80 | + mntput(mnt); | |
81 | + return ERR_PTR(-ENOMEM); | |
82 | + } | |
83 | + inode->i_ino = ns->inum; | |
84 | + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | |
85 | + inode->i_flags |= S_IMMUTABLE; | |
86 | + inode->i_mode = S_IFREG | S_IRUGO; | |
87 | + inode->i_fop = &ns_file_operations; | |
/* From here on the namespace reference is owned by the inode. */
88 | + inode->i_private = ns; | |
89 | + | |
90 | + dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); | |
91 | + if (!dentry) { | |
/*
 * No explicit ns_ops->put() on this path: the reference sits in
 * inode->i_private and nsfs_evict() puts it. NOTE(review): this assumes
 * iput() of the fresh inode ends up in ->evict_inode - confirm.
 */
92 | + iput(inode); | |
93 | + mntput(mnt); | |
94 | + return ERR_PTR(-ENOMEM); | |
95 | + } | |
96 | + d_instantiate(dentry, inode); | |
97 | + dentry->d_fsdata = (void *)ns_ops; | |
/*
 * Publish the new dentry only if nothing is stashed yet. A non-zero return
 * means the slot was already occupied; the new dentry is then thrown away
 * and the whole lookup is retried.
 */
98 | + d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); | |
99 | + if (d) { | |
/*
 * NOTE(review): d_delete() presumably detaches the inode, so that
 * ns_prune_dentry() finds no inode and does not clear the stash that
 * belongs to the other dentry - confirm against d_delete() semantics.
 */
100 | + d_delete(dentry); /* make sure ->d_prune() does nothing */ | |
101 | + dput(dentry); | |
102 | + cpu_relax(); | |
103 | + goto again; | |
104 | + } | |
105 | + goto got_it; | |
106 | +} | |
107 | + | |
/*
 * ns_get_name - format the name of one of a task's namespaces.
 * @buf:    destination buffer.
 * @size:   size of @buf, passed straight to snprintf().
 * @task:   task whose namespace is obtained through ns_ops->get().
 * @ns_ops: operations of the namespace type; supplies the name prefix.
 *
 * Writes "<ns_ops->name>:[<ns->inum>]" into @buf. The namespace reference
 * taken by ->get() is dropped again before returning.
 *
 * Returns the value returned by snprintf(), or -ENOENT if ns_ops->get()
 * yields no namespace (in which case @buf is not written).
 */
108 | +int ns_get_name(char *buf, size_t size, struct task_struct *task, | |
109 | + const struct proc_ns_operations *ns_ops) | |
110 | +{ | |
111 | + struct ns_common *ns; | |
112 | + int res = -ENOENT; | |
113 | + ns = ns_ops->get(task); | |
114 | + if (ns) { | |
115 | + res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum); | |
116 | + ns_ops->put(ns); | |
117 | + } | |
118 | + return res; | |
119 | +} | |
120 | + | |
/*
 * proc_ns_fget - look up a file descriptor and check that it refers to a
 * namespace file.
 * @fd: file descriptor to look up with fget().
 *
 * Returns the file, with the reference obtained by fget() still held, when
 * its f_op is ns_file_operations. Returns ERR_PTR(-EBADF) if fget() finds
 * no file, or ERR_PTR(-EINVAL) if the file is of any other kind; in the
 * latter case the reference is dropped with fput() first.
 */
121 | +struct file *proc_ns_fget(int fd) | |
122 | +{ | |
123 | + struct file *file; | |
124 | + | |
125 | + file = fget(fd); | |
126 | + if (!file) | |
127 | + return ERR_PTR(-EBADF); | |
128 | + | |
129 | + if (file->f_op != &ns_file_operations) | |
130 | + goto out_invalid; | |
131 | + | |
132 | + return file; | |
133 | + | |
134 | +out_invalid: | |
135 | + fput(file); | |
136 | + return ERR_PTR(-EINVAL); | |
137 | +} | |
138 | + | |
/*
 * Superblock operations for nsfs: generic simple_statfs() for ->statfs, and
 * nsfs_evict() for ->evict_inode so that the namespace reference held by an
 * inode is released with it.
 */
139 | +static const struct super_operations nsfs_ops = { | |
140 | + .statfs = simple_statfs, | |
141 | + .evict_inode = nsfs_evict, | |
142 | +}; | |
/*
 * ->mount() callback of the nsfs file system type.
 *
 * Delegates to mount_pseudo() with the name "nsfs:", nsfs_ops as superblock
 * operations, ns_dentry_operations as dentry operations and NSFS_MAGIC as
 * the magic number. @flags, @dev_name and @data are not used.
 */
143 | +static struct dentry *nsfs_mount(struct file_system_type *fs_type, | |
144 | + int flags, const char *dev_name, void *data) | |
145 | +{ | |
146 | + return mount_pseudo(fs_type, "nsfs:", &nsfs_ops, | |
147 | + &ns_dentry_operations, NSFS_MAGIC); | |
148 | +} | |
/*
 * File system type for nsfs. It is only handed to kern_mount() in
 * nsfs_init(); according to the commit message it is deliberately never
 * registered, so it cannot be mounted from user space.
 */
149 | +static struct file_system_type nsfs = { | |
150 | + .name = "nsfs", | |
151 | + .mount = nsfs_mount, | |
152 | + .kill_sb = kill_anon_super, | |
153 | +}; | |
154 | + | |
/*
 * nsfs_init - create the internal nsfs mount at boot.
 *
 * Stores the result of kern_mount(&nsfs) in nsfs_mnt and panics if that
 * fails. Afterwards MS_NOUSER is cleared from the superblock flags.
 * NOTE(review): presumably this is what allows files on nsfs to be
 * bind-mounted, as the commit message describes - confirm.
 */
155 | +void __init nsfs_init(void) | |
156 | +{ | |
157 | + nsfs_mnt = kern_mount(&nsfs); | |
158 | + if (IS_ERR(nsfs_mnt)) | |
159 | + panic("can't set nsfs up\n"); | |
160 | + nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; | |
161 | +} |
fs/proc/inode.c
... | ... | @@ -32,7 +32,6 @@ |
32 | 32 | { |
33 | 33 | struct proc_dir_entry *de; |
34 | 34 | struct ctl_table_header *head; |
35 | - struct ns_common *ns; | |
36 | 35 | |
37 | 36 | truncate_inode_pages_final(&inode->i_data); |
38 | 37 | clear_inode(inode); |
... | ... | @@ -49,10 +48,6 @@ |
49 | 48 | RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); |
50 | 49 | sysctl_head_put(head); |
51 | 50 | } |
52 | - /* Release any associated namespace */ | |
53 | - ns = PROC_I(inode)->ns.ns; | |
54 | - if (ns && ns->ops) | |
55 | - ns->ops->put(ns); | |
56 | 51 | } |
57 | 52 | |
58 | 53 | static struct kmem_cache * proc_inode_cachep; |
fs/proc/namespaces.c
1 | 1 | #include <linux/proc_fs.h> |
2 | 2 | #include <linux/nsproxy.h> |
3 | -#include <linux/sched.h> | |
4 | 3 | #include <linux/ptrace.h> |
5 | -#include <linux/fs_struct.h> | |
6 | -#include <linux/mount.h> | |
7 | -#include <linux/path.h> | |
8 | 4 | #include <linux/namei.h> |
9 | 5 | #include <linux/file.h> |
10 | 6 | #include <linux/utsname.h> |
11 | 7 | |
12 | 8 | |
13 | 9 | |
14 | 10 | |
15 | 11 | |
16 | 12 | |
17 | 13 | |
18 | 14 | |
19 | 15 | |
20 | 16 | |
... | ... | @@ -34,139 +30,45 @@ |
34 | 30 | &mntns_operations, |
35 | 31 | }; |
36 | 32 | |
37 | -static const struct file_operations ns_file_operations = { | |
38 | - .llseek = no_llseek, | |
39 | -}; | |
40 | - | |
41 | -static const struct inode_operations ns_inode_operations = { | |
42 | - .setattr = proc_setattr, | |
43 | -}; | |
44 | - | |
45 | -static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | |
46 | -{ | |
47 | - struct inode *inode = dentry->d_inode; | |
48 | - const struct proc_ns_operations *ns_ops = dentry->d_fsdata; | |
49 | - | |
50 | - return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", | |
51 | - ns_ops->name, inode->i_ino); | |
52 | -} | |
53 | - | |
54 | -const struct dentry_operations ns_dentry_operations = | |
55 | -{ | |
56 | - .d_delete = always_delete_dentry, | |
57 | - .d_dname = ns_dname, | |
58 | -}; | |
59 | - | |
60 | -static struct dentry *proc_ns_get_dentry(struct super_block *sb, | |
61 | - struct task_struct *task, const struct proc_ns_operations *ns_ops) | |
62 | -{ | |
63 | - struct dentry *dentry, *result; | |
64 | - struct inode *inode; | |
65 | - struct proc_inode *ei; | |
66 | - struct qstr qname = { .name = "", }; | |
67 | - struct ns_common *ns; | |
68 | - | |
69 | - ns = ns_ops->get(task); | |
70 | - if (!ns) | |
71 | - return ERR_PTR(-ENOENT); | |
72 | - | |
73 | - dentry = d_alloc_pseudo(sb, &qname); | |
74 | - if (!dentry) { | |
75 | - ns_ops->put(ns); | |
76 | - return ERR_PTR(-ENOMEM); | |
77 | - } | |
78 | - dentry->d_fsdata = (void *)ns_ops; | |
79 | - | |
80 | - inode = iget_locked(sb, ns->inum); | |
81 | - if (!inode) { | |
82 | - dput(dentry); | |
83 | - ns_ops->put(ns); | |
84 | - return ERR_PTR(-ENOMEM); | |
85 | - } | |
86 | - | |
87 | - ei = PROC_I(inode); | |
88 | - if (inode->i_state & I_NEW) { | |
89 | - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | |
90 | - inode->i_op = &ns_inode_operations; | |
91 | - inode->i_mode = S_IFREG | S_IRUGO; | |
92 | - inode->i_fop = &ns_file_operations; | |
93 | - ei->ns.ns_ops = ns_ops; | |
94 | - ei->ns.ns = ns; | |
95 | - unlock_new_inode(inode); | |
96 | - } else { | |
97 | - ns_ops->put(ns); | |
98 | - } | |
99 | - | |
100 | - d_set_d_op(dentry, &ns_dentry_operations); | |
101 | - result = d_instantiate_unique(dentry, inode); | |
102 | - if (result) { | |
103 | - dput(dentry); | |
104 | - dentry = result; | |
105 | - } | |
106 | - | |
107 | - return dentry; | |
108 | -} | |
109 | - | |
110 | 33 | static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) |
111 | 34 | { |
112 | 35 | struct inode *inode = dentry->d_inode; |
113 | - struct super_block *sb = inode->i_sb; | |
114 | - struct proc_inode *ei = PROC_I(inode); | |
36 | + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; | |
115 | 37 | struct task_struct *task; |
116 | 38 | struct path ns_path; |
117 | 39 | void *error = ERR_PTR(-EACCES); |
118 | 40 | |
119 | 41 | task = get_proc_task(inode); |
120 | 42 | if (!task) |
121 | - goto out; | |
43 | + return error; | |
122 | 44 | |
123 | - if (!ptrace_may_access(task, PTRACE_MODE_READ)) | |
124 | - goto out_put_task; | |
125 | - | |
126 | - ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops); | |
127 | - if (IS_ERR(ns_path.dentry)) { | |
128 | - error = ERR_CAST(ns_path.dentry); | |
129 | - goto out_put_task; | |
45 | + if (ptrace_may_access(task, PTRACE_MODE_READ)) { | |
46 | + error = ns_get_path(&ns_path, task, ns_ops); | |
47 | + if (!error) | |
48 | + nd_jump_link(nd, &ns_path); | |
130 | 49 | } |
131 | - | |
132 | - ns_path.mnt = mntget(nd->path.mnt); | |
133 | - nd_jump_link(nd, &ns_path); | |
134 | - error = NULL; | |
135 | - | |
136 | -out_put_task: | |
137 | 50 | put_task_struct(task); |
138 | -out: | |
139 | 51 | return error; |
140 | 52 | } |
141 | 53 | |
142 | 54 | static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) |
143 | 55 | { |
144 | 56 | struct inode *inode = dentry->d_inode; |
145 | - struct proc_inode *ei = PROC_I(inode); | |
146 | - const struct proc_ns_operations *ns_ops = ei->ns.ns_ops; | |
57 | + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; | |
147 | 58 | struct task_struct *task; |
148 | - struct ns_common *ns; | |
149 | 59 | char name[50]; |
150 | 60 | int res = -EACCES; |
151 | 61 | |
152 | 62 | task = get_proc_task(inode); |
153 | 63 | if (!task) |
154 | - goto out; | |
64 | + return res; | |
155 | 65 | |
156 | - if (!ptrace_may_access(task, PTRACE_MODE_READ)) | |
157 | - goto out_put_task; | |
158 | - | |
159 | - res = -ENOENT; | |
160 | - ns = ns_ops->get(task); | |
161 | - if (!ns) | |
162 | - goto out_put_task; | |
163 | - | |
164 | - snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns->inum); | |
165 | - res = readlink_copy(buffer, buflen, name); | |
166 | - ns_ops->put(ns); | |
167 | -out_put_task: | |
66 | + if (ptrace_may_access(task, PTRACE_MODE_READ)) { | |
67 | + res = ns_get_name(name, sizeof(name), task, ns_ops); | |
68 | + if (res >= 0) | |
69 | + res = readlink_copy(buffer, buflen, name); | |
70 | + } | |
168 | 71 | put_task_struct(task); |
169 | -out: | |
170 | 72 | return res; |
171 | 73 | } |
172 | 74 | |
... | ... | @@ -268,32 +170,4 @@ |
268 | 170 | .getattr = pid_getattr, |
269 | 171 | .setattr = proc_setattr, |
270 | 172 | }; |
271 | - | |
272 | -struct file *proc_ns_fget(int fd) | |
273 | -{ | |
274 | - struct file *file; | |
275 | - | |
276 | - file = fget(fd); | |
277 | - if (!file) | |
278 | - return ERR_PTR(-EBADF); | |
279 | - | |
280 | - if (file->f_op != &ns_file_operations) | |
281 | - goto out_invalid; | |
282 | - | |
283 | - return file; | |
284 | - | |
285 | -out_invalid: | |
286 | - fput(file); | |
287 | - return ERR_PTR(-EINVAL); | |
288 | -} | |
289 | - | |
290 | -struct ns_common *get_proc_ns(struct inode *inode) | |
291 | -{ | |
292 | - return PROC_I(inode)->ns.ns; | |
293 | -} | |
294 | - | |
295 | -bool proc_ns_inode(struct inode *inode) | |
296 | -{ | |
297 | - return inode->i_fop == &ns_file_operations; | |
298 | -} |
include/linux/ns_common.h
include/linux/proc_ns.h
... | ... | @@ -4,9 +4,11 @@ |
4 | 4 | #ifndef _LINUX_PROC_NS_H |
5 | 5 | #define _LINUX_PROC_NS_H |
6 | 6 | |
7 | +#include <linux/ns_common.h> | |
8 | + | |
7 | 9 | struct pid_namespace; |
8 | 10 | struct nsproxy; |
9 | -struct ns_common; | |
11 | +struct path; | |
10 | 12 | |
11 | 13 | struct proc_ns_operations { |
12 | 14 | const char *name; |
13 | 15 | |
14 | 16 | |
15 | 17 | |
16 | 18 | |
17 | 19 | |
... | ... | @@ -38,36 +40,39 @@ |
38 | 40 | |
39 | 41 | extern int pid_ns_prepare_proc(struct pid_namespace *ns); |
40 | 42 | extern void pid_ns_release_proc(struct pid_namespace *ns); |
41 | -extern struct file *proc_ns_fget(int fd); | |
42 | -extern struct ns_common *get_proc_ns(struct inode *); | |
43 | 43 | extern int proc_alloc_inum(unsigned int *pino); |
44 | 44 | extern void proc_free_inum(unsigned int inum); |
45 | -extern bool proc_ns_inode(struct inode *inode); | |
46 | 45 | |
47 | 46 | #else /* CONFIG_PROC_FS */ |
48 | 47 | |
49 | 48 | static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; } |
50 | 49 | static inline void pid_ns_release_proc(struct pid_namespace *ns) {} |
51 | 50 | |
52 | -static inline struct file *proc_ns_fget(int fd) | |
53 | -{ | |
54 | - return ERR_PTR(-EINVAL); | |
55 | -} | |
56 | - | |
57 | -static inline struct ns_common *get_proc_ns(struct inode *inode) { return NULL; } | |
58 | - | |
59 | 51 | static inline int proc_alloc_inum(unsigned int *inum) |
60 | 52 | { |
61 | 53 | *inum = 1; |
62 | 54 | return 0; |
63 | 55 | } |
64 | 56 | static inline void proc_free_inum(unsigned int inum) {} |
65 | -static inline bool proc_ns_inode(struct inode *inode) { return false; } | |
66 | 57 | |
67 | 58 | #endif /* CONFIG_PROC_FS */ |
68 | 59 | |
69 | -#define ns_alloc_inum(ns) proc_alloc_inum(&(ns)->inum) | |
60 | +static inline int ns_alloc_inum(struct ns_common *ns) | |
61 | +{ | |
62 | + atomic_long_set(&ns->stashed, 0); | |
63 | + return proc_alloc_inum(&ns->inum); | |
64 | +} | |
65 | + | |
70 | 66 | #define ns_free_inum(ns) proc_free_inum((ns)->inum) |
67 | + | |
68 | +extern struct file *proc_ns_fget(int fd); | |
69 | +#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) | |
70 | +extern void *ns_get_path(struct path *path, struct task_struct *task, | |
71 | + const struct proc_ns_operations *ns_ops); | |
72 | + | |
73 | +extern int ns_get_name(char *buf, size_t size, struct task_struct *task, | |
74 | + const struct proc_ns_operations *ns_ops); | |
75 | +extern void nsfs_init(void); | |
71 | 76 | |
72 | 77 | #endif /* _LINUX_PROC_NS_H */ |
include/uapi/linux/magic.h
init/main.c
... | ... | @@ -78,6 +78,7 @@ |
78 | 78 | #include <linux/context_tracking.h> |
79 | 79 | #include <linux/random.h> |
80 | 80 | #include <linux/list.h> |
81 | +#include <linux/proc_ns.h> | |
81 | 82 | |
82 | 83 | #include <asm/io.h> |
83 | 84 | #include <asm/bugs.h> |
... | ... | @@ -660,6 +661,7 @@ |
660 | 661 | /* rootfs populating might need page-writeback */ |
661 | 662 | page_writeback_init(); |
662 | 663 | proc_root_init(); |
664 | + nsfs_init(); | |
663 | 665 | cgroup_init(); |
664 | 666 | cpuset_init(); |
665 | 667 | taskstats_init_early(); |