Commit e149ed2b805fefdccf7ccdfc19eca22fdd4514ac

Authored by Al Viro
1 parent f77c80142e

take the targets of /proc/*/ns/* symlinks to separate fs

New pseudo-filesystem: nsfs.  Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.).  Files on it *are* bindable - we explicitly permit that in do_loopback().

This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot.  The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).

Dentries and inodes are never hashed; a non-counting reference to the dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present.  See ns_get_path()/ns_prune_dentry()/nsfs_evict() for details
of that mechanism.

As a result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 10 changed files with 208 additions and 161 deletions Side-by-side Diff

... ... @@ -11,7 +11,7 @@
11 11 attr.o bad_inode.o file.o filesystems.o namespace.o \
12 12 seq_file.o xattr.o libfs.o fs-writeback.o \
13 13 pnode.o splice.o sync.o utimes.o \
14   - stack.o fs_struct.o statfs.o fs_pin.o
  14 + stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
15 15  
16 16 ifeq ($(CONFIG_BLOCK),y)
17 17 obj-y += buffer.o block_dev.o direct-io.o mpage.o
... ... @@ -147,4 +147,9 @@
147 147 */
148 148 extern void sb_pin_kill(struct super_block *sb);
149 149 extern void mnt_pin_kill(struct mount *m);
  150 +
  151 +/*
  152 + * fs/nsfs.c
  153 + */
  154 +extern struct dentry_operations ns_dentry_operations;
... ... @@ -1569,8 +1569,8 @@
1569 1569 static bool is_mnt_ns_file(struct dentry *dentry)
1570 1570 {
1571 1571 /* Is this a proxy for a mount namespace? */
1572   - struct inode *inode = dentry->d_inode;
1573   - return proc_ns_inode(inode) && dentry->d_fsdata == &mntns_operations;
  1572 + return dentry->d_op == &ns_dentry_operations &&
  1573 + dentry->d_fsdata == &mntns_operations;
1574 1574 }
1575 1575  
1576 1576 struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
... ... @@ -2016,7 +2016,10 @@
2016 2016 if (IS_MNT_UNBINDABLE(old))
2017 2017 goto out2;
2018 2018  
2019   - if (!check_mnt(parent) || !check_mnt(old))
  2019 + if (!check_mnt(parent))
  2020 + goto out2;
  2021 +
  2022 + if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
2020 2023 goto out2;
2021 2024  
2022 2025 if (!recurse && has_locked_children(old, old_path.dentry))
  1 +#include <linux/mount.h>
  2 +#include <linux/file.h>
  3 +#include <linux/fs.h>
  4 +#include <linux/proc_ns.h>
  5 +#include <linux/magic.h>
  6 +#include <linux/ktime.h>
  7 +
  8 +static struct vfsmount *nsfs_mnt;
  9 +
  10 +static const struct file_operations ns_file_operations = { /* installed as ->i_fop by ns_get_path(); proc_ns_fget() recognizes namespace files by f_op pointing here */
  11 + .llseek = no_llseek,
  12 +};
  13 +
  14 +static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) /* ->d_dname(): builds "<ns type name>:[<inode number>]" in buffer */
  15 +{
  16 + struct inode *inode = dentry->d_inode;
  17 + const struct proc_ns_operations *ns_ops = dentry->d_fsdata; /* stored in d_fsdata by ns_get_path() */
  18 +
  19 + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
  20 + ns_ops->name, inode->i_ino);
  21 +}
  22 +
  23 +static void ns_prune_dentry(struct dentry *dentry) /* ->d_prune(): forget the dentry pointer stashed in the namespace */
  24 +{
  25 + struct inode *inode = dentry->d_inode;
  26 + if (inode) { /* a dentry without an inode has no namespace to update */
  27 + struct ns_common *ns = inode->i_private;
  28 + atomic_long_set(&ns->stashed, 0); /* ns_get_path() treats 0 as "nothing stashed" and allocates a new dentry */
  29 + }
  30 +}
  31 +
  32 +const struct dentry_operations ns_dentry_operations = /* dentry ops handed to mount_pseudo() in nsfs_mount(); fs/namespace.c compares ->d_op against this address */
  33 +{
  34 + .d_prune = ns_prune_dentry,
  35 + .d_delete = always_delete_dentry,
  36 + .d_dname = ns_dname,
  37 +};
  38 +
  39 +static void nsfs_evict(struct inode *inode) /* ->evict_inode(): releases the namespace stored in i_private */
  40 +{
  41 + struct ns_common *ns = inode->i_private; /* read before clear_inode() */
  42 + clear_inode(inode);
  43 + ns->ops->put(ns); /* pairs with the ns_ops->get() whose result ns_get_path() stored in i_private */
  44 +}
  45 +
  46 +void *ns_get_path(struct path *path, struct task_struct *task, /* fill *path with the nsfs mount and a dentry for task's namespace of type ns_ops */
  47 + const struct proc_ns_operations *ns_ops) /* returns NULL on success, ERR_PTR(-ENOENT) or ERR_PTR(-ENOMEM) on failure */
  48 +{
  49 + struct vfsmount *mnt = mntget(nsfs_mnt); /* handed to *path on success; mntput() on every error return */
  50 + struct qstr qname = { .name = "", };
  51 + struct dentry *dentry;
  52 + struct inode *inode;
  53 + struct ns_common *ns;
  54 + unsigned long d; /* value of ns->stashed: a dentry pointer cast to unsigned long, or 0 */
  55 +
  56 +again: /* retry point after losing the cmpxchg race below */
  57 + ns = ns_ops->get(task); /* paired with ns_ops->put() unless ns is handed to a new inode */
  58 + if (!ns) {
  59 + mntput(mnt);
  60 + return ERR_PTR(-ENOENT);
  61 + }
  62 + rcu_read_lock();
  63 + d = atomic_long_read(&ns->stashed);
  64 + if (!d) /* nothing stashed: build a new inode and dentry */
  65 + goto slow;
  66 + dentry = (struct dentry *)d;
  67 + if (!lockref_get_not_dead(&dentry->d_lockref)) /* could not take a reference on the stashed dentry: build a new one */
  68 + goto slow;
  69 + rcu_read_unlock();
  70 + ns_ops->put(ns); /* reusing the stashed dentry; the ns obtained above is not stored anywhere on this path */
  71 +got_it:
  72 + path->mnt = mnt;
  73 + path->dentry = dentry;
  74 + return NULL;
  75 +slow:
  76 + rcu_read_unlock();
  77 + inode = new_inode_pseudo(mnt->mnt_sb);
  78 + if (!inode) {
  79 + ns_ops->put(ns);
  80 + mntput(mnt);
  81 + return ERR_PTR(-ENOMEM);
  82 + }
  83 + inode->i_ino = ns->inum; /* inode number is the namespace's inum */
  84 + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
  85 + inode->i_flags |= S_IMMUTABLE;
  86 + inode->i_mode = S_IFREG | S_IRUGO;
  87 + inode->i_fop = &ns_file_operations;
  88 + inode->i_private = ns; /* from here nsfs_evict() is what calls ns->ops->put() */
  89 +
  90 + dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
  91 + if (!dentry) {
  92 + iput(inode); /* no explicit ns put here: ns is in i_private; presumably iput() reaches nsfs_evict() - confirm */
  93 + mntput(mnt);
  94 + return ERR_PTR(-ENOMEM);
  95 + }
  96 + d_instantiate(dentry, inode);
  97 + dentry->d_fsdata = (void *)ns_ops; /* read back by ns_dname() */
  98 + d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); /* stash our dentry only if nothing is stashed yet */
  99 + if (d) { /* non-zero old value: another dentry was stashed first; discard ours and retry */
  100 + d_delete(dentry); /* make sure ->d_prune() does nothing */
  101 + dput(dentry);
  102 + cpu_relax();
  103 + goto again;
  104 + }
  105 + goto got_it;
  106 +}
  107 +
  108 +int ns_get_name(char *buf, size_t size, struct task_struct *task, /* format "<ns type name>:[<inum>]" for task's namespace into buf (at most size bytes) */
  109 + const struct proc_ns_operations *ns_ops) /* returns the snprintf() result, or -ENOENT if ns_ops->get() returns NULL */
  110 +{
  111 + struct ns_common *ns;
  112 + int res = -ENOENT;
  113 + ns = ns_ops->get(task);
  114 + if (ns) {
  115 + res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum);
  116 + ns_ops->put(ns); /* ns was only needed for its inum */
  117 + }
  118 + return res;
  119 +}
  120 +
  121 +struct file *proc_ns_fget(int fd) /* look up fd and return its file only if it is a namespace file */
  122 +{
  123 + struct file *file;
  124 +
  125 + file = fget(fd);
  126 + if (!file)
  127 + return ERR_PTR(-EBADF);
  128 +
  129 + if (file->f_op != &ns_file_operations) /* namespace files are identified by their f_op pointer */
  130 + goto out_invalid;
  131 +
  132 + return file; /* returned with the reference taken by fget() */
  133 +
  134 +out_invalid:
  135 + fput(file); /* drop the fget() reference before reporting -EINVAL */
  136 + return ERR_PTR(-EINVAL);
  137 +}
  138 +
  139 +static const struct super_operations nsfs_ops = { /* superblock ops passed to mount_pseudo() in nsfs_mount() */
  140 + .statfs = simple_statfs,
  141 + .evict_inode = nsfs_evict, /* releases the namespace held in i_private */
  142 +};
  143 +static struct dentry *nsfs_mount(struct file_system_type *fs_type, /* ->mount() for nsfs: delegates to mount_pseudo() */
  144 + int flags, const char *dev_name, void *data) /* flags, dev_name and data are not used */
  145 +{
  146 + return mount_pseudo(fs_type, "nsfs:", &nsfs_ops,
  147 + &ns_dentry_operations, NSFS_MAGIC);
  148 +}
  149 +static struct file_system_type nsfs = { /* filesystem type; in this file it is only used by kern_mount() in nsfs_init() */
  150 + .name = "nsfs",
  151 + .mount = nsfs_mount,
  152 + .kill_sb = kill_anon_super,
  153 +};
  154 +
  155 +void __init nsfs_init(void) /* create the internal nsfs mount used by ns_get_path(); panics on failure */
  156 +{
  157 + nsfs_mnt = kern_mount(&nsfs);
  158 + if (IS_ERR(nsfs_mnt))
  159 + panic("can't set nsfs up\n");
  160 + nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; /* NOTE(review): presumably what allows files on nsfs to be bind-mounted - confirm */
  161 +}
... ... @@ -32,7 +32,6 @@
32 32 {
33 33 struct proc_dir_entry *de;
34 34 struct ctl_table_header *head;
35   - struct ns_common *ns;
36 35  
37 36 truncate_inode_pages_final(&inode->i_data);
38 37 clear_inode(inode);
... ... @@ -49,10 +48,6 @@
49 48 RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
50 49 sysctl_head_put(head);
51 50 }
52   - /* Release any associated namespace */
53   - ns = PROC_I(inode)->ns.ns;
54   - if (ns && ns->ops)
55   - ns->ops->put(ns);
56 51 }
57 52  
58 53 static struct kmem_cache * proc_inode_cachep;
fs/proc/namespaces.c
1 1 #include <linux/proc_fs.h>
2 2 #include <linux/nsproxy.h>
3   -#include <linux/sched.h>
4 3 #include <linux/ptrace.h>
5   -#include <linux/fs_struct.h>
6   -#include <linux/mount.h>
7   -#include <linux/path.h>
8 4 #include <linux/namei.h>
9 5 #include <linux/file.h>
10 6 #include <linux/utsname.h>
11 7  
12 8  
13 9  
14 10  
15 11  
16 12  
17 13  
18 14  
19 15  
20 16  
... ... @@ -34,139 +30,45 @@
34 30 &mntns_operations,
35 31 };
36 32  
37   -static const struct file_operations ns_file_operations = {
38   - .llseek = no_llseek,
39   -};
40   -
41   -static const struct inode_operations ns_inode_operations = {
42   - .setattr = proc_setattr,
43   -};
44   -
45   -static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
46   -{
47   - struct inode *inode = dentry->d_inode;
48   - const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
49   -
50   - return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
51   - ns_ops->name, inode->i_ino);
52   -}
53   -
54   -const struct dentry_operations ns_dentry_operations =
55   -{
56   - .d_delete = always_delete_dentry,
57   - .d_dname = ns_dname,
58   -};
59   -
60   -static struct dentry *proc_ns_get_dentry(struct super_block *sb,
61   - struct task_struct *task, const struct proc_ns_operations *ns_ops)
62   -{
63   - struct dentry *dentry, *result;
64   - struct inode *inode;
65   - struct proc_inode *ei;
66   - struct qstr qname = { .name = "", };
67   - struct ns_common *ns;
68   -
69   - ns = ns_ops->get(task);
70   - if (!ns)
71   - return ERR_PTR(-ENOENT);
72   -
73   - dentry = d_alloc_pseudo(sb, &qname);
74   - if (!dentry) {
75   - ns_ops->put(ns);
76   - return ERR_PTR(-ENOMEM);
77   - }
78   - dentry->d_fsdata = (void *)ns_ops;
79   -
80   - inode = iget_locked(sb, ns->inum);
81   - if (!inode) {
82   - dput(dentry);
83   - ns_ops->put(ns);
84   - return ERR_PTR(-ENOMEM);
85   - }
86   -
87   - ei = PROC_I(inode);
88   - if (inode->i_state & I_NEW) {
89   - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
90   - inode->i_op = &ns_inode_operations;
91   - inode->i_mode = S_IFREG | S_IRUGO;
92   - inode->i_fop = &ns_file_operations;
93   - ei->ns.ns_ops = ns_ops;
94   - ei->ns.ns = ns;
95   - unlock_new_inode(inode);
96   - } else {
97   - ns_ops->put(ns);
98   - }
99   -
100   - d_set_d_op(dentry, &ns_dentry_operations);
101   - result = d_instantiate_unique(dentry, inode);
102   - if (result) {
103   - dput(dentry);
104   - dentry = result;
105   - }
106   -
107   - return dentry;
108   -}
109   -
110 33 static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
111 34 {
112 35 struct inode *inode = dentry->d_inode;
113   - struct super_block *sb = inode->i_sb;
114   - struct proc_inode *ei = PROC_I(inode);
  36 + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
115 37 struct task_struct *task;
116 38 struct path ns_path;
117 39 void *error = ERR_PTR(-EACCES);
118 40  
119 41 task = get_proc_task(inode);
120 42 if (!task)
121   - goto out;
  43 + return error;
122 44  
123   - if (!ptrace_may_access(task, PTRACE_MODE_READ))
124   - goto out_put_task;
125   -
126   - ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops);
127   - if (IS_ERR(ns_path.dentry)) {
128   - error = ERR_CAST(ns_path.dentry);
129   - goto out_put_task;
  45 + if (ptrace_may_access(task, PTRACE_MODE_READ)) {
  46 + error = ns_get_path(&ns_path, task, ns_ops);
  47 + if (!error)
  48 + nd_jump_link(nd, &ns_path);
130 49 }
131   -
132   - ns_path.mnt = mntget(nd->path.mnt);
133   - nd_jump_link(nd, &ns_path);
134   - error = NULL;
135   -
136   -out_put_task:
137 50 put_task_struct(task);
138   -out:
139 51 return error;
140 52 }
141 53  
142 54 static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
143 55 {
144 56 struct inode *inode = dentry->d_inode;
145   - struct proc_inode *ei = PROC_I(inode);
146   - const struct proc_ns_operations *ns_ops = ei->ns.ns_ops;
  57 + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
147 58 struct task_struct *task;
148   - struct ns_common *ns;
149 59 char name[50];
150 60 int res = -EACCES;
151 61  
152 62 task = get_proc_task(inode);
153 63 if (!task)
154   - goto out;
  64 + return res;
155 65  
156   - if (!ptrace_may_access(task, PTRACE_MODE_READ))
157   - goto out_put_task;
158   -
159   - res = -ENOENT;
160   - ns = ns_ops->get(task);
161   - if (!ns)
162   - goto out_put_task;
163   -
164   - snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns->inum);
165   - res = readlink_copy(buffer, buflen, name);
166   - ns_ops->put(ns);
167   -out_put_task:
  66 + if (ptrace_may_access(task, PTRACE_MODE_READ)) {
  67 + res = ns_get_name(name, sizeof(name), task, ns_ops);
  68 + if (res >= 0)
  69 + res = readlink_copy(buffer, buflen, name);
  70 + }
168 71 put_task_struct(task);
169   -out:
170 72 return res;
171 73 }
172 74  
... ... @@ -268,32 +170,4 @@
268 170 .getattr = pid_getattr,
269 171 .setattr = proc_setattr,
270 172 };
271   -
272   -struct file *proc_ns_fget(int fd)
273   -{
274   - struct file *file;
275   -
276   - file = fget(fd);
277   - if (!file)
278   - return ERR_PTR(-EBADF);
279   -
280   - if (file->f_op != &ns_file_operations)
281   - goto out_invalid;
282   -
283   - return file;
284   -
285   -out_invalid:
286   - fput(file);
287   - return ERR_PTR(-EINVAL);
288   -}
289   -
290   -struct ns_common *get_proc_ns(struct inode *inode)
291   -{
292   - return PROC_I(inode)->ns.ns;
293   -}
294   -
295   -bool proc_ns_inode(struct inode *inode)
296   -{
297   - return inode->i_fop == &ns_file_operations;
298   -}
include/linux/ns_common.h
... ... @@ -4,6 +4,7 @@
4 4 struct proc_ns_operations;
5 5  
6 6 struct ns_common {
  7 + atomic_long_t stashed;
7 8 const struct proc_ns_operations *ops;
8 9 unsigned int inum;
9 10 };
include/linux/proc_ns.h
... ... @@ -4,9 +4,11 @@
4 4 #ifndef _LINUX_PROC_NS_H
5 5 #define _LINUX_PROC_NS_H
6 6  
  7 +#include <linux/ns_common.h>
  8 +
7 9 struct pid_namespace;
8 10 struct nsproxy;
9   -struct ns_common;
  11 +struct path;
10 12  
11 13 struct proc_ns_operations {
12 14 const char *name;
13 15  
14 16  
15 17  
16 18  
17 19  
... ... @@ -38,36 +40,39 @@
38 40  
39 41 extern int pid_ns_prepare_proc(struct pid_namespace *ns);
40 42 extern void pid_ns_release_proc(struct pid_namespace *ns);
41   -extern struct file *proc_ns_fget(int fd);
42   -extern struct ns_common *get_proc_ns(struct inode *);
43 43 extern int proc_alloc_inum(unsigned int *pino);
44 44 extern void proc_free_inum(unsigned int inum);
45   -extern bool proc_ns_inode(struct inode *inode);
46 45  
47 46 #else /* CONFIG_PROC_FS */
48 47  
49 48 static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; }
50 49 static inline void pid_ns_release_proc(struct pid_namespace *ns) {}
51 50  
52   -static inline struct file *proc_ns_fget(int fd)
53   -{
54   - return ERR_PTR(-EINVAL);
55   -}
56   -
57   -static inline struct ns_common *get_proc_ns(struct inode *inode) { return NULL; }
58   -
59 51 static inline int proc_alloc_inum(unsigned int *inum)
60 52 {
61 53 *inum = 1;
62 54 return 0;
63 55 }
64 56 static inline void proc_free_inum(unsigned int inum) {}
65   -static inline bool proc_ns_inode(struct inode *inode) { return false; }
66 57  
67 58 #endif /* CONFIG_PROC_FS */
68 59  
69   -#define ns_alloc_inum(ns) proc_alloc_inum(&(ns)->inum)
  60 +static inline int ns_alloc_inum(struct ns_common *ns) /* initialize ns->stashed and allocate ns->inum; returns proc_alloc_inum()'s result */
  61 +{
  62 + atomic_long_set(&ns->stashed, 0); /* 0 means no dentry is stashed yet */
  63 + return proc_alloc_inum(&ns->inum);
  64 +}
  65 +
70 66 #define ns_free_inum(ns) proc_free_inum((ns)->inum)
  67 +
  68 +extern struct file *proc_ns_fget(int fd);
  69 +#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
  70 +extern void *ns_get_path(struct path *path, struct task_struct *task,
  71 + const struct proc_ns_operations *ns_ops);
  72 +
  73 +extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
  74 + const struct proc_ns_operations *ns_ops);
  75 +extern void nsfs_init(void);
71 76  
72 77 #endif /* _LINUX_PROC_NS_H */
include/uapi/linux/magic.h
... ... @@ -72,6 +72,7 @@
72 72 #define MTD_INODE_FS_MAGIC 0x11307854
73 73 #define ANON_INODE_FS_MAGIC 0x09041934
74 74 #define BTRFS_TEST_MAGIC 0x73727279
  75 +#define NSFS_MAGIC 0x6e736673
75 76  
76 77 #endif /* __LINUX_MAGIC_H__ */
... ... @@ -78,6 +78,7 @@
78 78 #include <linux/context_tracking.h>
79 79 #include <linux/random.h>
80 80 #include <linux/list.h>
  81 +#include <linux/proc_ns.h>
81 82  
82 83 #include <asm/io.h>
83 84 #include <asm/bugs.h>
... ... @@ -660,6 +661,7 @@
660 661 /* rootfs populating might need page-writeback */
661 662 page_writeback_init();
662 663 proc_root_init();
  664 + nsfs_init();
663 665 cgroup_init();
664 666 cpuset_init();
665 667 taskstats_init_early();