Commit ca469f35a8e9ef12571a4b80ac6d7fdc0260fb44

Authored by Al Viro
1 parent 866ad9a747

deal with races between remove_proc_entry() and proc_reg_release()

* serialize the call of ->release() on a per-pdeo (struct pde_opener) mutex
* don't remove a pdeo from the per-pde list until we are through with it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 2 changed files with 34 additions and 53 deletions Inline Diff

1 /* 1 /*
2 * linux/fs/proc/inode.c 2 * linux/fs/proc/inode.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7 #include <linux/time.h> 7 #include <linux/time.h>
8 #include <linux/proc_fs.h> 8 #include <linux/proc_fs.h>
9 #include <linux/kernel.h> 9 #include <linux/kernel.h>
10 #include <linux/pid_namespace.h> 10 #include <linux/pid_namespace.h>
11 #include <linux/mm.h> 11 #include <linux/mm.h>
12 #include <linux/string.h> 12 #include <linux/string.h>
13 #include <linux/stat.h> 13 #include <linux/stat.h>
14 #include <linux/completion.h> 14 #include <linux/completion.h>
15 #include <linux/poll.h> 15 #include <linux/poll.h>
16 #include <linux/printk.h> 16 #include <linux/printk.h>
17 #include <linux/file.h> 17 #include <linux/file.h>
18 #include <linux/limits.h> 18 #include <linux/limits.h>
19 #include <linux/init.h> 19 #include <linux/init.h>
20 #include <linux/module.h> 20 #include <linux/module.h>
21 #include <linux/sysctl.h> 21 #include <linux/sysctl.h>
22 #include <linux/seq_file.h> 22 #include <linux/seq_file.h>
23 #include <linux/slab.h> 23 #include <linux/slab.h>
24 #include <linux/mount.h> 24 #include <linux/mount.h>
25 25
26 #include <asm/uaccess.h> 26 #include <asm/uaccess.h>
27 27
28 #include "internal.h" 28 #include "internal.h"
29 29
30 static void proc_evict_inode(struct inode *inode) 30 static void proc_evict_inode(struct inode *inode)
31 { 31 {
32 struct proc_dir_entry *de; 32 struct proc_dir_entry *de;
33 struct ctl_table_header *head; 33 struct ctl_table_header *head;
34 const struct proc_ns_operations *ns_ops; 34 const struct proc_ns_operations *ns_ops;
35 void *ns; 35 void *ns;
36 36
37 truncate_inode_pages(&inode->i_data, 0); 37 truncate_inode_pages(&inode->i_data, 0);
38 clear_inode(inode); 38 clear_inode(inode);
39 39
40 /* Stop tracking associated processes */ 40 /* Stop tracking associated processes */
41 put_pid(PROC_I(inode)->pid); 41 put_pid(PROC_I(inode)->pid);
42 42
43 /* Let go of any associated proc directory entry */ 43 /* Let go of any associated proc directory entry */
44 de = PROC_I(inode)->pde; 44 de = PROC_I(inode)->pde;
45 if (de) 45 if (de)
46 pde_put(de); 46 pde_put(de);
47 head = PROC_I(inode)->sysctl; 47 head = PROC_I(inode)->sysctl;
48 if (head) { 48 if (head) {
49 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); 49 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
50 sysctl_head_put(head); 50 sysctl_head_put(head);
51 } 51 }
52 /* Release any associated namespace */ 52 /* Release any associated namespace */
53 ns_ops = PROC_I(inode)->ns_ops; 53 ns_ops = PROC_I(inode)->ns_ops;
54 ns = PROC_I(inode)->ns; 54 ns = PROC_I(inode)->ns;
55 if (ns_ops && ns) 55 if (ns_ops && ns)
56 ns_ops->put(ns); 56 ns_ops->put(ns);
57 } 57 }
58 58
59 static struct kmem_cache * proc_inode_cachep; 59 static struct kmem_cache * proc_inode_cachep;
60 60
61 static struct inode *proc_alloc_inode(struct super_block *sb) 61 static struct inode *proc_alloc_inode(struct super_block *sb)
62 { 62 {
63 struct proc_inode *ei; 63 struct proc_inode *ei;
64 struct inode *inode; 64 struct inode *inode;
65 65
66 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); 66 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
67 if (!ei) 67 if (!ei)
68 return NULL; 68 return NULL;
69 ei->pid = NULL; 69 ei->pid = NULL;
70 ei->fd = 0; 70 ei->fd = 0;
71 ei->op.proc_get_link = NULL; 71 ei->op.proc_get_link = NULL;
72 ei->pde = NULL; 72 ei->pde = NULL;
73 ei->sysctl = NULL; 73 ei->sysctl = NULL;
74 ei->sysctl_entry = NULL; 74 ei->sysctl_entry = NULL;
75 ei->ns = NULL; 75 ei->ns = NULL;
76 ei->ns_ops = NULL; 76 ei->ns_ops = NULL;
77 inode = &ei->vfs_inode; 77 inode = &ei->vfs_inode;
78 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 78 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
79 return inode; 79 return inode;
80 } 80 }
81 81
82 static void proc_i_callback(struct rcu_head *head) 82 static void proc_i_callback(struct rcu_head *head)
83 { 83 {
84 struct inode *inode = container_of(head, struct inode, i_rcu); 84 struct inode *inode = container_of(head, struct inode, i_rcu);
85 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 85 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
86 } 86 }
87 87
88 static void proc_destroy_inode(struct inode *inode) 88 static void proc_destroy_inode(struct inode *inode)
89 { 89 {
90 call_rcu(&inode->i_rcu, proc_i_callback); 90 call_rcu(&inode->i_rcu, proc_i_callback);
91 } 91 }
92 92
93 static void init_once(void *foo) 93 static void init_once(void *foo)
94 { 94 {
95 struct proc_inode *ei = (struct proc_inode *) foo; 95 struct proc_inode *ei = (struct proc_inode *) foo;
96 96
97 inode_init_once(&ei->vfs_inode); 97 inode_init_once(&ei->vfs_inode);
98 } 98 }
99 99
100 void __init proc_init_inodecache(void) 100 void __init proc_init_inodecache(void)
101 { 101 {
102 proc_inode_cachep = kmem_cache_create("proc_inode_cache", 102 proc_inode_cachep = kmem_cache_create("proc_inode_cache",
103 sizeof(struct proc_inode), 103 sizeof(struct proc_inode),
104 0, (SLAB_RECLAIM_ACCOUNT| 104 0, (SLAB_RECLAIM_ACCOUNT|
105 SLAB_MEM_SPREAD|SLAB_PANIC), 105 SLAB_MEM_SPREAD|SLAB_PANIC),
106 init_once); 106 init_once);
107 } 107 }
108 108
109 static int proc_show_options(struct seq_file *seq, struct dentry *root) 109 static int proc_show_options(struct seq_file *seq, struct dentry *root)
110 { 110 {
111 struct super_block *sb = root->d_sb; 111 struct super_block *sb = root->d_sb;
112 struct pid_namespace *pid = sb->s_fs_info; 112 struct pid_namespace *pid = sb->s_fs_info;
113 113
114 if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) 114 if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
115 seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); 115 seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
116 if (pid->hide_pid != 0) 116 if (pid->hide_pid != 0)
117 seq_printf(seq, ",hidepid=%u", pid->hide_pid); 117 seq_printf(seq, ",hidepid=%u", pid->hide_pid);
118 118
119 return 0; 119 return 0;
120 } 120 }
121 121
122 static const struct super_operations proc_sops = { 122 static const struct super_operations proc_sops = {
123 .alloc_inode = proc_alloc_inode, 123 .alloc_inode = proc_alloc_inode,
124 .destroy_inode = proc_destroy_inode, 124 .destroy_inode = proc_destroy_inode,
125 .drop_inode = generic_delete_inode, 125 .drop_inode = generic_delete_inode,
126 .evict_inode = proc_evict_inode, 126 .evict_inode = proc_evict_inode,
127 .statfs = simple_statfs, 127 .statfs = simple_statfs,
128 .remount_fs = proc_remount, 128 .remount_fs = proc_remount,
129 .show_options = proc_show_options, 129 .show_options = proc_show_options,
130 }; 130 };
131 131
132 enum {BIAS = -1U<<31}; 132 enum {BIAS = -1U<<31};
133 133
134 static inline int use_pde(struct proc_dir_entry *pde) 134 static inline int use_pde(struct proc_dir_entry *pde)
135 { 135 {
136 int res = 1; 136 int res = 1;
137 spin_lock(&pde->pde_unload_lock); 137 spin_lock(&pde->pde_unload_lock);
138 if (unlikely(pde->pde_users < 0)) 138 if (unlikely(pde->pde_users < 0))
139 res = 0; 139 res = 0;
140 else 140 else
141 pde->pde_users++; 141 pde->pde_users++;
142 spin_unlock(&pde->pde_unload_lock); 142 spin_unlock(&pde->pde_unload_lock);
143 return res; 143 return res;
144 } 144 }
145 145
146 static void __pde_users_dec(struct proc_dir_entry *pde) 146 static void __pde_users_dec(struct proc_dir_entry *pde)
147 { 147 {
148 if (--pde->pde_users == BIAS) 148 if (--pde->pde_users == BIAS)
149 complete(pde->pde_unload_completion); 149 complete(pde->pde_unload_completion);
150 } 150 }
151 151
152 static void unuse_pde(struct proc_dir_entry *pde) 152 static void unuse_pde(struct proc_dir_entry *pde)
153 { 153 {
154 spin_lock(&pde->pde_unload_lock); 154 spin_lock(&pde->pde_unload_lock);
155 __pde_users_dec(pde); 155 __pde_users_dec(pde);
156 spin_unlock(&pde->pde_unload_lock); 156 spin_unlock(&pde->pde_unload_lock);
157 } 157 }
158 158
159 /* pde is locked */
160 static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
161 {
162 pdeo->count++;
163 if (!mutex_trylock(&pdeo->mutex)) {
164 /* somebody else is doing that, just wait */
165 spin_unlock(&pde->pde_unload_lock);
166 mutex_lock(&pdeo->mutex);
167 spin_lock(&pde->pde_unload_lock);
168 WARN_ON(!list_empty(&pdeo->lh));
169 } else {
170 struct file *file;
171 spin_unlock(&pde->pde_unload_lock);
172 file = pdeo->file;
173 pde->proc_fops->release(file_inode(file), file);
174 spin_lock(&pde->pde_unload_lock);
175 list_del_init(&pdeo->lh);
176 }
177 mutex_unlock(&pdeo->mutex);
178 if (!--pdeo->count)
179 kfree(pdeo);
180 }
181
159 void proc_entry_rundown(struct proc_dir_entry *de) 182 void proc_entry_rundown(struct proc_dir_entry *de)
160 { 183 {
161 spin_lock(&de->pde_unload_lock); 184 spin_lock(&de->pde_unload_lock);
162 de->pde_users += BIAS; 185 de->pde_users += BIAS;
163 /* Wait until all existing callers into module are done. */ 186 /* Wait until all existing callers into module are done. */
164 if (de->pde_users != BIAS) { 187 if (de->pde_users != BIAS) {
165 DECLARE_COMPLETION_ONSTACK(c); 188 DECLARE_COMPLETION_ONSTACK(c);
166 de->pde_unload_completion = &c; 189 de->pde_unload_completion = &c;
167 spin_unlock(&de->pde_unload_lock); 190 spin_unlock(&de->pde_unload_lock);
168 191
169 wait_for_completion(de->pde_unload_completion); 192 wait_for_completion(de->pde_unload_completion);
170 193
171 spin_lock(&de->pde_unload_lock); 194 spin_lock(&de->pde_unload_lock);
172 } 195 }
173 196
174 while (!list_empty(&de->pde_openers)) { 197 while (!list_empty(&de->pde_openers)) {
175 struct pde_opener *pdeo; 198 struct pde_opener *pdeo;
176 struct file *file;
177
178 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); 199 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
179 list_del(&pdeo->lh); 200 close_pdeo(de, pdeo);
180 spin_unlock(&de->pde_unload_lock);
181 file = pdeo->file;
182 de->proc_fops->release(file_inode(file), file);
183 kfree(pdeo);
184 spin_lock(&de->pde_unload_lock);
185 } 201 }
186 spin_unlock(&de->pde_unload_lock); 202 spin_unlock(&de->pde_unload_lock);
187 } 203 }
188 204
189 /* ->read_proc() users - legacy crap */ 205 /* ->read_proc() users - legacy crap */
190 static ssize_t 206 static ssize_t
191 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 207 proc_file_read(struct file *file, char __user *buf, size_t nbytes,
192 loff_t *ppos) 208 loff_t *ppos)
193 { 209 {
194 struct proc_dir_entry *pde = PDE(file_inode(file)); 210 struct proc_dir_entry *pde = PDE(file_inode(file));
195 ssize_t rv = -EIO; 211 ssize_t rv = -EIO;
196 if (use_pde(pde)) { 212 if (use_pde(pde)) {
197 rv = __proc_file_read(file, buf, nbytes, ppos); 213 rv = __proc_file_read(file, buf, nbytes, ppos);
198 unuse_pde(pde); 214 unuse_pde(pde);
199 } 215 }
200 return rv; 216 return rv;
201 } 217 }
202 218
203 static loff_t 219 static loff_t
204 proc_file_lseek(struct file *file, loff_t offset, int orig) 220 proc_file_lseek(struct file *file, loff_t offset, int orig)
205 { 221 {
206 loff_t retval = -EINVAL; 222 loff_t retval = -EINVAL;
207 switch (orig) { 223 switch (orig) {
208 case 1: 224 case 1:
209 offset += file->f_pos; 225 offset += file->f_pos;
210 /* fallthrough */ 226 /* fallthrough */
211 case 0: 227 case 0:
212 if (offset < 0 || offset > MAX_NON_LFS) 228 if (offset < 0 || offset > MAX_NON_LFS)
213 break; 229 break;
214 file->f_pos = retval = offset; 230 file->f_pos = retval = offset;
215 } 231 }
216 return retval; 232 return retval;
217 } 233 }
218 234
219 const struct file_operations proc_file_operations = { 235 const struct file_operations proc_file_operations = {
220 .llseek = proc_file_lseek, 236 .llseek = proc_file_lseek,
221 .read = proc_file_read, 237 .read = proc_file_read,
222 }; 238 };
223 239
224 static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) 240 static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
225 { 241 {
226 struct proc_dir_entry *pde = PDE(file_inode(file)); 242 struct proc_dir_entry *pde = PDE(file_inode(file));
227 loff_t rv = -EINVAL; 243 loff_t rv = -EINVAL;
228 if (use_pde(pde)) { 244 if (use_pde(pde)) {
229 loff_t (*llseek)(struct file *, loff_t, int); 245 loff_t (*llseek)(struct file *, loff_t, int);
230 llseek = pde->proc_fops->llseek; 246 llseek = pde->proc_fops->llseek;
231 if (!llseek) 247 if (!llseek)
232 llseek = default_llseek; 248 llseek = default_llseek;
233 rv = llseek(file, offset, whence); 249 rv = llseek(file, offset, whence);
234 unuse_pde(pde); 250 unuse_pde(pde);
235 } 251 }
236 return rv; 252 return rv;
237 } 253 }
238 254
239 static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 255 static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
240 { 256 {
241 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); 257 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
242 struct proc_dir_entry *pde = PDE(file_inode(file)); 258 struct proc_dir_entry *pde = PDE(file_inode(file));
243 ssize_t rv = -EIO; 259 ssize_t rv = -EIO;
244 if (use_pde(pde)) { 260 if (use_pde(pde)) {
245 read = pde->proc_fops->read; 261 read = pde->proc_fops->read;
246 if (read) 262 if (read)
247 rv = read(file, buf, count, ppos); 263 rv = read(file, buf, count, ppos);
248 unuse_pde(pde); 264 unuse_pde(pde);
249 } 265 }
250 return rv; 266 return rv;
251 } 267 }
252 268
253 static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) 269 static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
254 { 270 {
255 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); 271 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
256 struct proc_dir_entry *pde = PDE(file_inode(file)); 272 struct proc_dir_entry *pde = PDE(file_inode(file));
257 ssize_t rv = -EIO; 273 ssize_t rv = -EIO;
258 if (use_pde(pde)) { 274 if (use_pde(pde)) {
259 write = pde->proc_fops->write; 275 write = pde->proc_fops->write;
260 if (write) 276 if (write)
261 rv = write(file, buf, count, ppos); 277 rv = write(file, buf, count, ppos);
262 unuse_pde(pde); 278 unuse_pde(pde);
263 } 279 }
264 return rv; 280 return rv;
265 } 281 }
266 282
267 static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts) 283 static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
268 { 284 {
269 struct proc_dir_entry *pde = PDE(file_inode(file)); 285 struct proc_dir_entry *pde = PDE(file_inode(file));
270 unsigned int rv = DEFAULT_POLLMASK; 286 unsigned int rv = DEFAULT_POLLMASK;
271 unsigned int (*poll)(struct file *, struct poll_table_struct *); 287 unsigned int (*poll)(struct file *, struct poll_table_struct *);
272 if (use_pde(pde)) { 288 if (use_pde(pde)) {
273 poll = pde->proc_fops->poll; 289 poll = pde->proc_fops->poll;
274 if (poll) 290 if (poll)
275 rv = poll(file, pts); 291 rv = poll(file, pts);
276 unuse_pde(pde); 292 unuse_pde(pde);
277 } 293 }
278 return rv; 294 return rv;
279 } 295 }
280 296
281 static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 297 static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
282 { 298 {
283 struct proc_dir_entry *pde = PDE(file_inode(file)); 299 struct proc_dir_entry *pde = PDE(file_inode(file));
284 long rv = -ENOTTY; 300 long rv = -ENOTTY;
285 long (*ioctl)(struct file *, unsigned int, unsigned long); 301 long (*ioctl)(struct file *, unsigned int, unsigned long);
286 if (use_pde(pde)) { 302 if (use_pde(pde)) {
287 ioctl = pde->proc_fops->unlocked_ioctl; 303 ioctl = pde->proc_fops->unlocked_ioctl;
288 if (ioctl) 304 if (ioctl)
289 rv = ioctl(file, cmd, arg); 305 rv = ioctl(file, cmd, arg);
290 unuse_pde(pde); 306 unuse_pde(pde);
291 } 307 }
292 return rv; 308 return rv;
293 } 309 }
294 310
295 #ifdef CONFIG_COMPAT 311 #ifdef CONFIG_COMPAT
296 static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 312 static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
297 { 313 {
298 struct proc_dir_entry *pde = PDE(file_inode(file)); 314 struct proc_dir_entry *pde = PDE(file_inode(file));
299 long rv = -ENOTTY; 315 long rv = -ENOTTY;
300 long (*compat_ioctl)(struct file *, unsigned int, unsigned long); 316 long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
301 if (use_pde(pde)) { 317 if (use_pde(pde)) {
302 compat_ioctl = pde->proc_fops->compat_ioctl; 318 compat_ioctl = pde->proc_fops->compat_ioctl;
303 if (compat_ioctl) 319 if (compat_ioctl)
304 rv = compat_ioctl(file, cmd, arg); 320 rv = compat_ioctl(file, cmd, arg);
305 unuse_pde(pde); 321 unuse_pde(pde);
306 } 322 }
307 return rv; 323 return rv;
308 } 324 }
309 #endif 325 #endif
310 326
311 static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) 327 static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
312 { 328 {
313 struct proc_dir_entry *pde = PDE(file_inode(file)); 329 struct proc_dir_entry *pde = PDE(file_inode(file));
314 int rv = -EIO; 330 int rv = -EIO;
315 int (*mmap)(struct file *, struct vm_area_struct *); 331 int (*mmap)(struct file *, struct vm_area_struct *);
316 if (use_pde(pde)) { 332 if (use_pde(pde)) {
317 mmap = pde->proc_fops->mmap; 333 mmap = pde->proc_fops->mmap;
318 if (mmap) 334 if (mmap)
319 rv = mmap(file, vma); 335 rv = mmap(file, vma);
320 unuse_pde(pde); 336 unuse_pde(pde);
321 } 337 }
322 return rv; 338 return rv;
323 } 339 }
324 340
325 static int proc_reg_open(struct inode *inode, struct file *file) 341 static int proc_reg_open(struct inode *inode, struct file *file)
326 { 342 {
327 struct proc_dir_entry *pde = PDE(inode); 343 struct proc_dir_entry *pde = PDE(inode);
328 int rv = 0; 344 int rv = 0;
329 int (*open)(struct inode *, struct file *); 345 int (*open)(struct inode *, struct file *);
330 int (*release)(struct inode *, struct file *); 346 int (*release)(struct inode *, struct file *);
331 struct pde_opener *pdeo; 347 struct pde_opener *pdeo;
332 348
333 /* 349 /*
334 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry 350 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
335 * sequence. ->release won't be called because ->proc_fops will be 351 * sequence. ->release won't be called because ->proc_fops will be
336 * cleared. Depending on complexity of ->release, consequences vary. 352 * cleared. Depending on complexity of ->release, consequences vary.
337 * 353 *
338 * We can't wait for mercy when close will be done for real, it's 354 * We can't wait for mercy when close will be done for real, it's
339 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release 355 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
340 * by hand in remove_proc_entry(). For this, save opener's credentials 356 * by hand in remove_proc_entry(). For this, save opener's credentials
341 * for later. 357 * for later.
342 */ 358 */
343 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); 359 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
344 if (!pdeo) 360 if (!pdeo)
345 return -ENOMEM; 361 return -ENOMEM;
346 362
347 if (!use_pde(pde)) { 363 if (!use_pde(pde)) {
348 kfree(pdeo); 364 kfree(pdeo);
349 return -ENOENT; 365 return -ENOENT;
350 } 366 }
351 open = pde->proc_fops->open; 367 open = pde->proc_fops->open;
352 release = pde->proc_fops->release; 368 release = pde->proc_fops->release;
353 369
354 if (open) 370 if (open)
355 rv = open(inode, file); 371 rv = open(inode, file);
356 372
357 spin_lock(&pde->pde_unload_lock); 373 spin_lock(&pde->pde_unload_lock);
358 if (rv == 0 && release) { 374 if (rv == 0 && release) {
359 /* To know what to release. */ 375 /* To know what to release. */
376 mutex_init(&pdeo->mutex);
377 pdeo->count = 0;
360 pdeo->file = file; 378 pdeo->file = file;
361 /* Strictly for "too late" ->release in proc_reg_release(). */ 379 /* Strictly for "too late" ->release in proc_reg_release(). */
362 list_add(&pdeo->lh, &pde->pde_openers); 380 list_add(&pdeo->lh, &pde->pde_openers);
363 } else 381 } else
364 kfree(pdeo); 382 kfree(pdeo);
365 __pde_users_dec(pde); 383 __pde_users_dec(pde);
366 spin_unlock(&pde->pde_unload_lock); 384 spin_unlock(&pde->pde_unload_lock);
367 return rv; 385 return rv;
368 } 386 }
369 387
370 static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
371 struct file *file)
372 {
373 struct pde_opener *pdeo;
374
375 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
376 if (pdeo->file == file)
377 return pdeo;
378 }
379 return NULL;
380 }
381
382 static int proc_reg_release(struct inode *inode, struct file *file) 388 static int proc_reg_release(struct inode *inode, struct file *file)
383 { 389 {
384 struct proc_dir_entry *pde = PDE(inode); 390 struct proc_dir_entry *pde = PDE(inode);
385 int rv = 0;
386 int (*release)(struct inode *, struct file *);
387 struct pde_opener *pdeo; 391 struct pde_opener *pdeo;
388
389 spin_lock(&pde->pde_unload_lock); 392 spin_lock(&pde->pde_unload_lock);
390 pdeo = find_pde_opener(pde, file); 393 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
391 if (pde->pde_users < 0) { 394 if (pdeo->file == file) {
392 /* 395 close_pdeo(pde, pdeo);
393 * Can't simply exit, __fput() will think that everything is OK, 396 break;
394 * and move on to freeing struct file. remove_proc_entry() will 397 }
395 * find slacker in opener's list and will try to do non-trivial
396 * things with struct file. Therefore, remove opener from list.
397 *
398 * But if opener is removed from list, who will ->release it?
399 */
400 if (pdeo) {
401 list_del(&pdeo->lh);
402 spin_unlock(&pde->pde_unload_lock);
403 rv = pde->proc_fops->release(inode, file);
404 kfree(pdeo);
405 } else
406 spin_unlock(&pde->pde_unload_lock);
407 return rv;
408 } 398 }
409 pde->pde_users++;
410 release = pde->proc_fops->release;
411 if (pdeo) {
412 list_del(&pdeo->lh);
413 kfree(pdeo);
414 }
415 spin_unlock(&pde->pde_unload_lock); 399 spin_unlock(&pde->pde_unload_lock);
416 400 return 0;
417 if (release)
418 rv = release(inode, file);
419
420 unuse_pde(pde);
421 return rv;
422 } 401 }
423 402
424 static const struct file_operations proc_reg_file_ops = { 403 static const struct file_operations proc_reg_file_ops = {
425 .llseek = proc_reg_llseek, 404 .llseek = proc_reg_llseek,
426 .read = proc_reg_read, 405 .read = proc_reg_read,
427 .write = proc_reg_write, 406 .write = proc_reg_write,
428 .poll = proc_reg_poll, 407 .poll = proc_reg_poll,
429 .unlocked_ioctl = proc_reg_unlocked_ioctl, 408 .unlocked_ioctl = proc_reg_unlocked_ioctl,
430 #ifdef CONFIG_COMPAT 409 #ifdef CONFIG_COMPAT
431 .compat_ioctl = proc_reg_compat_ioctl, 410 .compat_ioctl = proc_reg_compat_ioctl,
432 #endif 411 #endif
433 .mmap = proc_reg_mmap, 412 .mmap = proc_reg_mmap,
434 .open = proc_reg_open, 413 .open = proc_reg_open,
435 .release = proc_reg_release, 414 .release = proc_reg_release,
436 }; 415 };
437 416
438 #ifdef CONFIG_COMPAT 417 #ifdef CONFIG_COMPAT
439 static const struct file_operations proc_reg_file_ops_no_compat = { 418 static const struct file_operations proc_reg_file_ops_no_compat = {
440 .llseek = proc_reg_llseek, 419 .llseek = proc_reg_llseek,
441 .read = proc_reg_read, 420 .read = proc_reg_read,
442 .write = proc_reg_write, 421 .write = proc_reg_write,
443 .poll = proc_reg_poll, 422 .poll = proc_reg_poll,
444 .unlocked_ioctl = proc_reg_unlocked_ioctl, 423 .unlocked_ioctl = proc_reg_unlocked_ioctl,
445 .mmap = proc_reg_mmap, 424 .mmap = proc_reg_mmap,
446 .open = proc_reg_open, 425 .open = proc_reg_open,
447 .release = proc_reg_release, 426 .release = proc_reg_release,
448 }; 427 };
449 #endif 428 #endif
450 429
451 struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) 430 struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
452 { 431 {
453 struct inode *inode = new_inode_pseudo(sb); 432 struct inode *inode = new_inode_pseudo(sb);
454 433
455 if (inode) { 434 if (inode) {
456 inode->i_ino = de->low_ino; 435 inode->i_ino = de->low_ino;
457 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 436 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
458 PROC_I(inode)->pde = de; 437 PROC_I(inode)->pde = de;
459 438
460 if (de->mode) { 439 if (de->mode) {
461 inode->i_mode = de->mode; 440 inode->i_mode = de->mode;
462 inode->i_uid = de->uid; 441 inode->i_uid = de->uid;
463 inode->i_gid = de->gid; 442 inode->i_gid = de->gid;
464 } 443 }
465 if (de->size) 444 if (de->size)
466 inode->i_size = de->size; 445 inode->i_size = de->size;
467 if (de->nlink) 446 if (de->nlink)
468 set_nlink(inode, de->nlink); 447 set_nlink(inode, de->nlink);
469 WARN_ON(!de->proc_iops); 448 WARN_ON(!de->proc_iops);
470 inode->i_op = de->proc_iops; 449 inode->i_op = de->proc_iops;
471 if (de->proc_fops) { 450 if (de->proc_fops) {
472 if (S_ISREG(inode->i_mode)) { 451 if (S_ISREG(inode->i_mode)) {
473 #ifdef CONFIG_COMPAT 452 #ifdef CONFIG_COMPAT
474 if (!de->proc_fops->compat_ioctl) 453 if (!de->proc_fops->compat_ioctl)
475 inode->i_fop = 454 inode->i_fop =
476 &proc_reg_file_ops_no_compat; 455 &proc_reg_file_ops_no_compat;
477 else 456 else
478 #endif 457 #endif
479 inode->i_fop = &proc_reg_file_ops; 458 inode->i_fop = &proc_reg_file_ops;
480 } else { 459 } else {
481 inode->i_fop = de->proc_fops; 460 inode->i_fop = de->proc_fops;
482 } 461 }
483 } 462 }
484 } else 463 } else
485 pde_put(de); 464 pde_put(de);
486 return inode; 465 return inode;
487 } 466 }
488 467
489 int proc_fill_super(struct super_block *s) 468 int proc_fill_super(struct super_block *s)
490 { 469 {
1 /* internal.h: internal procfs definitions 1 /* internal.h: internal procfs definitions
2 * 2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12 #include <linux/sched.h> 12 #include <linux/sched.h>
13 #include <linux/proc_fs.h> 13 #include <linux/proc_fs.h>
14 #include <linux/binfmts.h> 14 #include <linux/binfmts.h>
15 struct ctl_table_header; 15 struct ctl_table_header;
16 struct mempolicy; 16 struct mempolicy;
17 17
18 extern struct proc_dir_entry proc_root; 18 extern struct proc_dir_entry proc_root;
19 extern void proc_self_init(void); 19 extern void proc_self_init(void);
20 #ifdef CONFIG_PROC_SYSCTL 20 #ifdef CONFIG_PROC_SYSCTL
21 extern int proc_sys_init(void); 21 extern int proc_sys_init(void);
22 extern void sysctl_head_put(struct ctl_table_header *head); 22 extern void sysctl_head_put(struct ctl_table_header *head);
23 #else 23 #else
24 static inline void proc_sys_init(void) { } 24 static inline void proc_sys_init(void) { }
25 static inline void sysctl_head_put(struct ctl_table_header *head) { } 25 static inline void sysctl_head_put(struct ctl_table_header *head) { }
26 #endif 26 #endif
27 #ifdef CONFIG_NET 27 #ifdef CONFIG_NET
28 extern int proc_net_init(void); 28 extern int proc_net_init(void);
29 #else 29 #else
30 static inline int proc_net_init(void) { return 0; } 30 static inline int proc_net_init(void) { return 0; }
31 #endif 31 #endif
32 32
33 struct vmalloc_info { 33 struct vmalloc_info {
34 unsigned long used; 34 unsigned long used;
35 unsigned long largest_chunk; 35 unsigned long largest_chunk;
36 }; 36 };
37 37
38 #ifdef CONFIG_MMU 38 #ifdef CONFIG_MMU
39 #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) 39 #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
40 extern void get_vmalloc_info(struct vmalloc_info *vmi); 40 extern void get_vmalloc_info(struct vmalloc_info *vmi);
41 #else 41 #else
42 42
43 #define VMALLOC_TOTAL 0UL 43 #define VMALLOC_TOTAL 0UL
44 #define get_vmalloc_info(vmi) \ 44 #define get_vmalloc_info(vmi) \
45 do { \ 45 do { \
46 (vmi)->used = 0; \ 46 (vmi)->used = 0; \
47 (vmi)->largest_chunk = 0; \ 47 (vmi)->largest_chunk = 0; \
48 } while(0) 48 } while(0)
49 #endif 49 #endif
50 50
51 extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 51 extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
52 struct pid *pid, struct task_struct *task); 52 struct pid *pid, struct task_struct *task);
53 extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, 53 extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
54 struct pid *pid, struct task_struct *task); 54 struct pid *pid, struct task_struct *task);
55 extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, 55 extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
56 struct pid *pid, struct task_struct *task); 56 struct pid *pid, struct task_struct *task);
57 extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, 57 extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
58 struct pid *pid, struct task_struct *task); 58 struct pid *pid, struct task_struct *task);
59 extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); 59 extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
60 60
61 extern const struct file_operations proc_tid_children_operations; 61 extern const struct file_operations proc_tid_children_operations;
62 extern const struct file_operations proc_pid_maps_operations; 62 extern const struct file_operations proc_pid_maps_operations;
63 extern const struct file_operations proc_tid_maps_operations; 63 extern const struct file_operations proc_tid_maps_operations;
64 extern const struct file_operations proc_pid_numa_maps_operations; 64 extern const struct file_operations proc_pid_numa_maps_operations;
65 extern const struct file_operations proc_tid_numa_maps_operations; 65 extern const struct file_operations proc_tid_numa_maps_operations;
66 extern const struct file_operations proc_pid_smaps_operations; 66 extern const struct file_operations proc_pid_smaps_operations;
67 extern const struct file_operations proc_tid_smaps_operations; 67 extern const struct file_operations proc_tid_smaps_operations;
68 extern const struct file_operations proc_clear_refs_operations; 68 extern const struct file_operations proc_clear_refs_operations;
69 extern const struct file_operations proc_pagemap_operations; 69 extern const struct file_operations proc_pagemap_operations;
70 extern const struct file_operations proc_net_operations; 70 extern const struct file_operations proc_net_operations;
71 extern const struct inode_operations proc_net_inode_operations; 71 extern const struct inode_operations proc_net_inode_operations;
72 extern const struct inode_operations proc_pid_link_inode_operations; 72 extern const struct inode_operations proc_pid_link_inode_operations;
73 73
/*
 * Bundle of a struct pid pointer and a task_struct pointer, plus two
 * configuration-dependent members: tail_vma is present only when CONFIG_MMU
 * is defined and task_mempolicy only when CONFIG_NUMA is defined, so the
 * structure layout differs between kernel configurations.
 * NOTE(review): presumably the private state of the maps/smaps/numa_maps
 * readers -- the users are not visible in this chunk; confirm.
 */
74 struct proc_maps_private { 74 struct proc_maps_private {
75 struct pid *pid; 75 struct pid *pid;
76 struct task_struct *task; 76 struct task_struct *task;
77 #ifdef CONFIG_MMU 77 #ifdef CONFIG_MMU
78 struct vm_area_struct *tail_vma; 78 struct vm_area_struct *tail_vma;
79 #endif 79 #endif
80 #ifdef CONFIG_NUMA 80 #ifdef CONFIG_NUMA
81 struct mempolicy *task_mempolicy; 81 struct mempolicy *task_mempolicy;
82 #endif 82 #endif
83 }; 83 };
84 84
/* Prototype only; takes no arguments and returns nothing. Defined elsewhere. */
85 void proc_init_inodecache(void); 85 void proc_init_inodecache(void);
86 86
/*
 * Return the struct pid pointer stored in the proc-private part of @inode
 * (the ->pid member of whatever PROC_I() yields).  This is a plain field
 * read: no reference-count operation is performed here.
 */
87 static inline struct pid *proc_pid(struct inode *inode) 87 static inline struct pid *proc_pid(struct inode *inode)
88 { 88 {
89 return PROC_I(inode)->pid; 89 return PROC_I(inode)->pid;
90 } 90 }
91 91
/*
 * Resolve @inode to a task: passes proc_pid(inode) to get_pid_task() with
 * PIDTYPE_PID and returns its result unchanged.
 * NOTE(review): get_pid_task() presumably returns a referenced task, or NULL
 * when the pid no longer has one, which the caller must check and later
 * release -- confirm against its definition.
 */
92 static inline struct task_struct *get_proc_task(struct inode *inode) 92 static inline struct task_struct *get_proc_task(struct inode *inode)
93 { 93 {
94 return get_pid_task(proc_pid(inode), PIDTYPE_PID); 94 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
95 } 95 }
96 96
/*
 * Return the ->fd member stored in the proc-private part of @inode.
 * NOTE(review): presumably only meaningful for inodes that represent a file
 * descriptor entry -- confirm where ->fd is assigned.
 */
97 static inline int proc_fd(struct inode *inode) 97 static inline int proc_fd(struct inode *inode)
98 { 98 {
99 return PROC_I(inode)->fd; 99 return PROC_I(inode)->fd;
100 } 100 }
101 101
/*
 * Report whether @task's address space is marked SUID_DUMP_USER.
 *
 * task->mm is read and passed to get_dumpable() between task_lock() and
 * task_unlock().  Returns 1 only when the task has an mm and get_dumpable()
 * returned exactly SUID_DUMP_USER.  Returns 0 in every other case, including
 * a task with no mm (dumpable keeps its initial value of 0).
 */
102 static inline int task_dumpable(struct task_struct *task) 102 static inline int task_dumpable(struct task_struct *task)
103 { 103 {
104 int dumpable = 0; 104 int dumpable = 0;
105 struct mm_struct *mm; 105 struct mm_struct *mm;
106 106
107 task_lock(task); 107 task_lock(task);
108 mm = task->mm; 108 mm = task->mm;
109 if (mm) 109 if (mm)
110 dumpable = get_dumpable(mm); 110 dumpable = get_dumpable(mm);
111 task_unlock(task); 111 task_unlock(task);
112 if (dumpable == SUID_DUMP_USER) 112 if (dumpable == SUID_DUMP_USER)
113 return 1; 113 return 1;
114 return 0; 114 return 0;
115 } 115 }
116 116
/*
 * Returns nonzero when the struct pid behind dentry->d_inode has an empty
 * PIDTYPE_PID task list (tasks[PIDTYPE_PID].first is NULL), and 0 otherwise.
 * The signature matches a dentry "delete" callback.
 * NOTE(review): presumably installed as ->d_delete in a dentry_operations
 * table -- the hook-up is not visible in this chunk.
 */
117 static inline int pid_delete_dentry(const struct dentry * dentry) 117 static inline int pid_delete_dentry(const struct dentry * dentry)
118 { 118 {
119 /* Is the task we represent dead? 119 /* Is the task we represent dead?
120 * If so, then don't put the dentry on the lru list, 120 * If so, then don't put the dentry on the lru list,
121 * kill it immediately. 121 * kill it immediately.
122 */ 122 */
123 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 123 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
124 } 124 }
125 125
/*
 * Parse the name of @dentry (d_name.name, d_name.len bytes) as an unsigned
 * decimal number.
 *
 * Returns the parsed value, or ~0U when the name is rejected:
 *   - more than one character and the first is '0' (no leading zeros);
 *   - any character outside '0'..'9';
 *   - the value accumulated so far is already >= (~0U - 9) / 10 when another
 *     digit is about to be appended, so n * 10 + c can never wrap.
 * A zero-length name never enters the loop and returns 0.
 *
 * The name is walked by length, not by looking for a terminating NUL.
 */
126 static inline unsigned name_to_int(struct dentry *dentry) 126 static inline unsigned name_to_int(struct dentry *dentry)
127 { 127 {
128 const char *name = dentry->d_name.name; 128 const char *name = dentry->d_name.name;
129 int len = dentry->d_name.len; 129 int len = dentry->d_name.len;
130 unsigned n = 0; 130 unsigned n = 0;
131 131
132 if (len > 1 && *name == '0') 132 if (len > 1 && *name == '0')
133 goto out; 133 goto out;
134 while (len-- > 0) { 134 while (len-- > 0) {
135 unsigned c = *name++ - '0'; 135 unsigned c = *name++ - '0'; /* unsigned: bytes below '0' wrap to large values */
136 if (c > 9) 136 if (c > 9) /* so this single test rejects both sides of '0'..'9' */
137 goto out; 137 goto out;
138 if (n >= (~0U-9)/10) 138 if (n >= (~0U-9)/10) /* checked before the multiply below */
139 goto out; 139 goto out;
140 n *= 10; 140 n *= 10;
141 n += c; 141 n += c;
142 } 142 }
143 return n; 143 return n;
144 out: 144 out:
145 return ~0U; 145 return ~0U;
146 } 146 }
147 147
/*
 * Prototypes only.  Both take an explicit struct proc_dir_entry as first
 * argument: proc_lookup_de() also receives an inode and a dentry and returns
 * a dentry pointer; proc_readdir_de() also receives a file, an opaque dirent
 * cookie and a filldir_t callback and returns an int.
 */
148 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, 148 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
149 struct dentry *dentry); 149 struct dentry *dentry);
150 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 150 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
151 filldir_t filldir); 151 filldir_t filldir);
152 152
/*
 * Record tying one struct file to a list (via lh).  The two trailing members
 * appear only on the new side of this diff:
 *   count - number of threads currently in close_pdeo() (see member comment);
 *   mutex - per-opener mutex; per the commit description it serializes the
 *           call of ->release().
 * NOTE(review): lh presumably links the opener into the owning
 * proc_dir_entry's list, which the commit says it now stays on until
 * processing is finished -- confirm in inode.c.
 */
153 struct pde_opener { 153 struct pde_opener {
154 struct file *file; 154 struct file *file;
155 struct list_head lh; 155 struct list_head lh;
156 int count; /* number of threads in close_pdeo() */
157 struct mutex mutex;
156 }; 158 };
157 159
/*
 * Prototypes and extern declarations only; nothing below is defined in this
 * chunk.  proc_subdir_lock is declared as a spinlock_t.
 * NOTE(review): what proc_subdir_lock protects is not visible here -- check
 * its users.
 */
158 ssize_t __proc_file_read(struct file *, char __user *, size_t, loff_t *); 160 ssize_t __proc_file_read(struct file *, char __user *, size_t, loff_t *);
159 extern const struct file_operations proc_file_operations; 161 extern const struct file_operations proc_file_operations;
160 void proc_entry_rundown(struct proc_dir_entry *); 162 void proc_entry_rundown(struct proc_dir_entry *);
161 163
162 extern spinlock_t proc_subdir_lock; 164 extern spinlock_t proc_subdir_lock;
163 165
164 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int); 166 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int);
165 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); 167 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
166 unsigned long task_vsize(struct mm_struct *); 168 unsigned long task_vsize(struct mm_struct *);
167 unsigned long task_statm(struct mm_struct *, 169 unsigned long task_statm(struct mm_struct *,
168 unsigned long *, unsigned long *, unsigned long *, unsigned long *); 170 unsigned long *, unsigned long *, unsigned long *, unsigned long *);
169 void task_mem(struct seq_file *, struct mm_struct *); 171 void task_mem(struct seq_file *, struct mm_struct *);
170 172
/*
 * Atomically increment pde->count and return @pde, so the call can be used
 * inline in an expression.  @pde is dereferenced unconditionally and must not
 * be NULL.
 * NOTE(review): pde_put(), declared in this header, is presumably the
 * matching decrement -- confirm at its definition.
 */
171 static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) 173 static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
172 { 174 {
173 atomic_inc(&pde->count); 175 atomic_inc(&pde->count);
174 return pde; 176 return pde;
175 } 177 }
/* Prototypes only; definitions are not part of this chunk. */
176 void pde_put(struct proc_dir_entry *pde); 178 void pde_put(struct proc_dir_entry *pde);
177 179
178 int proc_fill_super(struct super_block *); 180 int proc_fill_super(struct super_block *);
179 struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); 181 struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
180 int proc_remount(struct super_block *sb, int *flags, char *data); 182 int proc_remount(struct super_block *sb, int *flags, char *data);
181 183
182 /* 184 /*
183 * These are generic /proc routines that use the internal 185 * These are generic /proc routines that use the internal
184 * "struct proc_dir_entry" tree to traverse the filesystem. 186 * "struct proc_dir_entry" tree to traverse the filesystem.
185 * 187 *
186 * The /proc root directory has extended versions to take care 188 * The /proc root directory has extended versions to take care
187 * of the /proc/<pid> subdirectories. 189 * of the /proc/<pid> subdirectories.
188 */ 190 */
/*
 * Unlike proc_readdir_de() / proc_lookup_de() declared earlier in this
 * header, these two take no explicit proc_dir_entry argument.
 */
189 int proc_readdir(struct file *, void *, filldir_t); 191 int proc_readdir(struct file *, void *, filldir_t);
190 struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); 192 struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
191 193
192 194
193 195
/*
 * instantiate_t is a function type (not a pointer type): it takes an inode,
 * a dentry, a task and an opaque const pointer and returns a dentry pointer.
 * proc_fill_cache() accepts such a callback together with a task and a
 * const void pointer.
 * NOTE(review): presumably proc_fill_cache() forwards that task and pointer
 * to the callback -- confirm at its definition.
 * The remaining lines are prototypes and one extern table; nothing here is
 * defined in this chunk.
 */
194 /* Lookups */ 196 /* Lookups */
195 typedef struct dentry *instantiate_t(struct inode *, struct dentry *, 197 typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
196 struct task_struct *, const void *); 198 struct task_struct *, const void *);
197 int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 199 int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
198 const char *name, int len, 200 const char *name, int len,
199 instantiate_t instantiate, struct task_struct *task, const void *ptr); 201 instantiate_t instantiate, struct task_struct *task, const void *ptr);
200 int pid_revalidate(struct dentry *dentry, unsigned int flags); 202 int pid_revalidate(struct dentry *dentry, unsigned int flags);
201 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); 203 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
202 extern const struct dentry_operations pid_dentry_operations; 204 extern const struct dentry_operations pid_dentry_operations;
203 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 205 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
204 int proc_setattr(struct dentry *dentry, struct iattr *attr); 206 int proc_setattr(struct dentry *dentry, struct iattr *attr);
205 207
/*
 * Extern operation tables for the namespace directory, plus the prototype of
 * proc_setup_self(), which takes a super_block and returns an int.
 * NOTE(review): the return value is presumably 0 or a negative errno --
 * confirm at the definition.
 */
206 extern const struct inode_operations proc_ns_dir_inode_operations; 208 extern const struct inode_operations proc_ns_dir_inode_operations;
207 extern const struct file_operations proc_ns_dir_operations; 209 extern const struct file_operations proc_ns_dir_operations;
208 210
209 extern int proc_setup_self(struct super_block *); 211 extern int proc_setup_self(struct super_block *);
210 212