Commit da5029563a0a026c64821b09e8e7b4fd81d3fe1b

Authored by Nick Piggin
1 parent b7ab39f631

fs: dcache scale d_unhashed

Protect the d_unhashed(dentry) condition with d_lock. This means keeping the
DCACHE_UNHASHED bit in sync with hash manipulations.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>

Showing 10 changed files with 102 additions and 54 deletions Inline Diff

arch/powerpc/platforms/cell/spufs/inode.c
1 1
2 /* 2 /*
3 * SPU file system 3 * SPU file system
4 * 4 *
5 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 5 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
6 * 6 *
7 * Author: Arnd Bergmann <arndb@de.ibm.com> 7 * Author: Arnd Bergmann <arndb@de.ibm.com>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option) 11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version. 12 * any later version.
13 * 13 *
14 * This program is distributed in the hope that it will be useful, 14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details. 17 * GNU General Public License for more details.
18 * 18 *
19 * You should have received a copy of the GNU General Public License 19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software 20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */ 22 */
23 23
24 #include <linux/file.h> 24 #include <linux/file.h>
25 #include <linux/fs.h> 25 #include <linux/fs.h>
26 #include <linux/fsnotify.h> 26 #include <linux/fsnotify.h>
27 #include <linux/backing-dev.h> 27 #include <linux/backing-dev.h>
28 #include <linux/init.h> 28 #include <linux/init.h>
29 #include <linux/ioctl.h> 29 #include <linux/ioctl.h>
30 #include <linux/module.h> 30 #include <linux/module.h>
31 #include <linux/mount.h> 31 #include <linux/mount.h>
32 #include <linux/namei.h> 32 #include <linux/namei.h>
33 #include <linux/pagemap.h> 33 #include <linux/pagemap.h>
34 #include <linux/poll.h> 34 #include <linux/poll.h>
35 #include <linux/slab.h> 35 #include <linux/slab.h>
36 #include <linux/parser.h> 36 #include <linux/parser.h>
37 37
38 #include <asm/prom.h> 38 #include <asm/prom.h>
39 #include <asm/spu.h> 39 #include <asm/spu.h>
40 #include <asm/spu_priv1.h> 40 #include <asm/spu_priv1.h>
41 #include <asm/uaccess.h> 41 #include <asm/uaccess.h>
42 42
43 #include "spufs.h" 43 #include "spufs.h"
44 44
45 struct spufs_sb_info { 45 struct spufs_sb_info {
46 int debug; 46 int debug;
47 }; 47 };
48 48
49 static struct kmem_cache *spufs_inode_cache; 49 static struct kmem_cache *spufs_inode_cache;
50 char *isolated_loader; 50 char *isolated_loader;
51 static int isolated_loader_size; 51 static int isolated_loader_size;
52 52
53 static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb) 53 static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb)
54 { 54 {
55 return sb->s_fs_info; 55 return sb->s_fs_info;
56 } 56 }
57 57
58 static struct inode * 58 static struct inode *
59 spufs_alloc_inode(struct super_block *sb) 59 spufs_alloc_inode(struct super_block *sb)
60 { 60 {
61 struct spufs_inode_info *ei; 61 struct spufs_inode_info *ei;
62 62
63 ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL); 63 ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
64 if (!ei) 64 if (!ei)
65 return NULL; 65 return NULL;
66 66
67 ei->i_gang = NULL; 67 ei->i_gang = NULL;
68 ei->i_ctx = NULL; 68 ei->i_ctx = NULL;
69 ei->i_openers = 0; 69 ei->i_openers = 0;
70 70
71 return &ei->vfs_inode; 71 return &ei->vfs_inode;
72 } 72 }
73 73
74 static void 74 static void
75 spufs_destroy_inode(struct inode *inode) 75 spufs_destroy_inode(struct inode *inode)
76 { 76 {
77 kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); 77 kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
78 } 78 }
79 79
80 static void 80 static void
81 spufs_init_once(void *p) 81 spufs_init_once(void *p)
82 { 82 {
83 struct spufs_inode_info *ei = p; 83 struct spufs_inode_info *ei = p;
84 84
85 inode_init_once(&ei->vfs_inode); 85 inode_init_once(&ei->vfs_inode);
86 } 86 }
87 87
88 static struct inode * 88 static struct inode *
89 spufs_new_inode(struct super_block *sb, int mode) 89 spufs_new_inode(struct super_block *sb, int mode)
90 { 90 {
91 struct inode *inode; 91 struct inode *inode;
92 92
93 inode = new_inode(sb); 93 inode = new_inode(sb);
94 if (!inode) 94 if (!inode)
95 goto out; 95 goto out;
96 96
97 inode->i_mode = mode; 97 inode->i_mode = mode;
98 inode->i_uid = current_fsuid(); 98 inode->i_uid = current_fsuid();
99 inode->i_gid = current_fsgid(); 99 inode->i_gid = current_fsgid();
100 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 100 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
101 out: 101 out:
102 return inode; 102 return inode;
103 } 103 }
104 104
105 static int 105 static int
106 spufs_setattr(struct dentry *dentry, struct iattr *attr) 106 spufs_setattr(struct dentry *dentry, struct iattr *attr)
107 { 107 {
108 struct inode *inode = dentry->d_inode; 108 struct inode *inode = dentry->d_inode;
109 109
110 if ((attr->ia_valid & ATTR_SIZE) && 110 if ((attr->ia_valid & ATTR_SIZE) &&
111 (attr->ia_size != inode->i_size)) 111 (attr->ia_size != inode->i_size))
112 return -EINVAL; 112 return -EINVAL;
113 setattr_copy(inode, attr); 113 setattr_copy(inode, attr);
114 mark_inode_dirty(inode); 114 mark_inode_dirty(inode);
115 return 0; 115 return 0;
116 } 116 }
117 117
118 118
119 static int 119 static int
120 spufs_new_file(struct super_block *sb, struct dentry *dentry, 120 spufs_new_file(struct super_block *sb, struct dentry *dentry,
121 const struct file_operations *fops, int mode, 121 const struct file_operations *fops, int mode,
122 size_t size, struct spu_context *ctx) 122 size_t size, struct spu_context *ctx)
123 { 123 {
124 static const struct inode_operations spufs_file_iops = { 124 static const struct inode_operations spufs_file_iops = {
125 .setattr = spufs_setattr, 125 .setattr = spufs_setattr,
126 }; 126 };
127 struct inode *inode; 127 struct inode *inode;
128 int ret; 128 int ret;
129 129
130 ret = -ENOSPC; 130 ret = -ENOSPC;
131 inode = spufs_new_inode(sb, S_IFREG | mode); 131 inode = spufs_new_inode(sb, S_IFREG | mode);
132 if (!inode) 132 if (!inode)
133 goto out; 133 goto out;
134 134
135 ret = 0; 135 ret = 0;
136 inode->i_op = &spufs_file_iops; 136 inode->i_op = &spufs_file_iops;
137 inode->i_fop = fops; 137 inode->i_fop = fops;
138 inode->i_size = size; 138 inode->i_size = size;
139 inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); 139 inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
140 d_add(dentry, inode); 140 d_add(dentry, inode);
141 out: 141 out:
142 return ret; 142 return ret;
143 } 143 }
144 144
145 static void 145 static void
146 spufs_evict_inode(struct inode *inode) 146 spufs_evict_inode(struct inode *inode)
147 { 147 {
148 struct spufs_inode_info *ei = SPUFS_I(inode); 148 struct spufs_inode_info *ei = SPUFS_I(inode);
149 end_writeback(inode); 149 end_writeback(inode);
150 if (ei->i_ctx) 150 if (ei->i_ctx)
151 put_spu_context(ei->i_ctx); 151 put_spu_context(ei->i_ctx);
152 if (ei->i_gang) 152 if (ei->i_gang)
153 put_spu_gang(ei->i_gang); 153 put_spu_gang(ei->i_gang);
154 } 154 }
155 155
156 static void spufs_prune_dir(struct dentry *dir) 156 static void spufs_prune_dir(struct dentry *dir)
157 { 157 {
158 struct dentry *dentry, *tmp; 158 struct dentry *dentry, *tmp;
159 159
160 mutex_lock(&dir->d_inode->i_mutex); 160 mutex_lock(&dir->d_inode->i_mutex);
161 list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { 161 list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
162 spin_lock(&dcache_lock); 162 spin_lock(&dcache_lock);
163 spin_lock(&dentry->d_lock); 163 spin_lock(&dentry->d_lock);
164 if (!(d_unhashed(dentry)) && dentry->d_inode) { 164 if (!(d_unhashed(dentry)) && dentry->d_inode) {
165 dget_locked_dlock(dentry); 165 dget_locked_dlock(dentry);
166 __d_drop(dentry); 166 __d_drop(dentry);
167 spin_unlock(&dentry->d_lock); 167 spin_unlock(&dentry->d_lock);
168 simple_unlink(dir->d_inode, dentry); 168 simple_unlink(dir->d_inode, dentry);
169 /* XXX: what is dcache_lock protecting here? Other
170 * filesystems (IB, configfs) release dcache_lock
171 * before unlink */
169 spin_unlock(&dcache_lock); 172 spin_unlock(&dcache_lock);
170 dput(dentry); 173 dput(dentry);
171 } else { 174 } else {
172 spin_unlock(&dentry->d_lock); 175 spin_unlock(&dentry->d_lock);
173 spin_unlock(&dcache_lock); 176 spin_unlock(&dcache_lock);
174 } 177 }
175 } 178 }
176 shrink_dcache_parent(dir); 179 shrink_dcache_parent(dir);
177 mutex_unlock(&dir->d_inode->i_mutex); 180 mutex_unlock(&dir->d_inode->i_mutex);
178 } 181 }
179 182
180 /* Caller must hold parent->i_mutex */ 183 /* Caller must hold parent->i_mutex */
181 static int spufs_rmdir(struct inode *parent, struct dentry *dir) 184 static int spufs_rmdir(struct inode *parent, struct dentry *dir)
182 { 185 {
183 /* remove all entries */ 186 /* remove all entries */
184 spufs_prune_dir(dir); 187 spufs_prune_dir(dir);
185 d_drop(dir); 188 d_drop(dir);
186 189
187 return simple_rmdir(parent, dir); 190 return simple_rmdir(parent, dir);
188 } 191 }
189 192
190 static int spufs_fill_dir(struct dentry *dir, 193 static int spufs_fill_dir(struct dentry *dir,
191 const struct spufs_tree_descr *files, int mode, 194 const struct spufs_tree_descr *files, int mode,
192 struct spu_context *ctx) 195 struct spu_context *ctx)
193 { 196 {
194 struct dentry *dentry, *tmp; 197 struct dentry *dentry, *tmp;
195 int ret; 198 int ret;
196 199
197 while (files->name && files->name[0]) { 200 while (files->name && files->name[0]) {
198 ret = -ENOMEM; 201 ret = -ENOMEM;
199 dentry = d_alloc_name(dir, files->name); 202 dentry = d_alloc_name(dir, files->name);
200 if (!dentry) 203 if (!dentry)
201 goto out; 204 goto out;
202 ret = spufs_new_file(dir->d_sb, dentry, files->ops, 205 ret = spufs_new_file(dir->d_sb, dentry, files->ops,
203 files->mode & mode, files->size, ctx); 206 files->mode & mode, files->size, ctx);
204 if (ret) 207 if (ret)
205 goto out; 208 goto out;
206 files++; 209 files++;
207 } 210 }
208 return 0; 211 return 0;
209 out: 212 out:
210 /* 213 /*
211 * remove all children from dir. dir->inode is not set so don't 214 * remove all children from dir. dir->inode is not set so don't
212 * just simply use spufs_prune_dir() and panic afterwards :) 215 * just simply use spufs_prune_dir() and panic afterwards :)
213 * dput() looks like it will do the right thing: 216 * dput() looks like it will do the right thing:
214 * - dec parent's ref counter 217 * - dec parent's ref counter
215 * - remove child from parent's child list 218 * - remove child from parent's child list
216 * - free child's inode if possible 219 * - free child's inode if possible
217 * - free child 220 * - free child
218 */ 221 */
219 list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { 222 list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
220 dput(dentry); 223 dput(dentry);
221 } 224 }
222 225
223 shrink_dcache_parent(dir); 226 shrink_dcache_parent(dir);
224 return ret; 227 return ret;
225 } 228 }
226 229
227 static int spufs_dir_close(struct inode *inode, struct file *file) 230 static int spufs_dir_close(struct inode *inode, struct file *file)
228 { 231 {
229 struct spu_context *ctx; 232 struct spu_context *ctx;
230 struct inode *parent; 233 struct inode *parent;
231 struct dentry *dir; 234 struct dentry *dir;
232 int ret; 235 int ret;
233 236
234 dir = file->f_path.dentry; 237 dir = file->f_path.dentry;
235 parent = dir->d_parent->d_inode; 238 parent = dir->d_parent->d_inode;
236 ctx = SPUFS_I(dir->d_inode)->i_ctx; 239 ctx = SPUFS_I(dir->d_inode)->i_ctx;
237 240
238 mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT); 241 mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
239 ret = spufs_rmdir(parent, dir); 242 ret = spufs_rmdir(parent, dir);
240 mutex_unlock(&parent->i_mutex); 243 mutex_unlock(&parent->i_mutex);
241 WARN_ON(ret); 244 WARN_ON(ret);
242 245
243 /* We have to give up the mm_struct */ 246 /* We have to give up the mm_struct */
244 spu_forget(ctx); 247 spu_forget(ctx);
245 248
246 return dcache_dir_close(inode, file); 249 return dcache_dir_close(inode, file);
247 } 250 }
248 251
249 const struct file_operations spufs_context_fops = { 252 const struct file_operations spufs_context_fops = {
250 .open = dcache_dir_open, 253 .open = dcache_dir_open,
251 .release = spufs_dir_close, 254 .release = spufs_dir_close,
252 .llseek = dcache_dir_lseek, 255 .llseek = dcache_dir_lseek,
253 .read = generic_read_dir, 256 .read = generic_read_dir,
254 .readdir = dcache_readdir, 257 .readdir = dcache_readdir,
255 .fsync = noop_fsync, 258 .fsync = noop_fsync,
256 }; 259 };
257 EXPORT_SYMBOL_GPL(spufs_context_fops); 260 EXPORT_SYMBOL_GPL(spufs_context_fops);
258 261
259 static int 262 static int
260 spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, 263 spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
261 int mode) 264 int mode)
262 { 265 {
263 int ret; 266 int ret;
264 struct inode *inode; 267 struct inode *inode;
265 struct spu_context *ctx; 268 struct spu_context *ctx;
266 269
267 ret = -ENOSPC; 270 ret = -ENOSPC;
268 inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); 271 inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
269 if (!inode) 272 if (!inode)
270 goto out; 273 goto out;
271 274
272 if (dir->i_mode & S_ISGID) { 275 if (dir->i_mode & S_ISGID) {
273 inode->i_gid = dir->i_gid; 276 inode->i_gid = dir->i_gid;
274 inode->i_mode &= S_ISGID; 277 inode->i_mode &= S_ISGID;
275 } 278 }
276 ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */ 279 ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
277 SPUFS_I(inode)->i_ctx = ctx; 280 SPUFS_I(inode)->i_ctx = ctx;
278 if (!ctx) 281 if (!ctx)
279 goto out_iput; 282 goto out_iput;
280 283
281 ctx->flags = flags; 284 ctx->flags = flags;
282 inode->i_op = &simple_dir_inode_operations; 285 inode->i_op = &simple_dir_inode_operations;
283 inode->i_fop = &simple_dir_operations; 286 inode->i_fop = &simple_dir_operations;
284 if (flags & SPU_CREATE_NOSCHED) 287 if (flags & SPU_CREATE_NOSCHED)
285 ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents, 288 ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents,
286 mode, ctx); 289 mode, ctx);
287 else 290 else
288 ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); 291 ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx);
289 292
290 if (ret) 293 if (ret)
291 goto out_free_ctx; 294 goto out_free_ctx;
292 295
293 if (spufs_get_sb_info(dir->i_sb)->debug) 296 if (spufs_get_sb_info(dir->i_sb)->debug)
294 ret = spufs_fill_dir(dentry, spufs_dir_debug_contents, 297 ret = spufs_fill_dir(dentry, spufs_dir_debug_contents,
295 mode, ctx); 298 mode, ctx);
296 299
297 if (ret) 300 if (ret)
298 goto out_free_ctx; 301 goto out_free_ctx;
299 302
300 d_instantiate(dentry, inode); 303 d_instantiate(dentry, inode);
301 dget(dentry); 304 dget(dentry);
302 inc_nlink(dir); 305 inc_nlink(dir);
303 inc_nlink(dentry->d_inode); 306 inc_nlink(dentry->d_inode);
304 goto out; 307 goto out;
305 308
306 out_free_ctx: 309 out_free_ctx:
307 spu_forget(ctx); 310 spu_forget(ctx);
308 put_spu_context(ctx); 311 put_spu_context(ctx);
309 out_iput: 312 out_iput:
310 iput(inode); 313 iput(inode);
311 out: 314 out:
312 return ret; 315 return ret;
313 } 316 }
314 317
315 static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt) 318 static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt)
316 { 319 {
317 int ret; 320 int ret;
318 struct file *filp; 321 struct file *filp;
319 322
320 ret = get_unused_fd(); 323 ret = get_unused_fd();
321 if (ret < 0) { 324 if (ret < 0) {
322 dput(dentry); 325 dput(dentry);
323 mntput(mnt); 326 mntput(mnt);
324 goto out; 327 goto out;
325 } 328 }
326 329
327 filp = dentry_open(dentry, mnt, O_RDONLY, current_cred()); 330 filp = dentry_open(dentry, mnt, O_RDONLY, current_cred());
328 if (IS_ERR(filp)) { 331 if (IS_ERR(filp)) {
329 put_unused_fd(ret); 332 put_unused_fd(ret);
330 ret = PTR_ERR(filp); 333 ret = PTR_ERR(filp);
331 goto out; 334 goto out;
332 } 335 }
333 336
334 filp->f_op = &spufs_context_fops; 337 filp->f_op = &spufs_context_fops;
335 fd_install(ret, filp); 338 fd_install(ret, filp);
336 out: 339 out:
337 return ret; 340 return ret;
338 } 341 }
339 342
340 static struct spu_context * 343 static struct spu_context *
341 spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, 344 spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
342 struct file *filp) 345 struct file *filp)
343 { 346 {
344 struct spu_context *tmp, *neighbor, *err; 347 struct spu_context *tmp, *neighbor, *err;
345 int count, node; 348 int count, node;
346 int aff_supp; 349 int aff_supp;
347 350
348 aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, 351 aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
349 struct spu, cbe_list))->aff_list); 352 struct spu, cbe_list))->aff_list);
350 353
351 if (!aff_supp) 354 if (!aff_supp)
352 return ERR_PTR(-EINVAL); 355 return ERR_PTR(-EINVAL);
353 356
354 if (flags & SPU_CREATE_GANG) 357 if (flags & SPU_CREATE_GANG)
355 return ERR_PTR(-EINVAL); 358 return ERR_PTR(-EINVAL);
356 359
357 if (flags & SPU_CREATE_AFFINITY_MEM && 360 if (flags & SPU_CREATE_AFFINITY_MEM &&
358 gang->aff_ref_ctx && 361 gang->aff_ref_ctx &&
359 gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) 362 gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
360 return ERR_PTR(-EEXIST); 363 return ERR_PTR(-EEXIST);
361 364
362 if (gang->aff_flags & AFF_MERGED) 365 if (gang->aff_flags & AFF_MERGED)
363 return ERR_PTR(-EBUSY); 366 return ERR_PTR(-EBUSY);
364 367
365 neighbor = NULL; 368 neighbor = NULL;
366 if (flags & SPU_CREATE_AFFINITY_SPU) { 369 if (flags & SPU_CREATE_AFFINITY_SPU) {
367 if (!filp || filp->f_op != &spufs_context_fops) 370 if (!filp || filp->f_op != &spufs_context_fops)
368 return ERR_PTR(-EINVAL); 371 return ERR_PTR(-EINVAL);
369 372
370 neighbor = get_spu_context( 373 neighbor = get_spu_context(
371 SPUFS_I(filp->f_dentry->d_inode)->i_ctx); 374 SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
372 375
373 if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && 376 if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
374 !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && 377 !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
375 !list_entry(neighbor->aff_list.next, struct spu_context, 378 !list_entry(neighbor->aff_list.next, struct spu_context,
376 aff_list)->aff_head) { 379 aff_list)->aff_head) {
377 err = ERR_PTR(-EEXIST); 380 err = ERR_PTR(-EEXIST);
378 goto out_put_neighbor; 381 goto out_put_neighbor;
379 } 382 }
380 383
381 if (gang != neighbor->gang) { 384 if (gang != neighbor->gang) {
382 err = ERR_PTR(-EINVAL); 385 err = ERR_PTR(-EINVAL);
383 goto out_put_neighbor; 386 goto out_put_neighbor;
384 } 387 }
385 388
386 count = 1; 389 count = 1;
387 list_for_each_entry(tmp, &gang->aff_list_head, aff_list) 390 list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
388 count++; 391 count++;
389 if (list_empty(&neighbor->aff_list)) 392 if (list_empty(&neighbor->aff_list))
390 count++; 393 count++;
391 394
392 for (node = 0; node < MAX_NUMNODES; node++) { 395 for (node = 0; node < MAX_NUMNODES; node++) {
393 if ((cbe_spu_info[node].n_spus - atomic_read( 396 if ((cbe_spu_info[node].n_spus - atomic_read(
394 &cbe_spu_info[node].reserved_spus)) >= count) 397 &cbe_spu_info[node].reserved_spus)) >= count)
395 break; 398 break;
396 } 399 }
397 400
398 if (node == MAX_NUMNODES) { 401 if (node == MAX_NUMNODES) {
399 err = ERR_PTR(-EEXIST); 402 err = ERR_PTR(-EEXIST);
400 goto out_put_neighbor; 403 goto out_put_neighbor;
401 } 404 }
402 } 405 }
403 406
404 return neighbor; 407 return neighbor;
405 408
406 out_put_neighbor: 409 out_put_neighbor:
407 put_spu_context(neighbor); 410 put_spu_context(neighbor);
408 return err; 411 return err;
409 } 412 }
410 413
411 static void 414 static void
412 spufs_set_affinity(unsigned int flags, struct spu_context *ctx, 415 spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
413 struct spu_context *neighbor) 416 struct spu_context *neighbor)
414 { 417 {
415 if (flags & SPU_CREATE_AFFINITY_MEM) 418 if (flags & SPU_CREATE_AFFINITY_MEM)
416 ctx->gang->aff_ref_ctx = ctx; 419 ctx->gang->aff_ref_ctx = ctx;
417 420
418 if (flags & SPU_CREATE_AFFINITY_SPU) { 421 if (flags & SPU_CREATE_AFFINITY_SPU) {
419 if (list_empty(&neighbor->aff_list)) { 422 if (list_empty(&neighbor->aff_list)) {
420 list_add_tail(&neighbor->aff_list, 423 list_add_tail(&neighbor->aff_list,
421 &ctx->gang->aff_list_head); 424 &ctx->gang->aff_list_head);
422 neighbor->aff_head = 1; 425 neighbor->aff_head = 1;
423 } 426 }
424 427
425 if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) 428 if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
426 || list_entry(neighbor->aff_list.next, struct spu_context, 429 || list_entry(neighbor->aff_list.next, struct spu_context,
427 aff_list)->aff_head) { 430 aff_list)->aff_head) {
428 list_add(&ctx->aff_list, &neighbor->aff_list); 431 list_add(&ctx->aff_list, &neighbor->aff_list);
429 } else { 432 } else {
430 list_add_tail(&ctx->aff_list, &neighbor->aff_list); 433 list_add_tail(&ctx->aff_list, &neighbor->aff_list);
431 if (neighbor->aff_head) { 434 if (neighbor->aff_head) {
432 neighbor->aff_head = 0; 435 neighbor->aff_head = 0;
433 ctx->aff_head = 1; 436 ctx->aff_head = 1;
434 } 437 }
435 } 438 }
436 439
437 if (!ctx->gang->aff_ref_ctx) 440 if (!ctx->gang->aff_ref_ctx)
438 ctx->gang->aff_ref_ctx = ctx; 441 ctx->gang->aff_ref_ctx = ctx;
439 } 442 }
440 } 443 }
441 444
442 static int 445 static int
443 spufs_create_context(struct inode *inode, struct dentry *dentry, 446 spufs_create_context(struct inode *inode, struct dentry *dentry,
444 struct vfsmount *mnt, int flags, int mode, 447 struct vfsmount *mnt, int flags, int mode,
445 struct file *aff_filp) 448 struct file *aff_filp)
446 { 449 {
447 int ret; 450 int ret;
448 int affinity; 451 int affinity;
449 struct spu_gang *gang; 452 struct spu_gang *gang;
450 struct spu_context *neighbor; 453 struct spu_context *neighbor;
451 454
452 ret = -EPERM; 455 ret = -EPERM;
453 if ((flags & SPU_CREATE_NOSCHED) && 456 if ((flags & SPU_CREATE_NOSCHED) &&
454 !capable(CAP_SYS_NICE)) 457 !capable(CAP_SYS_NICE))
455 goto out_unlock; 458 goto out_unlock;
456 459
457 ret = -EINVAL; 460 ret = -EINVAL;
458 if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE)) 461 if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
459 == SPU_CREATE_ISOLATE) 462 == SPU_CREATE_ISOLATE)
460 goto out_unlock; 463 goto out_unlock;
461 464
462 ret = -ENODEV; 465 ret = -ENODEV;
463 if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) 466 if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
464 goto out_unlock; 467 goto out_unlock;
465 468
466 gang = NULL; 469 gang = NULL;
467 neighbor = NULL; 470 neighbor = NULL;
468 affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); 471 affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
469 if (affinity) { 472 if (affinity) {
470 gang = SPUFS_I(inode)->i_gang; 473 gang = SPUFS_I(inode)->i_gang;
471 ret = -EINVAL; 474 ret = -EINVAL;
472 if (!gang) 475 if (!gang)
473 goto out_unlock; 476 goto out_unlock;
474 mutex_lock(&gang->aff_mutex); 477 mutex_lock(&gang->aff_mutex);
475 neighbor = spufs_assert_affinity(flags, gang, aff_filp); 478 neighbor = spufs_assert_affinity(flags, gang, aff_filp);
476 if (IS_ERR(neighbor)) { 479 if (IS_ERR(neighbor)) {
477 ret = PTR_ERR(neighbor); 480 ret = PTR_ERR(neighbor);
478 goto out_aff_unlock; 481 goto out_aff_unlock;
479 } 482 }
480 } 483 }
481 484
482 ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); 485 ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
483 if (ret) 486 if (ret)
484 goto out_aff_unlock; 487 goto out_aff_unlock;
485 488
486 if (affinity) { 489 if (affinity) {
487 spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx, 490 spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
488 neighbor); 491 neighbor);
489 if (neighbor) 492 if (neighbor)
490 put_spu_context(neighbor); 493 put_spu_context(neighbor);
491 } 494 }
492 495
493 /* 496 /*
494 * get references for dget and mntget, will be released 497 * get references for dget and mntget, will be released
495 * in error path of *_open(). 498 * in error path of *_open().
496 */ 499 */
497 ret = spufs_context_open(dget(dentry), mntget(mnt)); 500 ret = spufs_context_open(dget(dentry), mntget(mnt));
498 if (ret < 0) { 501 if (ret < 0) {
499 WARN_ON(spufs_rmdir(inode, dentry)); 502 WARN_ON(spufs_rmdir(inode, dentry));
500 if (affinity) 503 if (affinity)
501 mutex_unlock(&gang->aff_mutex); 504 mutex_unlock(&gang->aff_mutex);
502 mutex_unlock(&inode->i_mutex); 505 mutex_unlock(&inode->i_mutex);
503 spu_forget(SPUFS_I(dentry->d_inode)->i_ctx); 506 spu_forget(SPUFS_I(dentry->d_inode)->i_ctx);
504 goto out; 507 goto out;
505 } 508 }
506 509
507 out_aff_unlock: 510 out_aff_unlock:
508 if (affinity) 511 if (affinity)
509 mutex_unlock(&gang->aff_mutex); 512 mutex_unlock(&gang->aff_mutex);
510 out_unlock: 513 out_unlock:
511 mutex_unlock(&inode->i_mutex); 514 mutex_unlock(&inode->i_mutex);
512 out: 515 out:
513 dput(dentry); 516 dput(dentry);
514 return ret; 517 return ret;
515 } 518 }
516 519
517 static int 520 static int
518 spufs_mkgang(struct inode *dir, struct dentry *dentry, int mode) 521 spufs_mkgang(struct inode *dir, struct dentry *dentry, int mode)
519 { 522 {
520 int ret; 523 int ret;
521 struct inode *inode; 524 struct inode *inode;
522 struct spu_gang *gang; 525 struct spu_gang *gang;
523 526
524 ret = -ENOSPC; 527 ret = -ENOSPC;
525 inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); 528 inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
526 if (!inode) 529 if (!inode)
527 goto out; 530 goto out;
528 531
529 ret = 0; 532 ret = 0;
530 if (dir->i_mode & S_ISGID) { 533 if (dir->i_mode & S_ISGID) {
531 inode->i_gid = dir->i_gid; 534 inode->i_gid = dir->i_gid;
532 inode->i_mode &= S_ISGID; 535 inode->i_mode &= S_ISGID;
533 } 536 }
534 gang = alloc_spu_gang(); 537 gang = alloc_spu_gang();
535 SPUFS_I(inode)->i_ctx = NULL; 538 SPUFS_I(inode)->i_ctx = NULL;
536 SPUFS_I(inode)->i_gang = gang; 539 SPUFS_I(inode)->i_gang = gang;
537 if (!gang) 540 if (!gang)
538 goto out_iput; 541 goto out_iput;
539 542
540 inode->i_op = &simple_dir_inode_operations; 543 inode->i_op = &simple_dir_inode_operations;
541 inode->i_fop = &simple_dir_operations; 544 inode->i_fop = &simple_dir_operations;
542 545
543 d_instantiate(dentry, inode); 546 d_instantiate(dentry, inode);
544 inc_nlink(dir); 547 inc_nlink(dir);
545 inc_nlink(dentry->d_inode); 548 inc_nlink(dentry->d_inode);
546 return ret; 549 return ret;
547 550
548 out_iput: 551 out_iput:
549 iput(inode); 552 iput(inode);
550 out: 553 out:
551 return ret; 554 return ret;
552 } 555 }
553 556
554 static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt) 557 static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt)
555 { 558 {
556 int ret; 559 int ret;
557 struct file *filp; 560 struct file *filp;
558 561
559 ret = get_unused_fd(); 562 ret = get_unused_fd();
560 if (ret < 0) { 563 if (ret < 0) {
561 dput(dentry); 564 dput(dentry);
562 mntput(mnt); 565 mntput(mnt);
563 goto out; 566 goto out;
564 } 567 }
565 568
566 filp = dentry_open(dentry, mnt, O_RDONLY, current_cred()); 569 filp = dentry_open(dentry, mnt, O_RDONLY, current_cred());
567 if (IS_ERR(filp)) { 570 if (IS_ERR(filp)) {
568 put_unused_fd(ret); 571 put_unused_fd(ret);
569 ret = PTR_ERR(filp); 572 ret = PTR_ERR(filp);
570 goto out; 573 goto out;
571 } 574 }
572 575
573 filp->f_op = &simple_dir_operations; 576 filp->f_op = &simple_dir_operations;
574 fd_install(ret, filp); 577 fd_install(ret, filp);
575 out: 578 out:
576 return ret; 579 return ret;
577 } 580 }
578 581
579 static int spufs_create_gang(struct inode *inode, 582 static int spufs_create_gang(struct inode *inode,
580 struct dentry *dentry, 583 struct dentry *dentry,
581 struct vfsmount *mnt, int mode) 584 struct vfsmount *mnt, int mode)
582 { 585 {
583 int ret; 586 int ret;
584 587
585 ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO); 588 ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
586 if (ret) 589 if (ret)
587 goto out; 590 goto out;
588 591
589 /* 592 /*
590 * get references for dget and mntget, will be released 593 * get references for dget and mntget, will be released
591 * in error path of *_open(). 594 * in error path of *_open().
592 */ 595 */
593 ret = spufs_gang_open(dget(dentry), mntget(mnt)); 596 ret = spufs_gang_open(dget(dentry), mntget(mnt));
594 if (ret < 0) { 597 if (ret < 0) {
595 int err = simple_rmdir(inode, dentry); 598 int err = simple_rmdir(inode, dentry);
596 WARN_ON(err); 599 WARN_ON(err);
597 } 600 }
598 601
599 out: 602 out:
600 mutex_unlock(&inode->i_mutex); 603 mutex_unlock(&inode->i_mutex);
601 dput(dentry); 604 dput(dentry);
602 return ret; 605 return ret;
603 } 606 }
604 607
605 608
606 static struct file_system_type spufs_type; 609 static struct file_system_type spufs_type;
607 610
608 long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, 611 long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
609 struct file *filp) 612 struct file *filp)
610 { 613 {
611 struct dentry *dentry; 614 struct dentry *dentry;
612 int ret; 615 int ret;
613 616
614 ret = -EINVAL; 617 ret = -EINVAL;
615 /* check if we are on spufs */ 618 /* check if we are on spufs */
616 if (nd->path.dentry->d_sb->s_type != &spufs_type) 619 if (nd->path.dentry->d_sb->s_type != &spufs_type)
617 goto out; 620 goto out;
618 621
619 /* don't accept undefined flags */ 622 /* don't accept undefined flags */
620 if (flags & (~SPU_CREATE_FLAG_ALL)) 623 if (flags & (~SPU_CREATE_FLAG_ALL))
621 goto out; 624 goto out;
622 625
623 /* only threads can be underneath a gang */ 626 /* only threads can be underneath a gang */
624 if (nd->path.dentry != nd->path.dentry->d_sb->s_root) { 627 if (nd->path.dentry != nd->path.dentry->d_sb->s_root) {
625 if ((flags & SPU_CREATE_GANG) || 628 if ((flags & SPU_CREATE_GANG) ||
626 !SPUFS_I(nd->path.dentry->d_inode)->i_gang) 629 !SPUFS_I(nd->path.dentry->d_inode)->i_gang)
627 goto out; 630 goto out;
628 } 631 }
629 632
630 dentry = lookup_create(nd, 1); 633 dentry = lookup_create(nd, 1);
631 ret = PTR_ERR(dentry); 634 ret = PTR_ERR(dentry);
632 if (IS_ERR(dentry)) 635 if (IS_ERR(dentry))
633 goto out_dir; 636 goto out_dir;
634 637
635 mode &= ~current_umask(); 638 mode &= ~current_umask();
636 639
637 if (flags & SPU_CREATE_GANG) 640 if (flags & SPU_CREATE_GANG)
638 ret = spufs_create_gang(nd->path.dentry->d_inode, 641 ret = spufs_create_gang(nd->path.dentry->d_inode,
639 dentry, nd->path.mnt, mode); 642 dentry, nd->path.mnt, mode);
640 else 643 else
641 ret = spufs_create_context(nd->path.dentry->d_inode, 644 ret = spufs_create_context(nd->path.dentry->d_inode,
642 dentry, nd->path.mnt, flags, mode, 645 dentry, nd->path.mnt, flags, mode,
643 filp); 646 filp);
644 if (ret >= 0) 647 if (ret >= 0)
645 fsnotify_mkdir(nd->path.dentry->d_inode, dentry); 648 fsnotify_mkdir(nd->path.dentry->d_inode, dentry);
646 return ret; 649 return ret;
647 650
648 out_dir: 651 out_dir:
649 mutex_unlock(&nd->path.dentry->d_inode->i_mutex); 652 mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
650 out: 653 out:
651 return ret; 654 return ret;
652 } 655 }
653 656
654 /* File system initialization */ 657 /* File system initialization */
655 enum { 658 enum {
656 Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err, 659 Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
657 }; 660 };
658 661
659 static const match_table_t spufs_tokens = { 662 static const match_table_t spufs_tokens = {
660 { Opt_uid, "uid=%d" }, 663 { Opt_uid, "uid=%d" },
661 { Opt_gid, "gid=%d" }, 664 { Opt_gid, "gid=%d" },
662 { Opt_mode, "mode=%o" }, 665 { Opt_mode, "mode=%o" },
663 { Opt_debug, "debug" }, 666 { Opt_debug, "debug" },
664 { Opt_err, NULL }, 667 { Opt_err, NULL },
665 }; 668 };
666 669
667 static int 670 static int
668 spufs_parse_options(struct super_block *sb, char *options, struct inode *root) 671 spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
669 { 672 {
670 char *p; 673 char *p;
671 substring_t args[MAX_OPT_ARGS]; 674 substring_t args[MAX_OPT_ARGS];
672 675
673 while ((p = strsep(&options, ",")) != NULL) { 676 while ((p = strsep(&options, ",")) != NULL) {
674 int token, option; 677 int token, option;
675 678
676 if (!*p) 679 if (!*p)
677 continue; 680 continue;
678 681
679 token = match_token(p, spufs_tokens, args); 682 token = match_token(p, spufs_tokens, args);
680 switch (token) { 683 switch (token) {
681 case Opt_uid: 684 case Opt_uid:
682 if (match_int(&args[0], &option)) 685 if (match_int(&args[0], &option))
683 return 0; 686 return 0;
684 root->i_uid = option; 687 root->i_uid = option;
685 break; 688 break;
686 case Opt_gid: 689 case Opt_gid:
687 if (match_int(&args[0], &option)) 690 if (match_int(&args[0], &option))
688 return 0; 691 return 0;
689 root->i_gid = option; 692 root->i_gid = option;
690 break; 693 break;
691 case Opt_mode: 694 case Opt_mode:
692 if (match_octal(&args[0], &option)) 695 if (match_octal(&args[0], &option))
693 return 0; 696 return 0;
694 root->i_mode = option | S_IFDIR; 697 root->i_mode = option | S_IFDIR;
695 break; 698 break;
696 case Opt_debug: 699 case Opt_debug:
697 spufs_get_sb_info(sb)->debug = 1; 700 spufs_get_sb_info(sb)->debug = 1;
698 break; 701 break;
699 default: 702 default:
700 return 0; 703 return 0;
701 } 704 }
702 } 705 }
703 return 1; 706 return 1;
704 } 707 }
705 708
706 static void spufs_exit_isolated_loader(void) 709 static void spufs_exit_isolated_loader(void)
707 { 710 {
708 free_pages((unsigned long) isolated_loader, 711 free_pages((unsigned long) isolated_loader,
709 get_order(isolated_loader_size)); 712 get_order(isolated_loader_size));
710 } 713 }
711 714
712 static void 715 static void
713 spufs_init_isolated_loader(void) 716 spufs_init_isolated_loader(void)
714 { 717 {
715 struct device_node *dn; 718 struct device_node *dn;
716 const char *loader; 719 const char *loader;
717 int size; 720 int size;
718 721
719 dn = of_find_node_by_path("/spu-isolation"); 722 dn = of_find_node_by_path("/spu-isolation");
720 if (!dn) 723 if (!dn)
721 return; 724 return;
722 725
723 loader = of_get_property(dn, "loader", &size); 726 loader = of_get_property(dn, "loader", &size);
724 if (!loader) 727 if (!loader)
725 return; 728 return;
726 729
727 /* the loader must be align on a 16 byte boundary */ 730 /* the loader must be align on a 16 byte boundary */
728 isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size)); 731 isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size));
729 if (!isolated_loader) 732 if (!isolated_loader)
730 return; 733 return;
731 734
732 isolated_loader_size = size; 735 isolated_loader_size = size;
733 memcpy(isolated_loader, loader, size); 736 memcpy(isolated_loader, loader, size);
734 printk(KERN_INFO "spufs: SPU isolation mode enabled\n"); 737 printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
735 } 738 }
736 739
737 static int 740 static int
738 spufs_create_root(struct super_block *sb, void *data) 741 spufs_create_root(struct super_block *sb, void *data)
739 { 742 {
740 struct inode *inode; 743 struct inode *inode;
741 int ret; 744 int ret;
742 745
743 ret = -ENODEV; 746 ret = -ENODEV;
744 if (!spu_management_ops) 747 if (!spu_management_ops)
745 goto out; 748 goto out;
746 749
747 ret = -ENOMEM; 750 ret = -ENOMEM;
748 inode = spufs_new_inode(sb, S_IFDIR | 0775); 751 inode = spufs_new_inode(sb, S_IFDIR | 0775);
749 if (!inode) 752 if (!inode)
750 goto out; 753 goto out;
751 754
752 inode->i_op = &simple_dir_inode_operations; 755 inode->i_op = &simple_dir_inode_operations;
753 inode->i_fop = &simple_dir_operations; 756 inode->i_fop = &simple_dir_operations;
754 SPUFS_I(inode)->i_ctx = NULL; 757 SPUFS_I(inode)->i_ctx = NULL;
755 inc_nlink(inode); 758 inc_nlink(inode);
756 759
757 ret = -EINVAL; 760 ret = -EINVAL;
758 if (!spufs_parse_options(sb, data, inode)) 761 if (!spufs_parse_options(sb, data, inode))
759 goto out_iput; 762 goto out_iput;
760 763
761 ret = -ENOMEM; 764 ret = -ENOMEM;
762 sb->s_root = d_alloc_root(inode); 765 sb->s_root = d_alloc_root(inode);
763 if (!sb->s_root) 766 if (!sb->s_root)
764 goto out_iput; 767 goto out_iput;
765 768
766 return 0; 769 return 0;
767 out_iput: 770 out_iput:
768 iput(inode); 771 iput(inode);
769 out: 772 out:
770 return ret; 773 return ret;
771 } 774 }
772 775
773 static int 776 static int
774 spufs_fill_super(struct super_block *sb, void *data, int silent) 777 spufs_fill_super(struct super_block *sb, void *data, int silent)
775 { 778 {
776 struct spufs_sb_info *info; 779 struct spufs_sb_info *info;
777 static const struct super_operations s_ops = { 780 static const struct super_operations s_ops = {
778 .alloc_inode = spufs_alloc_inode, 781 .alloc_inode = spufs_alloc_inode,
779 .destroy_inode = spufs_destroy_inode, 782 .destroy_inode = spufs_destroy_inode,
780 .statfs = simple_statfs, 783 .statfs = simple_statfs,
781 .evict_inode = spufs_evict_inode, 784 .evict_inode = spufs_evict_inode,
782 .show_options = generic_show_options, 785 .show_options = generic_show_options,
783 }; 786 };
784 787
785 save_mount_options(sb, data); 788 save_mount_options(sb, data);
786 789
787 info = kzalloc(sizeof(*info), GFP_KERNEL); 790 info = kzalloc(sizeof(*info), GFP_KERNEL);
788 if (!info) 791 if (!info)
789 return -ENOMEM; 792 return -ENOMEM;
790 793
791 sb->s_maxbytes = MAX_LFS_FILESIZE; 794 sb->s_maxbytes = MAX_LFS_FILESIZE;
792 sb->s_blocksize = PAGE_CACHE_SIZE; 795 sb->s_blocksize = PAGE_CACHE_SIZE;
793 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 796 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
794 sb->s_magic = SPUFS_MAGIC; 797 sb->s_magic = SPUFS_MAGIC;
795 sb->s_op = &s_ops; 798 sb->s_op = &s_ops;
796 sb->s_fs_info = info; 799 sb->s_fs_info = info;
797 800
798 return spufs_create_root(sb, data); 801 return spufs_create_root(sb, data);
799 } 802 }
800 803
801 static struct dentry * 804 static struct dentry *
802 spufs_mount(struct file_system_type *fstype, int flags, 805 spufs_mount(struct file_system_type *fstype, int flags,
803 const char *name, void *data) 806 const char *name, void *data)
804 { 807 {
805 return mount_single(fstype, flags, data, spufs_fill_super); 808 return mount_single(fstype, flags, data, spufs_fill_super);
806 } 809 }
807 810
808 static struct file_system_type spufs_type = { 811 static struct file_system_type spufs_type = {
809 .owner = THIS_MODULE, 812 .owner = THIS_MODULE,
810 .name = "spufs", 813 .name = "spufs",
811 .mount = spufs_mount, 814 .mount = spufs_mount,
812 .kill_sb = kill_litter_super, 815 .kill_sb = kill_litter_super,
813 }; 816 };
814 817
815 static int __init spufs_init(void) 818 static int __init spufs_init(void)
816 { 819 {
817 int ret; 820 int ret;
818 821
819 ret = -ENODEV; 822 ret = -ENODEV;
820 if (!spu_management_ops) 823 if (!spu_management_ops)
821 goto out; 824 goto out;
822 825
823 ret = -ENOMEM; 826 ret = -ENOMEM;
824 spufs_inode_cache = kmem_cache_create("spufs_inode_cache", 827 spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
825 sizeof(struct spufs_inode_info), 0, 828 sizeof(struct spufs_inode_info), 0,
826 SLAB_HWCACHE_ALIGN, spufs_init_once); 829 SLAB_HWCACHE_ALIGN, spufs_init_once);
827 830
828 if (!spufs_inode_cache) 831 if (!spufs_inode_cache)
829 goto out; 832 goto out;
830 ret = spu_sched_init(); 833 ret = spu_sched_init();
831 if (ret) 834 if (ret)
832 goto out_cache; 835 goto out_cache;
833 ret = register_filesystem(&spufs_type); 836 ret = register_filesystem(&spufs_type);
834 if (ret) 837 if (ret)
835 goto out_sched; 838 goto out_sched;
836 ret = register_spu_syscalls(&spufs_calls); 839 ret = register_spu_syscalls(&spufs_calls);
837 if (ret) 840 if (ret)
838 goto out_fs; 841 goto out_fs;
839 842
840 spufs_init_isolated_loader(); 843 spufs_init_isolated_loader();
841 844
842 return 0; 845 return 0;
843 846
844 out_fs: 847 out_fs:
845 unregister_filesystem(&spufs_type); 848 unregister_filesystem(&spufs_type);
846 out_sched: 849 out_sched:
847 spu_sched_exit(); 850 spu_sched_exit();
848 out_cache: 851 out_cache:
849 kmem_cache_destroy(spufs_inode_cache); 852 kmem_cache_destroy(spufs_inode_cache);
850 out: 853 out:
851 return ret; 854 return ret;
852 } 855 }
853 module_init(spufs_init); 856 module_init(spufs_init);
854 857
855 static void __exit spufs_exit(void) 858 static void __exit spufs_exit(void)
856 { 859 {
857 spu_sched_exit(); 860 spu_sched_exit();
858 spufs_exit_isolated_loader(); 861 spufs_exit_isolated_loader();
859 unregister_spu_syscalls(&spufs_calls); 862 unregister_spu_syscalls(&spufs_calls);
860 unregister_filesystem(&spufs_type); 863 unregister_filesystem(&spufs_type);
861 kmem_cache_destroy(spufs_inode_cache); 864 kmem_cache_destroy(spufs_inode_cache);
862 } 865 }
863 module_exit(spufs_exit); 866 module_exit(spufs_exit);
864 867
865 MODULE_LICENSE("GPL"); 868 MODULE_LICENSE("GPL");
866 MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>"); 869 MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
867 870
868 871
drivers/usb/core/inode.c
1 /*****************************************************************************/ 1 /*****************************************************************************/
2 2
3 /* 3 /*
4 * inode.c -- Inode/Dentry functions for the USB device file system. 4 * inode.c -- Inode/Dentry functions for the USB device file system.
5 * 5 *
6 * Copyright (C) 2000 Thomas Sailer (sailer@ife.ee.ethz.ch) 6 * Copyright (C) 2000 Thomas Sailer (sailer@ife.ee.ethz.ch)
7 * Copyright (C) 2001,2002,2004 Greg Kroah-Hartman (greg@kroah.com) 7 * Copyright (C) 2001,2002,2004 Greg Kroah-Hartman (greg@kroah.com)
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or 11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version. 12 * (at your option) any later version.
13 * 13 *
14 * This program is distributed in the hope that it will be useful, 14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details. 17 * GNU General Public License for more details.
18 * 18 *
19 * You should have received a copy of the GNU General Public License 19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software 20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 * 22 *
23 * History: 23 * History:
24 * 0.1 04.01.2000 Created 24 * 0.1 04.01.2000 Created
25 * 0.2 10.12.2001 converted to use the vfs layer better 25 * 0.2 10.12.2001 converted to use the vfs layer better
26 */ 26 */
27 27
28 /*****************************************************************************/ 28 /*****************************************************************************/
29 29
30 #include <linux/module.h> 30 #include <linux/module.h>
31 #include <linux/fs.h> 31 #include <linux/fs.h>
32 #include <linux/mount.h> 32 #include <linux/mount.h>
33 #include <linux/pagemap.h> 33 #include <linux/pagemap.h>
34 #include <linux/init.h> 34 #include <linux/init.h>
35 #include <linux/proc_fs.h> 35 #include <linux/proc_fs.h>
36 #include <linux/usb.h> 36 #include <linux/usb.h>
37 #include <linux/namei.h> 37 #include <linux/namei.h>
38 #include <linux/usbdevice_fs.h> 38 #include <linux/usbdevice_fs.h>
39 #include <linux/parser.h> 39 #include <linux/parser.h>
40 #include <linux/notifier.h> 40 #include <linux/notifier.h>
41 #include <linux/seq_file.h> 41 #include <linux/seq_file.h>
42 #include <linux/usb/hcd.h> 42 #include <linux/usb/hcd.h>
43 #include <asm/byteorder.h> 43 #include <asm/byteorder.h>
44 #include "usb.h" 44 #include "usb.h"
45 45
46 #define USBFS_DEFAULT_DEVMODE (S_IWUSR | S_IRUGO) 46 #define USBFS_DEFAULT_DEVMODE (S_IWUSR | S_IRUGO)
47 #define USBFS_DEFAULT_BUSMODE (S_IXUGO | S_IRUGO) 47 #define USBFS_DEFAULT_BUSMODE (S_IXUGO | S_IRUGO)
48 #define USBFS_DEFAULT_LISTMODE S_IRUGO 48 #define USBFS_DEFAULT_LISTMODE S_IRUGO
49 49
50 static const struct file_operations default_file_operations; 50 static const struct file_operations default_file_operations;
51 static struct vfsmount *usbfs_mount; 51 static struct vfsmount *usbfs_mount;
52 static int usbfs_mount_count; /* = 0 */ 52 static int usbfs_mount_count; /* = 0 */
53 static int ignore_mount = 0; 53 static int ignore_mount = 0;
54 54
55 static struct dentry *devices_usbfs_dentry; 55 static struct dentry *devices_usbfs_dentry;
56 static int num_buses; /* = 0 */ 56 static int num_buses; /* = 0 */
57 57
58 static uid_t devuid; /* = 0 */ 58 static uid_t devuid; /* = 0 */
59 static uid_t busuid; /* = 0 */ 59 static uid_t busuid; /* = 0 */
60 static uid_t listuid; /* = 0 */ 60 static uid_t listuid; /* = 0 */
61 static gid_t devgid; /* = 0 */ 61 static gid_t devgid; /* = 0 */
62 static gid_t busgid; /* = 0 */ 62 static gid_t busgid; /* = 0 */
63 static gid_t listgid; /* = 0 */ 63 static gid_t listgid; /* = 0 */
64 static umode_t devmode = USBFS_DEFAULT_DEVMODE; 64 static umode_t devmode = USBFS_DEFAULT_DEVMODE;
65 static umode_t busmode = USBFS_DEFAULT_BUSMODE; 65 static umode_t busmode = USBFS_DEFAULT_BUSMODE;
66 static umode_t listmode = USBFS_DEFAULT_LISTMODE; 66 static umode_t listmode = USBFS_DEFAULT_LISTMODE;
67 67
68 static int usbfs_show_options(struct seq_file *seq, struct vfsmount *mnt) 68 static int usbfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
69 { 69 {
70 if (devuid != 0) 70 if (devuid != 0)
71 seq_printf(seq, ",devuid=%u", devuid); 71 seq_printf(seq, ",devuid=%u", devuid);
72 if (devgid != 0) 72 if (devgid != 0)
73 seq_printf(seq, ",devgid=%u", devgid); 73 seq_printf(seq, ",devgid=%u", devgid);
74 if (devmode != USBFS_DEFAULT_DEVMODE) 74 if (devmode != USBFS_DEFAULT_DEVMODE)
75 seq_printf(seq, ",devmode=%o", devmode); 75 seq_printf(seq, ",devmode=%o", devmode);
76 if (busuid != 0) 76 if (busuid != 0)
77 seq_printf(seq, ",busuid=%u", busuid); 77 seq_printf(seq, ",busuid=%u", busuid);
78 if (busgid != 0) 78 if (busgid != 0)
79 seq_printf(seq, ",busgid=%u", busgid); 79 seq_printf(seq, ",busgid=%u", busgid);
80 if (busmode != USBFS_DEFAULT_BUSMODE) 80 if (busmode != USBFS_DEFAULT_BUSMODE)
81 seq_printf(seq, ",busmode=%o", busmode); 81 seq_printf(seq, ",busmode=%o", busmode);
82 if (listuid != 0) 82 if (listuid != 0)
83 seq_printf(seq, ",listuid=%u", listuid); 83 seq_printf(seq, ",listuid=%u", listuid);
84 if (listgid != 0) 84 if (listgid != 0)
85 seq_printf(seq, ",listgid=%u", listgid); 85 seq_printf(seq, ",listgid=%u", listgid);
86 if (listmode != USBFS_DEFAULT_LISTMODE) 86 if (listmode != USBFS_DEFAULT_LISTMODE)
87 seq_printf(seq, ",listmode=%o", listmode); 87 seq_printf(seq, ",listmode=%o", listmode);
88 88
89 return 0; 89 return 0;
90 } 90 }
91 91
92 enum { 92 enum {
93 Opt_devuid, Opt_devgid, Opt_devmode, 93 Opt_devuid, Opt_devgid, Opt_devmode,
94 Opt_busuid, Opt_busgid, Opt_busmode, 94 Opt_busuid, Opt_busgid, Opt_busmode,
95 Opt_listuid, Opt_listgid, Opt_listmode, 95 Opt_listuid, Opt_listgid, Opt_listmode,
96 Opt_err, 96 Opt_err,
97 }; 97 };
98 98
99 static const match_table_t tokens = { 99 static const match_table_t tokens = {
100 {Opt_devuid, "devuid=%u"}, 100 {Opt_devuid, "devuid=%u"},
101 {Opt_devgid, "devgid=%u"}, 101 {Opt_devgid, "devgid=%u"},
102 {Opt_devmode, "devmode=%o"}, 102 {Opt_devmode, "devmode=%o"},
103 {Opt_busuid, "busuid=%u"}, 103 {Opt_busuid, "busuid=%u"},
104 {Opt_busgid, "busgid=%u"}, 104 {Opt_busgid, "busgid=%u"},
105 {Opt_busmode, "busmode=%o"}, 105 {Opt_busmode, "busmode=%o"},
106 {Opt_listuid, "listuid=%u"}, 106 {Opt_listuid, "listuid=%u"},
107 {Opt_listgid, "listgid=%u"}, 107 {Opt_listgid, "listgid=%u"},
108 {Opt_listmode, "listmode=%o"}, 108 {Opt_listmode, "listmode=%o"},
109 {Opt_err, NULL} 109 {Opt_err, NULL}
110 }; 110 };
111 111
112 static int parse_options(struct super_block *s, char *data) 112 static int parse_options(struct super_block *s, char *data)
113 { 113 {
114 char *p; 114 char *p;
115 int option; 115 int option;
116 116
117 /* (re)set to defaults. */ 117 /* (re)set to defaults. */
118 devuid = 0; 118 devuid = 0;
119 busuid = 0; 119 busuid = 0;
120 listuid = 0; 120 listuid = 0;
121 devgid = 0; 121 devgid = 0;
122 busgid = 0; 122 busgid = 0;
123 listgid = 0; 123 listgid = 0;
124 devmode = USBFS_DEFAULT_DEVMODE; 124 devmode = USBFS_DEFAULT_DEVMODE;
125 busmode = USBFS_DEFAULT_BUSMODE; 125 busmode = USBFS_DEFAULT_BUSMODE;
126 listmode = USBFS_DEFAULT_LISTMODE; 126 listmode = USBFS_DEFAULT_LISTMODE;
127 127
128 while ((p = strsep(&data, ",")) != NULL) { 128 while ((p = strsep(&data, ",")) != NULL) {
129 substring_t args[MAX_OPT_ARGS]; 129 substring_t args[MAX_OPT_ARGS];
130 int token; 130 int token;
131 if (!*p) 131 if (!*p)
132 continue; 132 continue;
133 133
134 token = match_token(p, tokens, args); 134 token = match_token(p, tokens, args);
135 switch (token) { 135 switch (token) {
136 case Opt_devuid: 136 case Opt_devuid:
137 if (match_int(&args[0], &option)) 137 if (match_int(&args[0], &option))
138 return -EINVAL; 138 return -EINVAL;
139 devuid = option; 139 devuid = option;
140 break; 140 break;
141 case Opt_devgid: 141 case Opt_devgid:
142 if (match_int(&args[0], &option)) 142 if (match_int(&args[0], &option))
143 return -EINVAL; 143 return -EINVAL;
144 devgid = option; 144 devgid = option;
145 break; 145 break;
146 case Opt_devmode: 146 case Opt_devmode:
147 if (match_octal(&args[0], &option)) 147 if (match_octal(&args[0], &option))
148 return -EINVAL; 148 return -EINVAL;
149 devmode = option & S_IRWXUGO; 149 devmode = option & S_IRWXUGO;
150 break; 150 break;
151 case Opt_busuid: 151 case Opt_busuid:
152 if (match_int(&args[0], &option)) 152 if (match_int(&args[0], &option))
153 return -EINVAL; 153 return -EINVAL;
154 busuid = option; 154 busuid = option;
155 break; 155 break;
156 case Opt_busgid: 156 case Opt_busgid:
157 if (match_int(&args[0], &option)) 157 if (match_int(&args[0], &option))
158 return -EINVAL; 158 return -EINVAL;
159 busgid = option; 159 busgid = option;
160 break; 160 break;
161 case Opt_busmode: 161 case Opt_busmode:
162 if (match_octal(&args[0], &option)) 162 if (match_octal(&args[0], &option))
163 return -EINVAL; 163 return -EINVAL;
164 busmode = option & S_IRWXUGO; 164 busmode = option & S_IRWXUGO;
165 break; 165 break;
166 case Opt_listuid: 166 case Opt_listuid:
167 if (match_int(&args[0], &option)) 167 if (match_int(&args[0], &option))
168 return -EINVAL; 168 return -EINVAL;
169 listuid = option; 169 listuid = option;
170 break; 170 break;
171 case Opt_listgid: 171 case Opt_listgid:
172 if (match_int(&args[0], &option)) 172 if (match_int(&args[0], &option))
173 return -EINVAL; 173 return -EINVAL;
174 listgid = option; 174 listgid = option;
175 break; 175 break;
176 case Opt_listmode: 176 case Opt_listmode:
177 if (match_octal(&args[0], &option)) 177 if (match_octal(&args[0], &option))
178 return -EINVAL; 178 return -EINVAL;
179 listmode = option & S_IRWXUGO; 179 listmode = option & S_IRWXUGO;
180 break; 180 break;
181 default: 181 default:
182 printk(KERN_ERR "usbfs: unrecognised mount option " 182 printk(KERN_ERR "usbfs: unrecognised mount option "
183 "\"%s\" or missing value\n", p); 183 "\"%s\" or missing value\n", p);
184 return -EINVAL; 184 return -EINVAL;
185 } 185 }
186 } 186 }
187 187
188 return 0; 188 return 0;
189 } 189 }
190 190
191 static void update_special(struct dentry *special) 191 static void update_special(struct dentry *special)
192 { 192 {
193 special->d_inode->i_uid = listuid; 193 special->d_inode->i_uid = listuid;
194 special->d_inode->i_gid = listgid; 194 special->d_inode->i_gid = listgid;
195 special->d_inode->i_mode = S_IFREG | listmode; 195 special->d_inode->i_mode = S_IFREG | listmode;
196 } 196 }
197 197
198 static void update_dev(struct dentry *dev) 198 static void update_dev(struct dentry *dev)
199 { 199 {
200 dev->d_inode->i_uid = devuid; 200 dev->d_inode->i_uid = devuid;
201 dev->d_inode->i_gid = devgid; 201 dev->d_inode->i_gid = devgid;
202 dev->d_inode->i_mode = S_IFREG | devmode; 202 dev->d_inode->i_mode = S_IFREG | devmode;
203 } 203 }
204 204
205 static void update_bus(struct dentry *bus) 205 static void update_bus(struct dentry *bus)
206 { 206 {
207 struct dentry *dev = NULL; 207 struct dentry *dev = NULL;
208 208
209 bus->d_inode->i_uid = busuid; 209 bus->d_inode->i_uid = busuid;
210 bus->d_inode->i_gid = busgid; 210 bus->d_inode->i_gid = busgid;
211 bus->d_inode->i_mode = S_IFDIR | busmode; 211 bus->d_inode->i_mode = S_IFDIR | busmode;
212 212
213 mutex_lock(&bus->d_inode->i_mutex); 213 mutex_lock(&bus->d_inode->i_mutex);
214 214
215 list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child) 215 list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child)
216 if (dev->d_inode) 216 if (dev->d_inode)
217 update_dev(dev); 217 update_dev(dev);
218 218
219 mutex_unlock(&bus->d_inode->i_mutex); 219 mutex_unlock(&bus->d_inode->i_mutex);
220 } 220 }
221 221
222 static void update_sb(struct super_block *sb) 222 static void update_sb(struct super_block *sb)
223 { 223 {
224 struct dentry *root = sb->s_root; 224 struct dentry *root = sb->s_root;
225 struct dentry *bus = NULL; 225 struct dentry *bus = NULL;
226 226
227 if (!root) 227 if (!root)
228 return; 228 return;
229 229
230 mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT); 230 mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
231 231
232 list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) { 232 list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) {
233 if (bus->d_inode) { 233 if (bus->d_inode) {
234 switch (S_IFMT & bus->d_inode->i_mode) { 234 switch (S_IFMT & bus->d_inode->i_mode) {
235 case S_IFDIR: 235 case S_IFDIR:
236 update_bus(bus); 236 update_bus(bus);
237 break; 237 break;
238 case S_IFREG: 238 case S_IFREG:
239 update_special(bus); 239 update_special(bus);
240 break; 240 break;
241 default: 241 default:
242 printk(KERN_WARNING "usbfs: Unknown node %s " 242 printk(KERN_WARNING "usbfs: Unknown node %s "
243 "mode %x found on remount!\n", 243 "mode %x found on remount!\n",
244 bus->d_name.name, bus->d_inode->i_mode); 244 bus->d_name.name, bus->d_inode->i_mode);
245 break; 245 break;
246 } 246 }
247 } 247 }
248 } 248 }
249 249
250 mutex_unlock(&root->d_inode->i_mutex); 250 mutex_unlock(&root->d_inode->i_mutex);
251 } 251 }
252 252
253 static int remount(struct super_block *sb, int *flags, char *data) 253 static int remount(struct super_block *sb, int *flags, char *data)
254 { 254 {
255 /* If this is not a real mount, 255 /* If this is not a real mount,
256 * i.e. it's a simple_pin_fs from create_special_files, 256 * i.e. it's a simple_pin_fs from create_special_files,
257 * then ignore it. 257 * then ignore it.
258 */ 258 */
259 if (ignore_mount) 259 if (ignore_mount)
260 return 0; 260 return 0;
261 261
262 if (parse_options(sb, data)) { 262 if (parse_options(sb, data)) {
263 printk(KERN_WARNING "usbfs: mount parameter error.\n"); 263 printk(KERN_WARNING "usbfs: mount parameter error.\n");
264 return -EINVAL; 264 return -EINVAL;
265 } 265 }
266 266
267 if (usbfs_mount && usbfs_mount->mnt_sb) 267 if (usbfs_mount && usbfs_mount->mnt_sb)
268 update_sb(usbfs_mount->mnt_sb); 268 update_sb(usbfs_mount->mnt_sb);
269 269
270 return 0; 270 return 0;
271 } 271 }
272 272
273 static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t dev) 273 static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t dev)
274 { 274 {
275 struct inode *inode = new_inode(sb); 275 struct inode *inode = new_inode(sb);
276 276
277 if (inode) { 277 if (inode) {
278 inode->i_ino = get_next_ino(); 278 inode->i_ino = get_next_ino();
279 inode->i_mode = mode; 279 inode->i_mode = mode;
280 inode->i_uid = current_fsuid(); 280 inode->i_uid = current_fsuid();
281 inode->i_gid = current_fsgid(); 281 inode->i_gid = current_fsgid();
282 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 282 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
283 switch (mode & S_IFMT) { 283 switch (mode & S_IFMT) {
284 default: 284 default:
285 init_special_inode(inode, mode, dev); 285 init_special_inode(inode, mode, dev);
286 break; 286 break;
287 case S_IFREG: 287 case S_IFREG:
288 inode->i_fop = &default_file_operations; 288 inode->i_fop = &default_file_operations;
289 break; 289 break;
290 case S_IFDIR: 290 case S_IFDIR:
291 inode->i_op = &simple_dir_inode_operations; 291 inode->i_op = &simple_dir_inode_operations;
292 inode->i_fop = &simple_dir_operations; 292 inode->i_fop = &simple_dir_operations;
293 293
294 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 294 /* directory inodes start off with i_nlink == 2 (for "." entry) */
295 inc_nlink(inode); 295 inc_nlink(inode);
296 break; 296 break;
297 } 297 }
298 } 298 }
299 return inode; 299 return inode;
300 } 300 }
301 301
302 /* SMP-safe */ 302 /* SMP-safe */
303 static int usbfs_mknod (struct inode *dir, struct dentry *dentry, int mode, 303 static int usbfs_mknod (struct inode *dir, struct dentry *dentry, int mode,
304 dev_t dev) 304 dev_t dev)
305 { 305 {
306 struct inode *inode = usbfs_get_inode(dir->i_sb, mode, dev); 306 struct inode *inode = usbfs_get_inode(dir->i_sb, mode, dev);
307 int error = -EPERM; 307 int error = -EPERM;
308 308
309 if (dentry->d_inode) 309 if (dentry->d_inode)
310 return -EEXIST; 310 return -EEXIST;
311 311
312 if (inode) { 312 if (inode) {
313 d_instantiate(dentry, inode); 313 d_instantiate(dentry, inode);
314 dget(dentry); 314 dget(dentry);
315 error = 0; 315 error = 0;
316 } 316 }
317 return error; 317 return error;
318 } 318 }
319 319
320 static int usbfs_mkdir (struct inode *dir, struct dentry *dentry, int mode) 320 static int usbfs_mkdir (struct inode *dir, struct dentry *dentry, int mode)
321 { 321 {
322 int res; 322 int res;
323 323
324 mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; 324 mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
325 res = usbfs_mknod (dir, dentry, mode, 0); 325 res = usbfs_mknod (dir, dentry, mode, 0);
326 if (!res) 326 if (!res)
327 inc_nlink(dir); 327 inc_nlink(dir);
328 return res; 328 return res;
329 } 329 }
330 330
331 static int usbfs_create (struct inode *dir, struct dentry *dentry, int mode) 331 static int usbfs_create (struct inode *dir, struct dentry *dentry, int mode)
332 { 332 {
333 mode = (mode & S_IALLUGO) | S_IFREG; 333 mode = (mode & S_IALLUGO) | S_IFREG;
334 return usbfs_mknod (dir, dentry, mode, 0); 334 return usbfs_mknod (dir, dentry, mode, 0);
335 } 335 }
336 336
337 static inline int usbfs_positive (struct dentry *dentry) 337 static inline int usbfs_positive (struct dentry *dentry)
338 { 338 {
339 return dentry->d_inode && !d_unhashed(dentry); 339 return dentry->d_inode && !d_unhashed(dentry);
340 } 340 }
341 341
342 static int usbfs_empty (struct dentry *dentry) 342 static int usbfs_empty (struct dentry *dentry)
343 { 343 {
344 struct list_head *list; 344 struct list_head *list;
345 345
346 spin_lock(&dcache_lock); 346 spin_lock(&dcache_lock);
347 347
348 list_for_each(list, &dentry->d_subdirs) { 348 list_for_each(list, &dentry->d_subdirs) {
349 struct dentry *de = list_entry(list, struct dentry, d_u.d_child); 349 struct dentry *de = list_entry(list, struct dentry, d_u.d_child);
350 spin_lock(&de->d_lock);
350 if (usbfs_positive(de)) { 351 if (usbfs_positive(de)) {
352 spin_unlock(&de->d_lock);
351 spin_unlock(&dcache_lock); 353 spin_unlock(&dcache_lock);
352 return 0; 354 return 0;
353 } 355 }
356 spin_unlock(&de->d_lock);
354 } 357 }
355 358
356 spin_unlock(&dcache_lock); 359 spin_unlock(&dcache_lock);
357 return 1; 360 return 1;
358 } 361 }
359 362
360 static int usbfs_unlink (struct inode *dir, struct dentry *dentry) 363 static int usbfs_unlink (struct inode *dir, struct dentry *dentry)
361 { 364 {
362 struct inode *inode = dentry->d_inode; 365 struct inode *inode = dentry->d_inode;
363 mutex_lock(&inode->i_mutex); 366 mutex_lock(&inode->i_mutex);
364 drop_nlink(dentry->d_inode); 367 drop_nlink(dentry->d_inode);
365 dput(dentry); 368 dput(dentry);
366 mutex_unlock(&inode->i_mutex); 369 mutex_unlock(&inode->i_mutex);
367 d_delete(dentry); 370 d_delete(dentry);
368 return 0; 371 return 0;
369 } 372 }
370 373
371 static int usbfs_rmdir(struct inode *dir, struct dentry *dentry) 374 static int usbfs_rmdir(struct inode *dir, struct dentry *dentry)
372 { 375 {
373 int error = -ENOTEMPTY; 376 int error = -ENOTEMPTY;
374 struct inode * inode = dentry->d_inode; 377 struct inode * inode = dentry->d_inode;
375 378
376 mutex_lock(&inode->i_mutex); 379 mutex_lock(&inode->i_mutex);
377 dentry_unhash(dentry); 380 dentry_unhash(dentry);
378 if (usbfs_empty(dentry)) { 381 if (usbfs_empty(dentry)) {
379 dont_mount(dentry); 382 dont_mount(dentry);
380 drop_nlink(dentry->d_inode); 383 drop_nlink(dentry->d_inode);
381 drop_nlink(dentry->d_inode); 384 drop_nlink(dentry->d_inode);
382 dput(dentry); 385 dput(dentry);
383 inode->i_flags |= S_DEAD; 386 inode->i_flags |= S_DEAD;
384 drop_nlink(dir); 387 drop_nlink(dir);
385 error = 0; 388 error = 0;
386 } 389 }
387 mutex_unlock(&inode->i_mutex); 390 mutex_unlock(&inode->i_mutex);
388 if (!error) 391 if (!error)
389 d_delete(dentry); 392 d_delete(dentry);
390 dput(dentry); 393 dput(dentry);
391 return error; 394 return error;
392 } 395 }
393 396
394 397
395 /* default file operations */ 398 /* default file operations */
396 static ssize_t default_read_file (struct file *file, char __user *buf, 399 static ssize_t default_read_file (struct file *file, char __user *buf,
397 size_t count, loff_t *ppos) 400 size_t count, loff_t *ppos)
398 { 401 {
399 return 0; 402 return 0;
400 } 403 }
401 404
402 static ssize_t default_write_file (struct file *file, const char __user *buf, 405 static ssize_t default_write_file (struct file *file, const char __user *buf,
403 size_t count, loff_t *ppos) 406 size_t count, loff_t *ppos)
404 { 407 {
405 return count; 408 return count;
406 } 409 }
407 410
408 static loff_t default_file_lseek (struct file *file, loff_t offset, int orig) 411 static loff_t default_file_lseek (struct file *file, loff_t offset, int orig)
409 { 412 {
410 loff_t retval = -EINVAL; 413 loff_t retval = -EINVAL;
411 414
412 mutex_lock(&file->f_path.dentry->d_inode->i_mutex); 415 mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
413 switch(orig) { 416 switch(orig) {
414 case 0: 417 case 0:
415 if (offset > 0) { 418 if (offset > 0) {
416 file->f_pos = offset; 419 file->f_pos = offset;
417 retval = file->f_pos; 420 retval = file->f_pos;
418 } 421 }
419 break; 422 break;
420 case 1: 423 case 1:
421 if ((offset + file->f_pos) > 0) { 424 if ((offset + file->f_pos) > 0) {
422 file->f_pos += offset; 425 file->f_pos += offset;
423 retval = file->f_pos; 426 retval = file->f_pos;
424 } 427 }
425 break; 428 break;
426 default: 429 default:
427 break; 430 break;
428 } 431 }
429 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 432 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
430 return retval; 433 return retval;
431 } 434 }
432 435
433 static int default_open (struct inode *inode, struct file *file) 436 static int default_open (struct inode *inode, struct file *file)
434 { 437 {
435 if (inode->i_private) 438 if (inode->i_private)
436 file->private_data = inode->i_private; 439 file->private_data = inode->i_private;
437 440
438 return 0; 441 return 0;
439 } 442 }
440 443
441 static const struct file_operations default_file_operations = { 444 static const struct file_operations default_file_operations = {
442 .read = default_read_file, 445 .read = default_read_file,
443 .write = default_write_file, 446 .write = default_write_file,
444 .open = default_open, 447 .open = default_open,
445 .llseek = default_file_lseek, 448 .llseek = default_file_lseek,
446 }; 449 };
447 450
448 static const struct super_operations usbfs_ops = { 451 static const struct super_operations usbfs_ops = {
449 .statfs = simple_statfs, 452 .statfs = simple_statfs,
450 .drop_inode = generic_delete_inode, 453 .drop_inode = generic_delete_inode,
451 .remount_fs = remount, 454 .remount_fs = remount,
452 .show_options = usbfs_show_options, 455 .show_options = usbfs_show_options,
453 }; 456 };
454 457
455 static int usbfs_fill_super(struct super_block *sb, void *data, int silent) 458 static int usbfs_fill_super(struct super_block *sb, void *data, int silent)
456 { 459 {
457 struct inode *inode; 460 struct inode *inode;
458 struct dentry *root; 461 struct dentry *root;
459 462
460 sb->s_blocksize = PAGE_CACHE_SIZE; 463 sb->s_blocksize = PAGE_CACHE_SIZE;
461 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 464 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
462 sb->s_magic = USBDEVICE_SUPER_MAGIC; 465 sb->s_magic = USBDEVICE_SUPER_MAGIC;
463 sb->s_op = &usbfs_ops; 466 sb->s_op = &usbfs_ops;
464 sb->s_time_gran = 1; 467 sb->s_time_gran = 1;
465 inode = usbfs_get_inode(sb, S_IFDIR | 0755, 0); 468 inode = usbfs_get_inode(sb, S_IFDIR | 0755, 0);
466 469
467 if (!inode) { 470 if (!inode) {
468 dbg("%s: could not get inode!",__func__); 471 dbg("%s: could not get inode!",__func__);
469 return -ENOMEM; 472 return -ENOMEM;
470 } 473 }
471 474
472 root = d_alloc_root(inode); 475 root = d_alloc_root(inode);
473 if (!root) { 476 if (!root) {
474 dbg("%s: could not get root dentry!",__func__); 477 dbg("%s: could not get root dentry!",__func__);
475 iput(inode); 478 iput(inode);
476 return -ENOMEM; 479 return -ENOMEM;
477 } 480 }
478 sb->s_root = root; 481 sb->s_root = root;
479 return 0; 482 return 0;
480 } 483 }
481 484
482 /* 485 /*
483 * fs_create_by_name - create a file, given a name 486 * fs_create_by_name - create a file, given a name
484 * @name: name of file 487 * @name: name of file
485 * @mode: type of file 488 * @mode: type of file
486 * @parent: dentry of directory to create it in 489 * @parent: dentry of directory to create it in
487 * @dentry: resulting dentry of file 490 * @dentry: resulting dentry of file
488 * 491 *
489 * This function handles both regular files and directories. 492 * This function handles both regular files and directories.
490 */ 493 */
491 static int fs_create_by_name (const char *name, mode_t mode, 494 static int fs_create_by_name (const char *name, mode_t mode,
492 struct dentry *parent, struct dentry **dentry) 495 struct dentry *parent, struct dentry **dentry)
493 { 496 {
494 int error = 0; 497 int error = 0;
495 498
496 /* If the parent is not specified, we create it in the root. 499 /* If the parent is not specified, we create it in the root.
497 * We need the root dentry to do this, which is in the super 500 * We need the root dentry to do this, which is in the super
498 * block. A pointer to that is in the struct vfsmount that we 501 * block. A pointer to that is in the struct vfsmount that we
499 * have around. 502 * have around.
500 */ 503 */
501 if (!parent ) { 504 if (!parent ) {
502 if (usbfs_mount && usbfs_mount->mnt_sb) { 505 if (usbfs_mount && usbfs_mount->mnt_sb) {
503 parent = usbfs_mount->mnt_sb->s_root; 506 parent = usbfs_mount->mnt_sb->s_root;
504 } 507 }
505 } 508 }
506 509
507 if (!parent) { 510 if (!parent) {
508 dbg("Ah! can not find a parent!"); 511 dbg("Ah! can not find a parent!");
509 return -EFAULT; 512 return -EFAULT;
510 } 513 }
511 514
512 *dentry = NULL; 515 *dentry = NULL;
513 mutex_lock(&parent->d_inode->i_mutex); 516 mutex_lock(&parent->d_inode->i_mutex);
514 *dentry = lookup_one_len(name, parent, strlen(name)); 517 *dentry = lookup_one_len(name, parent, strlen(name));
515 if (!IS_ERR(*dentry)) { 518 if (!IS_ERR(*dentry)) {
516 if ((mode & S_IFMT) == S_IFDIR) 519 if ((mode & S_IFMT) == S_IFDIR)
517 error = usbfs_mkdir (parent->d_inode, *dentry, mode); 520 error = usbfs_mkdir (parent->d_inode, *dentry, mode);
518 else 521 else
519 error = usbfs_create (parent->d_inode, *dentry, mode); 522 error = usbfs_create (parent->d_inode, *dentry, mode);
520 } else 523 } else
521 error = PTR_ERR(*dentry); 524 error = PTR_ERR(*dentry);
522 mutex_unlock(&parent->d_inode->i_mutex); 525 mutex_unlock(&parent->d_inode->i_mutex);
523 526
524 return error; 527 return error;
525 } 528 }
526 529
527 static struct dentry *fs_create_file (const char *name, mode_t mode, 530 static struct dentry *fs_create_file (const char *name, mode_t mode,
528 struct dentry *parent, void *data, 531 struct dentry *parent, void *data,
529 const struct file_operations *fops, 532 const struct file_operations *fops,
530 uid_t uid, gid_t gid) 533 uid_t uid, gid_t gid)
531 { 534 {
532 struct dentry *dentry; 535 struct dentry *dentry;
533 int error; 536 int error;
534 537
535 dbg("creating file '%s'",name); 538 dbg("creating file '%s'",name);
536 539
537 error = fs_create_by_name (name, mode, parent, &dentry); 540 error = fs_create_by_name (name, mode, parent, &dentry);
538 if (error) { 541 if (error) {
539 dentry = NULL; 542 dentry = NULL;
540 } else { 543 } else {
541 if (dentry->d_inode) { 544 if (dentry->d_inode) {
542 if (data) 545 if (data)
543 dentry->d_inode->i_private = data; 546 dentry->d_inode->i_private = data;
544 if (fops) 547 if (fops)
545 dentry->d_inode->i_fop = fops; 548 dentry->d_inode->i_fop = fops;
546 dentry->d_inode->i_uid = uid; 549 dentry->d_inode->i_uid = uid;
547 dentry->d_inode->i_gid = gid; 550 dentry->d_inode->i_gid = gid;
548 } 551 }
549 } 552 }
550 553
551 return dentry; 554 return dentry;
552 } 555 }
553 556
554 static void fs_remove_file (struct dentry *dentry) 557 static void fs_remove_file (struct dentry *dentry)
555 { 558 {
556 struct dentry *parent = dentry->d_parent; 559 struct dentry *parent = dentry->d_parent;
557 560
558 if (!parent || !parent->d_inode) 561 if (!parent || !parent->d_inode)
559 return; 562 return;
560 563
561 mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT); 564 mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT);
562 if (usbfs_positive(dentry)) { 565 if (usbfs_positive(dentry)) {
563 if (dentry->d_inode) { 566 if (dentry->d_inode) {
564 if (S_ISDIR(dentry->d_inode->i_mode)) 567 if (S_ISDIR(dentry->d_inode->i_mode))
565 usbfs_rmdir(parent->d_inode, dentry); 568 usbfs_rmdir(parent->d_inode, dentry);
566 else 569 else
567 usbfs_unlink(parent->d_inode, dentry); 570 usbfs_unlink(parent->d_inode, dentry);
568 dput(dentry); 571 dput(dentry);
569 } 572 }
570 } 573 }
571 mutex_unlock(&parent->d_inode->i_mutex); 574 mutex_unlock(&parent->d_inode->i_mutex);
572 } 575 }
573 576
574 /* --------------------------------------------------------------------- */ 577 /* --------------------------------------------------------------------- */
575 578
576 static struct dentry *usb_mount(struct file_system_type *fs_type, 579 static struct dentry *usb_mount(struct file_system_type *fs_type,
577 int flags, const char *dev_name, void *data) 580 int flags, const char *dev_name, void *data)
578 { 581 {
579 return mount_single(fs_type, flags, data, usbfs_fill_super); 582 return mount_single(fs_type, flags, data, usbfs_fill_super);
580 } 583 }
581 584
582 static struct file_system_type usb_fs_type = { 585 static struct file_system_type usb_fs_type = {
583 .owner = THIS_MODULE, 586 .owner = THIS_MODULE,
584 .name = "usbfs", 587 .name = "usbfs",
585 .mount = usb_mount, 588 .mount = usb_mount,
586 .kill_sb = kill_litter_super, 589 .kill_sb = kill_litter_super,
587 }; 590 };
588 591
589 /* --------------------------------------------------------------------- */ 592 /* --------------------------------------------------------------------- */
590 593
591 static int create_special_files (void) 594 static int create_special_files (void)
592 { 595 {
593 struct dentry *parent; 596 struct dentry *parent;
594 int retval; 597 int retval;
595 598
596 /* the simple_pin_fs calls will call remount with no options 599 /* the simple_pin_fs calls will call remount with no options
597 * without this flag that would overwrite the real mount options (if any) 600 * without this flag that would overwrite the real mount options (if any)
598 */ 601 */
599 ignore_mount = 1; 602 ignore_mount = 1;
600 603
601 /* create the devices special file */ 604 /* create the devices special file */
602 retval = simple_pin_fs(&usb_fs_type, &usbfs_mount, &usbfs_mount_count); 605 retval = simple_pin_fs(&usb_fs_type, &usbfs_mount, &usbfs_mount_count);
603 if (retval) { 606 if (retval) {
604 printk(KERN_ERR "Unable to get usbfs mount\n"); 607 printk(KERN_ERR "Unable to get usbfs mount\n");
605 goto exit; 608 goto exit;
606 } 609 }
607 610
608 ignore_mount = 0; 611 ignore_mount = 0;
609 612
610 parent = usbfs_mount->mnt_sb->s_root; 613 parent = usbfs_mount->mnt_sb->s_root;
611 devices_usbfs_dentry = fs_create_file ("devices", 614 devices_usbfs_dentry = fs_create_file ("devices",
612 listmode | S_IFREG, parent, 615 listmode | S_IFREG, parent,
613 NULL, &usbfs_devices_fops, 616 NULL, &usbfs_devices_fops,
614 listuid, listgid); 617 listuid, listgid);
615 if (devices_usbfs_dentry == NULL) { 618 if (devices_usbfs_dentry == NULL) {
616 printk(KERN_ERR "Unable to create devices usbfs file\n"); 619 printk(KERN_ERR "Unable to create devices usbfs file\n");
617 retval = -ENODEV; 620 retval = -ENODEV;
618 goto error_clean_mounts; 621 goto error_clean_mounts;
619 } 622 }
620 623
621 goto exit; 624 goto exit;
622 625
623 error_clean_mounts: 626 error_clean_mounts:
624 simple_release_fs(&usbfs_mount, &usbfs_mount_count); 627 simple_release_fs(&usbfs_mount, &usbfs_mount_count);
625 exit: 628 exit:
626 return retval; 629 return retval;
627 } 630 }
628 631
629 static void remove_special_files (void) 632 static void remove_special_files (void)
630 { 633 {
631 if (devices_usbfs_dentry) 634 if (devices_usbfs_dentry)
632 fs_remove_file (devices_usbfs_dentry); 635 fs_remove_file (devices_usbfs_dentry);
633 devices_usbfs_dentry = NULL; 636 devices_usbfs_dentry = NULL;
634 simple_release_fs(&usbfs_mount, &usbfs_mount_count); 637 simple_release_fs(&usbfs_mount, &usbfs_mount_count);
635 } 638 }
636 639
637 void usbfs_update_special (void) 640 void usbfs_update_special (void)
638 { 641 {
639 struct inode *inode; 642 struct inode *inode;
640 643
641 if (devices_usbfs_dentry) { 644 if (devices_usbfs_dentry) {
642 inode = devices_usbfs_dentry->d_inode; 645 inode = devices_usbfs_dentry->d_inode;
643 if (inode) 646 if (inode)
644 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 647 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
645 } 648 }
646 } 649 }
647 650
648 static void usbfs_add_bus(struct usb_bus *bus) 651 static void usbfs_add_bus(struct usb_bus *bus)
649 { 652 {
650 struct dentry *parent; 653 struct dentry *parent;
651 char name[8]; 654 char name[8];
652 int retval; 655 int retval;
653 656
654 /* create the special files if this is the first bus added */ 657 /* create the special files if this is the first bus added */
655 if (num_buses == 0) { 658 if (num_buses == 0) {
656 retval = create_special_files(); 659 retval = create_special_files();
657 if (retval) 660 if (retval)
658 return; 661 return;
659 } 662 }
660 ++num_buses; 663 ++num_buses;
661 664
662 sprintf (name, "%03d", bus->busnum); 665 sprintf (name, "%03d", bus->busnum);
663 666
664 parent = usbfs_mount->mnt_sb->s_root; 667 parent = usbfs_mount->mnt_sb->s_root;
665 bus->usbfs_dentry = fs_create_file (name, busmode | S_IFDIR, parent, 668 bus->usbfs_dentry = fs_create_file (name, busmode | S_IFDIR, parent,
666 bus, NULL, busuid, busgid); 669 bus, NULL, busuid, busgid);
667 if (bus->usbfs_dentry == NULL) { 670 if (bus->usbfs_dentry == NULL) {
668 printk(KERN_ERR "Error creating usbfs bus entry\n"); 671 printk(KERN_ERR "Error creating usbfs bus entry\n");
669 return; 672 return;
670 } 673 }
671 } 674 }
672 675
673 static void usbfs_remove_bus(struct usb_bus *bus) 676 static void usbfs_remove_bus(struct usb_bus *bus)
674 { 677 {
675 if (bus->usbfs_dentry) { 678 if (bus->usbfs_dentry) {
676 fs_remove_file (bus->usbfs_dentry); 679 fs_remove_file (bus->usbfs_dentry);
677 bus->usbfs_dentry = NULL; 680 bus->usbfs_dentry = NULL;
678 } 681 }
679 682
680 --num_buses; 683 --num_buses;
681 if (num_buses <= 0) { 684 if (num_buses <= 0) {
682 remove_special_files(); 685 remove_special_files();
683 num_buses = 0; 686 num_buses = 0;
684 } 687 }
685 } 688 }
686 689
687 static void usbfs_add_device(struct usb_device *dev) 690 static void usbfs_add_device(struct usb_device *dev)
688 { 691 {
689 char name[8]; 692 char name[8];
690 int i; 693 int i;
691 int i_size; 694 int i_size;
692 695
693 sprintf (name, "%03d", dev->devnum); 696 sprintf (name, "%03d", dev->devnum);
694 dev->usbfs_dentry = fs_create_file (name, devmode | S_IFREG, 697 dev->usbfs_dentry = fs_create_file (name, devmode | S_IFREG,
695 dev->bus->usbfs_dentry, dev, 698 dev->bus->usbfs_dentry, dev,
696 &usbdev_file_operations, 699 &usbdev_file_operations,
697 devuid, devgid); 700 devuid, devgid);
698 if (dev->usbfs_dentry == NULL) { 701 if (dev->usbfs_dentry == NULL) {
699 printk(KERN_ERR "Error creating usbfs device entry\n"); 702 printk(KERN_ERR "Error creating usbfs device entry\n");
700 return; 703 return;
701 } 704 }
702 705
703 /* Set the size of the device's file to be 706 /* Set the size of the device's file to be
704 * equal to the size of the device descriptors. */ 707 * equal to the size of the device descriptors. */
705 i_size = sizeof (struct usb_device_descriptor); 708 i_size = sizeof (struct usb_device_descriptor);
706 for (i = 0; i < dev->descriptor.bNumConfigurations; ++i) { 709 for (i = 0; i < dev->descriptor.bNumConfigurations; ++i) {
707 struct usb_config_descriptor *config = 710 struct usb_config_descriptor *config =
708 (struct usb_config_descriptor *)dev->rawdescriptors[i]; 711 (struct usb_config_descriptor *)dev->rawdescriptors[i];
709 i_size += le16_to_cpu(config->wTotalLength); 712 i_size += le16_to_cpu(config->wTotalLength);
710 } 713 }
711 if (dev->usbfs_dentry->d_inode) 714 if (dev->usbfs_dentry->d_inode)
712 dev->usbfs_dentry->d_inode->i_size = i_size; 715 dev->usbfs_dentry->d_inode->i_size = i_size;
713 } 716 }
714 717
715 static void usbfs_remove_device(struct usb_device *dev) 718 static void usbfs_remove_device(struct usb_device *dev)
716 { 719 {
717 if (dev->usbfs_dentry) { 720 if (dev->usbfs_dentry) {
718 fs_remove_file (dev->usbfs_dentry); 721 fs_remove_file (dev->usbfs_dentry);
719 dev->usbfs_dentry = NULL; 722 dev->usbfs_dentry = NULL;
720 } 723 }
721 } 724 }
722 725
723 static int usbfs_notify(struct notifier_block *self, unsigned long action, void *dev) 726 static int usbfs_notify(struct notifier_block *self, unsigned long action, void *dev)
724 { 727 {
725 switch (action) { 728 switch (action) {
726 case USB_DEVICE_ADD: 729 case USB_DEVICE_ADD:
727 usbfs_add_device(dev); 730 usbfs_add_device(dev);
728 break; 731 break;
729 case USB_DEVICE_REMOVE: 732 case USB_DEVICE_REMOVE:
730 usbfs_remove_device(dev); 733 usbfs_remove_device(dev);
731 break; 734 break;
732 case USB_BUS_ADD: 735 case USB_BUS_ADD:
733 usbfs_add_bus(dev); 736 usbfs_add_bus(dev);
734 break; 737 break;
735 case USB_BUS_REMOVE: 738 case USB_BUS_REMOVE:
736 usbfs_remove_bus(dev); 739 usbfs_remove_bus(dev);
737 } 740 }
738 741
739 usbfs_update_special(); 742 usbfs_update_special();
740 usbfs_conn_disc_event(); 743 usbfs_conn_disc_event();
741 return NOTIFY_OK; 744 return NOTIFY_OK;
742 } 745 }
743 746
744 static struct notifier_block usbfs_nb = { 747 static struct notifier_block usbfs_nb = {
745 .notifier_call = usbfs_notify, 748 .notifier_call = usbfs_notify,
746 }; 749 };
747 750
748 /* --------------------------------------------------------------------- */ 751 /* --------------------------------------------------------------------- */
749 752
750 static struct proc_dir_entry *usbdir = NULL; 753 static struct proc_dir_entry *usbdir = NULL;
751 754
752 int __init usbfs_init(void) 755 int __init usbfs_init(void)
753 { 756 {
754 int retval; 757 int retval;
755 758
756 retval = register_filesystem(&usb_fs_type); 759 retval = register_filesystem(&usb_fs_type);
757 if (retval) 760 if (retval)
758 return retval; 761 return retval;
759 762
760 usb_register_notify(&usbfs_nb); 763 usb_register_notify(&usbfs_nb);
761 764
762 /* create mount point for usbfs */ 765 /* create mount point for usbfs */
763 usbdir = proc_mkdir("bus/usb", NULL); 766 usbdir = proc_mkdir("bus/usb", NULL);
764 767
765 return 0; 768 return 0;
766 } 769 }
767 770
768 void usbfs_cleanup(void) 771 void usbfs_cleanup(void)
769 { 772 {
770 usb_unregister_notify(&usbfs_nb); 773 usb_unregister_notify(&usbfs_nb);
771 unregister_filesystem(&usb_fs_type); 774 unregister_filesystem(&usb_fs_type);
772 if (usbdir) 775 if (usbdir)
773 remove_proc_entry("bus/usb", NULL); 776 remove_proc_entry("bus/usb", NULL);
774 } 777 }
775 778
776 779
fs/autofs4/autofs_i.h
1 /* -*- c -*- ------------------------------------------------------------- * 1 /* -*- c -*- ------------------------------------------------------------- *
2 * 2 *
3 * linux/fs/autofs/autofs_i.h 3 * linux/fs/autofs/autofs_i.h
4 * 4 *
5 * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved 5 * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
6 * Copyright 2005-2006 Ian Kent <raven@themaw.net> 6 * Copyright 2005-2006 Ian Kent <raven@themaw.net>
7 * 7 *
8 * This file is part of the Linux kernel and is made available under 8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your 9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference. 10 * option, any later version, incorporated herein by reference.
11 * 11 *
12 * ----------------------------------------------------------------------- */ 12 * ----------------------------------------------------------------------- */
13 13
14 /* Internal header file for autofs */ 14 /* Internal header file for autofs */
15 15
16 #include <linux/auto_fs4.h> 16 #include <linux/auto_fs4.h>
17 #include <linux/auto_dev-ioctl.h> 17 #include <linux/auto_dev-ioctl.h>
18 #include <linux/mutex.h> 18 #include <linux/mutex.h>
19 #include <linux/list.h> 19 #include <linux/list.h>
20 20
21 /* This is the range of ioctl() numbers we claim as ours */ 21 /* This is the range of ioctl() numbers we claim as ours */
22 #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY 22 #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
23 #define AUTOFS_IOC_COUNT 32 23 #define AUTOFS_IOC_COUNT 32
24 24
25 #define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION) 25 #define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION)
26 #define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11) 26 #define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11)
27 27
28 #include <linux/kernel.h> 28 #include <linux/kernel.h>
29 #include <linux/slab.h> 29 #include <linux/slab.h>
30 #include <linux/time.h> 30 #include <linux/time.h>
31 #include <linux/string.h> 31 #include <linux/string.h>
32 #include <linux/wait.h> 32 #include <linux/wait.h>
33 #include <linux/sched.h> 33 #include <linux/sched.h>
34 #include <linux/mount.h> 34 #include <linux/mount.h>
35 #include <linux/namei.h> 35 #include <linux/namei.h>
36 #include <asm/current.h> 36 #include <asm/current.h>
37 #include <asm/uaccess.h> 37 #include <asm/uaccess.h>
38 38
39 /* #define DEBUG */ 39 /* #define DEBUG */
40 40
41 #ifdef DEBUG 41 #ifdef DEBUG
42 #define DPRINTK(fmt, args...) \ 42 #define DPRINTK(fmt, args...) \
43 do { \ 43 do { \
44 printk(KERN_DEBUG "pid %d: %s: " fmt "\n", \ 44 printk(KERN_DEBUG "pid %d: %s: " fmt "\n", \
45 current->pid, __func__, ##args); \ 45 current->pid, __func__, ##args); \
46 } while (0) 46 } while (0)
47 #else 47 #else
48 #define DPRINTK(fmt, args...) do {} while (0) 48 #define DPRINTK(fmt, args...) do {} while (0)
49 #endif 49 #endif
50 50
51 #define AUTOFS_WARN(fmt, args...) \ 51 #define AUTOFS_WARN(fmt, args...) \
52 do { \ 52 do { \
53 printk(KERN_WARNING "pid %d: %s: " fmt "\n", \ 53 printk(KERN_WARNING "pid %d: %s: " fmt "\n", \
54 current->pid, __func__, ##args); \ 54 current->pid, __func__, ##args); \
55 } while (0) 55 } while (0)
56 56
57 #define AUTOFS_ERROR(fmt, args...) \ 57 #define AUTOFS_ERROR(fmt, args...) \
58 do { \ 58 do { \
59 printk(KERN_ERR "pid %d: %s: " fmt "\n", \ 59 printk(KERN_ERR "pid %d: %s: " fmt "\n", \
60 current->pid, __func__, ##args); \ 60 current->pid, __func__, ##args); \
61 } while (0) 61 } while (0)
62 62
63 /* Unified info structure. This is pointed to by both the dentry and 63 /* Unified info structure. This is pointed to by both the dentry and
64 inode structures. Each file in the filesystem has an instance of this 64 inode structures. Each file in the filesystem has an instance of this
65 structure. It holds a reference to the dentry, so dentries are never 65 structure. It holds a reference to the dentry, so dentries are never
66 flushed while the file exists. All name lookups are dealt with at the 66 flushed while the file exists. All name lookups are dealt with at the
67 dentry level, although the filesystem can interfere in the validation 67 dentry level, although the filesystem can interfere in the validation
68 process. Readdir is implemented by traversing the dentry lists. */ 68 process. Readdir is implemented by traversing the dentry lists. */
69 struct autofs_info { 69 struct autofs_info {
70 struct dentry *dentry; 70 struct dentry *dentry;
71 struct inode *inode; 71 struct inode *inode;
72 72
73 int flags; 73 int flags;
74 74
75 struct completion expire_complete; 75 struct completion expire_complete;
76 76
77 struct list_head active; 77 struct list_head active;
78 int active_count; 78 int active_count;
79 79
80 struct list_head expiring; 80 struct list_head expiring;
81 81
82 struct autofs_sb_info *sbi; 82 struct autofs_sb_info *sbi;
83 unsigned long last_used; 83 unsigned long last_used;
84 atomic_t count; 84 atomic_t count;
85 85
86 uid_t uid; 86 uid_t uid;
87 gid_t gid; 87 gid_t gid;
88 88
89 mode_t mode; 89 mode_t mode;
90 size_t size; 90 size_t size;
91 91
92 void (*free)(struct autofs_info *); 92 void (*free)(struct autofs_info *);
93 union { 93 union {
94 const char *symlink; 94 const char *symlink;
95 } u; 95 } u;
96 }; 96 };
97 97
98 #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 98 #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
99 #define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ 99 #define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
100 #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ 100 #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
101 101
102 struct autofs_wait_queue { 102 struct autofs_wait_queue {
103 wait_queue_head_t queue; 103 wait_queue_head_t queue;
104 struct autofs_wait_queue *next; 104 struct autofs_wait_queue *next;
105 autofs_wqt_t wait_queue_token; 105 autofs_wqt_t wait_queue_token;
106 /* We use the following to see what we are waiting for */ 106 /* We use the following to see what we are waiting for */
107 struct qstr name; 107 struct qstr name;
108 u32 dev; 108 u32 dev;
109 u64 ino; 109 u64 ino;
110 uid_t uid; 110 uid_t uid;
111 gid_t gid; 111 gid_t gid;
112 pid_t pid; 112 pid_t pid;
113 pid_t tgid; 113 pid_t tgid;
114 /* This is for status reporting upon return */ 114 /* This is for status reporting upon return */
115 int status; 115 int status;
116 unsigned int wait_ctr; 116 unsigned int wait_ctr;
117 }; 117 };
118 118
119 #define AUTOFS_SBI_MAGIC 0x6d4a556d 119 #define AUTOFS_SBI_MAGIC 0x6d4a556d
120 120
121 struct autofs_sb_info { 121 struct autofs_sb_info {
122 u32 magic; 122 u32 magic;
123 int pipefd; 123 int pipefd;
124 struct file *pipe; 124 struct file *pipe;
125 pid_t oz_pgrp; 125 pid_t oz_pgrp;
126 int catatonic; 126 int catatonic;
127 int version; 127 int version;
128 int sub_version; 128 int sub_version;
129 int min_proto; 129 int min_proto;
130 int max_proto; 130 int max_proto;
131 unsigned long exp_timeout; 131 unsigned long exp_timeout;
132 unsigned int type; 132 unsigned int type;
133 int reghost_enabled; 133 int reghost_enabled;
134 int needs_reghost; 134 int needs_reghost;
135 struct super_block *sb; 135 struct super_block *sb;
136 struct mutex wq_mutex; 136 struct mutex wq_mutex;
137 spinlock_t fs_lock; 137 spinlock_t fs_lock;
138 struct autofs_wait_queue *queues; /* Wait queue pointer */ 138 struct autofs_wait_queue *queues; /* Wait queue pointer */
139 spinlock_t lookup_lock; 139 spinlock_t lookup_lock;
140 struct list_head active_list; 140 struct list_head active_list;
141 struct list_head expiring_list; 141 struct list_head expiring_list;
142 }; 142 };
143 143
144 static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) 144 static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
145 { 145 {
146 return (struct autofs_sb_info *)(sb->s_fs_info); 146 return (struct autofs_sb_info *)(sb->s_fs_info);
147 } 147 }
148 148
149 static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry) 149 static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
150 { 150 {
151 return (struct autofs_info *)(dentry->d_fsdata); 151 return (struct autofs_info *)(dentry->d_fsdata);
152 } 152 }
153 153
154 /* autofs4_oz_mode(): do we see the man behind the curtain? (The 154 /* autofs4_oz_mode(): do we see the man behind the curtain? (The
155 processes which do manipulations for us in user space sees the raw 155 processes which do manipulations for us in user space sees the raw
156 filesystem without "magic".) */ 156 filesystem without "magic".) */
157 157
158 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { 158 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
159 return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp; 159 return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp;
160 } 160 }
161 161
162 /* Does a dentry have some pending activity? */ 162 /* Does a dentry have some pending activity? */
163 static inline int autofs4_ispending(struct dentry *dentry) 163 static inline int autofs4_ispending(struct dentry *dentry)
164 { 164 {
165 struct autofs_info *inf = autofs4_dentry_ino(dentry); 165 struct autofs_info *inf = autofs4_dentry_ino(dentry);
166 166
167 if (inf->flags & AUTOFS_INF_PENDING) 167 if (inf->flags & AUTOFS_INF_PENDING)
168 return 1; 168 return 1;
169 169
170 if (inf->flags & AUTOFS_INF_EXPIRING) 170 if (inf->flags & AUTOFS_INF_EXPIRING)
171 return 1; 171 return 1;
172 172
173 return 0; 173 return 0;
174 } 174 }
175 175
176 static inline void autofs4_copy_atime(struct file *src, struct file *dst) 176 static inline void autofs4_copy_atime(struct file *src, struct file *dst)
177 { 177 {
178 dst->f_path.dentry->d_inode->i_atime = 178 dst->f_path.dentry->d_inode->i_atime =
179 src->f_path.dentry->d_inode->i_atime; 179 src->f_path.dentry->d_inode->i_atime;
180 return; 180 return;
181 } 181 }
182 182
183 struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *); 183 struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *);
184 void autofs4_free_ino(struct autofs_info *); 184 void autofs4_free_ino(struct autofs_info *);
185 185
186 /* Expiration */ 186 /* Expiration */
187 int is_autofs4_dentry(struct dentry *); 187 int is_autofs4_dentry(struct dentry *);
188 int autofs4_expire_wait(struct dentry *dentry); 188 int autofs4_expire_wait(struct dentry *dentry);
189 int autofs4_expire_run(struct super_block *, struct vfsmount *, 189 int autofs4_expire_run(struct super_block *, struct vfsmount *,
190 struct autofs_sb_info *, 190 struct autofs_sb_info *,
191 struct autofs_packet_expire __user *); 191 struct autofs_packet_expire __user *);
192 int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, 192 int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
193 struct autofs_sb_info *sbi, int when); 193 struct autofs_sb_info *sbi, int when);
194 int autofs4_expire_multi(struct super_block *, struct vfsmount *, 194 int autofs4_expire_multi(struct super_block *, struct vfsmount *,
195 struct autofs_sb_info *, int __user *); 195 struct autofs_sb_info *, int __user *);
196 struct dentry *autofs4_expire_direct(struct super_block *sb, 196 struct dentry *autofs4_expire_direct(struct super_block *sb,
197 struct vfsmount *mnt, 197 struct vfsmount *mnt,
198 struct autofs_sb_info *sbi, int how); 198 struct autofs_sb_info *sbi, int how);
199 struct dentry *autofs4_expire_indirect(struct super_block *sb, 199 struct dentry *autofs4_expire_indirect(struct super_block *sb,
200 struct vfsmount *mnt, 200 struct vfsmount *mnt,
201 struct autofs_sb_info *sbi, int how); 201 struct autofs_sb_info *sbi, int how);
202 202
203 /* Device node initialization */ 203 /* Device node initialization */
204 204
205 int autofs_dev_ioctl_init(void); 205 int autofs_dev_ioctl_init(void);
206 void autofs_dev_ioctl_exit(void); 206 void autofs_dev_ioctl_exit(void);
207 207
208 /* Operations structures */ 208 /* Operations structures */
209 209
210 extern const struct inode_operations autofs4_symlink_inode_operations; 210 extern const struct inode_operations autofs4_symlink_inode_operations;
211 extern const struct inode_operations autofs4_dir_inode_operations; 211 extern const struct inode_operations autofs4_dir_inode_operations;
212 extern const struct inode_operations autofs4_root_inode_operations; 212 extern const struct inode_operations autofs4_root_inode_operations;
213 extern const struct inode_operations autofs4_indirect_root_inode_operations; 213 extern const struct inode_operations autofs4_indirect_root_inode_operations;
214 extern const struct inode_operations autofs4_direct_root_inode_operations; 214 extern const struct inode_operations autofs4_direct_root_inode_operations;
215 extern const struct file_operations autofs4_dir_operations; 215 extern const struct file_operations autofs4_dir_operations;
216 extern const struct file_operations autofs4_root_operations; 216 extern const struct file_operations autofs4_root_operations;
217 217
218 /* Initializing function */ 218 /* Initializing function */
219 219
220 int autofs4_fill_super(struct super_block *, void *, int); 220 int autofs4_fill_super(struct super_block *, void *, int);
221 struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info *sbi, mode_t mode); 221 struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info *sbi, mode_t mode);
222 222
223 /* Queue management functions */ 223 /* Queue management functions */
224 224
225 int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); 225 int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
226 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); 226 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
227 void autofs4_catatonic_mode(struct autofs_sb_info *); 227 void autofs4_catatonic_mode(struct autofs_sb_info *);
228 228
229 static inline int autofs4_follow_mount(struct path *path) 229 static inline int autofs4_follow_mount(struct path *path)
230 { 230 {
231 int res = 0; 231 int res = 0;
232 232
233 while (d_mountpoint(path->dentry)) { 233 while (d_mountpoint(path->dentry)) {
234 int followed = follow_down(path); 234 int followed = follow_down(path);
235 if (!followed) 235 if (!followed)
236 break; 236 break;
237 res = 1; 237 res = 1;
238 } 238 }
239 return res; 239 return res;
240 } 240 }
241 241
242 static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) 242 static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
243 { 243 {
244 return new_encode_dev(sbi->sb->s_dev); 244 return new_encode_dev(sbi->sb->s_dev);
245 } 245 }
246 246
247 static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi) 247 static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi)
248 { 248 {
249 return sbi->sb->s_root->d_inode->i_ino; 249 return sbi->sb->s_root->d_inode->i_ino;
250 } 250 }
251 251
252 static inline int simple_positive(struct dentry *dentry) 252 static inline int simple_positive(struct dentry *dentry)
253 { 253 {
254 return dentry->d_inode && !d_unhashed(dentry); 254 return dentry->d_inode && !d_unhashed(dentry);
255 } 255 }
256 256
257 static inline int __simple_empty(struct dentry *dentry)
258 {
259 struct dentry *child;
260 int ret = 0;
261
262 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
263 if (simple_positive(child))
264 goto out;
265 ret = 1;
266 out:
267 return ret;
268 }
269
270 static inline void autofs4_add_expiring(struct dentry *dentry) 257 static inline void autofs4_add_expiring(struct dentry *dentry)
271 { 258 {
272 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 259 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
273 struct autofs_info *ino = autofs4_dentry_ino(dentry); 260 struct autofs_info *ino = autofs4_dentry_ino(dentry);
274 if (ino) { 261 if (ino) {
275 spin_lock(&sbi->lookup_lock); 262 spin_lock(&sbi->lookup_lock);
276 if (list_empty(&ino->expiring)) 263 if (list_empty(&ino->expiring))
277 list_add(&ino->expiring, &sbi->expiring_list); 264 list_add(&ino->expiring, &sbi->expiring_list);
278 spin_unlock(&sbi->lookup_lock); 265 spin_unlock(&sbi->lookup_lock);
279 } 266 }
280 return; 267 return;
281 } 268 }
282 269
283 static inline void autofs4_del_expiring(struct dentry *dentry) 270 static inline void autofs4_del_expiring(struct dentry *dentry)
284 { 271 {
285 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 272 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
286 struct autofs_info *ino = autofs4_dentry_ino(dentry); 273 struct autofs_info *ino = autofs4_dentry_ino(dentry);
287 if (ino) { 274 if (ino) {
288 spin_lock(&sbi->lookup_lock); 275 spin_lock(&sbi->lookup_lock);
289 if (!list_empty(&ino->expiring)) 276 if (!list_empty(&ino->expiring))
290 list_del_init(&ino->expiring); 277 list_del_init(&ino->expiring);
291 spin_unlock(&sbi->lookup_lock); 278 spin_unlock(&sbi->lookup_lock);
292 } 279 }
293 return; 280 return;
294 } 281 }
295 282
296 void autofs4_dentry_release(struct dentry *); 283 void autofs4_dentry_release(struct dentry *);
297 extern void autofs4_kill_sb(struct super_block *); 284 extern void autofs4_kill_sb(struct super_block *);
298 285
1 /* -*- c -*- --------------------------------------------------------------- * 1 /* -*- c -*- --------------------------------------------------------------- *
2 * 2 *
3 * linux/fs/autofs/expire.c 3 * linux/fs/autofs/expire.c
4 * 4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved 5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> 6 * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
7 * Copyright 2001-2006 Ian Kent <raven@themaw.net> 7 * Copyright 2001-2006 Ian Kent <raven@themaw.net>
8 * 8 *
9 * This file is part of the Linux kernel and is made available under 9 * This file is part of the Linux kernel and is made available under
10 * the terms of the GNU General Public License, version 2, or at your 10 * the terms of the GNU General Public License, version 2, or at your
11 * option, any later version, incorporated herein by reference. 11 * option, any later version, incorporated herein by reference.
12 * 12 *
13 * ------------------------------------------------------------------------- */ 13 * ------------------------------------------------------------------------- */
14 14
15 #include "autofs_i.h" 15 #include "autofs_i.h"
16 16
17 static unsigned long now; 17 static unsigned long now;
18 18
19 /* Check if a dentry can be expired */ 19 /* Check if a dentry can be expired */
20 static inline int autofs4_can_expire(struct dentry *dentry, 20 static inline int autofs4_can_expire(struct dentry *dentry,
21 unsigned long timeout, int do_now) 21 unsigned long timeout, int do_now)
22 { 22 {
23 struct autofs_info *ino = autofs4_dentry_ino(dentry); 23 struct autofs_info *ino = autofs4_dentry_ino(dentry);
24 24
25 /* dentry in the process of being deleted */ 25 /* dentry in the process of being deleted */
26 if (ino == NULL) 26 if (ino == NULL)
27 return 0; 27 return 0;
28 28
29 /* No point expiring a pending mount */ 29 /* No point expiring a pending mount */
30 if (ino->flags & AUTOFS_INF_PENDING) 30 if (ino->flags & AUTOFS_INF_PENDING)
31 return 0; 31 return 0;
32 32
33 if (!do_now) { 33 if (!do_now) {
34 /* Too young to die */ 34 /* Too young to die */
35 if (!timeout || time_after(ino->last_used + timeout, now)) 35 if (!timeout || time_after(ino->last_used + timeout, now))
36 return 0; 36 return 0;
37 37
38 /* update last_used here :- 38 /* update last_used here :-
39 - obviously makes sense if it is in use now 39 - obviously makes sense if it is in use now
40 - less obviously, prevents rapid-fire expire 40 - less obviously, prevents rapid-fire expire
41 attempts if expire fails the first time */ 41 attempts if expire fails the first time */
42 ino->last_used = now; 42 ino->last_used = now;
43 } 43 }
44 return 1; 44 return 1;
45 } 45 }
46 46
47 /* Check a mount point for busyness */ 47 /* Check a mount point for busyness */
48 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) 48 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
49 { 49 {
50 struct dentry *top = dentry; 50 struct dentry *top = dentry;
51 struct path path = {.mnt = mnt, .dentry = dentry}; 51 struct path path = {.mnt = mnt, .dentry = dentry};
52 int status = 1; 52 int status = 1;
53 53
54 DPRINTK("dentry %p %.*s", 54 DPRINTK("dentry %p %.*s",
55 dentry, (int)dentry->d_name.len, dentry->d_name.name); 55 dentry, (int)dentry->d_name.len, dentry->d_name.name);
56 56
57 path_get(&path); 57 path_get(&path);
58 58
59 if (!follow_down(&path)) 59 if (!follow_down(&path))
60 goto done; 60 goto done;
61 61
62 if (is_autofs4_dentry(path.dentry)) { 62 if (is_autofs4_dentry(path.dentry)) {
63 struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb); 63 struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb);
64 64
65 /* This is an autofs submount, we can't expire it */ 65 /* This is an autofs submount, we can't expire it */
66 if (autofs_type_indirect(sbi->type)) 66 if (autofs_type_indirect(sbi->type))
67 goto done; 67 goto done;
68 68
69 /* 69 /*
70 * Otherwise it's an offset mount and we need to check 70 * Otherwise it's an offset mount and we need to check
71 * if we can umount its mount, if there is one. 71 * if we can umount its mount, if there is one.
72 */ 72 */
73 if (!d_mountpoint(path.dentry)) { 73 if (!d_mountpoint(path.dentry)) {
74 status = 0; 74 status = 0;
75 goto done; 75 goto done;
76 } 76 }
77 } 77 }
78 78
79 /* Update the expiry counter if fs is busy */ 79 /* Update the expiry counter if fs is busy */
80 if (!may_umount_tree(path.mnt)) { 80 if (!may_umount_tree(path.mnt)) {
81 struct autofs_info *ino = autofs4_dentry_ino(top); 81 struct autofs_info *ino = autofs4_dentry_ino(top);
82 ino->last_used = jiffies; 82 ino->last_used = jiffies;
83 goto done; 83 goto done;
84 } 84 }
85 85
86 status = 0; 86 status = 0;
87 done: 87 done:
88 DPRINTK("returning = %d", status); 88 DPRINTK("returning = %d", status);
89 path_put(&path); 89 path_put(&path);
90 return status; 90 return status;
91 } 91 }
92 92
93 /* 93 /*
94 * Calculate next entry in top down tree traversal. 94 * Calculate next entry in top down tree traversal.
95 * From next_mnt in namespace.c - elegant. 95 * From next_mnt in namespace.c - elegant.
96 */ 96 */
97 static struct dentry *next_dentry(struct dentry *p, struct dentry *root) 97 static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
98 { 98 {
99 struct list_head *next = p->d_subdirs.next; 99 struct list_head *next = p->d_subdirs.next;
100 100
101 if (next == &p->d_subdirs) { 101 if (next == &p->d_subdirs) {
102 while (1) { 102 while (1) {
103 if (p == root) 103 if (p == root)
104 return NULL; 104 return NULL;
105 next = p->d_u.d_child.next; 105 next = p->d_u.d_child.next;
106 if (next != &p->d_parent->d_subdirs) 106 if (next != &p->d_parent->d_subdirs)
107 break; 107 break;
108 p = p->d_parent; 108 p = p->d_parent;
109 } 109 }
110 } 110 }
111 return list_entry(next, struct dentry, d_u.d_child); 111 return list_entry(next, struct dentry, d_u.d_child);
112 } 112 }
113 113
114 /* 114 /*
115 * Check a direct mount point for busyness. 115 * Check a direct mount point for busyness.
116 * Direct mounts have similar expiry semantics to tree mounts. 116 * Direct mounts have similar expiry semantics to tree mounts.
117 * The tree is not busy iff no mountpoints are busy and there are no 117 * The tree is not busy iff no mountpoints are busy and there are no
118 * autofs submounts. 118 * autofs submounts.
119 */ 119 */
120 static int autofs4_direct_busy(struct vfsmount *mnt, 120 static int autofs4_direct_busy(struct vfsmount *mnt,
121 struct dentry *top, 121 struct dentry *top,
122 unsigned long timeout, 122 unsigned long timeout,
123 int do_now) 123 int do_now)
124 { 124 {
125 DPRINTK("top %p %.*s", 125 DPRINTK("top %p %.*s",
126 top, (int) top->d_name.len, top->d_name.name); 126 top, (int) top->d_name.len, top->d_name.name);
127 127
128 /* If it's busy update the expiry counters */ 128 /* If it's busy update the expiry counters */
129 if (!may_umount_tree(mnt)) { 129 if (!may_umount_tree(mnt)) {
130 struct autofs_info *ino = autofs4_dentry_ino(top); 130 struct autofs_info *ino = autofs4_dentry_ino(top);
131 if (ino) 131 if (ino)
132 ino->last_used = jiffies; 132 ino->last_used = jiffies;
133 return 1; 133 return 1;
134 } 134 }
135 135
136 /* Timeout of a direct mount is determined by its top dentry */ 136 /* Timeout of a direct mount is determined by its top dentry */
137 if (!autofs4_can_expire(top, timeout, do_now)) 137 if (!autofs4_can_expire(top, timeout, do_now))
138 return 1; 138 return 1;
139 139
140 return 0; 140 return 0;
141 } 141 }
142 142
143 /* Check a directory tree of mount points for busyness 143 /* Check a directory tree of mount points for busyness
144 * The tree is not busy iff no mountpoints are busy 144 * The tree is not busy iff no mountpoints are busy
145 */ 145 */
146 static int autofs4_tree_busy(struct vfsmount *mnt, 146 static int autofs4_tree_busy(struct vfsmount *mnt,
147 struct dentry *top, 147 struct dentry *top,
148 unsigned long timeout, 148 unsigned long timeout,
149 int do_now) 149 int do_now)
150 { 150 {
151 struct autofs_info *top_ino = autofs4_dentry_ino(top); 151 struct autofs_info *top_ino = autofs4_dentry_ino(top);
152 struct dentry *p; 152 struct dentry *p;
153 153
154 DPRINTK("top %p %.*s", 154 DPRINTK("top %p %.*s",
155 top, (int)top->d_name.len, top->d_name.name); 155 top, (int)top->d_name.len, top->d_name.name);
156 156
157 /* Negative dentry - give up */ 157 /* Negative dentry - give up */
158 if (!simple_positive(top)) 158 if (!simple_positive(top))
159 return 1; 159 return 1;
160 160
161 spin_lock(&dcache_lock); 161 spin_lock(&dcache_lock);
162 for (p = top; p; p = next_dentry(p, top)) { 162 for (p = top; p; p = next_dentry(p, top)) {
163 spin_lock(&p->d_lock);
163 /* Negative dentry - give up */ 164 /* Negative dentry - give up */
164 if (!simple_positive(p)) 165 if (!simple_positive(p)) {
166 spin_unlock(&p->d_lock);
165 continue; 167 continue;
168 }
166 169
167 DPRINTK("dentry %p %.*s", 170 DPRINTK("dentry %p %.*s",
168 p, (int) p->d_name.len, p->d_name.name); 171 p, (int) p->d_name.len, p->d_name.name);
169 172
170 p = dget(p); 173 p = dget_dlock(p);
174 spin_unlock(&p->d_lock);
171 spin_unlock(&dcache_lock); 175 spin_unlock(&dcache_lock);
172 176
173 /* 177 /*
174 * Is someone visiting anywhere in the subtree ? 178 * Is someone visiting anywhere in the subtree ?
175 * If there's no mount we need to check the usage 179 * If there's no mount we need to check the usage
176 * count for the autofs dentry. 180 * count for the autofs dentry.
177 * If the fs is busy update the expiry counter. 181 * If the fs is busy update the expiry counter.
178 */ 182 */
179 if (d_mountpoint(p)) { 183 if (d_mountpoint(p)) {
180 if (autofs4_mount_busy(mnt, p)) { 184 if (autofs4_mount_busy(mnt, p)) {
181 top_ino->last_used = jiffies; 185 top_ino->last_used = jiffies;
182 dput(p); 186 dput(p);
183 return 1; 187 return 1;
184 } 188 }
185 } else { 189 } else {
186 struct autofs_info *ino = autofs4_dentry_ino(p); 190 struct autofs_info *ino = autofs4_dentry_ino(p);
187 unsigned int ino_count = atomic_read(&ino->count); 191 unsigned int ino_count = atomic_read(&ino->count);
188 192
189 /* 193 /*
190 * Clean stale dentries below that have not been 194 * Clean stale dentries below that have not been
191 * invalidated after a mount fail during lookup 195 * invalidated after a mount fail during lookup
192 */ 196 */
193 d_invalidate(p); 197 d_invalidate(p);
194 198
195 /* allow for dget above and top is already dgot */ 199 /* allow for dget above and top is already dgot */
196 if (p == top) 200 if (p == top)
197 ino_count += 2; 201 ino_count += 2;
198 else 202 else
199 ino_count++; 203 ino_count++;
200 204
201 if (p->d_count > ino_count) { 205 if (p->d_count > ino_count) {
202 top_ino->last_used = jiffies; 206 top_ino->last_used = jiffies;
203 dput(p); 207 dput(p);
204 return 1; 208 return 1;
205 } 209 }
206 } 210 }
207 dput(p); 211 dput(p);
208 spin_lock(&dcache_lock); 212 spin_lock(&dcache_lock);
209 } 213 }
210 spin_unlock(&dcache_lock); 214 spin_unlock(&dcache_lock);
211 215
212 /* Timeout of a tree mount is ultimately determined by its top dentry */ 216 /* Timeout of a tree mount is ultimately determined by its top dentry */
213 if (!autofs4_can_expire(top, timeout, do_now)) 217 if (!autofs4_can_expire(top, timeout, do_now))
214 return 1; 218 return 1;
215 219
216 return 0; 220 return 0;
217 } 221 }
218 222
219 static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, 223 static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
220 struct dentry *parent, 224 struct dentry *parent,
221 unsigned long timeout, 225 unsigned long timeout,
222 int do_now) 226 int do_now)
223 { 227 {
224 struct dentry *p; 228 struct dentry *p;
225 229
226 DPRINTK("parent %p %.*s", 230 DPRINTK("parent %p %.*s",
227 parent, (int)parent->d_name.len, parent->d_name.name); 231 parent, (int)parent->d_name.len, parent->d_name.name);
228 232
229 spin_lock(&dcache_lock); 233 spin_lock(&dcache_lock);
230 for (p = parent; p; p = next_dentry(p, parent)) { 234 for (p = parent; p; p = next_dentry(p, parent)) {
235 spin_lock(&p->d_lock);
231 /* Negative dentry - give up */ 236 /* Negative dentry - give up */
232 if (!simple_positive(p)) 237 if (!simple_positive(p)) {
238 spin_unlock(&p->d_lock);
233 continue; 239 continue;
240 }
234 241
235 DPRINTK("dentry %p %.*s", 242 DPRINTK("dentry %p %.*s",
236 p, (int) p->d_name.len, p->d_name.name); 243 p, (int) p->d_name.len, p->d_name.name);
237 244
238 p = dget(p); 245 p = dget_dlock(p);
246 spin_unlock(&p->d_lock);
239 spin_unlock(&dcache_lock); 247 spin_unlock(&dcache_lock);
240 248
241 if (d_mountpoint(p)) { 249 if (d_mountpoint(p)) {
242 /* Can we umount this guy */ 250 /* Can we umount this guy */
243 if (autofs4_mount_busy(mnt, p)) 251 if (autofs4_mount_busy(mnt, p))
244 goto cont; 252 goto cont;
245 253
246 /* Can we expire this guy */ 254 /* Can we expire this guy */
247 if (autofs4_can_expire(p, timeout, do_now)) 255 if (autofs4_can_expire(p, timeout, do_now))
248 return p; 256 return p;
249 } 257 }
250 cont: 258 cont:
251 dput(p); 259 dput(p);
252 spin_lock(&dcache_lock); 260 spin_lock(&dcache_lock);
253 } 261 }
254 spin_unlock(&dcache_lock); 262 spin_unlock(&dcache_lock);
255 return NULL; 263 return NULL;
256 } 264 }
257 265
258 /* Check if we can expire a direct mount (possibly a tree) */ 266 /* Check if we can expire a direct mount (possibly a tree) */
259 struct dentry *autofs4_expire_direct(struct super_block *sb, 267 struct dentry *autofs4_expire_direct(struct super_block *sb,
260 struct vfsmount *mnt, 268 struct vfsmount *mnt,
261 struct autofs_sb_info *sbi, 269 struct autofs_sb_info *sbi,
262 int how) 270 int how)
263 { 271 {
264 unsigned long timeout; 272 unsigned long timeout;
265 struct dentry *root = dget(sb->s_root); 273 struct dentry *root = dget(sb->s_root);
266 int do_now = how & AUTOFS_EXP_IMMEDIATE; 274 int do_now = how & AUTOFS_EXP_IMMEDIATE;
267 275
268 if (!root) 276 if (!root)
269 return NULL; 277 return NULL;
270 278
271 now = jiffies; 279 now = jiffies;
272 timeout = sbi->exp_timeout; 280 timeout = sbi->exp_timeout;
273 281
274 spin_lock(&sbi->fs_lock); 282 spin_lock(&sbi->fs_lock);
275 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 283 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
276 struct autofs_info *ino = autofs4_dentry_ino(root); 284 struct autofs_info *ino = autofs4_dentry_ino(root);
277 if (d_mountpoint(root)) { 285 if (d_mountpoint(root)) {
278 ino->flags |= AUTOFS_INF_MOUNTPOINT; 286 ino->flags |= AUTOFS_INF_MOUNTPOINT;
279 root->d_mounted--; 287 root->d_mounted--;
280 } 288 }
281 ino->flags |= AUTOFS_INF_EXPIRING; 289 ino->flags |= AUTOFS_INF_EXPIRING;
282 init_completion(&ino->expire_complete); 290 init_completion(&ino->expire_complete);
283 spin_unlock(&sbi->fs_lock); 291 spin_unlock(&sbi->fs_lock);
284 return root; 292 return root;
285 } 293 }
286 spin_unlock(&sbi->fs_lock); 294 spin_unlock(&sbi->fs_lock);
287 dput(root); 295 dput(root);
288 296
289 return NULL; 297 return NULL;
290 } 298 }
291 299
292 /* 300 /*
293 * Find an eligible tree to time-out 301 * Find an eligible tree to time-out
294 * A tree is eligible if :- 302 * A tree is eligible if :-
295 * - it is unused by any user process 303 * - it is unused by any user process
296 * - it has been unused for exp_timeout time 304 * - it has been unused for exp_timeout time
297 */ 305 */
298 struct dentry *autofs4_expire_indirect(struct super_block *sb, 306 struct dentry *autofs4_expire_indirect(struct super_block *sb,
299 struct vfsmount *mnt, 307 struct vfsmount *mnt,
300 struct autofs_sb_info *sbi, 308 struct autofs_sb_info *sbi,
301 int how) 309 int how)
302 { 310 {
303 unsigned long timeout; 311 unsigned long timeout;
304 struct dentry *root = sb->s_root; 312 struct dentry *root = sb->s_root;
305 struct dentry *expired = NULL; 313 struct dentry *expired = NULL;
306 struct list_head *next; 314 struct list_head *next;
307 int do_now = how & AUTOFS_EXP_IMMEDIATE; 315 int do_now = how & AUTOFS_EXP_IMMEDIATE;
308 int exp_leaves = how & AUTOFS_EXP_LEAVES; 316 int exp_leaves = how & AUTOFS_EXP_LEAVES;
309 struct autofs_info *ino; 317 struct autofs_info *ino;
310 unsigned int ino_count; 318 unsigned int ino_count;
311 319
312 if (!root) 320 if (!root)
313 return NULL; 321 return NULL;
314 322
315 now = jiffies; 323 now = jiffies;
316 timeout = sbi->exp_timeout; 324 timeout = sbi->exp_timeout;
317 325
318 spin_lock(&dcache_lock); 326 spin_lock(&dcache_lock);
319 next = root->d_subdirs.next; 327 next = root->d_subdirs.next;
320 328
321 /* On exit from the loop expire is set to a dgot dentry 329 /* On exit from the loop expire is set to a dgot dentry
322 * to expire or it's NULL */ 330 * to expire or it's NULL */
323 while ( next != &root->d_subdirs ) { 331 while ( next != &root->d_subdirs ) {
324 struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); 332 struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
325 333
326 /* Negative dentry - give up */ 334 /* Negative dentry - give up */
335 spin_lock(&dentry->d_lock);
327 if (!simple_positive(dentry)) { 336 if (!simple_positive(dentry)) {
328 next = next->next; 337 next = next->next;
338 spin_unlock(&dentry->d_lock);
329 continue; 339 continue;
330 } 340 }
331 341
332 dentry = dget(dentry); 342 dentry = dget_dlock(dentry);
343 spin_unlock(&dentry->d_lock);
333 spin_unlock(&dcache_lock); 344 spin_unlock(&dcache_lock);
334 345
335 spin_lock(&sbi->fs_lock); 346 spin_lock(&sbi->fs_lock);
336 ino = autofs4_dentry_ino(dentry); 347 ino = autofs4_dentry_ino(dentry);
337 348
338 /* 349 /*
339 * Case 1: (i) indirect mount or top level pseudo direct mount 350 * Case 1: (i) indirect mount or top level pseudo direct mount
340 * (autofs-4.1). 351 * (autofs-4.1).
341 * (ii) indirect mount with offset mount, check the "/" 352 * (ii) indirect mount with offset mount, check the "/"
342 * offset (autofs-5.0+). 353 * offset (autofs-5.0+).
343 */ 354 */
344 if (d_mountpoint(dentry)) { 355 if (d_mountpoint(dentry)) {
345 DPRINTK("checking mountpoint %p %.*s", 356 DPRINTK("checking mountpoint %p %.*s",
346 dentry, (int)dentry->d_name.len, dentry->d_name.name); 357 dentry, (int)dentry->d_name.len, dentry->d_name.name);
347 358
348 /* Path walk currently on this dentry? */ 359 /* Path walk currently on this dentry? */
349 ino_count = atomic_read(&ino->count) + 2; 360 ino_count = atomic_read(&ino->count) + 2;
350 if (dentry->d_count > ino_count) 361 if (dentry->d_count > ino_count)
351 goto next; 362 goto next;
352 363
353 /* Can we umount this guy */ 364 /* Can we umount this guy */
354 if (autofs4_mount_busy(mnt, dentry)) 365 if (autofs4_mount_busy(mnt, dentry))
355 goto next; 366 goto next;
356 367
357 /* Can we expire this guy */ 368 /* Can we expire this guy */
358 if (autofs4_can_expire(dentry, timeout, do_now)) { 369 if (autofs4_can_expire(dentry, timeout, do_now)) {
359 expired = dentry; 370 expired = dentry;
360 goto found; 371 goto found;
361 } 372 }
362 goto next; 373 goto next;
363 } 374 }
364 375
365 if (simple_empty(dentry)) 376 if (simple_empty(dentry))
366 goto next; 377 goto next;
367 378
368 /* Case 2: tree mount, expire iff entire tree is not busy */ 379 /* Case 2: tree mount, expire iff entire tree is not busy */
369 if (!exp_leaves) { 380 if (!exp_leaves) {
370 /* Path walk currently on this dentry? */ 381 /* Path walk currently on this dentry? */
371 ino_count = atomic_read(&ino->count) + 1; 382 ino_count = atomic_read(&ino->count) + 1;
372 if (dentry->d_count > ino_count) 383 if (dentry->d_count > ino_count)
373 goto next; 384 goto next;
374 385
375 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { 386 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
376 expired = dentry; 387 expired = dentry;
377 goto found; 388 goto found;
378 } 389 }
379 /* 390 /*
380 * Case 3: pseudo direct mount, expire individual leaves 391 * Case 3: pseudo direct mount, expire individual leaves
381 * (autofs-4.1). 392 * (autofs-4.1).
382 */ 393 */
383 } else { 394 } else {
384 /* Path walk currently on this dentry? */ 395 /* Path walk currently on this dentry? */
385 ino_count = atomic_read(&ino->count) + 1; 396 ino_count = atomic_read(&ino->count) + 1;
386 if (dentry->d_count > ino_count) 397 if (dentry->d_count > ino_count)
387 goto next; 398 goto next;
388 399
389 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 400 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
390 if (expired) { 401 if (expired) {
391 dput(dentry); 402 dput(dentry);
392 goto found; 403 goto found;
393 } 404 }
394 } 405 }
395 next: 406 next:
396 spin_unlock(&sbi->fs_lock); 407 spin_unlock(&sbi->fs_lock);
397 dput(dentry); 408 dput(dentry);
398 spin_lock(&dcache_lock); 409 spin_lock(&dcache_lock);
399 next = next->next; 410 next = next->next;
400 } 411 }
401 spin_unlock(&dcache_lock); 412 spin_unlock(&dcache_lock);
402 return NULL; 413 return NULL;
403 414
404 found: 415 found:
405 DPRINTK("returning %p %.*s", 416 DPRINTK("returning %p %.*s",
406 expired, (int)expired->d_name.len, expired->d_name.name); 417 expired, (int)expired->d_name.len, expired->d_name.name);
407 ino = autofs4_dentry_ino(expired); 418 ino = autofs4_dentry_ino(expired);
408 ino->flags |= AUTOFS_INF_EXPIRING; 419 ino->flags |= AUTOFS_INF_EXPIRING;
409 init_completion(&ino->expire_complete); 420 init_completion(&ino->expire_complete);
410 spin_unlock(&sbi->fs_lock); 421 spin_unlock(&sbi->fs_lock);
411 spin_lock(&dcache_lock); 422 spin_lock(&dcache_lock);
412 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 423 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
413 spin_unlock(&dcache_lock); 424 spin_unlock(&dcache_lock);
414 return expired; 425 return expired;
415 } 426 }
416 427
417 int autofs4_expire_wait(struct dentry *dentry) 428 int autofs4_expire_wait(struct dentry *dentry)
418 { 429 {
419 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 430 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
420 struct autofs_info *ino = autofs4_dentry_ino(dentry); 431 struct autofs_info *ino = autofs4_dentry_ino(dentry);
421 int status; 432 int status;
422 433
423 /* Block on any pending expire */ 434 /* Block on any pending expire */
424 spin_lock(&sbi->fs_lock); 435 spin_lock(&sbi->fs_lock);
425 if (ino->flags & AUTOFS_INF_EXPIRING) { 436 if (ino->flags & AUTOFS_INF_EXPIRING) {
426 spin_unlock(&sbi->fs_lock); 437 spin_unlock(&sbi->fs_lock);
427 438
428 DPRINTK("waiting for expire %p name=%.*s", 439 DPRINTK("waiting for expire %p name=%.*s",
429 dentry, dentry->d_name.len, dentry->d_name.name); 440 dentry, dentry->d_name.len, dentry->d_name.name);
430 441
431 status = autofs4_wait(sbi, dentry, NFY_NONE); 442 status = autofs4_wait(sbi, dentry, NFY_NONE);
432 wait_for_completion(&ino->expire_complete); 443 wait_for_completion(&ino->expire_complete);
433 444
434 DPRINTK("expire done status=%d", status); 445 DPRINTK("expire done status=%d", status);
435 446
436 if (d_unhashed(dentry)) 447 if (d_unhashed(dentry))
437 return -EAGAIN; 448 return -EAGAIN;
438 449
439 return status; 450 return status;
440 } 451 }
441 spin_unlock(&sbi->fs_lock); 452 spin_unlock(&sbi->fs_lock);
442 453
443 return 0; 454 return 0;
444 } 455 }
445 456
446 /* Perform an expiry operation */ 457 /* Perform an expiry operation */
447 int autofs4_expire_run(struct super_block *sb, 458 int autofs4_expire_run(struct super_block *sb,
448 struct vfsmount *mnt, 459 struct vfsmount *mnt,
449 struct autofs_sb_info *sbi, 460 struct autofs_sb_info *sbi,
450 struct autofs_packet_expire __user *pkt_p) 461 struct autofs_packet_expire __user *pkt_p)
451 { 462 {
452 struct autofs_packet_expire pkt; 463 struct autofs_packet_expire pkt;
453 struct autofs_info *ino; 464 struct autofs_info *ino;
454 struct dentry *dentry; 465 struct dentry *dentry;
455 int ret = 0; 466 int ret = 0;
456 467
457 memset(&pkt,0,sizeof pkt); 468 memset(&pkt,0,sizeof pkt);
458 469
459 pkt.hdr.proto_version = sbi->version; 470 pkt.hdr.proto_version = sbi->version;
460 pkt.hdr.type = autofs_ptype_expire; 471 pkt.hdr.type = autofs_ptype_expire;
461 472
462 if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL) 473 if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL)
463 return -EAGAIN; 474 return -EAGAIN;
464 475
465 pkt.len = dentry->d_name.len; 476 pkt.len = dentry->d_name.len;
466 memcpy(pkt.name, dentry->d_name.name, pkt.len); 477 memcpy(pkt.name, dentry->d_name.name, pkt.len);
467 pkt.name[pkt.len] = '\0'; 478 pkt.name[pkt.len] = '\0';
468 dput(dentry); 479 dput(dentry);
469 480
470 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) 481 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
471 ret = -EFAULT; 482 ret = -EFAULT;
472 483
473 spin_lock(&sbi->fs_lock); 484 spin_lock(&sbi->fs_lock);
474 ino = autofs4_dentry_ino(dentry); 485 ino = autofs4_dentry_ino(dentry);
475 ino->flags &= ~AUTOFS_INF_EXPIRING; 486 ino->flags &= ~AUTOFS_INF_EXPIRING;
476 complete_all(&ino->expire_complete); 487 complete_all(&ino->expire_complete);
477 spin_unlock(&sbi->fs_lock); 488 spin_unlock(&sbi->fs_lock);
478 489
479 return ret; 490 return ret;
480 } 491 }
481 492
482 int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, 493 int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
483 struct autofs_sb_info *sbi, int when) 494 struct autofs_sb_info *sbi, int when)
484 { 495 {
485 struct dentry *dentry; 496 struct dentry *dentry;
486 int ret = -EAGAIN; 497 int ret = -EAGAIN;
487 498
488 if (autofs_type_trigger(sbi->type)) 499 if (autofs_type_trigger(sbi->type))
489 dentry = autofs4_expire_direct(sb, mnt, sbi, when); 500 dentry = autofs4_expire_direct(sb, mnt, sbi, when);
490 else 501 else
491 dentry = autofs4_expire_indirect(sb, mnt, sbi, when); 502 dentry = autofs4_expire_indirect(sb, mnt, sbi, when);
492 503
493 if (dentry) { 504 if (dentry) {
494 struct autofs_info *ino = autofs4_dentry_ino(dentry); 505 struct autofs_info *ino = autofs4_dentry_ino(dentry);
495 506
496 /* This is synchronous because it makes the daemon a 507 /* This is synchronous because it makes the daemon a
497 little easier */ 508 little easier */
498 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); 509 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
499 510
500 spin_lock(&sbi->fs_lock); 511 spin_lock(&sbi->fs_lock);
501 if (ino->flags & AUTOFS_INF_MOUNTPOINT) { 512 if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
502 sb->s_root->d_mounted++; 513 sb->s_root->d_mounted++;
503 ino->flags &= ~AUTOFS_INF_MOUNTPOINT; 514 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
504 } 515 }
505 ino->flags &= ~AUTOFS_INF_EXPIRING; 516 ino->flags &= ~AUTOFS_INF_EXPIRING;
506 complete_all(&ino->expire_complete); 517 complete_all(&ino->expire_complete);
507 spin_unlock(&sbi->fs_lock); 518 spin_unlock(&sbi->fs_lock);
508 dput(dentry); 519 dput(dentry);
509 } 520 }
510 521
511 return ret; 522 return ret;
512 } 523 }
513 524
514 /* Call repeatedly until it returns -EAGAIN, meaning there's nothing 525 /* Call repeatedly until it returns -EAGAIN, meaning there's nothing
515 more to be done */ 526 more to be done */
516 int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, 527 int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
517 struct autofs_sb_info *sbi, int __user *arg) 528 struct autofs_sb_info *sbi, int __user *arg)
518 { 529 {
519 int do_now = 0; 530 int do_now = 0;
520 531
521 if (arg && get_user(do_now, arg)) 532 if (arg && get_user(do_now, arg))
522 return -EFAULT; 533 return -EFAULT;
523 534
524 return autofs4_do_expire_multi(sb, mnt, sbi, do_now); 535 return autofs4_do_expire_multi(sb, mnt, sbi, do_now);
525 } 536 }
526 537
527 538
1 #include <linux/ceph/ceph_debug.h> 1 #include <linux/ceph/ceph_debug.h>
2 2
3 #include <linux/spinlock.h> 3 #include <linux/spinlock.h>
4 #include <linux/fs_struct.h> 4 #include <linux/fs_struct.h>
5 #include <linux/namei.h> 5 #include <linux/namei.h>
6 #include <linux/slab.h> 6 #include <linux/slab.h>
7 #include <linux/sched.h> 7 #include <linux/sched.h>
8 8
9 #include "super.h" 9 #include "super.h"
10 #include "mds_client.h" 10 #include "mds_client.h"
11 11
12 /* 12 /*
13 * Directory operations: readdir, lookup, create, link, unlink, 13 * Directory operations: readdir, lookup, create, link, unlink,
14 * rename, etc. 14 * rename, etc.
15 */ 15 */
16 16
17 /* 17 /*
18 * Ceph MDS operations are specified in terms of a base ino and 18 * Ceph MDS operations are specified in terms of a base ino and
19 * relative path. Thus, the client can specify an operation on a 19 * relative path. Thus, the client can specify an operation on a
20 * specific inode (e.g., a getattr due to fstat(2)), or as a path 20 * specific inode (e.g., a getattr due to fstat(2)), or as a path
21 * relative to, say, the root directory. 21 * relative to, say, the root directory.
22 * 22 *
23 * Normally, we limit ourselves to strict inode ops (no path component) 23 * Normally, we limit ourselves to strict inode ops (no path component)
24 * or dentry operations (a single path component relative to an ino). The 24 * or dentry operations (a single path component relative to an ino). The
25 * exception to this is open_root_dentry(), which will open the mount 25 * exception to this is open_root_dentry(), which will open the mount
26 * point by name. 26 * point by name.
27 */ 27 */
28 28
/*
 * Early declarations of the ceph operation tables so that code further
 * down can take their addresses (ceph_init_dentry() below installs
 * &ceph_dentry_ops into dentry->d_op).
 * NOTE(review): the initialised definitions are presumably later in this
 * file -- not visible from here, confirm.
 */
29 const struct inode_operations ceph_dir_iops; 29 const struct inode_operations ceph_dir_iops;
30 const struct file_operations ceph_dir_fops; 30 const struct file_operations ceph_dir_fops;
31 const struct dentry_operations ceph_dentry_ops; 31 const struct dentry_operations ceph_dentry_ops;
32 32
33 /* 33 /*
34 * Initialize ceph dentry state. 34 * Initialize ceph dentry state.
35 */ 35 */
36 int ceph_init_dentry(struct dentry *dentry) 36 int ceph_init_dentry(struct dentry *dentry)
37 { 37 {
38 struct ceph_dentry_info *di; 38 struct ceph_dentry_info *di;
39 39
40 if (dentry->d_fsdata) 40 if (dentry->d_fsdata)
41 return 0; 41 return 0;
42 42
43 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ 43 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
44 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) 44 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
45 dentry->d_op = &ceph_dentry_ops; 45 dentry->d_op = &ceph_dentry_ops;
46 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) 46 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
47 dentry->d_op = &ceph_snapdir_dentry_ops; 47 dentry->d_op = &ceph_snapdir_dentry_ops;
48 else 48 else
49 dentry->d_op = &ceph_snap_dentry_ops; 49 dentry->d_op = &ceph_snap_dentry_ops;
50 50
51 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); 51 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
52 if (!di) 52 if (!di)
53 return -ENOMEM; /* oh well */ 53 return -ENOMEM; /* oh well */
54 54
55 spin_lock(&dentry->d_lock); 55 spin_lock(&dentry->d_lock);
56 if (dentry->d_fsdata) { 56 if (dentry->d_fsdata) {
57 /* lost a race */ 57 /* lost a race */
58 kmem_cache_free(ceph_dentry_cachep, di); 58 kmem_cache_free(ceph_dentry_cachep, di);
59 goto out_unlock; 59 goto out_unlock;
60 } 60 }
61 di->dentry = dentry; 61 di->dentry = dentry;
62 di->lease_session = NULL; 62 di->lease_session = NULL;
63 dentry->d_fsdata = di; 63 dentry->d_fsdata = di;
64 dentry->d_time = jiffies; 64 dentry->d_time = jiffies;
65 ceph_dentry_lru_add(dentry); 65 ceph_dentry_lru_add(dentry);
66 out_unlock: 66 out_unlock:
67 spin_unlock(&dentry->d_lock); 67 spin_unlock(&dentry->d_lock);
68 return 0; 68 return 0;
69 } 69 }
70 70
71 71
72 72
73 /* 73 /*
74 * for readdir, we encode the directory frag and offset within that 74 * for readdir, we encode the directory frag and offset within that
75 * frag into f_pos. 75 * frag into f_pos.
76 */ 76 */
77 static unsigned fpos_frag(loff_t p) 77 static unsigned fpos_frag(loff_t p)
78 { 78 {
79 return p >> 32; 79 return p >> 32;
80 } 80 }
81 static unsigned fpos_off(loff_t p) 81 static unsigned fpos_off(loff_t p)
82 { 82 {
83 return p & 0xffffffff; 83 return p & 0xffffffff;
84 } 84 }
85 85
86 /* 86 /*
87 * When possible, we try to satisfy a readdir by peeking at the 87 * When possible, we try to satisfy a readdir by peeking at the
88 * dcache. We make this work by carefully ordering dentries on 88 * dcache. We make this work by carefully ordering dentries on
89 * d_u.d_child when we initially get results back from the MDS, and 89 * d_u.d_child when we initially get results back from the MDS, and
90 * falling back to a "normal" sync readdir if any dentries in the dir 90 * falling back to a "normal" sync readdir if any dentries in the dir
91 * are dropped. 91 * are dropped.
92 * 92 *
93 * I_COMPLETE indicates we have all dentries in the dir. It is 93 * I_COMPLETE indicates we have all dentries in the dir. It is
94 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by 94 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
95 * the MDS if/when the directory is modified). 95 * the MDS if/when the directory is modified).
96 */ 96 */
/*
 * NOTE: the lines below are a two-column diff rendering.  Each line shows
 * the pre-commit text (left line number) followed by the post-commit text
 * (right line number); a line carrying a single number appears to exist
 * on one side of the diff only.
 *
 * __dcache_readdir - serve a readdir from the children on parent->d_subdirs.
 *
 * Walks parent->d_subdirs through ->prev links under dcache_lock, starting
 * from d_subdirs.prev or from the entry before the dentry remembered in
 * fi->dentry.  A child is passed to filldir() when it is hashed, has an
 * inode, is neither a CEPH_SNAPDIR inode nor CEPH_INO_CEPH, and its
 * di->offset is >= f_pos.  dcache_lock is dropped around filldir().
 *
 * Returns 0 when the list is empty or the list head is reached (the
 * latter sets fi->at_end), the negative filldir() result if filldir()
 * fails, or -EAGAIN when CEPH_I_COMPLETE is found cleared on the
 * directory after an entry was emitted.
 */
97 static int __dcache_readdir(struct file *filp, 97 static int __dcache_readdir(struct file *filp,
98 void *dirent, filldir_t filldir) 98 void *dirent, filldir_t filldir)
99 { 99 {
100 struct ceph_file_info *fi = filp->private_data; 100 struct ceph_file_info *fi = filp->private_data;
101 struct dentry *parent = filp->f_dentry; 101 struct dentry *parent = filp->f_dentry;
102 struct inode *dir = parent->d_inode; 102 struct inode *dir = parent->d_inode;
103 struct list_head *p; 103 struct list_head *p;
104 struct dentry *dentry, *last; 104 struct dentry *dentry, *last;
105 struct ceph_dentry_info *di; 105 struct ceph_dentry_info *di;
106 int err = 0; 106 int err = 0;
107 107
/* fi->dentry is cleared here and only set again on the filldir-failure path below */
108 /* claim ref on last dentry we returned */ 108 /* claim ref on last dentry we returned */
109 last = fi->dentry; 109 last = fi->dentry;
110 fi->dentry = NULL; 110 fi->dentry = NULL;
111 111
112 dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, 112 dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
113 last); 113 last);
114 114
115 spin_lock(&dcache_lock); 115 spin_lock(&dcache_lock);
116 116
117 /* start at beginning? */ 117 /* start at beginning? */
118 if (filp->f_pos == 2 || last == NULL || 118 if (filp->f_pos == 2 || last == NULL ||
119 filp->f_pos < ceph_dentry(last)->offset) { 119 filp->f_pos < ceph_dentry(last)->offset) {
120 if (list_empty(&parent->d_subdirs)) 120 if (list_empty(&parent->d_subdirs))
121 goto out_unlock; 121 goto out_unlock;
122 p = parent->d_subdirs.prev; 122 p = parent->d_subdirs.prev;
123 dout(" initial p %p/%p\n", p->prev, p->next); 123 dout(" initial p %p/%p\n", p->prev, p->next);
124 } else { 124 } else {
125 p = last->d_u.d_child.prev; 125 p = last->d_u.d_child.prev;
126 } 126 }
127 127
/*
 * list_entry() is applied before p is compared with the list head, so when
 * p == &parent->d_subdirs, `dentry` and `di` do not describe a real child.
 * NOTE(review): the dout() at the top of the loop still evaluates
 * d_unhashed(dentry) in that case, ahead of the list-head check.
 */
128 more: 128 more:
129 dentry = list_entry(p, struct dentry, d_u.d_child); 129 dentry = list_entry(p, struct dentry, d_u.d_child);
130 di = ceph_dentry(dentry); 130 di = ceph_dentry(dentry);
131 while (1) { 131 while (1) {
132 dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, 132 dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
133 d_unhashed(dentry) ? "!hashed" : "hashed", 133 d_unhashed(dentry) ? "!hashed" : "hashed",
134 parent->d_subdirs.prev, parent->d_subdirs.next); 134 parent->d_subdirs.prev, parent->d_subdirs.next);
135 if (p == &parent->d_subdirs) { 135 if (p == &parent->d_subdirs) {
136 fi->at_end = 1; 136 fi->at_end = 1;
137 goto out_unlock; 137 goto out_unlock;
138 } 138 }
/* post-commit side only: the d_unhashed() test below now runs under dentry->d_lock */
139 spin_lock(&dentry->d_lock);
139 if (!d_unhashed(dentry) && dentry->d_inode && 140 if (!d_unhashed(dentry) && dentry->d_inode &&
140 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && 141 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
141 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && 142 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
142 filp->f_pos <= di->offset) 143 filp->f_pos <= di->offset)
/* on the post-commit side the loop is left with d_lock still held; it is released after dget_dlock() below */
143 break; 144 break;
144 dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry, 145 dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry,
145 dentry->d_name.len, dentry->d_name.name, di->offset, 146 dentry->d_name.len, dentry->d_name.name, di->offset,
146 filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", 147 filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
147 !dentry->d_inode ? " null" : ""); 148 !dentry->d_inode ? " null" : "");
/* post-commit side only: drop d_lock on the skipped entry before stepping to the previous sibling */
149 spin_unlock(&dentry->d_lock);
148 p = p->prev; 150 p = p->prev;
149 dentry = list_entry(p, struct dentry, d_u.d_child); 151 dentry = list_entry(p, struct dentry, d_u.d_child);
150 di = ceph_dentry(dentry); 152 di = ceph_dentry(dentry);
151 } 153 }
152 154
/*
 * Take a reference on the chosen dentry.  The pre-commit side acquired
 * d_lock here and incremented d_count by hand; the post-commit side
 * already holds d_lock from the loop and uses dget_dlock().  Both sides
 * then release d_lock and dcache_lock before calling filldir().
 */
153 spin_lock(&dentry->d_lock); 155 dget_dlock(dentry);
154 dentry->d_count++;
155 spin_unlock(&dentry->d_lock); 156 spin_unlock(&dentry->d_lock);
156 spin_unlock(&dcache_lock); 157 spin_unlock(&dcache_lock);
157 158
158 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, 159 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
159 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 160 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
160 filp->f_pos = di->offset; 161 filp->f_pos = di->offset;
161 err = filldir(dirent, dentry->d_name.name, 162 err = filldir(dirent, dentry->d_name.name,
162 dentry->d_name.len, di->offset, 163 dentry->d_name.len, di->offset,
163 dentry->d_inode->i_ino, 164 dentry->d_inode->i_ino,
164 dentry->d_inode->i_mode >> 12); 165 dentry->d_inode->i_mode >> 12);
165 166
/*
 * Hand over the reference on the previous `last`: if filldir() failed it is
 * parked in fi->dentry together with the offset of the entry that did not
 * fit; otherwise it is dropped.  `last` then becomes the current dentry,
 * whose reference is dropped at `out` on every exit path.
 */
166 if (last) { 167 if (last) {
167 if (err < 0) { 168 if (err < 0) {
168 /* remember our position */ 169 /* remember our position */
169 fi->dentry = last; 170 fi->dentry = last;
170 fi->next_offset = di->offset; 171 fi->next_offset = di->offset;
171 } else { 172 } else {
172 dput(last); 173 dput(last);
173 } 174 }
174 } 175 }
175 last = dentry; 176 last = dentry;
176 177
177 if (err < 0) 178 if (err < 0)
178 goto out; 179 goto out;
179 180
180 filp->f_pos++; 181 filp->f_pos++;
181 182
182 /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ 183 /* make sure a dentry wasn't dropped while we didn't have dcache_lock */
183 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { 184 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
184 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); 185 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
185 err = -EAGAIN; 186 err = -EAGAIN;
186 goto out; 187 goto out;
187 } 188 }
188 189
/* retake dcache_lock and continue from the sibling before the entry just emitted; `last` still holds a reference on that entry */
189 spin_lock(&dcache_lock); 190 spin_lock(&dcache_lock);
190 p = p->prev; /* advance to next dentry */ 191 p = p->prev; /* advance to next dentry */
191 goto more; 192 goto more;
192 193
193 out_unlock: 194 out_unlock:
194 spin_unlock(&dcache_lock); 195 spin_unlock(&dcache_lock);
195 out: 196 out:
196 if (last) 197 if (last)
197 dput(last); 198 dput(last);
198 return err; 199 return err;
199 } 200 }
200 201
201 /* 202 /*
202 * make note of the last dentry we read, so we can 203 * make note of the last dentry we read, so we can
203 * continue at the same lexicographical point, 204 * continue at the same lexicographical point,
204 * regardless of what dir changes take place on the 205 * regardless of what dir changes take place on the
205 * server. 206 * server.
206 */ 207 */
207 static int note_last_dentry(struct ceph_file_info *fi, const char *name, 208 static int note_last_dentry(struct ceph_file_info *fi, const char *name,
208 int len) 209 int len)
209 { 210 {
210 kfree(fi->last_name); 211 kfree(fi->last_name);
211 fi->last_name = kmalloc(len+1, GFP_NOFS); 212 fi->last_name = kmalloc(len+1, GFP_NOFS);
212 if (!fi->last_name) 213 if (!fi->last_name)
213 return -ENOMEM; 214 return -ENOMEM;
214 memcpy(fi->last_name, name, len); 215 memcpy(fi->last_name, name, len);
215 fi->last_name[len] = 0; 216 fi->last_name[len] = 0;
216 dout("note_last_dentry '%s'\n", fi->last_name); 217 dout("note_last_dentry '%s'\n", fi->last_name);
217 return 0; 218 return 0;
218 } 219 }
219 220
220 static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) 221 static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
221 { 222 {
222 struct ceph_file_info *fi = filp->private_data; 223 struct ceph_file_info *fi = filp->private_data;
223 struct inode *inode = filp->f_dentry->d_inode; 224 struct inode *inode = filp->f_dentry->d_inode;
224 struct ceph_inode_info *ci = ceph_inode(inode); 225 struct ceph_inode_info *ci = ceph_inode(inode);
225 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 226 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
226 struct ceph_mds_client *mdsc = fsc->mdsc; 227 struct ceph_mds_client *mdsc = fsc->mdsc;
227 unsigned frag = fpos_frag(filp->f_pos); 228 unsigned frag = fpos_frag(filp->f_pos);
228 int off = fpos_off(filp->f_pos); 229 int off = fpos_off(filp->f_pos);
229 int err; 230 int err;
230 u32 ftype; 231 u32 ftype;
231 struct ceph_mds_reply_info_parsed *rinfo; 232 struct ceph_mds_reply_info_parsed *rinfo;
232 const int max_entries = fsc->mount_options->max_readdir; 233 const int max_entries = fsc->mount_options->max_readdir;
233 const int max_bytes = fsc->mount_options->max_readdir_bytes; 234 const int max_bytes = fsc->mount_options->max_readdir_bytes;
234 235
235 dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); 236 dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
236 if (fi->at_end) 237 if (fi->at_end)
237 return 0; 238 return 0;
238 239
239 /* always start with . and .. */ 240 /* always start with . and .. */
240 if (filp->f_pos == 0) { 241 if (filp->f_pos == 0) {
241 /* note dir version at start of readdir so we can tell 242 /* note dir version at start of readdir so we can tell
242 * if any dentries get dropped */ 243 * if any dentries get dropped */
243 fi->dir_release_count = ci->i_release_count; 244 fi->dir_release_count = ci->i_release_count;
244 245
245 dout("readdir off 0 -> '.'\n"); 246 dout("readdir off 0 -> '.'\n");
246 if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), 247 if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0),
247 inode->i_ino, inode->i_mode >> 12) < 0) 248 inode->i_ino, inode->i_mode >> 12) < 0)
248 return 0; 249 return 0;
249 filp->f_pos = 1; 250 filp->f_pos = 1;
250 off = 1; 251 off = 1;
251 } 252 }
252 if (filp->f_pos == 1) { 253 if (filp->f_pos == 1) {
253 dout("readdir off 1 -> '..'\n"); 254 dout("readdir off 1 -> '..'\n");
254 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), 255 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
255 filp->f_dentry->d_parent->d_inode->i_ino, 256 filp->f_dentry->d_parent->d_inode->i_ino,
256 inode->i_mode >> 12) < 0) 257 inode->i_mode >> 12) < 0)
257 return 0; 258 return 0;
258 filp->f_pos = 2; 259 filp->f_pos = 2;
259 off = 2; 260 off = 2;
260 } 261 }
261 262
262 /* can we use the dcache? */ 263 /* can we use the dcache? */
263 spin_lock(&inode->i_lock); 264 spin_lock(&inode->i_lock);
264 if ((filp->f_pos == 2 || fi->dentry) && 265 if ((filp->f_pos == 2 || fi->dentry) &&
265 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 266 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
266 ceph_snap(inode) != CEPH_SNAPDIR && 267 ceph_snap(inode) != CEPH_SNAPDIR &&
267 (ci->i_ceph_flags & CEPH_I_COMPLETE) && 268 (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
268 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 269 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
269 spin_unlock(&inode->i_lock); 270 spin_unlock(&inode->i_lock);
270 err = __dcache_readdir(filp, dirent, filldir); 271 err = __dcache_readdir(filp, dirent, filldir);
271 if (err != -EAGAIN) 272 if (err != -EAGAIN)
272 return err; 273 return err;
273 } else { 274 } else {
274 spin_unlock(&inode->i_lock); 275 spin_unlock(&inode->i_lock);
275 } 276 }
276 if (fi->dentry) { 277 if (fi->dentry) {
277 err = note_last_dentry(fi, fi->dentry->d_name.name, 278 err = note_last_dentry(fi, fi->dentry->d_name.name,
278 fi->dentry->d_name.len); 279 fi->dentry->d_name.len);
279 if (err) 280 if (err)
280 return err; 281 return err;
281 dput(fi->dentry); 282 dput(fi->dentry);
282 fi->dentry = NULL; 283 fi->dentry = NULL;
283 } 284 }
284 285
285 /* proceed with a normal readdir */ 286 /* proceed with a normal readdir */
286 287
287 more: 288 more:
288 /* do we have the correct frag content buffered? */ 289 /* do we have the correct frag content buffered? */
289 if (fi->frag != frag || fi->last_readdir == NULL) { 290 if (fi->frag != frag || fi->last_readdir == NULL) {
290 struct ceph_mds_request *req; 291 struct ceph_mds_request *req;
291 int op = ceph_snap(inode) == CEPH_SNAPDIR ? 292 int op = ceph_snap(inode) == CEPH_SNAPDIR ?
292 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; 293 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
293 294
294 /* discard old result, if any */ 295 /* discard old result, if any */
295 if (fi->last_readdir) { 296 if (fi->last_readdir) {
296 ceph_mdsc_put_request(fi->last_readdir); 297 ceph_mdsc_put_request(fi->last_readdir);
297 fi->last_readdir = NULL; 298 fi->last_readdir = NULL;
298 } 299 }
299 300
300 /* requery frag tree, as the frag topology may have changed */ 301 /* requery frag tree, as the frag topology may have changed */
301 frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); 302 frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
302 303
303 dout("readdir fetching %llx.%llx frag %x offset '%s'\n", 304 dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
304 ceph_vinop(inode), frag, fi->last_name); 305 ceph_vinop(inode), frag, fi->last_name);
305 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 306 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
306 if (IS_ERR(req)) 307 if (IS_ERR(req))
307 return PTR_ERR(req); 308 return PTR_ERR(req);
308 req->r_inode = igrab(inode); 309 req->r_inode = igrab(inode);
309 req->r_dentry = dget(filp->f_dentry); 310 req->r_dentry = dget(filp->f_dentry);
310 /* hints to request -> mds selection code */ 311 /* hints to request -> mds selection code */
311 req->r_direct_mode = USE_AUTH_MDS; 312 req->r_direct_mode = USE_AUTH_MDS;
312 req->r_direct_hash = ceph_frag_value(frag); 313 req->r_direct_hash = ceph_frag_value(frag);
313 req->r_direct_is_hash = true; 314 req->r_direct_is_hash = true;
314 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); 315 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
315 req->r_readdir_offset = fi->next_offset; 316 req->r_readdir_offset = fi->next_offset;
316 req->r_args.readdir.frag = cpu_to_le32(frag); 317 req->r_args.readdir.frag = cpu_to_le32(frag);
317 req->r_args.readdir.max_entries = cpu_to_le32(max_entries); 318 req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
318 req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); 319 req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
319 req->r_num_caps = max_entries + 1; 320 req->r_num_caps = max_entries + 1;
320 err = ceph_mdsc_do_request(mdsc, NULL, req); 321 err = ceph_mdsc_do_request(mdsc, NULL, req);
321 if (err < 0) { 322 if (err < 0) {
322 ceph_mdsc_put_request(req); 323 ceph_mdsc_put_request(req);
323 return err; 324 return err;
324 } 325 }
325 dout("readdir got and parsed readdir result=%d" 326 dout("readdir got and parsed readdir result=%d"
326 " on frag %x, end=%d, complete=%d\n", err, frag, 327 " on frag %x, end=%d, complete=%d\n", err, frag,
327 (int)req->r_reply_info.dir_end, 328 (int)req->r_reply_info.dir_end,
328 (int)req->r_reply_info.dir_complete); 329 (int)req->r_reply_info.dir_complete);
329 330
330 if (!req->r_did_prepopulate) { 331 if (!req->r_did_prepopulate) {
331 dout("readdir !did_prepopulate"); 332 dout("readdir !did_prepopulate");
332 fi->dir_release_count--; /* preclude I_COMPLETE */ 333 fi->dir_release_count--; /* preclude I_COMPLETE */
333 } 334 }
334 335
335 /* note next offset and last dentry name */ 336 /* note next offset and last dentry name */
336 fi->offset = fi->next_offset; 337 fi->offset = fi->next_offset;
337 fi->last_readdir = req; 338 fi->last_readdir = req;
338 339
339 if (req->r_reply_info.dir_end) { 340 if (req->r_reply_info.dir_end) {
340 kfree(fi->last_name); 341 kfree(fi->last_name);
341 fi->last_name = NULL; 342 fi->last_name = NULL;
342 if (ceph_frag_is_rightmost(frag)) 343 if (ceph_frag_is_rightmost(frag))
343 fi->next_offset = 2; 344 fi->next_offset = 2;
344 else 345 else
345 fi->next_offset = 0; 346 fi->next_offset = 0;
346 } else { 347 } else {
347 rinfo = &req->r_reply_info; 348 rinfo = &req->r_reply_info;
348 err = note_last_dentry(fi, 349 err = note_last_dentry(fi,
349 rinfo->dir_dname[rinfo->dir_nr-1], 350 rinfo->dir_dname[rinfo->dir_nr-1],
350 rinfo->dir_dname_len[rinfo->dir_nr-1]); 351 rinfo->dir_dname_len[rinfo->dir_nr-1]);
351 if (err) 352 if (err)
352 return err; 353 return err;
353 fi->next_offset += rinfo->dir_nr; 354 fi->next_offset += rinfo->dir_nr;
354 } 355 }
355 } 356 }
356 357
357 rinfo = &fi->last_readdir->r_reply_info; 358 rinfo = &fi->last_readdir->r_reply_info;
358 dout("readdir frag %x num %d off %d chunkoff %d\n", frag, 359 dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
359 rinfo->dir_nr, off, fi->offset); 360 rinfo->dir_nr, off, fi->offset);
360 while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) { 361 while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) {
361 u64 pos = ceph_make_fpos(frag, off); 362 u64 pos = ceph_make_fpos(frag, off);
362 struct ceph_mds_reply_inode *in = 363 struct ceph_mds_reply_inode *in =
363 rinfo->dir_in[off - fi->offset].in; 364 rinfo->dir_in[off - fi->offset].in;
364 struct ceph_vino vino; 365 struct ceph_vino vino;
365 ino_t ino; 366 ino_t ino;
366 367
367 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", 368 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
368 off, off - fi->offset, rinfo->dir_nr, pos, 369 off, off - fi->offset, rinfo->dir_nr, pos,
369 rinfo->dir_dname_len[off - fi->offset], 370 rinfo->dir_dname_len[off - fi->offset],
370 rinfo->dir_dname[off - fi->offset], in); 371 rinfo->dir_dname[off - fi->offset], in);
371 BUG_ON(!in); 372 BUG_ON(!in);
372 ftype = le32_to_cpu(in->mode) >> 12; 373 ftype = le32_to_cpu(in->mode) >> 12;
373 vino.ino = le64_to_cpu(in->ino); 374 vino.ino = le64_to_cpu(in->ino);
374 vino.snap = le64_to_cpu(in->snapid); 375 vino.snap = le64_to_cpu(in->snapid);
375 ino = ceph_vino_to_ino(vino); 376 ino = ceph_vino_to_ino(vino);
376 if (filldir(dirent, 377 if (filldir(dirent,
377 rinfo->dir_dname[off - fi->offset], 378 rinfo->dir_dname[off - fi->offset],
378 rinfo->dir_dname_len[off - fi->offset], 379 rinfo->dir_dname_len[off - fi->offset],
379 pos, ino, ftype) < 0) { 380 pos, ino, ftype) < 0) {
380 dout("filldir stopping us...\n"); 381 dout("filldir stopping us...\n");
381 return 0; 382 return 0;
382 } 383 }
383 off++; 384 off++;
384 filp->f_pos = pos + 1; 385 filp->f_pos = pos + 1;
385 } 386 }
386 387
387 if (fi->last_name) { 388 if (fi->last_name) {
388 ceph_mdsc_put_request(fi->last_readdir); 389 ceph_mdsc_put_request(fi->last_readdir);
389 fi->last_readdir = NULL; 390 fi->last_readdir = NULL;
390 goto more; 391 goto more;
391 } 392 }
392 393
393 /* more frags? */ 394 /* more frags? */
394 if (!ceph_frag_is_rightmost(frag)) { 395 if (!ceph_frag_is_rightmost(frag)) {
395 frag = ceph_frag_next(frag); 396 frag = ceph_frag_next(frag);
396 off = 0; 397 off = 0;
397 filp->f_pos = ceph_make_fpos(frag, off); 398 filp->f_pos = ceph_make_fpos(frag, off);
398 dout("readdir next frag is %x\n", frag); 399 dout("readdir next frag is %x\n", frag);
399 goto more; 400 goto more;
400 } 401 }
401 fi->at_end = 1; 402 fi->at_end = 1;
402 403
403 /* 404 /*
404 * if dir_release_count still matches the dir, no dentries 405 * if dir_release_count still matches the dir, no dentries
405 * were released during the whole readdir, and we should have 406 * were released during the whole readdir, and we should have
406 * the complete dir contents in our cache. 407 * the complete dir contents in our cache.
407 */ 408 */
408 spin_lock(&inode->i_lock); 409 spin_lock(&inode->i_lock);
409 if (ci->i_release_count == fi->dir_release_count) { 410 if (ci->i_release_count == fi->dir_release_count) {
410 dout(" marking %p complete\n", inode); 411 dout(" marking %p complete\n", inode);
411 ci->i_ceph_flags |= CEPH_I_COMPLETE; 412 ci->i_ceph_flags |= CEPH_I_COMPLETE;
412 ci->i_max_offset = filp->f_pos; 413 ci->i_max_offset = filp->f_pos;
413 } 414 }
414 spin_unlock(&inode->i_lock); 415 spin_unlock(&inode->i_lock);
415 416
416 dout("readdir %p filp %p done.\n", inode, filp); 417 dout("readdir %p filp %p done.\n", inode, filp);
417 return 0; 418 return 0;
418 } 419 }
419 420
420 static void reset_readdir(struct ceph_file_info *fi) 421 static void reset_readdir(struct ceph_file_info *fi)
421 { 422 {
422 if (fi->last_readdir) { 423 if (fi->last_readdir) {
423 ceph_mdsc_put_request(fi->last_readdir); 424 ceph_mdsc_put_request(fi->last_readdir);
424 fi->last_readdir = NULL; 425 fi->last_readdir = NULL;
425 } 426 }
426 kfree(fi->last_name); 427 kfree(fi->last_name);
427 fi->last_name = NULL; 428 fi->last_name = NULL;
428 fi->next_offset = 2; /* compensate for . and .. */ 429 fi->next_offset = 2; /* compensate for . and .. */
429 if (fi->dentry) { 430 if (fi->dentry) {
430 dput(fi->dentry); 431 dput(fi->dentry);
431 fi->dentry = NULL; 432 fi->dentry = NULL;
432 } 433 }
433 fi->at_end = 0; 434 fi->at_end = 0;
434 } 435 }
435 436
436 static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) 437 static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
437 { 438 {
438 struct ceph_file_info *fi = file->private_data; 439 struct ceph_file_info *fi = file->private_data;
439 struct inode *inode = file->f_mapping->host; 440 struct inode *inode = file->f_mapping->host;
440 loff_t old_offset = offset; 441 loff_t old_offset = offset;
441 loff_t retval; 442 loff_t retval;
442 443
443 mutex_lock(&inode->i_mutex); 444 mutex_lock(&inode->i_mutex);
444 switch (origin) { 445 switch (origin) {
445 case SEEK_END: 446 case SEEK_END:
446 offset += inode->i_size + 2; /* FIXME */ 447 offset += inode->i_size + 2; /* FIXME */
447 break; 448 break;
448 case SEEK_CUR: 449 case SEEK_CUR:
449 offset += file->f_pos; 450 offset += file->f_pos;
450 } 451 }
451 retval = -EINVAL; 452 retval = -EINVAL;
452 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { 453 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
453 if (offset != file->f_pos) { 454 if (offset != file->f_pos) {
454 file->f_pos = offset; 455 file->f_pos = offset;
455 file->f_version = 0; 456 file->f_version = 0;
456 fi->at_end = 0; 457 fi->at_end = 0;
457 } 458 }
458 retval = offset; 459 retval = offset;
459 460
460 /* 461 /*
461 * discard buffered readdir content on seekdir(0), or 462 * discard buffered readdir content on seekdir(0), or
462 * seek to new frag, or seek prior to current chunk. 463 * seek to new frag, or seek prior to current chunk.
463 */ 464 */
464 if (offset == 0 || 465 if (offset == 0 ||
465 fpos_frag(offset) != fpos_frag(old_offset) || 466 fpos_frag(offset) != fpos_frag(old_offset) ||
466 fpos_off(offset) < fi->offset) { 467 fpos_off(offset) < fi->offset) {
467 dout("dir_llseek dropping %p content\n", file); 468 dout("dir_llseek dropping %p content\n", file);
468 reset_readdir(fi); 469 reset_readdir(fi);
469 } 470 }
470 471
471 /* bump dir_release_count if we did a forward seek */ 472 /* bump dir_release_count if we did a forward seek */
472 if (offset > old_offset) 473 if (offset > old_offset)
473 fi->dir_release_count--; 474 fi->dir_release_count--;
474 } 475 }
475 mutex_unlock(&inode->i_mutex); 476 mutex_unlock(&inode->i_mutex);
476 return retval; 477 return retval;
477 } 478 }
478 479
479 /* 480 /*
480 * Process result of a lookup/open request. 481 * Process result of a lookup/open request.
481 * 482 *
482 * Mainly, make sure we return the final req->r_dentry (if it already 483 * Mainly, make sure we return the final req->r_dentry (if it already
483 * existed) in place of the original VFS-provided dentry when they 484 * existed) in place of the original VFS-provided dentry when they
484 * differ. 485 * differ.
485 * 486 *
486 * Gracefully handle the case where the MDS replies with -ENOENT and 487 * Gracefully handle the case where the MDS replies with -ENOENT and
487 * no trace (which it may do, at its discretion, e.g., if it doesn't 488 * no trace (which it may do, at its discretion, e.g., if it doesn't
488 * care to issue a lease on the negative dentry). 489 * care to issue a lease on the negative dentry).
489 */ 490 */
490 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, 491 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
491 struct dentry *dentry, int err) 492 struct dentry *dentry, int err)
492 { 493 {
493 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 494 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
494 struct inode *parent = dentry->d_parent->d_inode; 495 struct inode *parent = dentry->d_parent->d_inode;
495 496
496 /* .snap dir? */ 497 /* .snap dir? */
497 if (err == -ENOENT && 498 if (err == -ENOENT &&
498 strcmp(dentry->d_name.name, 499 strcmp(dentry->d_name.name,
499 fsc->mount_options->snapdir_name) == 0) { 500 fsc->mount_options->snapdir_name) == 0) {
500 struct inode *inode = ceph_get_snapdir(parent); 501 struct inode *inode = ceph_get_snapdir(parent);
501 dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", 502 dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
502 dentry, dentry->d_name.len, dentry->d_name.name, inode); 503 dentry, dentry->d_name.len, dentry->d_name.name, inode);
503 BUG_ON(!d_unhashed(dentry)); 504 BUG_ON(!d_unhashed(dentry));
504 d_add(dentry, inode); 505 d_add(dentry, inode);
505 err = 0; 506 err = 0;
506 } 507 }
507 508
508 if (err == -ENOENT) { 509 if (err == -ENOENT) {
509 /* no trace? */ 510 /* no trace? */
510 err = 0; 511 err = 0;
511 if (!req->r_reply_info.head->is_dentry) { 512 if (!req->r_reply_info.head->is_dentry) {
512 dout("ENOENT and no trace, dentry %p inode %p\n", 513 dout("ENOENT and no trace, dentry %p inode %p\n",
513 dentry, dentry->d_inode); 514 dentry, dentry->d_inode);
514 if (dentry->d_inode) { 515 if (dentry->d_inode) {
515 d_drop(dentry); 516 d_drop(dentry);
516 err = -ENOENT; 517 err = -ENOENT;
517 } else { 518 } else {
518 d_add(dentry, NULL); 519 d_add(dentry, NULL);
519 } 520 }
520 } 521 }
521 } 522 }
522 if (err) 523 if (err)
523 dentry = ERR_PTR(err); 524 dentry = ERR_PTR(err);
524 else if (dentry != req->r_dentry) 525 else if (dentry != req->r_dentry)
525 dentry = dget(req->r_dentry); /* we got spliced */ 526 dentry = dget(req->r_dentry); /* we got spliced */
526 else 527 else
527 dentry = NULL; 528 dentry = NULL;
528 return dentry; 529 return dentry;
529 } 530 }
530 531
531 static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) 532 static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
532 { 533 {
533 return ceph_ino(inode) == CEPH_INO_ROOT && 534 return ceph_ino(inode) == CEPH_INO_ROOT &&
534 strncmp(dentry->d_name.name, ".ceph", 5) == 0; 535 strncmp(dentry->d_name.name, ".ceph", 5) == 0;
535 } 536 }
536 537
537 /* 538 /*
538 * Look up a single dir entry. If there is a lookup intent, inform 539 * Look up a single dir entry. If there is a lookup intent, inform
539 * the MDS so that it gets our 'caps wanted' value in a single op. 540 * the MDS so that it gets our 'caps wanted' value in a single op.
540 */ 541 */
541 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, 542 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
542 struct nameidata *nd) 543 struct nameidata *nd)
543 { 544 {
544 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 545 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
545 struct ceph_mds_client *mdsc = fsc->mdsc; 546 struct ceph_mds_client *mdsc = fsc->mdsc;
546 struct ceph_mds_request *req; 547 struct ceph_mds_request *req;
547 int op; 548 int op;
548 int err; 549 int err;
549 550
550 dout("lookup %p dentry %p '%.*s'\n", 551 dout("lookup %p dentry %p '%.*s'\n",
551 dir, dentry, dentry->d_name.len, dentry->d_name.name); 552 dir, dentry, dentry->d_name.len, dentry->d_name.name);
552 553
553 if (dentry->d_name.len > NAME_MAX) 554 if (dentry->d_name.len > NAME_MAX)
554 return ERR_PTR(-ENAMETOOLONG); 555 return ERR_PTR(-ENAMETOOLONG);
555 556
556 err = ceph_init_dentry(dentry); 557 err = ceph_init_dentry(dentry);
557 if (err < 0) 558 if (err < 0)
558 return ERR_PTR(err); 559 return ERR_PTR(err);
559 560
560 /* open (but not create!) intent? */ 561 /* open (but not create!) intent? */
561 if (nd && 562 if (nd &&
562 (nd->flags & LOOKUP_OPEN) && 563 (nd->flags & LOOKUP_OPEN) &&
563 (nd->flags & LOOKUP_CONTINUE) == 0 && /* only open last component */ 564 (nd->flags & LOOKUP_CONTINUE) == 0 && /* only open last component */
564 !(nd->intent.open.flags & O_CREAT)) { 565 !(nd->intent.open.flags & O_CREAT)) {
565 int mode = nd->intent.open.create_mode & ~current->fs->umask; 566 int mode = nd->intent.open.create_mode & ~current->fs->umask;
566 return ceph_lookup_open(dir, dentry, nd, mode, 1); 567 return ceph_lookup_open(dir, dentry, nd, mode, 1);
567 } 568 }
568 569
569 /* can we conclude ENOENT locally? */ 570 /* can we conclude ENOENT locally? */
570 if (dentry->d_inode == NULL) { 571 if (dentry->d_inode == NULL) {
571 struct ceph_inode_info *ci = ceph_inode(dir); 572 struct ceph_inode_info *ci = ceph_inode(dir);
572 struct ceph_dentry_info *di = ceph_dentry(dentry); 573 struct ceph_dentry_info *di = ceph_dentry(dentry);
573 574
574 spin_lock(&dir->i_lock); 575 spin_lock(&dir->i_lock);
575 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); 576 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
576 if (strncmp(dentry->d_name.name, 577 if (strncmp(dentry->d_name.name,
577 fsc->mount_options->snapdir_name, 578 fsc->mount_options->snapdir_name,
578 dentry->d_name.len) && 579 dentry->d_name.len) &&
579 !is_root_ceph_dentry(dir, dentry) && 580 !is_root_ceph_dentry(dir, dentry) &&
580 (ci->i_ceph_flags & CEPH_I_COMPLETE) && 581 (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
581 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 582 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
582 spin_unlock(&dir->i_lock); 583 spin_unlock(&dir->i_lock);
583 dout(" dir %p complete, -ENOENT\n", dir); 584 dout(" dir %p complete, -ENOENT\n", dir);
584 d_add(dentry, NULL); 585 d_add(dentry, NULL);
585 di->lease_shared_gen = ci->i_shared_gen; 586 di->lease_shared_gen = ci->i_shared_gen;
586 return NULL; 587 return NULL;
587 } 588 }
588 spin_unlock(&dir->i_lock); 589 spin_unlock(&dir->i_lock);
589 } 590 }
590 591
591 op = ceph_snap(dir) == CEPH_SNAPDIR ? 592 op = ceph_snap(dir) == CEPH_SNAPDIR ?
592 CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; 593 CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
593 req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); 594 req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
594 if (IS_ERR(req)) 595 if (IS_ERR(req))
595 return ERR_CAST(req); 596 return ERR_CAST(req);
596 req->r_dentry = dget(dentry); 597 req->r_dentry = dget(dentry);
597 req->r_num_caps = 2; 598 req->r_num_caps = 2;
598 /* we only need inode linkage */ 599 /* we only need inode linkage */
599 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 600 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
600 req->r_locked_dir = dir; 601 req->r_locked_dir = dir;
601 err = ceph_mdsc_do_request(mdsc, NULL, req); 602 err = ceph_mdsc_do_request(mdsc, NULL, req);
602 dentry = ceph_finish_lookup(req, dentry, err); 603 dentry = ceph_finish_lookup(req, dentry, err);
603 ceph_mdsc_put_request(req); /* will dput(dentry) */ 604 ceph_mdsc_put_request(req); /* will dput(dentry) */
604 dout("lookup result=%p\n", dentry); 605 dout("lookup result=%p\n", dentry);
605 return dentry; 606 return dentry;
606 } 607 }
607 608
608 /* 609 /*
609 * If we do a create but get no trace back from the MDS, follow up with 610 * If we do a create but get no trace back from the MDS, follow up with
610 * a lookup (the VFS expects us to link up the provided dentry). 611 * a lookup (the VFS expects us to link up the provided dentry).
611 */ 612 */
612 int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) 613 int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
613 { 614 {
614 struct dentry *result = ceph_lookup(dir, dentry, NULL); 615 struct dentry *result = ceph_lookup(dir, dentry, NULL);
615 616
616 if (result && !IS_ERR(result)) { 617 if (result && !IS_ERR(result)) {
617 /* 618 /*
618 * We created the item, then did a lookup, and found 619 * We created the item, then did a lookup, and found
619 * it was already linked to another inode we already 620 * it was already linked to another inode we already
620 * had in our cache (and thus got spliced). Link our 621 * had in our cache (and thus got spliced). Link our
621 * dentry to that inode, but don't hash it, just in 622 * dentry to that inode, but don't hash it, just in
622 * case the VFS wants to dereference it. 623 * case the VFS wants to dereference it.
623 */ 624 */
624 BUG_ON(!result->d_inode); 625 BUG_ON(!result->d_inode);
625 d_instantiate(dentry, result->d_inode); 626 d_instantiate(dentry, result->d_inode);
626 return 0; 627 return 0;
627 } 628 }
628 return PTR_ERR(result); 629 return PTR_ERR(result);
629 } 630 }
630 631
631 static int ceph_mknod(struct inode *dir, struct dentry *dentry, 632 static int ceph_mknod(struct inode *dir, struct dentry *dentry,
632 int mode, dev_t rdev) 633 int mode, dev_t rdev)
633 { 634 {
634 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 635 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
635 struct ceph_mds_client *mdsc = fsc->mdsc; 636 struct ceph_mds_client *mdsc = fsc->mdsc;
636 struct ceph_mds_request *req; 637 struct ceph_mds_request *req;
637 int err; 638 int err;
638 639
639 if (ceph_snap(dir) != CEPH_NOSNAP) 640 if (ceph_snap(dir) != CEPH_NOSNAP)
640 return -EROFS; 641 return -EROFS;
641 642
642 dout("mknod in dir %p dentry %p mode 0%o rdev %d\n", 643 dout("mknod in dir %p dentry %p mode 0%o rdev %d\n",
643 dir, dentry, mode, rdev); 644 dir, dentry, mode, rdev);
644 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); 645 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
645 if (IS_ERR(req)) { 646 if (IS_ERR(req)) {
646 d_drop(dentry); 647 d_drop(dentry);
647 return PTR_ERR(req); 648 return PTR_ERR(req);
648 } 649 }
649 req->r_dentry = dget(dentry); 650 req->r_dentry = dget(dentry);
650 req->r_num_caps = 2; 651 req->r_num_caps = 2;
651 req->r_locked_dir = dir; 652 req->r_locked_dir = dir;
652 req->r_args.mknod.mode = cpu_to_le32(mode); 653 req->r_args.mknod.mode = cpu_to_le32(mode);
653 req->r_args.mknod.rdev = cpu_to_le32(rdev); 654 req->r_args.mknod.rdev = cpu_to_le32(rdev);
654 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 655 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
655 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 656 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
656 err = ceph_mdsc_do_request(mdsc, dir, req); 657 err = ceph_mdsc_do_request(mdsc, dir, req);
657 if (!err && !req->r_reply_info.head->is_dentry) 658 if (!err && !req->r_reply_info.head->is_dentry)
658 err = ceph_handle_notrace_create(dir, dentry); 659 err = ceph_handle_notrace_create(dir, dentry);
659 ceph_mdsc_put_request(req); 660 ceph_mdsc_put_request(req);
660 if (err) 661 if (err)
661 d_drop(dentry); 662 d_drop(dentry);
662 return err; 663 return err;
663 } 664 }
664 665
665 static int ceph_create(struct inode *dir, struct dentry *dentry, int mode, 666 static int ceph_create(struct inode *dir, struct dentry *dentry, int mode,
666 struct nameidata *nd) 667 struct nameidata *nd)
667 { 668 {
668 dout("create in dir %p dentry %p name '%.*s'\n", 669 dout("create in dir %p dentry %p name '%.*s'\n",
669 dir, dentry, dentry->d_name.len, dentry->d_name.name); 670 dir, dentry, dentry->d_name.len, dentry->d_name.name);
670 671
671 if (ceph_snap(dir) != CEPH_NOSNAP) 672 if (ceph_snap(dir) != CEPH_NOSNAP)
672 return -EROFS; 673 return -EROFS;
673 674
674 if (nd) { 675 if (nd) {
675 BUG_ON((nd->flags & LOOKUP_OPEN) == 0); 676 BUG_ON((nd->flags & LOOKUP_OPEN) == 0);
676 dentry = ceph_lookup_open(dir, dentry, nd, mode, 0); 677 dentry = ceph_lookup_open(dir, dentry, nd, mode, 0);
677 /* hrm, what should i do here if we get aliased? */ 678 /* hrm, what should i do here if we get aliased? */
678 if (IS_ERR(dentry)) 679 if (IS_ERR(dentry))
679 return PTR_ERR(dentry); 680 return PTR_ERR(dentry);
680 return 0; 681 return 0;
681 } 682 }
682 683
683 /* fall back to mknod */ 684 /* fall back to mknod */
684 return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0); 685 return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0);
685 } 686 }
686 687
687 static int ceph_symlink(struct inode *dir, struct dentry *dentry, 688 static int ceph_symlink(struct inode *dir, struct dentry *dentry,
688 const char *dest) 689 const char *dest)
689 { 690 {
690 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 691 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
691 struct ceph_mds_client *mdsc = fsc->mdsc; 692 struct ceph_mds_client *mdsc = fsc->mdsc;
692 struct ceph_mds_request *req; 693 struct ceph_mds_request *req;
693 int err; 694 int err;
694 695
695 if (ceph_snap(dir) != CEPH_NOSNAP) 696 if (ceph_snap(dir) != CEPH_NOSNAP)
696 return -EROFS; 697 return -EROFS;
697 698
698 dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); 699 dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
699 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); 700 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
700 if (IS_ERR(req)) { 701 if (IS_ERR(req)) {
701 d_drop(dentry); 702 d_drop(dentry);
702 return PTR_ERR(req); 703 return PTR_ERR(req);
703 } 704 }
704 req->r_dentry = dget(dentry); 705 req->r_dentry = dget(dentry);
705 req->r_num_caps = 2; 706 req->r_num_caps = 2;
706 req->r_path2 = kstrdup(dest, GFP_NOFS); 707 req->r_path2 = kstrdup(dest, GFP_NOFS);
707 req->r_locked_dir = dir; 708 req->r_locked_dir = dir;
708 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 709 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
709 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 710 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
710 err = ceph_mdsc_do_request(mdsc, dir, req); 711 err = ceph_mdsc_do_request(mdsc, dir, req);
711 if (!err && !req->r_reply_info.head->is_dentry) 712 if (!err && !req->r_reply_info.head->is_dentry)
712 err = ceph_handle_notrace_create(dir, dentry); 713 err = ceph_handle_notrace_create(dir, dentry);
713 ceph_mdsc_put_request(req); 714 ceph_mdsc_put_request(req);
714 if (err) 715 if (err)
715 d_drop(dentry); 716 d_drop(dentry);
716 return err; 717 return err;
717 } 718 }
718 719
719 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) 720 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
720 { 721 {
721 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 722 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
722 struct ceph_mds_client *mdsc = fsc->mdsc; 723 struct ceph_mds_client *mdsc = fsc->mdsc;
723 struct ceph_mds_request *req; 724 struct ceph_mds_request *req;
724 int err = -EROFS; 725 int err = -EROFS;
725 int op; 726 int op;
726 727
727 if (ceph_snap(dir) == CEPH_SNAPDIR) { 728 if (ceph_snap(dir) == CEPH_SNAPDIR) {
728 /* mkdir .snap/foo is a MKSNAP */ 729 /* mkdir .snap/foo is a MKSNAP */
729 op = CEPH_MDS_OP_MKSNAP; 730 op = CEPH_MDS_OP_MKSNAP;
730 dout("mksnap dir %p snap '%.*s' dn %p\n", dir, 731 dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
731 dentry->d_name.len, dentry->d_name.name, dentry); 732 dentry->d_name.len, dentry->d_name.name, dentry);
732 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 733 } else if (ceph_snap(dir) == CEPH_NOSNAP) {
733 dout("mkdir dir %p dn %p mode 0%o\n", dir, dentry, mode); 734 dout("mkdir dir %p dn %p mode 0%o\n", dir, dentry, mode);
734 op = CEPH_MDS_OP_MKDIR; 735 op = CEPH_MDS_OP_MKDIR;
735 } else { 736 } else {
736 goto out; 737 goto out;
737 } 738 }
738 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 739 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
739 if (IS_ERR(req)) { 740 if (IS_ERR(req)) {
740 err = PTR_ERR(req); 741 err = PTR_ERR(req);
741 goto out; 742 goto out;
742 } 743 }
743 744
744 req->r_dentry = dget(dentry); 745 req->r_dentry = dget(dentry);
745 req->r_num_caps = 2; 746 req->r_num_caps = 2;
746 req->r_locked_dir = dir; 747 req->r_locked_dir = dir;
747 req->r_args.mkdir.mode = cpu_to_le32(mode); 748 req->r_args.mkdir.mode = cpu_to_le32(mode);
748 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 749 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
749 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 750 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
750 err = ceph_mdsc_do_request(mdsc, dir, req); 751 err = ceph_mdsc_do_request(mdsc, dir, req);
751 if (!err && !req->r_reply_info.head->is_dentry) 752 if (!err && !req->r_reply_info.head->is_dentry)
752 err = ceph_handle_notrace_create(dir, dentry); 753 err = ceph_handle_notrace_create(dir, dentry);
753 ceph_mdsc_put_request(req); 754 ceph_mdsc_put_request(req);
754 out: 755 out:
755 if (err < 0) 756 if (err < 0)
756 d_drop(dentry); 757 d_drop(dentry);
757 return err; 758 return err;
758 } 759 }
759 760
760 static int ceph_link(struct dentry *old_dentry, struct inode *dir, 761 static int ceph_link(struct dentry *old_dentry, struct inode *dir,
761 struct dentry *dentry) 762 struct dentry *dentry)
762 { 763 {
763 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 764 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
764 struct ceph_mds_client *mdsc = fsc->mdsc; 765 struct ceph_mds_client *mdsc = fsc->mdsc;
765 struct ceph_mds_request *req; 766 struct ceph_mds_request *req;
766 int err; 767 int err;
767 768
768 if (ceph_snap(dir) != CEPH_NOSNAP) 769 if (ceph_snap(dir) != CEPH_NOSNAP)
769 return -EROFS; 770 return -EROFS;
770 771
771 dout("link in dir %p old_dentry %p dentry %p\n", dir, 772 dout("link in dir %p old_dentry %p dentry %p\n", dir,
772 old_dentry, dentry); 773 old_dentry, dentry);
773 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS); 774 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
774 if (IS_ERR(req)) { 775 if (IS_ERR(req)) {
775 d_drop(dentry); 776 d_drop(dentry);
776 return PTR_ERR(req); 777 return PTR_ERR(req);
777 } 778 }
778 req->r_dentry = dget(dentry); 779 req->r_dentry = dget(dentry);
779 req->r_num_caps = 2; 780 req->r_num_caps = 2;
780 req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ 781 req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
781 req->r_locked_dir = dir; 782 req->r_locked_dir = dir;
782 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 783 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
783 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 784 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
784 err = ceph_mdsc_do_request(mdsc, dir, req); 785 err = ceph_mdsc_do_request(mdsc, dir, req);
785 if (err) 786 if (err)
786 d_drop(dentry); 787 d_drop(dentry);
787 else if (!req->r_reply_info.head->is_dentry) 788 else if (!req->r_reply_info.head->is_dentry)
788 d_instantiate(dentry, igrab(old_dentry->d_inode)); 789 d_instantiate(dentry, igrab(old_dentry->d_inode));
789 ceph_mdsc_put_request(req); 790 ceph_mdsc_put_request(req);
790 return err; 791 return err;
791 } 792 }
792 793
793 /* 794 /*
794 * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it 795 * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
795 * looks like the link count will hit 0, drop any other caps (other 796 * looks like the link count will hit 0, drop any other caps (other
796 * than PIN) we don't specifically want (due to the file still being 797 * than PIN) we don't specifically want (due to the file still being
797 * open). 798 * open).
798 */ 799 */
799 static int drop_caps_for_unlink(struct inode *inode) 800 static int drop_caps_for_unlink(struct inode *inode)
800 { 801 {
801 struct ceph_inode_info *ci = ceph_inode(inode); 802 struct ceph_inode_info *ci = ceph_inode(inode);
802 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; 803 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
803 804
804 spin_lock(&inode->i_lock); 805 spin_lock(&inode->i_lock);
805 if (inode->i_nlink == 1) { 806 if (inode->i_nlink == 1) {
806 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); 807 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
807 ci->i_ceph_flags |= CEPH_I_NODELAY; 808 ci->i_ceph_flags |= CEPH_I_NODELAY;
808 } 809 }
809 spin_unlock(&inode->i_lock); 810 spin_unlock(&inode->i_lock);
810 return drop; 811 return drop;
811 } 812 }
812 813
813 /* 814 /*
814 * rmdir and unlink are differ only by the metadata op code 815 * rmdir and unlink are differ only by the metadata op code
815 */ 816 */
816 static int ceph_unlink(struct inode *dir, struct dentry *dentry) 817 static int ceph_unlink(struct inode *dir, struct dentry *dentry)
817 { 818 {
818 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 819 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
819 struct ceph_mds_client *mdsc = fsc->mdsc; 820 struct ceph_mds_client *mdsc = fsc->mdsc;
820 struct inode *inode = dentry->d_inode; 821 struct inode *inode = dentry->d_inode;
821 struct ceph_mds_request *req; 822 struct ceph_mds_request *req;
822 int err = -EROFS; 823 int err = -EROFS;
823 int op; 824 int op;
824 825
825 if (ceph_snap(dir) == CEPH_SNAPDIR) { 826 if (ceph_snap(dir) == CEPH_SNAPDIR) {
826 /* rmdir .snap/foo is RMSNAP */ 827 /* rmdir .snap/foo is RMSNAP */
827 dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len, 828 dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len,
828 dentry->d_name.name, dentry); 829 dentry->d_name.name, dentry);
829 op = CEPH_MDS_OP_RMSNAP; 830 op = CEPH_MDS_OP_RMSNAP;
830 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 831 } else if (ceph_snap(dir) == CEPH_NOSNAP) {
831 dout("unlink/rmdir dir %p dn %p inode %p\n", 832 dout("unlink/rmdir dir %p dn %p inode %p\n",
832 dir, dentry, inode); 833 dir, dentry, inode);
833 op = ((dentry->d_inode->i_mode & S_IFMT) == S_IFDIR) ? 834 op = ((dentry->d_inode->i_mode & S_IFMT) == S_IFDIR) ?
834 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; 835 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
835 } else 836 } else
836 goto out; 837 goto out;
837 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 838 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
838 if (IS_ERR(req)) { 839 if (IS_ERR(req)) {
839 err = PTR_ERR(req); 840 err = PTR_ERR(req);
840 goto out; 841 goto out;
841 } 842 }
842 req->r_dentry = dget(dentry); 843 req->r_dentry = dget(dentry);
843 req->r_num_caps = 2; 844 req->r_num_caps = 2;
844 req->r_locked_dir = dir; 845 req->r_locked_dir = dir;
845 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 846 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
846 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 847 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
847 req->r_inode_drop = drop_caps_for_unlink(inode); 848 req->r_inode_drop = drop_caps_for_unlink(inode);
848 err = ceph_mdsc_do_request(mdsc, dir, req); 849 err = ceph_mdsc_do_request(mdsc, dir, req);
849 if (!err && !req->r_reply_info.head->is_dentry) 850 if (!err && !req->r_reply_info.head->is_dentry)
850 d_delete(dentry); 851 d_delete(dentry);
851 ceph_mdsc_put_request(req); 852 ceph_mdsc_put_request(req);
852 out: 853 out:
853 return err; 854 return err;
854 } 855 }
855 856
856 static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, 857 static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
857 struct inode *new_dir, struct dentry *new_dentry) 858 struct inode *new_dir, struct dentry *new_dentry)
858 { 859 {
859 struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); 860 struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
860 struct ceph_mds_client *mdsc = fsc->mdsc; 861 struct ceph_mds_client *mdsc = fsc->mdsc;
861 struct ceph_mds_request *req; 862 struct ceph_mds_request *req;
862 int err; 863 int err;
863 864
864 if (ceph_snap(old_dir) != ceph_snap(new_dir)) 865 if (ceph_snap(old_dir) != ceph_snap(new_dir))
865 return -EXDEV; 866 return -EXDEV;
866 if (ceph_snap(old_dir) != CEPH_NOSNAP || 867 if (ceph_snap(old_dir) != CEPH_NOSNAP ||
867 ceph_snap(new_dir) != CEPH_NOSNAP) 868 ceph_snap(new_dir) != CEPH_NOSNAP)
868 return -EROFS; 869 return -EROFS;
869 dout("rename dir %p dentry %p to dir %p dentry %p\n", 870 dout("rename dir %p dentry %p to dir %p dentry %p\n",
870 old_dir, old_dentry, new_dir, new_dentry); 871 old_dir, old_dentry, new_dir, new_dentry);
871 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); 872 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
872 if (IS_ERR(req)) 873 if (IS_ERR(req))
873 return PTR_ERR(req); 874 return PTR_ERR(req);
874 req->r_dentry = dget(new_dentry); 875 req->r_dentry = dget(new_dentry);
875 req->r_num_caps = 2; 876 req->r_num_caps = 2;
876 req->r_old_dentry = dget(old_dentry); 877 req->r_old_dentry = dget(old_dentry);
877 req->r_locked_dir = new_dir; 878 req->r_locked_dir = new_dir;
878 req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; 879 req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
879 req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; 880 req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
880 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 881 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
881 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 882 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
882 /* release LINK_RDCACHE on source inode (mds will lock it) */ 883 /* release LINK_RDCACHE on source inode (mds will lock it) */
883 req->r_old_inode_drop = CEPH_CAP_LINK_SHARED; 884 req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
884 if (new_dentry->d_inode) 885 if (new_dentry->d_inode)
885 req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode); 886 req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode);
886 err = ceph_mdsc_do_request(mdsc, old_dir, req); 887 err = ceph_mdsc_do_request(mdsc, old_dir, req);
887 if (!err && !req->r_reply_info.head->is_dentry) { 888 if (!err && !req->r_reply_info.head->is_dentry) {
888 /* 889 /*
889 * Normally d_move() is done by fill_trace (called by 890 * Normally d_move() is done by fill_trace (called by
890 * do_request, above). If there is no trace, we need 891 * do_request, above). If there is no trace, we need
891 * to do it here. 892 * to do it here.
892 */ 893 */
893 894
894 /* d_move screws up d_subdirs order */ 895 /* d_move screws up d_subdirs order */
895 ceph_i_clear(new_dir, CEPH_I_COMPLETE); 896 ceph_i_clear(new_dir, CEPH_I_COMPLETE);
896 897
897 d_move(old_dentry, new_dentry); 898 d_move(old_dentry, new_dentry);
898 899
899 /* ensure target dentry is invalidated, despite 900 /* ensure target dentry is invalidated, despite
900 rehashing bug in vfs_rename_dir */ 901 rehashing bug in vfs_rename_dir */
901 ceph_invalidate_dentry_lease(new_dentry); 902 ceph_invalidate_dentry_lease(new_dentry);
902 } 903 }
903 ceph_mdsc_put_request(req); 904 ceph_mdsc_put_request(req);
904 return err; 905 return err;
905 } 906 }
906 907
907 /* 908 /*
908 * Ensure a dentry lease will no longer revalidate. 909 * Ensure a dentry lease will no longer revalidate.
909 */ 910 */
910 void ceph_invalidate_dentry_lease(struct dentry *dentry) 911 void ceph_invalidate_dentry_lease(struct dentry *dentry)
911 { 912 {
912 spin_lock(&dentry->d_lock); 913 spin_lock(&dentry->d_lock);
913 dentry->d_time = jiffies; 914 dentry->d_time = jiffies;
914 ceph_dentry(dentry)->lease_shared_gen = 0; 915 ceph_dentry(dentry)->lease_shared_gen = 0;
915 spin_unlock(&dentry->d_lock); 916 spin_unlock(&dentry->d_lock);
916 } 917 }
917 918
918 /* 919 /*
919 * Check if dentry lease is valid. If not, delete the lease. Try to 920 * Check if dentry lease is valid. If not, delete the lease. Try to
920 * renew if the least is more than half up. 921 * renew if the least is more than half up.
921 */ 922 */
922 static int dentry_lease_is_valid(struct dentry *dentry) 923 static int dentry_lease_is_valid(struct dentry *dentry)
923 { 924 {
924 struct ceph_dentry_info *di; 925 struct ceph_dentry_info *di;
925 struct ceph_mds_session *s; 926 struct ceph_mds_session *s;
926 int valid = 0; 927 int valid = 0;
927 u32 gen; 928 u32 gen;
928 unsigned long ttl; 929 unsigned long ttl;
929 struct ceph_mds_session *session = NULL; 930 struct ceph_mds_session *session = NULL;
930 struct inode *dir = NULL; 931 struct inode *dir = NULL;
931 u32 seq = 0; 932 u32 seq = 0;
932 933
933 spin_lock(&dentry->d_lock); 934 spin_lock(&dentry->d_lock);
934 di = ceph_dentry(dentry); 935 di = ceph_dentry(dentry);
935 if (di && di->lease_session) { 936 if (di && di->lease_session) {
936 s = di->lease_session; 937 s = di->lease_session;
937 spin_lock(&s->s_cap_lock); 938 spin_lock(&s->s_cap_lock);
938 gen = s->s_cap_gen; 939 gen = s->s_cap_gen;
939 ttl = s->s_cap_ttl; 940 ttl = s->s_cap_ttl;
940 spin_unlock(&s->s_cap_lock); 941 spin_unlock(&s->s_cap_lock);
941 942
942 if (di->lease_gen == gen && 943 if (di->lease_gen == gen &&
943 time_before(jiffies, dentry->d_time) && 944 time_before(jiffies, dentry->d_time) &&
944 time_before(jiffies, ttl)) { 945 time_before(jiffies, ttl)) {
945 valid = 1; 946 valid = 1;
946 if (di->lease_renew_after && 947 if (di->lease_renew_after &&
947 time_after(jiffies, di->lease_renew_after)) { 948 time_after(jiffies, di->lease_renew_after)) {
948 /* we should renew */ 949 /* we should renew */
949 dir = dentry->d_parent->d_inode; 950 dir = dentry->d_parent->d_inode;
950 session = ceph_get_mds_session(s); 951 session = ceph_get_mds_session(s);
951 seq = di->lease_seq; 952 seq = di->lease_seq;
952 di->lease_renew_after = 0; 953 di->lease_renew_after = 0;
953 di->lease_renew_from = jiffies; 954 di->lease_renew_from = jiffies;
954 } 955 }
955 } 956 }
956 } 957 }
957 spin_unlock(&dentry->d_lock); 958 spin_unlock(&dentry->d_lock);
958 959
959 if (session) { 960 if (session) {
960 ceph_mdsc_lease_send_msg(session, dir, dentry, 961 ceph_mdsc_lease_send_msg(session, dir, dentry,
961 CEPH_MDS_LEASE_RENEW, seq); 962 CEPH_MDS_LEASE_RENEW, seq);
962 ceph_put_mds_session(session); 963 ceph_put_mds_session(session);
963 } 964 }
964 dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid); 965 dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid);
965 return valid; 966 return valid;
966 } 967 }
967 968
968 /* 969 /*
969 * Check if directory-wide content lease/cap is valid. 970 * Check if directory-wide content lease/cap is valid.
970 */ 971 */
971 static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) 972 static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
972 { 973 {
973 struct ceph_inode_info *ci = ceph_inode(dir); 974 struct ceph_inode_info *ci = ceph_inode(dir);
974 struct ceph_dentry_info *di = ceph_dentry(dentry); 975 struct ceph_dentry_info *di = ceph_dentry(dentry);
975 int valid = 0; 976 int valid = 0;
976 977
977 spin_lock(&dir->i_lock); 978 spin_lock(&dir->i_lock);
978 if (ci->i_shared_gen == di->lease_shared_gen) 979 if (ci->i_shared_gen == di->lease_shared_gen)
979 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); 980 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
980 spin_unlock(&dir->i_lock); 981 spin_unlock(&dir->i_lock);
981 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", 982 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
982 dir, (unsigned)ci->i_shared_gen, dentry, 983 dir, (unsigned)ci->i_shared_gen, dentry,
983 (unsigned)di->lease_shared_gen, valid); 984 (unsigned)di->lease_shared_gen, valid);
984 return valid; 985 return valid;
985 } 986 }
986 987
987 /* 988 /*
988 * Check if cached dentry can be trusted. 989 * Check if cached dentry can be trusted.
989 */ 990 */
990 static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) 991 static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
991 { 992 {
992 struct inode *dir = dentry->d_parent->d_inode; 993 struct inode *dir = dentry->d_parent->d_inode;
993 994
994 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, 995 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
995 dentry->d_name.len, dentry->d_name.name, dentry->d_inode, 996 dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
996 ceph_dentry(dentry)->offset); 997 ceph_dentry(dentry)->offset);
997 998
998 /* always trust cached snapped dentries, snapdir dentry */ 999 /* always trust cached snapped dentries, snapdir dentry */
999 if (ceph_snap(dir) != CEPH_NOSNAP) { 1000 if (ceph_snap(dir) != CEPH_NOSNAP) {
1000 dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry, 1001 dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
1001 dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 1002 dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
1002 goto out_touch; 1003 goto out_touch;
1003 } 1004 }
1004 if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) 1005 if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR)
1005 goto out_touch; 1006 goto out_touch;
1006 1007
1007 if (dentry_lease_is_valid(dentry) || 1008 if (dentry_lease_is_valid(dentry) ||
1008 dir_lease_is_valid(dir, dentry)) 1009 dir_lease_is_valid(dir, dentry))
1009 goto out_touch; 1010 goto out_touch;
1010 1011
1011 dout("d_revalidate %p invalid\n", dentry); 1012 dout("d_revalidate %p invalid\n", dentry);
1012 d_drop(dentry); 1013 d_drop(dentry);
1013 return 0; 1014 return 0;
1014 out_touch: 1015 out_touch:
1015 ceph_dentry_lru_touch(dentry); 1016 ceph_dentry_lru_touch(dentry);
1016 return 1; 1017 return 1;
1017 } 1018 }
1018 1019
1019 /* 1020 /*
1020 * When a dentry is released, clear the dir I_COMPLETE if it was part 1021 * When a dentry is released, clear the dir I_COMPLETE if it was part
1021 * of the current dir gen or if this is in the snapshot namespace. 1022 * of the current dir gen or if this is in the snapshot namespace.
1022 */ 1023 */
1023 static void ceph_dentry_release(struct dentry *dentry) 1024 static void ceph_dentry_release(struct dentry *dentry)
1024 { 1025 {
1025 struct ceph_dentry_info *di = ceph_dentry(dentry); 1026 struct ceph_dentry_info *di = ceph_dentry(dentry);
1026 struct inode *parent_inode = NULL; 1027 struct inode *parent_inode = NULL;
1027 u64 snapid = CEPH_NOSNAP; 1028 u64 snapid = CEPH_NOSNAP;
1028 1029
1029 if (!IS_ROOT(dentry)) { 1030 if (!IS_ROOT(dentry)) {
1030 parent_inode = dentry->d_parent->d_inode; 1031 parent_inode = dentry->d_parent->d_inode;
1031 if (parent_inode) 1032 if (parent_inode)
1032 snapid = ceph_snap(parent_inode); 1033 snapid = ceph_snap(parent_inode);
1033 } 1034 }
1034 dout("dentry_release %p parent %p\n", dentry, parent_inode); 1035 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1035 if (parent_inode && snapid != CEPH_SNAPDIR) { 1036 if (parent_inode && snapid != CEPH_SNAPDIR) {
1036 struct ceph_inode_info *ci = ceph_inode(parent_inode); 1037 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1037 1038
1038 spin_lock(&parent_inode->i_lock); 1039 spin_lock(&parent_inode->i_lock);
1039 if (ci->i_shared_gen == di->lease_shared_gen || 1040 if (ci->i_shared_gen == di->lease_shared_gen ||
1040 snapid <= CEPH_MAXSNAP) { 1041 snapid <= CEPH_MAXSNAP) {
1041 dout(" clearing %p complete (d_release)\n", 1042 dout(" clearing %p complete (d_release)\n",
1042 parent_inode); 1043 parent_inode);
1043 ci->i_ceph_flags &= ~CEPH_I_COMPLETE; 1044 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1044 ci->i_release_count++; 1045 ci->i_release_count++;
1045 } 1046 }
1046 spin_unlock(&parent_inode->i_lock); 1047 spin_unlock(&parent_inode->i_lock);
1047 } 1048 }
1048 if (di) { 1049 if (di) {
1049 ceph_dentry_lru_del(dentry); 1050 ceph_dentry_lru_del(dentry);
1050 if (di->lease_session) 1051 if (di->lease_session)
1051 ceph_put_mds_session(di->lease_session); 1052 ceph_put_mds_session(di->lease_session);
1052 kmem_cache_free(ceph_dentry_cachep, di); 1053 kmem_cache_free(ceph_dentry_cachep, di);
1053 dentry->d_fsdata = NULL; 1054 dentry->d_fsdata = NULL;
1054 } 1055 }
1055 } 1056 }
1056 1057
/*
 * Revalidate hook for snapdir dentries: unconditionally reports the
 * dentry as valid.  Eventually we'll want to revalidate snapped
 * metadata too... probably...
 */
static int ceph_snapdir_d_revalidate(struct dentry *dentry,
				     struct nameidata *nd)
{
	return 1;
}
1066 1067
1067 1068
1068 1069
1069 /* 1070 /*
1070 * read() on a dir. This weird interface hack only works if mounted 1071 * read() on a dir. This weird interface hack only works if mounted
1071 * with '-o dirstat'. 1072 * with '-o dirstat'.
1072 */ 1073 */
1073 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, 1074 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1074 loff_t *ppos) 1075 loff_t *ppos)
1075 { 1076 {
1076 struct ceph_file_info *cf = file->private_data; 1077 struct ceph_file_info *cf = file->private_data;
1077 struct inode *inode = file->f_dentry->d_inode; 1078 struct inode *inode = file->f_dentry->d_inode;
1078 struct ceph_inode_info *ci = ceph_inode(inode); 1079 struct ceph_inode_info *ci = ceph_inode(inode);
1079 int left; 1080 int left;
1080 1081
1081 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) 1082 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
1082 return -EISDIR; 1083 return -EISDIR;
1083 1084
1084 if (!cf->dir_info) { 1085 if (!cf->dir_info) {
1085 cf->dir_info = kmalloc(1024, GFP_NOFS); 1086 cf->dir_info = kmalloc(1024, GFP_NOFS);
1086 if (!cf->dir_info) 1087 if (!cf->dir_info)
1087 return -ENOMEM; 1088 return -ENOMEM;
1088 cf->dir_info_len = 1089 cf->dir_info_len =
1089 sprintf(cf->dir_info, 1090 sprintf(cf->dir_info,
1090 "entries: %20lld\n" 1091 "entries: %20lld\n"
1091 " files: %20lld\n" 1092 " files: %20lld\n"
1092 " subdirs: %20lld\n" 1093 " subdirs: %20lld\n"
1093 "rentries: %20lld\n" 1094 "rentries: %20lld\n"
1094 " rfiles: %20lld\n" 1095 " rfiles: %20lld\n"
1095 " rsubdirs: %20lld\n" 1096 " rsubdirs: %20lld\n"
1096 "rbytes: %20lld\n" 1097 "rbytes: %20lld\n"
1097 "rctime: %10ld.%09ld\n", 1098 "rctime: %10ld.%09ld\n",
1098 ci->i_files + ci->i_subdirs, 1099 ci->i_files + ci->i_subdirs,
1099 ci->i_files, 1100 ci->i_files,
1100 ci->i_subdirs, 1101 ci->i_subdirs,
1101 ci->i_rfiles + ci->i_rsubdirs, 1102 ci->i_rfiles + ci->i_rsubdirs,
1102 ci->i_rfiles, 1103 ci->i_rfiles,
1103 ci->i_rsubdirs, 1104 ci->i_rsubdirs,
1104 ci->i_rbytes, 1105 ci->i_rbytes,
1105 (long)ci->i_rctime.tv_sec, 1106 (long)ci->i_rctime.tv_sec,
1106 (long)ci->i_rctime.tv_nsec); 1107 (long)ci->i_rctime.tv_nsec);
1107 } 1108 }
1108 1109
1109 if (*ppos >= cf->dir_info_len) 1110 if (*ppos >= cf->dir_info_len)
1110 return 0; 1111 return 0;
1111 size = min_t(unsigned, size, cf->dir_info_len-*ppos); 1112 size = min_t(unsigned, size, cf->dir_info_len-*ppos);
1112 left = copy_to_user(buf, cf->dir_info + *ppos, size); 1113 left = copy_to_user(buf, cf->dir_info + *ppos, size);
1113 if (left == size) 1114 if (left == size)
1114 return -EFAULT; 1115 return -EFAULT;
1115 *ppos += (size - left); 1116 *ppos += (size - left);
1116 return size - left; 1117 return size - left;
1117 } 1118 }
1118 1119
1119 /* 1120 /*
1120 * an fsync() on a dir will wait for any uncommitted directory 1121 * an fsync() on a dir will wait for any uncommitted directory
1121 * operations to commit. 1122 * operations to commit.
1122 */ 1123 */
1123 static int ceph_dir_fsync(struct file *file, int datasync) 1124 static int ceph_dir_fsync(struct file *file, int datasync)
1124 { 1125 {
1125 struct inode *inode = file->f_path.dentry->d_inode; 1126 struct inode *inode = file->f_path.dentry->d_inode;
1126 struct ceph_inode_info *ci = ceph_inode(inode); 1127 struct ceph_inode_info *ci = ceph_inode(inode);
1127 struct list_head *head = &ci->i_unsafe_dirops; 1128 struct list_head *head = &ci->i_unsafe_dirops;
1128 struct ceph_mds_request *req; 1129 struct ceph_mds_request *req;
1129 u64 last_tid; 1130 u64 last_tid;
1130 int ret = 0; 1131 int ret = 0;
1131 1132
1132 dout("dir_fsync %p\n", inode); 1133 dout("dir_fsync %p\n", inode);
1133 spin_lock(&ci->i_unsafe_lock); 1134 spin_lock(&ci->i_unsafe_lock);
1134 if (list_empty(head)) 1135 if (list_empty(head))
1135 goto out; 1136 goto out;
1136 1137
1137 req = list_entry(head->prev, 1138 req = list_entry(head->prev,
1138 struct ceph_mds_request, r_unsafe_dir_item); 1139 struct ceph_mds_request, r_unsafe_dir_item);
1139 last_tid = req->r_tid; 1140 last_tid = req->r_tid;
1140 1141
1141 do { 1142 do {
1142 ceph_mdsc_get_request(req); 1143 ceph_mdsc_get_request(req);
1143 spin_unlock(&ci->i_unsafe_lock); 1144 spin_unlock(&ci->i_unsafe_lock);
1144 dout("dir_fsync %p wait on tid %llu (until %llu)\n", 1145 dout("dir_fsync %p wait on tid %llu (until %llu)\n",
1145 inode, req->r_tid, last_tid); 1146 inode, req->r_tid, last_tid);
1146 if (req->r_timeout) { 1147 if (req->r_timeout) {
1147 ret = wait_for_completion_timeout( 1148 ret = wait_for_completion_timeout(
1148 &req->r_safe_completion, req->r_timeout); 1149 &req->r_safe_completion, req->r_timeout);
1149 if (ret > 0) 1150 if (ret > 0)
1150 ret = 0; 1151 ret = 0;
1151 else if (ret == 0) 1152 else if (ret == 0)
1152 ret = -EIO; /* timed out */ 1153 ret = -EIO; /* timed out */
1153 } else { 1154 } else {
1154 wait_for_completion(&req->r_safe_completion); 1155 wait_for_completion(&req->r_safe_completion);
1155 } 1156 }
1156 spin_lock(&ci->i_unsafe_lock); 1157 spin_lock(&ci->i_unsafe_lock);
1157 ceph_mdsc_put_request(req); 1158 ceph_mdsc_put_request(req);
1158 1159
1159 if (ret || list_empty(head)) 1160 if (ret || list_empty(head))
1160 break; 1161 break;
1161 req = list_entry(head->next, 1162 req = list_entry(head->next,
1162 struct ceph_mds_request, r_unsafe_dir_item); 1163 struct ceph_mds_request, r_unsafe_dir_item);
1163 } while (req->r_tid < last_tid); 1164 } while (req->r_tid < last_tid);
1164 out: 1165 out:
1165 spin_unlock(&ci->i_unsafe_lock); 1166 spin_unlock(&ci->i_unsafe_lock);
1166 return ret; 1167 return ret;
1167 } 1168 }
1168 1169
1169 /* 1170 /*
1170 * We maintain a private dentry LRU. 1171 * We maintain a private dentry LRU.
1171 * 1172 *
1172 * FIXME: this needs to be changed to a per-mds lru to be useful. 1173 * FIXME: this needs to be changed to a per-mds lru to be useful.
1173 */ 1174 */
1174 void ceph_dentry_lru_add(struct dentry *dn) 1175 void ceph_dentry_lru_add(struct dentry *dn)
1175 { 1176 {
1176 struct ceph_dentry_info *di = ceph_dentry(dn); 1177 struct ceph_dentry_info *di = ceph_dentry(dn);
1177 struct ceph_mds_client *mdsc; 1178 struct ceph_mds_client *mdsc;
1178 1179
1179 dout("dentry_lru_add %p %p '%.*s'\n", di, dn, 1180 dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
1180 dn->d_name.len, dn->d_name.name); 1181 dn->d_name.len, dn->d_name.name);
1181 if (di) { 1182 if (di) {
1182 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1183 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
1183 spin_lock(&mdsc->dentry_lru_lock); 1184 spin_lock(&mdsc->dentry_lru_lock);
1184 list_add_tail(&di->lru, &mdsc->dentry_lru); 1185 list_add_tail(&di->lru, &mdsc->dentry_lru);
1185 mdsc->num_dentry++; 1186 mdsc->num_dentry++;
1186 spin_unlock(&mdsc->dentry_lru_lock); 1187 spin_unlock(&mdsc->dentry_lru_lock);
1187 } 1188 }
1188 } 1189 }
1189 1190
1190 void ceph_dentry_lru_touch(struct dentry *dn) 1191 void ceph_dentry_lru_touch(struct dentry *dn)
1191 { 1192 {
1192 struct ceph_dentry_info *di = ceph_dentry(dn); 1193 struct ceph_dentry_info *di = ceph_dentry(dn);
1193 struct ceph_mds_client *mdsc; 1194 struct ceph_mds_client *mdsc;
1194 1195
1195 dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, 1196 dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
1196 dn->d_name.len, dn->d_name.name, di->offset); 1197 dn->d_name.len, dn->d_name.name, di->offset);
1197 if (di) { 1198 if (di) {
1198 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1199 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
1199 spin_lock(&mdsc->dentry_lru_lock); 1200 spin_lock(&mdsc->dentry_lru_lock);
1200 list_move_tail(&di->lru, &mdsc->dentry_lru); 1201 list_move_tail(&di->lru, &mdsc->dentry_lru);
1201 spin_unlock(&mdsc->dentry_lru_lock); 1202 spin_unlock(&mdsc->dentry_lru_lock);
1202 } 1203 }
1203 } 1204 }
1204 1205
1205 void ceph_dentry_lru_del(struct dentry *dn) 1206 void ceph_dentry_lru_del(struct dentry *dn)
1206 { 1207 {
1207 struct ceph_dentry_info *di = ceph_dentry(dn); 1208 struct ceph_dentry_info *di = ceph_dentry(dn);
1208 struct ceph_mds_client *mdsc; 1209 struct ceph_mds_client *mdsc;
1209 1210
1210 dout("dentry_lru_del %p %p '%.*s'\n", di, dn, 1211 dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
1211 dn->d_name.len, dn->d_name.name); 1212 dn->d_name.len, dn->d_name.name);
1212 if (di) { 1213 if (di) {
1213 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1214 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
1214 spin_lock(&mdsc->dentry_lru_lock); 1215 spin_lock(&mdsc->dentry_lru_lock);
1215 list_del_init(&di->lru); 1216 list_del_init(&di->lru);
1216 mdsc->num_dentry--; 1217 mdsc->num_dentry--;
1217 spin_unlock(&mdsc->dentry_lru_lock); 1218 spin_unlock(&mdsc->dentry_lru_lock);
1218 } 1219 }
1219 } 1220 }
1220 1221
1221 const struct file_operations ceph_dir_fops = { 1222 const struct file_operations ceph_dir_fops = {
1222 .read = ceph_read_dir, 1223 .read = ceph_read_dir,
1223 .readdir = ceph_readdir, 1224 .readdir = ceph_readdir,
1224 .llseek = ceph_dir_llseek, 1225 .llseek = ceph_dir_llseek,
1225 .open = ceph_open, 1226 .open = ceph_open,
1226 .release = ceph_release, 1227 .release = ceph_release,
1227 .unlocked_ioctl = ceph_ioctl, 1228 .unlocked_ioctl = ceph_ioctl,
1228 .fsync = ceph_dir_fsync, 1229 .fsync = ceph_dir_fsync,
1229 }; 1230 };
1230 1231
1231 const struct inode_operations ceph_dir_iops = { 1232 const struct inode_operations ceph_dir_iops = {
1232 .lookup = ceph_lookup, 1233 .lookup = ceph_lookup,
1233 .permission = ceph_permission, 1234 .permission = ceph_permission,
1234 .getattr = ceph_getattr, 1235 .getattr = ceph_getattr,
1235 .setattr = ceph_setattr, 1236 .setattr = ceph_setattr,
1236 .setxattr = ceph_setxattr, 1237 .setxattr = ceph_setxattr,
1237 .getxattr = ceph_getxattr, 1238 .getxattr = ceph_getxattr,
1238 .listxattr = ceph_listxattr, 1239 .listxattr = ceph_listxattr,
1239 .removexattr = ceph_removexattr, 1240 .removexattr = ceph_removexattr,
1240 .mknod = ceph_mknod, 1241 .mknod = ceph_mknod,
1241 .symlink = ceph_symlink, 1242 .symlink = ceph_symlink,
1242 .mkdir = ceph_mkdir, 1243 .mkdir = ceph_mkdir,
1243 .link = ceph_link, 1244 .link = ceph_link,
1244 .unlink = ceph_unlink, 1245 .unlink = ceph_unlink,
1245 .rmdir = ceph_unlink, 1246 .rmdir = ceph_unlink,
1246 .rename = ceph_rename, 1247 .rename = ceph_rename,
1247 .create = ceph_create, 1248 .create = ceph_create,
1248 }; 1249 };
1249 1250
1250 const struct dentry_operations ceph_dentry_ops = { 1251 const struct dentry_operations ceph_dentry_ops = {
1251 .d_revalidate = ceph_d_revalidate, 1252 .d_revalidate = ceph_d_revalidate,
1252 .d_release = ceph_dentry_release, 1253 .d_release = ceph_dentry_release,
1253 }; 1254 };
1254 1255
1255 const struct dentry_operations ceph_snapdir_dentry_ops = { 1256 const struct dentry_operations ceph_snapdir_dentry_ops = {
1256 .d_revalidate = ceph_snapdir_d_revalidate, 1257 .d_revalidate = ceph_snapdir_d_revalidate,
1257 .d_release = ceph_dentry_release, 1258 .d_release = ceph_dentry_release,
1258 }; 1259 };
1259 1260
1260 const struct dentry_operations ceph_snap_dentry_ops = { 1261 const struct dentry_operations ceph_snap_dentry_ops = {
1261 .d_release = ceph_dentry_release, 1262 .d_release = ceph_dentry_release,
1262 }; 1263 };
fs/configfs/configfs_internal.h
1 /* -*- mode: c; c-basic-offset:8; -*- 1 /* -*- mode: c; c-basic-offset:8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * configfs_internal.h - Internal stuff for configfs 4 * configfs_internal.h - Internal stuff for configfs
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public 7 * modify it under the terms of the GNU General Public
8 * License as published by the Free Software Foundation; either 8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version. 9 * version 2 of the License, or (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details. 14 * General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public 16 * You should have received a copy of the GNU General Public
17 * License along with this program; if not, write to the 17 * License along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA. 19 * Boston, MA 021110-1307, USA.
20 * 20 *
21 * Based on sysfs: 21 * Based on sysfs:
22 * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel 22 * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
23 * 23 *
24 * configfs Copyright (C) 2005 Oracle. All rights reserved. 24 * configfs Copyright (C) 2005 Oracle. All rights reserved.
25 */ 25 */
26 26
27 #include <linux/slab.h> 27 #include <linux/slab.h>
28 #include <linux/list.h> 28 #include <linux/list.h>
29 #include <linux/spinlock.h> 29 #include <linux/spinlock.h>
30 30
31 struct configfs_dirent { 31 struct configfs_dirent {
32 atomic_t s_count; 32 atomic_t s_count;
33 int s_dependent_count; 33 int s_dependent_count;
34 struct list_head s_sibling; 34 struct list_head s_sibling;
35 struct list_head s_children; 35 struct list_head s_children;
36 struct list_head s_links; 36 struct list_head s_links;
37 void * s_element; 37 void * s_element;
38 int s_type; 38 int s_type;
39 umode_t s_mode; 39 umode_t s_mode;
40 struct dentry * s_dentry; 40 struct dentry * s_dentry;
41 struct iattr * s_iattr; 41 struct iattr * s_iattr;
42 #ifdef CONFIG_LOCKDEP 42 #ifdef CONFIG_LOCKDEP
43 int s_depth; 43 int s_depth;
44 #endif 44 #endif
45 }; 45 };
46 46
/*
 * Flag bits tested against configfs_dirent.s_type.
 * 0x0008 and 0x0010 are not assigned in this list.
 */
#define CONFIGFS_ROOT		0x0001
#define CONFIGFS_DIR		0x0002
#define CONFIGFS_ITEM_ATTR	0x0004
#define CONFIGFS_ITEM_LINK	0x0020
#define CONFIGFS_USET_DIR	0x0040
#define CONFIGFS_USET_DEFAULT	0x0080
#define CONFIGFS_USET_DROPPING	0x0100
#define CONFIGFS_USET_IN_MKDIR	0x0200
#define CONFIGFS_USET_CREATING	0x0400
/* mask of entry types treated as "not pinned": attributes only */
#define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
57 57
58 extern struct mutex configfs_symlink_mutex; 58 extern struct mutex configfs_symlink_mutex;
59 extern spinlock_t configfs_dirent_lock; 59 extern spinlock_t configfs_dirent_lock;
60 60
61 extern struct vfsmount * configfs_mount; 61 extern struct vfsmount * configfs_mount;
62 extern struct kmem_cache *configfs_dir_cachep; 62 extern struct kmem_cache *configfs_dir_cachep;
63 63
64 extern int configfs_is_root(struct config_item *item); 64 extern int configfs_is_root(struct config_item *item);
65 65
66 extern struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent *); 66 extern struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent *);
67 extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *)); 67 extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *));
68 extern int configfs_inode_init(void); 68 extern int configfs_inode_init(void);
69 extern void configfs_inode_exit(void); 69 extern void configfs_inode_exit(void);
70 70
71 extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); 71 extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
72 extern int configfs_make_dirent(struct configfs_dirent *, 72 extern int configfs_make_dirent(struct configfs_dirent *,
73 struct dentry *, void *, umode_t, int); 73 struct dentry *, void *, umode_t, int);
74 extern int configfs_dirent_is_ready(struct configfs_dirent *); 74 extern int configfs_dirent_is_ready(struct configfs_dirent *);
75 75
76 extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); 76 extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
77 extern void configfs_hash_and_remove(struct dentry * dir, const char * name); 77 extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
78 78
79 extern const unsigned char * configfs_get_name(struct configfs_dirent *sd); 79 extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
80 extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent); 80 extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
81 extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr); 81 extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
82 82
83 extern int configfs_pin_fs(void); 83 extern int configfs_pin_fs(void);
84 extern void configfs_release_fs(void); 84 extern void configfs_release_fs(void);
85 85
86 extern struct rw_semaphore configfs_rename_sem; 86 extern struct rw_semaphore configfs_rename_sem;
87 extern struct super_block * configfs_sb; 87 extern struct super_block * configfs_sb;
88 extern const struct file_operations configfs_dir_operations; 88 extern const struct file_operations configfs_dir_operations;
89 extern const struct file_operations configfs_file_operations; 89 extern const struct file_operations configfs_file_operations;
90 extern const struct file_operations bin_fops; 90 extern const struct file_operations bin_fops;
91 extern const struct inode_operations configfs_dir_inode_operations; 91 extern const struct inode_operations configfs_dir_inode_operations;
92 extern const struct inode_operations configfs_symlink_inode_operations; 92 extern const struct inode_operations configfs_symlink_inode_operations;
93 93
94 extern int configfs_symlink(struct inode *dir, struct dentry *dentry, 94 extern int configfs_symlink(struct inode *dir, struct dentry *dentry,
95 const char *symname); 95 const char *symname);
96 extern int configfs_unlink(struct inode *dir, struct dentry *dentry); 96 extern int configfs_unlink(struct inode *dir, struct dentry *dentry);
97 97
98 struct configfs_symlink { 98 struct configfs_symlink {
99 struct list_head sl_list; 99 struct list_head sl_list;
100 struct config_item *sl_target; 100 struct config_item *sl_target;
101 }; 101 };
102 102
103 extern int configfs_create_link(struct configfs_symlink *sl, 103 extern int configfs_create_link(struct configfs_symlink *sl,
104 struct dentry *parent, 104 struct dentry *parent,
105 struct dentry *dentry); 105 struct dentry *dentry);
106 106
107 static inline struct config_item * to_item(struct dentry * dentry) 107 static inline struct config_item * to_item(struct dentry * dentry)
108 { 108 {
109 struct configfs_dirent * sd = dentry->d_fsdata; 109 struct configfs_dirent * sd = dentry->d_fsdata;
110 return ((struct config_item *) sd->s_element); 110 return ((struct config_item *) sd->s_element);
111 } 111 }
112 112
113 static inline struct configfs_attribute * to_attr(struct dentry * dentry) 113 static inline struct configfs_attribute * to_attr(struct dentry * dentry)
114 { 114 {
115 struct configfs_dirent * sd = dentry->d_fsdata; 115 struct configfs_dirent * sd = dentry->d_fsdata;
116 return ((struct configfs_attribute *) sd->s_element); 116 return ((struct configfs_attribute *) sd->s_element);
117 } 117 }
118 118
119 static inline struct config_item *configfs_get_config_item(struct dentry *dentry) 119 static inline struct config_item *configfs_get_config_item(struct dentry *dentry)
120 { 120 {
121 struct config_item * item = NULL; 121 struct config_item * item = NULL;
122 122
123 spin_lock(&dcache_lock); 123 spin_lock(&dcache_lock);
124 spin_lock(&dentry->d_lock);
124 if (!d_unhashed(dentry)) { 125 if (!d_unhashed(dentry)) {
125 struct configfs_dirent * sd = dentry->d_fsdata; 126 struct configfs_dirent * sd = dentry->d_fsdata;
126 if (sd->s_type & CONFIGFS_ITEM_LINK) { 127 if (sd->s_type & CONFIGFS_ITEM_LINK) {
127 struct configfs_symlink * sl = sd->s_element; 128 struct configfs_symlink * sl = sd->s_element;
128 item = config_item_get(sl->sl_target); 129 item = config_item_get(sl->sl_target);
129 } else 130 } else
130 item = config_item_get(sd->s_element); 131 item = config_item_get(sd->s_element);
131 } 132 }
133 spin_unlock(&dentry->d_lock);
132 spin_unlock(&dcache_lock); 134 spin_unlock(&dcache_lock);
133 135
134 return item; 136 return item;
135 } 137 }
136 138
137 static inline void release_configfs_dirent(struct configfs_dirent * sd) 139 static inline void release_configfs_dirent(struct configfs_dirent * sd)
138 { 140 {
139 if (!(sd->s_type & CONFIGFS_ROOT)) { 141 if (!(sd->s_type & CONFIGFS_ROOT)) {
140 kfree(sd->s_iattr); 142 kfree(sd->s_iattr);
141 kmem_cache_free(configfs_dir_cachep, sd); 143 kmem_cache_free(configfs_dir_cachep, sd);
142 } 144 }
143 } 145 }
144 146
145 static inline struct configfs_dirent * configfs_get(struct configfs_dirent * sd) 147 static inline struct configfs_dirent * configfs_get(struct configfs_dirent * sd)
146 { 148 {
147 if (sd) { 149 if (sd) {
148 WARN_ON(!atomic_read(&sd->s_count)); 150 WARN_ON(!atomic_read(&sd->s_count));
149 atomic_inc(&sd->s_count); 151 atomic_inc(&sd->s_count);
150 } 152 }
151 return sd; 153 return sd;
152 } 154 }
153 155
154 static inline void configfs_put(struct configfs_dirent * sd) 156 static inline void configfs_put(struct configfs_dirent * sd)
155 { 157 {
156 WARN_ON(!atomic_read(&sd->s_count)); 158 WARN_ON(!atomic_read(&sd->s_count));
157 if (atomic_dec_and_test(&sd->s_count)) 159 if (atomic_dec_and_test(&sd->s_count))
158 release_configfs_dirent(sd); 160 release_configfs_dirent(sd);
159 } 161 }
160 162
161 163
1 /* 1 /*
2 * fs/dcache.c 2 * fs/dcache.c
3 * 3 *
4 * Complete reimplementation 4 * Complete reimplementation
5 * (C) 1997 Thomas Schoebel-Theuer, 5 * (C) 1997 Thomas Schoebel-Theuer,
6 * with heavy changes by Linus Torvalds 6 * with heavy changes by Linus Torvalds
7 */ 7 */
8 8
9 /* 9 /*
10 * Notes on the allocation strategy: 10 * Notes on the allocation strategy:
11 * 11 *
12 * The dcache is a master of the icache - whenever a dcache entry 12 * The dcache is a master of the icache - whenever a dcache entry
13 * exists, the inode will always exist. "iput()" is done either when 13 * exists, the inode will always exist. "iput()" is done either when
14 * the dcache entry is deleted or garbage collected. 14 * the dcache entry is deleted or garbage collected.
15 */ 15 */
16 16
17 #include <linux/syscalls.h> 17 #include <linux/syscalls.h>
18 #include <linux/string.h> 18 #include <linux/string.h>
19 #include <linux/mm.h> 19 #include <linux/mm.h>
20 #include <linux/fs.h> 20 #include <linux/fs.h>
21 #include <linux/fsnotify.h> 21 #include <linux/fsnotify.h>
22 #include <linux/slab.h> 22 #include <linux/slab.h>
23 #include <linux/init.h> 23 #include <linux/init.h>
24 #include <linux/hash.h> 24 #include <linux/hash.h>
25 #include <linux/cache.h> 25 #include <linux/cache.h>
26 #include <linux/module.h> 26 #include <linux/module.h>
27 #include <linux/mount.h> 27 #include <linux/mount.h>
28 #include <linux/file.h> 28 #include <linux/file.h>
29 #include <asm/uaccess.h> 29 #include <asm/uaccess.h>
30 #include <linux/security.h> 30 #include <linux/security.h>
31 #include <linux/seqlock.h> 31 #include <linux/seqlock.h>
32 #include <linux/swap.h> 32 #include <linux/swap.h>
33 #include <linux/bootmem.h> 33 #include <linux/bootmem.h>
34 #include <linux/fs_struct.h> 34 #include <linux/fs_struct.h>
35 #include <linux/hardirq.h> 35 #include <linux/hardirq.h>
36 #include "internal.h" 36 #include "internal.h"
37 37
38 /* 38 /*
39 * Usage: 39 * Usage:
40 * dcache_hash_lock protects: 40 * dcache_hash_lock protects:
41 * - the dcache hash table, s_anon lists 41 * - the dcache hash table, s_anon lists
42 * dcache_lru_lock protects: 42 * dcache_lru_lock protects:
43 * - the dcache lru lists and counters 43 * - the dcache lru lists and counters
44 * d_lock protects: 44 * d_lock protects:
45 * - d_flags 45 * - d_flags
46 * - d_name 46 * - d_name
47 * - d_lru 47 * - d_lru
48 * - d_count 48 * - d_count
49 * - d_unhashed()
49 * 50 *
50 * Ordering: 51 * Ordering:
51 * dcache_lock 52 * dcache_lock
52 * dentry->d_lock 53 * dentry->d_lock
53 * dcache_lru_lock 54 * dcache_lru_lock
54 * dcache_hash_lock 55 * dcache_hash_lock
55 * 56 *
57 * If there is an ancestor relationship:
58 * dentry->d_parent->...->d_parent->d_lock
59 * ...
60 * dentry->d_parent->d_lock
61 * dentry->d_lock
62 *
63 * If no ancestor relationship:
56 * if (dentry1 < dentry2) 64 * if (dentry1 < dentry2)
57 * dentry1->d_lock 65 * dentry1->d_lock
58 * dentry2->d_lock 66 * dentry2->d_lock
59 */ 67 */
60 int sysctl_vfs_cache_pressure __read_mostly = 100; 68 int sysctl_vfs_cache_pressure __read_mostly = 100;
61 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 69 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
62 70
63 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); 71 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock);
64 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); 72 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
65 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 73 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
66 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 74 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
67 75
68 EXPORT_SYMBOL(dcache_lock); 76 EXPORT_SYMBOL(dcache_lock);
69 77
70 static struct kmem_cache *dentry_cache __read_mostly; 78 static struct kmem_cache *dentry_cache __read_mostly;
71 79
72 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) 80 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
73 81
74 /* 82 /*
75 * This is the single most critical data structure when it comes 83 * This is the single most critical data structure when it comes
76 * to the dcache: the hashtable for lookups. Somebody should try 84 * to the dcache: the hashtable for lookups. Somebody should try
77 * to make this good - I've just made it work. 85 * to make this good - I've just made it work.
78 * 86 *
79 * This hash-function tries to avoid losing too many bits of hash 87 * This hash-function tries to avoid losing too many bits of hash
80 * information, yet avoid using a prime hash-size or similar. 88 * information, yet avoid using a prime hash-size or similar.
81 */ 89 */
82 #define D_HASHBITS d_hash_shift 90 #define D_HASHBITS d_hash_shift
83 #define D_HASHMASK d_hash_mask 91 #define D_HASHMASK d_hash_mask
84 92
85 static unsigned int d_hash_mask __read_mostly; 93 static unsigned int d_hash_mask __read_mostly;
86 static unsigned int d_hash_shift __read_mostly; 94 static unsigned int d_hash_shift __read_mostly;
87 static struct hlist_head *dentry_hashtable __read_mostly; 95 static struct hlist_head *dentry_hashtable __read_mostly;
88 96
89 /* Statistics gathering. */ 97 /* Statistics gathering. */
90 struct dentry_stat_t dentry_stat = { 98 struct dentry_stat_t dentry_stat = {
91 .age_limit = 45, 99 .age_limit = 45,
92 }; 100 };
93 101
94 static DEFINE_PER_CPU(unsigned int, nr_dentry); 102 static DEFINE_PER_CPU(unsigned int, nr_dentry);
95 103
96 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 104 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
97 static int get_nr_dentry(void) 105 static int get_nr_dentry(void)
98 { 106 {
99 int i; 107 int i;
100 int sum = 0; 108 int sum = 0;
101 for_each_possible_cpu(i) 109 for_each_possible_cpu(i)
102 sum += per_cpu(nr_dentry, i); 110 sum += per_cpu(nr_dentry, i);
103 return sum < 0 ? 0 : sum; 111 return sum < 0 ? 0 : sum;
104 } 112 }
105 113
106 int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, 114 int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
107 size_t *lenp, loff_t *ppos) 115 size_t *lenp, loff_t *ppos)
108 { 116 {
109 dentry_stat.nr_dentry = get_nr_dentry(); 117 dentry_stat.nr_dentry = get_nr_dentry();
110 return proc_dointvec(table, write, buffer, lenp, ppos); 118 return proc_dointvec(table, write, buffer, lenp, ppos);
111 } 119 }
112 #endif 120 #endif
113 121
114 static void __d_free(struct rcu_head *head) 122 static void __d_free(struct rcu_head *head)
115 { 123 {
116 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); 124 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
117 125
118 WARN_ON(!list_empty(&dentry->d_alias)); 126 WARN_ON(!list_empty(&dentry->d_alias));
119 if (dname_external(dentry)) 127 if (dname_external(dentry))
120 kfree(dentry->d_name.name); 128 kfree(dentry->d_name.name);
121 kmem_cache_free(dentry_cache, dentry); 129 kmem_cache_free(dentry_cache, dentry);
122 } 130 }
123 131
124 /* 132 /*
125 * no dcache_lock, please. 133 * no dcache_lock, please.
126 */ 134 */
127 static void d_free(struct dentry *dentry) 135 static void d_free(struct dentry *dentry)
128 { 136 {
129 BUG_ON(dentry->d_count); 137 BUG_ON(dentry->d_count);
130 this_cpu_dec(nr_dentry); 138 this_cpu_dec(nr_dentry);
131 if (dentry->d_op && dentry->d_op->d_release) 139 if (dentry->d_op && dentry->d_op->d_release)
132 dentry->d_op->d_release(dentry); 140 dentry->d_op->d_release(dentry);
133 141
134 /* if dentry was never inserted into hash, immediate free is OK */ 142 /* if dentry was never inserted into hash, immediate free is OK */
135 if (hlist_unhashed(&dentry->d_hash)) 143 if (hlist_unhashed(&dentry->d_hash))
136 __d_free(&dentry->d_u.d_rcu); 144 __d_free(&dentry->d_u.d_rcu);
137 else 145 else
138 call_rcu(&dentry->d_u.d_rcu, __d_free); 146 call_rcu(&dentry->d_u.d_rcu, __d_free);
139 } 147 }
140 148
141 /* 149 /*
142 * Release the dentry's inode, using the filesystem 150 * Release the dentry's inode, using the filesystem
143 * d_iput() operation if defined. 151 * d_iput() operation if defined.
144 */ 152 */
145 static void dentry_iput(struct dentry * dentry) 153 static void dentry_iput(struct dentry * dentry)
146 __releases(dentry->d_lock) 154 __releases(dentry->d_lock)
147 __releases(dcache_lock) 155 __releases(dcache_lock)
148 { 156 {
149 struct inode *inode = dentry->d_inode; 157 struct inode *inode = dentry->d_inode;
150 if (inode) { 158 if (inode) {
151 dentry->d_inode = NULL; 159 dentry->d_inode = NULL;
152 list_del_init(&dentry->d_alias); 160 list_del_init(&dentry->d_alias);
153 spin_unlock(&dentry->d_lock); 161 spin_unlock(&dentry->d_lock);
154 spin_unlock(&dcache_lock); 162 spin_unlock(&dcache_lock);
155 if (!inode->i_nlink) 163 if (!inode->i_nlink)
156 fsnotify_inoderemove(inode); 164 fsnotify_inoderemove(inode);
157 if (dentry->d_op && dentry->d_op->d_iput) 165 if (dentry->d_op && dentry->d_op->d_iput)
158 dentry->d_op->d_iput(dentry, inode); 166 dentry->d_op->d_iput(dentry, inode);
159 else 167 else
160 iput(inode); 168 iput(inode);
161 } else { 169 } else {
162 spin_unlock(&dentry->d_lock); 170 spin_unlock(&dentry->d_lock);
163 spin_unlock(&dcache_lock); 171 spin_unlock(&dcache_lock);
164 } 172 }
165 } 173 }
166 174
167 /* 175 /*
168 * dentry_lru_(add|del|move_tail) must be called with d_lock held. 176 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
169 */ 177 */
170 static void dentry_lru_add(struct dentry *dentry) 178 static void dentry_lru_add(struct dentry *dentry)
171 { 179 {
172 if (list_empty(&dentry->d_lru)) { 180 if (list_empty(&dentry->d_lru)) {
173 spin_lock(&dcache_lru_lock); 181 spin_lock(&dcache_lru_lock);
174 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 182 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
175 dentry->d_sb->s_nr_dentry_unused++; 183 dentry->d_sb->s_nr_dentry_unused++;
176 dentry_stat.nr_unused++; 184 dentry_stat.nr_unused++;
177 spin_unlock(&dcache_lru_lock); 185 spin_unlock(&dcache_lru_lock);
178 } 186 }
179 } 187 }
180 188
181 static void __dentry_lru_del(struct dentry *dentry) 189 static void __dentry_lru_del(struct dentry *dentry)
182 { 190 {
183 list_del_init(&dentry->d_lru); 191 list_del_init(&dentry->d_lru);
184 dentry->d_sb->s_nr_dentry_unused--; 192 dentry->d_sb->s_nr_dentry_unused--;
185 dentry_stat.nr_unused--; 193 dentry_stat.nr_unused--;
186 } 194 }
187 195
188 static void dentry_lru_del(struct dentry *dentry) 196 static void dentry_lru_del(struct dentry *dentry)
189 { 197 {
190 if (!list_empty(&dentry->d_lru)) { 198 if (!list_empty(&dentry->d_lru)) {
191 spin_lock(&dcache_lru_lock); 199 spin_lock(&dcache_lru_lock);
192 __dentry_lru_del(dentry); 200 __dentry_lru_del(dentry);
193 spin_unlock(&dcache_lru_lock); 201 spin_unlock(&dcache_lru_lock);
194 } 202 }
195 } 203 }
196 204
197 static void dentry_lru_move_tail(struct dentry *dentry) 205 static void dentry_lru_move_tail(struct dentry *dentry)
198 { 206 {
199 spin_lock(&dcache_lru_lock); 207 spin_lock(&dcache_lru_lock);
200 if (list_empty(&dentry->d_lru)) { 208 if (list_empty(&dentry->d_lru)) {
201 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 209 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
202 dentry->d_sb->s_nr_dentry_unused++; 210 dentry->d_sb->s_nr_dentry_unused++;
203 dentry_stat.nr_unused++; 211 dentry_stat.nr_unused++;
204 } else { 212 } else {
205 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 213 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
206 } 214 }
207 spin_unlock(&dcache_lru_lock); 215 spin_unlock(&dcache_lru_lock);
208 } 216 }
209 217
210 /** 218 /**
211 * d_kill - kill dentry and return parent 219 * d_kill - kill dentry and return parent
212 * @dentry: dentry to kill 220 * @dentry: dentry to kill
213 * 221 *
214 * The dentry must already be unhashed and removed from the LRU. 222 * The dentry must already be unhashed and removed from the LRU.
215 * 223 *
216 * If this is the root of the dentry tree, return NULL. 224 * If this is the root of the dentry tree, return NULL.
217 * 225 *
218 * dcache_lock and d_lock must be held by caller, are dropped by d_kill. 226 * dcache_lock and d_lock must be held by caller, are dropped by d_kill.
219 */ 227 */
220 static struct dentry *d_kill(struct dentry *dentry) 228 static struct dentry *d_kill(struct dentry *dentry)
221 __releases(dentry->d_lock) 229 __releases(dentry->d_lock)
222 __releases(dcache_lock) 230 __releases(dcache_lock)
223 { 231 {
224 struct dentry *parent; 232 struct dentry *parent;
225 233
226 list_del(&dentry->d_u.d_child); 234 list_del(&dentry->d_u.d_child);
227 dentry_iput(dentry); 235 dentry_iput(dentry);
228 /* 236 /*
229 * dentry_iput drops the locks, at which point nobody (except 237 * dentry_iput drops the locks, at which point nobody (except
230 * transient RCU lookups) can reach this dentry. 238 * transient RCU lookups) can reach this dentry.
231 */ 239 */
232 if (IS_ROOT(dentry)) 240 if (IS_ROOT(dentry))
233 parent = NULL; 241 parent = NULL;
234 else 242 else
235 parent = dentry->d_parent; 243 parent = dentry->d_parent;
236 d_free(dentry); 244 d_free(dentry);
237 return parent; 245 return parent;
238 } 246 }
239 247
240 /** 248 /**
241 * d_drop - drop a dentry 249 * d_drop - drop a dentry
242 * @dentry: dentry to drop 250 * @dentry: dentry to drop
243 * 251 *
244 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't 252 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
245 * be found through a VFS lookup any more. Note that this is different from 253 * be found through a VFS lookup any more. Note that this is different from
246 * deleting the dentry - d_delete will try to mark the dentry negative if 254 * deleting the dentry - d_delete will try to mark the dentry negative if
247 * possible, giving a successful _negative_ lookup, while d_drop will 255 * possible, giving a successful _negative_ lookup, while d_drop will
248 * just make the cache lookup fail. 256 * just make the cache lookup fail.
249 * 257 *
250 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some 258 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
251 * reason (NFS timeouts or autofs deletes). 259 * reason (NFS timeouts or autofs deletes).
252 * 260 *
253 * __d_drop requires dentry->d_lock. 261 * __d_drop requires dentry->d_lock.
254 */ 262 */
255 void __d_drop(struct dentry *dentry) 263 void __d_drop(struct dentry *dentry)
256 { 264 {
257 if (!(dentry->d_flags & DCACHE_UNHASHED)) { 265 if (!(dentry->d_flags & DCACHE_UNHASHED)) {
258 dentry->d_flags |= DCACHE_UNHASHED; 266 dentry->d_flags |= DCACHE_UNHASHED;
259 spin_lock(&dcache_hash_lock); 267 spin_lock(&dcache_hash_lock);
260 hlist_del_rcu(&dentry->d_hash); 268 hlist_del_rcu(&dentry->d_hash);
261 spin_unlock(&dcache_hash_lock); 269 spin_unlock(&dcache_hash_lock);
262 } 270 }
263 } 271 }
264 EXPORT_SYMBOL(__d_drop); 272 EXPORT_SYMBOL(__d_drop);
265 273
266 void d_drop(struct dentry *dentry) 274 void d_drop(struct dentry *dentry)
267 { 275 {
268 spin_lock(&dcache_lock); 276 spin_lock(&dcache_lock);
269 spin_lock(&dentry->d_lock); 277 spin_lock(&dentry->d_lock);
270 __d_drop(dentry); 278 __d_drop(dentry);
271 spin_unlock(&dentry->d_lock); 279 spin_unlock(&dentry->d_lock);
272 spin_unlock(&dcache_lock); 280 spin_unlock(&dcache_lock);
273 } 281 }
274 EXPORT_SYMBOL(d_drop); 282 EXPORT_SYMBOL(d_drop);
275 283
276 /* 284 /*
277 * This is dput 285 * This is dput
278 * 286 *
279 * This is complicated by the fact that we do not want to put 287 * This is complicated by the fact that we do not want to put
280 * dentries that are no longer on any hash chain on the unused 288 * dentries that are no longer on any hash chain on the unused
281 * list: we'd much rather just get rid of them immediately. 289 * list: we'd much rather just get rid of them immediately.
282 * 290 *
283 * However, that implies that we have to traverse the dentry 291 * However, that implies that we have to traverse the dentry
284 * tree upwards to the parents which might _also_ now be 292 * tree upwards to the parents which might _also_ now be
285 * scheduled for deletion (it may have been only waiting for 293 * scheduled for deletion (it may have been only waiting for
286 * its last child to go away). 294 * its last child to go away).
287 * 295 *
288 * This tail recursion is done by hand as we don't want to depend 296 * This tail recursion is done by hand as we don't want to depend
289 * on the compiler to always get this right (gcc generally doesn't). 297 * on the compiler to always get this right (gcc generally doesn't).
290 * Real recursion would eat up our stack space. 298 * Real recursion would eat up our stack space.
291 */ 299 */
292 300
293 /* 301 /*
294 * dput - release a dentry 302 * dput - release a dentry
295 * @dentry: dentry to release 303 * @dentry: dentry to release
296 * 304 *
297 * Release a dentry. This will drop the usage count and if appropriate 305 * Release a dentry. This will drop the usage count and if appropriate
298 * call the dentry unlink method as well as removing it from the queues and 306 * call the dentry unlink method as well as removing it from the queues and
299 * releasing its resources. If the parent dentries were scheduled for release 307 * releasing its resources. If the parent dentries were scheduled for release
300 * they too may now get deleted. 308 * they too may now get deleted.
301 * 309 *
302 * no dcache lock, please. 310 * no dcache lock, please.
303 */ 311 */
304 312
305 void dput(struct dentry *dentry) 313 void dput(struct dentry *dentry)
306 { 314 {
307 if (!dentry) 315 if (!dentry)
308 return; 316 return;
309 317
310 repeat: 318 repeat:
311 if (dentry->d_count == 1) 319 if (dentry->d_count == 1)
312 might_sleep(); 320 might_sleep();
313 spin_lock(&dentry->d_lock); 321 spin_lock(&dentry->d_lock);
314 if (dentry->d_count == 1) { 322 if (dentry->d_count == 1) {
315 if (!spin_trylock(&dcache_lock)) { 323 if (!spin_trylock(&dcache_lock)) {
316 /* 324 /*
317 * Something of a livelock possibility we could avoid 325 * Something of a livelock possibility we could avoid
318 * by taking dcache_lock and trying again, but we 326 * by taking dcache_lock and trying again, but we
319 * want to reduce dcache_lock anyway so this will 327 * want to reduce dcache_lock anyway so this will
320 * get improved. 328 * get improved.
321 */ 329 */
322 spin_unlock(&dentry->d_lock); 330 spin_unlock(&dentry->d_lock);
323 goto repeat; 331 goto repeat;
324 } 332 }
325 } 333 }
326 dentry->d_count--; 334 dentry->d_count--;
327 if (dentry->d_count) { 335 if (dentry->d_count) {
328 spin_unlock(&dentry->d_lock); 336 spin_unlock(&dentry->d_lock);
329 spin_unlock(&dcache_lock); 337 spin_unlock(&dcache_lock);
330 return; 338 return;
331 } 339 }
332 340
333 /* 341 /*
334 * AV: ->d_delete() is _NOT_ allowed to block now. 342 * AV: ->d_delete() is _NOT_ allowed to block now.
335 */ 343 */
336 if (dentry->d_op && dentry->d_op->d_delete) { 344 if (dentry->d_op && dentry->d_op->d_delete) {
337 if (dentry->d_op->d_delete(dentry)) 345 if (dentry->d_op->d_delete(dentry))
338 goto unhash_it; 346 goto unhash_it;
339 } 347 }
340 348
341 /* Unreachable? Get rid of it */ 349 /* Unreachable? Get rid of it */
342 if (d_unhashed(dentry)) 350 if (d_unhashed(dentry))
343 goto kill_it; 351 goto kill_it;
344 352
345 /* Otherwise leave it cached and ensure it's on the LRU */ 353 /* Otherwise leave it cached and ensure it's on the LRU */
346 dentry->d_flags |= DCACHE_REFERENCED; 354 dentry->d_flags |= DCACHE_REFERENCED;
347 dentry_lru_add(dentry); 355 dentry_lru_add(dentry);
348 356
349 spin_unlock(&dentry->d_lock); 357 spin_unlock(&dentry->d_lock);
350 spin_unlock(&dcache_lock); 358 spin_unlock(&dcache_lock);
351 return; 359 return;
352 360
353 unhash_it: 361 unhash_it:
354 __d_drop(dentry); 362 __d_drop(dentry);
355 kill_it: 363 kill_it:
356 /* if dentry was on the d_lru list delete it from there */ 364 /* if dentry was on the d_lru list delete it from there */
357 dentry_lru_del(dentry); 365 dentry_lru_del(dentry);
358 dentry = d_kill(dentry); 366 dentry = d_kill(dentry);
359 if (dentry) 367 if (dentry)
360 goto repeat; 368 goto repeat;
361 } 369 }
362 EXPORT_SYMBOL(dput); 370 EXPORT_SYMBOL(dput);
363 371
364 /** 372 /**
365 * d_invalidate - invalidate a dentry 373 * d_invalidate - invalidate a dentry
366 * @dentry: dentry to invalidate 374 * @dentry: dentry to invalidate
367 * 375 *
368 * Try to invalidate the dentry if it turns out to be 376 * Try to invalidate the dentry if it turns out to be
369 * possible. If there are other dentries that can be 377 * possible. If there are other dentries that can be
370 * reached through this one we can't delete it and we 378 * reached through this one we can't delete it and we
371 * return -EBUSY. On success we return 0. 379 * return -EBUSY. On success we return 0.
372 * 380 *
373 * no dcache lock. 381 * no dcache lock.
374 */ 382 */
375 383
376 int d_invalidate(struct dentry * dentry) 384 int d_invalidate(struct dentry * dentry)
377 { 385 {
378 /* 386 /*
379 * If it's already been dropped, return OK. 387 * If it's already been dropped, return OK.
380 */ 388 */
381 spin_lock(&dcache_lock); 389 spin_lock(&dcache_lock);
390 spin_lock(&dentry->d_lock);
382 if (d_unhashed(dentry)) { 391 if (d_unhashed(dentry)) {
392 spin_unlock(&dentry->d_lock);
383 spin_unlock(&dcache_lock); 393 spin_unlock(&dcache_lock);
384 return 0; 394 return 0;
385 } 395 }
386 /* 396 /*
387 * Check whether to do a partial shrink_dcache 397 * Check whether to do a partial shrink_dcache
388 * to get rid of unused child entries. 398 * to get rid of unused child entries.
389 */ 399 */
390 if (!list_empty(&dentry->d_subdirs)) { 400 if (!list_empty(&dentry->d_subdirs)) {
401 spin_unlock(&dentry->d_lock);
391 spin_unlock(&dcache_lock); 402 spin_unlock(&dcache_lock);
392 shrink_dcache_parent(dentry); 403 shrink_dcache_parent(dentry);
393 spin_lock(&dcache_lock); 404 spin_lock(&dcache_lock);
405 spin_lock(&dentry->d_lock);
394 } 406 }
395 407
396 /* 408 /*
397 * Somebody else still using it? 409 * Somebody else still using it?
398 * 410 *
399 * If it's a directory, we can't drop it 411 * If it's a directory, we can't drop it
400 * for fear of somebody re-populating it 412 * for fear of somebody re-populating it
401 * with children (even though dropping it 413 * with children (even though dropping it
402 * would make it unreachable from the root, 414 * would make it unreachable from the root,
403 * we might still populate it if it was a 415 * we might still populate it if it was a
404 * working directory or similar). 416 * working directory or similar).
405 */ 417 */
406 spin_lock(&dentry->d_lock);
407 if (dentry->d_count > 1) { 418 if (dentry->d_count > 1) {
408 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { 419 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
409 spin_unlock(&dentry->d_lock); 420 spin_unlock(&dentry->d_lock);
410 spin_unlock(&dcache_lock); 421 spin_unlock(&dcache_lock);
411 return -EBUSY; 422 return -EBUSY;
412 } 423 }
413 } 424 }
414 425
415 __d_drop(dentry); 426 __d_drop(dentry);
416 spin_unlock(&dentry->d_lock); 427 spin_unlock(&dentry->d_lock);
417 spin_unlock(&dcache_lock); 428 spin_unlock(&dcache_lock);
418 return 0; 429 return 0;
419 } 430 }
420 EXPORT_SYMBOL(d_invalidate); 431 EXPORT_SYMBOL(d_invalidate);
421 432
422 /* This must be called with dcache_lock and d_lock held */ 433 /* This must be called with dcache_lock and d_lock held */
423 static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) 434 static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)
424 { 435 {
425 dentry->d_count++; 436 dentry->d_count++;
426 dentry_lru_del(dentry); 437 dentry_lru_del(dentry);
427 return dentry; 438 return dentry;
428 } 439 }
429 440
430 /* This should be called _only_ with dcache_lock held */ 441 /* This should be called _only_ with dcache_lock held */
431 static inline struct dentry * __dget_locked(struct dentry *dentry) 442 static inline struct dentry * __dget_locked(struct dentry *dentry)
432 { 443 {
433 spin_lock(&dentry->d_lock); 444 spin_lock(&dentry->d_lock);
434 __dget_locked_dlock(dentry); 445 __dget_locked_dlock(dentry);
435 spin_unlock(&dentry->d_lock); 446 spin_unlock(&dentry->d_lock);
436 return dentry; 447 return dentry;
437 } 448 }
438 449
439 struct dentry * dget_locked_dlock(struct dentry *dentry) 450 struct dentry * dget_locked_dlock(struct dentry *dentry)
440 { 451 {
441 return __dget_locked_dlock(dentry); 452 return __dget_locked_dlock(dentry);
442 } 453 }
443 454
444 struct dentry * dget_locked(struct dentry *dentry) 455 struct dentry * dget_locked(struct dentry *dentry)
445 { 456 {
446 return __dget_locked(dentry); 457 return __dget_locked(dentry);
447 } 458 }
448 EXPORT_SYMBOL(dget_locked); 459 EXPORT_SYMBOL(dget_locked);
449 460
450 struct dentry *dget_parent(struct dentry *dentry) 461 struct dentry *dget_parent(struct dentry *dentry)
451 { 462 {
452 struct dentry *ret; 463 struct dentry *ret;
453 464
454 repeat: 465 repeat:
455 spin_lock(&dentry->d_lock); 466 spin_lock(&dentry->d_lock);
456 ret = dentry->d_parent; 467 ret = dentry->d_parent;
457 if (!ret) 468 if (!ret)
458 goto out; 469 goto out;
459 if (dentry == ret) { 470 if (dentry == ret) {
460 ret->d_count++; 471 ret->d_count++;
461 goto out; 472 goto out;
462 } 473 }
463 if (!spin_trylock(&ret->d_lock)) { 474 if (!spin_trylock(&ret->d_lock)) {
464 spin_unlock(&dentry->d_lock); 475 spin_unlock(&dentry->d_lock);
465 cpu_relax(); 476 cpu_relax();
466 goto repeat; 477 goto repeat;
467 } 478 }
468 BUG_ON(!ret->d_count); 479 BUG_ON(!ret->d_count);
469 ret->d_count++; 480 ret->d_count++;
470 spin_unlock(&ret->d_lock); 481 spin_unlock(&ret->d_lock);
471 out: 482 out:
472 spin_unlock(&dentry->d_lock); 483 spin_unlock(&dentry->d_lock);
473 return ret; 484 return ret;
474 } 485 }
475 EXPORT_SYMBOL(dget_parent); 486 EXPORT_SYMBOL(dget_parent);
476 487
477 /** 488 /**
478 * d_find_alias - grab a hashed alias of inode 489 * d_find_alias - grab a hashed alias of inode
479 * @inode: inode in question 490 * @inode: inode in question
480 * @want_discon: flag, used by d_splice_alias, to request 491 * @want_discon: flag, used by d_splice_alias, to request
481 * that only a DISCONNECTED alias be returned. 492 * that only a DISCONNECTED alias be returned.
482 * 493 *
483 * If inode has a hashed alias, or is a directory and has any alias, 494 * If inode has a hashed alias, or is a directory and has any alias,
484 * acquire the reference to alias and return it. Otherwise return NULL. 495 * acquire the reference to alias and return it. Otherwise return NULL.
485 * Notice that if inode is a directory there can be only one alias and 496 * Notice that if inode is a directory there can be only one alias and
486 * it can be unhashed only if it has no children, or if it is the root 497 * it can be unhashed only if it has no children, or if it is the root
487 * of a filesystem. 498 * of a filesystem.
488 * 499 *
489 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer 500 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
490 * any other hashed alias over that one unless @want_discon is set, 501 * any other hashed alias over that one unless @want_discon is set,
491 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. 502 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
492 */ 503 */
493 504 static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
494 static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
495 { 505 {
496 struct list_head *head, *next, *tmp; 506 struct dentry *alias, *discon_alias;
497 struct dentry *alias, *discon_alias=NULL;
498 507
499 head = &inode->i_dentry; 508 again:
500 next = inode->i_dentry.next; 509 discon_alias = NULL;
501 while (next != head) { 510 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
502 tmp = next; 511 spin_lock(&alias->d_lock);
503 next = tmp->next;
504 prefetch(next);
505 alias = list_entry(tmp, struct dentry, d_alias);
506 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 512 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
507 if (IS_ROOT(alias) && 513 if (IS_ROOT(alias) &&
508 (alias->d_flags & DCACHE_DISCONNECTED)) 514 (alias->d_flags & DCACHE_DISCONNECTED)) {
509 discon_alias = alias; 515 discon_alias = alias;
510 else if (!want_discon) { 516 } else if (!want_discon) {
511 __dget_locked(alias); 517 __dget_locked_dlock(alias);
518 spin_unlock(&alias->d_lock);
512 return alias; 519 return alias;
513 } 520 }
514 } 521 }
522 spin_unlock(&alias->d_lock);
515 } 523 }
516 if (discon_alias) 524 if (discon_alias) {
517 __dget_locked(discon_alias); 525 alias = discon_alias;
518 return discon_alias; 526 spin_lock(&alias->d_lock);
527 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
528 if (IS_ROOT(alias) &&
529 (alias->d_flags & DCACHE_DISCONNECTED)) {
530 __dget_locked_dlock(alias);
531 spin_unlock(&alias->d_lock);
532 return alias;
533 }
534 }
535 spin_unlock(&alias->d_lock);
536 goto again;
537 }
538 return NULL;
519 } 539 }
520 540
521 struct dentry * d_find_alias(struct inode *inode) 541 struct dentry *d_find_alias(struct inode *inode)
522 { 542 {
523 struct dentry *de = NULL; 543 struct dentry *de = NULL;
524 544
525 if (!list_empty(&inode->i_dentry)) { 545 if (!list_empty(&inode->i_dentry)) {
526 spin_lock(&dcache_lock); 546 spin_lock(&dcache_lock);
527 de = __d_find_alias(inode, 0); 547 de = __d_find_alias(inode, 0);
528 spin_unlock(&dcache_lock); 548 spin_unlock(&dcache_lock);
529 } 549 }
530 return de; 550 return de;
531 } 551 }
532 EXPORT_SYMBOL(d_find_alias); 552 EXPORT_SYMBOL(d_find_alias);
533 553
534 /* 554 /*
535 * Try to kill dentries associated with this inode. 555 * Try to kill dentries associated with this inode.
536 * WARNING: you must own a reference to inode. 556 * WARNING: you must own a reference to inode.
537 */ 557 */
538 void d_prune_aliases(struct inode *inode) 558 void d_prune_aliases(struct inode *inode)
539 { 559 {
540 struct dentry *dentry; 560 struct dentry *dentry;
541 restart: 561 restart:
542 spin_lock(&dcache_lock); 562 spin_lock(&dcache_lock);
543 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 563 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
544 spin_lock(&dentry->d_lock); 564 spin_lock(&dentry->d_lock);
545 if (!dentry->d_count) { 565 if (!dentry->d_count) {
546 __dget_locked_dlock(dentry); 566 __dget_locked_dlock(dentry);
547 __d_drop(dentry); 567 __d_drop(dentry);
548 spin_unlock(&dentry->d_lock); 568 spin_unlock(&dentry->d_lock);
549 spin_unlock(&dcache_lock); 569 spin_unlock(&dcache_lock);
550 dput(dentry); 570 dput(dentry);
551 goto restart; 571 goto restart;
552 } 572 }
553 spin_unlock(&dentry->d_lock); 573 spin_unlock(&dentry->d_lock);
554 } 574 }
555 spin_unlock(&dcache_lock); 575 spin_unlock(&dcache_lock);
556 } 576 }
557 EXPORT_SYMBOL(d_prune_aliases); 577 EXPORT_SYMBOL(d_prune_aliases);
558 578
559 /* 579 /*
560 * Throw away a dentry - free the inode, dput the parent. This requires that 580 * Throw away a dentry - free the inode, dput the parent. This requires that
561 * the dentry has already been removed from the LRU list. 581 * the dentry has already been removed from the LRU list.
562 * 582 *
563 * Try to prune ancestors as well. This is necessary to prevent 583 * Try to prune ancestors as well. This is necessary to prevent
564 * quadratic behavior of shrink_dcache_parent(), but is also expected 584 * quadratic behavior of shrink_dcache_parent(), but is also expected
565 * to be beneficial in reducing dentry cache fragmentation. 585 * to be beneficial in reducing dentry cache fragmentation.
566 */ 586 */
567 static void prune_one_dentry(struct dentry * dentry) 587 static void prune_one_dentry(struct dentry * dentry)
568 __releases(dentry->d_lock) 588 __releases(dentry->d_lock)
569 __releases(dcache_lock) 589 __releases(dcache_lock)
570 { 590 {
571 __d_drop(dentry); 591 __d_drop(dentry);
572 dentry = d_kill(dentry); 592 dentry = d_kill(dentry);
573 593
574 /* 594 /*
575 * Prune ancestors. Locking is simpler than in dput(), 595 * Prune ancestors. Locking is simpler than in dput(),
576 * because dcache_lock needs to be taken anyway. 596 * because dcache_lock needs to be taken anyway.
577 */ 597 */
578 while (dentry) { 598 while (dentry) {
579 spin_lock(&dcache_lock); 599 spin_lock(&dcache_lock);
580 spin_lock(&dentry->d_lock); 600 spin_lock(&dentry->d_lock);
581 dentry->d_count--; 601 dentry->d_count--;
582 if (dentry->d_count) { 602 if (dentry->d_count) {
583 spin_unlock(&dentry->d_lock); 603 spin_unlock(&dentry->d_lock);
584 spin_unlock(&dcache_lock); 604 spin_unlock(&dcache_lock);
585 return; 605 return;
586 } 606 }
587 607
588 dentry_lru_del(dentry); 608 dentry_lru_del(dentry);
589 __d_drop(dentry); 609 __d_drop(dentry);
590 dentry = d_kill(dentry); 610 dentry = d_kill(dentry);
591 } 611 }
592 } 612 }
593 613
594 static void shrink_dentry_list(struct list_head *list) 614 static void shrink_dentry_list(struct list_head *list)
595 { 615 {
596 struct dentry *dentry; 616 struct dentry *dentry;
597 617
598 while (!list_empty(list)) { 618 while (!list_empty(list)) {
599 dentry = list_entry(list->prev, struct dentry, d_lru); 619 dentry = list_entry(list->prev, struct dentry, d_lru);
600 620
601 if (!spin_trylock(&dentry->d_lock)) { 621 if (!spin_trylock(&dentry->d_lock)) {
602 spin_unlock(&dcache_lru_lock); 622 spin_unlock(&dcache_lru_lock);
603 cpu_relax(); 623 cpu_relax();
604 spin_lock(&dcache_lru_lock); 624 spin_lock(&dcache_lru_lock);
605 continue; 625 continue;
606 } 626 }
607 627
608 __dentry_lru_del(dentry); 628 __dentry_lru_del(dentry);
609 629
610 /* 630 /*
611 * We found an inuse dentry which was not removed from 631 * We found an inuse dentry which was not removed from
612 * the LRU because of laziness during lookup. Do not free 632 * the LRU because of laziness during lookup. Do not free
613 * it - just keep it off the LRU list. 633 * it - just keep it off the LRU list.
614 */ 634 */
615 if (dentry->d_count) { 635 if (dentry->d_count) {
616 spin_unlock(&dentry->d_lock); 636 spin_unlock(&dentry->d_lock);
617 continue; 637 continue;
618 } 638 }
619 spin_unlock(&dcache_lru_lock); 639 spin_unlock(&dcache_lru_lock);
620 640
621 prune_one_dentry(dentry); 641 prune_one_dentry(dentry);
622 /* dcache_lock and dentry->d_lock dropped */ 642 /* dcache_lock and dentry->d_lock dropped */
623 spin_lock(&dcache_lock); 643 spin_lock(&dcache_lock);
624 spin_lock(&dcache_lru_lock); 644 spin_lock(&dcache_lru_lock);
625 } 645 }
626 } 646 }
627 647
628 /** 648 /**
629 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock 649 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
630 * @sb: superblock to shrink dentry LRU. 650 * @sb: superblock to shrink dentry LRU.
631 * @count: number of entries to prune 651 * @count: number of entries to prune
632 * @flags: flags to control the dentry processing 652 * @flags: flags to control the dentry processing
633 * 653 *
634 * If flags contains DCACHE_REFERENCED, referenced dentries will not be pruned. 654 * If flags contains DCACHE_REFERENCED, referenced dentries will not be pruned.
635 */ 655 */
636 static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) 656 static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
637 { 657 {
638 /* called from prune_dcache() and shrink_dcache_parent() */ 658 /* called from prune_dcache() and shrink_dcache_parent() */
639 struct dentry *dentry; 659 struct dentry *dentry;
640 LIST_HEAD(referenced); 660 LIST_HEAD(referenced);
641 LIST_HEAD(tmp); 661 LIST_HEAD(tmp);
642 int cnt = *count; 662 int cnt = *count;
643 663
644 spin_lock(&dcache_lock); 664 spin_lock(&dcache_lock);
645 relock: 665 relock:
646 spin_lock(&dcache_lru_lock); 666 spin_lock(&dcache_lru_lock);
647 while (!list_empty(&sb->s_dentry_lru)) { 667 while (!list_empty(&sb->s_dentry_lru)) {
648 dentry = list_entry(sb->s_dentry_lru.prev, 668 dentry = list_entry(sb->s_dentry_lru.prev,
649 struct dentry, d_lru); 669 struct dentry, d_lru);
650 BUG_ON(dentry->d_sb != sb); 670 BUG_ON(dentry->d_sb != sb);
651 671
652 if (!spin_trylock(&dentry->d_lock)) { 672 if (!spin_trylock(&dentry->d_lock)) {
653 spin_unlock(&dcache_lru_lock); 673 spin_unlock(&dcache_lru_lock);
654 cpu_relax(); 674 cpu_relax();
655 goto relock; 675 goto relock;
656 } 676 }
657 677
658 /* 678 /*
659 * If we are honouring the DCACHE_REFERENCED flag and the 679 * If we are honouring the DCACHE_REFERENCED flag and the
660 * dentry has this flag set, don't free it. Clear the flag 680 * dentry has this flag set, don't free it. Clear the flag
661 * and put it back on the LRU. 681 * and put it back on the LRU.
662 */ 682 */
663 if (flags & DCACHE_REFERENCED && 683 if (flags & DCACHE_REFERENCED &&
664 dentry->d_flags & DCACHE_REFERENCED) { 684 dentry->d_flags & DCACHE_REFERENCED) {
665 dentry->d_flags &= ~DCACHE_REFERENCED; 685 dentry->d_flags &= ~DCACHE_REFERENCED;
666 list_move(&dentry->d_lru, &referenced); 686 list_move(&dentry->d_lru, &referenced);
667 spin_unlock(&dentry->d_lock); 687 spin_unlock(&dentry->d_lock);
668 } else { 688 } else {
669 list_move_tail(&dentry->d_lru, &tmp); 689 list_move_tail(&dentry->d_lru, &tmp);
670 spin_unlock(&dentry->d_lock); 690 spin_unlock(&dentry->d_lock);
671 if (!--cnt) 691 if (!--cnt)
672 break; 692 break;
673 } 693 }
674 /* XXX: re-add cond_resched_lock when dcache_lock goes away */ 694 /* XXX: re-add cond_resched_lock when dcache_lock goes away */
675 } 695 }
676 696
677 *count = cnt; 697 *count = cnt;
678 shrink_dentry_list(&tmp); 698 shrink_dentry_list(&tmp);
679 699
680 if (!list_empty(&referenced)) 700 if (!list_empty(&referenced))
681 list_splice(&referenced, &sb->s_dentry_lru); 701 list_splice(&referenced, &sb->s_dentry_lru);
682 spin_unlock(&dcache_lru_lock); 702 spin_unlock(&dcache_lru_lock);
683 spin_unlock(&dcache_lock); 703 spin_unlock(&dcache_lock);
684 704
685 } 705 }
686 706
687 /** 707 /**
688 * prune_dcache - shrink the dcache 708 * prune_dcache - shrink the dcache
689 * @count: number of entries to try to free 709 * @count: number of entries to try to free
690 * 710 *
691 * Shrink the dcache. This is done when we need more memory, or simply when we 711 * Shrink the dcache. This is done when we need more memory, or simply when we
692 * need to unmount something (at which point we need to unuse all dentries). 712 * need to unmount something (at which point we need to unuse all dentries).
693 * 713 *
694 * This function may fail to free any resources if all the dentries are in use. 714 * This function may fail to free any resources if all the dentries are in use.
695 */ 715 */
696 static void prune_dcache(int count) 716 static void prune_dcache(int count)
697 { 717 {
698 struct super_block *sb, *p = NULL; 718 struct super_block *sb, *p = NULL;
699 int w_count; 719 int w_count;
700 int unused = dentry_stat.nr_unused; 720 int unused = dentry_stat.nr_unused;
701 int prune_ratio; 721 int prune_ratio;
702 int pruned; 722 int pruned;
703 723
704 if (unused == 0 || count == 0) 724 if (unused == 0 || count == 0)
705 return; 725 return;
706 spin_lock(&dcache_lock); 726 spin_lock(&dcache_lock);
707 if (count >= unused) 727 if (count >= unused)
708 prune_ratio = 1; 728 prune_ratio = 1;
709 else 729 else
710 prune_ratio = unused / count; 730 prune_ratio = unused / count;
711 spin_lock(&sb_lock); 731 spin_lock(&sb_lock);
712 list_for_each_entry(sb, &super_blocks, s_list) { 732 list_for_each_entry(sb, &super_blocks, s_list) {
713 if (list_empty(&sb->s_instances)) 733 if (list_empty(&sb->s_instances))
714 continue; 734 continue;
715 if (sb->s_nr_dentry_unused == 0) 735 if (sb->s_nr_dentry_unused == 0)
716 continue; 736 continue;
717 sb->s_count++; 737 sb->s_count++;
718 /* Now, we reclaim unused dentries with fairness. 738 /* Now, we reclaim unused dentries with fairness.
719 * We reclaim the same percentage from each superblock. 739 * We reclaim the same percentage from each superblock.
720 * We calculate number of dentries to scan on this sb 740 * We calculate number of dentries to scan on this sb
721 * as follows, but the implementation is arranged to avoid 741 * as follows, but the implementation is arranged to avoid
722 * overflows: 742 * overflows:
723 * number of dentries to scan on this sb = 743 * number of dentries to scan on this sb =
724 * count * (number of dentries on this sb / 744 * count * (number of dentries on this sb /
725 * number of dentries in the machine) 745 * number of dentries in the machine)
726 */ 746 */
727 spin_unlock(&sb_lock); 747 spin_unlock(&sb_lock);
728 if (prune_ratio != 1) 748 if (prune_ratio != 1)
729 w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1; 749 w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
730 else 750 else
731 w_count = sb->s_nr_dentry_unused; 751 w_count = sb->s_nr_dentry_unused;
732 pruned = w_count; 752 pruned = w_count;
733 /* 753 /*
734 * We need to be sure this filesystem isn't being unmounted, 754 * We need to be sure this filesystem isn't being unmounted,
735 * otherwise we could race with generic_shutdown_super(), and 755 * otherwise we could race with generic_shutdown_super(), and
736 * end up holding a reference to an inode while the filesystem 756 * end up holding a reference to an inode while the filesystem
737 * is unmounted. So we try to get s_umount, and make sure 757 * is unmounted. So we try to get s_umount, and make sure
738 * s_root isn't NULL. 758 * s_root isn't NULL.
739 */ 759 */
740 if (down_read_trylock(&sb->s_umount)) { 760 if (down_read_trylock(&sb->s_umount)) {
741 if ((sb->s_root != NULL) && 761 if ((sb->s_root != NULL) &&
742 (!list_empty(&sb->s_dentry_lru))) { 762 (!list_empty(&sb->s_dentry_lru))) {
743 spin_unlock(&dcache_lock); 763 spin_unlock(&dcache_lock);
744 __shrink_dcache_sb(sb, &w_count, 764 __shrink_dcache_sb(sb, &w_count,
745 DCACHE_REFERENCED); 765 DCACHE_REFERENCED);
746 pruned -= w_count; 766 pruned -= w_count;
747 spin_lock(&dcache_lock); 767 spin_lock(&dcache_lock);
748 } 768 }
749 up_read(&sb->s_umount); 769 up_read(&sb->s_umount);
750 } 770 }
751 spin_lock(&sb_lock); 771 spin_lock(&sb_lock);
752 if (p) 772 if (p)
753 __put_super(p); 773 __put_super(p);
754 count -= pruned; 774 count -= pruned;
755 p = sb; 775 p = sb;
756 /* more work left to do? */ 776 /* more work left to do? */
757 if (count <= 0) 777 if (count <= 0)
758 break; 778 break;
759 } 779 }
760 if (p) 780 if (p)
761 __put_super(p); 781 __put_super(p);
762 spin_unlock(&sb_lock); 782 spin_unlock(&sb_lock);
763 spin_unlock(&dcache_lock); 783 spin_unlock(&dcache_lock);
764 } 784 }
765 785
766 /** 786 /**
767 * shrink_dcache_sb - shrink dcache for a superblock 787 * shrink_dcache_sb - shrink dcache for a superblock
768 * @sb: superblock 788 * @sb: superblock
769 * 789 *
770 * Shrink the dcache for the specified super block. This is used to free 790 * Shrink the dcache for the specified super block. This is used to free
771 * the dcache before unmounting a file system. 791 * the dcache before unmounting a file system.
772 */ 792 */
773 void shrink_dcache_sb(struct super_block *sb) 793 void shrink_dcache_sb(struct super_block *sb)
774 { 794 {
775 LIST_HEAD(tmp); 795 LIST_HEAD(tmp);
776 796
777 spin_lock(&dcache_lock); 797 spin_lock(&dcache_lock);
778 spin_lock(&dcache_lru_lock); 798 spin_lock(&dcache_lru_lock);
779 while (!list_empty(&sb->s_dentry_lru)) { 799 while (!list_empty(&sb->s_dentry_lru)) {
780 list_splice_init(&sb->s_dentry_lru, &tmp); 800 list_splice_init(&sb->s_dentry_lru, &tmp);
781 shrink_dentry_list(&tmp); 801 shrink_dentry_list(&tmp);
782 } 802 }
783 spin_unlock(&dcache_lru_lock); 803 spin_unlock(&dcache_lru_lock);
784 spin_unlock(&dcache_lock); 804 spin_unlock(&dcache_lock);
785 } 805 }
786 EXPORT_SYMBOL(shrink_dcache_sb); 806 EXPORT_SYMBOL(shrink_dcache_sb);
787 807
788 /* 808 /*
789 * destroy a single subtree of dentries for unmount 809 * destroy a single subtree of dentries for unmount
790 * - see the comments on shrink_dcache_for_umount() for a description of the 810 * - see the comments on shrink_dcache_for_umount() for a description of the
791 * locking 811 * locking
792 */ 812 */
793 static void shrink_dcache_for_umount_subtree(struct dentry *dentry) 813 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
794 { 814 {
795 struct dentry *parent; 815 struct dentry *parent;
796 unsigned detached = 0; 816 unsigned detached = 0;
797 817
798 BUG_ON(!IS_ROOT(dentry)); 818 BUG_ON(!IS_ROOT(dentry));
799 819
800 /* detach this root from the system */ 820 /* detach this root from the system */
801 spin_lock(&dcache_lock); 821 spin_lock(&dcache_lock);
802 spin_lock(&dentry->d_lock); 822 spin_lock(&dentry->d_lock);
803 dentry_lru_del(dentry); 823 dentry_lru_del(dentry);
804 spin_unlock(&dentry->d_lock);
805 __d_drop(dentry); 824 __d_drop(dentry);
825 spin_unlock(&dentry->d_lock);
806 spin_unlock(&dcache_lock); 826 spin_unlock(&dcache_lock);
807 827
808 for (;;) { 828 for (;;) {
809 /* descend to the first leaf in the current subtree */ 829 /* descend to the first leaf in the current subtree */
810 while (!list_empty(&dentry->d_subdirs)) { 830 while (!list_empty(&dentry->d_subdirs)) {
811 struct dentry *loop; 831 struct dentry *loop;
812 832
813 /* this is a branch with children - detach all of them 833 /* this is a branch with children - detach all of them
814 * from the system in one go */ 834 * from the system in one go */
815 spin_lock(&dcache_lock); 835 spin_lock(&dcache_lock);
816 list_for_each_entry(loop, &dentry->d_subdirs, 836 list_for_each_entry(loop, &dentry->d_subdirs,
817 d_u.d_child) { 837 d_u.d_child) {
818 spin_lock(&loop->d_lock); 838 spin_lock(&loop->d_lock);
819 dentry_lru_del(loop); 839 dentry_lru_del(loop);
820 spin_unlock(&loop->d_lock);
821 __d_drop(loop); 840 __d_drop(loop);
841 spin_unlock(&loop->d_lock);
822 cond_resched_lock(&dcache_lock); 842 cond_resched_lock(&dcache_lock);
823 } 843 }
824 spin_unlock(&dcache_lock); 844 spin_unlock(&dcache_lock);
825 845
826 /* move to the first child */ 846 /* move to the first child */
827 dentry = list_entry(dentry->d_subdirs.next, 847 dentry = list_entry(dentry->d_subdirs.next,
828 struct dentry, d_u.d_child); 848 struct dentry, d_u.d_child);
829 } 849 }
830 850
831 /* consume the dentries from this leaf up through its parents 851 /* consume the dentries from this leaf up through its parents
832 * until we find one with children or run out altogether */ 852 * until we find one with children or run out altogether */
833 do { 853 do {
834 struct inode *inode; 854 struct inode *inode;
835 855
836 if (dentry->d_count != 0) { 856 if (dentry->d_count != 0) {
837 printk(KERN_ERR 857 printk(KERN_ERR
838 "BUG: Dentry %p{i=%lx,n=%s}" 858 "BUG: Dentry %p{i=%lx,n=%s}"
839 " still in use (%d)" 859 " still in use (%d)"
840 " [unmount of %s %s]\n", 860 " [unmount of %s %s]\n",
841 dentry, 861 dentry,
842 dentry->d_inode ? 862 dentry->d_inode ?
843 dentry->d_inode->i_ino : 0UL, 863 dentry->d_inode->i_ino : 0UL,
844 dentry->d_name.name, 864 dentry->d_name.name,
845 dentry->d_count, 865 dentry->d_count,
846 dentry->d_sb->s_type->name, 866 dentry->d_sb->s_type->name,
847 dentry->d_sb->s_id); 867 dentry->d_sb->s_id);
848 BUG(); 868 BUG();
849 } 869 }
850 870
851 if (IS_ROOT(dentry)) 871 if (IS_ROOT(dentry))
852 parent = NULL; 872 parent = NULL;
853 else { 873 else {
854 parent = dentry->d_parent; 874 parent = dentry->d_parent;
855 spin_lock(&parent->d_lock); 875 spin_lock(&parent->d_lock);
856 parent->d_count--; 876 parent->d_count--;
857 spin_unlock(&parent->d_lock); 877 spin_unlock(&parent->d_lock);
858 } 878 }
859 879
860 list_del(&dentry->d_u.d_child); 880 list_del(&dentry->d_u.d_child);
861 detached++; 881 detached++;
862 882
863 inode = dentry->d_inode; 883 inode = dentry->d_inode;
864 if (inode) { 884 if (inode) {
865 dentry->d_inode = NULL; 885 dentry->d_inode = NULL;
866 list_del_init(&dentry->d_alias); 886 list_del_init(&dentry->d_alias);
867 if (dentry->d_op && dentry->d_op->d_iput) 887 if (dentry->d_op && dentry->d_op->d_iput)
868 dentry->d_op->d_iput(dentry, inode); 888 dentry->d_op->d_iput(dentry, inode);
869 else 889 else
870 iput(inode); 890 iput(inode);
871 } 891 }
872 892
873 d_free(dentry); 893 d_free(dentry);
874 894
875 /* finished when we fall off the top of the tree, 895 /* finished when we fall off the top of the tree,
876 * otherwise we ascend to the parent and move to the 896 * otherwise we ascend to the parent and move to the
877 * next sibling if there is one */ 897 * next sibling if there is one */
878 if (!parent) 898 if (!parent)
879 return; 899 return;
880 dentry = parent; 900 dentry = parent;
881 } while (list_empty(&dentry->d_subdirs)); 901 } while (list_empty(&dentry->d_subdirs));
882 902
883 dentry = list_entry(dentry->d_subdirs.next, 903 dentry = list_entry(dentry->d_subdirs.next,
884 struct dentry, d_u.d_child); 904 struct dentry, d_u.d_child);
885 } 905 }
886 } 906 }
887 907
888 /* 908 /*
889 * destroy the dentries attached to a superblock on unmounting 909 * destroy the dentries attached to a superblock on unmounting
890 * - we don't need to use dentry->d_lock, and only need dcache_lock when 910 * - we don't need to use dentry->d_lock, and only need dcache_lock when
891 * removing the dentry from the system lists and hashes because: 911 * removing the dentry from the system lists and hashes because:
892 * - the superblock is detached from all mountings and open files, so the 912 * - the superblock is detached from all mountings and open files, so the
893 * dentry trees will not be rearranged by the VFS 913 * dentry trees will not be rearranged by the VFS
894 * - s_umount is write-locked, so the memory pressure shrinker will ignore 914 * - s_umount is write-locked, so the memory pressure shrinker will ignore
895 * any dentries belonging to this superblock that it comes across 915 * any dentries belonging to this superblock that it comes across
896 * - the filesystem itself is no longer permitted to rearrange the dentries 916 * - the filesystem itself is no longer permitted to rearrange the dentries
897 * in this superblock 917 * in this superblock
898 */ 918 */
899 void shrink_dcache_for_umount(struct super_block *sb) 919 void shrink_dcache_for_umount(struct super_block *sb)
900 { 920 {
901 struct dentry *dentry; 921 struct dentry *dentry;
902 922
903 if (down_read_trylock(&sb->s_umount)) 923 if (down_read_trylock(&sb->s_umount))
904 BUG(); 924 BUG();
905 925
906 dentry = sb->s_root; 926 dentry = sb->s_root;
907 sb->s_root = NULL; 927 sb->s_root = NULL;
908 spin_lock(&dentry->d_lock); 928 spin_lock(&dentry->d_lock);
909 dentry->d_count--; 929 dentry->d_count--;
910 spin_unlock(&dentry->d_lock); 930 spin_unlock(&dentry->d_lock);
911 shrink_dcache_for_umount_subtree(dentry); 931 shrink_dcache_for_umount_subtree(dentry);
912 932
913 while (!hlist_empty(&sb->s_anon)) { 933 while (!hlist_empty(&sb->s_anon)) {
914 dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); 934 dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
915 shrink_dcache_for_umount_subtree(dentry); 935 shrink_dcache_for_umount_subtree(dentry);
916 } 936 }
917 } 937 }
918 938
919 /* 939 /*
920 * Search for at least 1 mount point in the dentry's subdirs. 940 * Search for at least 1 mount point in the dentry's subdirs.
921 * We descend to the next level whenever the d_subdirs 941 * We descend to the next level whenever the d_subdirs
922 * list is non-empty and continue searching. 942 * list is non-empty and continue searching.
923 */ 943 */
924 944
925 /** 945 /**
926 * have_submounts - check for mounts over a dentry 946 * have_submounts - check for mounts over a dentry
927 * @parent: dentry to check. 947 * @parent: dentry to check.
928 * 948 *
929 * Return true if the parent or its subdirectories contain 949 * Return true if the parent or its subdirectories contain
930 * a mount point 950 * a mount point
931 */ 951 */
932 952
933 int have_submounts(struct dentry *parent) 953 int have_submounts(struct dentry *parent)
934 { 954 {
935 struct dentry *this_parent = parent; 955 struct dentry *this_parent = parent;
936 struct list_head *next; 956 struct list_head *next;
937 957
938 spin_lock(&dcache_lock); 958 spin_lock(&dcache_lock);
939 if (d_mountpoint(parent)) 959 if (d_mountpoint(parent))
940 goto positive; 960 goto positive;
941 repeat: 961 repeat:
942 next = this_parent->d_subdirs.next; 962 next = this_parent->d_subdirs.next;
943 resume: 963 resume:
944 while (next != &this_parent->d_subdirs) { 964 while (next != &this_parent->d_subdirs) {
945 struct list_head *tmp = next; 965 struct list_head *tmp = next;
946 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 966 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
947 next = tmp->next; 967 next = tmp->next;
948 /* Have we found a mount point ? */ 968 /* Have we found a mount point ? */
949 if (d_mountpoint(dentry)) 969 if (d_mountpoint(dentry))
950 goto positive; 970 goto positive;
951 if (!list_empty(&dentry->d_subdirs)) { 971 if (!list_empty(&dentry->d_subdirs)) {
952 this_parent = dentry; 972 this_parent = dentry;
953 goto repeat; 973 goto repeat;
954 } 974 }
955 } 975 }
956 /* 976 /*
957 * All done at this level ... ascend and resume the search. 977 * All done at this level ... ascend and resume the search.
958 */ 978 */
959 if (this_parent != parent) { 979 if (this_parent != parent) {
960 next = this_parent->d_u.d_child.next; 980 next = this_parent->d_u.d_child.next;
961 this_parent = this_parent->d_parent; 981 this_parent = this_parent->d_parent;
962 goto resume; 982 goto resume;
963 } 983 }
964 spin_unlock(&dcache_lock); 984 spin_unlock(&dcache_lock);
965 return 0; /* No mount points found in tree */ 985 return 0; /* No mount points found in tree */
966 positive: 986 positive:
967 spin_unlock(&dcache_lock); 987 spin_unlock(&dcache_lock);
968 return 1; 988 return 1;
969 } 989 }
970 EXPORT_SYMBOL(have_submounts); 990 EXPORT_SYMBOL(have_submounts);
971 991
972 /* 992 /*
973 * Search the dentry child list for the specified parent, 993 * Search the dentry child list for the specified parent,
974 * and move any unused dentries to the end of the unused 994 * and move any unused dentries to the end of the unused
975 * list for prune_dcache(). We descend to the next level 995 * list for prune_dcache(). We descend to the next level
976 * whenever the d_subdirs list is non-empty and continue 996 * whenever the d_subdirs list is non-empty and continue
977 * searching. 997 * searching.
978 * 998 *
979 * It returns zero iff there are no unused children, 999 * It returns zero iff there are no unused children,
980 * otherwise it returns the number of children moved to 1000 * otherwise it returns the number of children moved to
981 * the end of the unused list. This may not be the total 1001 * the end of the unused list. This may not be the total
982 * number of unused children, because select_parent can 1002 * number of unused children, because select_parent can
983 * drop the lock and return early due to latency 1003 * drop the lock and return early due to latency
984 * constraints. 1004 * constraints.
985 */ 1005 */
986 static int select_parent(struct dentry * parent) 1006 static int select_parent(struct dentry * parent)
987 { 1007 {
988 struct dentry *this_parent = parent; 1008 struct dentry *this_parent = parent;
989 struct list_head *next; 1009 struct list_head *next;
990 int found = 0; 1010 int found = 0;
991 1011
992 spin_lock(&dcache_lock); 1012 spin_lock(&dcache_lock);
993 repeat: 1013 repeat:
994 next = this_parent->d_subdirs.next; 1014 next = this_parent->d_subdirs.next;
995 resume: 1015 resume:
996 while (next != &this_parent->d_subdirs) { 1016 while (next != &this_parent->d_subdirs) {
997 struct list_head *tmp = next; 1017 struct list_head *tmp = next;
998 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1018 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
999 next = tmp->next; 1019 next = tmp->next;
1000 1020
1001 spin_lock(&dentry->d_lock); 1021 spin_lock(&dentry->d_lock);
1002 1022
1003 /* 1023 /*
1004 * move only zero ref count dentries to the end 1024 * move only zero ref count dentries to the end
1005 * of the unused list for prune_dcache 1025 * of the unused list for prune_dcache
1006 */ 1026 */
1007 if (!dentry->d_count) { 1027 if (!dentry->d_count) {
1008 dentry_lru_move_tail(dentry); 1028 dentry_lru_move_tail(dentry);
1009 found++; 1029 found++;
1010 } else { 1030 } else {
1011 dentry_lru_del(dentry); 1031 dentry_lru_del(dentry);
1012 } 1032 }
1013 1033
1014 spin_unlock(&dentry->d_lock); 1034 spin_unlock(&dentry->d_lock);
1015 1035
1016 /* 1036 /*
1017 * We can return to the caller if we have found some (this 1037 * We can return to the caller if we have found some (this
1018 * ensures forward progress). We'll be coming back to find 1038 * ensures forward progress). We'll be coming back to find
1019 * the rest. 1039 * the rest.
1020 */ 1040 */
1021 if (found && need_resched()) 1041 if (found && need_resched())
1022 goto out; 1042 goto out;
1023 1043
1024 /* 1044 /*
1025 * Descend a level if the d_subdirs list is non-empty. 1045 * Descend a level if the d_subdirs list is non-empty.
1026 */ 1046 */
1027 if (!list_empty(&dentry->d_subdirs)) { 1047 if (!list_empty(&dentry->d_subdirs)) {
1028 this_parent = dentry; 1048 this_parent = dentry;
1029 goto repeat; 1049 goto repeat;
1030 } 1050 }
1031 } 1051 }
1032 /* 1052 /*
1033 * All done at this level ... ascend and resume the search. 1053 * All done at this level ... ascend and resume the search.
1034 */ 1054 */
1035 if (this_parent != parent) { 1055 if (this_parent != parent) {
1036 next = this_parent->d_u.d_child.next; 1056 next = this_parent->d_u.d_child.next;
1037 this_parent = this_parent->d_parent; 1057 this_parent = this_parent->d_parent;
1038 goto resume; 1058 goto resume;
1039 } 1059 }
1040 out: 1060 out:
1041 spin_unlock(&dcache_lock); 1061 spin_unlock(&dcache_lock);
1042 return found; 1062 return found;
1043 } 1063 }
1044 1064
1045 /** 1065 /**
1046 * shrink_dcache_parent - prune dcache 1066 * shrink_dcache_parent - prune dcache
1047 * @parent: parent of entries to prune 1067 * @parent: parent of entries to prune
1048 * 1068 *
1049 * Prune the dcache to remove unused children of the parent dentry. 1069 * Prune the dcache to remove unused children of the parent dentry.
1050 */ 1070 */
1051 1071
1052 void shrink_dcache_parent(struct dentry * parent) 1072 void shrink_dcache_parent(struct dentry * parent)
1053 { 1073 {
1054 struct super_block *sb = parent->d_sb; 1074 struct super_block *sb = parent->d_sb;
1055 int found; 1075 int found;
1056 1076
1057 while ((found = select_parent(parent)) != 0) 1077 while ((found = select_parent(parent)) != 0)
1058 __shrink_dcache_sb(sb, &found, 0); 1078 __shrink_dcache_sb(sb, &found, 0);
1059 } 1079 }
1060 EXPORT_SYMBOL(shrink_dcache_parent); 1080 EXPORT_SYMBOL(shrink_dcache_parent);
1061 1081
1062 /* 1082 /*
1063 * Scan `nr' dentries and return the number which remain. 1083 * Scan `nr' dentries and return the number which remain.
1064 * 1084 *
1065 * We need to avoid reentering the filesystem if the caller is performing a 1085 * We need to avoid reentering the filesystem if the caller is performing a
1066 * GFP_NOFS allocation attempt. One example deadlock is: 1086 * GFP_NOFS allocation attempt. One example deadlock is:
1067 * 1087 *
1068 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache-> 1088 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
1069 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode-> 1089 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
1070 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK. 1090 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
1071 * 1091 *
1072 * In this case we return -1 to tell the caller that we baled. 1092 * In this case we return -1 to tell the caller that we baled.
1073 */ 1093 */
1074 static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1094 static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
1075 { 1095 {
1076 if (nr) { 1096 if (nr) {
1077 if (!(gfp_mask & __GFP_FS)) 1097 if (!(gfp_mask & __GFP_FS))
1078 return -1; 1098 return -1;
1079 prune_dcache(nr); 1099 prune_dcache(nr);
1080 } 1100 }
1081 1101
1082 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 1102 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
1083 } 1103 }
1084 1104
1085 static struct shrinker dcache_shrinker = { 1105 static struct shrinker dcache_shrinker = {
1086 .shrink = shrink_dcache_memory, 1106 .shrink = shrink_dcache_memory,
1087 .seeks = DEFAULT_SEEKS, 1107 .seeks = DEFAULT_SEEKS,
1088 }; 1108 };
1089 1109
1090 /** 1110 /**
1091 * d_alloc - allocate a dcache entry 1111 * d_alloc - allocate a dcache entry
1092 * @parent: parent of entry to allocate 1112 * @parent: parent of entry to allocate
1093 * @name: qstr of the name 1113 * @name: qstr of the name
1094 * 1114 *
1095 * Allocates a dentry. It returns %NULL if there is insufficient memory 1115 * Allocates a dentry. It returns %NULL if there is insufficient memory
1096 * available. On a success the dentry is returned. The name passed in is 1116 * available. On a success the dentry is returned. The name passed in is
1097 * copied and the copy passed in may be reused after this call. 1117 * copied and the copy passed in may be reused after this call.
1098 */ 1118 */
1099 1119
1100 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) 1120 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1101 { 1121 {
1102 struct dentry *dentry; 1122 struct dentry *dentry;
1103 char *dname; 1123 char *dname;
1104 1124
1105 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 1125 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1106 if (!dentry) 1126 if (!dentry)
1107 return NULL; 1127 return NULL;
1108 1128
1109 if (name->len > DNAME_INLINE_LEN-1) { 1129 if (name->len > DNAME_INLINE_LEN-1) {
1110 dname = kmalloc(name->len + 1, GFP_KERNEL); 1130 dname = kmalloc(name->len + 1, GFP_KERNEL);
1111 if (!dname) { 1131 if (!dname) {
1112 kmem_cache_free(dentry_cache, dentry); 1132 kmem_cache_free(dentry_cache, dentry);
1113 return NULL; 1133 return NULL;
1114 } 1134 }
1115 } else { 1135 } else {
1116 dname = dentry->d_iname; 1136 dname = dentry->d_iname;
1117 } 1137 }
1118 dentry->d_name.name = dname; 1138 dentry->d_name.name = dname;
1119 1139
1120 dentry->d_name.len = name->len; 1140 dentry->d_name.len = name->len;
1121 dentry->d_name.hash = name->hash; 1141 dentry->d_name.hash = name->hash;
1122 memcpy(dname, name->name, name->len); 1142 memcpy(dname, name->name, name->len);
1123 dname[name->len] = 0; 1143 dname[name->len] = 0;
1124 1144
1125 dentry->d_count = 1; 1145 dentry->d_count = 1;
1126 dentry->d_flags = DCACHE_UNHASHED; 1146 dentry->d_flags = DCACHE_UNHASHED;
1127 spin_lock_init(&dentry->d_lock); 1147 spin_lock_init(&dentry->d_lock);
1128 dentry->d_inode = NULL; 1148 dentry->d_inode = NULL;
1129 dentry->d_parent = NULL; 1149 dentry->d_parent = NULL;
1130 dentry->d_sb = NULL; 1150 dentry->d_sb = NULL;
1131 dentry->d_op = NULL; 1151 dentry->d_op = NULL;
1132 dentry->d_fsdata = NULL; 1152 dentry->d_fsdata = NULL;
1133 dentry->d_mounted = 0; 1153 dentry->d_mounted = 0;
1134 INIT_HLIST_NODE(&dentry->d_hash); 1154 INIT_HLIST_NODE(&dentry->d_hash);
1135 INIT_LIST_HEAD(&dentry->d_lru); 1155 INIT_LIST_HEAD(&dentry->d_lru);
1136 INIT_LIST_HEAD(&dentry->d_subdirs); 1156 INIT_LIST_HEAD(&dentry->d_subdirs);
1137 INIT_LIST_HEAD(&dentry->d_alias); 1157 INIT_LIST_HEAD(&dentry->d_alias);
1138 1158
1139 if (parent) { 1159 if (parent) {
1140 dentry->d_parent = dget(parent); 1160 dentry->d_parent = dget(parent);
1141 dentry->d_sb = parent->d_sb; 1161 dentry->d_sb = parent->d_sb;
1142 } else { 1162 } else {
1143 INIT_LIST_HEAD(&dentry->d_u.d_child); 1163 INIT_LIST_HEAD(&dentry->d_u.d_child);
1144 } 1164 }
1145 1165
1146 spin_lock(&dcache_lock); 1166 spin_lock(&dcache_lock);
1147 if (parent) 1167 if (parent)
1148 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 1168 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1149 spin_unlock(&dcache_lock); 1169 spin_unlock(&dcache_lock);
1150 1170
1151 this_cpu_inc(nr_dentry); 1171 this_cpu_inc(nr_dentry);
1152 1172
1153 return dentry; 1173 return dentry;
1154 } 1174 }
1155 EXPORT_SYMBOL(d_alloc); 1175 EXPORT_SYMBOL(d_alloc);
1156 1176
1157 struct dentry *d_alloc_name(struct dentry *parent, const char *name) 1177 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1158 { 1178 {
1159 struct qstr q; 1179 struct qstr q;
1160 1180
1161 q.name = name; 1181 q.name = name;
1162 q.len = strlen(name); 1182 q.len = strlen(name);
1163 q.hash = full_name_hash(q.name, q.len); 1183 q.hash = full_name_hash(q.name, q.len);
1164 return d_alloc(parent, &q); 1184 return d_alloc(parent, &q);
1165 } 1185 }
1166 EXPORT_SYMBOL(d_alloc_name); 1186 EXPORT_SYMBOL(d_alloc_name);
1167 1187
1168 /* the caller must hold dcache_lock */ 1188 /* the caller must hold dcache_lock */
1169 static void __d_instantiate(struct dentry *dentry, struct inode *inode) 1189 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1170 { 1190 {
1171 if (inode) 1191 if (inode)
1172 list_add(&dentry->d_alias, &inode->i_dentry); 1192 list_add(&dentry->d_alias, &inode->i_dentry);
1173 dentry->d_inode = inode; 1193 dentry->d_inode = inode;
1174 fsnotify_d_instantiate(dentry, inode); 1194 fsnotify_d_instantiate(dentry, inode);
1175 } 1195 }
1176 1196
1177 /** 1197 /**
1178 * d_instantiate - fill in inode information for a dentry 1198 * d_instantiate - fill in inode information for a dentry
1179 * @entry: dentry to complete 1199 * @entry: dentry to complete
1180 * @inode: inode to attach to this dentry 1200 * @inode: inode to attach to this dentry
1181 * 1201 *
1182 * Fill in inode information in the entry. 1202 * Fill in inode information in the entry.
1183 * 1203 *
1184 * This turns negative dentries into productive full members 1204 * This turns negative dentries into productive full members
1185 * of society. 1205 * of society.
1186 * 1206 *
1187 * NOTE! This assumes that the inode count has been incremented 1207 * NOTE! This assumes that the inode count has been incremented
1188 * (or otherwise set) by the caller to indicate that it is now 1208 * (or otherwise set) by the caller to indicate that it is now
1189 * in use by the dcache. 1209 * in use by the dcache.
1190 */ 1210 */
1191 1211
1192 void d_instantiate(struct dentry *entry, struct inode * inode) 1212 void d_instantiate(struct dentry *entry, struct inode * inode)
1193 { 1213 {
1194 BUG_ON(!list_empty(&entry->d_alias)); 1214 BUG_ON(!list_empty(&entry->d_alias));
1195 spin_lock(&dcache_lock); 1215 spin_lock(&dcache_lock);
1196 __d_instantiate(entry, inode); 1216 __d_instantiate(entry, inode);
1197 spin_unlock(&dcache_lock); 1217 spin_unlock(&dcache_lock);
1198 security_d_instantiate(entry, inode); 1218 security_d_instantiate(entry, inode);
1199 } 1219 }
1200 EXPORT_SYMBOL(d_instantiate); 1220 EXPORT_SYMBOL(d_instantiate);
1201 1221
1202 /** 1222 /**
1203 * d_instantiate_unique - instantiate a non-aliased dentry 1223 * d_instantiate_unique - instantiate a non-aliased dentry
1204 * @entry: dentry to instantiate 1224 * @entry: dentry to instantiate
1205 * @inode: inode to attach to this dentry 1225 * @inode: inode to attach to this dentry
1206 * 1226 *
1207 * Fill in inode information in the entry. On success, it returns NULL. 1227 * Fill in inode information in the entry. On success, it returns NULL.
1208 * If an unhashed alias of "entry" already exists, then we return the 1228 * If an unhashed alias of "entry" already exists, then we return the
1209 * aliased dentry instead and drop one reference to inode. 1229 * aliased dentry instead and drop one reference to inode.
1210 * 1230 *
1211 * Note that in order to avoid conflicts with rename() etc, the caller 1231 * Note that in order to avoid conflicts with rename() etc, the caller
1212 * had better be holding the parent directory semaphore. 1232 * had better be holding the parent directory semaphore.
1213 * 1233 *
1214 * This also assumes that the inode count has been incremented 1234 * This also assumes that the inode count has been incremented
1215 * (or otherwise set) by the caller to indicate that it is now 1235 * (or otherwise set) by the caller to indicate that it is now
1216 * in use by the dcache. 1236 * in use by the dcache.
1217 */ 1237 */
1218 static struct dentry *__d_instantiate_unique(struct dentry *entry, 1238 static struct dentry *__d_instantiate_unique(struct dentry *entry,
1219 struct inode *inode) 1239 struct inode *inode)
1220 { 1240 {
1221 struct dentry *alias; 1241 struct dentry *alias;
1222 int len = entry->d_name.len; 1242 int len = entry->d_name.len;
1223 const char *name = entry->d_name.name; 1243 const char *name = entry->d_name.name;
1224 unsigned int hash = entry->d_name.hash; 1244 unsigned int hash = entry->d_name.hash;
1225 1245
1226 if (!inode) { 1246 if (!inode) {
1227 __d_instantiate(entry, NULL); 1247 __d_instantiate(entry, NULL);
1228 return NULL; 1248 return NULL;
1229 } 1249 }
1230 1250
1231 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1251 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1232 struct qstr *qstr = &alias->d_name; 1252 struct qstr *qstr = &alias->d_name;
1233 1253
1234 if (qstr->hash != hash) 1254 if (qstr->hash != hash)
1235 continue; 1255 continue;
1236 if (alias->d_parent != entry->d_parent) 1256 if (alias->d_parent != entry->d_parent)
1237 continue; 1257 continue;
1238 if (qstr->len != len) 1258 if (qstr->len != len)
1239 continue; 1259 continue;
1240 if (memcmp(qstr->name, name, len)) 1260 if (memcmp(qstr->name, name, len))
1241 continue; 1261 continue;
1242 dget_locked(alias); 1262 dget_locked(alias);
1243 return alias; 1263 return alias;
1244 } 1264 }
1245 1265
1246 __d_instantiate(entry, inode); 1266 __d_instantiate(entry, inode);
1247 return NULL; 1267 return NULL;
1248 } 1268 }
1249 1269
1250 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) 1270 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1251 { 1271 {
1252 struct dentry *result; 1272 struct dentry *result;
1253 1273
1254 BUG_ON(!list_empty(&entry->d_alias)); 1274 BUG_ON(!list_empty(&entry->d_alias));
1255 1275
1256 spin_lock(&dcache_lock); 1276 spin_lock(&dcache_lock);
1257 result = __d_instantiate_unique(entry, inode); 1277 result = __d_instantiate_unique(entry, inode);
1258 spin_unlock(&dcache_lock); 1278 spin_unlock(&dcache_lock);
1259 1279
1260 if (!result) { 1280 if (!result) {
1261 security_d_instantiate(entry, inode); 1281 security_d_instantiate(entry, inode);
1262 return NULL; 1282 return NULL;
1263 } 1283 }
1264 1284
1265 BUG_ON(!d_unhashed(result)); 1285 BUG_ON(!d_unhashed(result));
1266 iput(inode); 1286 iput(inode);
1267 return result; 1287 return result;
1268 } 1288 }
1269 1289
1270 EXPORT_SYMBOL(d_instantiate_unique); 1290 EXPORT_SYMBOL(d_instantiate_unique);
1271 1291
1272 /** 1292 /**
1273 * d_alloc_root - allocate root dentry 1293 * d_alloc_root - allocate root dentry
1274 * @root_inode: inode to allocate the root for 1294 * @root_inode: inode to allocate the root for
1275 * 1295 *
1276 * Allocate a root ("/") dentry for the inode given. The inode is 1296 * Allocate a root ("/") dentry for the inode given. The inode is
1277 * instantiated and returned. %NULL is returned if there is insufficient 1297 * instantiated and returned. %NULL is returned if there is insufficient
1278 * memory or the inode passed is %NULL. 1298 * memory or the inode passed is %NULL.
1279 */ 1299 */
1280 1300
1281 struct dentry * d_alloc_root(struct inode * root_inode) 1301 struct dentry * d_alloc_root(struct inode * root_inode)
1282 { 1302 {
1283 struct dentry *res = NULL; 1303 struct dentry *res = NULL;
1284 1304
1285 if (root_inode) { 1305 if (root_inode) {
1286 static const struct qstr name = { .name = "/", .len = 1 }; 1306 static const struct qstr name = { .name = "/", .len = 1 };
1287 1307
1288 res = d_alloc(NULL, &name); 1308 res = d_alloc(NULL, &name);
1289 if (res) { 1309 if (res) {
1290 res->d_sb = root_inode->i_sb; 1310 res->d_sb = root_inode->i_sb;
1291 res->d_parent = res; 1311 res->d_parent = res;
1292 d_instantiate(res, root_inode); 1312 d_instantiate(res, root_inode);
1293 } 1313 }
1294 } 1314 }
1295 return res; 1315 return res;
1296 } 1316 }
1297 EXPORT_SYMBOL(d_alloc_root); 1317 EXPORT_SYMBOL(d_alloc_root);
1298 1318
1299 static inline struct hlist_head *d_hash(struct dentry *parent, 1319 static inline struct hlist_head *d_hash(struct dentry *parent,
1300 unsigned long hash) 1320 unsigned long hash)
1301 { 1321 {
1302 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 1322 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1303 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); 1323 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1304 return dentry_hashtable + (hash & D_HASHMASK); 1324 return dentry_hashtable + (hash & D_HASHMASK);
1305 } 1325 }
1306 1326
/**
 * d_obtain_alias - find or allocate a dentry for a given inode
 * @inode: inode to allocate the dentry for
 *
 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 * similar open by handle operations.  The returned dentry may be anonymous,
 * or may have a full name (if the inode was already in the cache).
 *
 * When called on a directory inode, we must ensure that the inode only ever
 * has one dentry.  If a dentry is found, that is returned instead of
 * allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry.  In case of an error the reference on the inode is released.
 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in, and the error will be propagated to the return value,
 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
 */
struct dentry *d_obtain_alias(struct inode *inode)
{
	static const struct qstr anonstring = { .name = "" };
	struct dentry *tmp;
	struct dentry *res;

	/* Pass error inputs straight through as error returns. */
	if (!inode)
		return ERR_PTR(-ESTALE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	/* An existing alias is returned as-is; the inode reference is dropped. */
	res = d_find_alias(inode);
	if (res)
		goto out_iput;

	/* Allocate the candidate before taking dcache_lock. */
	tmp = d_alloc(NULL, &anonstring);
	if (!tmp) {
		res = ERR_PTR(-ENOMEM);
		goto out_iput;
	}
	tmp->d_parent = tmp; /* make sure dput doesn't croak */

	/*
	 * Look again now that dcache_lock is held.  If an alias turns up,
	 * throw away the dentry we just allocated and return the alias.
	 */
	spin_lock(&dcache_lock);
	res = __d_find_alias(inode, 0);
	if (res) {
		spin_unlock(&dcache_lock);
		dput(tmp);
		goto out_iput;
	}

	/* attach a disconnected dentry */
	spin_lock(&tmp->d_lock);
	tmp->d_sb = inode->i_sb;
	tmp->d_inode = inode;
	tmp->d_flags |= DCACHE_DISCONNECTED;
	/* Clear the flag under d_lock, together with the hash insertion below. */
	tmp->d_flags &= ~DCACHE_UNHASHED;
	list_add(&tmp->d_alias, &inode->i_dentry);
	/* The dentry goes on the superblock's s_anon list, not dentry_hashtable. */
	spin_lock(&dcache_hash_lock);
	hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
	spin_unlock(&dcache_hash_lock);
	spin_unlock(&tmp->d_lock);

	spin_unlock(&dcache_lock);
	return tmp;

 out_iput:
	/* Common exit for "alias found" and "allocation failed". */
	iput(inode);
	return res;
}
EXPORT_SYMBOL(d_obtain_alias);
1375 1395
/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode:  the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned.  Otherwise NULL
 * is returned.  This matches the expected return value of ->lookup.
 *
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
	struct dentry *new = NULL;

	/* Only directories are checked for an existing alias. */
	if (inode && S_ISDIR(inode->i_mode)) {
		spin_lock(&dcache_lock);
		new = __d_find_alias(inode, 1);
		if (new) {
			/* An alias found here is expected to be disconnected. */
			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
			spin_unlock(&dcache_lock);
			security_d_instantiate(new, inode);
			/* Move the existing alias into @dentry's place. */
			d_move(new, dentry);
			/*
			 * @dentry is not instantiated on this path, so the
			 * inode reference passed in is dropped here.
			 */
			iput(inode);
		} else {
			/* already taking dcache_lock, so d_add() by hand */
			__d_instantiate(dentry, inode);
			spin_unlock(&dcache_lock);
			security_d_instantiate(dentry, inode);
			d_rehash(dentry);
		}
	} else
		/* Non-directory, or NULL inode: plain d_add(). */
		d_add(dentry, inode);
	return new;
}
EXPORT_SYMBOL(d_splice_alias);
1417 1437
/**
 * d_add_ci - lookup or allocate new dentry with case-exact name
 * @inode:  the inode case-insensitive lookup has found
 * @dentry: the negative dentry that was passed to the parent's lookup func
 * @name:   the case-exact name to be associated with the returned dentry
 *
 * This is to avoid filling the dcache with case-insensitive names to the
 * same inode, only the actual correct case is stored in the dcache for
 * case-insensitive filesystems.
 *
 * For a case-insensitive lookup match and if the case-exact dentry
 * already exists in the dcache, use it and return it.
 *
 * If no entry exists with the exact case name, allocate new dentry with
 * the exact case, and return the spliced entry.
 *
 * Returns the dentry to use, or ERR_PTR(-ENOMEM) if a new dentry could
 * not be allocated (the reference on @inode is dropped in that case).
 */
struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
			struct qstr *name)
{
	int error;
	struct dentry *found;
	struct dentry *new;

	/*
	 * First check if a dentry matching the name already exists,
	 * if not go ahead and create it now.
	 */
	found = d_hash_and_lookup(dentry->d_parent, name);
	if (!found) {
		new = d_alloc(dentry->d_parent, name);
		if (!new) {
			error = -ENOMEM;
			goto err_out;
		}

		/*
		 * d_splice_alias() returns non-NULL when it used an existing
		 * alias instead of @new; in that case @new is released.
		 */
		found = d_splice_alias(inode, new);
		if (found) {
			dput(new);
			return found;
		}
		return new;
	}

	/*
	 * If a matching dentry exists, and it's not negative use it.
	 *
	 * Decrement the reference count to balance the iget() done
	 * earlier on.
	 */
	if (found->d_inode) {
		if (unlikely(found->d_inode != inode)) {
			/* This can't happen because bad inodes are unhashed. */
			BUG_ON(!is_bad_inode(inode));
			BUG_ON(!is_bad_inode(found->d_inode));
		}
		iput(inode);
		return found;
	}

	/*
	 * Negative dentry: instantiate it unless the inode is a directory and
	 * already has a dentry.
	 */
	spin_lock(&dcache_lock);
	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
		__d_instantiate(found, inode);
		spin_unlock(&dcache_lock);
		security_d_instantiate(found, inode);
		return found;
	}

	/*
	 * In case a directory already has a (disconnected) entry grab a
	 * reference to it, move it in place and use it.
	 */
	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
	dget_locked(new);
	spin_unlock(&dcache_lock);
	security_d_instantiate(found, inode);
	d_move(new, found);
	/* @found is not instantiated on this path: drop the inode and it. */
	iput(inode);
	dput(found);
	return new;

err_out:
	iput(inode);
	return ERR_PTR(error);
}
EXPORT_SYMBOL(d_add_ci);
1507 1527
1508 /** 1528 /**
1509 * d_lookup - search for a dentry 1529 * d_lookup - search for a dentry
1510 * @parent: parent dentry 1530 * @parent: parent dentry
1511 * @name: qstr of name we wish to find 1531 * @name: qstr of name we wish to find
1512 * Returns: dentry, or NULL 1532 * Returns: dentry, or NULL
1513 * 1533 *
1514 * d_lookup searches the children of the parent dentry for the name in 1534 * d_lookup searches the children of the parent dentry for the name in
1515 * question. If the dentry is found its reference count is incremented and the 1535 * question. If the dentry is found its reference count is incremented and the
1516 * dentry is returned. The caller must use dput to free the entry when it has 1536 * dentry is returned. The caller must use dput to free the entry when it has
1517 * finished using it. %NULL is returned if the dentry does not exist. 1537 * finished using it. %NULL is returned if the dentry does not exist.
1518 */ 1538 */
1519 struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1539 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1520 { 1540 {
1521 struct dentry * dentry = NULL; 1541 struct dentry * dentry = NULL;
1522 unsigned long seq; 1542 unsigned long seq;
1523 1543
1524 do { 1544 do {
1525 seq = read_seqbegin(&rename_lock); 1545 seq = read_seqbegin(&rename_lock);
1526 dentry = __d_lookup(parent, name); 1546 dentry = __d_lookup(parent, name);
1527 if (dentry) 1547 if (dentry)
1528 break; 1548 break;
1529 } while (read_seqretry(&rename_lock, seq)); 1549 } while (read_seqretry(&rename_lock, seq));
1530 return dentry; 1550 return dentry;
1531 } 1551 }
1532 EXPORT_SYMBOL(d_lookup); 1552 EXPORT_SYMBOL(d_lookup);
1533 1553
/*
 * __d_lookup - search for a dentry (racy)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * __d_lookup is like d_lookup, however it may (rarely) return a
 * false-negative result due to unrelated rename activity.
 *
 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
 * however it must be used carefully, eg. with a following d_lookup in
 * the case of failure.
 *
 * A dentry that is found is returned with d_count incremented.
 *
 * __d_lookup callers must be commented.
 */
struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_head *head = d_hash(parent,hash);
	struct dentry *found = NULL;
	struct hlist_node *node;
	struct dentry *dentry;

	/*
	 * The hash list is protected using RCU.
	 *
	 * Take d_lock when comparing a candidate dentry, to avoid races
	 * with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/vfs/dcache-locking.txt for more details.
	 */
	rcu_read_lock();
	
	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
		struct qstr *qstr;

		/* Cheap unlocked filters first; only candidates get locked. */
		if (dentry->d_name.hash != hash)
			continue;
		if (dentry->d_parent != parent)
			continue;

		spin_lock(&dentry->d_lock);

		/*
		 * Recheck the dentry after taking the lock - d_move may have
		 * changed things.  Don't bother checking the hash because
		 * we're about to compare the whole name anyway.
		 */
		if (dentry->d_parent != parent)
			goto next;

		/* non-existing due to RCU? */
		if (d_unhashed(dentry))
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		qstr = &dentry->d_name;
		if (parent->d_op && parent->d_op->d_compare) {
			/* A non-zero return from ->d_compare means no match. */
			if (parent->d_op->d_compare(parent, parent->d_inode,
						dentry, dentry->d_inode,
						qstr->len, qstr->name, name))
				goto next;
		} else {
			if (qstr->len != len)
				goto next;
			if (memcmp(qstr->name, str, len))
				goto next;
		}

		/* Match: take the reference while d_lock is still held. */
		dentry->d_count++;
		found = dentry;
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
 	}
 	rcu_read_unlock();

 	return found;
}
1624 1644
1625 /** 1645 /**
1626 * d_hash_and_lookup - hash the qstr then search for a dentry 1646 * d_hash_and_lookup - hash the qstr then search for a dentry
1627 * @dir: Directory to search in 1647 * @dir: Directory to search in
1628 * @name: qstr of name we wish to find 1648 * @name: qstr of name we wish to find
1629 * 1649 *
1630 * On hash failure or on lookup failure NULL is returned. 1650 * On hash failure or on lookup failure NULL is returned.
1631 */ 1651 */
1632 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) 1652 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1633 { 1653 {
1634 struct dentry *dentry = NULL; 1654 struct dentry *dentry = NULL;
1635 1655
1636 /* 1656 /*
1637 * Check for a fs-specific hash function. Note that we must 1657 * Check for a fs-specific hash function. Note that we must
1638 * calculate the standard hash first, as the d_op->d_hash() 1658 * calculate the standard hash first, as the d_op->d_hash()
1639 * routine may choose to leave the hash value unchanged. 1659 * routine may choose to leave the hash value unchanged.
1640 */ 1660 */
1641 name->hash = full_name_hash(name->name, name->len); 1661 name->hash = full_name_hash(name->name, name->len);
1642 if (dir->d_op && dir->d_op->d_hash) { 1662 if (dir->d_op && dir->d_op->d_hash) {
1643 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) 1663 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1644 goto out; 1664 goto out;
1645 } 1665 }
1646 dentry = d_lookup(dir, name); 1666 dentry = d_lookup(dir, name);
1647 out: 1667 out:
1648 return dentry; 1668 return dentry;
1649 } 1669 }
1650 1670
1651 /** 1671 /**
1652 * d_validate - verify dentry provided from insecure source (deprecated) 1672 * d_validate - verify dentry provided from insecure source (deprecated)
1653 * @dentry: The dentry alleged to be valid child of @dparent 1673 * @dentry: The dentry alleged to be valid child of @dparent
1654 * @dparent: The parent dentry (known to be valid) 1674 * @dparent: The parent dentry (known to be valid)
1655 * 1675 *
1656 * An insecure source has sent us a dentry, here we verify it and dget() it. 1676 * An insecure source has sent us a dentry, here we verify it and dget() it.
1657 * This is used by ncpfs in its readdir implementation. 1677 * This is used by ncpfs in its readdir implementation.
1658 * Zero is returned in the dentry is invalid. 1678 * Zero is returned in the dentry is invalid.
1659 * 1679 *
1660 * This function is slow for big directories, and deprecated, do not use it. 1680 * This function is slow for big directories, and deprecated, do not use it.
1661 */ 1681 */
1662 int d_validate(struct dentry *dentry, struct dentry *dparent) 1682 int d_validate(struct dentry *dentry, struct dentry *dparent)
1663 { 1683 {
1664 struct dentry *child; 1684 struct dentry *child;
1665 1685
1666 spin_lock(&dcache_lock); 1686 spin_lock(&dcache_lock);
1667 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { 1687 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1668 if (dentry == child) { 1688 if (dentry == child) {
1669 __dget_locked(dentry); 1689 __dget_locked(dentry);
1670 spin_unlock(&dcache_lock); 1690 spin_unlock(&dcache_lock);
1671 return 1; 1691 return 1;
1672 } 1692 }
1673 } 1693 }
1674 spin_unlock(&dcache_lock); 1694 spin_unlock(&dcache_lock);
1675 1695
1676 return 0; 1696 return 0;
1677 } 1697 }
1678 EXPORT_SYMBOL(d_validate); 1698 EXPORT_SYMBOL(d_validate);
1679 1699
/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode
 * we can't do that and we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users
 */
 
/**
 * d_delete - delete a dentry
 * @dentry: The dentry to delete
 *
 * Turn the dentry into a negative dentry if possible, otherwise
 * remove it from the hash queues so it can be deleted later
 *
 * @dentry->d_inode is dereferenced unconditionally, so the dentry must
 * be positive when this is called.
 */
 
void d_delete(struct dentry * dentry)
{
	int isdir = 0;
	/*
	 * Are we the only user?
	 */
	spin_lock(&dcache_lock);
	spin_lock(&dentry->d_lock);
	/* Sample the file type now, before the inode may be released below. */
	isdir = S_ISDIR(dentry->d_inode->i_mode);
	if (dentry->d_count == 1) {
		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
		/*
		 * NOTE(review): this path returns without an explicit unlock;
		 * presumably dentry_iput() drops both d_lock and dcache_lock
		 * - confirm against its definition.
		 */
		dentry_iput(dentry);
		fsnotify_nameremove(dentry, isdir);
		return;
	}

	/* Other users exist: just take it out of the hash. */
	if (!d_unhashed(dentry))
		__d_drop(dentry);

	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);

	fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
1726 1746
/*
 * Insert @entry at the head of hash chain @list and clear its
 * DCACHE_UNHASHED flag so the flag agrees with hash membership.
 * Takes no locks itself; d_rehash() calls it (via _d_rehash()) with
 * entry->d_lock and dcache_hash_lock held.
 */
static void __d_rehash(struct dentry * entry, struct hlist_head *list)
{

	/* Flag first, then the RCU insertion that makes the entry visible. */
 	entry->d_flags &= ~DCACHE_UNHASHED;
 	hlist_add_head_rcu(&entry->d_hash, list);
}
1733 1753
1734 static void _d_rehash(struct dentry * entry) 1754 static void _d_rehash(struct dentry * entry)
1735 { 1755 {
1736 __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash)); 1756 __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
1737 } 1757 }
1738 1758
/**
 * d_rehash	- add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 *
 * Lock nesting used here, outermost first: dcache_lock, entry->d_lock,
 * dcache_hash_lock.  All three are released before returning.
 */
 
void d_rehash(struct dentry * entry)
{
	spin_lock(&dcache_lock);
	/* d_lock is held while _d_rehash() clears DCACHE_UNHASHED. */
	spin_lock(&entry->d_lock);
	spin_lock(&dcache_hash_lock);
	_d_rehash(entry);
	spin_unlock(&dcache_hash_lock);
	spin_unlock(&entry->d_lock);
	spin_unlock(&dcache_lock);
}
EXPORT_SYMBOL(d_rehash);
1757 1777
1758 /** 1778 /**
1759 * dentry_update_name_case - update case insensitive dentry with a new name 1779 * dentry_update_name_case - update case insensitive dentry with a new name
1760 * @dentry: dentry to be updated 1780 * @dentry: dentry to be updated
1761 * @name: new name 1781 * @name: new name
1762 * 1782 *
1763 * Update a case insensitive dentry with new case of name. 1783 * Update a case insensitive dentry with new case of name.
1764 * 1784 *
1765 * dentry must have been returned by d_lookup with name @name. Old and new 1785 * dentry must have been returned by d_lookup with name @name. Old and new
1766 * name lengths must match (ie. no d_compare which allows mismatched name 1786 * name lengths must match (ie. no d_compare which allows mismatched name
1767 * lengths). 1787 * lengths).
1768 * 1788 *
1769 * Parent inode i_mutex must be held over d_lookup and into this call (to 1789 * Parent inode i_mutex must be held over d_lookup and into this call (to
1770 * keep renames and concurrent inserts, and readdir(2) away). 1790 * keep renames and concurrent inserts, and readdir(2) away).
1771 */ 1791 */
1772 void dentry_update_name_case(struct dentry *dentry, struct qstr *name) 1792 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
1773 { 1793 {
1774 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 1794 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
1775 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ 1795 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
1776 1796
1777 spin_lock(&dcache_lock); 1797 spin_lock(&dcache_lock);
1778 spin_lock(&dentry->d_lock); 1798 spin_lock(&dentry->d_lock);
1779 memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); 1799 memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
1780 spin_unlock(&dentry->d_lock); 1800 spin_unlock(&dentry->d_lock);
1781 spin_unlock(&dcache_lock); 1801 spin_unlock(&dcache_lock);
1782 } 1802 }
1783 EXPORT_SYMBOL(dentry_update_name_case); 1803 EXPORT_SYMBOL(dentry_update_name_case);
1784 1804
1785 /* 1805 /*
1786 * When switching names, the actual string doesn't strictly have to 1806 * When switching names, the actual string doesn't strictly have to
1787 * be preserved in the target - because we're dropping the target 1807 * be preserved in the target - because we're dropping the target
1788 * anyway. As such, we can just do a simple memcpy() to copy over 1808 * anyway. As such, we can just do a simple memcpy() to copy over
1789 * the new name before we switch. 1809 * the new name before we switch.
1790 * 1810 *
1791 * Note that we have to be a lot more careful about getting the hash 1811 * Note that we have to be a lot more careful about getting the hash
1792 * switched - we have to switch the hash value properly even if it 1812 * switched - we have to switch the hash value properly even if it
1793 * then no longer matches the actual (corrupted) string of the target. 1813 * then no longer matches the actual (corrupted) string of the target.
1794 * The hash value has to match the hash queue that the dentry is on.. 1814 * The hash value has to match the hash queue that the dentry is on..
1795 */ 1815 */
1796 static void switch_names(struct dentry *dentry, struct dentry *target) 1816 static void switch_names(struct dentry *dentry, struct dentry *target)
1797 { 1817 {
1798 if (dname_external(target)) { 1818 if (dname_external(target)) {
1799 if (dname_external(dentry)) { 1819 if (dname_external(dentry)) {
1800 /* 1820 /*
1801 * Both external: swap the pointers 1821 * Both external: swap the pointers
1802 */ 1822 */
1803 swap(target->d_name.name, dentry->d_name.name); 1823 swap(target->d_name.name, dentry->d_name.name);
1804 } else { 1824 } else {
1805 /* 1825 /*
1806 * dentry:internal, target:external. Steal target's 1826 * dentry:internal, target:external. Steal target's
1807 * storage and make target internal. 1827 * storage and make target internal.
1808 */ 1828 */
1809 memcpy(target->d_iname, dentry->d_name.name, 1829 memcpy(target->d_iname, dentry->d_name.name,
1810 dentry->d_name.len + 1); 1830 dentry->d_name.len + 1);
1811 dentry->d_name.name = target->d_name.name; 1831 dentry->d_name.name = target->d_name.name;
1812 target->d_name.name = target->d_iname; 1832 target->d_name.name = target->d_iname;
1813 } 1833 }
1814 } else { 1834 } else {
1815 if (dname_external(dentry)) { 1835 if (dname_external(dentry)) {
1816 /* 1836 /*
1817 * dentry:external, target:internal. Give dentry's 1837 * dentry:external, target:internal. Give dentry's
1818 * storage to target and make dentry internal 1838 * storage to target and make dentry internal
1819 */ 1839 */
1820 memcpy(dentry->d_iname, target->d_name.name, 1840 memcpy(dentry->d_iname, target->d_name.name,
1821 target->d_name.len + 1); 1841 target->d_name.len + 1);
1822 target->d_name.name = dentry->d_name.name; 1842 target->d_name.name = dentry->d_name.name;
1823 dentry->d_name.name = dentry->d_iname; 1843 dentry->d_name.name = dentry->d_iname;
1824 } else { 1844 } else {
1825 /* 1845 /*
1826 * Both are internal. Just copy target to dentry 1846 * Both are internal. Just copy target to dentry
1827 */ 1847 */
1828 memcpy(dentry->d_iname, target->d_name.name, 1848 memcpy(dentry->d_iname, target->d_name.name,
1829 target->d_name.len + 1); 1849 target->d_name.len + 1);
1830 dentry->d_name.len = target->d_name.len; 1850 dentry->d_name.len = target->d_name.len;
1831 return; 1851 return;
1832 } 1852 }
1833 } 1853 }
1834 swap(dentry->d_name.len, target->d_name.len); 1854 swap(dentry->d_name.len, target->d_name.len);
1835 } 1855 }
1836 1856
1837 /* 1857 /*
1838 * We cannibalize "target" when moving dentry on top of it, 1858 * We cannibalize "target" when moving dentry on top of it,
1839 * because it's going to be thrown away anyway. We could be more 1859 * because it's going to be thrown away anyway. We could be more
1840 * polite about it, though. 1860 * polite about it, though.
1841 * 1861 *
1842 * This forceful removal will result in ugly /proc output if 1862 * This forceful removal will result in ugly /proc output if
1843 * somebody holds a file open that got deleted due to a rename. 1863 * somebody holds a file open that got deleted due to a rename.
1844 * We could be nicer about the deleted file, and let it show 1864 * We could be nicer about the deleted file, and let it show
1845 * up under the name it had before it was deleted rather than 1865 * up under the name it had before it was deleted rather than
1846 * under the original name of the file that was moved on top of it. 1866 * under the original name of the file that was moved on top of it.
1847 */ 1867 */
1848 1868
/*
 * d_move_locked - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 *
 * d_move() and __d_unalias() call this with dcache_lock held.  The
 * function itself takes rename_lock for writing and the d_lock of both
 * dentries, and drops all three before returning.
 */
static void d_move_locked(struct dentry * dentry, struct dentry * target)
{
	/* A negative dentry only triggers a warning; the move still happens. */
	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	write_seqlock(&rename_lock);
	/*
	 * XXXX: do we really need to take target->d_lock?
	 *
	 * Lock ordering: if one dentry is an ancestor of the other, the
	 * ancestor's d_lock is taken first.  Otherwise the dentry at the
	 * lower address is locked first.
	 */
	if (d_ancestor(dentry, target)) {
		spin_lock(&dentry->d_lock);
		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
	} else if (d_ancestor(target, dentry) || target < dentry) {
		spin_lock(&target->d_lock);
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	} else {
		spin_lock(&dentry->d_lock);
		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
	}

	/* Move the dentry to the target hash queue, if on different bucket */
	spin_lock(&dcache_hash_lock);
	if (!d_unhashed(dentry))
		hlist_del_rcu(&dentry->d_hash);
	/* The bucket is chosen from target's parent and target's name hash. */
	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
	spin_unlock(&dcache_hash_lock);

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	/* Unlink both from their current parents' child lists. */
	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	swap(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		/*
		 * dentry had no parent to give away: it adopts target's
		 * parent, and target becomes its own parent with an empty
		 * child link.
		 */
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		swap(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	spin_unlock(&target->d_lock);
	/* fsnotify_d_move() is called while dentry->d_lock is still held. */
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
	write_sequnlock(&rename_lock);
}
1909 1932
1910 /** 1933 /**
1911 * d_move - move a dentry 1934 * d_move - move a dentry
1912 * @dentry: entry to move 1935 * @dentry: entry to move
1913 * @target: new dentry 1936 * @target: new dentry
1914 * 1937 *
1915 * Update the dcache to reflect the move of a file name. Negative 1938 * Update the dcache to reflect the move of a file name. Negative
1916 * dcache entries should not be moved in this way. 1939 * dcache entries should not be moved in this way.
1917 */ 1940 */
1918 1941
1919 void d_move(struct dentry * dentry, struct dentry * target) 1942 void d_move(struct dentry * dentry, struct dentry * target)
1920 { 1943 {
1921 spin_lock(&dcache_lock); 1944 spin_lock(&dcache_lock);
1922 d_move_locked(dentry, target); 1945 d_move_locked(dentry, target);
1923 spin_unlock(&dcache_lock); 1946 spin_unlock(&dcache_lock);
1924 } 1947 }
1925 EXPORT_SYMBOL(d_move); 1948 EXPORT_SYMBOL(d_move);
1926 1949
1927 /** 1950 /**
1928 * d_ancestor - search for an ancestor 1951 * d_ancestor - search for an ancestor
1929 * @p1: ancestor dentry 1952 * @p1: ancestor dentry
1930 * @p2: child dentry 1953 * @p2: child dentry
1931 * 1954 *
1932 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is 1955 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
1933 * an ancestor of p2, else NULL. 1956 * an ancestor of p2, else NULL.
1934 */ 1957 */
1935 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) 1958 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1936 { 1959 {
1937 struct dentry *p; 1960 struct dentry *p;
1938 1961
1939 for (p = p2; !IS_ROOT(p); p = p->d_parent) { 1962 for (p = p2; !IS_ROOT(p); p = p->d_parent) {
1940 if (p->d_parent == p1) 1963 if (p->d_parent == p1)
1941 return p; 1964 return p;
1942 } 1965 }
1943 return NULL; 1966 return NULL;
1944 } 1967 }
1945 1968
/*
 * This helper attempts to cope with remotely renamed directories
 *
 * It assumes that the caller is already holding
 * dentry->d_parent->d_inode->i_mutex and the dcache_lock
 *
 * Note: If ever the locking in lock_rename() changes, then please
 * remember to update this too...
 *
 * On success @alias is moved onto @dentry with d_move_locked() and
 * @alias is returned.  Otherwise an ERR_PTR is returned: -ELOOP if
 * @alias is an ancestor of @dentry, -EBUSY if one of the extra mutexes
 * could not be taken without blocking.  In every case dcache_lock is
 * released before returning, followed by any mutex acquired here.
 */
static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
	__releases(dcache_lock)
{
	/* Mutexes acquired by this function; NULL means "not taken". */
	struct mutex *m1 = NULL, *m2 = NULL;
	struct dentry *ret;

	/* If alias and dentry share a parent, then no extra locks required */
	if (alias->d_parent == dentry->d_parent)
		goto out_unalias;

	/* Check for loops */
	ret = ERR_PTR(-ELOOP);
	if (d_ancestor(alias, dentry))
		goto out_err;

	/* See lock_rename() */
	/* Only trylock is used: dcache_lock (a spinlock) is held here. */
	ret = ERR_PTR(-EBUSY);
	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
		goto out_err;
	m1 = &dentry->d_sb->s_vfs_rename_mutex;
	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
		goto out_err;
	m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
	d_move_locked(alias, dentry);
	ret = alias;
out_err:
	spin_unlock(&dcache_lock);
	/* Release in reverse order of acquisition. */
	if (m2)
		mutex_unlock(m2);
	if (m1)
		mutex_unlock(m1);
	return ret;
}
1989 2012
1990 /* 2013 /*
1991 * Prepare an anonymous dentry for life in the superblock's dentry tree as a 2014 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1992 * named dentry in place of the dentry to be replaced. 2015 * named dentry in place of the dentry to be replaced.
1993 */ 2016 */
1994 static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) 2017 static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1995 { 2018 {
1996 struct dentry *dparent, *aparent; 2019 struct dentry *dparent, *aparent;
1997 2020
1998 switch_names(dentry, anon); 2021 switch_names(dentry, anon);
1999 swap(dentry->d_name.hash, anon->d_name.hash); 2022 swap(dentry->d_name.hash, anon->d_name.hash);
2000 2023
2001 dparent = dentry->d_parent; 2024 dparent = dentry->d_parent;
2002 aparent = anon->d_parent; 2025 aparent = anon->d_parent;
2003 2026
2004 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2027 dentry->d_parent = (aparent == anon) ? dentry : aparent;
2005 list_del(&dentry->d_u.d_child); 2028 list_del(&dentry->d_u.d_child);
2006 if (!IS_ROOT(dentry)) 2029 if (!IS_ROOT(dentry))
2007 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 2030 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2008 else 2031 else
2009 INIT_LIST_HEAD(&dentry->d_u.d_child); 2032 INIT_LIST_HEAD(&dentry->d_u.d_child);
2010 2033
2011 anon->d_parent = (dparent == dentry) ? anon : dparent; 2034 anon->d_parent = (dparent == dentry) ? anon : dparent;
2012 list_del(&anon->d_u.d_child); 2035 list_del(&anon->d_u.d_child);
2013 if (!IS_ROOT(anon)) 2036 if (!IS_ROOT(anon))
2014 list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs); 2037 list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2015 else 2038 else
2016 INIT_LIST_HEAD(&anon->d_u.d_child); 2039 INIT_LIST_HEAD(&anon->d_u.d_child);
2017 2040
2018 anon->d_flags &= ~DCACHE_DISCONNECTED; 2041 anon->d_flags &= ~DCACHE_DISCONNECTED;
2019 } 2042 }
2020 2043
/**
 * d_materialise_unique - introduce an inode into the tree
 * @dentry: candidate dentry
 * @inode: inode to bind to the dentry, to which aliases may be attached
 *
 * Introduces a dentry into the tree, substituting an extant disconnected
 * root directory alias in its place if there is one
 *
 * @dentry must be unhashed on entry (BUG otherwise).  Returns NULL when
 * @dentry itself ends up being the dentry used; security_d_instantiate()
 * is called on it first.  Otherwise the inode reference is dropped with
 * iput() and the dentry used instead is returned; that value may also be
 * an ERR_PTR propagated from __d_unalias().
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
	struct dentry *actual;

	BUG_ON(!d_unhashed(dentry));

	spin_lock(&dcache_lock);

	/* No inode: instantiate dentry with a NULL inode and hash it. */
	if (!inode) {
		actual = dentry;
		__d_instantiate(dentry, NULL);
		goto found_lock;
	}

	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias;

		/* Does an aliased dentry already exist? */
		alias = __d_find_alias(inode, 0);
		if (alias) {
			actual = alias;
			/* Is this an anonymous mountpoint that we could splice
			 * into our tree? */
			if (IS_ROOT(alias)) {
				/*
				 * alias->d_lock is taken here and released
				 * after the rehash at "found" (actual == alias).
				 */
				spin_lock(&alias->d_lock);
				__d_materialise_dentry(dentry, alias);
				__d_drop(alias);
				goto found;
			}
			/* Nope, but we must(!) avoid directory aliasing */
			/* __d_unalias() drops dcache_lock, hence out_nolock. */
			actual = __d_unalias(dentry, alias);
			if (IS_ERR(actual))
				dput(alias);
			goto out_nolock;
		}
	}

	/* Add a unique reference */
	actual = __d_instantiate_unique(dentry, inode);
	if (!actual)
		actual = dentry;
	else if (unlikely(!d_unhashed(actual)))
		goto shouldnt_be_hashed;

found_lock:
	spin_lock(&actual->d_lock);
found:
	/* Reached with actual->d_lock and dcache_lock held. */
	spin_lock(&dcache_hash_lock);
	_d_rehash(actual);
	spin_unlock(&dcache_hash_lock);
	spin_unlock(&actual->d_lock);
	spin_unlock(&dcache_lock);
out_nolock:
	if (actual == dentry) {
		security_d_instantiate(dentry, inode);
		return NULL;
	}

	/* A different dentry (or an error) is returned: drop the inode ref. */
	iput(inode);
	return actual;

shouldnt_be_hashed:
	spin_unlock(&dcache_lock);
	BUG();
}
EXPORT_SYMBOL_GPL(d_materialise_unique);
2095 2118
/*
 * Copy @namelen bytes of @str immediately in front of *@buffer and move
 * *@buffer back to the start of the copy.
 *
 * *@buflen is the space still available in front of *@buffer; it is reduced
 * by @namelen unconditionally.  If that makes it negative, nothing is copied,
 * *@buffer is left alone and -ENAMETOOLONG is returned.  Returns 0 otherwise.
 */
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	char *dst;

	*buflen -= namelen;
	if (*buflen < 0)
		return -ENAMETOOLONG;

	dst = *buffer - namelen;
	memcpy(dst, str, namelen);
	*buffer = dst;
	return 0;
}
2105 2128
2106 static int prepend_name(char **buffer, int *buflen, struct qstr *name) 2129 static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2107 { 2130 {
2108 return prepend(buffer, buflen, name->name, name->len); 2131 return prepend(buffer, buflen, name->name, name->len);
2109 } 2132 }
2110 2133
2111 /** 2134 /**
2112 * Prepend path string to a buffer 2135 * Prepend path string to a buffer
2113 * 2136 *
2114 * @path: the dentry/vfsmount to report 2137 * @path: the dentry/vfsmount to report
2115 * @root: root vfsmnt/dentry (may be modified by this function) 2138 * @root: root vfsmnt/dentry (may be modified by this function)
2116 * @buffer: pointer to the end of the buffer 2139 * @buffer: pointer to the end of the buffer
2117 * @buflen: pointer to buffer length 2140 * @buflen: pointer to buffer length
2118 * 2141 *
2119 * Caller holds the dcache_lock. 2142 * Caller holds the dcache_lock.
2120 * 2143 *
2121 * If path is not reachable from the supplied root, then the value of 2144 * If path is not reachable from the supplied root, then the value of
2122 * root is changed (without modifying refcounts). 2145 * root is changed (without modifying refcounts).
2123 */ 2146 */
2124 static int prepend_path(const struct path *path, struct path *root, 2147 static int prepend_path(const struct path *path, struct path *root,
2125 char **buffer, int *buflen) 2148 char **buffer, int *buflen)
2126 { 2149 {
2127 struct dentry *dentry = path->dentry; 2150 struct dentry *dentry = path->dentry;
2128 struct vfsmount *vfsmnt = path->mnt; 2151 struct vfsmount *vfsmnt = path->mnt;
2129 bool slash = false; 2152 bool slash = false;
2130 int error = 0; 2153 int error = 0;
2131 2154
2132 br_read_lock(vfsmount_lock); 2155 br_read_lock(vfsmount_lock);
2133 while (dentry != root->dentry || vfsmnt != root->mnt) { 2156 while (dentry != root->dentry || vfsmnt != root->mnt) {
2134 struct dentry * parent; 2157 struct dentry * parent;
2135 2158
2136 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 2159 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2137 /* Global root? */ 2160 /* Global root? */
2138 if (vfsmnt->mnt_parent == vfsmnt) { 2161 if (vfsmnt->mnt_parent == vfsmnt) {
2139 goto global_root; 2162 goto global_root;
2140 } 2163 }
2141 dentry = vfsmnt->mnt_mountpoint; 2164 dentry = vfsmnt->mnt_mountpoint;
2142 vfsmnt = vfsmnt->mnt_parent; 2165 vfsmnt = vfsmnt->mnt_parent;
2143 continue; 2166 continue;
2144 } 2167 }
2145 parent = dentry->d_parent; 2168 parent = dentry->d_parent;
2146 prefetch(parent); 2169 prefetch(parent);
2147 error = prepend_name(buffer, buflen, &dentry->d_name); 2170 error = prepend_name(buffer, buflen, &dentry->d_name);
2148 if (!error) 2171 if (!error)
2149 error = prepend(buffer, buflen, "/", 1); 2172 error = prepend(buffer, buflen, "/", 1);
2150 if (error) 2173 if (error)
2151 break; 2174 break;
2152 2175
2153 slash = true; 2176 slash = true;
2154 dentry = parent; 2177 dentry = parent;
2155 } 2178 }
2156 2179
2157 out: 2180 out:
2158 if (!error && !slash) 2181 if (!error && !slash)
2159 error = prepend(buffer, buflen, "/", 1); 2182 error = prepend(buffer, buflen, "/", 1);
2160 2183
2161 br_read_unlock(vfsmount_lock); 2184 br_read_unlock(vfsmount_lock);
2162 return error; 2185 return error;
2163 2186
2164 global_root: 2187 global_root:
2165 /* 2188 /*
2166 * Filesystems needing to implement special "root names" 2189 * Filesystems needing to implement special "root names"
2167 * should do so with ->d_dname() 2190 * should do so with ->d_dname()
2168 */ 2191 */
2169 if (IS_ROOT(dentry) && 2192 if (IS_ROOT(dentry) &&
2170 (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) { 2193 (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2171 WARN(1, "Root dentry has weird name <%.*s>\n", 2194 WARN(1, "Root dentry has weird name <%.*s>\n",
2172 (int) dentry->d_name.len, dentry->d_name.name); 2195 (int) dentry->d_name.len, dentry->d_name.name);
2173 } 2196 }
2174 root->mnt = vfsmnt; 2197 root->mnt = vfsmnt;
2175 root->dentry = dentry; 2198 root->dentry = dentry;
2176 goto out; 2199 goto out;
2177 } 2200 }
2178 2201
2179 /** 2202 /**
2180 * __d_path - return the path of a dentry 2203 * __d_path - return the path of a dentry
2181 * @path: the dentry/vfsmount to report 2204 * @path: the dentry/vfsmount to report
2182 * @root: root vfsmnt/dentry (may be modified by this function) 2205 * @root: root vfsmnt/dentry (may be modified by this function)
2183 * @buf: buffer to return value in 2206 * @buf: buffer to return value in
2184 * @buflen: buffer length 2207 * @buflen: buffer length
2185 * 2208 *
2186 * Convert a dentry into an ASCII path name. 2209 * Convert a dentry into an ASCII path name.
2187 * 2210 *
2188 * Returns a pointer into the buffer or an error code if the 2211 * Returns a pointer into the buffer or an error code if the
2189 * path was too long. 2212 * path was too long.
2190 * 2213 *
2191 * "buflen" should be positive. 2214 * "buflen" should be positive.
2192 * 2215 *
2193 * If path is not reachable from the supplied root, then the value of 2216 * If path is not reachable from the supplied root, then the value of
2194 * root is changed (without modifying refcounts). 2217 * root is changed (without modifying refcounts).
2195 */ 2218 */
2196 char *__d_path(const struct path *path, struct path *root, 2219 char *__d_path(const struct path *path, struct path *root,
2197 char *buf, int buflen) 2220 char *buf, int buflen)
2198 { 2221 {
2199 char *res = buf + buflen; 2222 char *res = buf + buflen;
2200 int error; 2223 int error;
2201 2224
2202 prepend(&res, &buflen, "\0", 1); 2225 prepend(&res, &buflen, "\0", 1);
2203 spin_lock(&dcache_lock); 2226 spin_lock(&dcache_lock);
2204 error = prepend_path(path, root, &res, &buflen); 2227 error = prepend_path(path, root, &res, &buflen);
2205 spin_unlock(&dcache_lock); 2228 spin_unlock(&dcache_lock);
2206 2229
2207 if (error) 2230 if (error)
2208 return ERR_PTR(error); 2231 return ERR_PTR(error);
2209 return res; 2232 return res;
2210 } 2233 }
2211 2234
2212 /* 2235 /*
2213 * same as __d_path but appends "(deleted)" for unlinked files. 2236 * same as __d_path but appends "(deleted)" for unlinked files.
2214 */ 2237 */
2215 static int path_with_deleted(const struct path *path, struct path *root, 2238 static int path_with_deleted(const struct path *path, struct path *root,
2216 char **buf, int *buflen) 2239 char **buf, int *buflen)
2217 { 2240 {
2218 prepend(buf, buflen, "\0", 1); 2241 prepend(buf, buflen, "\0", 1);
2219 if (d_unlinked(path->dentry)) { 2242 if (d_unlinked(path->dentry)) {
2220 int error = prepend(buf, buflen, " (deleted)", 10); 2243 int error = prepend(buf, buflen, " (deleted)", 10);
2221 if (error) 2244 if (error)
2222 return error; 2245 return error;
2223 } 2246 }
2224 2247
2225 return prepend_path(path, root, buf, buflen); 2248 return prepend_path(path, root, buf, buflen);
2226 } 2249 }
2227 2250
/* Prepend the literal "(unreachable)" marker (13 bytes, no NUL). */
static int prepend_unreachable(char **buffer, int *buflen)
{
	static const char marker[] = "(unreachable)";

	return prepend(buffer, buflen, marker, sizeof(marker) - 1);
}
2232 2255
2233 /** 2256 /**
2234 * d_path - return the path of a dentry 2257 * d_path - return the path of a dentry
2235 * @path: path to report 2258 * @path: path to report
2236 * @buf: buffer to return value in 2259 * @buf: buffer to return value in
2237 * @buflen: buffer length 2260 * @buflen: buffer length
2238 * 2261 *
2239 * Convert a dentry into an ASCII path name. If the entry has been deleted 2262 * Convert a dentry into an ASCII path name. If the entry has been deleted
2240 * the string " (deleted)" is appended. Note that this is ambiguous. 2263 * the string " (deleted)" is appended. Note that this is ambiguous.
2241 * 2264 *
2242 * Returns a pointer into the buffer or an error code if the path was 2265 * Returns a pointer into the buffer or an error code if the path was
2243 * too long. Note: Callers should use the returned pointer, not the passed 2266 * too long. Note: Callers should use the returned pointer, not the passed
2244 * in buffer, to use the name! The implementation often starts at an offset 2267 * in buffer, to use the name! The implementation often starts at an offset
2245 * into the buffer, and may leave 0 bytes at the start. 2268 * into the buffer, and may leave 0 bytes at the start.
2246 * 2269 *
2247 * "buflen" should be positive. 2270 * "buflen" should be positive.
2248 */ 2271 */
2249 char *d_path(const struct path *path, char *buf, int buflen) 2272 char *d_path(const struct path *path, char *buf, int buflen)
2250 { 2273 {
2251 char *res = buf + buflen; 2274 char *res = buf + buflen;
2252 struct path root; 2275 struct path root;
2253 struct path tmp; 2276 struct path tmp;
2254 int error; 2277 int error;
2255 2278
2256 /* 2279 /*
2257 * We have various synthetic filesystems that never get mounted. On 2280 * We have various synthetic filesystems that never get mounted. On
2258 * these filesystems dentries are never used for lookup purposes, and 2281 * these filesystems dentries are never used for lookup purposes, and
2259 * thus don't need to be hashed. They also don't need a name until a 2282 * thus don't need to be hashed. They also don't need a name until a
2260 * user wants to identify the object in /proc/pid/fd/. The little hack 2283 * user wants to identify the object in /proc/pid/fd/. The little hack
2261 * below allows us to generate a name for these objects on demand: 2284 * below allows us to generate a name for these objects on demand:
2262 */ 2285 */
2263 if (path->dentry->d_op && path->dentry->d_op->d_dname) 2286 if (path->dentry->d_op && path->dentry->d_op->d_dname)
2264 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2287 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2265 2288
2266 get_fs_root(current->fs, &root); 2289 get_fs_root(current->fs, &root);
2267 spin_lock(&dcache_lock); 2290 spin_lock(&dcache_lock);
2268 tmp = root; 2291 tmp = root;
2269 error = path_with_deleted(path, &tmp, &res, &buflen); 2292 error = path_with_deleted(path, &tmp, &res, &buflen);
2270 if (error) 2293 if (error)
2271 res = ERR_PTR(error); 2294 res = ERR_PTR(error);
2272 spin_unlock(&dcache_lock); 2295 spin_unlock(&dcache_lock);
2273 path_put(&root); 2296 path_put(&root);
2274 return res; 2297 return res;
2275 } 2298 }
2276 EXPORT_SYMBOL(d_path); 2299 EXPORT_SYMBOL(d_path);
2277 2300
2278 /** 2301 /**
2279 * d_path_with_unreachable - return the path of a dentry 2302 * d_path_with_unreachable - return the path of a dentry
2280 * @path: path to report 2303 * @path: path to report
2281 * @buf: buffer to return value in 2304 * @buf: buffer to return value in
2282 * @buflen: buffer length 2305 * @buflen: buffer length
2283 * 2306 *
2284 * The difference from d_path() is that this prepends "(unreachable)" 2307 * The difference from d_path() is that this prepends "(unreachable)"
2285 * to paths which are unreachable from the current process' root. 2308 * to paths which are unreachable from the current process' root.
2286 */ 2309 */
2287 char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) 2310 char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2288 { 2311 {
2289 char *res = buf + buflen; 2312 char *res = buf + buflen;
2290 struct path root; 2313 struct path root;
2291 struct path tmp; 2314 struct path tmp;
2292 int error; 2315 int error;
2293 2316
2294 if (path->dentry->d_op && path->dentry->d_op->d_dname) 2317 if (path->dentry->d_op && path->dentry->d_op->d_dname)
2295 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2318 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2296 2319
2297 get_fs_root(current->fs, &root); 2320 get_fs_root(current->fs, &root);
2298 spin_lock(&dcache_lock); 2321 spin_lock(&dcache_lock);
2299 tmp = root; 2322 tmp = root;
2300 error = path_with_deleted(path, &tmp, &res, &buflen); 2323 error = path_with_deleted(path, &tmp, &res, &buflen);
2301 if (!error && !path_equal(&tmp, &root)) 2324 if (!error && !path_equal(&tmp, &root))
2302 error = prepend_unreachable(&res, &buflen); 2325 error = prepend_unreachable(&res, &buflen);
2303 spin_unlock(&dcache_lock); 2326 spin_unlock(&dcache_lock);
2304 path_put(&root); 2327 path_put(&root);
2305 if (error) 2328 if (error)
2306 res = ERR_PTR(error); 2329 res = ERR_PTR(error);
2307 2330
2308 return res; 2331 return res;
2309 } 2332 }
2310 2333
2311 /* 2334 /*
2312 * Helper function for dentry_operations.d_dname() members 2335 * Helper function for dentry_operations.d_dname() members
2313 */ 2336 */
2314 char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, 2337 char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2315 const char *fmt, ...) 2338 const char *fmt, ...)
2316 { 2339 {
2317 va_list args; 2340 va_list args;
2318 char temp[64]; 2341 char temp[64];
2319 int sz; 2342 int sz;
2320 2343
2321 va_start(args, fmt); 2344 va_start(args, fmt);
2322 sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1; 2345 sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2323 va_end(args); 2346 va_end(args);
2324 2347
2325 if (sz > sizeof(temp) || sz > buflen) 2348 if (sz > sizeof(temp) || sz > buflen)
2326 return ERR_PTR(-ENAMETOOLONG); 2349 return ERR_PTR(-ENAMETOOLONG);
2327 2350
2328 buffer += buflen - sz; 2351 buffer += buflen - sz;
2329 return memcpy(buffer, temp, sz); 2352 return memcpy(buffer, temp, sz);
2330 } 2353 }
2331 2354
2332 /* 2355 /*
2333 * Write full pathname from the root of the filesystem into the buffer. 2356 * Write full pathname from the root of the filesystem into the buffer.
2334 */ 2357 */
2335 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) 2358 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2336 { 2359 {
2337 char *end = buf + buflen; 2360 char *end = buf + buflen;
2338 char *retval; 2361 char *retval;
2339 2362
2340 prepend(&end, &buflen, "\0", 1); 2363 prepend(&end, &buflen, "\0", 1);
2341 if (buflen < 1) 2364 if (buflen < 1)
2342 goto Elong; 2365 goto Elong;
2343 /* Get '/' right */ 2366 /* Get '/' right */
2344 retval = end-1; 2367 retval = end-1;
2345 *retval = '/'; 2368 *retval = '/';
2346 2369
2347 while (!IS_ROOT(dentry)) { 2370 while (!IS_ROOT(dentry)) {
2348 struct dentry *parent = dentry->d_parent; 2371 struct dentry *parent = dentry->d_parent;
2349 2372
2350 prefetch(parent); 2373 prefetch(parent);
2351 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || 2374 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
2352 (prepend(&end, &buflen, "/", 1) != 0)) 2375 (prepend(&end, &buflen, "/", 1) != 0))
2353 goto Elong; 2376 goto Elong;
2354 2377
2355 retval = end; 2378 retval = end;
2356 dentry = parent; 2379 dentry = parent;
2357 } 2380 }
2358 return retval; 2381 return retval;
2359 Elong: 2382 Elong:
2360 return ERR_PTR(-ENAMETOOLONG); 2383 return ERR_PTR(-ENAMETOOLONG);
2361 } 2384 }
2362 2385
2363 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) 2386 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2364 { 2387 {
2365 char *retval; 2388 char *retval;
2366 2389
2367 spin_lock(&dcache_lock); 2390 spin_lock(&dcache_lock);
2368 retval = __dentry_path(dentry, buf, buflen); 2391 retval = __dentry_path(dentry, buf, buflen);
2369 spin_unlock(&dcache_lock); 2392 spin_unlock(&dcache_lock);
2370 2393
2371 return retval; 2394 return retval;
2372 } 2395 }
2373 EXPORT_SYMBOL(dentry_path_raw); 2396 EXPORT_SYMBOL(dentry_path_raw);
2374 2397
2375 char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2398 char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2376 { 2399 {
2377 char *p = NULL; 2400 char *p = NULL;
2378 char *retval; 2401 char *retval;
2379 2402
2380 spin_lock(&dcache_lock); 2403 spin_lock(&dcache_lock);
2381 if (d_unlinked(dentry)) { 2404 if (d_unlinked(dentry)) {
2382 p = buf + buflen; 2405 p = buf + buflen;
2383 if (prepend(&p, &buflen, "//deleted", 10) != 0) 2406 if (prepend(&p, &buflen, "//deleted", 10) != 0)
2384 goto Elong; 2407 goto Elong;
2385 buflen++; 2408 buflen++;
2386 } 2409 }
2387 retval = __dentry_path(dentry, buf, buflen); 2410 retval = __dentry_path(dentry, buf, buflen);
2388 spin_unlock(&dcache_lock); 2411 spin_unlock(&dcache_lock);
2389 if (!IS_ERR(retval) && p) 2412 if (!IS_ERR(retval) && p)
2390 *p = '/'; /* restore '/' overridden with '\0' */ 2413 *p = '/'; /* restore '/' overridden with '\0' */
2391 return retval; 2414 return retval;
2392 Elong: 2415 Elong:
2393 spin_unlock(&dcache_lock); 2416 spin_unlock(&dcache_lock);
2394 return ERR_PTR(-ENAMETOOLONG); 2417 return ERR_PTR(-ENAMETOOLONG);
2395 } 2418 }
2396 2419
2397 /* 2420 /*
2398 * NOTE! The user-level library version returns a 2421 * NOTE! The user-level library version returns a
2399 * character pointer. The kernel system call just 2422 * character pointer. The kernel system call just
2400 * returns the length of the buffer filled (which 2423 * returns the length of the buffer filled (which
2401 * includes the ending '\0' character), or a negative 2424 * includes the ending '\0' character), or a negative
2402 * error value. So libc would do something like 2425 * error value. So libc would do something like
2403 * 2426 *
2404 * char *getcwd(char * buf, size_t size) 2427 * char *getcwd(char * buf, size_t size)
2405 * { 2428 * {
2406 * int retval; 2429 * int retval;
2407 * 2430 *
2408 * retval = sys_getcwd(buf, size); 2431 * retval = sys_getcwd(buf, size);
2409 * if (retval >= 0) 2432 * if (retval >= 0)
2410 * return buf; 2433 * return buf;
2411 * errno = -retval; 2434 * errno = -retval;
2412 * return NULL; 2435 * return NULL;
2413 * } 2436 * }
2414 */ 2437 */
2415 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) 2438 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2416 { 2439 {
2417 int error; 2440 int error;
2418 struct path pwd, root; 2441 struct path pwd, root;
2419 char *page = (char *) __get_free_page(GFP_USER); 2442 char *page = (char *) __get_free_page(GFP_USER);
2420 2443
2421 if (!page) 2444 if (!page)
2422 return -ENOMEM; 2445 return -ENOMEM;
2423 2446
2424 get_fs_root_and_pwd(current->fs, &root, &pwd); 2447 get_fs_root_and_pwd(current->fs, &root, &pwd);
2425 2448
2426 error = -ENOENT; 2449 error = -ENOENT;
2427 spin_lock(&dcache_lock); 2450 spin_lock(&dcache_lock);
2428 if (!d_unlinked(pwd.dentry)) { 2451 if (!d_unlinked(pwd.dentry)) {
2429 unsigned long len; 2452 unsigned long len;
2430 struct path tmp = root; 2453 struct path tmp = root;
2431 char *cwd = page + PAGE_SIZE; 2454 char *cwd = page + PAGE_SIZE;
2432 int buflen = PAGE_SIZE; 2455 int buflen = PAGE_SIZE;
2433 2456
2434 prepend(&cwd, &buflen, "\0", 1); 2457 prepend(&cwd, &buflen, "\0", 1);
2435 error = prepend_path(&pwd, &tmp, &cwd, &buflen); 2458 error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2436 spin_unlock(&dcache_lock); 2459 spin_unlock(&dcache_lock);
2437 2460
2438 if (error) 2461 if (error)
2439 goto out; 2462 goto out;
2440 2463
2441 /* Unreachable from current root */ 2464 /* Unreachable from current root */
2442 if (!path_equal(&tmp, &root)) { 2465 if (!path_equal(&tmp, &root)) {
2443 error = prepend_unreachable(&cwd, &buflen); 2466 error = prepend_unreachable(&cwd, &buflen);
2444 if (error) 2467 if (error)
2445 goto out; 2468 goto out;
2446 } 2469 }
2447 2470
2448 error = -ERANGE; 2471 error = -ERANGE;
2449 len = PAGE_SIZE + page - cwd; 2472 len = PAGE_SIZE + page - cwd;
2450 if (len <= size) { 2473 if (len <= size) {
2451 error = len; 2474 error = len;
2452 if (copy_to_user(buf, cwd, len)) 2475 if (copy_to_user(buf, cwd, len))
2453 error = -EFAULT; 2476 error = -EFAULT;
2454 } 2477 }
2455 } else 2478 } else
2456 spin_unlock(&dcache_lock); 2479 spin_unlock(&dcache_lock);
2457 2480
2458 out: 2481 out:
2459 path_put(&pwd); 2482 path_put(&pwd);
2460 path_put(&root); 2483 path_put(&root);
2461 free_page((unsigned long) page); 2484 free_page((unsigned long) page);
2462 return error; 2485 return error;
2463 } 2486 }
2464 2487
2465 /* 2488 /*
2466 * Test whether new_dentry is a subdirectory of old_dentry. 2489 * Test whether new_dentry is a subdirectory of old_dentry.
2467 * 2490 *
2468 * Trivially implemented using the dcache structure 2491 * Trivially implemented using the dcache structure
2469 */ 2492 */
2470 2493
2471 /** 2494 /**
2472 * is_subdir - is new dentry a subdirectory of old_dentry 2495 * is_subdir - is new dentry a subdirectory of old_dentry
2473 * @new_dentry: new dentry 2496 * @new_dentry: new dentry
2474 * @old_dentry: old dentry 2497 * @old_dentry: old dentry
2475 * 2498 *
2476 * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth). 2499 * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth).
2477 * Returns 0 otherwise. 2500 * Returns 0 otherwise.
2478 * Caller must ensure that "new_dentry" is pinned before calling is_subdir() 2501 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2479 */ 2502 */
2480 2503
2481 int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 2504 int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2482 { 2505 {
2483 int result; 2506 int result;
2484 unsigned long seq; 2507 unsigned long seq;
2485 2508
2486 if (new_dentry == old_dentry) 2509 if (new_dentry == old_dentry)
2487 return 1; 2510 return 1;
2488 2511
2489 /* 2512 /*
2490 * Need rcu_read_lock to protect against the d_parent trashing 2513 * Need rcu_read_lock to protect against the d_parent trashing
2491 * due to d_move 2514 * due to d_move
2492 */ 2515 */
2493 rcu_read_lock(); 2516 rcu_read_lock();
2494 do { 2517 do {
2495 /* for restarting inner loop in case of seq retry */ 2518 /* for restarting inner loop in case of seq retry */
2496 seq = read_seqbegin(&rename_lock); 2519 seq = read_seqbegin(&rename_lock);
2497 if (d_ancestor(old_dentry, new_dentry)) 2520 if (d_ancestor(old_dentry, new_dentry))
2498 result = 1; 2521 result = 1;
2499 else 2522 else
2500 result = 0; 2523 result = 0;
2501 } while (read_seqretry(&rename_lock, seq)); 2524 } while (read_seqretry(&rename_lock, seq));
2502 rcu_read_unlock(); 2525 rcu_read_unlock();
2503 2526
2504 return result; 2527 return result;
2505 } 2528 }
2506 2529
2507 int path_is_under(struct path *path1, struct path *path2) 2530 int path_is_under(struct path *path1, struct path *path2)
2508 { 2531 {
2509 struct vfsmount *mnt = path1->mnt; 2532 struct vfsmount *mnt = path1->mnt;
2510 struct dentry *dentry = path1->dentry; 2533 struct dentry *dentry = path1->dentry;
2511 int res; 2534 int res;
2512 2535
2513 br_read_lock(vfsmount_lock); 2536 br_read_lock(vfsmount_lock);
2514 if (mnt != path2->mnt) { 2537 if (mnt != path2->mnt) {
2515 for (;;) { 2538 for (;;) {
2516 if (mnt->mnt_parent == mnt) { 2539 if (mnt->mnt_parent == mnt) {
2517 br_read_unlock(vfsmount_lock); 2540 br_read_unlock(vfsmount_lock);
2518 return 0; 2541 return 0;
2519 } 2542 }
2520 if (mnt->mnt_parent == path2->mnt) 2543 if (mnt->mnt_parent == path2->mnt)
2521 break; 2544 break;
2522 mnt = mnt->mnt_parent; 2545 mnt = mnt->mnt_parent;
2523 } 2546 }
2524 dentry = mnt->mnt_mountpoint; 2547 dentry = mnt->mnt_mountpoint;
2525 } 2548 }
2526 res = is_subdir(dentry, path2->dentry); 2549 res = is_subdir(dentry, path2->dentry);
2527 br_read_unlock(vfsmount_lock); 2550 br_read_unlock(vfsmount_lock);
2528 return res; 2551 return res;
2529 } 2552 }
2530 EXPORT_SYMBOL(path_is_under); 2553 EXPORT_SYMBOL(path_is_under);
2531 2554
2532 void d_genocide(struct dentry *root) 2555 void d_genocide(struct dentry *root)
2533 { 2556 {
2534 struct dentry *this_parent = root; 2557 struct dentry *this_parent = root;
2535 struct list_head *next; 2558 struct list_head *next;
2536 2559
2537 spin_lock(&dcache_lock); 2560 spin_lock(&dcache_lock);
2538 repeat: 2561 repeat:
2539 next = this_parent->d_subdirs.next; 2562 next = this_parent->d_subdirs.next;
2540 resume: 2563 resume:
2541 while (next != &this_parent->d_subdirs) { 2564 while (next != &this_parent->d_subdirs) {
2542 struct list_head *tmp = next; 2565 struct list_head *tmp = next;
2543 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 2566 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2544 next = tmp->next; 2567 next = tmp->next;
2545 if (d_unhashed(dentry)||!dentry->d_inode) 2568 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2569 if (d_unhashed(dentry) || !dentry->d_inode) {
2570 spin_unlock(&dentry->d_lock);
2546 continue; 2571 continue;
2572 }
2547 if (!list_empty(&dentry->d_subdirs)) { 2573 if (!list_empty(&dentry->d_subdirs)) {
2574 spin_unlock(&dentry->d_lock);
2548 this_parent = dentry; 2575 this_parent = dentry;
2549 goto repeat; 2576 goto repeat;
2550 } 2577 }
2551 spin_lock(&dentry->d_lock);
2552 dentry->d_count--; 2578 dentry->d_count--;
2553 spin_unlock(&dentry->d_lock); 2579 spin_unlock(&dentry->d_lock);
2554 } 2580 }
2555 if (this_parent != root) { 2581 if (this_parent != root) {
2556 next = this_parent->d_u.d_child.next; 2582 next = this_parent->d_u.d_child.next;
2557 spin_lock(&this_parent->d_lock); 2583 spin_lock(&this_parent->d_lock);
2558 this_parent->d_count--; 2584 this_parent->d_count--;
2559 spin_unlock(&this_parent->d_lock); 2585 spin_unlock(&this_parent->d_lock);
2560 this_parent = this_parent->d_parent; 2586 this_parent = this_parent->d_parent;
2561 goto resume; 2587 goto resume;
2562 } 2588 }
2563 spin_unlock(&dcache_lock); 2589 spin_unlock(&dcache_lock);
2564 } 2590 }
2565 2591
2566 /** 2592 /**
2567 * find_inode_number - check for dentry with name 2593 * find_inode_number - check for dentry with name
2568 * @dir: directory to check 2594 * @dir: directory to check
2569 * @name: Name to find. 2595 * @name: Name to find.
2570 * 2596 *
2571 * Check whether a dentry already exists for the given name, 2597 * Check whether a dentry already exists for the given name,
2572 * and return the inode number if it has an inode. Otherwise 2598 * and return the inode number if it has an inode. Otherwise
2573 * 0 is returned. 2599 * 0 is returned.
2574 * 2600 *
2575 * This routine is used to post-process directory listings for 2601 * This routine is used to post-process directory listings for
2576 * filesystems using synthetic inode numbers, and is necessary 2602 * filesystems using synthetic inode numbers, and is necessary
2577 * to keep getcwd() working. 2603 * to keep getcwd() working.
2578 */ 2604 */
2579 2605
2580 ino_t find_inode_number(struct dentry *dir, struct qstr *name) 2606 ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2581 { 2607 {
2582 struct dentry * dentry; 2608 struct dentry * dentry;
2583 ino_t ino = 0; 2609 ino_t ino = 0;
2584 2610
2585 dentry = d_hash_and_lookup(dir, name); 2611 dentry = d_hash_and_lookup(dir, name);
2586 if (dentry) { 2612 if (dentry) {
2587 if (dentry->d_inode) 2613 if (dentry->d_inode)
2588 ino = dentry->d_inode->i_ino; 2614 ino = dentry->d_inode->i_ino;
2589 dput(dentry); 2615 dput(dentry);
2590 } 2616 }
2591 return ino; 2617 return ino;
2592 } 2618 }
2593 EXPORT_SYMBOL(find_inode_number); 2619 EXPORT_SYMBOL(find_inode_number);
2594 2620
2595 static __initdata unsigned long dhash_entries; 2621 static __initdata unsigned long dhash_entries;
2596 static int __init set_dhash_entries(char *str) 2622 static int __init set_dhash_entries(char *str)
2597 { 2623 {
2598 if (!str) 2624 if (!str)
2599 return 0; 2625 return 0;
2600 dhash_entries = simple_strtoul(str, &str, 0); 2626 dhash_entries = simple_strtoul(str, &str, 0);
2601 return 1; 2627 return 1;
2602 } 2628 }
2603 __setup("dhash_entries=", set_dhash_entries); 2629 __setup("dhash_entries=", set_dhash_entries);
2604 2630
2605 static void __init dcache_init_early(void) 2631 static void __init dcache_init_early(void)
2606 { 2632 {
2607 int loop; 2633 int loop;
2608 2634
2609 /* If hashes are distributed across NUMA nodes, defer 2635 /* If hashes are distributed across NUMA nodes, defer
2610 * hash allocation until vmalloc space is available. 2636 * hash allocation until vmalloc space is available.
2611 */ 2637 */
2612 if (hashdist) 2638 if (hashdist)
2613 return; 2639 return;
2614 2640
2615 dentry_hashtable = 2641 dentry_hashtable =
2616 alloc_large_system_hash("Dentry cache", 2642 alloc_large_system_hash("Dentry cache",
2617 sizeof(struct hlist_head), 2643 sizeof(struct hlist_head),
2618 dhash_entries, 2644 dhash_entries,
2619 13, 2645 13,
2620 HASH_EARLY, 2646 HASH_EARLY,
2621 &d_hash_shift, 2647 &d_hash_shift,
2622 &d_hash_mask, 2648 &d_hash_mask,
2623 0); 2649 0);
2624 2650
2625 for (loop = 0; loop < (1 << d_hash_shift); loop++) 2651 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2626 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 2652 INIT_HLIST_HEAD(&dentry_hashtable[loop]);
2627 } 2653 }
2628 2654
2629 static void __init dcache_init(void) 2655 static void __init dcache_init(void)
2630 { 2656 {
2631 int loop; 2657 int loop;
2632 2658
2633 /* 2659 /*
2634 * A constructor could be added for stable state like the lists, 2660 * A constructor could be added for stable state like the lists,
2635 * but it is probably not worth it because of the cache nature 2661 * but it is probably not worth it because of the cache nature
2636 * of the dcache. 2662 * of the dcache.
2637 */ 2663 */
2638 dentry_cache = KMEM_CACHE(dentry, 2664 dentry_cache = KMEM_CACHE(dentry,
2639 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); 2665 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
2640 2666
2641 register_shrinker(&dcache_shrinker); 2667 register_shrinker(&dcache_shrinker);
2642 2668
2643 /* Hash may have been set up in dcache_init_early */ 2669 /* Hash may have been set up in dcache_init_early */
2644 if (!hashdist) 2670 if (!hashdist)
2645 return; 2671 return;
2646 2672
2647 dentry_hashtable = 2673 dentry_hashtable =
2648 alloc_large_system_hash("Dentry cache", 2674 alloc_large_system_hash("Dentry cache",
2649 sizeof(struct hlist_head), 2675 sizeof(struct hlist_head),
2650 dhash_entries, 2676 dhash_entries,
2651 13, 2677 13,
2652 0, 2678 0,
2653 &d_hash_shift, 2679 &d_hash_shift,
2654 &d_hash_mask, 2680 &d_hash_mask,
2655 0); 2681 0);
2656 2682
2657 for (loop = 0; loop < (1 << d_hash_shift); loop++) 2683 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2658 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 2684 INIT_HLIST_HEAD(&dentry_hashtable[loop]);
2659 } 2685 }
2660 2686
2661 /* SLAB cache for __getname() consumers */ 2687 /* SLAB cache for __getname() consumers */
2662 struct kmem_cache *names_cachep __read_mostly; 2688 struct kmem_cache *names_cachep __read_mostly;
2663 EXPORT_SYMBOL(names_cachep); 2689 EXPORT_SYMBOL(names_cachep);
2664 2690
2665 EXPORT_SYMBOL(d_genocide); 2691 EXPORT_SYMBOL(d_genocide);
2666 2692
2667 void __init vfs_caches_init_early(void) 2693 void __init vfs_caches_init_early(void)
2668 { 2694 {
2669 dcache_init_early(); 2695 dcache_init_early();
2670 inode_init_early(); 2696 inode_init_early();
2671 } 2697 }
2672 2698
2673 void __init vfs_caches_init(unsigned long mempages) 2699 void __init vfs_caches_init(unsigned long mempages)
2674 { 2700 {
2675 unsigned long reserve; 2701 unsigned long reserve;
2676 2702
2677 /* Base hash sizes on available memory, with a reserve equal to 2703 /* Base hash sizes on available memory, with a reserve equal to
2678 150% of current kernel size */ 2704 150% of current kernel size */
2679 2705
2680 reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1); 2706 reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
2681 mempages -= reserve; 2707 mempages -= reserve;
2682 2708
2683 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, 2709 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
2684 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 2710 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1 /* 1 /*
2 * fs/libfs.c 2 * fs/libfs.c
3 * Library for filesystem writers. 3 * Library for filesystem writers.
4 */ 4 */
5 5
6 #include <linux/module.h> 6 #include <linux/module.h>
7 #include <linux/pagemap.h> 7 #include <linux/pagemap.h>
8 #include <linux/slab.h> 8 #include <linux/slab.h>
9 #include <linux/mount.h> 9 #include <linux/mount.h>
10 #include <linux/vfs.h> 10 #include <linux/vfs.h>
11 #include <linux/quotaops.h> 11 #include <linux/quotaops.h>
12 #include <linux/mutex.h> 12 #include <linux/mutex.h>
13 #include <linux/exportfs.h> 13 #include <linux/exportfs.h>
14 #include <linux/writeback.h> 14 #include <linux/writeback.h>
15 #include <linux/buffer_head.h> 15 #include <linux/buffer_head.h>
16 16
17 #include <asm/uaccess.h> 17 #include <asm/uaccess.h>
18 18
19 static inline int simple_positive(struct dentry *dentry)
20 {
21 return dentry->d_inode && !d_unhashed(dentry);
22 }
23
19 int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, 24 int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
20 struct kstat *stat) 25 struct kstat *stat)
21 { 26 {
22 struct inode *inode = dentry->d_inode; 27 struct inode *inode = dentry->d_inode;
23 generic_fillattr(inode, stat); 28 generic_fillattr(inode, stat);
24 stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9); 29 stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
25 return 0; 30 return 0;
26 } 31 }
27 32
28 int simple_statfs(struct dentry *dentry, struct kstatfs *buf) 33 int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
29 { 34 {
30 buf->f_type = dentry->d_sb->s_magic; 35 buf->f_type = dentry->d_sb->s_magic;
31 buf->f_bsize = PAGE_CACHE_SIZE; 36 buf->f_bsize = PAGE_CACHE_SIZE;
32 buf->f_namelen = NAME_MAX; 37 buf->f_namelen = NAME_MAX;
33 return 0; 38 return 0;
34 } 39 }
35 40
36 /* 41 /*
37 * Retaining negative dentries for an in-memory filesystem just wastes 42 * Retaining negative dentries for an in-memory filesystem just wastes
38 * memory and lookup time: arrange for them to be deleted immediately. 43 * memory and lookup time: arrange for them to be deleted immediately.
39 */ 44 */
40 static int simple_delete_dentry(const struct dentry *dentry) 45 static int simple_delete_dentry(const struct dentry *dentry)
41 { 46 {
42 return 1; 47 return 1;
43 } 48 }
44 49
45 /* 50 /*
46 * Look up the data. This is trivial - if the dentry didn't already 51 * Look up the data. This is trivial - if the dentry didn't already
47 * exist, we know it is negative. Set d_op to delete negative dentries. 52 * exist, we know it is negative. Set d_op to delete negative dentries.
48 */ 53 */
49 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 54 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
50 { 55 {
51 static const struct dentry_operations simple_dentry_operations = { 56 static const struct dentry_operations simple_dentry_operations = {
52 .d_delete = simple_delete_dentry, 57 .d_delete = simple_delete_dentry,
53 }; 58 };
54 59
55 if (dentry->d_name.len > NAME_MAX) 60 if (dentry->d_name.len > NAME_MAX)
56 return ERR_PTR(-ENAMETOOLONG); 61 return ERR_PTR(-ENAMETOOLONG);
57 dentry->d_op = &simple_dentry_operations; 62 dentry->d_op = &simple_dentry_operations;
58 d_add(dentry, NULL); 63 d_add(dentry, NULL);
59 return NULL; 64 return NULL;
60 } 65 }
61 66
62 int dcache_dir_open(struct inode *inode, struct file *file) 67 int dcache_dir_open(struct inode *inode, struct file *file)
63 { 68 {
64 static struct qstr cursor_name = {.len = 1, .name = "."}; 69 static struct qstr cursor_name = {.len = 1, .name = "."};
65 70
66 file->private_data = d_alloc(file->f_path.dentry, &cursor_name); 71 file->private_data = d_alloc(file->f_path.dentry, &cursor_name);
67 72
68 return file->private_data ? 0 : -ENOMEM; 73 return file->private_data ? 0 : -ENOMEM;
69 } 74 }
70 75
71 int dcache_dir_close(struct inode *inode, struct file *file) 76 int dcache_dir_close(struct inode *inode, struct file *file)
72 { 77 {
73 dput(file->private_data); 78 dput(file->private_data);
74 return 0; 79 return 0;
75 } 80 }
76 81
77 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) 82 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
78 { 83 {
79 mutex_lock(&file->f_path.dentry->d_inode->i_mutex); 84 mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
80 switch (origin) { 85 switch (origin) {
81 case 1: 86 case 1:
82 offset += file->f_pos; 87 offset += file->f_pos;
83 case 0: 88 case 0:
84 if (offset >= 0) 89 if (offset >= 0)
85 break; 90 break;
86 default: 91 default:
87 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 92 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
88 return -EINVAL; 93 return -EINVAL;
89 } 94 }
90 if (offset != file->f_pos) { 95 if (offset != file->f_pos) {
91 file->f_pos = offset; 96 file->f_pos = offset;
92 if (file->f_pos >= 2) { 97 if (file->f_pos >= 2) {
93 struct list_head *p; 98 struct list_head *p;
94 struct dentry *cursor = file->private_data; 99 struct dentry *cursor = file->private_data;
95 loff_t n = file->f_pos - 2; 100 loff_t n = file->f_pos - 2;
96 101
97 spin_lock(&dcache_lock); 102 spin_lock(&dcache_lock);
98 list_del(&cursor->d_u.d_child); 103 list_del(&cursor->d_u.d_child);
99 p = file->f_path.dentry->d_subdirs.next; 104 p = file->f_path.dentry->d_subdirs.next;
100 while (n && p != &file->f_path.dentry->d_subdirs) { 105 while (n && p != &file->f_path.dentry->d_subdirs) {
101 struct dentry *next; 106 struct dentry *next;
102 next = list_entry(p, struct dentry, d_u.d_child); 107 next = list_entry(p, struct dentry, d_u.d_child);
103 if (!d_unhashed(next) && next->d_inode) 108 spin_lock(&next->d_lock);
109 if (simple_positive(next))
104 n--; 110 n--;
111 spin_unlock(&next->d_lock);
105 p = p->next; 112 p = p->next;
106 } 113 }
107 list_add_tail(&cursor->d_u.d_child, p); 114 list_add_tail(&cursor->d_u.d_child, p);
108 spin_unlock(&dcache_lock); 115 spin_unlock(&dcache_lock);
109 } 116 }
110 } 117 }
111 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 118 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
112 return offset; 119 return offset;
113 } 120 }
114 121
115 /* Relationship between i_mode and the DT_xxx types */ 122 /* Relationship between i_mode and the DT_xxx types */
116 static inline unsigned char dt_type(struct inode *inode) 123 static inline unsigned char dt_type(struct inode *inode)
117 { 124 {
118 return (inode->i_mode >> 12) & 15; 125 return (inode->i_mode >> 12) & 15;
119 } 126 }
120 127
121 /* 128 /*
122 * Directory is locked and all positive dentries in it are safe, since 129 * Directory is locked and all positive dentries in it are safe, since
123 * for ramfs-type trees they can't go away without unlink() or rmdir(), 130 * for ramfs-type trees they can't go away without unlink() or rmdir(),
124 * both impossible due to the lock on directory. 131 * both impossible due to the lock on directory.
125 */ 132 */
126 133
127 int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) 134 int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
128 { 135 {
129 struct dentry *dentry = filp->f_path.dentry; 136 struct dentry *dentry = filp->f_path.dentry;
130 struct dentry *cursor = filp->private_data; 137 struct dentry *cursor = filp->private_data;
131 struct list_head *p, *q = &cursor->d_u.d_child; 138 struct list_head *p, *q = &cursor->d_u.d_child;
132 ino_t ino; 139 ino_t ino;
133 int i = filp->f_pos; 140 int i = filp->f_pos;
134 141
135 switch (i) { 142 switch (i) {
136 case 0: 143 case 0:
137 ino = dentry->d_inode->i_ino; 144 ino = dentry->d_inode->i_ino;
138 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 145 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
139 break; 146 break;
140 filp->f_pos++; 147 filp->f_pos++;
141 i++; 148 i++;
142 /* fallthrough */ 149 /* fallthrough */
143 case 1: 150 case 1:
144 ino = parent_ino(dentry); 151 ino = parent_ino(dentry);
145 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 152 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
146 break; 153 break;
147 filp->f_pos++; 154 filp->f_pos++;
148 i++; 155 i++;
149 /* fallthrough */ 156 /* fallthrough */
150 default: 157 default:
151 spin_lock(&dcache_lock); 158 spin_lock(&dcache_lock);
152 if (filp->f_pos == 2) 159 if (filp->f_pos == 2)
153 list_move(q, &dentry->d_subdirs); 160 list_move(q, &dentry->d_subdirs);
154 161
155 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 162 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
156 struct dentry *next; 163 struct dentry *next;
157 next = list_entry(p, struct dentry, d_u.d_child); 164 next = list_entry(p, struct dentry, d_u.d_child);
158 if (d_unhashed(next) || !next->d_inode) 165 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
166 if (!simple_positive(next)) {
167 spin_unlock(&next->d_lock);
159 continue; 168 continue;
169 }
160 170
171 spin_unlock(&next->d_lock);
161 spin_unlock(&dcache_lock); 172 spin_unlock(&dcache_lock);
162 if (filldir(dirent, next->d_name.name, 173 if (filldir(dirent, next->d_name.name,
163 next->d_name.len, filp->f_pos, 174 next->d_name.len, filp->f_pos,
164 next->d_inode->i_ino, 175 next->d_inode->i_ino,
165 dt_type(next->d_inode)) < 0) 176 dt_type(next->d_inode)) < 0)
166 return 0; 177 return 0;
167 spin_lock(&dcache_lock); 178 spin_lock(&dcache_lock);
168 /* next is still alive */ 179 /* next is still alive */
169 list_move(q, p); 180 list_move(q, p);
170 p = q; 181 p = q;
171 filp->f_pos++; 182 filp->f_pos++;
172 } 183 }
173 spin_unlock(&dcache_lock); 184 spin_unlock(&dcache_lock);
174 } 185 }
175 return 0; 186 return 0;
176 } 187 }
177 188
178 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) 189 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
179 { 190 {
180 return -EISDIR; 191 return -EISDIR;
181 } 192 }
182 193
183 const struct file_operations simple_dir_operations = { 194 const struct file_operations simple_dir_operations = {
184 .open = dcache_dir_open, 195 .open = dcache_dir_open,
185 .release = dcache_dir_close, 196 .release = dcache_dir_close,
186 .llseek = dcache_dir_lseek, 197 .llseek = dcache_dir_lseek,
187 .read = generic_read_dir, 198 .read = generic_read_dir,
188 .readdir = dcache_readdir, 199 .readdir = dcache_readdir,
189 .fsync = noop_fsync, 200 .fsync = noop_fsync,
190 }; 201 };
191 202
192 const struct inode_operations simple_dir_inode_operations = { 203 const struct inode_operations simple_dir_inode_operations = {
193 .lookup = simple_lookup, 204 .lookup = simple_lookup,
194 }; 205 };
195 206
196 static const struct super_operations simple_super_operations = { 207 static const struct super_operations simple_super_operations = {
197 .statfs = simple_statfs, 208 .statfs = simple_statfs,
198 }; 209 };
199 210
200 /* 211 /*
201 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that 212 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
202 * will never be mountable) 213 * will never be mountable)
203 */ 214 */
204 struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, 215 struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
205 const struct super_operations *ops, unsigned long magic) 216 const struct super_operations *ops, unsigned long magic)
206 { 217 {
207 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 218 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
208 struct dentry *dentry; 219 struct dentry *dentry;
209 struct inode *root; 220 struct inode *root;
210 struct qstr d_name = {.name = name, .len = strlen(name)}; 221 struct qstr d_name = {.name = name, .len = strlen(name)};
211 222
212 if (IS_ERR(s)) 223 if (IS_ERR(s))
213 return ERR_CAST(s); 224 return ERR_CAST(s);
214 225
215 s->s_flags = MS_NOUSER; 226 s->s_flags = MS_NOUSER;
216 s->s_maxbytes = MAX_LFS_FILESIZE; 227 s->s_maxbytes = MAX_LFS_FILESIZE;
217 s->s_blocksize = PAGE_SIZE; 228 s->s_blocksize = PAGE_SIZE;
218 s->s_blocksize_bits = PAGE_SHIFT; 229 s->s_blocksize_bits = PAGE_SHIFT;
219 s->s_magic = magic; 230 s->s_magic = magic;
220 s->s_op = ops ? ops : &simple_super_operations; 231 s->s_op = ops ? ops : &simple_super_operations;
221 s->s_time_gran = 1; 232 s->s_time_gran = 1;
222 root = new_inode(s); 233 root = new_inode(s);
223 if (!root) 234 if (!root)
224 goto Enomem; 235 goto Enomem;
225 /* 236 /*
226 * since this is the first inode, make it number 1. New inodes created 237 * since this is the first inode, make it number 1. New inodes created
227 * after this must take care not to collide with it (by passing 238 * after this must take care not to collide with it (by passing
228 * max_reserved of 1 to iunique). 239 * max_reserved of 1 to iunique).
229 */ 240 */
230 root->i_ino = 1; 241 root->i_ino = 1;
231 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; 242 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
232 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; 243 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
233 dentry = d_alloc(NULL, &d_name); 244 dentry = d_alloc(NULL, &d_name);
234 if (!dentry) { 245 if (!dentry) {
235 iput(root); 246 iput(root);
236 goto Enomem; 247 goto Enomem;
237 } 248 }
238 dentry->d_sb = s; 249 dentry->d_sb = s;
239 dentry->d_parent = dentry; 250 dentry->d_parent = dentry;
240 d_instantiate(dentry, root); 251 d_instantiate(dentry, root);
241 s->s_root = dentry; 252 s->s_root = dentry;
242 s->s_flags |= MS_ACTIVE; 253 s->s_flags |= MS_ACTIVE;
243 return dget(s->s_root); 254 return dget(s->s_root);
244 255
245 Enomem: 256 Enomem:
246 deactivate_locked_super(s); 257 deactivate_locked_super(s);
247 return ERR_PTR(-ENOMEM); 258 return ERR_PTR(-ENOMEM);
248 } 259 }
249 260
250 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 261 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
251 { 262 {
252 struct inode *inode = old_dentry->d_inode; 263 struct inode *inode = old_dentry->d_inode;
253 264
254 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 265 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
255 inc_nlink(inode); 266 inc_nlink(inode);
256 ihold(inode); 267 ihold(inode);
257 dget(dentry); 268 dget(dentry);
258 d_instantiate(dentry, inode); 269 d_instantiate(dentry, inode);
259 return 0; 270 return 0;
260 } 271 }
261 272
262 static inline int simple_positive(struct dentry *dentry)
263 {
264 return dentry->d_inode && !d_unhashed(dentry);
265 }
266
267 int simple_empty(struct dentry *dentry) 273 int simple_empty(struct dentry *dentry)
268 { 274 {
269 struct dentry *child; 275 struct dentry *child;
270 int ret = 0; 276 int ret = 0;
271 277
272 spin_lock(&dcache_lock); 278 spin_lock(&dcache_lock);
273 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) 279 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
274 if (simple_positive(child)) 280 spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
281 if (simple_positive(child)) {
282 spin_unlock(&child->d_lock);
275 goto out; 283 goto out;
284 }
285 spin_unlock(&child->d_lock);
286 }
276 ret = 1; 287 ret = 1;
277 out: 288 out:
278 spin_unlock(&dcache_lock); 289 spin_unlock(&dcache_lock);
279 return ret; 290 return ret;
280 } 291 }
281 292
282 int simple_unlink(struct inode *dir, struct dentry *dentry) 293 int simple_unlink(struct inode *dir, struct dentry *dentry)
283 { 294 {
284 struct inode *inode = dentry->d_inode; 295 struct inode *inode = dentry->d_inode;
285 296
286 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 297 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
287 drop_nlink(inode); 298 drop_nlink(inode);
288 dput(dentry); 299 dput(dentry);
289 return 0; 300 return 0;
290 } 301 }
291 302
292 int simple_rmdir(struct inode *dir, struct dentry *dentry) 303 int simple_rmdir(struct inode *dir, struct dentry *dentry)
293 { 304 {
294 if (!simple_empty(dentry)) 305 if (!simple_empty(dentry))
295 return -ENOTEMPTY; 306 return -ENOTEMPTY;
296 307
297 drop_nlink(dentry->d_inode); 308 drop_nlink(dentry->d_inode);
298 simple_unlink(dir, dentry); 309 simple_unlink(dir, dentry);
299 drop_nlink(dir); 310 drop_nlink(dir);
300 return 0; 311 return 0;
301 } 312 }
302 313
303 int simple_rename(struct inode *old_dir, struct dentry *old_dentry, 314 int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
304 struct inode *new_dir, struct dentry *new_dentry) 315 struct inode *new_dir, struct dentry *new_dentry)
305 { 316 {
306 struct inode *inode = old_dentry->d_inode; 317 struct inode *inode = old_dentry->d_inode;
307 int they_are_dirs = S_ISDIR(old_dentry->d_inode->i_mode); 318 int they_are_dirs = S_ISDIR(old_dentry->d_inode->i_mode);
308 319
309 if (!simple_empty(new_dentry)) 320 if (!simple_empty(new_dentry))
310 return -ENOTEMPTY; 321 return -ENOTEMPTY;
311 322
312 if (new_dentry->d_inode) { 323 if (new_dentry->d_inode) {
313 simple_unlink(new_dir, new_dentry); 324 simple_unlink(new_dir, new_dentry);
314 if (they_are_dirs) 325 if (they_are_dirs)
315 drop_nlink(old_dir); 326 drop_nlink(old_dir);
316 } else if (they_are_dirs) { 327 } else if (they_are_dirs) {
317 drop_nlink(old_dir); 328 drop_nlink(old_dir);
318 inc_nlink(new_dir); 329 inc_nlink(new_dir);
319 } 330 }
320 331
321 old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = 332 old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
322 new_dir->i_mtime = inode->i_ctime = CURRENT_TIME; 333 new_dir->i_mtime = inode->i_ctime = CURRENT_TIME;
323 334
324 return 0; 335 return 0;
325 } 336 }
326 337
327 /** 338 /**
328 * simple_setattr - setattr for simple filesystem 339 * simple_setattr - setattr for simple filesystem
329 * @dentry: dentry 340 * @dentry: dentry
330 * @iattr: iattr structure 341 * @iattr: iattr structure
331 * 342 *
332 * Returns 0 on success, -error on failure. 343 * Returns 0 on success, -error on failure.
333 * 344 *
334 * simple_setattr is a simple ->setattr implementation without a proper 345 * simple_setattr is a simple ->setattr implementation without a proper
335 * implementation of size changes. 346 * implementation of size changes.
336 * 347 *
337 * It can either be used for in-memory filesystems or special files 348 * It can either be used for in-memory filesystems or special files
338 * on simple regular filesystems. Anything that needs to change on-disk 349 * on simple regular filesystems. Anything that needs to change on-disk
339 * or wire state on size changes needs its own setattr method. 350 * or wire state on size changes needs its own setattr method.
340 */ 351 */
341 int simple_setattr(struct dentry *dentry, struct iattr *iattr) 352 int simple_setattr(struct dentry *dentry, struct iattr *iattr)
342 { 353 {
343 struct inode *inode = dentry->d_inode; 354 struct inode *inode = dentry->d_inode;
344 int error; 355 int error;
345 356
346 WARN_ON_ONCE(inode->i_op->truncate); 357 WARN_ON_ONCE(inode->i_op->truncate);
347 358
348 error = inode_change_ok(inode, iattr); 359 error = inode_change_ok(inode, iattr);
349 if (error) 360 if (error)
350 return error; 361 return error;
351 362
352 if (iattr->ia_valid & ATTR_SIZE) 363 if (iattr->ia_valid & ATTR_SIZE)
353 truncate_setsize(inode, iattr->ia_size); 364 truncate_setsize(inode, iattr->ia_size);
354 setattr_copy(inode, iattr); 365 setattr_copy(inode, iattr);
355 mark_inode_dirty(inode); 366 mark_inode_dirty(inode);
356 return 0; 367 return 0;
357 } 368 }
358 EXPORT_SYMBOL(simple_setattr); 369 EXPORT_SYMBOL(simple_setattr);
359 370
/*
 * simple_readpage - "read" a page by zero-filling it.
 *
 * Clears the page, flushes the data cache for it, marks it up to date and
 * unlocks it.  @file is not used.  Always returns 0.
 */
int simple_readpage(struct file *file, struct page *page)
{
	clear_highpage(page);
	flush_dcache_page(page);

	SetPageUptodate(page);
	unlock_page(page);

	return 0;
}
368 379
369 int simple_write_begin(struct file *file, struct address_space *mapping, 380 int simple_write_begin(struct file *file, struct address_space *mapping,
370 loff_t pos, unsigned len, unsigned flags, 381 loff_t pos, unsigned len, unsigned flags,
371 struct page **pagep, void **fsdata) 382 struct page **pagep, void **fsdata)
372 { 383 {
373 struct page *page; 384 struct page *page;
374 pgoff_t index; 385 pgoff_t index;
375 386
376 index = pos >> PAGE_CACHE_SHIFT; 387 index = pos >> PAGE_CACHE_SHIFT;
377 388
378 page = grab_cache_page_write_begin(mapping, index, flags); 389 page = grab_cache_page_write_begin(mapping, index, flags);
379 if (!page) 390 if (!page)
380 return -ENOMEM; 391 return -ENOMEM;
381 392
382 *pagep = page; 393 *pagep = page;
383 394
384 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { 395 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
385 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 396 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
386 397
387 zero_user_segments(page, 0, from, from + len, PAGE_CACHE_SIZE); 398 zero_user_segments(page, 0, from, from + len, PAGE_CACHE_SIZE);
388 } 399 }
389 return 0; 400 return 0;
390 } 401 }
391 402
392 /** 403 /**
393 * simple_write_end - .write_end helper for non-block-device FSes 404 * simple_write_end - .write_end helper for non-block-device FSes
394 * @available: See .write_end of address_space_operations 405 * @available: See .write_end of address_space_operations
395 * @file: " 406 * @file: "
396 * @mapping: " 407 * @mapping: "
397 * @pos: " 408 * @pos: "
398 * @len: " 409 * @len: "
399 * @copied: " 410 * @copied: "
400 * @page: " 411 * @page: "
401 * @fsdata: " 412 * @fsdata: "
402 * 413 *
403 * simple_write_end does the minimum needed for updating a page after writing is 414 * simple_write_end does the minimum needed for updating a page after writing is
404 * done. It has the same API signature as the .write_end of 415 * done. It has the same API signature as the .write_end of
405 * address_space_operations vector. So it can just be set onto .write_end for 416 * address_space_operations vector. So it can just be set onto .write_end for
406 * FSes that don't need any other processing. i_mutex is assumed to be held. 417 * FSes that don't need any other processing. i_mutex is assumed to be held.
407 * Block based filesystems should use generic_write_end(). 418 * Block based filesystems should use generic_write_end().
408 * NOTE: Even though i_size might get updated by this function, mark_inode_dirty 419 * NOTE: Even though i_size might get updated by this function, mark_inode_dirty
409 * is not called, so a filesystem that actually does store data in .write_inode 420 * is not called, so a filesystem that actually does store data in .write_inode
410 * should extend on what's done here with a call to mark_inode_dirty() in the 421 * should extend on what's done here with a call to mark_inode_dirty() in the
411 * case that i_size has changed. 422 * case that i_size has changed.
412 */ 423 */
413 int simple_write_end(struct file *file, struct address_space *mapping, 424 int simple_write_end(struct file *file, struct address_space *mapping,
414 loff_t pos, unsigned len, unsigned copied, 425 loff_t pos, unsigned len, unsigned copied,
415 struct page *page, void *fsdata) 426 struct page *page, void *fsdata)
416 { 427 {
417 struct inode *inode = page->mapping->host; 428 struct inode *inode = page->mapping->host;
418 loff_t last_pos = pos + copied; 429 loff_t last_pos = pos + copied;
419 430
420 /* zero the stale part of the page if we did a short copy */ 431 /* zero the stale part of the page if we did a short copy */
421 if (copied < len) { 432 if (copied < len) {
422 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 433 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
423 434
424 zero_user(page, from + copied, len - copied); 435 zero_user(page, from + copied, len - copied);
425 } 436 }
426 437
427 if (!PageUptodate(page)) 438 if (!PageUptodate(page))
428 SetPageUptodate(page); 439 SetPageUptodate(page);
429 /* 440 /*
430 * No need to use i_size_read() here, the i_size 441 * No need to use i_size_read() here, the i_size
431 * cannot change under us because we hold the i_mutex. 442 * cannot change under us because we hold the i_mutex.
432 */ 443 */
433 if (last_pos > inode->i_size) 444 if (last_pos > inode->i_size)
434 i_size_write(inode, last_pos); 445 i_size_write(inode, last_pos);
435 446
436 set_page_dirty(page); 447 set_page_dirty(page);
437 unlock_page(page); 448 unlock_page(page);
438 page_cache_release(page); 449 page_cache_release(page);
439 450
440 return copied; 451 return copied;
441 } 452 }
442 453
443 /* 454 /*
444 * the inodes created here are not hashed. If you use iunique to generate 455 * the inodes created here are not hashed. If you use iunique to generate
445 * unique inode values later for this filesystem, then you must take care 456 * unique inode values later for this filesystem, then you must take care
446 * to pass it an appropriate max_reserved value to avoid collisions. 457 * to pass it an appropriate max_reserved value to avoid collisions.
447 */ 458 */
448 int simple_fill_super(struct super_block *s, unsigned long magic, 459 int simple_fill_super(struct super_block *s, unsigned long magic,
449 struct tree_descr *files) 460 struct tree_descr *files)
450 { 461 {
451 struct inode *inode; 462 struct inode *inode;
452 struct dentry *root; 463 struct dentry *root;
453 struct dentry *dentry; 464 struct dentry *dentry;
454 int i; 465 int i;
455 466
456 s->s_blocksize = PAGE_CACHE_SIZE; 467 s->s_blocksize = PAGE_CACHE_SIZE;
457 s->s_blocksize_bits = PAGE_CACHE_SHIFT; 468 s->s_blocksize_bits = PAGE_CACHE_SHIFT;
458 s->s_magic = magic; 469 s->s_magic = magic;
459 s->s_op = &simple_super_operations; 470 s->s_op = &simple_super_operations;
460 s->s_time_gran = 1; 471 s->s_time_gran = 1;
461 472
462 inode = new_inode(s); 473 inode = new_inode(s);
463 if (!inode) 474 if (!inode)
464 return -ENOMEM; 475 return -ENOMEM;
465 /* 476 /*
466 * because the root inode is 1, the files array must not contain an 477 * because the root inode is 1, the files array must not contain an
467 * entry at index 1 478 * entry at index 1
468 */ 479 */
469 inode->i_ino = 1; 480 inode->i_ino = 1;
470 inode->i_mode = S_IFDIR | 0755; 481 inode->i_mode = S_IFDIR | 0755;
471 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 482 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
472 inode->i_op = &simple_dir_inode_operations; 483 inode->i_op = &simple_dir_inode_operations;
473 inode->i_fop = &simple_dir_operations; 484 inode->i_fop = &simple_dir_operations;
474 inode->i_nlink = 2; 485 inode->i_nlink = 2;
475 root = d_alloc_root(inode); 486 root = d_alloc_root(inode);
476 if (!root) { 487 if (!root) {
477 iput(inode); 488 iput(inode);
478 return -ENOMEM; 489 return -ENOMEM;
479 } 490 }
480 for (i = 0; !files->name || files->name[0]; i++, files++) { 491 for (i = 0; !files->name || files->name[0]; i++, files++) {
481 if (!files->name) 492 if (!files->name)
482 continue; 493 continue;
483 494
484 /* warn if it tries to conflict with the root inode */ 495 /* warn if it tries to conflict with the root inode */
485 if (unlikely(i == 1)) 496 if (unlikely(i == 1))
486 printk(KERN_WARNING "%s: %s passed in a files array" 497 printk(KERN_WARNING "%s: %s passed in a files array"
487 "with an index of 1!\n", __func__, 498 "with an index of 1!\n", __func__,
488 s->s_type->name); 499 s->s_type->name);
489 500
490 dentry = d_alloc_name(root, files->name); 501 dentry = d_alloc_name(root, files->name);
491 if (!dentry) 502 if (!dentry)
492 goto out; 503 goto out;
493 inode = new_inode(s); 504 inode = new_inode(s);
494 if (!inode) 505 if (!inode)
495 goto out; 506 goto out;
496 inode->i_mode = S_IFREG | files->mode; 507 inode->i_mode = S_IFREG | files->mode;
497 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 508 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
498 inode->i_fop = files->ops; 509 inode->i_fop = files->ops;
499 inode->i_ino = i; 510 inode->i_ino = i;
500 d_add(dentry, inode); 511 d_add(dentry, inode);
501 } 512 }
502 s->s_root = root; 513 s->s_root = root;
503 return 0; 514 return 0;
504 out: 515 out:
505 d_genocide(root); 516 d_genocide(root);
506 dput(root); 517 dput(root);
507 return -ENOMEM; 518 return -ENOMEM;
508 } 519 }
509 520
510 static DEFINE_SPINLOCK(pin_fs_lock); 521 static DEFINE_SPINLOCK(pin_fs_lock);
511 522
512 int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) 523 int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count)
513 { 524 {
514 struct vfsmount *mnt = NULL; 525 struct vfsmount *mnt = NULL;
515 spin_lock(&pin_fs_lock); 526 spin_lock(&pin_fs_lock);
516 if (unlikely(!*mount)) { 527 if (unlikely(!*mount)) {
517 spin_unlock(&pin_fs_lock); 528 spin_unlock(&pin_fs_lock);
518 mnt = vfs_kern_mount(type, 0, type->name, NULL); 529 mnt = vfs_kern_mount(type, 0, type->name, NULL);
519 if (IS_ERR(mnt)) 530 if (IS_ERR(mnt))
520 return PTR_ERR(mnt); 531 return PTR_ERR(mnt);
521 spin_lock(&pin_fs_lock); 532 spin_lock(&pin_fs_lock);
522 if (!*mount) 533 if (!*mount)
523 *mount = mnt; 534 *mount = mnt;
524 } 535 }
525 mntget(*mount); 536 mntget(*mount);
526 ++*count; 537 ++*count;
527 spin_unlock(&pin_fs_lock); 538 spin_unlock(&pin_fs_lock);
528 mntput(mnt); 539 mntput(mnt);
529 return 0; 540 return 0;
530 } 541 }
531 542
532 void simple_release_fs(struct vfsmount **mount, int *count) 543 void simple_release_fs(struct vfsmount **mount, int *count)
533 { 544 {
534 struct vfsmount *mnt; 545 struct vfsmount *mnt;
535 spin_lock(&pin_fs_lock); 546 spin_lock(&pin_fs_lock);
536 mnt = *mount; 547 mnt = *mount;
537 if (!--*count) 548 if (!--*count)
538 *mount = NULL; 549 *mount = NULL;
539 spin_unlock(&pin_fs_lock); 550 spin_unlock(&pin_fs_lock);
540 mntput(mnt); 551 mntput(mnt);
541 } 552 }
542 553
543 /** 554 /**
544 * simple_read_from_buffer - copy data from the buffer to user space 555 * simple_read_from_buffer - copy data from the buffer to user space
545 * @to: the user space buffer to read to 556 * @to: the user space buffer to read to
546 * @count: the maximum number of bytes to read 557 * @count: the maximum number of bytes to read
547 * @ppos: the current position in the buffer 558 * @ppos: the current position in the buffer
548 * @from: the buffer to read from 559 * @from: the buffer to read from
549 * @available: the size of the buffer 560 * @available: the size of the buffer
550 * 561 *
551 * The simple_read_from_buffer() function reads up to @count bytes from the 562 * The simple_read_from_buffer() function reads up to @count bytes from the
552 * buffer @from at offset @ppos into the user space address starting at @to. 563 * buffer @from at offset @ppos into the user space address starting at @to.
553 * 564 *
554 * On success, the number of bytes read is returned and the offset @ppos is 565 * On success, the number of bytes read is returned and the offset @ppos is
555 * advanced by this number, or negative value is returned on error. 566 * advanced by this number, or negative value is returned on error.
556 **/ 567 **/
557 ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, 568 ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
558 const void *from, size_t available) 569 const void *from, size_t available)
559 { 570 {
560 loff_t pos = *ppos; 571 loff_t pos = *ppos;
561 size_t ret; 572 size_t ret;
562 573
563 if (pos < 0) 574 if (pos < 0)
564 return -EINVAL; 575 return -EINVAL;
565 if (pos >= available || !count) 576 if (pos >= available || !count)
566 return 0; 577 return 0;
567 if (count > available - pos) 578 if (count > available - pos)
568 count = available - pos; 579 count = available - pos;
569 ret = copy_to_user(to, from + pos, count); 580 ret = copy_to_user(to, from + pos, count);
570 if (ret == count) 581 if (ret == count)
571 return -EFAULT; 582 return -EFAULT;
572 count -= ret; 583 count -= ret;
573 *ppos = pos + count; 584 *ppos = pos + count;
574 return count; 585 return count;
575 } 586 }
576 587
577 /** 588 /**
578 * simple_write_to_buffer - copy data from user space to the buffer 589 * simple_write_to_buffer - copy data from user space to the buffer
579 * @to: the buffer to write to 590 * @to: the buffer to write to
580 * @available: the size of the buffer 591 * @available: the size of the buffer
581 * @ppos: the current position in the buffer 592 * @ppos: the current position in the buffer
582 * @from: the user space buffer to read from 593 * @from: the user space buffer to read from
583 * @count: the maximum number of bytes to read 594 * @count: the maximum number of bytes to read
584 * 595 *
585 * The simple_write_to_buffer() function reads up to @count bytes from the user 596 * The simple_write_to_buffer() function reads up to @count bytes from the user
586 * space address starting at @from into the buffer @to at offset @ppos. 597 * space address starting at @from into the buffer @to at offset @ppos.
587 * 598 *
588 * On success, the number of bytes written is returned and the offset @ppos is 599 * On success, the number of bytes written is returned and the offset @ppos is
589 * advanced by this number, or negative value is returned on error. 600 * advanced by this number, or negative value is returned on error.
590 **/ 601 **/
591 ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, 602 ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
592 const void __user *from, size_t count) 603 const void __user *from, size_t count)
593 { 604 {
594 loff_t pos = *ppos; 605 loff_t pos = *ppos;
595 size_t res; 606 size_t res;
596 607
597 if (pos < 0) 608 if (pos < 0)
598 return -EINVAL; 609 return -EINVAL;
599 if (pos >= available || !count) 610 if (pos >= available || !count)
600 return 0; 611 return 0;
601 if (count > available - pos) 612 if (count > available - pos)
602 count = available - pos; 613 count = available - pos;
603 res = copy_from_user(to + pos, from, count); 614 res = copy_from_user(to + pos, from, count);
604 if (res == count) 615 if (res == count)
605 return -EFAULT; 616 return -EFAULT;
606 count -= res; 617 count -= res;
607 *ppos = pos + count; 618 *ppos = pos + count;
608 return count; 619 return count;
609 } 620 }
610 621
611 /** 622 /**
612 * memory_read_from_buffer - copy data from the buffer 623 * memory_read_from_buffer - copy data from the buffer
613 * @to: the kernel space buffer to read to 624 * @to: the kernel space buffer to read to
614 * @count: the maximum number of bytes to read 625 * @count: the maximum number of bytes to read
615 * @ppos: the current position in the buffer 626 * @ppos: the current position in the buffer
616 * @from: the buffer to read from 627 * @from: the buffer to read from
617 * @available: the size of the buffer 628 * @available: the size of the buffer
618 * 629 *
619 * The memory_read_from_buffer() function reads up to @count bytes from the 630 * The memory_read_from_buffer() function reads up to @count bytes from the
620 * buffer @from at offset @ppos into the kernel space address starting at @to. 631 * buffer @from at offset @ppos into the kernel space address starting at @to.
621 * 632 *
622 * On success, the number of bytes read is returned and the offset @ppos is 633 * On success, the number of bytes read is returned and the offset @ppos is
623 * advanced by this number, or negative value is returned on error. 634 * advanced by this number, or negative value is returned on error.
624 **/ 635 **/
625 ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, 636 ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
626 const void *from, size_t available) 637 const void *from, size_t available)
627 { 638 {
628 loff_t pos = *ppos; 639 loff_t pos = *ppos;
629 640
630 if (pos < 0) 641 if (pos < 0)
631 return -EINVAL; 642 return -EINVAL;
632 if (pos >= available) 643 if (pos >= available)
633 return 0; 644 return 0;
634 if (count > available - pos) 645 if (count > available - pos)
635 count = available - pos; 646 count = available - pos;
636 memcpy(to, from + pos, count); 647 memcpy(to, from + pos, count);
637 *ppos = pos + count; 648 *ppos = pos + count;
638 649
639 return count; 650 return count;
640 } 651 }
641 652
642 /* 653 /*
643 * Transaction based IO. 654 * Transaction based IO.
644 * The file expects a single write which triggers the transaction, and then 655 * The file expects a single write which triggers the transaction, and then
645 * possibly a read which collects the result - which is stored in a 656 * possibly a read which collects the result - which is stored in a
646 * file-local buffer. 657 * file-local buffer.
647 */ 658 */
648 659
649 void simple_transaction_set(struct file *file, size_t n) 660 void simple_transaction_set(struct file *file, size_t n)
650 { 661 {
651 struct simple_transaction_argresp *ar = file->private_data; 662 struct simple_transaction_argresp *ar = file->private_data;
652 663
653 BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); 664 BUG_ON(n > SIMPLE_TRANSACTION_LIMIT);
654 665
655 /* 666 /*
656 * The barrier ensures that ar->size will really remain zero until 667 * The barrier ensures that ar->size will really remain zero until
657 * ar->data is ready for reading. 668 * ar->data is ready for reading.
658 */ 669 */
659 smp_mb(); 670 smp_mb();
660 ar->size = n; 671 ar->size = n;
661 } 672 }
662 673
663 char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) 674 char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
664 { 675 {
665 struct simple_transaction_argresp *ar; 676 struct simple_transaction_argresp *ar;
666 static DEFINE_SPINLOCK(simple_transaction_lock); 677 static DEFINE_SPINLOCK(simple_transaction_lock);
667 678
668 if (size > SIMPLE_TRANSACTION_LIMIT - 1) 679 if (size > SIMPLE_TRANSACTION_LIMIT - 1)
669 return ERR_PTR(-EFBIG); 680 return ERR_PTR(-EFBIG);
670 681
671 ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL); 682 ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL);
672 if (!ar) 683 if (!ar)
673 return ERR_PTR(-ENOMEM); 684 return ERR_PTR(-ENOMEM);
674 685
675 spin_lock(&simple_transaction_lock); 686 spin_lock(&simple_transaction_lock);
676 687
677 /* only one write allowed per open */ 688 /* only one write allowed per open */
678 if (file->private_data) { 689 if (file->private_data) {
679 spin_unlock(&simple_transaction_lock); 690 spin_unlock(&simple_transaction_lock);
680 free_page((unsigned long)ar); 691 free_page((unsigned long)ar);
681 return ERR_PTR(-EBUSY); 692 return ERR_PTR(-EBUSY);
682 } 693 }
683 694
684 file->private_data = ar; 695 file->private_data = ar;
685 696
686 spin_unlock(&simple_transaction_lock); 697 spin_unlock(&simple_transaction_lock);
687 698
688 if (copy_from_user(ar->data, buf, size)) 699 if (copy_from_user(ar->data, buf, size))
689 return ERR_PTR(-EFAULT); 700 return ERR_PTR(-EFAULT);
690 701
691 return ar->data; 702 return ar->data;
692 } 703 }
693 704
694 ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) 705 ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
695 { 706 {
696 struct simple_transaction_argresp *ar = file->private_data; 707 struct simple_transaction_argresp *ar = file->private_data;
697 708
698 if (!ar) 709 if (!ar)
699 return 0; 710 return 0;
700 return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); 711 return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
701 } 712 }
702 713
703 int simple_transaction_release(struct inode *inode, struct file *file) 714 int simple_transaction_release(struct inode *inode, struct file *file)
704 { 715 {
705 free_page((unsigned long)file->private_data); 716 free_page((unsigned long)file->private_data);
706 return 0; 717 return 0;
707 } 718 }
708 719
/* Simple attribute files */

/*
 * Per-open state for a simple attribute file: the accessors, their opaque
 * argument, and the text buffers used by the read and write paths.
 */
struct simple_attr {
	int (*get)(void *, u64 *);	/* fetch the value; NULL means reads fail with -EACCES */
	int (*set)(void *, u64);	/* store a value; NULL means writes fail with -EACCES */
	char get_buf[24];	/* enough to store a u64 and "\n\0" */
	char set_buf[24];	/* holds the user's text while it is parsed */
	void *data;		/* passed as first argument to get/set */
	const char *fmt;	/* format for read operation */
	struct mutex mutex;	/* protects access to these buffers */
};
720 731
721 /* simple_attr_open is called by an actual attribute open file operation 732 /* simple_attr_open is called by an actual attribute open file operation
722 * to set the attribute specific access operations. */ 733 * to set the attribute specific access operations. */
723 int simple_attr_open(struct inode *inode, struct file *file, 734 int simple_attr_open(struct inode *inode, struct file *file,
724 int (*get)(void *, u64 *), int (*set)(void *, u64), 735 int (*get)(void *, u64 *), int (*set)(void *, u64),
725 const char *fmt) 736 const char *fmt)
726 { 737 {
727 struct simple_attr *attr; 738 struct simple_attr *attr;
728 739
729 attr = kmalloc(sizeof(*attr), GFP_KERNEL); 740 attr = kmalloc(sizeof(*attr), GFP_KERNEL);
730 if (!attr) 741 if (!attr)
731 return -ENOMEM; 742 return -ENOMEM;
732 743
733 attr->get = get; 744 attr->get = get;
734 attr->set = set; 745 attr->set = set;
735 attr->data = inode->i_private; 746 attr->data = inode->i_private;
736 attr->fmt = fmt; 747 attr->fmt = fmt;
737 mutex_init(&attr->mutex); 748 mutex_init(&attr->mutex);
738 749
739 file->private_data = attr; 750 file->private_data = attr;
740 751
741 return nonseekable_open(inode, file); 752 return nonseekable_open(inode, file);
742 } 753 }
743 754
744 int simple_attr_release(struct inode *inode, struct file *file) 755 int simple_attr_release(struct inode *inode, struct file *file)
745 { 756 {
746 kfree(file->private_data); 757 kfree(file->private_data);
747 return 0; 758 return 0;
748 } 759 }
749 760
750 /* read from the buffer that is filled with the get function */ 761 /* read from the buffer that is filled with the get function */
751 ssize_t simple_attr_read(struct file *file, char __user *buf, 762 ssize_t simple_attr_read(struct file *file, char __user *buf,
752 size_t len, loff_t *ppos) 763 size_t len, loff_t *ppos)
753 { 764 {
754 struct simple_attr *attr; 765 struct simple_attr *attr;
755 size_t size; 766 size_t size;
756 ssize_t ret; 767 ssize_t ret;
757 768
758 attr = file->private_data; 769 attr = file->private_data;
759 770
760 if (!attr->get) 771 if (!attr->get)
761 return -EACCES; 772 return -EACCES;
762 773
763 ret = mutex_lock_interruptible(&attr->mutex); 774 ret = mutex_lock_interruptible(&attr->mutex);
764 if (ret) 775 if (ret)
765 return ret; 776 return ret;
766 777
767 if (*ppos) { /* continued read */ 778 if (*ppos) { /* continued read */
768 size = strlen(attr->get_buf); 779 size = strlen(attr->get_buf);
769 } else { /* first read */ 780 } else { /* first read */
770 u64 val; 781 u64 val;
771 ret = attr->get(attr->data, &val); 782 ret = attr->get(attr->data, &val);
772 if (ret) 783 if (ret)
773 goto out; 784 goto out;
774 785
775 size = scnprintf(attr->get_buf, sizeof(attr->get_buf), 786 size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
776 attr->fmt, (unsigned long long)val); 787 attr->fmt, (unsigned long long)val);
777 } 788 }
778 789
779 ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); 790 ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
780 out: 791 out:
781 mutex_unlock(&attr->mutex); 792 mutex_unlock(&attr->mutex);
782 return ret; 793 return ret;
783 } 794 }
784 795
785 /* interpret the buffer as a number to call the set function with */ 796 /* interpret the buffer as a number to call the set function with */
786 ssize_t simple_attr_write(struct file *file, const char __user *buf, 797 ssize_t simple_attr_write(struct file *file, const char __user *buf,
787 size_t len, loff_t *ppos) 798 size_t len, loff_t *ppos)
788 { 799 {
789 struct simple_attr *attr; 800 struct simple_attr *attr;
790 u64 val; 801 u64 val;
791 size_t size; 802 size_t size;
792 ssize_t ret; 803 ssize_t ret;
793 804
794 attr = file->private_data; 805 attr = file->private_data;
795 if (!attr->set) 806 if (!attr->set)
796 return -EACCES; 807 return -EACCES;
797 808
798 ret = mutex_lock_interruptible(&attr->mutex); 809 ret = mutex_lock_interruptible(&attr->mutex);
799 if (ret) 810 if (ret)
800 return ret; 811 return ret;
801 812
802 ret = -EFAULT; 813 ret = -EFAULT;
803 size = min(sizeof(attr->set_buf) - 1, len); 814 size = min(sizeof(attr->set_buf) - 1, len);
804 if (copy_from_user(attr->set_buf, buf, size)) 815 if (copy_from_user(attr->set_buf, buf, size))
805 goto out; 816 goto out;
806 817
807 attr->set_buf[size] = '\0'; 818 attr->set_buf[size] = '\0';
808 val = simple_strtol(attr->set_buf, NULL, 0); 819 val = simple_strtol(attr->set_buf, NULL, 0);
809 ret = attr->set(attr->data, val); 820 ret = attr->set(attr->data, val);
810 if (ret == 0) 821 if (ret == 0)
811 ret = len; /* on success, claim we got the whole input */ 822 ret = len; /* on success, claim we got the whole input */
812 out: 823 out:
813 mutex_unlock(&attr->mutex); 824 mutex_unlock(&attr->mutex);
814 return ret; 825 return ret;
815 } 826 }
816 827
817 /** 828 /**
818 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation 829 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
819 * @sb: filesystem to do the file handle conversion on 830 * @sb: filesystem to do the file handle conversion on
820 * @fid: file handle to convert 831 * @fid: file handle to convert
821 * @fh_len: length of the file handle in bytes 832 * @fh_len: length of the file handle in bytes
822 * @fh_type: type of file handle 833 * @fh_type: type of file handle
823 * @get_inode: filesystem callback to retrieve inode 834 * @get_inode: filesystem callback to retrieve inode
824 * 835 *
825 * This function decodes @fid as long as it has one of the well-known 836 * This function decodes @fid as long as it has one of the well-known
826 * Linux filehandle types and calls @get_inode on it to retrieve the 837 * Linux filehandle types and calls @get_inode on it to retrieve the
827 * inode for the object specified in the file handle. 838 * inode for the object specified in the file handle.
828 */ 839 */
829 struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, 840 struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
830 int fh_len, int fh_type, struct inode *(*get_inode) 841 int fh_len, int fh_type, struct inode *(*get_inode)
831 (struct super_block *sb, u64 ino, u32 gen)) 842 (struct super_block *sb, u64 ino, u32 gen))
832 { 843 {
833 struct inode *inode = NULL; 844 struct inode *inode = NULL;
834 845
835 if (fh_len < 2) 846 if (fh_len < 2)
836 return NULL; 847 return NULL;
837 848
838 switch (fh_type) { 849 switch (fh_type) {
839 case FILEID_INO32_GEN: 850 case FILEID_INO32_GEN:
840 case FILEID_INO32_GEN_PARENT: 851 case FILEID_INO32_GEN_PARENT:
841 inode = get_inode(sb, fid->i32.ino, fid->i32.gen); 852 inode = get_inode(sb, fid->i32.ino, fid->i32.gen);
842 break; 853 break;
843 } 854 }
844 855
845 return d_obtain_alias(inode); 856 return d_obtain_alias(inode);
846 } 857 }
847 EXPORT_SYMBOL_GPL(generic_fh_to_dentry); 858 EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
848 859
849 /** 860 /**
850 * generic_fh_to_dentry - generic helper for the fh_to_parent export operation 861 * generic_fh_to_dentry - generic helper for the fh_to_parent export operation
851 * @sb: filesystem to do the file handle conversion on 862 * @sb: filesystem to do the file handle conversion on
852 * @fid: file handle to convert 863 * @fid: file handle to convert
853 * @fh_len: length of the file handle in bytes 864 * @fh_len: length of the file handle in bytes
854 * @fh_type: type of file handle 865 * @fh_type: type of file handle
855 * @get_inode: filesystem callback to retrieve inode 866 * @get_inode: filesystem callback to retrieve inode
856 * 867 *
857 * This function decodes @fid as long as it has one of the well-known 868 * This function decodes @fid as long as it has one of the well-known
858 * Linux filehandle types and calls @get_inode on it to retrieve the 869 * Linux filehandle types and calls @get_inode on it to retrieve the
859 * inode for the _parent_ object specified in the file handle if it 870 * inode for the _parent_ object specified in the file handle if it
860 * is specified in the file handle, or NULL otherwise. 871 * is specified in the file handle, or NULL otherwise.
861 */ 872 */
862 struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, 873 struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
863 int fh_len, int fh_type, struct inode *(*get_inode) 874 int fh_len, int fh_type, struct inode *(*get_inode)
864 (struct super_block *sb, u64 ino, u32 gen)) 875 (struct super_block *sb, u64 ino, u32 gen))
865 { 876 {
866 struct inode *inode = NULL; 877 struct inode *inode = NULL;
867 878
868 if (fh_len <= 2) 879 if (fh_len <= 2)
869 return NULL; 880 return NULL;
870 881
871 switch (fh_type) { 882 switch (fh_type) {
872 case FILEID_INO32_GEN_PARENT: 883 case FILEID_INO32_GEN_PARENT:
873 inode = get_inode(sb, fid->i32.parent_ino, 884 inode = get_inode(sb, fid->i32.parent_ino,
874 (fh_len > 3 ? fid->i32.parent_gen : 0)); 885 (fh_len > 3 ? fid->i32.parent_gen : 0));
875 break; 886 break;
876 } 887 }
877 888
878 return d_obtain_alias(inode); 889 return d_obtain_alias(inode);
879 } 890 }
880 EXPORT_SYMBOL_GPL(generic_fh_to_parent); 891 EXPORT_SYMBOL_GPL(generic_fh_to_parent);
881 892
882 /** 893 /**
883 * generic_file_fsync - generic fsync implementation for simple filesystems 894 * generic_file_fsync - generic fsync implementation for simple filesystems
884 * @file: file to synchronize 895 * @file: file to synchronize
885 * @datasync: only synchronize essential metadata if true 896 * @datasync: only synchronize essential metadata if true
886 * 897 *
887 * This is a generic implementation of the fsync method for simple 898 * This is a generic implementation of the fsync method for simple
888 * filesystems which track all non-inode metadata in the buffers list 899 * filesystems which track all non-inode metadata in the buffers list
889 * hanging off the address_space structure. 900 * hanging off the address_space structure.
890 */ 901 */
891 int generic_file_fsync(struct file *file, int datasync) 902 int generic_file_fsync(struct file *file, int datasync)
892 { 903 {
893 struct inode *inode = file->f_mapping->host; 904 struct inode *inode = file->f_mapping->host;
894 int err; 905 int err;
895 int ret; 906 int ret;
896 907
897 ret = sync_mapping_buffers(inode->i_mapping); 908 ret = sync_mapping_buffers(inode->i_mapping);
898 if (!(inode->i_state & I_DIRTY)) 909 if (!(inode->i_state & I_DIRTY))
899 return ret; 910 return ret;
900 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 911 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
901 return ret; 912 return ret;
902 913
903 err = sync_inode_metadata(inode, 1); 914 err = sync_inode_metadata(inode, 1);
904 if (ret == 0) 915 if (ret == 0)
905 ret = err; 916 ret = err;
906 return ret; 917 return ret;
907 } 918 }
908 EXPORT_SYMBOL(generic_file_fsync); 919 EXPORT_SYMBOL(generic_file_fsync);
909 920
910 /** 921 /**
911 * generic_check_addressable - Check addressability of file system 922 * generic_check_addressable - Check addressability of file system
912 * @blocksize_bits: log of file system block size 923 * @blocksize_bits: log of file system block size
913 * @num_blocks: number of blocks in file system 924 * @num_blocks: number of blocks in file system
914 * 925 *
915 * Determine whether a file system with @num_blocks blocks (and a 926 * Determine whether a file system with @num_blocks blocks (and a
916 * block size of 2**@blocksize_bits) is addressable by the sector_t 927 * block size of 2**@blocksize_bits) is addressable by the sector_t
917 * and page cache of the system. Return 0 if so and -EFBIG otherwise. 928 * and page cache of the system. Return 0 if so and -EFBIG otherwise.
918 */ 929 */
919 int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) 930 int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
920 { 931 {
921 u64 last_fs_block = num_blocks - 1; 932 u64 last_fs_block = num_blocks - 1;
922 u64 last_fs_page = 933 u64 last_fs_page =
923 last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits); 934 last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits);
924 935
925 if (unlikely(num_blocks == 0)) 936 if (unlikely(num_blocks == 0))
926 return 0; 937 return 0;
927 938
928 if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT)) 939 if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT))
929 return -EINVAL; 940 return -EINVAL;
930 941
931 if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || 942 if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
932 (last_fs_page > (pgoff_t)(~0ULL))) { 943 (last_fs_page > (pgoff_t)(~0ULL))) {
933 return -EFBIG; 944 return -EFBIG;
934 } 945 }
935 return 0; 946 return 0;
936 } 947 }
937 EXPORT_SYMBOL(generic_check_addressable); 948 EXPORT_SYMBOL(generic_check_addressable);
938 949
/*
 * ->fsync for in-memory filesystems: there is nothing to write back,
 * so both arguments are ignored and success (0) is always reported.
 */
int noop_fsync(struct file *file, int datasync)
{
	(void)file;
	(void)datasync;

	return 0;
}
946 957
947 EXPORT_SYMBOL(dcache_dir_close); 958 EXPORT_SYMBOL(dcache_dir_close);
948 EXPORT_SYMBOL(dcache_dir_lseek); 959 EXPORT_SYMBOL(dcache_dir_lseek);
949 EXPORT_SYMBOL(dcache_dir_open); 960 EXPORT_SYMBOL(dcache_dir_open);
950 EXPORT_SYMBOL(dcache_readdir); 961 EXPORT_SYMBOL(dcache_readdir);
951 EXPORT_SYMBOL(generic_read_dir); 962 EXPORT_SYMBOL(generic_read_dir);
952 EXPORT_SYMBOL(mount_pseudo); 963 EXPORT_SYMBOL(mount_pseudo);
953 EXPORT_SYMBOL(simple_write_begin); 964 EXPORT_SYMBOL(simple_write_begin);
954 EXPORT_SYMBOL(simple_write_end); 965 EXPORT_SYMBOL(simple_write_end);
955 EXPORT_SYMBOL(simple_dir_inode_operations); 966 EXPORT_SYMBOL(simple_dir_inode_operations);
956 EXPORT_SYMBOL(simple_dir_operations); 967 EXPORT_SYMBOL(simple_dir_operations);
957 EXPORT_SYMBOL(simple_empty); 968 EXPORT_SYMBOL(simple_empty);
958 EXPORT_SYMBOL(simple_fill_super); 969 EXPORT_SYMBOL(simple_fill_super);
959 EXPORT_SYMBOL(simple_getattr); 970 EXPORT_SYMBOL(simple_getattr);
960 EXPORT_SYMBOL(simple_link); 971 EXPORT_SYMBOL(simple_link);
961 EXPORT_SYMBOL(simple_lookup); 972 EXPORT_SYMBOL(simple_lookup);
962 EXPORT_SYMBOL(simple_pin_fs); 973 EXPORT_SYMBOL(simple_pin_fs);
963 EXPORT_SYMBOL(simple_readpage); 974 EXPORT_SYMBOL(simple_readpage);
964 EXPORT_SYMBOL(simple_release_fs); 975 EXPORT_SYMBOL(simple_release_fs);
965 EXPORT_SYMBOL(simple_rename); 976 EXPORT_SYMBOL(simple_rename);
966 EXPORT_SYMBOL(simple_rmdir); 977 EXPORT_SYMBOL(simple_rmdir);
967 EXPORT_SYMBOL(simple_statfs); 978 EXPORT_SYMBOL(simple_statfs);
968 EXPORT_SYMBOL(noop_fsync); 979 EXPORT_SYMBOL(noop_fsync);
969 EXPORT_SYMBOL(simple_unlink); 980 EXPORT_SYMBOL(simple_unlink);
970 EXPORT_SYMBOL(simple_read_from_buffer); 981 EXPORT_SYMBOL(simple_read_from_buffer);
971 EXPORT_SYMBOL(simple_write_to_buffer); 982 EXPORT_SYMBOL(simple_write_to_buffer);
972 EXPORT_SYMBOL(memory_read_from_buffer); 983 EXPORT_SYMBOL(memory_read_from_buffer);
973 EXPORT_SYMBOL(simple_transaction_set); 984 EXPORT_SYMBOL(simple_transaction_set);
974 EXPORT_SYMBOL(simple_transaction_get); 985 EXPORT_SYMBOL(simple_transaction_get);
975 EXPORT_SYMBOL(simple_transaction_read); 986 EXPORT_SYMBOL(simple_transaction_read);
976 EXPORT_SYMBOL(simple_transaction_release); 987 EXPORT_SYMBOL(simple_transaction_release);
1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * dcache.c 4 * dcache.c
5 * 5 *
6 * dentry cache handling code 6 * dentry cache handling code
7 * 7 *
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #include <linux/fs.h> 26 #include <linux/fs.h>
27 #include <linux/types.h> 27 #include <linux/types.h>
28 #include <linux/slab.h> 28 #include <linux/slab.h>
29 #include <linux/namei.h> 29 #include <linux/namei.h>
30 30
31 #define MLOG_MASK_PREFIX ML_DCACHE 31 #define MLOG_MASK_PREFIX ML_DCACHE
32 #include <cluster/masklog.h> 32 #include <cluster/masklog.h>
33 33
34 #include "ocfs2.h" 34 #include "ocfs2.h"
35 35
36 #include "alloc.h" 36 #include "alloc.h"
37 #include "dcache.h" 37 #include "dcache.h"
38 #include "dlmglue.h" 38 #include "dlmglue.h"
39 #include "file.h" 39 #include "file.h"
40 #include "inode.h" 40 #include "inode.h"
41 #include "super.h" 41 #include "super.h"
42 42
43 void ocfs2_dentry_attach_gen(struct dentry *dentry) 43 void ocfs2_dentry_attach_gen(struct dentry *dentry)
44 { 44 {
45 unsigned long gen = 45 unsigned long gen =
46 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; 46 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
47 BUG_ON(dentry->d_inode); 47 BUG_ON(dentry->d_inode);
48 dentry->d_fsdata = (void *)gen; 48 dentry->d_fsdata = (void *)gen;
49 } 49 }
50 50
51 51
52 static int ocfs2_dentry_revalidate(struct dentry *dentry, 52 static int ocfs2_dentry_revalidate(struct dentry *dentry,
53 struct nameidata *nd) 53 struct nameidata *nd)
54 { 54 {
55 struct inode *inode = dentry->d_inode; 55 struct inode *inode = dentry->d_inode;
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 57 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
58 58
59 mlog_entry("(0x%p, '%.*s')\n", dentry, 59 mlog_entry("(0x%p, '%.*s')\n", dentry,
60 dentry->d_name.len, dentry->d_name.name); 60 dentry->d_name.len, dentry->d_name.name);
61 61
62 /* For a negative dentry - 62 /* For a negative dentry -
63 * check the generation number of the parent and compare with the 63 * check the generation number of the parent and compare with the
64 * one stored in the inode. 64 * one stored in the inode.
65 */ 65 */
66 if (inode == NULL) { 66 if (inode == NULL) {
67 unsigned long gen = (unsigned long) dentry->d_fsdata; 67 unsigned long gen = (unsigned long) dentry->d_fsdata;
68 unsigned long pgen = 68 unsigned long pgen =
69 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; 69 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
70 mlog(0, "negative dentry: %.*s parent gen: %lu " 70 mlog(0, "negative dentry: %.*s parent gen: %lu "
71 "dentry gen: %lu\n", 71 "dentry gen: %lu\n",
72 dentry->d_name.len, dentry->d_name.name, pgen, gen); 72 dentry->d_name.len, dentry->d_name.name, pgen, gen);
73 if (gen != pgen) 73 if (gen != pgen)
74 goto bail; 74 goto bail;
75 goto valid; 75 goto valid;
76 } 76 }
77 77
78 BUG_ON(!osb); 78 BUG_ON(!osb);
79 79
80 if (inode == osb->root_inode || is_bad_inode(inode)) 80 if (inode == osb->root_inode || is_bad_inode(inode))
81 goto bail; 81 goto bail;
82 82
83 spin_lock(&OCFS2_I(inode)->ip_lock); 83 spin_lock(&OCFS2_I(inode)->ip_lock);
84 /* did we or someone else delete this inode? */ 84 /* did we or someone else delete this inode? */
85 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 85 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
86 spin_unlock(&OCFS2_I(inode)->ip_lock); 86 spin_unlock(&OCFS2_I(inode)->ip_lock);
87 mlog(0, "inode (%llu) deleted, returning false\n", 87 mlog(0, "inode (%llu) deleted, returning false\n",
88 (unsigned long long)OCFS2_I(inode)->ip_blkno); 88 (unsigned long long)OCFS2_I(inode)->ip_blkno);
89 goto bail; 89 goto bail;
90 } 90 }
91 spin_unlock(&OCFS2_I(inode)->ip_lock); 91 spin_unlock(&OCFS2_I(inode)->ip_lock);
92 92
93 /* 93 /*
94 * We don't need a cluster lock to test this because once an 94 * We don't need a cluster lock to test this because once an
95 * inode nlink hits zero, it never goes back. 95 * inode nlink hits zero, it never goes back.
96 */ 96 */
97 if (inode->i_nlink == 0) { 97 if (inode->i_nlink == 0) {
98 mlog(0, "Inode %llu orphaned, returning false " 98 mlog(0, "Inode %llu orphaned, returning false "
99 "dir = %d\n", 99 "dir = %d\n",
100 (unsigned long long)OCFS2_I(inode)->ip_blkno, 100 (unsigned long long)OCFS2_I(inode)->ip_blkno,
101 S_ISDIR(inode->i_mode)); 101 S_ISDIR(inode->i_mode));
102 goto bail; 102 goto bail;
103 } 103 }
104 104
105 /* 105 /*
106 * If the last lookup failed to create dentry lock, let us 106 * If the last lookup failed to create dentry lock, let us
107 * redo it. 107 * redo it.
108 */ 108 */
109 if (!dentry->d_fsdata) { 109 if (!dentry->d_fsdata) {
110 mlog(0, "Inode %llu doesn't have dentry lock, " 110 mlog(0, "Inode %llu doesn't have dentry lock, "
111 "returning false\n", 111 "returning false\n",
112 (unsigned long long)OCFS2_I(inode)->ip_blkno); 112 (unsigned long long)OCFS2_I(inode)->ip_blkno);
113 goto bail; 113 goto bail;
114 } 114 }
115 115
116 valid: 116 valid:
117 ret = 1; 117 ret = 1;
118 118
119 bail: 119 bail:
120 mlog_exit(ret); 120 mlog_exit(ret);
121 121
122 return ret; 122 return ret;
123 } 123 }
124 124
125 static int ocfs2_match_dentry(struct dentry *dentry, 125 static int ocfs2_match_dentry(struct dentry *dentry,
126 u64 parent_blkno, 126 u64 parent_blkno,
127 int skip_unhashed) 127 int skip_unhashed)
128 { 128 {
129 struct inode *parent; 129 struct inode *parent;
130 130
131 /* 131 /*
132 * ocfs2_lookup() does a d_splice_alias() _before_ attaching 132 * ocfs2_lookup() does a d_splice_alias() _before_ attaching
133 * to the lock data, so we skip those here, otherwise 133 * to the lock data, so we skip those here, otherwise
134 * ocfs2_dentry_attach_lock() will get its original dentry 134 * ocfs2_dentry_attach_lock() will get its original dentry
135 * back. 135 * back.
136 */ 136 */
137 if (!dentry->d_fsdata) 137 if (!dentry->d_fsdata)
138 return 0; 138 return 0;
139 139
140 if (!dentry->d_parent) 140 if (!dentry->d_parent)
141 return 0; 141 return 0;
142 142
143 if (skip_unhashed && d_unhashed(dentry)) 143 if (skip_unhashed && d_unhashed(dentry))
144 return 0; 144 return 0;
145 145
146 parent = dentry->d_parent->d_inode; 146 parent = dentry->d_parent->d_inode;
147 /* Negative parent dentry? */ 147 /* Negative parent dentry? */
148 if (!parent) 148 if (!parent)
149 return 0; 149 return 0;
150 150
151 /* Name is in a different directory. */ 151 /* Name is in a different directory. */
152 if (OCFS2_I(parent)->ip_blkno != parent_blkno) 152 if (OCFS2_I(parent)->ip_blkno != parent_blkno)
153 return 0; 153 return 0;
154 154
155 return 1; 155 return 1;
156 } 156 }
157 157
158 /* 158 /*
159 * Walk the inode alias list, and find a dentry which has a given 159 * Walk the inode alias list, and find a dentry which has a given
160 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it 160 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
161 * is looking for a dentry_lock reference. The downconvert thread is 161 * is looking for a dentry_lock reference. The downconvert thread is
162 * looking to unhash aliases, so we allow it to skip any that already 162 * looking to unhash aliases, so we allow it to skip any that already
163 * have that property. 163 * have that property.
164 */ 164 */
165 struct dentry *ocfs2_find_local_alias(struct inode *inode, 165 struct dentry *ocfs2_find_local_alias(struct inode *inode,
166 u64 parent_blkno, 166 u64 parent_blkno,
167 int skip_unhashed) 167 int skip_unhashed)
168 { 168 {
169 struct list_head *p; 169 struct list_head *p;
170 struct dentry *dentry = NULL; 170 struct dentry *dentry = NULL;
171 171
172 spin_lock(&dcache_lock); 172 spin_lock(&dcache_lock);
173 173
174 list_for_each(p, &inode->i_dentry) { 174 list_for_each(p, &inode->i_dentry) {
175 dentry = list_entry(p, struct dentry, d_alias); 175 dentry = list_entry(p, struct dentry, d_alias);
176 176
177 spin_lock(&dentry->d_lock);
177 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 178 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
178 mlog(0, "dentry found: %.*s\n", 179 mlog(0, "dentry found: %.*s\n",
179 dentry->d_name.len, dentry->d_name.name); 180 dentry->d_name.len, dentry->d_name.name);
180 181
181 dget_locked(dentry); 182 dget_locked_dlock(dentry);
183 spin_unlock(&dentry->d_lock);
182 break; 184 break;
183 } 185 }
186 spin_unlock(&dentry->d_lock);
184 187
185 dentry = NULL; 188 dentry = NULL;
186 } 189 }
187 190
188 spin_unlock(&dcache_lock); 191 spin_unlock(&dcache_lock);
189 192
190 return dentry; 193 return dentry;
191 } 194 }
192 195
193 DEFINE_SPINLOCK(dentry_attach_lock); 196 DEFINE_SPINLOCK(dentry_attach_lock);
194 197
195 /* 198 /*
196 * Attach this dentry to a cluster lock. 199 * Attach this dentry to a cluster lock.
197 * 200 *
198 * Dentry locks cover all links in a given directory to a particular 201 * Dentry locks cover all links in a given directory to a particular
199 * inode. We do this so that ocfs2 can build a lock name which all 202 * inode. We do this so that ocfs2 can build a lock name which all
200 * nodes in the cluster can agree on at all times. Shoving full names 203 * nodes in the cluster can agree on at all times. Shoving full names
201 * in the cluster lock won't work due to size restrictions. Covering 204 * in the cluster lock won't work due to size restrictions. Covering
202 * links inside of a directory is a good compromise because it still 205 * links inside of a directory is a good compromise because it still
203 * allows us to use the parent directory lock to synchronize 206 * allows us to use the parent directory lock to synchronize
204 * operations. 207 * operations.
205 * 208 *
206 * Call this function with the parent dir semaphore and the parent dir 209 * Call this function with the parent dir semaphore and the parent dir
207 * cluster lock held. 210 * cluster lock held.
208 * 211 *
209 * The dir semaphore will protect us from having to worry about 212 * The dir semaphore will protect us from having to worry about
210 * concurrent processes on our node trying to attach a lock at the 213 * concurrent processes on our node trying to attach a lock at the
211 * same time. 214 * same time.
212 * 215 *
213 * The dir cluster lock (held at either PR or EX mode) protects us 216 * The dir cluster lock (held at either PR or EX mode) protects us
214 * from unlink and rename on other nodes. 217 * from unlink and rename on other nodes.
215 * 218 *
216 * A dput() can happen asynchronously due to pruning, so we cover 219 * A dput() can happen asynchronously due to pruning, so we cover
217 * attaching and detaching the dentry lock with a 220 * attaching and detaching the dentry lock with a
218 * dentry_attach_lock. 221 * dentry_attach_lock.
219 * 222 *
220 * A node which has done lookup on a name retains a protected read 223 * A node which has done lookup on a name retains a protected read
221 * lock until final dput. If the user requests and unlink or rename, 224 * lock until final dput. If the user requests and unlink or rename,
222 * the protected read is upgraded to an exclusive lock. Other nodes 225 * the protected read is upgraded to an exclusive lock. Other nodes
223 * who have seen the dentry will then be informed that they need to 226 * who have seen the dentry will then be informed that they need to
224 * downgrade their lock, which will involve d_delete on the 227 * downgrade their lock, which will involve d_delete on the
225 * dentry. This happens in ocfs2_dentry_convert_worker(). 228 * dentry. This happens in ocfs2_dentry_convert_worker().
226 */ 229 */
227 int ocfs2_dentry_attach_lock(struct dentry *dentry, 230 int ocfs2_dentry_attach_lock(struct dentry *dentry,
228 struct inode *inode, 231 struct inode *inode,
229 u64 parent_blkno) 232 u64 parent_blkno)
230 { 233 {
231 int ret; 234 int ret;
232 struct dentry *alias; 235 struct dentry *alias;
233 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 236 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
234 237
235 mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n", 238 mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n",
236 dentry->d_name.len, dentry->d_name.name, 239 dentry->d_name.len, dentry->d_name.name,
237 (unsigned long long)parent_blkno, dl); 240 (unsigned long long)parent_blkno, dl);
238 241
239 /* 242 /*
240 * Negative dentry. We ignore these for now. 243 * Negative dentry. We ignore these for now.
241 * 244 *
242 * XXX: Could we can improve ocfs2_dentry_revalidate() by 245 * XXX: Could we can improve ocfs2_dentry_revalidate() by
243 * tracking these? 246 * tracking these?
244 */ 247 */
245 if (!inode) 248 if (!inode)
246 return 0; 249 return 0;
247 250
248 if (!dentry->d_inode && dentry->d_fsdata) { 251 if (!dentry->d_inode && dentry->d_fsdata) {
249 /* Converting a negative dentry to positive 252 /* Converting a negative dentry to positive
250 Clear dentry->d_fsdata */ 253 Clear dentry->d_fsdata */
251 dentry->d_fsdata = dl = NULL; 254 dentry->d_fsdata = dl = NULL;
252 } 255 }
253 256
254 if (dl) { 257 if (dl) {
255 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, 258 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
256 " \"%.*s\": old parent: %llu, new: %llu\n", 259 " \"%.*s\": old parent: %llu, new: %llu\n",
257 dentry->d_name.len, dentry->d_name.name, 260 dentry->d_name.len, dentry->d_name.name,
258 (unsigned long long)parent_blkno, 261 (unsigned long long)parent_blkno,
259 (unsigned long long)dl->dl_parent_blkno); 262 (unsigned long long)dl->dl_parent_blkno);
260 return 0; 263 return 0;
261 } 264 }
262 265
263 alias = ocfs2_find_local_alias(inode, parent_blkno, 0); 266 alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
264 if (alias) { 267 if (alias) {
265 /* 268 /*
266 * Great, an alias exists, which means we must have a 269 * Great, an alias exists, which means we must have a
267 * dentry lock already. We can just grab the lock off 270 * dentry lock already. We can just grab the lock off
268 * the alias and add it to the list. 271 * the alias and add it to the list.
269 * 272 *
270 * We're depending here on the fact that this dentry 273 * We're depending here on the fact that this dentry
271 * was found and exists in the dcache and so must have 274 * was found and exists in the dcache and so must have
272 * a reference to the dentry_lock because we can't 275 * a reference to the dentry_lock because we can't
273 * race creates. Final dput() cannot happen on it 276 * race creates. Final dput() cannot happen on it
274 * since we have it pinned, so our reference is safe. 277 * since we have it pinned, so our reference is safe.
275 */ 278 */
276 dl = alias->d_fsdata; 279 dl = alias->d_fsdata;
277 mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n", 280 mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
278 (unsigned long long)parent_blkno, 281 (unsigned long long)parent_blkno,
279 (unsigned long long)OCFS2_I(inode)->ip_blkno); 282 (unsigned long long)OCFS2_I(inode)->ip_blkno);
280 283
281 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, 284 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
282 " \"%.*s\": old parent: %llu, new: %llu\n", 285 " \"%.*s\": old parent: %llu, new: %llu\n",
283 dentry->d_name.len, dentry->d_name.name, 286 dentry->d_name.len, dentry->d_name.name,
284 (unsigned long long)parent_blkno, 287 (unsigned long long)parent_blkno,
285 (unsigned long long)dl->dl_parent_blkno); 288 (unsigned long long)dl->dl_parent_blkno);
286 289
287 mlog(0, "Found: %s\n", dl->dl_lockres.l_name); 290 mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
288 291
289 goto out_attach; 292 goto out_attach;
290 } 293 }
291 294
292 /* 295 /*
293 * There are no other aliases 296 * There are no other aliases
294 */ 297 */
295 dl = kmalloc(sizeof(*dl), GFP_NOFS); 298 dl = kmalloc(sizeof(*dl), GFP_NOFS);
296 if (!dl) { 299 if (!dl) {
297 ret = -ENOMEM; 300 ret = -ENOMEM;
298 mlog_errno(ret); 301 mlog_errno(ret);
299 return ret; 302 return ret;
300 } 303 }
301 304
302 dl->dl_count = 0; 305 dl->dl_count = 0;
303 /* 306 /*
304 * Does this have to happen below, for all attaches, in case 307 * Does this have to happen below, for all attaches, in case
305 * the struct inode gets blown away by the downconvert thread? 308 * the struct inode gets blown away by the downconvert thread?
306 */ 309 */
307 dl->dl_inode = igrab(inode); 310 dl->dl_inode = igrab(inode);
308 dl->dl_parent_blkno = parent_blkno; 311 dl->dl_parent_blkno = parent_blkno;
309 ocfs2_dentry_lock_res_init(dl, parent_blkno, inode); 312 ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
310 313
311 out_attach: 314 out_attach:
312 spin_lock(&dentry_attach_lock); 315 spin_lock(&dentry_attach_lock);
313 dentry->d_fsdata = dl; 316 dentry->d_fsdata = dl;
314 dl->dl_count++; 317 dl->dl_count++;
315 spin_unlock(&dentry_attach_lock); 318 spin_unlock(&dentry_attach_lock);
316 319
317 /* 320 /*
318 * This actually gets us our PRMODE level lock. From now on, 321 * This actually gets us our PRMODE level lock. From now on,
319 * we'll have a notification if one of these names is 322 * we'll have a notification if one of these names is
320 * destroyed on another node. 323 * destroyed on another node.
321 */ 324 */
322 ret = ocfs2_dentry_lock(dentry, 0); 325 ret = ocfs2_dentry_lock(dentry, 0);
323 if (!ret) 326 if (!ret)
324 ocfs2_dentry_unlock(dentry, 0); 327 ocfs2_dentry_unlock(dentry, 0);
325 else 328 else
326 mlog_errno(ret); 329 mlog_errno(ret);
327 330
328 /* 331 /*
329 * In case of error, manually free the allocation and do the iput(). 332 * In case of error, manually free the allocation and do the iput().
330 * We need to do this because error here means no d_instantiate(), 333 * We need to do this because error here means no d_instantiate(),
331 * which means iput() will not be called during dput(dentry). 334 * which means iput() will not be called during dput(dentry).
332 */ 335 */
333 if (ret < 0 && !alias) { 336 if (ret < 0 && !alias) {
334 ocfs2_lock_res_free(&dl->dl_lockres); 337 ocfs2_lock_res_free(&dl->dl_lockres);
335 BUG_ON(dl->dl_count != 1); 338 BUG_ON(dl->dl_count != 1);
336 spin_lock(&dentry_attach_lock); 339 spin_lock(&dentry_attach_lock);
337 dentry->d_fsdata = NULL; 340 dentry->d_fsdata = NULL;
338 spin_unlock(&dentry_attach_lock); 341 spin_unlock(&dentry_attach_lock);
339 kfree(dl); 342 kfree(dl);
340 iput(inode); 343 iput(inode);
341 } 344 }
342 345
343 dput(alias); 346 dput(alias);
344 347
345 return ret; 348 return ret;
346 } 349 }
347 350
348 DEFINE_SPINLOCK(dentry_list_lock); 351 DEFINE_SPINLOCK(dentry_list_lock);
349 352
350 /* We limit the number of dentry locks to drop in one go. We have 353 /* We limit the number of dentry locks to drop in one go. We have
351 * this limit so that we don't starve other users of ocfs2_wq. */ 354 * this limit so that we don't starve other users of ocfs2_wq. */
352 #define DL_INODE_DROP_COUNT 64 355 #define DL_INODE_DROP_COUNT 64
353 356
354 /* Drop inode references from dentry locks */ 357 /* Drop inode references from dentry locks */
355 static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count) 358 static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
356 { 359 {
357 struct ocfs2_dentry_lock *dl; 360 struct ocfs2_dentry_lock *dl;
358 361
359 spin_lock(&dentry_list_lock); 362 spin_lock(&dentry_list_lock);
360 while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) { 363 while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
361 dl = osb->dentry_lock_list; 364 dl = osb->dentry_lock_list;
362 osb->dentry_lock_list = dl->dl_next; 365 osb->dentry_lock_list = dl->dl_next;
363 spin_unlock(&dentry_list_lock); 366 spin_unlock(&dentry_list_lock);
364 iput(dl->dl_inode); 367 iput(dl->dl_inode);
365 kfree(dl); 368 kfree(dl);
366 spin_lock(&dentry_list_lock); 369 spin_lock(&dentry_list_lock);
367 } 370 }
368 spin_unlock(&dentry_list_lock); 371 spin_unlock(&dentry_list_lock);
369 } 372 }
370 373
371 void ocfs2_drop_dl_inodes(struct work_struct *work) 374 void ocfs2_drop_dl_inodes(struct work_struct *work)
372 { 375 {
373 struct ocfs2_super *osb = container_of(work, struct ocfs2_super, 376 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
374 dentry_lock_work); 377 dentry_lock_work);
375 378
376 __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT); 379 __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
377 /* 380 /*
378 * Don't queue dropping if umount is in progress. We flush the 381 * Don't queue dropping if umount is in progress. We flush the
379 * list in ocfs2_dismount_volume 382 * list in ocfs2_dismount_volume
380 */ 383 */
381 spin_lock(&dentry_list_lock); 384 spin_lock(&dentry_list_lock);
382 if (osb->dentry_lock_list && 385 if (osb->dentry_lock_list &&
383 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) 386 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
384 queue_work(ocfs2_wq, &osb->dentry_lock_work); 387 queue_work(ocfs2_wq, &osb->dentry_lock_work);
385 spin_unlock(&dentry_list_lock); 388 spin_unlock(&dentry_list_lock);
386 } 389 }
387 390
/*
 * Flush the whole list of queued dentry locks in one go: a negative
 * count tells __ocfs2_drop_dl_inodes() not to stop early.
 */
void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
{
	const int no_limit = -1;

	__ocfs2_drop_dl_inodes(osb, no_limit);
}
393 396
394 /* 397 /*
395 * ocfs2_dentry_iput() and friends. 398 * ocfs2_dentry_iput() and friends.
396 * 399 *
397 * At this point, our particular dentry is detached from the inodes 400 * At this point, our particular dentry is detached from the inodes
398 * alias list, so there's no way that the locking code can find it. 401 * alias list, so there's no way that the locking code can find it.
399 * 402 *
400 * The interesting stuff happens when we determine that our lock needs 403 * The interesting stuff happens when we determine that our lock needs
401 * to go away because this is the last subdir alias in the 404 * to go away because this is the last subdir alias in the
402 * system. This function needs to handle a couple things: 405 * system. This function needs to handle a couple things:
403 * 406 *
404 * 1) Synchronizing lock shutdown with the downconvert threads. This 407 * 1) Synchronizing lock shutdown with the downconvert threads. This
405 * is already handled for us via the lockres release drop function 408 * is already handled for us via the lockres release drop function
406 * called in ocfs2_release_dentry_lock() 409 * called in ocfs2_release_dentry_lock()
407 * 410 *
408 * 2) A race may occur when we're doing our lock shutdown and 411 * 2) A race may occur when we're doing our lock shutdown and
409 * another process wants to create a new dentry lock. Right now we 412 * another process wants to create a new dentry lock. Right now we
410 * let them race, which means that for a very short while, this 413 * let them race, which means that for a very short while, this
411 * node might have two locks on a lock resource. This should be a 414 * node might have two locks on a lock resource. This should be a
412 * problem though because one of them is in the process of being 415 * problem though because one of them is in the process of being
413 * thrown out. 416 * thrown out.
414 */ 417 */
415 static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, 418 static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
416 struct ocfs2_dentry_lock *dl) 419 struct ocfs2_dentry_lock *dl)
417 { 420 {
418 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); 421 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
419 ocfs2_lock_res_free(&dl->dl_lockres); 422 ocfs2_lock_res_free(&dl->dl_lockres);
420 423
421 /* We leave dropping of inode reference to ocfs2_wq as that can 424 /* We leave dropping of inode reference to ocfs2_wq as that can
422 * possibly lead to inode deletion which gets tricky */ 425 * possibly lead to inode deletion which gets tricky */
423 spin_lock(&dentry_list_lock); 426 spin_lock(&dentry_list_lock);
424 if (!osb->dentry_lock_list && 427 if (!osb->dentry_lock_list &&
425 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) 428 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
426 queue_work(ocfs2_wq, &osb->dentry_lock_work); 429 queue_work(ocfs2_wq, &osb->dentry_lock_work);
427 dl->dl_next = osb->dentry_lock_list; 430 dl->dl_next = osb->dentry_lock_list;
428 osb->dentry_lock_list = dl; 431 osb->dentry_lock_list = dl;
429 spin_unlock(&dentry_list_lock); 432 spin_unlock(&dentry_list_lock);
430 } 433 }
431 434
432 void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 435 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
433 struct ocfs2_dentry_lock *dl) 436 struct ocfs2_dentry_lock *dl)
434 { 437 {
435 int unlock; 438 int unlock;
436 439
437 BUG_ON(dl->dl_count == 0); 440 BUG_ON(dl->dl_count == 0);
438 441
439 spin_lock(&dentry_attach_lock); 442 spin_lock(&dentry_attach_lock);
440 dl->dl_count--; 443 dl->dl_count--;
441 unlock = !dl->dl_count; 444 unlock = !dl->dl_count;
442 spin_unlock(&dentry_attach_lock); 445 spin_unlock(&dentry_attach_lock);
443 446
444 if (unlock) 447 if (unlock)
445 ocfs2_drop_dentry_lock(osb, dl); 448 ocfs2_drop_dentry_lock(osb, dl);
446 } 449 }
447 450
448 static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) 451 static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
449 { 452 {
450 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 453 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
451 454
452 if (!dl) { 455 if (!dl) {
453 /* 456 /*
454 * No dentry lock is ok if we're disconnected or 457 * No dentry lock is ok if we're disconnected or
455 * unhashed. 458 * unhashed.
456 */ 459 */
457 if (!(dentry->d_flags & DCACHE_DISCONNECTED) && 460 if (!(dentry->d_flags & DCACHE_DISCONNECTED) &&
458 !d_unhashed(dentry)) { 461 !d_unhashed(dentry)) {
459 unsigned long long ino = 0ULL; 462 unsigned long long ino = 0ULL;
460 if (inode) 463 if (inode)
461 ino = (unsigned long long)OCFS2_I(inode)->ip_blkno; 464 ino = (unsigned long long)OCFS2_I(inode)->ip_blkno;
462 mlog(ML_ERROR, "Dentry is missing cluster lock. " 465 mlog(ML_ERROR, "Dentry is missing cluster lock. "
463 "inode: %llu, d_flags: 0x%x, d_name: %.*s\n", 466 "inode: %llu, d_flags: 0x%x, d_name: %.*s\n",
464 ino, dentry->d_flags, dentry->d_name.len, 467 ino, dentry->d_flags, dentry->d_name.len,
465 dentry->d_name.name); 468 dentry->d_name.name);
466 } 469 }
467 470
468 goto out; 471 goto out;
469 } 472 }
470 473
471 mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n", 474 mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
472 dentry->d_name.len, dentry->d_name.name, 475 dentry->d_name.len, dentry->d_name.name,
473 dl->dl_count); 476 dl->dl_count);
474 477
475 ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl); 478 ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
476 479
477 out: 480 out:
478 iput(inode); 481 iput(inode);
479 } 482 }
480 483
481 /* 484 /*
482 * d_move(), but keep the locks in sync. 485 * d_move(), but keep the locks in sync.
483 * 486 *
484 * When we are done, "dentry" will have the parent dir and name of 487 * When we are done, "dentry" will have the parent dir and name of
485 * "target", which will be thrown away. 488 * "target", which will be thrown away.
486 * 489 *
487 * We manually update the lock of "dentry" if need be. 490 * We manually update the lock of "dentry" if need be.
488 * 491 *
489 * "target" doesn't have it's dentry lock touched - we allow the later 492 * "target" doesn't have it's dentry lock touched - we allow the later
490 * dput() to handle this for us. 493 * dput() to handle this for us.
491 * 494 *
492 * This is called during ocfs2_rename(), while holding parent 495 * This is called during ocfs2_rename(), while holding parent
493 * directory locks. The dentries have already been deleted on other 496 * directory locks. The dentries have already been deleted on other
494 * nodes via ocfs2_remote_dentry_delete(). 497 * nodes via ocfs2_remote_dentry_delete().
495 * 498 *
496 * Normally, the VFS handles the d_move() for the file system, after 499 * Normally, the VFS handles the d_move() for the file system, after
497 * the ->rename() callback. OCFS2 wants to handle this internally, so 500 * the ->rename() callback. OCFS2 wants to handle this internally, so
498 * the new lock can be created atomically with respect to the cluster. 501 * the new lock can be created atomically with respect to the cluster.
499 */ 502 */
500 void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, 503 void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
501 struct inode *old_dir, struct inode *new_dir) 504 struct inode *old_dir, struct inode *new_dir)
502 { 505 {
503 int ret; 506 int ret;
504 struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb); 507 struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
505 struct inode *inode = dentry->d_inode; 508 struct inode *inode = dentry->d_inode;
506 509
507 /* 510 /*
508 * Move within the same directory, so the actual lock info won't 511 * Move within the same directory, so the actual lock info won't
509 * change. 512 * change.
510 * 513 *
511 * XXX: Is there any advantage to dropping the lock here? 514 * XXX: Is there any advantage to dropping the lock here?
512 */ 515 */
513 if (old_dir == new_dir) 516 if (old_dir == new_dir)
514 goto out_move; 517 goto out_move;
515 518
516 ocfs2_dentry_lock_put(osb, dentry->d_fsdata); 519 ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
517 520
518 dentry->d_fsdata = NULL; 521 dentry->d_fsdata = NULL;
519 ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno); 522 ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno);
520 if (ret) 523 if (ret)
521 mlog_errno(ret); 524 mlog_errno(ret);
522 525
523 out_move: 526 out_move:
524 d_move(dentry, target); 527 d_move(dentry, target);
525 } 528 }
526 529
527 const struct dentry_operations ocfs2_dentry_ops = { 530 const struct dentry_operations ocfs2_dentry_ops = {
528 .d_revalidate = ocfs2_dentry_revalidate, 531 .d_revalidate = ocfs2_dentry_revalidate,
529 .d_iput = ocfs2_dentry_iput, 532 .d_iput = ocfs2_dentry_iput,
530 }; 533 };
531 534
security/tomoyo/realpath.c
1 /* 1 /*
2 * security/tomoyo/realpath.c 2 * security/tomoyo/realpath.c
3 * 3 *
4 * Pathname calculation functions for TOMOYO. 4 * Pathname calculation functions for TOMOYO.
5 * 5 *
6 * Copyright (C) 2005-2010 NTT DATA CORPORATION 6 * Copyright (C) 2005-2010 NTT DATA CORPORATION
7 */ 7 */
8 8
9 #include <linux/types.h> 9 #include <linux/types.h>
10 #include <linux/mount.h> 10 #include <linux/mount.h>
11 #include <linux/mnt_namespace.h> 11 #include <linux/mnt_namespace.h>
12 #include <linux/fs_struct.h> 12 #include <linux/fs_struct.h>
13 #include <linux/magic.h> 13 #include <linux/magic.h>
14 #include <linux/slab.h> 14 #include <linux/slab.h>
15 #include <net/sock.h> 15 #include <net/sock.h>
16 #include "common.h" 16 #include "common.h"
17 #include "../../fs/internal.h"
17 18
18 /** 19 /**
19 * tomoyo_encode: Convert binary string to ascii string. 20 * tomoyo_encode: Convert binary string to ascii string.
20 * 21 *
21 * @str: String in binary format. 22 * @str: String in binary format.
22 * 23 *
23 * Returns pointer to @str in ascii format on success, NULL otherwise. 24 * Returns pointer to @str in ascii format on success, NULL otherwise.
24 * 25 *
25 * This function uses kzalloc(), so caller must kfree() if this function 26 * This function uses kzalloc(), so caller must kfree() if this function
26 * didn't return NULL. 27 * didn't return NULL.
27 */ 28 */
28 char *tomoyo_encode(const char *str) 29 char *tomoyo_encode(const char *str)
29 { 30 {
30 int len = 0; 31 int len = 0;
31 const char *p = str; 32 const char *p = str;
32 char *cp; 33 char *cp;
33 char *cp0; 34 char *cp0;
34 35
35 if (!p) 36 if (!p)
36 return NULL; 37 return NULL;
37 while (*p) { 38 while (*p) {
38 const unsigned char c = *p++; 39 const unsigned char c = *p++;
39 if (c == '\\') 40 if (c == '\\')
40 len += 2; 41 len += 2;
41 else if (c > ' ' && c < 127) 42 else if (c > ' ' && c < 127)
42 len++; 43 len++;
43 else 44 else
44 len += 4; 45 len += 4;
45 } 46 }
46 len++; 47 len++;
47 /* Reserve space for appending "/". */ 48 /* Reserve space for appending "/". */
48 cp = kzalloc(len + 10, GFP_NOFS); 49 cp = kzalloc(len + 10, GFP_NOFS);
49 if (!cp) 50 if (!cp)
50 return NULL; 51 return NULL;
51 cp0 = cp; 52 cp0 = cp;
52 p = str; 53 p = str;
53 while (*p) { 54 while (*p) {
54 const unsigned char c = *p++; 55 const unsigned char c = *p++;
55 56
56 if (c == '\\') { 57 if (c == '\\') {
57 *cp++ = '\\'; 58 *cp++ = '\\';
58 *cp++ = '\\'; 59 *cp++ = '\\';
59 } else if (c > ' ' && c < 127) { 60 } else if (c > ' ' && c < 127) {
60 *cp++ = c; 61 *cp++ = c;
61 } else { 62 } else {
62 *cp++ = '\\'; 63 *cp++ = '\\';
63 *cp++ = (c >> 6) + '0'; 64 *cp++ = (c >> 6) + '0';
64 *cp++ = ((c >> 3) & 7) + '0'; 65 *cp++ = ((c >> 3) & 7) + '0';
65 *cp++ = (c & 7) + '0'; 66 *cp++ = (c & 7) + '0';
66 } 67 }
67 } 68 }
68 return cp0; 69 return cp0;
69 } 70 }
70 71
71 /** 72 /**
72 * tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root. 73 * tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root.
73 * 74 *
74 * @path: Pointer to "struct path". 75 * @path: Pointer to "struct path".
75 * 76 *
76 * Returns the realpath of the given @path on success, NULL otherwise. 77 * Returns the realpath of the given @path on success, NULL otherwise.
77 * 78 *
78 * If dentry is a directory, trailing '/' is appended. 79 * If dentry is a directory, trailing '/' is appended.
79 * Characters out of 0x20 < c < 0x7F range are converted to 80 * Characters out of 0x20 < c < 0x7F range are converted to
80 * \ooo style octal string. 81 * \ooo style octal string.
81 * Character \ is converted to \\ string. 82 * Character \ is converted to \\ string.
82 * 83 *
83 * These functions use kzalloc(), so the caller must call kfree() 84 * These functions use kzalloc(), so the caller must call kfree()
84 * if these functions didn't return NULL. 85 * if these functions didn't return NULL.
85 */ 86 */
86 char *tomoyo_realpath_from_path(struct path *path) 87 char *tomoyo_realpath_from_path(struct path *path)
87 { 88 {
88 char *buf = NULL; 89 char *buf = NULL;
89 char *name = NULL; 90 char *name = NULL;
90 unsigned int buf_len = PAGE_SIZE / 2; 91 unsigned int buf_len = PAGE_SIZE / 2;
91 struct dentry *dentry = path->dentry; 92 struct dentry *dentry = path->dentry;
92 bool is_dir; 93 bool is_dir;
93 if (!dentry) 94 if (!dentry)
94 return NULL; 95 return NULL;
95 is_dir = dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode); 96 is_dir = dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode);
96 while (1) { 97 while (1) {
97 struct path ns_root = { .mnt = NULL, .dentry = NULL }; 98 struct path ns_root = { .mnt = NULL, .dentry = NULL };
98 char *pos; 99 char *pos;
99 buf_len <<= 1; 100 buf_len <<= 1;
100 kfree(buf); 101 kfree(buf);
101 buf = kmalloc(buf_len, GFP_NOFS); 102 buf = kmalloc(buf_len, GFP_NOFS);
102 if (!buf) 103 if (!buf)
103 break; 104 break;
104 /* Get better name for socket. */ 105 /* Get better name for socket. */
105 if (dentry->d_sb && dentry->d_sb->s_magic == SOCKFS_MAGIC) { 106 if (dentry->d_sb && dentry->d_sb->s_magic == SOCKFS_MAGIC) {
106 struct inode *inode = dentry->d_inode; 107 struct inode *inode = dentry->d_inode;
107 struct socket *sock = inode ? SOCKET_I(inode) : NULL; 108 struct socket *sock = inode ? SOCKET_I(inode) : NULL;
108 struct sock *sk = sock ? sock->sk : NULL; 109 struct sock *sk = sock ? sock->sk : NULL;
109 if (sk) { 110 if (sk) {
110 snprintf(buf, buf_len - 1, "socket:[family=%u:" 111 snprintf(buf, buf_len - 1, "socket:[family=%u:"
111 "type=%u:protocol=%u]", sk->sk_family, 112 "type=%u:protocol=%u]", sk->sk_family,
112 sk->sk_type, sk->sk_protocol); 113 sk->sk_type, sk->sk_protocol);
113 } else { 114 } else {
114 snprintf(buf, buf_len - 1, "socket:[unknown]"); 115 snprintf(buf, buf_len - 1, "socket:[unknown]");
115 } 116 }
116 name = tomoyo_encode(buf); 117 name = tomoyo_encode(buf);
117 break; 118 break;
118 } 119 }
119 /* For "socket:[\$]" and "pipe:[\$]". */ 120 /* For "socket:[\$]" and "pipe:[\$]". */
120 if (dentry->d_op && dentry->d_op->d_dname) { 121 if (dentry->d_op && dentry->d_op->d_dname) {
121 pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1); 122 pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1);
122 if (IS_ERR(pos)) 123 if (IS_ERR(pos))
123 continue; 124 continue;
124 name = tomoyo_encode(pos); 125 name = tomoyo_encode(pos);
125 break; 126 break;
126 } 127 }
127 /* If we don't have a vfsmount, we can't calculate. */ 128 /* If we don't have a vfsmount, we can't calculate. */
128 if (!path->mnt) 129 if (!path->mnt)
129 break; 130 break;
130 /* go to whatever namespace root we are under */ 131 /* go to whatever namespace root we are under */
131 pos = __d_path(path, &ns_root, buf, buf_len); 132 pos = __d_path(path, &ns_root, buf, buf_len);
132 /* Prepend "/proc" prefix if using internal proc vfs mount. */ 133 /* Prepend "/proc" prefix if using internal proc vfs mount. */
133 if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) && 134 if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) &&
134 (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) { 135 (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) {
135 pos -= 5; 136 pos -= 5;
136 if (pos >= buf) 137 if (pos >= buf)
137 memcpy(pos, "/proc", 5); 138 memcpy(pos, "/proc", 5);
138 else 139 else
139 pos = ERR_PTR(-ENOMEM); 140 pos = ERR_PTR(-ENOMEM);
140 } 141 }
141 if (IS_ERR(pos)) 142 if (IS_ERR(pos))
142 continue; 143 continue;
143 name = tomoyo_encode(pos); 144 name = tomoyo_encode(pos);
144 break; 145 break;
145 } 146 }
146 kfree(buf); 147 kfree(buf);
147 if (!name) 148 if (!name)
148 tomoyo_warn_oom(__func__); 149 tomoyo_warn_oom(__func__);
149 else if (is_dir && *name) { 150 else if (is_dir && *name) {
150 /* Append trailing '/' if dentry is a directory. */ 151 /* Append trailing '/' if dentry is a directory. */
151 char *pos = name + strlen(name) - 1; 152 char *pos = name + strlen(name) - 1;
152 if (*pos != '/') 153 if (*pos != '/')
153 /* 154 /*
154 * This is OK because tomoyo_encode() reserves space 155 * This is OK because tomoyo_encode() reserves space
155 * for appending "/". 156 * for appending "/".
156 */ 157 */
157 *++pos = '/'; 158 *++pos = '/';
158 } 159 }
159 return name; 160 return name;
160 } 161 }
161 162
162 /** 163 /**
163 * tomoyo_realpath_nofollow - Get realpath of a pathname. 164 * tomoyo_realpath_nofollow - Get realpath of a pathname.
164 * 165 *
165 * @pathname: The pathname to solve. 166 * @pathname: The pathname to solve.
166 * 167 *
167 * Returns the realpath of @pathname on success, NULL otherwise. 168 * Returns the realpath of @pathname on success, NULL otherwise.
168 */ 169 */
169 char *tomoyo_realpath_nofollow(const char *pathname) 170 char *tomoyo_realpath_nofollow(const char *pathname)
170 { 171 {
171 struct path path; 172 struct path path;
172 173
173 if (pathname && kern_path(pathname, 0, &path) == 0) { 174 if (pathname && kern_path(pathname, 0, &path) == 0) {
174 char *buf = tomoyo_realpath_from_path(&path); 175 char *buf = tomoyo_realpath_from_path(&path);
175 path_put(&path); 176 path_put(&path);
176 return buf; 177 return buf;
177 } 178 }
178 return NULL; 179 return NULL;
179 } 180 }
180 181