Doug / smarc-fsl-linux-kernel | Embedian Git Server

Commit da5029563a0a026c64821b09e8e7b4fd81d3fe1b

Authored by Nick Piggin 2011-01-07 14:49:33 +0800

Exists in master and in 7 other branches

fs: dcache scale d_unhashed

Protect d_unhashed(dentry) condition with d_lock. This means keeping
DCACHE_UNHASHED bit in synch with hash manipulations.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>

Showing 10 changed files with 102 additions and 54 deletions Inline Diff

arch/powerpc/platforms/cell/spufs/inode.c
drivers/usb/core/inode.c
fs/autofs4/autofs_i.h
fs/autofs4/expire.c
fs/ceph/dir.c
fs/configfs/configfs_internal.h
fs/dcache.c
fs/libfs.c
fs/ocfs2/dcache.c
security/tomoyo/realpath.c

arch/powerpc/platforms/cell/spufs/inode.c

Diff comments View file @ da50295

 /*
  * SPU file system
  *
  * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
  *
  * Author: Arnd Bergmann <arndb@de.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/fsnotify.h>
 #include <linux/backing-dev.h>
 #include <linux/init.h>
 #include <linux/ioctl.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/parser.h>
 #include <asm/prom.h>
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
 #include <asm/uaccess.h>
 #include "spufs.h"
 struct spufs_sb_info {
 	int debug;
 };
 static struct kmem_cache *spufs_inode_cache;
 char *isolated_loader;
 static int isolated_loader_size;
 /* Fetch the spufs-private info attached to a super block. */
 static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb)
 {
 	struct spufs_sb_info *info = sb->s_fs_info;
 	return info;
 }
 /*
  * Allocate a spufs inode object from the slab cache and reset its
  * spufs-specific fields.  Returns the embedded VFS inode, or NULL if the
  * allocation fails.
  */
 static struct inode *
 spufs_alloc_inode(struct super_block *sb)
 {
 	struct spufs_inode_info *info;
 	info = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
 	if (info == NULL)
 		return NULL;
 	info->i_openers = 0;
 	info->i_ctx = NULL;
 	info->i_gang = NULL;
 	return &info->vfs_inode;
 }
 static void
 spufs_destroy_inode(struct inode *inode)
 {
 	kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
 }
 /*
  * Constructor passed to kmem_cache_create(): performs the one-time
  * initialisation of the VFS inode embedded in a spufs inode object.
  */
 static void
 spufs_init_once(void *p)
 {
 	struct inode *vfs_inode = &((struct spufs_inode_info *)p)->vfs_inode;
 	inode_init_once(vfs_inode);
 }
 /*
  * Allocate a new inode on @sb with mode @mode, owned by the caller's
  * fsuid/fsgid, with all three timestamps set to the current time.
  * Returns NULL if new_inode() fails.
  */
 static struct inode *
 spufs_new_inode(struct super_block *sb, int mode)
 {
 	struct inode *inode = new_inode(sb);
 	if (inode) {
 		inode->i_mode = mode;
 		inode->i_uid = current_fsuid();
 		inode->i_gid = current_fsgid();
 		inode->i_ctime = CURRENT_TIME;
 		inode->i_mtime = inode->i_ctime;
 		inode->i_atime = inode->i_mtime;
 	}
 	return inode;
 }
 /*
  * ->setattr for spufs files.  A request that would change the file size is
  * rejected with -EINVAL; anything else is copied into the inode, which is
  * then marked dirty.
  */
 static int
 spufs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
 	int wants_size = (attr->ia_valid & ATTR_SIZE) != 0;
 	if (wants_size && attr->ia_size != inode->i_size)
 		return -EINVAL;
 	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
 	return 0;
 }
 /*
  * Create a regular-file inode for @dentry that uses @fops, reports @size as
  * its length and holds a reference on @ctx (stored both in the spufs inode
  * info and in i_private).  Returns 0 on success or -ENOSPC when no inode
  * could be allocated.
  */
 static int
 spufs_new_file(struct super_block *sb, struct dentry *dentry,
 		const struct file_operations *fops, int mode,
 		size_t size, struct spu_context *ctx)
 {
 	static const struct inode_operations spufs_file_iops = {
 		.setattr = spufs_setattr,
 	};
 	struct inode *inode = spufs_new_inode(sb, S_IFREG | mode);
 	if (!inode)
 		return -ENOSPC;
 	inode->i_op = &spufs_file_iops;
 	inode->i_fop = fops;
 	inode->i_size = size;
 	SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
 	inode->i_private = SPUFS_I(inode)->i_ctx;
 	d_add(dentry, inode);
 	return 0;
 }
 /*
  * ->evict_inode: finish writeback state for the inode, then drop the
  * context and gang references it holds, if any.
  */
 static void
 spufs_evict_inode(struct inode *inode)
 {
 	struct spufs_inode_info *info = SPUFS_I(inode);
 	struct spu_context *ctx;
 	struct spu_gang *gang;
 	end_writeback(inode);
 	ctx = info->i_ctx;
 	if (ctx)
 		put_spu_context(ctx);
 	gang = info->i_gang;
 	if (gang)
 		put_spu_gang(gang);
 }
 /*
  * Remove every child of @dir that is still hashed and has an inode: each
  * such dentry is unhashed with __d_drop() and unlinked with simple_unlink().
  * Afterwards shrink_dcache_parent() is called on @dir.  The directory's
  * i_mutex is held for the whole operation.
  */
 static void spufs_prune_dir(struct dentry *dir)
 {
 	struct dentry *dentry, *tmp;
 	mutex_lock(&dir->d_inode->i_mutex);
 	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
 		spin_lock(&dcache_lock);
 		/* d_unhashed() is only tested with d_lock held */
 		spin_lock(&dentry->d_lock);
 		if (!(d_unhashed(dentry)) && dentry->d_inode) {
 			/* take a reference here; it is dropped by dput() below */
 			dget_locked_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
 			simple_unlink(dir->d_inode, dentry);
+			/* XXX: what is dcache_lock protecting here? Other
+			 * filesystems (IB, configfs) release dcache_lock
+			 * before unlink */
 			spin_unlock(&dcache_lock);
 			dput(dentry);
 		} else {
 			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 		}
 	}
 	shrink_dcache_parent(dir);
 	mutex_unlock(&dir->d_inode->i_mutex);
 }
 /*
  * Empty @dir, unhash it and remove it from @parent via simple_rmdir(),
  * whose result is returned.
  * Caller must hold parent->i_mutex.
  */
 static int spufs_rmdir(struct inode *parent, struct dentry *dir)
 {
 	int ret;
 	/* remove all entries */
 	spufs_prune_dir(dir);
 	d_drop(dir);
 	ret = simple_rmdir(parent, dir);
 	return ret;
 }
 static int spufs_fill_dir(struct dentry *dir,
 		const struct spufs_tree_descr *files, int mode,
 		struct spu_context *ctx)
 {
 	struct dentry *dentry, *tmp;
 	int ret;
 	while (files->name && files->name[0]) {
 		ret = -ENOMEM;
 		dentry = d_alloc_name(dir, files->name);
 		if (!dentry)
 			goto out;
 		ret = spufs_new_file(dir->d_sb, dentry, files->ops,
 					files->mode & mode, files->size, ctx);
 		if (ret)
 			goto out;
 		files++;
 	}
 	return 0;
 out:
 	/*
 	 * remove all children from dir. dir->inode is not set so don't
 	 * just simply use spufs_prune_dir() and panic afterwards :)
 	 * dput() looks like it will do the right thing:
 	 * - dec parent's ref counter
 	 * - remove child from parent's child list
 	 * - free child's inode if possible
 	 * - free child
 	 */
 	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
 		dput(dentry);
 	}
 	shrink_dcache_parent(dir);
 	return ret;
 }
 /*
  * ->release for an open context directory: remove the directory from its
  * parent (warning if that fails), call spu_forget() on the context and
  * finish with the generic dcache_dir_close().
  */
 static int spufs_dir_close(struct inode *inode, struct file *file)
 {
 	struct dentry *dir = file->f_path.dentry;
 	struct inode *parent = dir->d_parent->d_inode;
 	/* fetched before the directory is removed below */
 	struct spu_context *ctx = SPUFS_I(dir->d_inode)->i_ctx;
 	int ret;
 	mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
 	ret = spufs_rmdir(parent, dir);
 	mutex_unlock(&parent->i_mutex);
 	WARN_ON(ret);
 	/* We have to give up the mm_struct */
 	spu_forget(ctx);
 	return dcache_dir_close(inode, file);
 }
 /*
  * File operations that spufs_context_open() installs on an opened context
  * directory.  Releasing the file runs spufs_dir_close(), which removes the
  * directory.  spufs_assert_affinity() also compares f_op against this table
  * to recognise context files.
  */
 const struct file_operations spufs_context_fops = {
 	.open		= dcache_dir_open,
 	.release	= spufs_dir_close,
 	.llseek		= dcache_dir_lseek,
 	.read		= generic_read_dir,
 	.readdir	= dcache_readdir,
 	.fsync		= noop_fsync,
 };
 EXPORT_SYMBOL_GPL(spufs_context_fops);
 /*
  * Create a context directory for @dentry inside @dir.
  *
  * A directory inode and a new spu context are allocated, the directory is
  * filled with the context files (the nosched set when SPU_CREATE_NOSCHED is
  * in @flags, plus the debug set when the super block has debug enabled),
  * and the inode is attached to @dentry.  On success an extra reference is
  * taken on @dentry and the link counts of @dir and the new inode are bumped.
  *
  * Returns 0 on success.  Returns -ENOSPC if the inode or the context cannot
  * be allocated, or the error from spufs_fill_dir().
  */
 static int
 spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
 		int mode)
 {
 	int ret;
 	struct inode *inode;
 	struct spu_context *ctx;
 	ret = -ENOSPC;
 	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
 	if (!inode)
 		goto out;
 	if (dir->i_mode & S_ISGID) {
 		inode->i_gid = dir->i_gid;
 		/*
 		 * Inherit the setgid bit.  This must OR the bit in: masking
 		 * with S_ISGID would clear S_IFDIR and all permission bits.
 		 */
 		inode->i_mode |= S_ISGID;
 	}
 	ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
 	SPUFS_I(inode)->i_ctx = ctx;
 	if (!ctx)
 		goto out_iput;
 	ctx->flags = flags;
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 	if (flags & SPU_CREATE_NOSCHED)
 		ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents,
 					 mode, ctx);
 	else
 		ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx);
 	if (ret)
 		goto out_free_ctx;
 	if (spufs_get_sb_info(dir->i_sb)->debug)
 		ret = spufs_fill_dir(dentry, spufs_dir_debug_contents,
 				mode, ctx);
 	if (ret)
 		goto out_free_ctx;
 	d_instantiate(dentry, inode);
 	dget(dentry);
 	inc_nlink(dir);
 	inc_nlink(dentry->d_inode);
 	goto out;
 out_free_ctx:
 	spu_forget(ctx);
 	put_spu_context(ctx);
 out_iput:
 	iput(inode);
 out:
 	return ret;
 }
 /*
  * Open the context directory @dentry on @mnt read-only and install it on a
  * fresh file descriptor, with spufs_context_fops as its file operations.
  * Returns the descriptor, or a negative error.  When no descriptor is
  * available the caller's @dentry and @mnt references are dropped here.
  */
 static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt)
 {
 	struct file *filp;
 	int fd = get_unused_fd();
 	if (fd < 0) {
 		dput(dentry);
 		mntput(mnt);
 		return fd;
 	}
 	filp = dentry_open(dentry, mnt, O_RDONLY, current_cred());
 	if (IS_ERR(filp)) {
 		put_unused_fd(fd);
 		fd = PTR_ERR(filp);
 		return fd;
 	}
 	filp->f_op = &spufs_context_fops;
 	fd_install(fd, filp);
 	return fd;
 }
 /*
  * Validate an affinity request for a new context in @gang.
  *
  * Returns an ERR_PTR() when the request cannot be honoured.  Otherwise
  * returns the neighbor context taken from @filp with an extra reference
  * held (the caller must put it), or NULL when SPU_CREATE_AFFINITY_SPU is
  * not set in @flags.
  */
 static struct spu_context *
 spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
 						struct file *filp)
 {
 	struct spu_context *tmp, *neighbor, *err;
 	int count, node;
 	int aff_supp;
 	/* affinity counts as supported if the first spu of node 0 has a
 	 * non-empty aff_list */
 	aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
 					struct spu, cbe_list))->aff_list);
 	if (!aff_supp)
 		return ERR_PTR(-EINVAL);
 	if (flags & SPU_CREATE_GANG)
 		return ERR_PTR(-EINVAL);
 	/* the gang's reference context already carries memory affinity */
 	if (flags & SPU_CREATE_AFFINITY_MEM &&
 	    gang->aff_ref_ctx &&
 	    gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
 		return ERR_PTR(-EEXIST);
 	if (gang->aff_flags & AFF_MERGED)
 		return ERR_PTR(-EBUSY);
 	neighbor = NULL;
 	if (flags & SPU_CREATE_AFFINITY_SPU) {
 		/* the neighbor must be given as an open context directory */
 		if (!filp || filp->f_op != &spufs_context_fops)
 			return ERR_PTR(-EINVAL);
 		neighbor = get_spu_context(
 				SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
 		if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
 		    !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
 		    !list_entry(neighbor->aff_list.next, struct spu_context,
 		    aff_list)->aff_head) {
 			err = ERR_PTR(-EEXIST);
 			goto out_put_neighbor;
 		}
 		if (gang != neighbor->gang) {
 			err = ERR_PTR(-EINVAL);
 			goto out_put_neighbor;
 		}
 		/* count = 1 + entries already on the gang's affinity list,
 		 * + 1 more if the neighbor is not yet on a list */
 		count = 1;
 		list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
 			count++;
 		if (list_empty(&neighbor->aff_list))
 			count++;
 		/* look for a node with at least count unreserved spus */
 		for (node = 0; node < MAX_NUMNODES; node++) {
 			if ((cbe_spu_info[node].n_spus - atomic_read(
 				&cbe_spu_info[node].reserved_spus)) >= count)
 				break;
 		}
 		if (node == MAX_NUMNODES) {
 			err = ERR_PTR(-EEXIST);
 			goto out_put_neighbor;
 		}
 	}
 	return neighbor;
 out_put_neighbor:
 	put_spu_context(neighbor);
 	return err;
 }
 /*
  * Record the affinity of the new context @ctx inside its gang.
  *
  * With SPU_CREATE_AFFINITY_MEM, @ctx becomes the gang's reference context.
  * With SPU_CREATE_AFFINITY_SPU, @ctx is linked into the gang's affinity
  * list next to @neighbor, and becomes the reference context if the gang has
  * none yet.
  */
 static void
 spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
 					struct spu_context *neighbor)
 {
 	if (flags & SPU_CREATE_AFFINITY_MEM)
 		ctx->gang->aff_ref_ctx = ctx;
 	if (flags & SPU_CREATE_AFFINITY_SPU) {
 		/* a neighbor not yet on a list is appended and marked as head */
 		if (list_empty(&neighbor->aff_list)) {
 			list_add_tail(&neighbor->aff_list,
 				&ctx->gang->aff_list_head);
 			neighbor->aff_head = 1;
 		}
 		/* insert after the neighbor if it is last or its successor is
 		 * a head; otherwise insert before it, taking over the head
 		 * flag if the neighbor had it */
 		if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
 		    || list_entry(neighbor->aff_list.next, struct spu_context,
 							aff_list)->aff_head) {
 			list_add(&ctx->aff_list, &neighbor->aff_list);
 		} else  {
 			list_add_tail(&ctx->aff_list, &neighbor->aff_list);
 			if (neighbor->aff_head) {
 				neighbor->aff_head = 0;
 				ctx->aff_head = 1;
 			}
 		}
 		if (!ctx->gang->aff_ref_ctx)
 			ctx->gang->aff_ref_ctx = ctx;
 	}
 }
 /*
  * Create a context directory @dentry under @inode and open it.
  *
  * Checks the flag combination (SPU_CREATE_NOSCHED needs CAP_SYS_NICE,
  * SPU_CREATE_ISOLATE needs SPU_CREATE_NOSCHED and a loaded isolated loader),
  * validates any affinity request against the parent's gang, creates the
  * directory and opens it.
  *
  * Every path through this function unlocks inode->i_mutex and drops one
  * reference on @dentry.  Returns the new file descriptor or a negative
  * error.
  */
 static int
 spufs_create_context(struct inode *inode, struct dentry *dentry,
 			struct vfsmount *mnt, int flags, int mode,
 			struct file *aff_filp)
 {
 	int ret;
 	int affinity;
 	struct spu_gang *gang;
 	struct spu_context *neighbor;
 	ret = -EPERM;
 	if ((flags & SPU_CREATE_NOSCHED) &&
 	    !capable(CAP_SYS_NICE))
 		goto out_unlock;
 	ret = -EINVAL;
 	/* isolate mode is only accepted together with nosched */
 	if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
 	    == SPU_CREATE_ISOLATE)
 		goto out_unlock;
 	ret = -ENODEV;
 	if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
 		goto out_unlock;
 	gang = NULL;
 	neighbor = NULL;
 	affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
 	if (affinity) {
 		gang = SPUFS_I(inode)->i_gang;
 		ret = -EINVAL;
 		if (!gang)
 			goto out_unlock;
 		mutex_lock(&gang->aff_mutex);
 		neighbor = spufs_assert_affinity(flags, gang, aff_filp);
 		if (IS_ERR(neighbor)) {
 			ret = PTR_ERR(neighbor);
 			goto out_aff_unlock;
 		}
 	}
 	ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
 	if (ret) {
 		/* don't leak the reference spufs_assert_affinity() took */
 		if (neighbor)
 			put_spu_context(neighbor);
 		goto out_aff_unlock;
 	}
 	if (affinity) {
 		spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
 								neighbor);
 		if (neighbor)
 			put_spu_context(neighbor);
 	}
 	/*
 	 * get references for dget and mntget, will be released
 	 * in error path of *_open().
 	 */
 	ret = spufs_context_open(dget(dentry), mntget(mnt));
 	if (ret < 0) {
 		/* undo the mkdir, then release the locks by hand because
 		 * spu_forget() is called after they have been dropped */
 		WARN_ON(spufs_rmdir(inode, dentry));
 		if (affinity)
 			mutex_unlock(&gang->aff_mutex);
 		mutex_unlock(&inode->i_mutex);
 		spu_forget(SPUFS_I(dentry->d_inode)->i_ctx);
 		goto out;
 	}
 out_aff_unlock:
 	if (affinity)
 		mutex_unlock(&gang->aff_mutex);
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
 out:
 	dput(dentry);
 	return ret;
 }
 static int
 spufs_mkgang(struct inode *dir, struct dentry *dentry, int mode)
 {
 	int ret;
 	struct inode *inode;
 	struct spu_gang *gang;
 	ret = -ENOSPC;
 	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
 	if (!inode)
 		goto out;
 	ret = 0;
 	if (dir->i_mode & S_ISGID) {
 		inode->i_gid = dir->i_gid;
 		inode->i_mode &= S_ISGID;
 	}
 	gang = alloc_spu_gang();
 	SPUFS_I(inode)->i_ctx = NULL;
 	SPUFS_I(inode)->i_gang = gang;
 	if (!gang)
 		goto out_iput;
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 	d_instantiate(dentry, inode);
 	inc_nlink(dir);
 	inc_nlink(dentry->d_inode);
 	return ret;
 out_iput:
 	iput(inode);
 out:
 	return ret;
 }
 /*
  * Open the gang directory @dentry on @mnt read-only and install it on a
  * fresh file descriptor, with simple_dir_operations as its file operations.
  * Returns the descriptor, or a negative error.  When no descriptor is
  * available the caller's @dentry and @mnt references are dropped here.
  */
 static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt)
 {
 	struct file *filp;
 	int fd = get_unused_fd();
 	if (fd < 0) {
 		dput(dentry);
 		mntput(mnt);
 		return fd;
 	}
 	filp = dentry_open(dentry, mnt, O_RDONLY, current_cred());
 	if (IS_ERR(filp)) {
 		put_unused_fd(fd);
 		fd = PTR_ERR(filp);
 		return fd;
 	}
 	filp->f_op = &simple_dir_operations;
 	fd_install(fd, filp);
 	return fd;
 }
 /*
  * Create the gang directory @dentry under @inode and open it.  If the open
  * fails the directory is removed again.  inode->i_mutex is unlocked and one
  * reference on @dentry is dropped on every path.  Returns the new file
  * descriptor or a negative error.
  */
 static int spufs_create_gang(struct inode *inode,
 			struct dentry *dentry,
 			struct vfsmount *mnt, int mode)
 {
 	int ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
 	if (ret == 0) {
 		/*
 		 * get references for dget and mntget, will be released
 		 * in error path of *_open().
 		 */
 		ret = spufs_gang_open(dget(dentry), mntget(mnt));
 		if (ret < 0)
 			WARN_ON(simple_rmdir(inode, dentry));
 	}
 	mutex_unlock(&inode->i_mutex);
 	dput(dentry);
 	return ret;
 }
 /* defined further down; needed here to check that we are on spufs */
 static struct file_system_type spufs_type;
 /*
  * Create a gang (SPU_CREATE_GANG) or a context directory at the location
  * described by @nd.  @filp is passed through to context creation as the
  * affinity neighbor file.  Returns the value of the create helper (a file
  * descriptor on success) or a negative error.
  */
 long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
 							struct file *filp)
 {
 	struct dentry *parent = nd->path.dentry;
 	struct dentry *dentry;
 	int ret;
 	/* check if we are on spufs */
 	if (parent->d_sb->s_type != &spufs_type)
 		return -EINVAL;
 	/* don't accept undefined flags */
 	if (flags & (~SPU_CREATE_FLAG_ALL))
 		return -EINVAL;
 	/* only threads can be underneath a gang */
 	if (parent != parent->d_sb->s_root) {
 		if (flags & SPU_CREATE_GANG)
 			return -EINVAL;
 		if (!SPUFS_I(parent->d_inode)->i_gang)
 			return -EINVAL;
 	}
 	dentry = lookup_create(nd, 1);
 	if (IS_ERR(dentry)) {
 		ret = PTR_ERR(dentry);
 		mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
 		return ret;
 	}
 	mode &= ~current_umask();
 	/* both helpers unlock the parent's i_mutex and drop the dentry */
 	if (flags & SPU_CREATE_GANG)
 		ret = spufs_create_gang(nd->path.dentry->d_inode,
 					 dentry, nd->path.mnt, mode);
 	else
 		ret = spufs_create_context(nd->path.dentry->d_inode,
 					    dentry, nd->path.mnt, flags, mode,
 					    filp);
 	if (ret >= 0)
 		fsnotify_mkdir(nd->path.dentry->d_inode, dentry);
 	return ret;
 }
 /* File system initialization */
 /* Token ids for the mount options understood by spufs_parse_options(). */
 enum {
 	Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
 };
 /* Mount option patterns for match_token(); the NULL entry ends the table. */
 static const match_table_t spufs_tokens = {
 	{ Opt_uid,   "uid=%d" },
 	{ Opt_gid,   "gid=%d" },
 	{ Opt_mode,  "mode=%o" },
 	{ Opt_debug, "debug" },
 	{ Opt_err,    NULL  },
 };
 /*
  * Parse the comma-separated mount option string @options.
  * uid=, gid= and mode= are applied to the @root inode; "debug" sets the
  * debug flag in the super block info.  Empty options are skipped.
  * Returns 1 on success and 0 on an unknown option or a malformed value.
  */
 static int
 spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
 {
 	substring_t args[MAX_OPT_ARGS];
 	char *opt;
 	for (opt = strsep(&options, ","); opt != NULL;
 	     opt = strsep(&options, ",")) {
 		int value;
 		if (*opt == '\0')
 			continue;
 		switch (match_token(opt, spufs_tokens, args)) {
 		case Opt_debug:
 			spufs_get_sb_info(sb)->debug = 1;
 			break;
 		case Opt_mode:
 			if (match_octal(&args[0], &value))
 				return 0;
 			root->i_mode = value | S_IFDIR;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &value))
 				return 0;
 			root->i_gid = value;
 			break;
 		case Opt_uid:
 			if (match_int(&args[0], &value))
 				return 0;
 			root->i_uid = value;
 			break;
 		default:
 			return 0;
 		}
 	}
 	return 1;
 }
 /* Free the pages that hold the copy of the isolated loader. */
 static void spufs_exit_isolated_loader(void)
 {
 	unsigned int order = get_order(isolated_loader_size);
 	free_pages((unsigned long) isolated_loader, order);
 }
 /*
  * Copy the "loader" property of the /spu-isolation device tree node into
  * freshly allocated pages and publish it through isolated_loader and
  * isolated_loader_size.  Does nothing if the node or the property is
  * missing or the allocation fails.
  */
 static void
 spufs_init_isolated_loader(void)
 {
 	struct device_node *dn;
 	const char *loader;
 	int size;
 	dn = of_find_node_by_path("/spu-isolation");
 	if (!dn)
 		return;
 	loader = of_get_property(dn, "loader", &size);
 	if (!loader)
 		goto out_put;
 	/* the loader must be aligned on a 16 byte boundary */
 	isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size));
 	if (!isolated_loader)
 		goto out_put;
 	isolated_loader_size = size;
 	memcpy(isolated_loader, loader, size);
 	printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
 out_put:
 	/*
 	 * Drop the node reference taken by of_find_node_by_path(); the
 	 * property data is not used past this point.
 	 */
 	of_node_put(dn);
 }
 /*
  * Create the root directory of the spufs super block @sb and apply the
  * mount options in @data to it.
  * Returns 0 on success, -ENODEV without spu_management_ops, -EINVAL on bad
  * mount options and -ENOMEM when the inode or root dentry cannot be
  * allocated.
  */
 static int
 spufs_create_root(struct super_block *sb, void *data)
 {
 	struct inode *inode;
 	int ret;
 	if (!spu_management_ops)
 		return -ENODEV;
 	inode = spufs_new_inode(sb, S_IFDIR | 0775);
 	if (!inode)
 		return -ENOMEM;
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 	SPUFS_I(inode)->i_ctx = NULL;
 	inc_nlink(inode);
 	if (!spufs_parse_options(sb, data, inode)) {
 		ret = -EINVAL;
 		goto out_iput;
 	}
 	sb->s_root = d_alloc_root(inode);
 	if (!sb->s_root) {
 		ret = -ENOMEM;
 		goto out_iput;
 	}
 	return 0;
 out_iput:
 	iput(inode);
 	return ret;
 }
 static int
 spufs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct spufs_sb_info *info;
 	static const struct super_operations s_ops = {
 		.alloc_inode = spufs_alloc_inode,
 		.destroy_inode = spufs_destroy_inode,
 		.statfs = simple_statfs,
 		.evict_inode = spufs_evict_inode,
 		.show_options = generic_show_options,
 	};
 	save_mount_options(sb, data);
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_blocksize = PAGE_CACHE_SIZE;
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = SPUFS_MAGIC;
 	sb->s_op = &s_ops;
 	sb->s_fs_info = info;
 	return spufs_create_root(sb, data);
 }
 /* ->mount: spufs uses a single shared super block, set up by spufs_fill_super(). */
 static struct dentry *
 spufs_mount(struct file_system_type *fstype, int flags,
 		const char *name, void *data)
 {
 	struct dentry *root;
 	root = mount_single(fstype, flags, data, spufs_fill_super);
 	return root;
 }
 /*
  * The "spufs" file system type.  It is registered in spufs_init() and also
  * compared against in spufs_create() to check that a path is on spufs.
  */
 static struct file_system_type spufs_type = {
 	.owner = THIS_MODULE,
 	.name = "spufs",
 	.mount = spufs_mount,
 	.kill_sb = kill_litter_super,
 };
 /*
  * Module init: create the inode cache, start the SPU scheduler, register
  * the file system and the SPU system calls, then try to load the isolated
  * loader.  Each failure undoes the steps already completed.
  * Returns 0 on success, -ENODEV without spu_management_ops, -ENOMEM if the
  * cache cannot be created, or the error of the failing step.
  */
 static int __init spufs_init(void)
 {
 	int ret;
 	if (!spu_management_ops)
 		return -ENODEV;
 	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
 			sizeof(struct spufs_inode_info), 0,
 			SLAB_HWCACHE_ALIGN, spufs_init_once);
 	if (!spufs_inode_cache)
 		return -ENOMEM;
 	ret = spu_sched_init();
 	if (ret)
 		goto out_cache;
 	ret = register_filesystem(&spufs_type);
 	if (ret)
 		goto out_sched;
 	ret = register_spu_syscalls(&spufs_calls);
 	if (ret)
 		goto out_fs;
 	spufs_init_isolated_loader();
 	return 0;
 out_fs:
 	unregister_filesystem(&spufs_type);
 out_sched:
 	spu_sched_exit();
 out_cache:
 	kmem_cache_destroy(spufs_inode_cache);
 	return ret;
 }
 module_init(spufs_init);
 /*
  * Module exit: stop the SPU scheduler, free the isolated loader copy,
  * unregister the SPU system calls and the file system, and destroy the
  * inode cache.
  */
 static void __exit spufs_exit(void)
 {
 	spu_sched_exit();
 	spufs_exit_isolated_loader();
 	unregister_spu_syscalls(&spufs_calls);
 	unregister_filesystem(&spufs_type);
 	kmem_cache_destroy(spufs_inode_cache);
 }
 module_exit(spufs_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");

drivers/usb/core/inode.c

Diff comments View file @ da50295

 /*****************************************************************************/
 /*
  *	inode.c  --  Inode/Dentry functions for the USB device file system.
  *
  *	Copyright (C) 2000 Thomas Sailer (sailer@ife.ee.ethz.ch)
  *	Copyright (C) 2001,2002,2004 Greg Kroah-Hartman (greg@kroah.com)
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License as published by
  *	the Free Software Foundation; either version 2 of the License, or
  *	(at your option) any later version.
  *
  *	This program is distributed in the hope that it will be useful,
  *	but WITHOUT ANY WARRANTY; without even the implied warranty of
  *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *	GNU General Public License for more details.
  *
  *	You should have received a copy of the GNU General Public License
  *	along with this program; if not, write to the Free Software
  *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  *  History:
  *   0.1  04.01.2000  Created
  *   0.2  10.12.2001  converted to use the vfs layer better
  */
 /*****************************************************************************/
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/usb.h>
 #include <linux/namei.h>
 #include <linux/usbdevice_fs.h>
 #include <linux/parser.h>
 #include <linux/notifier.h>
 #include <linux/seq_file.h>
 #include <linux/usb/hcd.h>
 #include <asm/byteorder.h>
 #include "usb.h"
 /* Default permission bits for device files, bus directories and list files. */
 #define USBFS_DEFAULT_DEVMODE (S_IWUSR | S_IRUGO)
 #define USBFS_DEFAULT_BUSMODE (S_IXUGO | S_IRUGO)
 #define USBFS_DEFAULT_LISTMODE S_IRUGO
 static const struct file_operations default_file_operations;
 /* The usbfs mount; its super block supplies the root dentry when needed. */
 static struct vfsmount *usbfs_mount;
 static int usbfs_mount_count;	/* = 0 */
 /* When non-zero, remount() returns immediately without parsing options. */
 static int ignore_mount = 0;
 static struct dentry *devices_usbfs_dentry;
 static int num_buses;	/* = 0 */
 /*
  * Ownership and mode settings, reset and filled in by parse_options().
  * The update_dev(), update_bus() and update_special() helpers apply the
  * dev*, bus* and list* values respectively.
  */
 static uid_t devuid;	/* = 0 */
 static uid_t busuid;	/* = 0 */
 static uid_t listuid;	/* = 0 */
 static gid_t devgid;	/* = 0 */
 static gid_t busgid;	/* = 0 */
 static gid_t listgid;	/* = 0 */
 static umode_t devmode = USBFS_DEFAULT_DEVMODE;
 static umode_t busmode = USBFS_DEFAULT_BUSMODE;
 static umode_t listmode = USBFS_DEFAULT_LISTMODE;
 /*
  * ->show_options: print every usbfs mount option whose current value
  * differs from its default (0 for ids, USBFS_DEFAULT_* for modes).
  * Always returns 0.
  */
 static int usbfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
 {
 	/* device files */
 	if (devuid)
 		seq_printf(seq, ",devuid=%u", devuid);
 	if (devgid)
 		seq_printf(seq, ",devgid=%u", devgid);
 	if (devmode != USBFS_DEFAULT_DEVMODE)
 		seq_printf(seq, ",devmode=%o", devmode);
 	/* bus directories */
 	if (busuid)
 		seq_printf(seq, ",busuid=%u", busuid);
 	if (busgid)
 		seq_printf(seq, ",busgid=%u", busgid);
 	if (busmode != USBFS_DEFAULT_BUSMODE)
 		seq_printf(seq, ",busmode=%o", busmode);
 	/* list files */
 	if (listuid)
 		seq_printf(seq, ",listuid=%u", listuid);
 	if (listgid)
 		seq_printf(seq, ",listgid=%u", listgid);
 	if (listmode != USBFS_DEFAULT_LISTMODE)
 		seq_printf(seq, ",listmode=%o", listmode);
 	return 0;
 }
 /* Token ids for the mount options understood by parse_options(). */
 enum {
 	Opt_devuid, Opt_devgid, Opt_devmode,
 	Opt_busuid, Opt_busgid, Opt_busmode,
 	Opt_listuid, Opt_listgid, Opt_listmode,
 	Opt_err,
 };
 /* Mount option patterns for match_token(); the NULL entry ends the table. */
 static const match_table_t tokens = {
 	{Opt_devuid, "devuid=%u"},
 	{Opt_devgid, "devgid=%u"},
 	{Opt_devmode, "devmode=%o"},
 	{Opt_busuid, "busuid=%u"},
 	{Opt_busgid, "busgid=%u"},
 	{Opt_busmode, "busmode=%o"},
 	{Opt_listuid, "listuid=%u"},
 	{Opt_listgid, "listgid=%u"},
 	{Opt_listmode, "listmode=%o"},
 	{Opt_err, NULL}
 };
 static int parse_options(struct super_block *s, char *data)
 {
 	char *p;
 	int option;
 	/* (re)set to defaults. */
 	devuid = 0;
 	busuid = 0;
 	listuid = 0;
 	devgid = 0;
 	busgid = 0;
 	listgid = 0;
 	devmode = USBFS_DEFAULT_DEVMODE;
 	busmode = USBFS_DEFAULT_BUSMODE;
 	listmode = USBFS_DEFAULT_LISTMODE;
 	while ((p = strsep(&data, ",")) != NULL) {
 		substring_t args[MAX_OPT_ARGS];
 		int token;
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_devuid:
 			if (match_int(&args[0], &option))
 			       return -EINVAL;
 			devuid = option;
 			break;
 		case Opt_devgid:
 			if (match_int(&args[0], &option))
 			       return -EINVAL;
 			devgid = option;
 			break;
 		case Opt_devmode:
 			if (match_octal(&args[0], &option))
 				return -EINVAL;
 			devmode = option & S_IRWXUGO;
 			break;
 		case Opt_busuid:
 			if (match_int(&args[0], &option))
 			       return -EINVAL;
 			busuid = option;
 			break;
 		case Opt_busgid:
 			if (match_int(&args[0], &option))
 			       return -EINVAL;
 			busgid = option;
 			break;
 		case Opt_busmode:
 			if (match_octal(&args[0], &option))
 				return -EINVAL;
 			busmode = option & S_IRWXUGO;
 			break;
 		case Opt_listuid:
 			if (match_int(&args[0], &option))
 			       return -EINVAL;
 			listuid = option;
 			break;
 		case Opt_listgid:
 			if (match_int(&args[0], &option))
 			       return -EINVAL;
 			listgid = option;
 			break;
 		case Opt_listmode:
 			if (match_octal(&args[0], &option))
 				return -EINVAL;
 			listmode = option & S_IRWXUGO;
 			break;
 		default:
 			printk(KERN_ERR "usbfs: unrecognised mount option "
 			       "\"%s\" or missing value\n", p);
 			return -EINVAL;
 		}
 	}
 	return 0;
 }
 static void update_special(struct dentry *special)
 {
 	special->d_inode->i_uid = listuid;
 	special->d_inode->i_gid = listgid;
 	special->d_inode->i_mode = S_IFREG | listmode;
 }
 static void update_dev(struct dentry *dev)
 {
 	dev->d_inode->i_uid = devuid;
 	dev->d_inode->i_gid = devgid;
 	dev->d_inode->i_mode = S_IFREG | devmode;
 }
 static void update_bus(struct dentry *bus)
 {
 	struct dentry *dev = NULL;
 	bus->d_inode->i_uid = busuid;
 	bus->d_inode->i_gid = busgid;
 	bus->d_inode->i_mode = S_IFDIR | busmode;
 	mutex_lock(&bus->d_inode->i_mutex);
 	list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child)
 		if (dev->d_inode)
 			update_dev(dev);
 	mutex_unlock(&bus->d_inode->i_mutex);
 }
 /*
  * Re-apply the current ownership and mode settings to everything below the
  * root of @sb: directories are treated as buses, regular files as special
  * files, and any other node type is reported with a warning.  Does nothing
  * if the super block has no root.
  */
 static void update_sb(struct super_block *sb)
 {
 	struct dentry *root = sb->s_root;
 	struct dentry *node;
 	if (!root)
 		return;
 	mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
 	list_for_each_entry(node, &root->d_subdirs, d_u.d_child) {
 		struct inode *inode = node->d_inode;
 		if (!inode)
 			continue;
 		if ((inode->i_mode & S_IFMT) == S_IFDIR)
 			update_bus(node);
 		else if ((inode->i_mode & S_IFMT) == S_IFREG)
 			update_special(node);
 		else
 			printk(KERN_WARNING "usbfs: Unknown node %s "
 			       "mode %x found on remount!\n",
 			       node->d_name.name, inode->i_mode);
 	}
 	mutex_unlock(&root->d_inode->i_mutex);
 }
 /*
  * ->remount_fs: re-parse the mount options and push the new settings to
  * the existing usbfs tree.  Returns 0 on success, -EINVAL if the options
  * cannot be parsed.
  */
 static int remount(struct super_block *sb, int *flags, char *data)
 {
 	/* If this is not a real mount,
 	 * i.e. it's a simple_pin_fs from create_special_files,
 	 * then ignore it.
 	 */
 	if (ignore_mount)
 		return 0;
 	if (parse_options(sb, data) != 0) {
 		printk(KERN_WARNING "usbfs: mount parameter error.\n");
 		return -EINVAL;
 	}
 	if (usbfs_mount) {
 		struct super_block *mounted = usbfs_mount->mnt_sb;
 		if (mounted)
 			update_sb(mounted);
 	}
 	return 0;
 }
 /*
  * Allocate and initialise a usbfs inode of type @mode on @sb.  Regular
  * files get default_file_operations, directories get the simple directory
  * operations and an extra link, and every other type is set up through
  * init_special_inode() with @dev.  Returns NULL if new_inode() fails.
  */
 static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t dev)
 {
 	struct inode *inode = new_inode(sb);
 	if (!inode)
 		return NULL;
 	inode->i_ino = get_next_ino();
 	inode->i_mode = mode;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
 	inode->i_ctime = CURRENT_TIME;
 	inode->i_mtime = inode->i_ctime;
 	inode->i_atime = inode->i_mtime;
 	switch (mode & S_IFMT) {
 	case S_IFDIR:
 		inode->i_op = &simple_dir_inode_operations;
 		inode->i_fop = &simple_dir_operations;
 		/* directory inodes start off with i_nlink == 2 (for "." entry) */
 		inc_nlink(inode);
 		break;
 	case S_IFREG:
 		inode->i_fop = &default_file_operations;
 		break;
 	default:
 		init_special_inode(inode, mode, dev);
 		break;
 	}
 	return inode;
 }
 /*
  * Create an inode of type @mode for @dentry in @dir and attach it, taking
  * an extra reference on the dentry.
  * Returns 0 on success, -EEXIST if @dentry already has an inode, or -EPERM
  * if no inode could be allocated.
  */
 /* SMP-safe */
 static int usbfs_mknod (struct inode *dir, struct dentry *dentry, int mode,
 			dev_t dev)
 {
 	struct inode *inode;
 	/* check before allocating so no inode is leaked on -EEXIST */
 	if (dentry->d_inode)
 		return -EEXIST;
 	inode = usbfs_get_inode(dir->i_sb, mode, dev);
 	if (!inode)
 		return -EPERM;
 	d_instantiate(dentry, inode);
 	dget(dentry);
 	return 0;
 }
 /*
  * Create a directory for @dentry in @dir.  Only the permission and sticky
  * bits of @mode are kept.  On success the parent's link count is bumped.
  * Returns the result of usbfs_mknod().
  */
 static int usbfs_mkdir (struct inode *dir, struct dentry *dentry, int mode)
 {
 	int res;
 	mode &= S_IRWXUGO | S_ISVTX;
 	mode |= S_IFDIR;
 	res = usbfs_mknod (dir, dentry, mode, 0);
 	if (res == 0)
 		inc_nlink(dir);
 	return res;
 }
 /*
  * Create a regular file for @dentry in @dir, keeping only the S_IALLUGO
  * bits of @mode.  Returns the result of usbfs_mknod().
  */
 static int usbfs_create (struct inode *dir, struct dentry *dentry, int mode)
 {
 	mode &= S_IALLUGO;
 	mode |= S_IFREG;
 	return usbfs_mknod (dir, dentry, mode, 0);
 }
 /* Return 1 if @dentry has an inode and is still hashed, 0 otherwise. */
 static inline int usbfs_positive (struct dentry *dentry)
 {
 	if (!dentry->d_inode)
 		return 0;
 	return !d_unhashed(dentry);
 }
 /*
  * Return 1 if @dentry has no positive (hashed, with inode) children and 0
  * otherwise.  The child list is walked under dcache_lock, and each child is
  * tested with its d_lock held.
  */
 static int usbfs_empty (struct dentry *dentry)
 {
 	struct list_head *list;
 	spin_lock(&dcache_lock);
 	list_for_each(list, &dentry->d_subdirs) {
 		struct dentry *de = list_entry(list, struct dentry, d_u.d_child);
+		spin_lock(&de->d_lock);
 		if (usbfs_positive(de)) {
+			spin_unlock(&de->d_lock);
 			spin_unlock(&dcache_lock);
 			return 0;
 		}
+		spin_unlock(&de->d_lock);
 	}
 	spin_unlock(&dcache_lock);
 	return 1;
 }
 /*
  * Remove the file @dentry: under the inode's i_mutex its link count is
  * decremented and one dentry reference is dropped; afterwards the dentry is
  * deleted with d_delete().  Always returns 0.
  */
 static int usbfs_unlink (struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
 	mutex_lock(&inode->i_mutex);
 	drop_nlink(dentry->d_inode);
 	/* NOTE(review): presumably pairs with the dget() in usbfs_mknod() - confirm */
 	dput(dentry);
 	mutex_unlock(&inode->i_mutex);
 	d_delete(dentry);
 	return 0;
 }
 /*
  * Remove the directory @dentry from @dir if it has no positive children.
  * Returns 0 on success or -ENOTEMPTY if usbfs_empty() reports children.
  */
 static int usbfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int error = -ENOTEMPTY;
 	struct inode * inode = dentry->d_inode;
 	mutex_lock(&inode->i_mutex);
 	dentry_unhash(dentry);
 	if (usbfs_empty(dentry)) {
 		dont_mount(dentry);
 		/* a directory inode is given two links in usbfs_get_inode() */
 		drop_nlink(dentry->d_inode);
 		drop_nlink(dentry->d_inode);
 		dput(dentry);
 		inode->i_flags |= S_DEAD;
 		drop_nlink(dir);
 		error = 0;
 	}
 	mutex_unlock(&inode->i_mutex);
 	if (!error)
 		d_delete(dentry);
 	/* NOTE(review): presumably drops a reference taken by dentry_unhash() - confirm */
 	dput(dentry);
 	return error;
 }
 /* default file operations */
 /* Default ->read: transfers nothing and always returns 0. */
 static ssize_t default_read_file (struct file *file, char __user *buf,
 				  size_t count, loff_t *ppos)
 {
 	return 0;
 }
 /* Default ->write: ignores the data and returns @count as if all of it was written. */
 static ssize_t default_write_file (struct file *file, const char __user *buf,
 				   size_t count, loff_t *ppos)
 {
 	return count;
 }
 /*
  * Default ->llseek.  @orig 0 sets the position to @offset and @orig 1 moves
  * it by @offset relative to the current position; any other origin is
  * rejected.  The position is updated under the inode's i_mutex.
  * Returns the new position, or -EINVAL if the origin is unsupported or the
  * resulting position would be negative.
  */
 static loff_t default_file_lseek (struct file *file, loff_t offset, int orig)
 {
 	loff_t retval = -EINVAL;
 	mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
 	switch(orig) {
 	case 0:
 		/* position 0 is valid: only negative results are refused */
 		if (offset >= 0) {
 			file->f_pos = offset;
 			retval = file->f_pos;
 		}
 		break;
 	case 1:
 		if ((offset + file->f_pos) >= 0) {
 			file->f_pos += offset;
 			retval = file->f_pos;
 		}
 		break;
 	default:
 		break;
 	}
 	mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
 	return retval;
 }
 /*
  * Default ->open: if the inode carries private data, copy it to the file's
  * private_data.  Always returns 0.
  */
 static int default_open (struct inode *inode, struct file *file)
 {
 	void *priv = inode->i_private;
 	if (priv)
 		file->private_data = priv;
 	return 0;
 }
 /* File operations that usbfs_get_inode() installs on regular files. */
 static const struct file_operations default_file_operations = {
 	.read =		default_read_file,
 	.write =	default_write_file,
 	.open =		default_open,
 	.llseek =	default_file_lseek,
 };
 /* Super block operations installed by usbfs_fill_super(). */
 static const struct super_operations usbfs_ops = {
 	.statfs =	simple_statfs,
 	.drop_inode =	generic_delete_inode,
 	.remount_fs =	remount,
 	.show_options = usbfs_show_options,
 };
 static int usbfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
 	struct dentry *root;
 	sb->s_blocksize = PAGE_CACHE_SIZE;
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = USBDEVICE_SUPER_MAGIC;
 	sb->s_op = &usbfs_ops;
 	sb->s_time_gran = 1;
 	inode = usbfs_get_inode(sb, S_IFDIR | 0755, 0);
 	if (!inode) {
 		dbg("%s: could not get inode!",__func__);
 		return -ENOMEM;
 	}
 	root = d_alloc_root(inode);
 	if (!root) {
 		dbg("%s: could not get root dentry!",__func__);
 		iput(inode);
 		return -ENOMEM;
 	}
 	sb->s_root = root;
 	return 0;
 }
 /*
  * fs_create_by_name - create a file or directory, given a name
  * @name:	name of the new entry
  * @mode:	type and permissions; S_IFDIR selects a directory
  * @parent:	dentry of the directory to create it in, or NULL for the root
  * @dentry:	receives the dentry looked up for @name
  *
  * The lookup and the creation happen under the parent's i_mutex.
  * Returns 0 on success, -EFAULT when no parent can be determined, or the
  * error from the lookup or from usbfs_mkdir()/usbfs_create().
  */
 static int fs_create_by_name (const char *name, mode_t mode,
 			      struct dentry *parent, struct dentry **dentry)
 {
 	int error;
 	/* Without an explicit parent the entry goes into the root, which
 	 * is found through the super block of the struct vfsmount that we
 	 * keep around.
 	 */
 	if (!parent && usbfs_mount && usbfs_mount->mnt_sb)
 		parent = usbfs_mount->mnt_sb->s_root;
 	if (!parent) {
 		dbg("Ah! can not find a parent!");
 		return -EFAULT;
 	}
 	*dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
 	*dentry = lookup_one_len(name, parent, strlen(name));
 	if (IS_ERR(*dentry))
 		error = PTR_ERR(*dentry);
 	else if ((mode & S_IFMT) == S_IFDIR)
 		error = usbfs_mkdir (parent->d_inode, *dentry, mode);
 	else
 		error = usbfs_create (parent->d_inode, *dentry, mode);
 	mutex_unlock(&parent->d_inode->i_mutex);
 	return error;
 }
 /*
  * fs_create_file - create a usbfs entry and fill in its inode
  * @name:	name of the new entry
  * @mode:	file type and permission bits
  * @parent:	directory dentry, passed on to fs_create_by_name()
  * @data:	stored in the inode's i_private when non-NULL
  * @fops:	installed as the inode's i_fop when non-NULL
  * @uid:	owner assigned to the inode
  * @gid:	group assigned to the inode
  *
  * Returns the dentry produced by fs_create_by_name(), or NULL when that
  * call reports an error.  The inode fields are only touched when the
  * dentry actually has an inode.
  */
 static struct dentry *fs_create_file (const char *name, mode_t mode,
 				      struct dentry *parent, void *data,
 				      const struct file_operations *fops,
 				      uid_t uid, gid_t gid)
 {
 	struct dentry *dentry;
 	struct inode *inode;
 	dbg("creating file '%s'",name);
 	if (fs_create_by_name (name, mode, parent, &dentry) != 0)
 		return NULL;
 	inode = dentry->d_inode;
 	if (inode == NULL)
 		return dentry;
 	if (data)
 		inode->i_private = data;
 	if (fops)
 		inode->i_fop = fops;
 	inode->i_uid = uid;
 	inode->i_gid = gid;
 	return dentry;
 }
 static void fs_remove_file (struct dentry *dentry)
 {
 	struct dentry *parent = dentry->d_parent;
 	if (!parent || !parent->d_inode)
 		return;
 	mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT);
 	if (usbfs_positive(dentry)) {
 		if (dentry->d_inode) {
 			if (S_ISDIR(dentry->d_inode->i_mode))
 				usbfs_rmdir(parent->d_inode, dentry);
 			else
 				usbfs_unlink(parent->d_inode, dentry);
 		dput(dentry);
 		}
 	}
 	mutex_unlock(&parent->d_inode->i_mutex);
 }
 /* --------------------------------------------------------------------- */
 /*
  * usb_mount - .mount callback of usb_fs_type
  *
  * Hands the request to mount_single() with usbfs_fill_super() as the
  * fill callback.  @dev_name is not used.
  */
 static struct dentry *usb_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
 	struct dentry *root;
 	root = mount_single(fs_type, flags, data, usbfs_fill_super);
 	return root;
 }
 /* The "usbfs" filesystem type; registered by usbfs_init(). */
 static struct file_system_type usb_fs_type = {
 	.name		= "usbfs",
 	.owner		= THIS_MODULE,
 	.mount		= usb_mount,
 	.kill_sb	= kill_litter_super,
 };
 /* --------------------------------------------------------------------- */
 /*
  * create_special_files - pin the usbfs mount and create the "devices" file
  *
  * Returns 0 on success, the simple_pin_fs() error when the mount cannot
  * be pinned, or -ENODEV (after releasing the mount again) when the
  * "devices" file cannot be created.
  *
  * NOTE(review): ignore_mount is left at 1 when simple_pin_fs() fails -
  * confirm that is intended.
  */
 static int create_special_files (void)
 {
 	struct dentry *root;
 	int retval;
 	/* the simple_pin_fs calls will call remount with no options
 	 * without this flag that would overwrite the real mount options (if any)
 	 */
 	ignore_mount = 1;
 	/* create the devices special file */
 	retval = simple_pin_fs(&usb_fs_type, &usbfs_mount, &usbfs_mount_count);
 	if (retval) {
 		printk(KERN_ERR "Unable to get usbfs mount\n");
 		return retval;
 	}
 	ignore_mount = 0;
 	root = usbfs_mount->mnt_sb->s_root;
 	devices_usbfs_dentry = fs_create_file ("devices",
 					       listmode | S_IFREG, root,
 					       NULL, &usbfs_devices_fops,
 					       listuid, listgid);
 	if (devices_usbfs_dentry != NULL)
 		return retval;
 	printk(KERN_ERR "Unable to create devices usbfs file\n");
 	/* undo the pin taken above */
 	simple_release_fs(&usbfs_mount, &usbfs_mount_count);
 	return -ENODEV;
 }
 /*
  * remove_special_files - undo create_special_files()
  *
  * Removes the "devices" entry if one exists, clears the global pointer
  * to it and releases the pinned usbfs mount.
  */
 static void remove_special_files (void)
 {
 	if (devices_usbfs_dentry != NULL) {
 		fs_remove_file (devices_usbfs_dentry);
 	}
 	devices_usbfs_dentry = NULL;
 	simple_release_fs(&usbfs_mount, &usbfs_mount_count);
 }
 /*
  * usbfs_update_special - refresh the timestamps of the "devices" file
  *
  * Sets atime, mtime and ctime of the devices_usbfs_dentry inode to
  * CURRENT_TIME.  Does nothing if the dentry or its inode is missing.
  */
 void usbfs_update_special (void)
 {
 	struct inode *inode;
 	if (!devices_usbfs_dentry)
 		return;
 	inode = devices_usbfs_dentry->d_inode;
 	if (!inode)
 		return;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 }
 /*
  * usbfs_add_bus - create the usbfs directory for a newly added bus
  * @bus:	bus being added
  *
  * The first bus to arrive also triggers create_special_files(); if that
  * fails nothing is created and num_buses is left untouched.  The
  * directory is named after the bus number, zero padded to three digits,
  * and lives in the usbfs root.  On failure bus->usbfs_dentry is NULL and
  * an error is logged.
  */
 static void usbfs_add_bus(struct usb_bus *bus)
 {
 	struct dentry *parent;
 	char name[8];
 	int retval;
 	/* create the special files if this is the first bus added */
 	if (num_buses == 0) {
 		retval = create_special_files();
 		if (retval)
 			return;
 	}
 	++num_buses;
 	/*
 	 * "%03d" is a minimum width only; bound the write so that an
 	 * unexpectedly large or negative busnum cannot overrun name[].
 	 */
 	snprintf(name, sizeof(name), "%03d", bus->busnum);
 	parent = usbfs_mount->mnt_sb->s_root;
 	bus->usbfs_dentry = fs_create_file (name, busmode | S_IFDIR, parent,
 					    bus, NULL, busuid, busgid);
 	if (bus->usbfs_dentry == NULL) {
 		printk(KERN_ERR "Error creating usbfs bus entry\n");
 		return;
 	}
 }
 /*
  * usbfs_remove_bus - remove the usbfs directory of a bus
  * @bus:	bus going away
  *
  * Removes the bus directory if there is one and decrements num_buses.
  * Once the count is no longer positive the special files are removed
  * as well and the count is reset to 0.
  */
 static void usbfs_remove_bus(struct usb_bus *bus)
 {
 	struct dentry *bus_dir = bus->usbfs_dentry;
 	if (bus_dir) {
 		fs_remove_file (bus_dir);
 		bus->usbfs_dentry = NULL;
 	}
 	if (--num_buses > 0)
 		return;
 	/* that was the last bus */
 	remove_special_files();
 	num_buses = 0;
 }
 /*
  * usbfs_add_device - create the usbfs file for a newly added device
  * @dev:	device being added
  *
  * The file is named after the device number, zero padded to three
  * digits, and is created under the dentry of the device's bus.  Its
  * i_size is set to the size of the device descriptor plus the
  * wTotalLength of every configuration descriptor.  On failure
  * dev->usbfs_dentry is NULL and an error is logged.
  */
 static void usbfs_add_device(struct usb_device *dev)
 {
 	char name[8];
 	int i;
 	int i_size;
 	/*
 	 * "%03d" is a minimum width only; bound the write so that an
 	 * unexpectedly large or negative devnum cannot overrun name[].
 	 */
 	snprintf(name, sizeof(name), "%03d", dev->devnum);
 	dev->usbfs_dentry = fs_create_file (name, devmode | S_IFREG,
 					    dev->bus->usbfs_dentry, dev,
 					    &usbdev_file_operations,
 					    devuid, devgid);
 	if (dev->usbfs_dentry == NULL) {
 		printk(KERN_ERR "Error creating usbfs device entry\n");
 		return;
 	}
 	/* Set the size of the device's file to be
 	 * equal to the size of the device descriptors. */
 	i_size = sizeof (struct usb_device_descriptor);
 	for (i = 0; i < dev->descriptor.bNumConfigurations; ++i) {
 		struct usb_config_descriptor *config =
 			(struct usb_config_descriptor *)dev->rawdescriptors[i];
 		i_size += le16_to_cpu(config->wTotalLength);
 	}
 	if (dev->usbfs_dentry->d_inode)
 		dev->usbfs_dentry->d_inode->i_size = i_size;
 }
 /*
  * usbfs_remove_device - remove the usbfs file of a device
  * @dev:	device going away
  *
  * No-op when the device has no usbfs dentry; otherwise the entry is
  * removed and the pointer cleared.
  */
 static void usbfs_remove_device(struct usb_device *dev)
 {
 	if (!dev->usbfs_dentry)
 		return;
 	fs_remove_file (dev->usbfs_dentry);
 	dev->usbfs_dentry = NULL;
 }
 /*
  * usbfs_notify - notifier callback for USB bus/device add and remove
  * @self:	notifier block (unused)
  * @action:	one of USB_DEVICE_ADD, USB_DEVICE_REMOVE, USB_BUS_ADD,
  *		USB_BUS_REMOVE; anything else is ignored
  * @dev:	the usb_device or usb_bus the action refers to
  *
  * After dispatching, the "devices" file timestamps are refreshed and
  * usbfs_conn_disc_event() is called, whatever the action was.
  * Always returns NOTIFY_OK.
  */
 static int usbfs_notify(struct notifier_block *self, unsigned long action, void *dev)
 {
 	switch (action) {
 	case USB_BUS_ADD:
 		usbfs_add_bus(dev);
 		break;
 	case USB_BUS_REMOVE:
 		usbfs_remove_bus(dev);
 		break;
 	case USB_DEVICE_ADD:
 		usbfs_add_device(dev);
 		break;
 	case USB_DEVICE_REMOVE:
 		usbfs_remove_device(dev);
 		break;
 	default:
 		break;
 	}
 	usbfs_update_special();
 	usbfs_conn_disc_event();
 	return NOTIFY_OK;
 }
 /* Notifier block handed to usb_register_notify() / usb_unregister_notify(). */
 static struct notifier_block usbfs_nb = {
 	.notifier_call = usbfs_notify,
 };
 /* --------------------------------------------------------------------- */
 /* Result of proc_mkdir("bus/usb") in usbfs_init(); checked by usbfs_cleanup(). */
 static struct proc_dir_entry *usbdir = NULL;
 /*
  * usbfs_init - register usbfs and hook it up to USB notifications
  *
  * Returns the register_filesystem() error if registration fails,
  * otherwise 0.  The proc_mkdir() result is stored but not checked here.
  */
 int __init usbfs_init(void)
 {
 	int retval = register_filesystem(&usb_fs_type);
 	if (retval != 0)
 		return retval;
 	usb_register_notify(&usbfs_nb);
 	/* create mount point for usbfs */
 	usbdir = proc_mkdir("bus/usb", NULL);
 	return 0;
 }
 /*
  * usbfs_cleanup - undo usbfs_init()
  *
  * Unregisters the notifier and the filesystem, then removes the
  * "bus/usb" proc entry if usbfs_init() managed to create it.
  */
 void usbfs_cleanup(void)
 {
 	usb_unregister_notify(&usbfs_nb);
 	unregister_filesystem(&usb_fs_type);
 	if (usbdir != NULL) {
 		remove_proc_entry("bus/usb", NULL);
 	}
 }

fs/autofs4/autofs_i.h

Diff comments View file @ da50295

 /* -*- c -*- ------------------------------------------------------------- *
  *
  * linux/fs/autofs/autofs_i.h
  *
  *   Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
  *   Copyright 2005-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
  * option, any later version, incorporated herein by reference.
  *
  * ----------------------------------------------------------------------- */
 /* Internal header file for autofs */
 #include <linux/auto_fs4.h>
 #include <linux/auto_dev-ioctl.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 /* This is the range of ioctl() numbers we claim as ours */
 #define AUTOFS_IOC_FIRST     AUTOFS_IOC_READY
 #define AUTOFS_IOC_COUNT     32
 #define AUTOFS_DEV_IOCTL_IOC_FIRST	(AUTOFS_DEV_IOCTL_VERSION)
 #define AUTOFS_DEV_IOCTL_IOC_COUNT	(AUTOFS_IOC_COUNT - 11)
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/string.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 /* #define DEBUG */
 /*
  * Logging helpers.  Each message is prefixed with the current pid and
  * the calling function's name and terminated with a newline.
  * DPRINTK() expands to an empty statement unless DEBUG is defined.
  */
 #ifdef DEBUG
 #define DPRINTK(fmt, ...)					\
 	do {							\
 		printk(KERN_DEBUG "pid %d: %s: " fmt "\n",	\
 		       current->pid, __func__, ##__VA_ARGS__);	\
 	} while (0)
 #else
 #define DPRINTK(fmt, ...) do {} while (0)
 #endif
 #define AUTOFS_WARN(fmt, ...)					\
 	do {							\
 		printk(KERN_WARNING "pid %d: %s: " fmt "\n",	\
 		       current->pid, __func__, ##__VA_ARGS__);	\
 	} while (0)
 #define AUTOFS_ERROR(fmt, ...)					\
 	do {							\
 		printk(KERN_ERR "pid %d: %s: " fmt "\n",	\
 		       current->pid, __func__, ##__VA_ARGS__);	\
 	} while (0)
 /* Unified info structure.  This is pointed to by both the dentry and
    inode structures.  Each file in the filesystem has an instance of this
    structure.  It holds a reference to the dentry, so dentries are never
    flushed while the file exists.  All name lookups are dealt with at the
    dentry level, although the filesystem can interfere in the validation
    process.  Readdir is implemented by traversing the dentry lists. */
 struct autofs_info {
 	struct dentry	*dentry;
 	struct inode	*inode;
 	int		flags;		/* AUTOFS_INF_* bits, see below */
 	/* signalled with complete_all() when an expire of this entry ends */
 	struct completion expire_complete;
 	struct list_head active;
 	int active_count;
 	/* link on sbi->expiring_list; added/removed under sbi->lookup_lock */
 	struct list_head expiring;
 	struct autofs_sb_info *sbi;
 	unsigned long last_used;	/* jiffies value used by the expire timeout check */
 	atomic_t count;			/* compared against d_count by the expire code */
 	uid_t uid;
 	gid_t gid;
 	mode_t	mode;
 	size_t	size;
 	void (*free)(struct autofs_info *);
 	union {
 		const char *symlink;
 	} u;
 };
 #define AUTOFS_INF_EXPIRING	(1<<0) /* dentry is in the process of expiring */
 #define AUTOFS_INF_MOUNTPOINT	(1<<1) /* mountpoint status for direct expire */
 #define AUTOFS_INF_PENDING	(1<<2) /* dentry pending mount */
 /*
  * One outstanding wait.  Entries are singly linked through ->next;
  * presumably the list head is autofs_sb_info->queues - TODO confirm.
  */
 struct autofs_wait_queue {
 	wait_queue_head_t queue;
 	struct autofs_wait_queue *next;
 	autofs_wqt_t wait_queue_token;
 	/* We use the following to see what we are waiting for */
 	struct qstr name;
 	u32 dev;
 	u64 ino;
 	uid_t uid;
 	gid_t gid;
 	pid_t pid;
 	pid_t tgid;
 	/* This is for status reporting upon return */
 	int status;
 	unsigned int wait_ctr;
 };
 #define AUTOFS_SBI_MAGIC 0x6d4a556d
 /* Per-superblock autofs state; reached through sb->s_fs_info (see autofs4_sbi()). */
 struct autofs_sb_info {
 	u32 magic;
 	int pipefd;
 	struct file *pipe;
 	pid_t oz_pgrp;		/* process group that autofs4_oz_mode() treats as "oz" */
 	int catatonic;		/* when non-zero autofs4_oz_mode() is true for everyone */
 	int version;
 	int sub_version;
 	int min_proto;
 	int max_proto;
 	unsigned long exp_timeout;	/* timeout handed to the expire checks */
 	unsigned int type;
 	int reghost_enabled;
 	int needs_reghost;
 	struct super_block *sb;
 	struct mutex wq_mutex;
 	spinlock_t fs_lock;	/* held by the expire code while it changes autofs_info flags */
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
 	spinlock_t lookup_lock;	/* held while adding to / removing from expiring_list */
 	struct list_head active_list;
 	struct list_head expiring_list;
 };
 /* Return the autofs per-superblock info stored in sb->s_fs_info. */
 static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
 {
 	struct autofs_sb_info *sbi = (struct autofs_sb_info *)(sb->s_fs_info);
 	return sbi;
 }
 /* Return the autofs_info stored in dentry->d_fsdata (callers in this file check it for NULL). */
 static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
 {
 	struct autofs_info *ino = (struct autofs_info *)(dentry->d_fsdata);
 	return ino;
 }
 /* autofs4_oz_mode(): do we see the man behind the curtain?  (The
    processes which do manipulations for us in user space see the raw
    filesystem without "magic".)  Returns 1 when the mount is catatonic
    or the current task's process group is sbi->oz_pgrp, 0 otherwise. */
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
 	if (sbi->catatonic)
 		return 1;
 	return task_pgrp_nr(current) == sbi->oz_pgrp;
 }
 /* Does a dentry have some pending activity?  Returns 1 when its
    autofs_info has AUTOFS_INF_PENDING or AUTOFS_INF_EXPIRING set,
    otherwise 0. */
 static inline int autofs4_ispending(struct dentry *dentry)
 {
 	struct autofs_info *inf = autofs4_dentry_ino(dentry);
 	int pending = 0;
 	if (inf->flags & AUTOFS_INF_PENDING)
 		pending = 1;
 	else if (inf->flags & AUTOFS_INF_EXPIRING)
 		pending = 1;
 	return pending;
 }
 static inline void autofs4_copy_atime(struct file *src, struct file *dst)
 {
 	dst->f_path.dentry->d_inode->i_atime =
 		src->f_path.dentry->d_inode->i_atime;
 	return;
 }
 struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *);
 void autofs4_free_ino(struct autofs_info *);
 /* Expiration */
 int is_autofs4_dentry(struct dentry *);
 int autofs4_expire_wait(struct dentry *dentry);
 int autofs4_expire_run(struct super_block *, struct vfsmount *,
 			struct autofs_sb_info *,
 			struct autofs_packet_expire __user *);
 int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 			    struct autofs_sb_info *sbi, int when);
 int autofs4_expire_multi(struct super_block *, struct vfsmount *,
 			struct autofs_sb_info *, int __user *);
 struct dentry *autofs4_expire_direct(struct super_block *sb,
 				     struct vfsmount *mnt,
 				     struct autofs_sb_info *sbi, int how);
 struct dentry *autofs4_expire_indirect(struct super_block *sb,
 				       struct vfsmount *mnt,
 				       struct autofs_sb_info *sbi, int how);
 /* Device node initialization */
 int autofs_dev_ioctl_init(void);
 void autofs_dev_ioctl_exit(void);
 /* Operations structures */
 extern const struct inode_operations autofs4_symlink_inode_operations;
 extern const struct inode_operations autofs4_dir_inode_operations;
 extern const struct inode_operations autofs4_root_inode_operations;
 extern const struct inode_operations autofs4_indirect_root_inode_operations;
 extern const struct inode_operations autofs4_direct_root_inode_operations;
 extern const struct file_operations autofs4_dir_operations;
 extern const struct file_operations autofs4_root_operations;
 /* Initializing function */
 int autofs4_fill_super(struct super_block *, void *, int);
 struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info *sbi, mode_t mode);
 /* Queue management functions */
 int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
 /*
  * Follow mounts stacked on @path for as long as path->dentry is a
  * mountpoint and follow_down() succeeds.  Returns 1 if at least one
  * mount was followed, 0 otherwise.
  */
 static inline int autofs4_follow_mount(struct path *path)
 {
 	int res = 0;
 	while (d_mountpoint(path->dentry) && follow_down(path))
 		res = 1;
 	return res;
 }
 static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
 {
 	return new_encode_dev(sbi->sb->s_dev);
 }
 static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi)
 {
 	return sbi->sb->s_root->d_inode->i_ino;
 }
 /* Non-zero when @dentry has an inode and is not unhashed. */
 static inline int simple_positive(struct dentry *dentry)
 {
 	if (!dentry->d_inode)
 		return 0;
 	return !d_unhashed(dentry);
 }
-static inline int __simple_empty(struct dentry *dentry)
-{
-	struct dentry *child;
-	int ret = 0;
-	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
-		if (simple_positive(child))
-			goto out;
-	ret = 1;
-out:
-	return ret;
-}
 /*
  * Put the dentry's autofs_info on the superblock's expiring_list unless
  * it is already on a list.  Done under sbi->lookup_lock; a dentry
  * without autofs_info is ignored.
  */
 static inline void autofs4_add_expiring(struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	if (!ino)
 		return;
 	spin_lock(&sbi->lookup_lock);
 	if (list_empty(&ino->expiring))
 		list_add(&ino->expiring, &sbi->expiring_list);
 	spin_unlock(&sbi->lookup_lock);
 }
 /*
  * Take the dentry's autofs_info off the expiring list if it is on one.
  * Done under sbi->lookup_lock; a dentry without autofs_info is ignored.
  */
 static inline void autofs4_del_expiring(struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	if (!ino)
 		return;
 	spin_lock(&sbi->lookup_lock);
 	if (!list_empty(&ino->expiring))
 		list_del_init(&ino->expiring);
 	spin_unlock(&sbi->lookup_lock);
 }
 void autofs4_dentry_release(struct dentry *);
 extern void autofs4_kill_sb(struct super_block *);

fs/autofs4/expire.c

Diff comments View file @ da50295

 /* -*- c -*- --------------------------------------------------------------- *
  *
  * linux/fs/autofs/expire.c
  *
  *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
  *  Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
  *  Copyright 2001-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
  * option, any later version, incorporated herein by reference.
  *
  * ------------------------------------------------------------------------- */
 #include "autofs_i.h"
 static unsigned long now;
 /*
  * Check if a dentry can be expired.
  *
  * Returns 0 for a dentry without autofs_info, for a pending mount and,
  * unless @do_now is set, for a zero @timeout or an entry whose
  * last_used + @timeout still lies after the file-scope "now".
  * Otherwise returns 1; in the timed case last_used is reset to "now".
  */
 static inline int autofs4_can_expire(struct dentry *dentry,
 					unsigned long timeout, int do_now)
 {
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	/* dentry in the process of being deleted */
 	if (ino == NULL)
 		return 0;
 	/* No point expiring a pending mount */
 	if (ino->flags & AUTOFS_INF_PENDING)
 		return 0;
 	/* immediate expire skips the age check altogether */
 	if (do_now)
 		return 1;
 	/* Too young to die */
 	if (!timeout || time_after(ino->last_used + timeout, now))
 		return 0;
 	/* update last_used here :-
 	   - obviously makes sense if it is in use now
 	   - less obviously, prevents rapid-fire expire
 	     attempts if expire fails the first time */
 	ino->last_used = now;
 	return 1;
 }
 /*
  * Check a mount point for busyness.
  *
  * Returns 1 (busy) when nothing can be followed down from @dentry, when
  * the mount found there is an indirect autofs mount, or when
  * may_umount_tree() refuses; in the last case last_used of @dentry's
  * autofs_info is set to jiffies.  Returns 0 otherwise.
  */
 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct dentry *top = dentry;
 	struct path path = {.mnt = mnt, .dentry = dentry};
 	int status = 1;
 	DPRINTK("dentry %p %.*s",
 		dentry, (int)dentry->d_name.len, dentry->d_name.name);
 	path_get(&path);
 	if (follow_down(&path)) {
 		int check_umount = 1;
 		if (is_autofs4_dentry(path.dentry)) {
 			struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb);
 			if (autofs_type_indirect(sbi->type)) {
 				/* This is an autofs submount, we can't expire it */
 				check_umount = 0;
 			} else if (!d_mountpoint(path.dentry)) {
 				/*
 				 * Otherwise it's an offset mount and there is
 				 * nothing mounted on it, so it is not busy.
 				 */
 				status = 0;
 				check_umount = 0;
 			}
 		}
 		if (check_umount) {
 			if (may_umount_tree(path.mnt)) {
 				status = 0;
 			} else {
 				/* Update the expiry counter if fs is busy */
 				struct autofs_info *ino = autofs4_dentry_ino(top);
 				ino->last_used = jiffies;
 			}
 		}
 	}
 	DPRINTK("returning = %d", status);
 	path_put(&path);
 	return status;
 }
 /*
  * Calculate next entry in top down tree traversal.
  * From next_mnt in namespace.c - elegant.
  *
  * Returns the first child of @p if it has one, otherwise the next
  * sibling of @p or of its nearest ancestor that has one, without ever
  * climbing above @root.  Returns NULL once the walk is back at @root.
  */
 static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
 {
 	struct list_head *next = p->d_subdirs.next;
 	/* descend first */
 	if (next != &p->d_subdirs)
 		return list_entry(next, struct dentry, d_u.d_child);
 	/* no children: climb until some level still has a sibling left */
 	for (;;) {
 		if (p == root)
 			return NULL;
 		next = p->d_u.d_child.next;
 		if (next != &p->d_parent->d_subdirs)
 			break;
 		p = p->d_parent;
 	}
 	return list_entry(next, struct dentry, d_u.d_child);
 }
 /*
  * Check a direct mount point for busyness.
  * Direct mounts have similar expiry semantics to tree mounts.
  * The tree is not busy iff no mountpoints are busy and there are no
  * autofs submounts.
  *
  * Returns 1 when busy or not yet expirable, 0 when it may be expired.
  */
 static int autofs4_direct_busy(struct vfsmount *mnt,
 				struct dentry *top,
 				unsigned long timeout,
 				int do_now)
 {
 	DPRINTK("top %p %.*s",
 		top, (int) top->d_name.len, top->d_name.name);
 	if (!may_umount_tree(mnt)) {
 		/* If it's busy update the expiry counters */
 		struct autofs_info *ino = autofs4_dentry_ino(top);
 		if (ino)
 			ino->last_used = jiffies;
 		return 1;
 	}
 	/* Timeout of a direct mount is determined by its top dentry */
 	return autofs4_can_expire(top, timeout, do_now) ? 0 : 1;
 }
 /* Check a directory tree of mount points for busyness
  * The tree is not busy iff no mountpoints are busy
  *
  * Walks @top and everything below it with next_dentry().  Returns 1 as
  * soon as a mountpoint is reported busy by autofs4_mount_busy() or a
  * non-mountpoint dentry has more references than expected, and also
  * when @top is not positive or may not expire yet.  Returns 0 when the
  * whole tree can be expired.  Whenever busyness is detected in the walk
  * the last_used stamp of @top is refreshed.
  */
 static int autofs4_tree_busy(struct vfsmount *mnt,
 	       		     struct dentry *top,
 			     unsigned long timeout,
 			     int do_now)
 {
 	struct autofs_info *top_ino = autofs4_dentry_ino(top);
 	struct dentry *p;
 	DPRINTK("top %p %.*s",
 		top, (int)top->d_name.len, top->d_name.name);
 	/* Negative dentry - give up */
 	if (!simple_positive(top))
 		return 1;
 	spin_lock(&dcache_lock);
 	for (p = top; p; p = next_dentry(p, top)) {
+		spin_lock(&p->d_lock);
 		/* Negative or unhashed dentry - skip it and carry on with the walk */
-		if (!simple_positive(p))
+		if (!simple_positive(p)) {
+			spin_unlock(&p->d_lock);
 			continue;
+		}
 		DPRINTK("dentry %p %.*s",
 			p, (int) p->d_name.len, p->d_name.name);
-		p = dget(p);
+		p = dget_dlock(p);
+		spin_unlock(&p->d_lock);
 		/* dcache_lock is dropped for the checks below and retaken before the next step */
 		spin_unlock(&dcache_lock);
 		/*
 		 * Is someone visiting anywhere in the subtree ?
 		 * If there's no mount we need to check the usage
 		 * count for the autofs dentry.
 		 * If the fs is busy update the expiry counter.
 		 */
 		if (d_mountpoint(p)) {
 			if (autofs4_mount_busy(mnt, p)) {
 				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
 			}
 		} else {
 			struct autofs_info *ino = autofs4_dentry_ino(p);
 			unsigned int ino_count = atomic_read(&ino->count);
 			/*
 			 * Clean stale dentries below that have not been
 			 * invalidated after a mount fail during lookup
 			 */
 			d_invalidate(p);
 			/* allow for dget above and top is already dgot */
 			if (p == top)
 				ino_count += 2;
 			else
 				ino_count++;
 			if (p->d_count > ino_count) {
 				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
 			}
 		}
 		dput(p);
 		spin_lock(&dcache_lock);
 	}
 	spin_unlock(&dcache_lock);
 	/* Timeout of a tree mount is ultimately determined by its top dentry */
 	if (!autofs4_can_expire(top, timeout, do_now))
 		return 1;
 	return 0;
 }
 /*
  * Walk @parent and everything below it with next_dentry() looking for a
  * mountpoint that is neither busy (autofs4_mount_busy()) nor too young
  * (autofs4_can_expire()).
  *
  * Returns the first such dentry with the reference taken in the loop
  * still held, or NULL when there is none.
  */
 static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 					   struct dentry *parent,
 					   unsigned long timeout,
 					   int do_now)
 {
 	struct dentry *p;
 	DPRINTK("parent %p %.*s",
 		parent, (int)parent->d_name.len, parent->d_name.name);
 	spin_lock(&dcache_lock);
 	for (p = parent; p; p = next_dentry(p, parent)) {
+		spin_lock(&p->d_lock);
 		/* Negative or unhashed dentry - skip it and carry on with the walk */
-		if (!simple_positive(p))
+		if (!simple_positive(p)) {
+			spin_unlock(&p->d_lock);
 			continue;
+		}
 		DPRINTK("dentry %p %.*s",
 			p, (int) p->d_name.len, p->d_name.name);
-		p = dget(p);
+		p = dget_dlock(p);
+		spin_unlock(&p->d_lock);
 		/* dcache_lock is dropped for the checks below and retaken at "cont" */
 		spin_unlock(&dcache_lock);
 		if (d_mountpoint(p)) {
 			/* Can we umount this guy */
 			if (autofs4_mount_busy(mnt, p))
 				goto cont;
 			/* Can we expire this guy */
 			if (autofs4_can_expire(p, timeout, do_now))
 				return p;
 		}
 cont:
 		dput(p);
 		spin_lock(&dcache_lock);
 	}
 	spin_unlock(&dcache_lock);
 	return NULL;
 }
 /*
  * Check if we can expire a direct mount (possibly a tree).
  *
  * On success the root dentry is returned with a reference held and with
  * AUTOFS_INF_EXPIRING set on its autofs_info; if the root is a
  * mountpoint, AUTOFS_INF_MOUNTPOINT is set as well and d_mounted is
  * decremented.  Returns NULL when there is no root or the mount is busy.
  */
 struct dentry *autofs4_expire_direct(struct super_block *sb,
 				     struct vfsmount *mnt,
 				     struct autofs_sb_info *sbi,
 				     int how)
 {
 	struct dentry *root = dget(sb->s_root);
 	int do_now = how & AUTOFS_EXP_IMMEDIATE;
 	unsigned long timeout;
 	struct autofs_info *ino;
 	if (!root)
 		return NULL;
 	now = jiffies;
 	timeout = sbi->exp_timeout;
 	spin_lock(&sbi->fs_lock);
 	if (autofs4_direct_busy(mnt, root, timeout, do_now)) {
 		spin_unlock(&sbi->fs_lock);
 		dput(root);
 		return NULL;
 	}
 	ino = autofs4_dentry_ino(root);
 	if (d_mountpoint(root)) {
 		ino->flags |= AUTOFS_INF_MOUNTPOINT;
 		root->d_mounted--;
 	}
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 	return root;
 }
 /*
  * Find an eligible tree to time-out
  * A tree is eligible if :-
  *  - it is unused by any user process
  *  - it has been unused for exp_timeout time
  *
  * Scans the children of the superblock root.  @how may carry
  * AUTOFS_EXP_IMMEDIATE (skip the age check) and AUTOFS_EXP_LEAVES
  * (expire individual leaves instead of whole trees).
  *
  * Returns a referenced dentry whose autofs_info has AUTOFS_INF_EXPIRING
  * set, or NULL when nothing can be expired.
  */
 struct dentry *autofs4_expire_indirect(struct super_block *sb,
 				       struct vfsmount *mnt,
 				       struct autofs_sb_info *sbi,
 				       int how)
 {
 	unsigned long timeout;
 	struct dentry *root = sb->s_root;
 	struct dentry *expired = NULL;
 	struct list_head *next;
 	int do_now = how & AUTOFS_EXP_IMMEDIATE;
 	int exp_leaves = how & AUTOFS_EXP_LEAVES;
 	struct autofs_info *ino;
 	unsigned int ino_count;
 	if (!root)
 		return NULL;
 	now = jiffies;
 	timeout = sbi->exp_timeout;
 	spin_lock(&dcache_lock);
 	next = root->d_subdirs.next;
 	/* On exit from the loop, expired is set to a dgot dentry
 	 * to expire or it's NULL */
 	while ( next != &root->d_subdirs ) {
 		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 		/* Negative or unhashed dentry - skip it */
+		spin_lock(&dentry->d_lock);
 		if (!simple_positive(dentry)) {
 			next = next->next;
+			spin_unlock(&dentry->d_lock);
 			continue;
 		}
-		dentry = dget(dentry);
+		dentry = dget_dlock(dentry);
+		spin_unlock(&dentry->d_lock);
 		/* from here until "next:" the checks run under fs_lock, not dcache_lock */
 		spin_unlock(&dcache_lock);
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
 		/*
 		 * Case 1: (i) indirect mount or top level pseudo direct mount
 		 *	   (autofs-4.1).
 		 *	   (ii) indirect mount with offset mount, check the "/"
 		 *	   offset (autofs-5.0+).
 		 */
 		if (d_mountpoint(dentry)) {
 			DPRINTK("checking mountpoint %p %.*s",
 				dentry, (int)dentry->d_name.len, dentry->d_name.name);
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 2;
 			if (dentry->d_count > ino_count)
 				goto next;
 			/* Can we umount this guy */
 			if (autofs4_mount_busy(mnt, dentry))
 				goto next;
 			/* Can we expire this guy */
 			if (autofs4_can_expire(dentry, timeout, do_now)) {
 				expired = dentry;
 				goto found;
 			}
 			goto next;
 		}
 		if (simple_empty(dentry))
 			goto next;
 		/* Case 2: tree mount, expire iff entire tree is not busy */
 		if (!exp_leaves) {
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 1;
 			if (dentry->d_count > ino_count)
 				goto next;
 			if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
 				expired = dentry;
 				goto found;
 			}
 		/*
 		 * Case 3: pseudo direct mount, expire individual leaves
 		 *	   (autofs-4.1).
 		 */
 		} else {
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 1;
 			if (dentry->d_count > ino_count)
 				goto next;
 			expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
 			if (expired) {
 				/* the leaf carries its own reference; drop the one on its ancestor */
 				dput(dentry);
 				goto found;
 			}
 		}
 next:
 		spin_unlock(&sbi->fs_lock);
 		dput(dentry);
 		spin_lock(&dcache_lock);
 		next = next->next;
 	}
 	spin_unlock(&dcache_lock);
 	return NULL;
 found:
 	/* reached with fs_lock still held */
 	DPRINTK("returning %p %.*s",
 		expired, (int)expired->d_name.len, expired->d_name.name);
 	ino = autofs4_dentry_ino(expired);
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 	spin_lock(&dcache_lock);
 	/*
 	 * Re-inserts the parent's d_subdirs list head directly after the
 	 * expired entry - presumably so that the next scan starts with the
 	 * entries following it; TODO confirm.
 	 */
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 	spin_unlock(&dcache_lock);
 	return expired;
 }
 /*
  * autofs4_expire_wait - wait for an expire of @dentry that is in progress
  *
  * Returns 0 at once when AUTOFS_INF_EXPIRING is not set.  Otherwise
  * waits via autofs4_wait() and on the expire_complete completion, then
  * returns -EAGAIN if the dentry is unhashed by then, or else the
  * autofs4_wait() status.
  */
 int autofs4_expire_wait(struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int expiring;
 	int status;
 	/* Block on any pending expire */
 	spin_lock(&sbi->fs_lock);
 	expiring = ino->flags & AUTOFS_INF_EXPIRING;
 	spin_unlock(&sbi->fs_lock);
 	if (!expiring)
 		return 0;
 	DPRINTK("waiting for expire %p name=%.*s",
 		 dentry, dentry->d_name.len, dentry->d_name.name);
 	status = autofs4_wait(sbi, dentry, NFY_NONE);
 	wait_for_completion(&ino->expire_complete);
 	DPRINTK("expire done status=%d", status);
 	return d_unhashed(dentry) ? -EAGAIN : status;
 }
 /*
  * Perform an expiry operation.
  *
  * Picks an expirable dentry with autofs4_expire_indirect(), copies an
  * expire packet carrying its name to @pkt_p, then clears
  * AUTOFS_INF_EXPIRING and completes expire_complete.
  *
  * Returns 0 on success, -EAGAIN when nothing can be expired, -EFAULT
  * when the packet cannot be copied to user space.
  *
  * NOTE(review): the dentry is used to reach its autofs_info after the
  * dput() - presumably safe because autofs_info holds its own dentry
  * reference; confirm.
  */
 int autofs4_expire_run(struct super_block *sb,
 		      struct vfsmount *mnt,
 		      struct autofs_sb_info *sbi,
 		      struct autofs_packet_expire __user *pkt_p)
 {
 	struct autofs_packet_expire pkt;
 	struct autofs_info *ino;
 	struct dentry *dentry;
 	int ret = 0;
 	memset(&pkt, 0, sizeof(pkt));
 	pkt.hdr.proto_version = sbi->version;
 	pkt.hdr.type = autofs_ptype_expire;
 	dentry = autofs4_expire_indirect(sb, mnt, sbi, 0);
 	if (dentry == NULL)
 		return -EAGAIN;
 	pkt.len = dentry->d_name.len;
 	memcpy(pkt.name, dentry->d_name.name, pkt.len);
 	pkt.name[pkt.len] = '\0';
 	dput(dentry);
 	if (copy_to_user(pkt_p, &pkt, sizeof(pkt)))
 		ret = -EFAULT;
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(dentry);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 	return ret;
 }
 /*
  * autofs4_do_expire_multi - pick one expirable dentry and wait for its expire
  *
  * Chooses the direct or the indirect scan depending on the mount type.
  * Returns -EAGAIN when nothing can be expired, otherwise the result of
  * autofs4_wait().  On the way out AUTOFS_INF_EXPIRING is cleared, a
  * d_mounted decrement recorded in AUTOFS_INF_MOUNTPOINT is undone and
  * waiters on expire_complete are released.
  */
 int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 			    struct autofs_sb_info *sbi, int when)
 {
 	struct autofs_info *ino;
 	struct dentry *dentry;
 	int ret;
 	if (autofs_type_trigger(sbi->type))
 		dentry = autofs4_expire_direct(sb, mnt, sbi, when);
 	else
 		dentry = autofs4_expire_indirect(sb, mnt, sbi, when);
 	if (!dentry)
 		return -EAGAIN;
 	ino = autofs4_dentry_ino(dentry);
 	/* This is synchronous because it makes the daemon a
 	   little easier */
 	ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
 	spin_lock(&sbi->fs_lock);
 	if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
 		sb->s_root->d_mounted++;
 		ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
 	}
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 	dput(dentry);
 	return ret;
 }
 /* Call repeatedly until it returns -EAGAIN, meaning there's nothing
    more to be done.  @arg may be NULL; otherwise the expire flags are
    read from it, and -EFAULT is returned if that read fails. */
 int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 			struct autofs_sb_info *sbi, int __user *arg)
 {
 	int do_now = 0;
 	if (arg != NULL) {
 		if (get_user(do_now, arg))
 			return -EFAULT;
 	}
 	return autofs4_do_expire_multi(sb, mnt, sbi, do_now);
 }

fs/ceph/dir.c

Diff comments View file @ da50295

 #include <linux/ceph/ceph_debug.h>
 #include <linux/spinlock.h>
 #include <linux/fs_struct.h>
 #include <linux/namei.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include "super.h"
 #include "mds_client.h"
 /*
  * Directory operations: readdir, lookup, create, link, unlink,
  * rename, etc.
  */
 /*
  * Ceph MDS operations are specified in terms of a base ino and
  * relative path.  Thus, the client can specify an operation on a
  * specific inode (e.g., a getattr due to fstat(2)), or as a path
  * relative to, say, the root directory.
  *
  * Normally, we limit ourselves to strict inode ops (no path component)
  * or dentry operations (a single path component relative to an ino).  The
  * exception to this is open_root_dentry(), which will open the mount
  * point by name.
  */
 const struct inode_operations ceph_dir_iops;
 const struct file_operations ceph_dir_fops;
 const struct dentry_operations ceph_dentry_ops;
 /*
  * Initialize ceph dentry state.
  *
  * Does nothing when d_fsdata is already set.  Otherwise picks the
  * dentry operations from the snap type of the parent's inode, then
  * allocates a ceph_dentry_info and attaches it under d_lock.  If another
  * thread attached one in the meantime, the new allocation is freed.
  *
  * Returns 0, or -ENOMEM when the allocation fails (d_op has already
  * been set by then).
  */
 int ceph_init_dentry(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di;
 	if (dentry->d_fsdata)
 		return 0;
 	if (dentry->d_parent == NULL)	/* nfs fh_to_dentry */
 		dentry->d_op = &ceph_dentry_ops;
 	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
 		dentry->d_op = &ceph_dentry_ops;
 	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
 		dentry->d_op = &ceph_snapdir_dentry_ops;
 	else
 		dentry->d_op = &ceph_snap_dentry_ops;
 	di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
 	if (!di)
 		return -ENOMEM;          /* oh well */
 	spin_lock(&dentry->d_lock);
 	if (!dentry->d_fsdata) {
 		di->dentry = dentry;
 		di->lease_session = NULL;
 		dentry->d_fsdata = di;
 		dentry->d_time = jiffies;
 		ceph_dentry_lru_add(dentry);
 	} else {
 		/* lost a race */
 		kmem_cache_free(ceph_dentry_cachep, di);
 	}
 	spin_unlock(&dentry->d_lock);
 	return 0;
 }
 /*
  * for readdir, we encode the directory frag and offset within that
  * frag into f_pos.
  *
  * fpos_frag() returns the frag part: everything above the low 32 bits.
  */
 static unsigned fpos_frag(loff_t p)
 {
 	loff_t frag = p >> 32;
 	return (unsigned)frag;
 }
 /* Offset part of a readdir f_pos: its low 32 bits. */
 static unsigned fpos_off(loff_t p)
 {
 	loff_t off = p & 0xffffffffLL;
 	return (unsigned)off;
 }
 /*
  * When possible, we try to satisfy a readdir by peeking at the
  * dcache.  We make this work by carefully ordering dentries on
  * d_u.d_child when we initially get results back from the MDS, and
  * falling back to a "normal" sync readdir if any dentries in the dir
  * are dropped.
  *
  * I_COMPLETE indicates that we have all dentries in the dir.  It is
  * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
  * the MDS if/when the directory is modified).
  */
 static int __dcache_readdir(struct file *filp,
 			    void *dirent, filldir_t filldir)
 {
 	struct ceph_file_info *fi = filp->private_data;
 	struct dentry *parent = filp->f_dentry;
 	struct inode *dir = parent->d_inode;
 	struct list_head *p;
 	struct dentry *dentry, *last;
 	struct ceph_dentry_info *di;
 	int err = 0;
 	/* claim ref on last dentry we returned */
 	last = fi->dentry;
 	fi->dentry = NULL;
 	dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
 	     last);
 	spin_lock(&dcache_lock);
 	/* start at beginning? */
 	if (filp->f_pos == 2 || last == NULL ||
 	    filp->f_pos < ceph_dentry(last)->offset) {
 		if (list_empty(&parent->d_subdirs))
 			goto out_unlock;
 		p = parent->d_subdirs.prev;
 		dout(" initial p %p/%p\n", p->prev, p->next);
 	} else {
 		p = last->d_u.d_child.prev;
 	}
 more:
 	dentry = list_entry(p, struct dentry, d_u.d_child);
 	di = ceph_dentry(dentry);
 	while (1) {
 		dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
 		     d_unhashed(dentry) ? "!hashed" : "hashed",
 		     parent->d_subdirs.prev, parent->d_subdirs.next);
 		if (p == &parent->d_subdirs) {
 			fi->at_end = 1;
 			goto out_unlock;
 		}
+		spin_lock(&dentry->d_lock);
 		if (!d_unhashed(dentry) && dentry->d_inode &&
 		    ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
 		    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
 		    filp->f_pos <= di->offset)
 			break;
 		dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry,
 		     dentry->d_name.len, dentry->d_name.name, di->offset,
 		     filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
 		     !dentry->d_inode ? " null" : "");
+		spin_unlock(&dentry->d_lock);
 		p = p->prev;
 		dentry = list_entry(p, struct dentry, d_u.d_child);
 		di = ceph_dentry(dentry);
 	}
-	spin_lock(&dentry->d_lock);
+	dget_dlock(dentry);
-	dentry->d_count++;
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
 	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
 	filp->f_pos = di->offset;
 	err = filldir(dirent, dentry->d_name.name,
 		      dentry->d_name.len, di->offset,
 		      dentry->d_inode->i_ino,
 		      dentry->d_inode->i_mode >> 12);
 	if (last) {
 		if (err < 0) {
 			/* remember our position */
 			fi->dentry = last;
 			fi->next_offset = di->offset;
 		} else {
 			dput(last);
 		}
 	}
 	last = dentry;
 	if (err < 0)
 		goto out;
 	filp->f_pos++;
 	/* make sure a dentry wasn't dropped while we didn't have dcache_lock */
 	if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
 		dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
 		err = -EAGAIN;
 		goto out;
 	}
 	spin_lock(&dcache_lock);
 	p = p->prev;	/* advance to next dentry */
 	goto more;
 out_unlock:
 	spin_unlock(&dcache_lock);
 out:
 	if (last)
 		dput(last);
 	return err;
 }
 /*
  * make note of the last dentry we read, so we can
  * continue at the same lexicographical point,
  * regardless of what dir changes take place on the
  * server.
  */
 static int note_last_dentry(struct ceph_file_info *fi, const char *name,
 			    int len)
 {
 	kfree(fi->last_name);
 	fi->last_name = kmalloc(len+1, GFP_NOFS);
 	if (!fi->last_name)
 		return -ENOMEM;
 	memcpy(fi->last_name, name, len);
 	fi->last_name[len] = 0;
 	dout("note_last_dentry '%s'\n", fi->last_name);
 	return 0;
 }
 /*
  * readdir for a ceph directory.
  *
  * Emits "." and ".." at positions 0 and 1, then tries to serve entries
  * from the dcache via __dcache_readdir().  If that is not possible (or
  * it returns -EAGAIN), entries are fetched from the MDS one chunk at a
  * time, frag by frag, with the reply for the current chunk cached in
  * fi->last_readdir.
  *
  * Returns 0 when filldir() stops us or the end of the directory is
  * reached, or a negative error from request creation/execution or from
  * note_last_dentry().
  */
 static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct ceph_file_info *fi = filp->private_data;
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	unsigned frag = fpos_frag(filp->f_pos);
 	int off = fpos_off(filp->f_pos);
 	int err;
 	u32 ftype;
 	struct ceph_mds_reply_info_parsed *rinfo;
 	const int max_entries = fsc->mount_options->max_readdir;
 	const int max_bytes = fsc->mount_options->max_readdir_bytes;
 	dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
 	/* a previous call already hit the end; nothing more to return */
 	if (fi->at_end)
 		return 0;
 	/* always start with . and .. */
 	if (filp->f_pos == 0) {
 		/* note dir version at start of readdir so we can tell
 		 * if any dentries get dropped */
 		fi->dir_release_count = ci->i_release_count;
 		dout("readdir off 0 -> '.'\n");
 		if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0),
 			    inode->i_ino, inode->i_mode >> 12) < 0)
 			return 0;
 		filp->f_pos = 1;
 		off = 1;
 	}
 	if (filp->f_pos == 1) {
 		dout("readdir off 1 -> '..'\n");
 		if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
 			    filp->f_dentry->d_parent->d_inode->i_ino,
 			    inode->i_mode >> 12) < 0)
 			return 0;
 		filp->f_pos = 2;
 		off = 2;
 	}
 	/* can we use the dcache? */
 	spin_lock(&inode->i_lock);
 	if ((filp->f_pos == 2 || fi->dentry) &&
 	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
 	    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
 		spin_unlock(&inode->i_lock);
 		err = __dcache_readdir(filp, dirent, filldir);
 		/* -EAGAIN means "fall back to the MDS"; anything else is final */
 		if (err != -EAGAIN)
 			return err;
 	} else {
 		spin_unlock(&inode->i_lock);
 	}
 	/*
 	 * The dcache path may have left its resume point in fi->dentry;
 	 * convert it into a saved name for the MDS path and drop the ref.
 	 */
 	if (fi->dentry) {
 		err = note_last_dentry(fi, fi->dentry->d_name.name,
 				       fi->dentry->d_name.len);
 		if (err)
 			return err;
 		dput(fi->dentry);
 		fi->dentry = NULL;
 	}
 	/* proceed with a normal readdir */
 more:
 	/* do we have the correct frag content buffered? */
 	if (fi->frag != frag || fi->last_readdir == NULL) {
 		struct ceph_mds_request *req;
 		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
 			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 		/* discard old result, if any */
 		if (fi->last_readdir) {
 			ceph_mdsc_put_request(fi->last_readdir);
 			fi->last_readdir = NULL;
 		}
 		/* requery frag tree, as the frag topology may have changed */
 		frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
 		dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
 		     ceph_vinop(inode), frag, fi->last_name);
 		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 		if (IS_ERR(req))
 			return PTR_ERR(req);
 		req->r_inode = igrab(inode);
 		req->r_dentry = dget(filp->f_dentry);
 		/* hints to request -> mds selection code */
 		req->r_direct_mode = USE_AUTH_MDS;
 		req->r_direct_hash = ceph_frag_value(frag);
 		req->r_direct_is_hash = true;
 		/* NOTE(review): kstrdup() result is not checked here — confirm
 		 * whether a NULL r_path2 with a non-NULL last_name is safe */
 		req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
 		req->r_readdir_offset = fi->next_offset;
 		req->r_args.readdir.frag = cpu_to_le32(frag);
 		req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
 		req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
 		req->r_num_caps = max_entries + 1;
 		err = ceph_mdsc_do_request(mdsc, NULL, req);
 		if (err < 0) {
 			ceph_mdsc_put_request(req);
 			return err;
 		}
 		dout("readdir got and parsed readdir result=%d"
 		     " on frag %x, end=%d, complete=%d\n", err, frag,
 		     (int)req->r_reply_info.dir_end,
 		     (int)req->r_reply_info.dir_complete);
 		if (!req->r_did_prepopulate) {
 			dout("readdir !did_prepopulate");
 			fi->dir_release_count--;    /* preclude I_COMPLETE */
 		}
 		/* note next offset and last dentry name */
 		fi->offset = fi->next_offset;
 		/* fi now owns the request reference */
 		fi->last_readdir = req;
 		if (req->r_reply_info.dir_end) {
 			/* frag exhausted: no name to continue from */
 			kfree(fi->last_name);
 			fi->last_name = NULL;
 			if (ceph_frag_is_rightmost(frag))
 				fi->next_offset = 2;
 			else
 				fi->next_offset = 0;
 		} else {
 			rinfo = &req->r_reply_info;
 			err = note_last_dentry(fi,
 				       rinfo->dir_dname[rinfo->dir_nr-1],
 				       rinfo->dir_dname_len[rinfo->dir_nr-1]);
 			if (err)
 				return err;
 			fi->next_offset += rinfo->dir_nr;
 		}
 	}
 	rinfo = &fi->last_readdir->r_reply_info;
 	dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
 	     rinfo->dir_nr, off, fi->offset);
 	/* emit every buffered entry at or after our current offset */
 	while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) {
 		u64 pos = ceph_make_fpos(frag, off);
 		struct ceph_mds_reply_inode *in =
 			rinfo->dir_in[off - fi->offset].in;
 		struct ceph_vino vino;
 		ino_t ino;
 		dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
 		     off, off - fi->offset, rinfo->dir_nr, pos,
 		     rinfo->dir_dname_len[off - fi->offset],
 		     rinfo->dir_dname[off - fi->offset], in);
 		BUG_ON(!in);
 		ftype = le32_to_cpu(in->mode) >> 12;
 		vino.ino = le64_to_cpu(in->ino);
 		vino.snap = le64_to_cpu(in->snapid);
 		ino = ceph_vino_to_ino(vino);
 		if (filldir(dirent,
 			    rinfo->dir_dname[off - fi->offset],
 			    rinfo->dir_dname_len[off - fi->offset],
 			    pos, ino, ftype) < 0) {
 			dout("filldir stopping us...\n");
 			return 0;
 		}
 		off++;
 		filp->f_pos = pos + 1;
 	}
 	/* a saved name means this frag has more chunks: fetch the next one */
 	if (fi->last_name) {
 		ceph_mdsc_put_request(fi->last_readdir);
 		fi->last_readdir = NULL;
 		goto more;
 	}
 	/* more frags? */
 	if (!ceph_frag_is_rightmost(frag)) {
 		frag = ceph_frag_next(frag);
 		off = 0;
 		filp->f_pos = ceph_make_fpos(frag, off);
 		dout("readdir next frag is %x\n", frag);
 		goto more;
 	}
 	fi->at_end = 1;
 	/*
 	 * if dir_release_count still matches the dir, no dentries
 	 * were released during the whole readdir, and we should have
 	 * the complete dir contents in our cache.
 	 */
 	spin_lock(&inode->i_lock);
 	if (ci->i_release_count == fi->dir_release_count) {
 		dout(" marking %p complete\n", inode);
 		ci->i_ceph_flags |= CEPH_I_COMPLETE;
 		ci->i_max_offset = filp->f_pos;
 	}
 	spin_unlock(&inode->i_lock);
 	dout("readdir %p filp %p done.\n", inode, filp);
 	return 0;
 }
 /*
  * Throw away all buffered readdir state held in @fi: the cached MDS
  * reply, the saved continuation name, the dcache resume dentry and the
  * at_end flag.  next_offset is reset to 2.
  */
 static void reset_readdir(struct ceph_file_info *fi)
 {
 	struct ceph_mds_request *cached = fi->last_readdir;
 	struct dentry *resume = fi->dentry;
 	if (cached != NULL) {
 		ceph_mdsc_put_request(cached);
 		fi->last_readdir = NULL;
 	}
 	kfree(fi->last_name);
 	fi->last_name = NULL;
 	fi->next_offset = 2;  /* compensate for . and .. */
 	if (resume != NULL) {
 		dput(resume);
 		fi->dentry = NULL;
 	}
 	fi->at_end = 0;
 }
 /*
  * llseek on a directory.  Computes the new position under i_mutex,
  * stores it in f_pos when it changed, and drops buffered readdir state
  * when that state can no longer be used.
  *
  * Returns the new position, or -EINVAL if it is negative or beyond
  * s_maxbytes.
  */
 static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file->f_mapping->host;
 	const loff_t requested = offset;	/* value passed in, pre-adjustment */
 	loff_t retval = -EINVAL;
 	mutex_lock(&inode->i_mutex);
 	if (origin == SEEK_END)
 		offset += inode->i_size + 2;   /* FIXME */
 	else if (origin == SEEK_CUR)
 		offset += file->f_pos;
 	if (offset < 0 || offset > inode->i_sb->s_maxbytes)
 		goto unlock;
 	retval = offset;
 	if (file->f_pos != offset) {
 		file->f_pos = offset;
 		file->f_version = 0;
 		fi->at_end = 0;
 	}
 	/*
 	 * discard buffered readdir content on seekdir(0), or
 	 * seek to new frag, or seek prior to current chunk.
 	 */
 	if (offset == 0 ||
 	    fpos_frag(offset) != fpos_frag(requested) ||
 	    fpos_off(offset) < fi->offset) {
 		dout("dir_llseek dropping %p content\n", file);
 		reset_readdir(fi);
 	}
 	/* perturb dir_release_count if we did a forward seek */
 	if (offset > requested)
 		fi->dir_release_count--;
 unlock:
 	mutex_unlock(&inode->i_mutex);
 	return retval;
 }
 /*
  * Post-process the result of a lookup/open request.
  *
  * Return value:
  *   ERR_PTR(err)  - the request failed;
  *   req->r_dentry - with an extra reference, when it differs from the
  *                   VFS-provided @dentry (we got spliced);
  *   NULL          - @dentry itself is the final dentry.
  *
  * -ENOENT is handled gracefully: a name equal to the configured
  * snapdir name is linked to the parent's snapdir inode, and an -ENOENT
  * reply carrying no trace either drops a positive dentry (returning
  * -ENOENT) or hashes @dentry as a negative entry.
  */
 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
 				  struct dentry *dentry, int err)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 	struct inode *parent = dentry->d_parent->d_inode;
 	if (err == -ENOENT) {
 		if (strcmp(dentry->d_name.name,
 			   fsc->mount_options->snapdir_name) == 0) {
 			/* .snap dir */
 			struct inode *inode = ceph_get_snapdir(parent);
 			dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
 			     dentry, dentry->d_name.len, dentry->d_name.name, inode);
 			BUG_ON(!d_unhashed(dentry));
 			d_add(dentry, inode);
 			err = 0;
 		} else {
 			err = 0;
 			/* no trace? */
 			if (!req->r_reply_info.head->is_dentry) {
 				dout("ENOENT and no trace, dentry %p inode %p\n",
 				     dentry, dentry->d_inode);
 				if (dentry->d_inode) {
 					d_drop(dentry);
 					err = -ENOENT;
 				} else {
 					d_add(dentry, NULL);
 				}
 			}
 		}
 	}
 	if (err)
 		return ERR_PTR(err);
 	if (dentry != req->r_dentry)
 		return dget(req->r_dentry);   /* we got spliced */
 	return NULL;
 }
 /*
  * Non-zero when @inode is the root inode and the dentry's name begins
  * with ".ceph" (only the first five characters are compared).
  */
 static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
 {
 	if (ceph_ino(inode) != CEPH_INO_ROOT)
 		return 0;
 	return strncmp(dentry->d_name.name, ".ceph", 5) == 0;
 }
 /*
  * Look up a single dir entry.  If there is a lookup intent, inform
  * the MDS so that it gets our 'caps wanted' value in a single op.
  */
 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 				  struct nameidata *nd)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int op;
 	int err;
 	dout("lookup %p dentry %p '%.*s'\n",
 	     dir, dentry, dentry->d_name.len, dentry->d_name.name);
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
 	err = ceph_init_dentry(dentry);
 	if (err < 0)
 		return ERR_PTR(err);
 	/* open (but not create!) intent? */
 	if (nd &&
 	    (nd->flags & LOOKUP_OPEN) &&
 	    (nd->flags & LOOKUP_CONTINUE) == 0 && /* only open last component */
 	    !(nd->intent.open.flags & O_CREAT)) {
 		int mode = nd->intent.open.create_mode & ~current->fs->umask;
 		return ceph_lookup_open(dir, dentry, nd, mode, 1);
 	}
 	/* can we conclude ENOENT locally? */
 	if (dentry->d_inode == NULL) {
 		struct ceph_inode_info *ci = ceph_inode(dir);
 		struct ceph_dentry_info *di = ceph_dentry(dentry);
 		spin_lock(&dir->i_lock);
 		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
 		if (strncmp(dentry->d_name.name,
 			    fsc->mount_options->snapdir_name,
 			    dentry->d_name.len) &&
 		    !is_root_ceph_dentry(dir, dentry) &&
 		    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
 		    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
 			spin_unlock(&dir->i_lock);
 			dout(" dir %p complete, -ENOENT\n", dir);
 			d_add(dentry, NULL);
 			di->lease_shared_gen = ci->i_shared_gen;
 			return NULL;
 		}
 		spin_unlock(&dir->i_lock);
 	}
 	op = ceph_snap(dir) == CEPH_SNAPDIR ?
 		CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
 	req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
 	if (IS_ERR(req))
 		return ERR_CAST(req);
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
 	/* we only need inode linkage */
 	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
 	req->r_locked_dir = dir;
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	dentry = ceph_finish_lookup(req, dentry, err);
 	ceph_mdsc_put_request(req);  /* will dput(dentry) */
 	dout("lookup result=%p\n", dentry);
 	return dentry;
 }
 /*
  * A create that came back from the MDS without a trace is followed up
  * with a lookup, because the VFS expects us to link up the dentry it
  * handed us.
  *
  * Returns 0 on success, or the error encoded in the lookup result.
  */
 int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
 {
 	struct dentry *result = ceph_lookup(dir, dentry, NULL);
 	/* NULL yields 0 here; an ERR_PTR yields its negative errno */
 	if (!result || IS_ERR(result))
 		return PTR_ERR(result);
 	/*
 	 * We created the item, then did a lookup, and found
 	 * it was already linked to another inode we already
 	 * had in our cache (and thus got spliced).  Link our
 	 * dentry to that inode, but don't hash it, just in
 	 * case the VFS wants to dereference it.
 	 */
 	BUG_ON(!result->d_inode);
 	d_instantiate(dentry, result->d_inode);
 	return 0;
 }
 /*
  * Create an inode through an MDS MKNOD request.  Only permitted in the
  * live (non-snapshot) namespace; otherwise -EROFS.  If the reply has
  * no trace, a follow-up lookup links the dentry.  The dentry is
  * dropped (d_drop) on every failure after the snapshot check.
  */
 static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 		      int mode, dev_t rdev)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		return -EROFS;
 	dout("mknod in dir %p dentry %p mode 0%o rdev %d\n",
 	     dir, dentry, mode, rdev);
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto drop;
 	}
 	req->r_locked_dir = dir;
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
 	req->r_args.mknod.mode = cpu_to_le32(mode);
 	req->r_args.mknod.rdev = cpu_to_le32(rdev);
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	if (err == 0 && !req->r_reply_info.head->is_dentry)
 		err = ceph_handle_notrace_create(dir, dentry);
 	ceph_mdsc_put_request(req);
 drop:
 	if (err)
 		d_drop(dentry);
 	return err;
 }
 /*
  * Create a regular file.  With nameidata (an open intent is required)
  * the create is folded into ceph_lookup_open(); without it we fall
  * back to ceph_mknod() with S_IFREG.  Snapshot directories yield -EROFS.
  */
 static int ceph_create(struct inode *dir, struct dentry *dentry, int mode,
 		       struct nameidata *nd)
 {
 	dout("create in dir %p dentry %p name '%.*s'\n",
 	     dir, dentry, dentry->d_name.len, dentry->d_name.name);
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		return -EROFS;
 	/* fall back to mknod */
 	if (!nd)
 		return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0);
 	BUG_ON((nd->flags & LOOKUP_OPEN) == 0);
 	dentry = ceph_lookup_open(dir, dentry, nd, mode, 0);
 	/* hrm, what should i do here if we get aliased? */
 	return IS_ERR(dentry) ? PTR_ERR(dentry) : 0;
 }
 static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 			    const char *dest)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		return -EROFS;
 	dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
 	if (IS_ERR(req)) {
 		d_drop(dentry);
 		return PTR_ERR(req);
 	}
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
 	req->r_path2 = kstrdup(dest, GFP_NOFS);
 	req->r_locked_dir = dir;
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	if (!err && !req->r_reply_info.head->is_dentry)
 		err = ceph_handle_notrace_create(dir, dentry);
 	ceph_mdsc_put_request(req);
 	if (err)
 		d_drop(dentry);
 	return err;
 }
 /*
  * mkdir.  Inside the snapdir this is a MKSNAP (mkdir .snap/foo), in
  * the live namespace a plain MKDIR; inside any other snapshot it fails
  * with -EROFS.  The dentry is dropped (d_drop) whenever a negative
  * error is returned.
  */
 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int op;
 	int err;
 	if (ceph_snap(dir) == CEPH_SNAPDIR) {
 		/* mkdir .snap/foo is a MKSNAP */
 		op = CEPH_MDS_OP_MKSNAP;
 		dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
 		     dentry->d_name.len, dentry->d_name.name, dentry);
 	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
 		dout("mkdir dir %p dn %p mode 0%o\n", dir, dentry, mode);
 		op = CEPH_MDS_OP_MKDIR;
 	} else {
 		err = -EROFS;
 		goto drop;
 	}
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto drop;
 	}
 	req->r_locked_dir = dir;
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
 	req->r_args.mkdir.mode = cpu_to_le32(mode);
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	if (err == 0 && !req->r_reply_info.head->is_dentry)
 		err = ceph_handle_notrace_create(dir, dentry);
 	ceph_mdsc_put_request(req);
 drop:
 	if (err < 0)
 		d_drop(dentry);
 	return err;
 }
 /*
  * Hard-link @old_dentry's inode under the name @dentry in @dir via an
  * MDS LINK request.  Fails with -EROFS inside a snapshot.  On error
  * the new dentry is dropped; on success without a trace in the reply,
  * the dentry is instantiated here with a fresh inode reference.
  */
 static int ceph_link(struct dentry *old_dentry, struct inode *dir,
 		     struct dentry *dentry)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		return -EROFS;
 	dout("link in dir %p old_dentry %p dentry %p\n", dir,
 	     old_dentry, dentry);
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
 	if (IS_ERR(req)) {
 		d_drop(dentry);
 		return PTR_ERR(req);
 	}
 	req->r_locked_dir = dir;
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
 	req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	if (err) {
 		d_drop(dentry);
 	} else if (!req->r_reply_info.head->is_dentry) {
 		/* no trace: link the dentry ourselves */
 		d_instantiate(dentry, igrab(old_dentry->d_inode));
 	}
 	ceph_mdsc_put_request(req);
 	return err;
 }
 /*
  * Compute the cap mask to release for a soon-to-be unlinked inode.
  * The LINK_SHARED and LINK_EXCL caps are always included.  If this is
  * the last link (i_nlink == 1), also include every cap other than PIN
  * that we don't currently want (e.g. because the file is still open),
  * and flag the inode CEPH_I_NODELAY.
  */
 static int drop_caps_for_unlink(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mask;
 	spin_lock(&inode->i_lock);
 	mask = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
 	if (inode->i_nlink == 1) {
 		mask |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
 		ci->i_ceph_flags |= CEPH_I_NODELAY;
 	}
 	spin_unlock(&inode->i_lock);
 	return mask;
 }
 /*
  * rmdir and unlink differ only by the metadata op code, so both are
  * handled here.  Inside the snapdir the operation is an RMSNAP; inside
  * any other snapshot it fails with -EROFS.
  */
 static int ceph_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
 	struct inode *inode = dentry->d_inode;
 	struct ceph_mds_request *req;
 	int err;
 	int op;
 	if (ceph_snap(dir) == CEPH_SNAPDIR) {
 		/* rmdir .snap/foo is RMSNAP */
 		dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len,
 		     dentry->d_name.name, dentry);
 		op = CEPH_MDS_OP_RMSNAP;
 	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
 		dout("unlink/rmdir dir %p dn %p inode %p\n",
 		     dir, dentry, inode);
 		if ((dentry->d_inode->i_mode & S_IFMT) == S_IFDIR)
 			op = CEPH_MDS_OP_RMDIR;
 		else
 			op = CEPH_MDS_OP_UNLINK;
 	} else {
 		return -EROFS;
 	}
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	req->r_locked_dir = dir;
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	req->r_inode_drop = drop_caps_for_unlink(inode);
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	/* no trace in the reply: remove the dentry ourselves */
 	if (err == 0 && !req->r_reply_info.head->is_dentry)
 		d_delete(dentry);
 	ceph_mdsc_put_request(req);
 	return err;
 }
 /*
  * Rename via an MDS RENAME request.  Source and target directories
  * must belong to the same snapshot context (-EXDEV otherwise) and that
  * context must be the live namespace (-EROFS otherwise).
  */
 static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		       struct inode *new_dir, struct dentry *new_dentry)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(old_dir->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 	if (ceph_snap(old_dir) != ceph_snap(new_dir))
 		return -EXDEV;
 	/* both dirs share one snap id here, so testing one covers both */
 	if (ceph_snap(old_dir) != CEPH_NOSNAP)
 		return -EROFS;
 	dout("rename dir %p dentry %p to dir %p dentry %p\n",
 	     old_dir, old_dentry, new_dir, new_dentry);
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	req->r_locked_dir = new_dir;
 	req->r_dentry = dget(new_dentry);
 	req->r_num_caps = 2;
 	req->r_old_dentry = dget(old_dentry);
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
 	/* release LINK_RDCACHE on source inode (mds will lock it) */
 	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
 	if (new_dentry->d_inode)
 		req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode);
 	err = ceph_mdsc_do_request(mdsc, old_dir, req);
 	if (err == 0 && !req->r_reply_info.head->is_dentry) {
 		/*
 		 * Normally d_move() is done by fill_trace (called by
 		 * do_request, above).  If there is no trace, we need
 		 * to do it here.
 		 */
 		/* d_move screws up d_subdirs order */
 		ceph_i_clear(new_dir, CEPH_I_COMPLETE);
 		d_move(old_dentry, new_dentry);
 		/* ensure target dentry is invalidated, despite
 		   rehashing bug in vfs_rename_dir */
 		ceph_invalidate_dentry_lease(new_dentry);
 	}
 	ceph_mdsc_put_request(req);
 	return err;
 }
 /*
  * Ensure a dentry lease will no longer revalidate.
  */
 void ceph_invalidate_dentry_lease(struct dentry *dentry)
 {
 	spin_lock(&dentry->d_lock);
 	dentry->d_time = jiffies;
 	ceph_dentry(dentry)->lease_shared_gen = 0;
 	spin_unlock(&dentry->d_lock);
 }
 /*
  * Check if dentry lease is valid.  Try to renew if the lease has
  * passed its renew-after time.
  *
  * Returns 1 if the lease generation matches the session's cap
  * generation and neither the dentry's d_time nor the session's cap ttl
  * has passed; 0 otherwise (including when there is no lease session).
  *
  * The renewal message is sent only after d_lock has been released; a
  * session reference taken under the lock keeps the session alive until
  * then.
  */
 static int dentry_lease_is_valid(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di;
 	struct ceph_mds_session *s;
 	int valid = 0;
 	u32 gen;
 	unsigned long ttl;
 	struct ceph_mds_session *session = NULL;
 	struct inode *dir = NULL;
 	u32 seq = 0;
 	spin_lock(&dentry->d_lock);
 	di = ceph_dentry(dentry);
 	if (di && di->lease_session) {
 		s = di->lease_session;
 		/* snapshot the session's cap gen/ttl under its own lock */
 		spin_lock(&s->s_cap_lock);
 		gen = s->s_cap_gen;
 		ttl = s->s_cap_ttl;
 		spin_unlock(&s->s_cap_lock);
 		if (di->lease_gen == gen &&
 		    time_before(jiffies, dentry->d_time) &&
 		    time_before(jiffies, ttl)) {
 			valid = 1;
 			if (di->lease_renew_after &&
 			    time_after(jiffies, di->lease_renew_after)) {
 				/* we should renew */
 				dir = dentry->d_parent->d_inode;
 				session = ceph_get_mds_session(s);
 				seq = di->lease_seq;
 				/* clear so only one renewal is issued */
 				di->lease_renew_after = 0;
 				di->lease_renew_from = jiffies;
 			}
 		}
 	}
 	spin_unlock(&dentry->d_lock);
 	/* send the renewal outside d_lock, then drop our session ref */
 	if (session) {
 		ceph_mdsc_lease_send_msg(session, dir, dentry,
 					 CEPH_MDS_LEASE_RENEW, seq);
 		ceph_put_mds_session(session);
 	}
 	dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid);
 	return valid;
 }
 /*
  * Check if the directory-wide content lease/cap covers this dentry:
  * the dentry's shared-gen stamp must equal the directory's current
  * i_shared_gen and FILE_SHARED must be issued.  Returns non-zero when
  * valid.
  */
 static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
 {
 	struct ceph_inode_info *ci = ceph_inode(dir);
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
 	int valid;
 	spin_lock(&dir->i_lock);
 	valid = (ci->i_shared_gen == di->lease_shared_gen) ?
 		__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1) : 0;
 	spin_unlock(&dir->i_lock);
 	dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
 	     dir, (unsigned)ci->i_shared_gen, dentry,
 	     (unsigned)di->lease_shared_gen, valid);
 	return valid;
 }
 /*
  * Check if a cached dentry can be trusted.
  *
  * Dentries in a snapshot directory and dentries pointing at a snapdir
  * are always trusted.  Everything else needs a valid dentry lease or a
  * valid directory lease.  A trusted dentry is moved to the tail of the
  * private LRU and 1 is returned; an untrusted one is dropped and 0 is
  * returned.
  */
 static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *dir = dentry->d_parent->d_inode;
 	int valid;
 	dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
 	     dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
 	     ceph_dentry(dentry)->offset);
 	if (ceph_snap(dir) != CEPH_NOSNAP) {
 		/* always trust cached snapped dentries, snapdir dentry */
 		dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
 		     dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
 		valid = 1;
 	} else if (dentry->d_inode &&
 		   ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
 		valid = 1;
 	} else {
 		valid = dentry_lease_is_valid(dentry) ||
 			dir_lease_is_valid(dir, dentry);
 	}
 	if (!valid) {
 		dout("d_revalidate %p invalid\n", dentry);
 		d_drop(dentry);
 		return 0;
 	}
 	ceph_dentry_lru_touch(dentry);
 	return 1;
 }
 /*
  * When a dentry is released, clear the dir I_COMPLETE if it was part
  * of the current dir gen or if this is in the snapshot namespace, and
  * bump the directory's i_release_count so an in-progress readdir
  * notices.  Then free the per-dentry ceph state, if any.
  *
  * The per-dentry info may be NULL (this function itself checks for
  * that before freeing), so it must not be dereferenced unguarded when
  * comparing lease generations.
  */
 static void ceph_dentry_release(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
 	struct inode *parent_inode = NULL;
 	u64 snapid = CEPH_NOSNAP;
 	if (!IS_ROOT(dentry)) {
 		parent_inode = dentry->d_parent->d_inode;
 		if (parent_inode)
 			snapid = ceph_snap(parent_inode);
 	}
 	dout("dentry_release %p parent %p\n", dentry, parent_inode);
 	if (parent_inode && snapid != CEPH_SNAPDIR) {
 		struct ceph_inode_info *ci = ceph_inode(parent_inode);
 		spin_lock(&parent_inode->i_lock);
 		/* without dentry info there is no lease gen to compare */
 		if ((di && ci->i_shared_gen == di->lease_shared_gen) ||
 		    snapid <= CEPH_MAXSNAP) {
 			dout(" clearing %p complete (d_release)\n",
 			     parent_inode);
 			ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
 			ci->i_release_count++;
 		}
 		spin_unlock(&parent_inode->i_lock);
 	}
 	if (di) {
 		ceph_dentry_lru_del(dentry);
 		if (di->lease_session)
 			ceph_put_mds_session(di->lease_session);
 		kmem_cache_free(ceph_dentry_cachep, di);
 		dentry->d_fsdata = NULL;
 	}
 }
 /*
  * Revalidate hook used by ceph_snapdir_dentry_ops: such dentries are
  * unconditionally reported valid.  Eventually, we'll want to
  * revalidate snapped metadata too... probably...
  */
 static int ceph_snapdir_d_revalidate(struct dentry *dentry,
 					  struct nameidata *nd)
 {
 	return 1;
 }
 /*
  * read() on a dir.  This weird interface hack only works if mounted
  * with '-o dirstat'; otherwise -EISDIR is returned.
  *
  * On first use a text summary of the directory's entry/file/subdir
  * counts, their recursive counterparts, rbytes and rctime is formatted
  * into a buffer cached on the open file; subsequent reads copy out of
  * that buffer at *ppos.
  *
  * Returns the number of bytes copied (0 at end of buffer or for a
  * zero-length request), -ENOMEM if the buffer cannot be allocated, or
  * -EFAULT if nothing at all could be copied to user space.
  */
 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 			     loff_t *ppos)
 {
 	struct ceph_file_info *cf = file->private_data;
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int left;
 	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
 		return -EISDIR;
 	if (!cf->dir_info) {
 		cf->dir_info = kmalloc(1024, GFP_NOFS);
 		if (!cf->dir_info)
 			return -ENOMEM;
 		cf->dir_info_len =
 			sprintf(cf->dir_info,
 				"entries:   %20lld\n"
 				" files:    %20lld\n"
 				" subdirs:  %20lld\n"
 				"rentries:  %20lld\n"
 				" rfiles:   %20lld\n"
 				" rsubdirs: %20lld\n"
 				"rbytes:    %20lld\n"
 				"rctime:    %10ld.%09ld\n",
 				ci->i_files + ci->i_subdirs,
 				ci->i_files,
 				ci->i_subdirs,
 				ci->i_rfiles + ci->i_rsubdirs,
 				ci->i_rfiles,
 				ci->i_rsubdirs,
 				ci->i_rbytes,
 				(long)ci->i_rctime.tv_sec,
 				(long)ci->i_rctime.tv_nsec);
 	}
 	if (*ppos >= cf->dir_info_len)
 		return 0;
 	size = min_t(unsigned, size, cf->dir_info_len-*ppos);
 	/*
 	 * A zero-length read copies nothing, so "left == size" below would
 	 * misreport it as a fault; answer it directly instead.
 	 */
 	if (!size)
 		return 0;
 	left = copy_to_user(buf, cf->dir_info + *ppos, size);
 	/* only a total failure is an error; partial copies are reported */
 	if (left == size)
 		return -EFAULT;
 	*ppos += (size - left);
 	return size - left;
 }
 /*
  * an fsync() on a dir will wait for any uncommitted directory
  * operations to commit.
  *
  * The tid of the request at the tail of the unsafe-dirops list is
  * recorded up front; we then wait on requests one at a time until the
  * list is empty or the request at its head has a tid at or beyond that
  * recorded tid.
  *
  * Returns 0 on success or -EIO if a wait with a timeout expires.
  */
 static int ceph_dir_fsync(struct file *file, int datasync)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct list_head *head = &ci->i_unsafe_dirops;
 	struct ceph_mds_request *req;
 	u64 last_tid;
 	int ret = 0;
 	dout("dir_fsync %p\n", inode);
 	spin_lock(&ci->i_unsafe_lock);
 	if (list_empty(head))
 		goto out;
 	/* start with the request at the tail of the list */
 	req = list_entry(head->prev,
 			 struct ceph_mds_request, r_unsafe_dir_item);
 	last_tid = req->r_tid;
 	do {
 		/* pin the request, then drop the spinlock so we may sleep */
 		ceph_mdsc_get_request(req);
 		spin_unlock(&ci->i_unsafe_lock);
 		dout("dir_fsync %p wait on tid %llu (until %llu)\n",
 		     inode, req->r_tid, last_tid);
 		if (req->r_timeout) {
 			ret = wait_for_completion_timeout(
 				&req->r_safe_completion, req->r_timeout);
 			if (ret > 0)
 				ret = 0;
 			else if (ret == 0)
 				ret = -EIO;  /* timed out */
 		} else {
 			wait_for_completion(&req->r_safe_completion);
 		}
 		spin_lock(&ci->i_unsafe_lock);
 		ceph_mdsc_put_request(req);
 		if (ret || list_empty(head))
 			break;
 		/* continue with whatever is now at the head of the list */
 		req = list_entry(head->next,
 				 struct ceph_mds_request, r_unsafe_dir_item);
 	} while (req->r_tid < last_tid);
 out:
 	spin_unlock(&ci->i_unsafe_lock);
 	return ret;
 }
 /*
  * We maintain a private dentry LRU.
  *
  * FIXME: this needs to be changed to a per-mds lru to be useful.
  *
  * Append the dentry's ceph info to the tail of the client-wide LRU and
  * bump the dentry count.  Does nothing if the dentry has no ceph info.
  */
 void ceph_dentry_lru_add(struct dentry *dn)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dn);
 	struct ceph_mds_client *mdsc;
 	dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
 	     dn->d_name.len, dn->d_name.name);
 	if (!di)
 		return;
 	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
 	spin_lock(&mdsc->dentry_lru_lock);
 	list_add_tail(&di->lru, &mdsc->dentry_lru);
 	mdsc->num_dentry++;
 	spin_unlock(&mdsc->dentry_lru_lock);
 }
 /*
  * Move the dentry's ceph info to the tail of the private LRU.
  *
  * Does nothing if the dentry has no ceph info attached.  That case is
  * checked before the debug message dereferences the info.
  */
 void ceph_dentry_lru_touch(struct dentry *dn)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dn);
 	struct ceph_mds_client *mdsc;
 	if (!di) {
 		dout("dentry_lru_touch %p %p '%.*s' (no dentry info)\n", di, dn,
 		     dn->d_name.len, dn->d_name.name);
 		return;
 	}
 	dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
 	     dn->d_name.len, dn->d_name.name, di->offset);
 	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
 	spin_lock(&mdsc->dentry_lru_lock);
 	list_move_tail(&di->lru, &mdsc->dentry_lru);
 	spin_unlock(&mdsc->dentry_lru_lock);
 }
 /*
  * Unlink the dentry's ceph info from the private LRU and decrement the
  * dentry count.  Does nothing if the dentry has no ceph info.
  */
 void ceph_dentry_lru_del(struct dentry *dn)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dn);
 	struct ceph_mds_client *mdsc;
 	dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
 	     dn->d_name.len, dn->d_name.name);
 	if (!di)
 		return;
 	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
 	spin_lock(&mdsc->dentry_lru_lock);
 	list_del_init(&di->lru);
 	mdsc->num_dentry--;
 	spin_unlock(&mdsc->dentry_lru_lock);
 }
 /* File operations for an open ceph directory. */
 const struct file_operations ceph_dir_fops = {
 	/* open / close */
 	.open = ceph_open,
 	.release = ceph_release,
 	/* reading and positioning */
 	.readdir = ceph_readdir,
 	.read = ceph_read_dir,
 	.llseek = ceph_dir_llseek,
 	/* misc */
 	.fsync = ceph_dir_fsync,
 	.unlocked_ioctl = ceph_ioctl,
 };
 /*
  * inode_operations table binding the ceph directory handlers.
  * Designated initializers, so member order here is cosmetic.
  */
 const struct inode_operations ceph_dir_iops = {
 	/* name lookup and entry creation */
 	.lookup = ceph_lookup,
 	.create = ceph_create,
 	.mknod = ceph_mknod,
 	.mkdir = ceph_mkdir,
 	.symlink = ceph_symlink,
 	.link = ceph_link,
 	/* entry removal and rename; rmdir shares the unlink handler */
 	.unlink = ceph_unlink,
 	.rmdir = ceph_unlink,
 	.rename = ceph_rename,
 	/* permission check and attributes */
 	.permission = ceph_permission,
 	.getattr = ceph_getattr,
 	.setattr = ceph_setattr,
 	/* extended attributes */
 	.getxattr = ceph_getxattr,
 	.setxattr = ceph_setxattr,
 	.listxattr = ceph_listxattr,
 	.removexattr = ceph_removexattr,
 };
 /*
  * Three dentry_operations tables.  All of them release through
  * ceph_dentry_release(); they differ only in the revalidate hook.
  */
 /* revalidates with ceph_d_revalidate() */
 const struct dentry_operations ceph_dentry_ops = {
 	.d_release = ceph_dentry_release,
 	.d_revalidate = ceph_d_revalidate,
 };
 /* revalidates with ceph_snapdir_d_revalidate() */
 const struct dentry_operations ceph_snapdir_dentry_ops = {
 	.d_release = ceph_dentry_release,
 	.d_revalidate = ceph_snapdir_d_revalidate,
 };
 /* no revalidate hook installed */
 const struct dentry_operations ceph_snap_dentry_ops = {
 	.d_release = ceph_dentry_release,
 };

fs/configfs/configfs_internal.h

Diff comments View file @ da50295

 /* -*- mode: c; c-basic-offset:8; -*-
  * vim: noexpandtab sw=8 ts=8 sts=0:
  *
  * configfs_internal.h - Internal stuff for configfs
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this program; if not, write to the
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 02111-1307, USA.
  *
  * Based on sysfs:
  * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
  *
  * configfs Copyright (C) 2005 Oracle.  All rights reserved.
  */
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 /*
  * Per-entry bookkeeping object for configfs.  A dentry's d_fsdata is
  * read back as a pointer to one of these by to_item(), to_attr() and
  * configfs_get_config_item().
  */
 struct configfs_dirent {
 	/* reference count; see configfs_get() / configfs_put() */
 	atomic_t		s_count;
 	int			s_dependent_count;
 	/* list linkage -- NOTE(review): exact list membership is not visible in this header */
 	struct list_head	s_sibling;
 	struct list_head	s_children;
 	struct list_head	s_links;
 	/*
 	 * Payload, interpreted according to s_type: a configfs_symlink when
 	 * CONFIGFS_ITEM_LINK is set, otherwise read as a config_item or a
 	 * configfs_attribute by the accessors in this header.
 	 */
 	void			* s_element;
 	/* CONFIGFS_* flag bits */
 	int			s_type;
 	umode_t			s_mode;
 	struct dentry		* s_dentry;
 	/* kfree()d by release_configfs_dirent() */
 	struct iattr		* s_iattr;
 #ifdef CONFIG_LOCKDEP
 	/* only present in lockdep builds */
 	int			s_depth;
 #endif
 };
 /*
  * Flag bits.  CONFIGFS_ROOT and CONFIGFS_ITEM_LINK are tested against
  * configfs_dirent.s_type in this header; the remaining values are
  * presumably s_type bits as well -- confirm against their users.
  * Values 0x0008 and 0x0010 are not assigned here.
  */
 #define CONFIGFS_ROOT		0x0001
 #define CONFIGFS_DIR		0x0002
 #define CONFIGFS_ITEM_ATTR	0x0004
 #define CONFIGFS_ITEM_LINK	0x0020
 #define CONFIGFS_USET_DIR	0x0040
 #define CONFIGFS_USET_DEFAULT	0x0080
 #define CONFIGFS_USET_DROPPING	0x0100
 #define CONFIGFS_USET_IN_MKDIR	0x0200
 #define CONFIGFS_USET_CREATING	0x0400
 /* alias: currently exactly the attribute bit */
 #define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
 /*
  * Objects and functions declared here are defined outside this header.
  */
 /* shared locks, mount and the slab cache that configfs_dirents are freed to */
 extern struct mutex configfs_symlink_mutex;
 extern spinlock_t configfs_dirent_lock;
 extern struct vfsmount * configfs_mount;
 extern struct kmem_cache *configfs_dir_cachep;
 /* inode / dirent / file helpers */
 extern int configfs_is_root(struct config_item *item);
 extern struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent *);
 extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *));
 extern int configfs_inode_init(void);
 extern void configfs_inode_exit(void);
 extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
 extern int configfs_make_dirent(struct configfs_dirent *,
 				struct dentry *, void *, umode_t, int);
 extern int configfs_dirent_is_ready(struct configfs_dirent *);
 extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
 extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
 extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
 extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
 extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
 extern int configfs_pin_fs(void);
 extern void configfs_release_fs(void);
 /* more shared state */
 extern struct rw_semaphore configfs_rename_sem;
 extern struct super_block * configfs_sb;
 /* operation tables */
 extern const struct file_operations configfs_dir_operations;
 extern const struct file_operations configfs_file_operations;
 extern const struct file_operations bin_fops;
 extern const struct inode_operations configfs_dir_inode_operations;
 extern const struct inode_operations configfs_symlink_inode_operations;
 /* symlink / unlink entry points */
 extern int configfs_symlink(struct inode *dir, struct dentry *dentry,
 			    const char *symname);
 extern int configfs_unlink(struct inode *dir, struct dentry *dentry);
 /*
  * Symlink record.  When a configfs_dirent has CONFIGFS_ITEM_LINK set,
  * its s_element is read as a pointer to one of these.
  */
 struct configfs_symlink {
 	/* list linkage -- NOTE(review): owning list is not visible in this header */
 	struct list_head sl_list;
 	/* item the link points at; configfs_get_config_item() takes its reference here */
 	struct config_item *sl_target;
 };
 /* defined outside this header */
 extern int configfs_create_link(struct configfs_symlink *sl,
 				struct dentry *parent,
 				struct dentry *dentry);
 /*
  * Read a dentry's d_fsdata as a configfs_dirent and hand back its
  * s_element as a config_item.  No NULL checks and no reference is taken.
  */
 static inline struct config_item * to_item(struct dentry * dentry)
 {
 	struct configfs_dirent *dirent = dentry->d_fsdata;
 	struct config_item *item = dirent->s_element;
 	return item;
 }
 /*
  * Read a dentry's d_fsdata as a configfs_dirent and hand back its
  * s_element as a configfs_attribute.  No NULL checks are performed.
  */
 static inline struct configfs_attribute * to_attr(struct dentry * dentry)
 {
 	struct configfs_dirent *dirent = dentry->d_fsdata;
 	struct configfs_attribute *attr = dirent->s_element;
 	return attr;
 }
 /*
  * Return the result of config_item_get() for the item behind @dentry,
  * or NULL if the dentry is unhashed.
  *
  * For a dirent with CONFIGFS_ITEM_LINK set, the reference is taken on
  * the symlink's sl_target; otherwise on the dirent's s_element itself.
  *
  * dcache_lock is taken first and dentry->d_lock nested inside it; both
  * are held across the d_unhashed() test and the reference grab.
  */
 static inline struct config_item *configfs_get_config_item(struct dentry *dentry)
 {
 	struct config_item * item = NULL;
 	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
 	if (!d_unhashed(dentry)) {
 		struct configfs_dirent * sd = dentry->d_fsdata;
 		if (sd->s_type & CONFIGFS_ITEM_LINK) {
 			/* symlink: s_element is a configfs_symlink, pin its target */
 			struct configfs_symlink * sl = sd->s_element;
 			item = config_item_get(sl->sl_target);
 		} else
 			item = config_item_get(sd->s_element);
 	}
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 	return item;
 }
 /*
  * Free a configfs_dirent: kfree() its s_iattr, then return the dirent
  * to configfs_dir_cachep.  A dirent flagged CONFIGFS_ROOT is left alone.
  */
 static inline void release_configfs_dirent(struct configfs_dirent * sd)
 {
 	if (sd->s_type & CONFIGFS_ROOT)
 		return;
 	kfree(sd->s_iattr);
 	kmem_cache_free(configfs_dir_cachep, sd);
 }
 /*
  * Take a reference on @sd by incrementing s_count, warning if the count
  * was already zero.  Returns @sd unchanged; a NULL @sd passes through.
  */
 static inline struct configfs_dirent * configfs_get(struct configfs_dirent * sd)
 {
 	if (!sd)
 		return sd;
 	WARN_ON(atomic_read(&sd->s_count) == 0);
 	atomic_inc(&sd->s_count);
 	return sd;
 }
 /*
  * Drop a reference on @sd, warning if s_count was already zero.  When
  * the decrement reaches zero the dirent goes to release_configfs_dirent().
  * @sd must not be NULL.
  */
 static inline void configfs_put(struct configfs_dirent * sd)
 {
 	WARN_ON(atomic_read(&sd->s_count) == 0);
 	if (!atomic_dec_and_test(&sd->s_count))
 		return;
 	release_configfs_dirent(sd);
 }

fs/dcache.c

Diff comments View file @ da50295

1	/*	1	/*
2	* fs/dcache.c	2	* fs/dcache.c
3	*	3	*
4	* Complete reimplementation	4	* Complete reimplementation
5	* (C) 1997 Thomas Schoebel-Theuer,	5	* (C) 1997 Thomas Schoebel-Theuer,
6	* with heavy changes by Linus Torvalds	6	* with heavy changes by Linus Torvalds
7	*/	7	*/
8		8
9	/*	9	/*
10	* Notes on the allocation strategy:	10	* Notes on the allocation strategy:
11	*	11	*
12	* The dcache is a master of the icache - whenever a dcache entry	12	* The dcache is a master of the icache - whenever a dcache entry
13	* exists, the inode will always exist. "iput()" is done either when	13	* exists, the inode will always exist. "iput()" is done either when
14	* the dcache entry is deleted or garbage collected.	14	* the dcache entry is deleted or garbage collected.
15	*/	15	*/
16		16
17	#include <linux/syscalls.h>	17	#include <linux/syscalls.h>
18	#include <linux/string.h>	18	#include <linux/string.h>
19	#include <linux/mm.h>	19	#include <linux/mm.h>
20	#include <linux/fs.h>	20	#include <linux/fs.h>
21	#include <linux/fsnotify.h>	21	#include <linux/fsnotify.h>
22	#include <linux/slab.h>	22	#include <linux/slab.h>
23	#include <linux/init.h>	23	#include <linux/init.h>
24	#include <linux/hash.h>	24	#include <linux/hash.h>
25	#include <linux/cache.h>	25	#include <linux/cache.h>
26	#include <linux/module.h>	26	#include <linux/module.h>
27	#include <linux/mount.h>	27	#include <linux/mount.h>
28	#include <linux/file.h>	28	#include <linux/file.h>
29	#include <asm/uaccess.h>	29	#include <asm/uaccess.h>
30	#include <linux/security.h>	30	#include <linux/security.h>
31	#include <linux/seqlock.h>	31	#include <linux/seqlock.h>
32	#include <linux/swap.h>	32	#include <linux/swap.h>
33	#include <linux/bootmem.h>	33	#include <linux/bootmem.h>
34	#include <linux/fs_struct.h>	34	#include <linux/fs_struct.h>
35	#include <linux/hardirq.h>	35	#include <linux/hardirq.h>
36	#include "internal.h"	36	#include "internal.h"
37		37
38	/*	38	/*
39	* Usage:	39	* Usage:
40	* dcache_hash_lock protects:	40	* dcache_hash_lock protects:
41	* - the dcache hash table, s_anon lists	41	* - the dcache hash table, s_anon lists
42	* dcache_lru_lock protects:	42	* dcache_lru_lock protects:
43	* - the dcache lru lists and counters	43	* - the dcache lru lists and counters
44	* d_lock protects:	44	* d_lock protects:
45	* - d_flags	45	* - d_flags
46	* - d_name	46	* - d_name
47	* - d_lru	47	* - d_lru
48	* - d_count	48	* - d_count
		49	* - d_unhashed()
49	*	50	*
50	* Ordering:	51	* Ordering:
51	* dcache_lock	52	* dcache_lock
52	* dentry->d_lock	53	* dentry->d_lock
53	* dcache_lru_lock	54	* dcache_lru_lock
54	* dcache_hash_lock	55	* dcache_hash_lock
55	*	56	*
		57	* If there is an ancestor relationship:
		58	* dentry->d_parent->...->d_parent->d_lock
		59	* ...
		60	* dentry->d_parent->d_lock
		61	* dentry->d_lock
		62	*
		63	* If no ancestor relationship:
56	* if (dentry1 < dentry2)	64	* if (dentry1 < dentry2)
57	* dentry1->d_lock	65	* dentry1->d_lock
58	* dentry2->d_lock	66	* dentry2->d_lock
59	*/	67	*/
60	int sysctl_vfs_cache_pressure __read_mostly = 100;	68	int sysctl_vfs_cache_pressure __read_mostly = 100;
61	EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);	69	EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
62		70
63	static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock);	71	static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock);
64	static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);	72	static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
65	__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);	73	__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
66	__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);	74	__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
67		75
68	EXPORT_SYMBOL(dcache_lock);	76	EXPORT_SYMBOL(dcache_lock);
69		77
70	static struct kmem_cache *dentry_cache __read_mostly;	78	static struct kmem_cache *dentry_cache __read_mostly;
71		79
72	#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))	80	#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
73		81
74	/*	82	/*
75	* This is the single most critical data structure when it comes	83	* This is the single most critical data structure when it comes
76	* to the dcache: the hashtable for lookups. Somebody should try	84	* to the dcache: the hashtable for lookups. Somebody should try
77	* to make this good - I've just made it work.	85	* to make this good - I've just made it work.
78	*	86	*
79	* This hash-function tries to avoid losing too many bits of hash	87	* This hash-function tries to avoid losing too many bits of hash
80	* information, yet avoid using a prime hash-size or similar.	88	* information, yet avoid using a prime hash-size or similar.
81	*/	89	*/
82	#define D_HASHBITS d_hash_shift	90	#define D_HASHBITS d_hash_shift
83	#define D_HASHMASK d_hash_mask	91	#define D_HASHMASK d_hash_mask
84		92
85	static unsigned int d_hash_mask __read_mostly;	93	static unsigned int d_hash_mask __read_mostly;
86	static unsigned int d_hash_shift __read_mostly;	94	static unsigned int d_hash_shift __read_mostly;
87	static struct hlist_head *dentry_hashtable __read_mostly;	95	static struct hlist_head *dentry_hashtable __read_mostly;
88		96
89	/* Statistics gathering. */	97	/* Statistics gathering. */
90	struct dentry_stat_t dentry_stat = {	98	struct dentry_stat_t dentry_stat = {
91	.age_limit = 45,	99	.age_limit = 45,
92	};	100	};
93		101
94	static DEFINE_PER_CPU(unsigned int, nr_dentry);	102	static DEFINE_PER_CPU(unsigned int, nr_dentry);
95		103
96	#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)	104	#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
97	static int get_nr_dentry(void)	105	static int get_nr_dentry(void)
98	{	106	{
99	int i;	107	int i;
100	int sum = 0;	108	int sum = 0;
101	for_each_possible_cpu(i)	109	for_each_possible_cpu(i)
102	sum += per_cpu(nr_dentry, i);	110	sum += per_cpu(nr_dentry, i);
103	return sum < 0 ? 0 : sum;	111	return sum < 0 ? 0 : sum;
104	}	112	}
105		113
106	int proc_nr_dentry(ctl_table table, int write, void __user buffer,	114	int proc_nr_dentry(ctl_table table, int write, void __user buffer,
107	size_t lenp, loff_t ppos)	115	size_t lenp, loff_t ppos)
108	{	116	{
109	dentry_stat.nr_dentry = get_nr_dentry();	117	dentry_stat.nr_dentry = get_nr_dentry();
110	return proc_dointvec(table, write, buffer, lenp, ppos);	118	return proc_dointvec(table, write, buffer, lenp, ppos);
111	}	119	}
112	#endif	120	#endif
113		121
114	static void __d_free(struct rcu_head *head)	122	static void __d_free(struct rcu_head *head)
115	{	123	{
116	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);	124	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
117		125
118	WARN_ON(!list_empty(&dentry->d_alias));	126	WARN_ON(!list_empty(&dentry->d_alias));
119	if (dname_external(dentry))	127	if (dname_external(dentry))
120	kfree(dentry->d_name.name);	128	kfree(dentry->d_name.name);
121	kmem_cache_free(dentry_cache, dentry);	129	kmem_cache_free(dentry_cache, dentry);
122	}	130	}
123		131
124	/*	132	/*
125	* no dcache_lock, please.	133	* no dcache_lock, please.
126	*/	134	*/
127	static void d_free(struct dentry *dentry)	135	static void d_free(struct dentry *dentry)
128	{	136	{
129	BUG_ON(dentry->d_count);	137	BUG_ON(dentry->d_count);
130	this_cpu_dec(nr_dentry);	138	this_cpu_dec(nr_dentry);
131	if (dentry->d_op && dentry->d_op->d_release)	139	if (dentry->d_op && dentry->d_op->d_release)
132	dentry->d_op->d_release(dentry);	140	dentry->d_op->d_release(dentry);
133		141
134	/* if dentry was never inserted into hash, immediate free is OK */	142	/* if dentry was never inserted into hash, immediate free is OK */
135	if (hlist_unhashed(&dentry->d_hash))	143	if (hlist_unhashed(&dentry->d_hash))
136	__d_free(&dentry->d_u.d_rcu);	144	__d_free(&dentry->d_u.d_rcu);
137	else	145	else
138	call_rcu(&dentry->d_u.d_rcu, __d_free);	146	call_rcu(&dentry->d_u.d_rcu, __d_free);
139	}	147	}
140		148
141	/*	149	/*
142	* Release the dentry's inode, using the filesystem	150	* Release the dentry's inode, using the filesystem
143	* d_iput() operation if defined.	151	* d_iput() operation if defined.
144	*/	152	*/
145	static void dentry_iput(struct dentry * dentry)	153	static void dentry_iput(struct dentry * dentry)
146	__releases(dentry->d_lock)	154	__releases(dentry->d_lock)
147	__releases(dcache_lock)	155	__releases(dcache_lock)
148	{	156	{
149	struct inode *inode = dentry->d_inode;	157	struct inode *inode = dentry->d_inode;
150	if (inode) {	158	if (inode) {
151	dentry->d_inode = NULL;	159	dentry->d_inode = NULL;
152	list_del_init(&dentry->d_alias);	160	list_del_init(&dentry->d_alias);
153	spin_unlock(&dentry->d_lock);	161	spin_unlock(&dentry->d_lock);
154	spin_unlock(&dcache_lock);	162	spin_unlock(&dcache_lock);
155	if (!inode->i_nlink)	163	if (!inode->i_nlink)
156	fsnotify_inoderemove(inode);	164	fsnotify_inoderemove(inode);
157	if (dentry->d_op && dentry->d_op->d_iput)	165	if (dentry->d_op && dentry->d_op->d_iput)
158	dentry->d_op->d_iput(dentry, inode);	166	dentry->d_op->d_iput(dentry, inode);
159	else	167	else
160	iput(inode);	168	iput(inode);
161	} else {	169	} else {
162	spin_unlock(&dentry->d_lock);	170	spin_unlock(&dentry->d_lock);
163	spin_unlock(&dcache_lock);	171	spin_unlock(&dcache_lock);
164	}	172	}
165	}	173	}
166		174
167	/*	175	/*
168	* dentry_lru_(add\|del\|move_tail) must be called with d_lock held.	176	* dentry_lru_(add\|del\|move_tail) must be called with d_lock held.
169	*/	177	*/
170	static void dentry_lru_add(struct dentry *dentry)	178	static void dentry_lru_add(struct dentry *dentry)
171	{	179	{
172	if (list_empty(&dentry->d_lru)) {	180	if (list_empty(&dentry->d_lru)) {
173	spin_lock(&dcache_lru_lock);	181	spin_lock(&dcache_lru_lock);
174	list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);	182	list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
175	dentry->d_sb->s_nr_dentry_unused++;	183	dentry->d_sb->s_nr_dentry_unused++;
176	dentry_stat.nr_unused++;	184	dentry_stat.nr_unused++;
177	spin_unlock(&dcache_lru_lock);	185	spin_unlock(&dcache_lru_lock);
178	}	186	}
179	}	187	}
180		188
181	static void __dentry_lru_del(struct dentry *dentry)	189	static void __dentry_lru_del(struct dentry *dentry)
182	{	190	{
183	list_del_init(&dentry->d_lru);	191	list_del_init(&dentry->d_lru);
184	dentry->d_sb->s_nr_dentry_unused--;	192	dentry->d_sb->s_nr_dentry_unused--;
185	dentry_stat.nr_unused--;	193	dentry_stat.nr_unused--;
186	}	194	}
187		195
188	static void dentry_lru_del(struct dentry *dentry)	196	static void dentry_lru_del(struct dentry *dentry)
189	{	197	{
190	if (!list_empty(&dentry->d_lru)) {	198	if (!list_empty(&dentry->d_lru)) {
191	spin_lock(&dcache_lru_lock);	199	spin_lock(&dcache_lru_lock);
192	__dentry_lru_del(dentry);	200	__dentry_lru_del(dentry);
193	spin_unlock(&dcache_lru_lock);	201	spin_unlock(&dcache_lru_lock);
194	}	202	}
195	}	203	}
196		204
197	static void dentry_lru_move_tail(struct dentry *dentry)	205	static void dentry_lru_move_tail(struct dentry *dentry)
198	{	206	{
199	spin_lock(&dcache_lru_lock);	207	spin_lock(&dcache_lru_lock);
200	if (list_empty(&dentry->d_lru)) {	208	if (list_empty(&dentry->d_lru)) {
201	list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);	209	list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
202	dentry->d_sb->s_nr_dentry_unused++;	210	dentry->d_sb->s_nr_dentry_unused++;
203	dentry_stat.nr_unused++;	211	dentry_stat.nr_unused++;
204	} else {	212	} else {
205	list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);	213	list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
206	}	214	}
207	spin_unlock(&dcache_lru_lock);	215	spin_unlock(&dcache_lru_lock);
208	}	216	}
209		217
210	/**	218	/**
211	* d_kill - kill dentry and return parent	219	* d_kill - kill dentry and return parent
212	* @dentry: dentry to kill	220	* @dentry: dentry to kill
213	*	221	*
214	* The dentry must already be unhashed and removed from the LRU.	222	* The dentry must already be unhashed and removed from the LRU.
215	*	223	*
216	* If this is the root of the dentry tree, return NULL.	224	* If this is the root of the dentry tree, return NULL.
217	*	225	*
218	* dcache_lock and d_lock must be held by caller, are dropped by d_kill.	226	* dcache_lock and d_lock must be held by caller, are dropped by d_kill.
219	*/	227	*/
220	static struct dentry d_kill(struct dentry dentry)	228	static struct dentry d_kill(struct dentry dentry)
221	__releases(dentry->d_lock)	229	__releases(dentry->d_lock)
222	__releases(dcache_lock)	230	__releases(dcache_lock)
223	{	231	{
224	struct dentry *parent;	232	struct dentry *parent;
225		233
226	list_del(&dentry->d_u.d_child);	234	list_del(&dentry->d_u.d_child);
227	dentry_iput(dentry);	235	dentry_iput(dentry);
228	/*	236	/*
229	* dentry_iput drops the locks, at which point nobody (except	237	* dentry_iput drops the locks, at which point nobody (except
230	* transient RCU lookups) can reach this dentry.	238	* transient RCU lookups) can reach this dentry.
231	*/	239	*/
232	if (IS_ROOT(dentry))	240	if (IS_ROOT(dentry))
233	parent = NULL;	241	parent = NULL;
234	else	242	else
235	parent = dentry->d_parent;	243	parent = dentry->d_parent;
236	d_free(dentry);	244	d_free(dentry);
237	return parent;	245	return parent;
238	}	246	}
239		247
240	/**	248	/**
241	* d_drop - drop a dentry	249	* d_drop - drop a dentry
242	* @dentry: dentry to drop	250	* @dentry: dentry to drop
243	*	251	*
244	* d_drop() unhashes the entry from the parent dentry hashes, so that it won't	252	* d_drop() unhashes the entry from the parent dentry hashes, so that it won't
245	* be found through a VFS lookup any more. Note that this is different from	253	* be found through a VFS lookup any more. Note that this is different from
246	* deleting the dentry - d_delete will try to mark the dentry negative if	254	* deleting the dentry - d_delete will try to mark the dentry negative if
247	* possible, giving a successful _negative_ lookup, while d_drop will	255	* possible, giving a successful _negative_ lookup, while d_drop will
248	* just make the cache lookup fail.	256	* just make the cache lookup fail.
249	*	257	*
250	* d_drop() is used mainly for stuff that wants to invalidate a dentry for some	258	* d_drop() is used mainly for stuff that wants to invalidate a dentry for some
251	* reason (NFS timeouts or autofs deletes).	259	* reason (NFS timeouts or autofs deletes).
252	*	260	*
253	* __d_drop requires dentry->d_lock.	261	* __d_drop requires dentry->d_lock.
254	*/	262	*/
255	void __d_drop(struct dentry *dentry)	263	void __d_drop(struct dentry *dentry)
256	{	264	{
257	if (!(dentry->d_flags & DCACHE_UNHASHED)) {	265	if (!(dentry->d_flags & DCACHE_UNHASHED)) {
258	dentry->d_flags \|= DCACHE_UNHASHED;	266	dentry->d_flags \|= DCACHE_UNHASHED;
259	spin_lock(&dcache_hash_lock);	267	spin_lock(&dcache_hash_lock);
260	hlist_del_rcu(&dentry->d_hash);	268	hlist_del_rcu(&dentry->d_hash);
261	spin_unlock(&dcache_hash_lock);	269	spin_unlock(&dcache_hash_lock);
262	}	270	}
263	}	271	}
264	EXPORT_SYMBOL(__d_drop);	272	EXPORT_SYMBOL(__d_drop);
265		273
266	void d_drop(struct dentry *dentry)	274	void d_drop(struct dentry *dentry)
267	{	275	{
268	spin_lock(&dcache_lock);	276	spin_lock(&dcache_lock);
269	spin_lock(&dentry->d_lock);	277	spin_lock(&dentry->d_lock);
270	__d_drop(dentry);	278	__d_drop(dentry);
271	spin_unlock(&dentry->d_lock);	279	spin_unlock(&dentry->d_lock);
272	spin_unlock(&dcache_lock);	280	spin_unlock(&dcache_lock);
273	}	281	}
274	EXPORT_SYMBOL(d_drop);	282	EXPORT_SYMBOL(d_drop);
275		283
276	/*	284	/*
277	* This is dput	285	* This is dput
278	*	286	*
279	* This is complicated by the fact that we do not want to put	287	* This is complicated by the fact that we do not want to put
280	* dentries that are no longer on any hash chain on the unused	288	* dentries that are no longer on any hash chain on the unused
281	* list: we'd much rather just get rid of them immediately.	289	* list: we'd much rather just get rid of them immediately.
282	*	290	*
283	* However, that implies that we have to traverse the dentry	291	* However, that implies that we have to traverse the dentry
284	* tree upwards to the parents which might _also_ now be	292	* tree upwards to the parents which might _also_ now be
285	* scheduled for deletion (it may have been only waiting for	293	* scheduled for deletion (it may have been only waiting for
286	* its last child to go away).	294	* its last child to go away).
287	*	295	*
288	* This tail recursion is done by hand as we don't want to depend	296	* This tail recursion is done by hand as we don't want to depend
289	* on the compiler to always get this right (gcc generally doesn't).	297	* on the compiler to always get this right (gcc generally doesn't).
290	* Real recursion would eat up our stack space.	298	* Real recursion would eat up our stack space.
291	*/	299	*/
292		300
293	/*	301	/*
294	* dput - release a dentry	302	* dput - release a dentry
295	* @dentry: dentry to release	303	* @dentry: dentry to release
296	*	304	*
297	* Release a dentry. This will drop the usage count and if appropriate	305	* Release a dentry. This will drop the usage count and if appropriate
298	* call the dentry unlink method as well as removing it from the queues and	306	* call the dentry unlink method as well as removing it from the queues and
299	* releasing its resources. If the parent dentries were scheduled for release	307	* releasing its resources. If the parent dentries were scheduled for release
300	* they too may now get deleted.	308	* they too may now get deleted.
301	*	309	*
302	* no dcache lock, please.	310	* no dcache lock, please.
303	*/	311	*/
304		312
305	void dput(struct dentry *dentry)	313	void dput(struct dentry *dentry)
306	{	314	{
307	if (!dentry)	315	if (!dentry)
308	return;	316	return;
309		317
310	repeat:	318	repeat:
311	if (dentry->d_count == 1)	319	if (dentry->d_count == 1)
312	might_sleep();	320	might_sleep();
313	spin_lock(&dentry->d_lock);	321	spin_lock(&dentry->d_lock);
314	if (dentry->d_count == 1) {	322	if (dentry->d_count == 1) {
315	if (!spin_trylock(&dcache_lock)) {	323	if (!spin_trylock(&dcache_lock)) {
316	/*	324	/*
317	* Something of a livelock possibility we could avoid	325	* Something of a livelock possibility we could avoid
318	* by taking dcache_lock and trying again, but we	326	* by taking dcache_lock and trying again, but we
319	* want to reduce dcache_lock anyway so this will	327	* want to reduce dcache_lock anyway so this will
320	* get improved.	328	* get improved.
321	*/	329	*/
322	spin_unlock(&dentry->d_lock);	330	spin_unlock(&dentry->d_lock);
323	goto repeat;	331	goto repeat;
324	}	332	}
325	}	333	}
326	dentry->d_count--;	334	dentry->d_count--;
327	if (dentry->d_count) {	335	if (dentry->d_count) {
328	spin_unlock(&dentry->d_lock);	336	spin_unlock(&dentry->d_lock);
329	spin_unlock(&dcache_lock);	337	spin_unlock(&dcache_lock);
330	return;	338	return;
331	}	339	}
332		340
333	/*	341	/*
334	* AV: ->d_delete() is _NOT_ allowed to block now.	342	* AV: ->d_delete() is _NOT_ allowed to block now.
335	*/	343	*/
336	if (dentry->d_op && dentry->d_op->d_delete) {	344	if (dentry->d_op && dentry->d_op->d_delete) {
337	if (dentry->d_op->d_delete(dentry))	345	if (dentry->d_op->d_delete(dentry))
338	goto unhash_it;	346	goto unhash_it;
339	}	347	}
340		348
341	/* Unreachable? Get rid of it */	349	/* Unreachable? Get rid of it */
342	if (d_unhashed(dentry))	350	if (d_unhashed(dentry))
343	goto kill_it;	351	goto kill_it;
344		352
345	/* Otherwise leave it cached and ensure it's on the LRU */	353	/* Otherwise leave it cached and ensure it's on the LRU */
346	dentry->d_flags \|= DCACHE_REFERENCED;	354	dentry->d_flags \|= DCACHE_REFERENCED;
347	dentry_lru_add(dentry);	355	dentry_lru_add(dentry);
348		356
349	spin_unlock(&dentry->d_lock);	357	spin_unlock(&dentry->d_lock);
350	spin_unlock(&dcache_lock);	358	spin_unlock(&dcache_lock);
351	return;	359	return;
352		360
353	unhash_it:	361	unhash_it:
354	__d_drop(dentry);	362	__d_drop(dentry);
355	kill_it:	363	kill_it:
356	/* if dentry was on the d_lru list delete it from there */	364	/* if dentry was on the d_lru list delete it from there */
357	dentry_lru_del(dentry);	365	dentry_lru_del(dentry);
358	dentry = d_kill(dentry);	366	dentry = d_kill(dentry);
359	if (dentry)	367	if (dentry)
360	goto repeat;	368	goto repeat;
361	}	369	}
362	EXPORT_SYMBOL(dput);	370	EXPORT_SYMBOL(dput);
363		371
364	/**	372	/**
365	* d_invalidate - invalidate a dentry	373	* d_invalidate - invalidate a dentry
366	* @dentry: dentry to invalidate	374	* @dentry: dentry to invalidate
367	*	375	*
368	* Try to invalidate the dentry if it turns out to be	376	* Try to invalidate the dentry if it turns out to be
369	* possible. If there are other dentries that can be	377	* possible. If there are other dentries that can be
370	* reached through this one we can't delete it and we	378	* reached through this one we can't delete it and we
371	* return -EBUSY. On success we return 0.	379	* return -EBUSY. On success we return 0.
372	*	380	*
373	* no dcache lock.	381	* no dcache lock.
374	*/	382	*/
375		383
376	int d_invalidate(struct dentry * dentry)	384	int d_invalidate(struct dentry * dentry)
377	{	385	{
378	/*	386	/*
379	* If it's already been dropped, return OK.	387	* If it's already been dropped, return OK.
380	*/	388	*/
381	spin_lock(&dcache_lock);	389	spin_lock(&dcache_lock);
		390	spin_lock(&dentry->d_lock);
382	if (d_unhashed(dentry)) {	391	if (d_unhashed(dentry)) {
		392	spin_unlock(&dentry->d_lock);
383	spin_unlock(&dcache_lock);	393	spin_unlock(&dcache_lock);
384	return 0;	394	return 0;
385	}	395	}
386	/*	396	/*
387	* Check whether to do a partial shrink_dcache	397	* Check whether to do a partial shrink_dcache
388	* to get rid of unused child entries.	398	* to get rid of unused child entries.
389	*/	399	*/
390	if (!list_empty(&dentry->d_subdirs)) {	400	if (!list_empty(&dentry->d_subdirs)) {
		401	spin_unlock(&dentry->d_lock);
391	spin_unlock(&dcache_lock);	402	spin_unlock(&dcache_lock);
392	shrink_dcache_parent(dentry);	403	shrink_dcache_parent(dentry);
393	spin_lock(&dcache_lock);	404	spin_lock(&dcache_lock);
		405	spin_lock(&dentry->d_lock);
394	}	406	}
395		407
396	/*	408	/*
397	* Somebody else still using it?	409	* Somebody else still using it?
398	*	410	*
399	* If it's a directory, we can't drop it	411	* If it's a directory, we can't drop it
400	* for fear of somebody re-populating it	412	* for fear of somebody re-populating it
401	* with children (even though dropping it	413	* with children (even though dropping it
402	* would make it unreachable from the root,	414	* would make it unreachable from the root,
403	* we might still populate it if it was a	415	* we might still populate it if it was a
404	* working directory or similar).	416	* working directory or similar).
405	*/	417	*/
406	spin_lock(&dentry->d_lock);
407	if (dentry->d_count > 1) {	418	if (dentry->d_count > 1) {
408	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {	419	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
409	spin_unlock(&dentry->d_lock);	420	spin_unlock(&dentry->d_lock);
410	spin_unlock(&dcache_lock);	421	spin_unlock(&dcache_lock);
411	return -EBUSY;	422	return -EBUSY;
412	}	423	}
413	}	424	}
414		425
415	__d_drop(dentry);	426	__d_drop(dentry);
416	spin_unlock(&dentry->d_lock);	427	spin_unlock(&dentry->d_lock);
417	spin_unlock(&dcache_lock);	428	spin_unlock(&dcache_lock);
418	return 0;	429	return 0;
419	}	430	}
420	EXPORT_SYMBOL(d_invalidate);	431	EXPORT_SYMBOL(d_invalidate);
421		432
422	/* This must be called with dcache_lock and d_lock held */	433	/* This must be called with dcache_lock and d_lock held */
423	static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)	434	static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)
424	{	435	{
425	dentry->d_count++;	436	dentry->d_count++;
426	dentry_lru_del(dentry);	437	dentry_lru_del(dentry);
427	return dentry;	438	return dentry;
428	}	439	}
429		440
430	/* This should be called _only_ with dcache_lock held */	441	/* This should be called _only_ with dcache_lock held */
431	static inline struct dentry * __dget_locked(struct dentry *dentry)	442	static inline struct dentry * __dget_locked(struct dentry *dentry)
432	{	443	{
433	spin_lock(&dentry->d_lock);	444	spin_lock(&dentry->d_lock);
434	__dget_locked_dlock(dentry);	445	__dget_locked_dlock(dentry);
435	spin_unlock(&dentry->d_lock);	446	spin_unlock(&dentry->d_lock);
436	return dentry;	447	return dentry;
437	}	448	}
438		449
439	struct dentry * dget_locked_dlock(struct dentry *dentry)	450	struct dentry * dget_locked_dlock(struct dentry *dentry)
440	{	451	{
441	return __dget_locked_dlock(dentry);	452	return __dget_locked_dlock(dentry);
442	}	453	}
443		454
444	struct dentry * dget_locked(struct dentry *dentry)	455	struct dentry * dget_locked(struct dentry *dentry)
445	{	456	{
446	return __dget_locked(dentry);	457	return __dget_locked(dentry);
447	}	458	}
448	EXPORT_SYMBOL(dget_locked);	459	EXPORT_SYMBOL(dget_locked);
449		460
450	struct dentry dget_parent(struct dentry dentry)	461	struct dentry dget_parent(struct dentry dentry)
451	{	462	{
452	struct dentry *ret;	463	struct dentry *ret;
453		464
454	repeat:	465	repeat:
455	spin_lock(&dentry->d_lock);	466	spin_lock(&dentry->d_lock);
456	ret = dentry->d_parent;	467	ret = dentry->d_parent;
457	if (!ret)	468	if (!ret)
458	goto out;	469	goto out;
459	if (dentry == ret) {	470	if (dentry == ret) {
460	ret->d_count++;	471	ret->d_count++;
461	goto out;	472	goto out;
462	}	473	}
463	if (!spin_trylock(&ret->d_lock)) {	474	if (!spin_trylock(&ret->d_lock)) {
464	spin_unlock(&dentry->d_lock);	475	spin_unlock(&dentry->d_lock);
465	cpu_relax();	476	cpu_relax();
466	goto repeat;	477	goto repeat;
467	}	478	}
468	BUG_ON(!ret->d_count);	479	BUG_ON(!ret->d_count);
469	ret->d_count++;	480	ret->d_count++;
470	spin_unlock(&ret->d_lock);	481	spin_unlock(&ret->d_lock);
471	out:	482	out:
472	spin_unlock(&dentry->d_lock);	483	spin_unlock(&dentry->d_lock);
473	return ret;	484	return ret;
474	}	485	}
475	EXPORT_SYMBOL(dget_parent);	486	EXPORT_SYMBOL(dget_parent);
476		487
477	/**	488	/**
478	* d_find_alias - grab a hashed alias of inode	489	* d_find_alias - grab a hashed alias of inode
479	* @inode: inode in question	490	* @inode: inode in question
480	* @want_discon: flag, used by d_splice_alias, to request	491	* @want_discon: flag, used by d_splice_alias, to request
481	* that only a DISCONNECTED alias be returned.	492	* that only a DISCONNECTED alias be returned.
482	*	493	*
483	* If inode has a hashed alias, or is a directory and has any alias,	494	* If inode has a hashed alias, or is a directory and has any alias,
484	* acquire the reference to alias and return it. Otherwise return NULL.	495	* acquire the reference to alias and return it. Otherwise return NULL.
485	* Notice that if inode is a directory there can be only one alias and	496	* Notice that if inode is a directory there can be only one alias and
486	* it can be unhashed only if it has no children, or if it is the root	497	* it can be unhashed only if it has no children, or if it is the root
487	* of a filesystem.	498	* of a filesystem.
488	*	499	*
489	* If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer	500	* If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
490	* any other hashed alias over that one unless @want_discon is set,	501	* any other hashed alias over that one unless @want_discon is set,
491	* in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.	502	* in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
492	*/	503	*/
493		504	static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
494	static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
495	{	505	{
496	struct list_head *head, *next, *tmp;	506	struct dentry *alias, *discon_alias;
497	struct dentry *alias, *discon_alias=NULL;
498		507
499	head = &inode->i_dentry;	508	again:
500	next = inode->i_dentry.next;	509	discon_alias = NULL;
501	while (next != head) {	510	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
502	tmp = next;	511	spin_lock(&alias->d_lock);
503	next = tmp->next;
504	prefetch(next);
505	alias = list_entry(tmp, struct dentry, d_alias);
506	if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {	512	if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
507	if (IS_ROOT(alias) &&	513	if (IS_ROOT(alias) &&
508	(alias->d_flags & DCACHE_DISCONNECTED))	514	(alias->d_flags & DCACHE_DISCONNECTED)) {
509	discon_alias = alias;	515	discon_alias = alias;
510	else if (!want_discon) {	516	} else if (!want_discon) {
511	__dget_locked(alias);	517	__dget_locked_dlock(alias);
		518	spin_unlock(&alias->d_lock);
512	return alias;	519	return alias;
513	}	520	}
514	}	521	}
		522	spin_unlock(&alias->d_lock);
515	}	523	}
516	if (discon_alias)	524	if (discon_alias) {
517	__dget_locked(discon_alias);	525	alias = discon_alias;
518	return discon_alias;	526	spin_lock(&alias->d_lock);
		527	if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
		528	if (IS_ROOT(alias) &&
		529	(alias->d_flags & DCACHE_DISCONNECTED)) {
		530	__dget_locked_dlock(alias);
		531	spin_unlock(&alias->d_lock);
		532	return alias;
		533	}
		534	}
		535	spin_unlock(&alias->d_lock);
		536	goto again;
		537	}
		538	return NULL;
519	}	539	}
520		540
521	struct dentry * d_find_alias(struct inode *inode)	541	struct dentry *d_find_alias(struct inode *inode)
522	{	542	{
523	struct dentry *de = NULL;	543	struct dentry *de = NULL;
524		544
525	if (!list_empty(&inode->i_dentry)) {	545	if (!list_empty(&inode->i_dentry)) {
526	spin_lock(&dcache_lock);	546	spin_lock(&dcache_lock);
527	de = __d_find_alias(inode, 0);	547	de = __d_find_alias(inode, 0);
528	spin_unlock(&dcache_lock);	548	spin_unlock(&dcache_lock);
529	}	549	}
530	return de;	550	return de;
531	}	551	}
532	EXPORT_SYMBOL(d_find_alias);	552	EXPORT_SYMBOL(d_find_alias);
533		553
534	/*	554	/*
535	* Try to kill dentries associated with this inode.	555	* Try to kill dentries associated with this inode.
536	* WARNING: you must own a reference to inode.	556	* WARNING: you must own a reference to inode.
537	*/	557	*/
538	void d_prune_aliases(struct inode *inode)	558	void d_prune_aliases(struct inode *inode)
539	{	559	{
540	struct dentry *dentry;	560	struct dentry *dentry;
541	restart:	561	restart:
542	spin_lock(&dcache_lock);	562	spin_lock(&dcache_lock);
543	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {	563	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
544	spin_lock(&dentry->d_lock);	564	spin_lock(&dentry->d_lock);
545	if (!dentry->d_count) {	565	if (!dentry->d_count) {
546	__dget_locked_dlock(dentry);	566	__dget_locked_dlock(dentry);
547	__d_drop(dentry);	567	__d_drop(dentry);
548	spin_unlock(&dentry->d_lock);	568	spin_unlock(&dentry->d_lock);
549	spin_unlock(&dcache_lock);	569	spin_unlock(&dcache_lock);
550	dput(dentry);	570	dput(dentry);
551	goto restart;	571	goto restart;
552	}	572	}
553	spin_unlock(&dentry->d_lock);	573	spin_unlock(&dentry->d_lock);
554	}	574	}
555	spin_unlock(&dcache_lock);	575	spin_unlock(&dcache_lock);
556	}	576	}
557	EXPORT_SYMBOL(d_prune_aliases);	577	EXPORT_SYMBOL(d_prune_aliases);
558		578
559	/*	579	/*
560	* Throw away a dentry - free the inode, dput the parent. This requires that	580	* Throw away a dentry - free the inode, dput the parent. This requires that
561	* the LRU list has already been removed.	581	* the LRU list has already been removed.
562	*	582	*
563	* Try to prune ancestors as well. This is necessary to prevent	583	* Try to prune ancestors as well. This is necessary to prevent
564	* quadratic behavior of shrink_dcache_parent(), but is also expected	584	* quadratic behavior of shrink_dcache_parent(), but is also expected
565	* to be beneficial in reducing dentry cache fragmentation.	585	* to be beneficial in reducing dentry cache fragmentation.
566	*/	586	*/
567	static void prune_one_dentry(struct dentry * dentry)	587	static void prune_one_dentry(struct dentry * dentry)
568	__releases(dentry->d_lock)	588	__releases(dentry->d_lock)
569	__releases(dcache_lock)	589	__releases(dcache_lock)
570	{	590	{
571	__d_drop(dentry);	591	__d_drop(dentry);
572	dentry = d_kill(dentry);	592	dentry = d_kill(dentry);
573		593
574	/*	594	/*
575	* Prune ancestors. Locking is simpler than in dput(),	595	* Prune ancestors. Locking is simpler than in dput(),
576	* because dcache_lock needs to be taken anyway.	596	* because dcache_lock needs to be taken anyway.
577	*/	597	*/
578	while (dentry) {	598	while (dentry) {
579	spin_lock(&dcache_lock);	599	spin_lock(&dcache_lock);
580	spin_lock(&dentry->d_lock);	600	spin_lock(&dentry->d_lock);
581	dentry->d_count--;	601	dentry->d_count--;
582	if (dentry->d_count) {	602	if (dentry->d_count) {
583	spin_unlock(&dentry->d_lock);	603	spin_unlock(&dentry->d_lock);
584	spin_unlock(&dcache_lock);	604	spin_unlock(&dcache_lock);
585	return;	605	return;
586	}	606	}
587		607
588	dentry_lru_del(dentry);	608	dentry_lru_del(dentry);
589	__d_drop(dentry);	609	__d_drop(dentry);
590	dentry = d_kill(dentry);	610	dentry = d_kill(dentry);
591	}	611	}
592	}	612	}
593		613
594	static void shrink_dentry_list(struct list_head *list)	614	static void shrink_dentry_list(struct list_head *list)
595	{	615	{
596	struct dentry *dentry;	616	struct dentry *dentry;
597		617
598	while (!list_empty(list)) {	618	while (!list_empty(list)) {
599	dentry = list_entry(list->prev, struct dentry, d_lru);	619	dentry = list_entry(list->prev, struct dentry, d_lru);
600		620
601	if (!spin_trylock(&dentry->d_lock)) {	621	if (!spin_trylock(&dentry->d_lock)) {
602	spin_unlock(&dcache_lru_lock);	622	spin_unlock(&dcache_lru_lock);
603	cpu_relax();	623	cpu_relax();
604	spin_lock(&dcache_lru_lock);	624	spin_lock(&dcache_lru_lock);
605	continue;	625	continue;
606	}	626	}
607		627
608	__dentry_lru_del(dentry);	628	__dentry_lru_del(dentry);
609		629
610	/*	630	/*
611	* We found an inuse dentry which was not removed from	631	* We found an inuse dentry which was not removed from
612	* the LRU because of laziness during lookup. Do not free	632	* the LRU because of laziness during lookup. Do not free
613	* it - just keep it off the LRU list.	633	* it - just keep it off the LRU list.
614	*/	634	*/
615	if (dentry->d_count) {	635	if (dentry->d_count) {
616	spin_unlock(&dentry->d_lock);	636	spin_unlock(&dentry->d_lock);
617	continue;	637	continue;
618	}	638	}
619	spin_unlock(&dcache_lru_lock);	639	spin_unlock(&dcache_lru_lock);
620		640
621	prune_one_dentry(dentry);	641	prune_one_dentry(dentry);
622	/* dcache_lock and dentry->d_lock dropped */	642	/* dcache_lock and dentry->d_lock dropped */
623	spin_lock(&dcache_lock);	643	spin_lock(&dcache_lock);
624	spin_lock(&dcache_lru_lock);	644	spin_lock(&dcache_lru_lock);
625	}	645	}
626	}	646	}
627		647
628	/**	648	/**
629	* __shrink_dcache_sb - shrink the dentry LRU on a given superblock	649	* __shrink_dcache_sb - shrink the dentry LRU on a given superblock
630	* @sb: superblock to shrink dentry LRU.	650	* @sb: superblock to shrink dentry LRU.
631	* @count: number of entries to prune	651	* @count: number of entries to prune
632	* @flags: flags to control the dentry processing	652	* @flags: flags to control the dentry processing
633	*	653	*
634	* If flags contains DCACHE_REFERENCED reference dentries will not be pruned.	654	* If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
635	*/	655	*/
636	static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)	656	static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
637	{	657	{
638	/* called from prune_dcache() and shrink_dcache_parent() */	658	/* called from prune_dcache() and shrink_dcache_parent() */
639	struct dentry *dentry;	659	struct dentry *dentry;
640	LIST_HEAD(referenced);	660	LIST_HEAD(referenced);
641	LIST_HEAD(tmp);	661	LIST_HEAD(tmp);
642	int cnt = *count;	662	int cnt = *count;
643		663
644	spin_lock(&dcache_lock);	664	spin_lock(&dcache_lock);
645	relock:	665	relock:
646	spin_lock(&dcache_lru_lock);	666	spin_lock(&dcache_lru_lock);
647	while (!list_empty(&sb->s_dentry_lru)) {	667	while (!list_empty(&sb->s_dentry_lru)) {
648	dentry = list_entry(sb->s_dentry_lru.prev,	668	dentry = list_entry(sb->s_dentry_lru.prev,
649	struct dentry, d_lru);	669	struct dentry, d_lru);
650	BUG_ON(dentry->d_sb != sb);	670	BUG_ON(dentry->d_sb != sb);
651		671
652	if (!spin_trylock(&dentry->d_lock)) {	672	if (!spin_trylock(&dentry->d_lock)) {
653	spin_unlock(&dcache_lru_lock);	673	spin_unlock(&dcache_lru_lock);
654	cpu_relax();	674	cpu_relax();
655	goto relock;	675	goto relock;
656	}	676	}
657		677
658	/*	678	/*
659	* If we are honouring the DCACHE_REFERENCED flag and the	679	* If we are honouring the DCACHE_REFERENCED flag and the
660	* dentry has this flag set, don't free it. Clear the flag	680	* dentry has this flag set, don't free it. Clear the flag
661	* and put it back on the LRU.	681	* and put it back on the LRU.
662	*/	682	*/
663	if (flags & DCACHE_REFERENCED &&	683	if (flags & DCACHE_REFERENCED &&
664	dentry->d_flags & DCACHE_REFERENCED) {	684	dentry->d_flags & DCACHE_REFERENCED) {
665	dentry->d_flags &= ~DCACHE_REFERENCED;	685	dentry->d_flags &= ~DCACHE_REFERENCED;
666	list_move(&dentry->d_lru, &referenced);	686	list_move(&dentry->d_lru, &referenced);
667	spin_unlock(&dentry->d_lock);	687	spin_unlock(&dentry->d_lock);
668	} else {	688	} else {
669	list_move_tail(&dentry->d_lru, &tmp);	689	list_move_tail(&dentry->d_lru, &tmp);
670	spin_unlock(&dentry->d_lock);	690	spin_unlock(&dentry->d_lock);
671	if (!--cnt)	691	if (!--cnt)
672	break;	692	break;
673	}	693	}
674	/* XXX: re-add cond_resched_lock when dcache_lock goes away */	694	/* XXX: re-add cond_resched_lock when dcache_lock goes away */
675	}	695	}
676		696
677	*count = cnt;	697	*count = cnt;
678	shrink_dentry_list(&tmp);	698	shrink_dentry_list(&tmp);
679		699
680	if (!list_empty(&referenced))	700	if (!list_empty(&referenced))
681	list_splice(&referenced, &sb->s_dentry_lru);	701	list_splice(&referenced, &sb->s_dentry_lru);
682	spin_unlock(&dcache_lru_lock);	702	spin_unlock(&dcache_lru_lock);
683	spin_unlock(&dcache_lock);	703	spin_unlock(&dcache_lock);
684		704
685	}	705	}
686		706
687	/**	707	/**
688	* prune_dcache - shrink the dcache	708	* prune_dcache - shrink the dcache
689	* @count: number of entries to try to free	709	* @count: number of entries to try to free
690	*	710	*
691	* Shrink the dcache. This is done when we need more memory, or simply when we	711	* Shrink the dcache. This is done when we need more memory, or simply when we
692	* need to unmount something (at which point we need to unuse all dentries).	712	* need to unmount something (at which point we need to unuse all dentries).
693	*	713	*
694	* This function may fail to free any resources if all the dentries are in use.	714	* This function may fail to free any resources if all the dentries are in use.
695	*/	715	*/
696	static void prune_dcache(int count)	716	static void prune_dcache(int count)
697	{	717	{
698	struct super_block *sb, *p = NULL;	718	struct super_block *sb, *p = NULL;
699	int w_count;	719	int w_count;
700	int unused = dentry_stat.nr_unused;	720	int unused = dentry_stat.nr_unused;
701	int prune_ratio;	721	int prune_ratio;
702	int pruned;	722	int pruned;
703		723
704	if (unused == 0 || count == 0)	724	if (unused == 0 || count == 0)
705	return;	725	return;
706	spin_lock(&dcache_lock);	726	spin_lock(&dcache_lock);
707	if (count >= unused)	727	if (count >= unused)
708	prune_ratio = 1;	728	prune_ratio = 1;
709	else	729	else
710	prune_ratio = unused / count;	730	prune_ratio = unused / count;
711	spin_lock(&sb_lock);	731	spin_lock(&sb_lock);
712	list_for_each_entry(sb, &super_blocks, s_list) {	732	list_for_each_entry(sb, &super_blocks, s_list) {
713	if (list_empty(&sb->s_instances))	733	if (list_empty(&sb->s_instances))
714	continue;	734	continue;
715	if (sb->s_nr_dentry_unused == 0)	735	if (sb->s_nr_dentry_unused == 0)
716	continue;	736	continue;
717	sb->s_count++;	737	sb->s_count++;
718	/* Now, we reclaim unused dentrins with fairness.	738	/* Now, we reclaim unused dentrins with fairness.
719	* We reclaim them same percentage from each superblock.	739	* We reclaim them same percentage from each superblock.
720	* We calculate number of dentries to scan on this sb	740	* We calculate number of dentries to scan on this sb
721	* as follows, but the implementation is arranged to avoid	741	* as follows, but the implementation is arranged to avoid
722	* overflows:	742	* overflows:
723	* number of dentries to scan on this sb =	743	* number of dentries to scan on this sb =
724	* count * (number of dentries on this sb /	744	* count * (number of dentries on this sb /
725	* number of dentries in the machine)	745	* number of dentries in the machine)
726	*/	746	*/
727	spin_unlock(&sb_lock);	747	spin_unlock(&sb_lock);
728	if (prune_ratio != 1)	748	if (prune_ratio != 1)
729	w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;	749	w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
730	else	750	else
731	w_count = sb->s_nr_dentry_unused;	751	w_count = sb->s_nr_dentry_unused;
732	pruned = w_count;	752	pruned = w_count;
733	/*	753	/*
734	* We need to be sure this filesystem isn't being unmounted,	754	* We need to be sure this filesystem isn't being unmounted,
735	* otherwise we could race with generic_shutdown_super(), and	755	* otherwise we could race with generic_shutdown_super(), and
736	* end up holding a reference to an inode while the filesystem	756	* end up holding a reference to an inode while the filesystem
737	* is unmounted. So we try to get s_umount, and make sure	757	* is unmounted. So we try to get s_umount, and make sure
738	* s_root isn't NULL.	758	* s_root isn't NULL.
739	*/	759	*/
740	if (down_read_trylock(&sb->s_umount)) {	760	if (down_read_trylock(&sb->s_umount)) {
741	if ((sb->s_root != NULL) &&	761	if ((sb->s_root != NULL) &&
742	(!list_empty(&sb->s_dentry_lru))) {	762	(!list_empty(&sb->s_dentry_lru))) {
743	spin_unlock(&dcache_lock);	763	spin_unlock(&dcache_lock);
744	__shrink_dcache_sb(sb, &w_count,	764	__shrink_dcache_sb(sb, &w_count,
745	DCACHE_REFERENCED);	765	DCACHE_REFERENCED);
746	pruned -= w_count;	766	pruned -= w_count;
747	spin_lock(&dcache_lock);	767	spin_lock(&dcache_lock);
748	}	768	}
749	up_read(&sb->s_umount);	769	up_read(&sb->s_umount);
750	}	770	}
751	spin_lock(&sb_lock);	771	spin_lock(&sb_lock);
752	if (p)	772	if (p)
753	__put_super(p);	773	__put_super(p);
754	count -= pruned;	774	count -= pruned;
755	p = sb;	775	p = sb;
756	/* more work left to do? */	776	/* more work left to do? */
757	if (count <= 0)	777	if (count <= 0)
758	break;	778	break;
759	}	779	}
760	if (p)	780	if (p)
761	__put_super(p);	781	__put_super(p);
762	spin_unlock(&sb_lock);	782	spin_unlock(&sb_lock);
763	spin_unlock(&dcache_lock);	783	spin_unlock(&dcache_lock);
764	}	784	}
765		785
766	/**	786	/**
767	* shrink_dcache_sb - shrink dcache for a superblock	787	* shrink_dcache_sb - shrink dcache for a superblock
768	* @sb: superblock	788	* @sb: superblock
769	*	789	*
770	* Shrink the dcache for the specified super block. This is used to free	790	* Shrink the dcache for the specified super block. This is used to free
771	* the dcache before unmounting a file system.	791	* the dcache before unmounting a file system.
772	*/	792	*/
773	void shrink_dcache_sb(struct super_block *sb)	793	void shrink_dcache_sb(struct super_block *sb)
774	{	794	{
775	LIST_HEAD(tmp);	795	LIST_HEAD(tmp);
776		796
777	spin_lock(&dcache_lock);	797	spin_lock(&dcache_lock);
778	spin_lock(&dcache_lru_lock);	798	spin_lock(&dcache_lru_lock);
779	while (!list_empty(&sb->s_dentry_lru)) {	799	while (!list_empty(&sb->s_dentry_lru)) {
780	list_splice_init(&sb->s_dentry_lru, &tmp);	800	list_splice_init(&sb->s_dentry_lru, &tmp);
781	shrink_dentry_list(&tmp);	801	shrink_dentry_list(&tmp);
782	}	802	}
783	spin_unlock(&dcache_lru_lock);	803	spin_unlock(&dcache_lru_lock);
784	spin_unlock(&dcache_lock);	804	spin_unlock(&dcache_lock);
785	}	805	}
786	EXPORT_SYMBOL(shrink_dcache_sb);	806	EXPORT_SYMBOL(shrink_dcache_sb);
787		807
788	/*	808	/*
789	* destroy a single subtree of dentries for unmount	809	* destroy a single subtree of dentries for unmount
790	* - see the comments on shrink_dcache_for_umount() for a description of the	810	* - see the comments on shrink_dcache_for_umount() for a description of the
791	* locking	811	* locking
792	*/	812	*/
793	static void shrink_dcache_for_umount_subtree(struct dentry *dentry)	813	static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
794	{	814	{
795	struct dentry *parent;	815	struct dentry *parent;
796	unsigned detached = 0;	816	unsigned detached = 0;
797		817
798	BUG_ON(!IS_ROOT(dentry));	818	BUG_ON(!IS_ROOT(dentry));
799		819
800	/* detach this root from the system */	820	/* detach this root from the system */
801	spin_lock(&dcache_lock);	821	spin_lock(&dcache_lock);
802	spin_lock(&dentry->d_lock);	822	spin_lock(&dentry->d_lock);
803	dentry_lru_del(dentry);	823	dentry_lru_del(dentry);
804	spin_unlock(&dentry->d_lock);
805	__d_drop(dentry);	824	__d_drop(dentry);
		825	spin_unlock(&dentry->d_lock);
806	spin_unlock(&dcache_lock);	826	spin_unlock(&dcache_lock);
807		827
808	for (;;) {	828	for (;;) {
809	/* descend to the first leaf in the current subtree */	829	/* descend to the first leaf in the current subtree */
810	while (!list_empty(&dentry->d_subdirs)) {	830	while (!list_empty(&dentry->d_subdirs)) {
811	struct dentry *loop;	831	struct dentry *loop;
812		832
813	/* this is a branch with children - detach all of them	833	/* this is a branch with children - detach all of them
814	* from the system in one go */	834	* from the system in one go */
815	spin_lock(&dcache_lock);	835	spin_lock(&dcache_lock);
816	list_for_each_entry(loop, &dentry->d_subdirs,	836	list_for_each_entry(loop, &dentry->d_subdirs,
817	d_u.d_child) {	837	d_u.d_child) {
818	spin_lock(&loop->d_lock);	838	spin_lock(&loop->d_lock);
819	dentry_lru_del(loop);	839	dentry_lru_del(loop);
820	spin_unlock(&loop->d_lock);
821	__d_drop(loop);	840	__d_drop(loop);
		841	spin_unlock(&loop->d_lock);
822	cond_resched_lock(&dcache_lock);	842	cond_resched_lock(&dcache_lock);
823	}	843	}
824	spin_unlock(&dcache_lock);	844	spin_unlock(&dcache_lock);
825		845
826	/* move to the first child */	846	/* move to the first child */
827	dentry = list_entry(dentry->d_subdirs.next,	847	dentry = list_entry(dentry->d_subdirs.next,
828	struct dentry, d_u.d_child);	848	struct dentry, d_u.d_child);
829	}	849	}
830		850
831	/* consume the dentries from this leaf up through its parents	851	/* consume the dentries from this leaf up through its parents
832	* until we find one with children or run out altogether */	852	* until we find one with children or run out altogether */
833	do {	853	do {
834	struct inode *inode;	854	struct inode *inode;
835		855
836	if (dentry->d_count != 0) {	856	if (dentry->d_count != 0) {
837	printk(KERN_ERR	857	printk(KERN_ERR
838	"BUG: Dentry %p{i=%lx,n=%s}"	858	"BUG: Dentry %p{i=%lx,n=%s}"
839	" still in use (%d)"	859	" still in use (%d)"
840	" [unmount of %s %s]\n",	860	" [unmount of %s %s]\n",
841	dentry,	861	dentry,
842	dentry->d_inode ?	862	dentry->d_inode ?
843	dentry->d_inode->i_ino : 0UL,	863	dentry->d_inode->i_ino : 0UL,
844	dentry->d_name.name,	864	dentry->d_name.name,
845	dentry->d_count,	865	dentry->d_count,
846	dentry->d_sb->s_type->name,	866	dentry->d_sb->s_type->name,
847	dentry->d_sb->s_id);	867	dentry->d_sb->s_id);
848	BUG();	868	BUG();
849	}	869	}
850		870
851	if (IS_ROOT(dentry))	871	if (IS_ROOT(dentry))
852	parent = NULL;	872	parent = NULL;
853	else {	873	else {
854	parent = dentry->d_parent;	874	parent = dentry->d_parent;
855	spin_lock(&parent->d_lock);	875	spin_lock(&parent->d_lock);
856	parent->d_count--;	876	parent->d_count--;
857	spin_unlock(&parent->d_lock);	877	spin_unlock(&parent->d_lock);
858	}	878	}
859		879
860	list_del(&dentry->d_u.d_child);	880	list_del(&dentry->d_u.d_child);
861	detached++;	881	detached++;
862		882
863	inode = dentry->d_inode;	883	inode = dentry->d_inode;
864	if (inode) {	884	if (inode) {
865	dentry->d_inode = NULL;	885	dentry->d_inode = NULL;
866	list_del_init(&dentry->d_alias);	886	list_del_init(&dentry->d_alias);
867	if (dentry->d_op && dentry->d_op->d_iput)	887	if (dentry->d_op && dentry->d_op->d_iput)
868	dentry->d_op->d_iput(dentry, inode);	888	dentry->d_op->d_iput(dentry, inode);
869	else	889	else
870	iput(inode);	890	iput(inode);
871	}	891	}
872		892
873	d_free(dentry);	893	d_free(dentry);
874		894
875	/* finished when we fall off the top of the tree,	895	/* finished when we fall off the top of the tree,
876	* otherwise we ascend to the parent and move to the	896	* otherwise we ascend to the parent and move to the
877	* next sibling if there is one */	897	* next sibling if there is one */
878	if (!parent)	898	if (!parent)
879	return;	899	return;
880	dentry = parent;	900	dentry = parent;
881	} while (list_empty(&dentry->d_subdirs));	901	} while (list_empty(&dentry->d_subdirs));
882		902
883	dentry = list_entry(dentry->d_subdirs.next,	903	dentry = list_entry(dentry->d_subdirs.next,
884	struct dentry, d_u.d_child);	904	struct dentry, d_u.d_child);
885	}	905	}
886	}	906	}
887		907
888	/*	908	/*
889	* destroy the dentries attached to a superblock on unmounting	909	* destroy the dentries attached to a superblock on unmounting
890	* - we don't need to use dentry->d_lock, and only need dcache_lock when	910	* - we don't need to use dentry->d_lock, and only need dcache_lock when
891	* removing the dentry from the system lists and hashes because:	911	* removing the dentry from the system lists and hashes because:
892	* - the superblock is detached from all mountings and open files, so the	912	* - the superblock is detached from all mountings and open files, so the
893	* dentry trees will not be rearranged by the VFS	913	* dentry trees will not be rearranged by the VFS
894	* - s_umount is write-locked, so the memory pressure shrinker will ignore	914	* - s_umount is write-locked, so the memory pressure shrinker will ignore
895	* any dentries belonging to this superblock that it comes across	915	* any dentries belonging to this superblock that it comes across
896	* - the filesystem itself is no longer permitted to rearrange the dentries	916	* - the filesystem itself is no longer permitted to rearrange the dentries
897	* in this superblock	917	* in this superblock
898	*/	918	*/
899	void shrink_dcache_for_umount(struct super_block *sb)	919	void shrink_dcache_for_umount(struct super_block *sb)
900	{	920	{
901	struct dentry *dentry;	921	struct dentry *dentry;
902		922
903	if (down_read_trylock(&sb->s_umount))	923	if (down_read_trylock(&sb->s_umount))
904	BUG();	924	BUG();
905		925
906	dentry = sb->s_root;	926	dentry = sb->s_root;
907	sb->s_root = NULL;	927	sb->s_root = NULL;
908	spin_lock(&dentry->d_lock);	928	spin_lock(&dentry->d_lock);
909	dentry->d_count--;	929	dentry->d_count--;
910	spin_unlock(&dentry->d_lock);	930	spin_unlock(&dentry->d_lock);
911	shrink_dcache_for_umount_subtree(dentry);	931	shrink_dcache_for_umount_subtree(dentry);
912		932
913	while (!hlist_empty(&sb->s_anon)) {	933	while (!hlist_empty(&sb->s_anon)) {
914	dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);	934	dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
915	shrink_dcache_for_umount_subtree(dentry);	935	shrink_dcache_for_umount_subtree(dentry);
916	}	936	}
917	}	937	}
918		938
919	/*	939	/*
920	* Search for at least 1 mount point in the dentry's subdirs.	940	* Search for at least 1 mount point in the dentry's subdirs.
921	* We descend to the next level whenever the d_subdirs	941	* We descend to the next level whenever the d_subdirs
922	* list is non-empty and continue searching.	942	* list is non-empty and continue searching.
923	*/	943	*/
924		944
925	/**	945	/**
926	* have_submounts - check for mounts over a dentry	946	* have_submounts - check for mounts over a dentry
927	* @parent: dentry to check.	947	* @parent: dentry to check.
928	*	948	*
929	* Return true if the parent or its subdirectories contain	949	* Return true if the parent or its subdirectories contain
930	* a mount point	950	* a mount point
931	*/	951	*/
932		952
933	int have_submounts(struct dentry *parent)	953	int have_submounts(struct dentry *parent)
934	{	954	{
935	struct dentry *this_parent = parent;	955	struct dentry *this_parent = parent;
936	struct list_head *next;	956	struct list_head *next;
937		957
938	spin_lock(&dcache_lock);	958	spin_lock(&dcache_lock);
939	if (d_mountpoint(parent))	959	if (d_mountpoint(parent))
940	goto positive;	960	goto positive;
941	repeat:	961	repeat:
942	next = this_parent->d_subdirs.next;	962	next = this_parent->d_subdirs.next;
943	resume:	963	resume:
944	while (next != &this_parent->d_subdirs) {	964	while (next != &this_parent->d_subdirs) {
945	struct list_head *tmp = next;	965	struct list_head *tmp = next;
946	struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);	966	struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
947	next = tmp->next;	967	next = tmp->next;
948	/* Have we found a mount point ? */	968	/* Have we found a mount point ? */
949	if (d_mountpoint(dentry))	969	if (d_mountpoint(dentry))
950	goto positive;	970	goto positive;
951	if (!list_empty(&dentry->d_subdirs)) {	971	if (!list_empty(&dentry->d_subdirs)) {
952	this_parent = dentry;	972	this_parent = dentry;
953	goto repeat;	973	goto repeat;
954	}	974	}
955	}	975	}
956	/*	976	/*
957	* All done at this level ... ascend and resume the search.	977	* All done at this level ... ascend and resume the search.
958	*/	978	*/
959	if (this_parent != parent) {	979	if (this_parent != parent) {
960	next = this_parent->d_u.d_child.next;	980	next = this_parent->d_u.d_child.next;
961	this_parent = this_parent->d_parent;	981	this_parent = this_parent->d_parent;
962	goto resume;	982	goto resume;
963	}	983	}
964	spin_unlock(&dcache_lock);	984	spin_unlock(&dcache_lock);
965	return 0; /* No mount points found in tree */	985	return 0; /* No mount points found in tree */
966	positive:	986	positive:
967	spin_unlock(&dcache_lock);	987	spin_unlock(&dcache_lock);
968	return 1;	988	return 1;
969	}	989	}
970	EXPORT_SYMBOL(have_submounts);	990	EXPORT_SYMBOL(have_submounts);
971		991
972	/*	992	/*
973	* Search the dentry child list for the specified parent,	993	* Search the dentry child list for the specified parent,
974	* and move any unused dentries to the end of the unused	994	* and move any unused dentries to the end of the unused
975	* list for prune_dcache(). We descend to the next level	995	* list for prune_dcache(). We descend to the next level
976	* whenever the d_subdirs list is non-empty and continue	996	* whenever the d_subdirs list is non-empty and continue
977	* searching.	997	* searching.
978	*	998	*
979	* It returns zero iff there are no unused children,	999	* It returns zero iff there are no unused children,
980	* otherwise it returns the number of children moved to	1000	* otherwise it returns the number of children moved to
981	* the end of the unused list. This may not be the total	1001	* the end of the unused list. This may not be the total
982	* number of unused children, because select_parent can	1002	* number of unused children, because select_parent can
983	* drop the lock and return early due to latency	1003	* drop the lock and return early due to latency
984	* constraints.	1004	* constraints.
985	*/	1005	*/
986	static int select_parent(struct dentry * parent)	1006	static int select_parent(struct dentry * parent)
987	{	1007	{
988	struct dentry *this_parent = parent;	1008	struct dentry *this_parent = parent;
989	struct list_head *next;	1009	struct list_head *next;
990	int found = 0;	1010	int found = 0;
991		1011
992	spin_lock(&dcache_lock);	1012	spin_lock(&dcache_lock);
993	repeat:	1013	repeat:
994	next = this_parent->d_subdirs.next;	1014	next = this_parent->d_subdirs.next;
995	resume:	1015	resume:
996	while (next != &this_parent->d_subdirs) {	1016	while (next != &this_parent->d_subdirs) {
997	struct list_head *tmp = next;	1017	struct list_head *tmp = next;
998	struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);	1018	struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
999	next = tmp->next;	1019	next = tmp->next;
1000		1020
1001	spin_lock(&dentry->d_lock);	1021	spin_lock(&dentry->d_lock);
1002		1022
1003	/*	1023	/*
1004	* move only zero ref count dentries to the end	1024	* move only zero ref count dentries to the end
1005	* of the unused list for prune_dcache	1025	* of the unused list for prune_dcache
1006	*/	1026	*/
1007	if (!dentry->d_count) {	1027	if (!dentry->d_count) {
1008	dentry_lru_move_tail(dentry);	1028	dentry_lru_move_tail(dentry);
1009	found++;	1029	found++;
1010	} else {	1030	} else {
1011	dentry_lru_del(dentry);	1031	dentry_lru_del(dentry);
1012	}	1032	}
1013		1033
1014	spin_unlock(&dentry->d_lock);	1034	spin_unlock(&dentry->d_lock);
1015		1035
1016	/*	1036	/*
1017	* We can return to the caller if we have found some (this	1037	* We can return to the caller if we have found some (this
1018	* ensures forward progress). We'll be coming back to find	1038	* ensures forward progress). We'll be coming back to find
1019	* the rest.	1039	* the rest.
1020	*/	1040	*/
1021	if (found && need_resched())	1041	if (found && need_resched())
1022	goto out;	1042	goto out;
1023		1043
1024	/*	1044	/*
1025	* Descend a level if the d_subdirs list is non-empty.	1045	* Descend a level if the d_subdirs list is non-empty.
1026	*/	1046	*/
1027	if (!list_empty(&dentry->d_subdirs)) {	1047	if (!list_empty(&dentry->d_subdirs)) {
1028	this_parent = dentry;	1048	this_parent = dentry;
1029	goto repeat;	1049	goto repeat;
1030	}	1050	}
1031	}	1051	}
1032	/*	1052	/*
1033	* All done at this level ... ascend and resume the search.	1053	* All done at this level ... ascend and resume the search.
1034	*/	1054	*/
1035	if (this_parent != parent) {	1055	if (this_parent != parent) {
1036	next = this_parent->d_u.d_child.next;	1056	next = this_parent->d_u.d_child.next;
1037	this_parent = this_parent->d_parent;	1057	this_parent = this_parent->d_parent;
1038	goto resume;	1058	goto resume;
1039	}	1059	}
1040	out:	1060	out:
1041	spin_unlock(&dcache_lock);	1061	spin_unlock(&dcache_lock);
1042	return found;	1062	return found;
1043	}	1063	}
1044		1064
1045	/**	1065	/**
1046	* shrink_dcache_parent - prune dcache	1066	* shrink_dcache_parent - prune dcache
1047	* @parent: parent of entries to prune	1067	* @parent: parent of entries to prune
1048	*	1068	*
1049	* Prune the dcache to remove unused children of the parent dentry.	1069	* Prune the dcache to remove unused children of the parent dentry.
1050	*/	1070	*/
1051		1071
1052	void shrink_dcache_parent(struct dentry * parent)	1072	void shrink_dcache_parent(struct dentry * parent)
1053	{	1073	{
1054	struct super_block *sb = parent->d_sb;	1074	struct super_block *sb = parent->d_sb;
1055	int found;	1075	int found;
1056		1076
1057	while ((found = select_parent(parent)) != 0)	1077	while ((found = select_parent(parent)) != 0)
1058	__shrink_dcache_sb(sb, &found, 0);	1078	__shrink_dcache_sb(sb, &found, 0);
1059	}	1079	}
1060	EXPORT_SYMBOL(shrink_dcache_parent);	1080	EXPORT_SYMBOL(shrink_dcache_parent);
1061		1081
1062	/*	1082	/*
1063	* Scan `nr' dentries and return the number which remain.	1083	* Scan `nr' dentries and return the number which remain.
1064	*	1084	*
1065	* We need to avoid reentering the filesystem if the caller is performing a	1085	* We need to avoid reentering the filesystem if the caller is performing a
1066	* GFP_NOFS allocation attempt. One example deadlock is:	1086	* GFP_NOFS allocation attempt. One example deadlock is:
1067	*	1087	*
1068	* ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->	1088	* ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
1069	* prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->	1089	* prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
1070	* ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.	1090	* ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
1071	*	1091	*
1072	* In this case we return -1 to tell the caller that we baled.	1092	* In this case we return -1 to tell the caller that we baled.
1073	*/	1093	*/
1074	static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)	1094	static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
1075	{	1095	{
1076	if (nr) {	1096	if (nr) {
1077	if (!(gfp_mask & __GFP_FS))	1097	if (!(gfp_mask & __GFP_FS))
1078	return -1;	1098	return -1;
1079	prune_dcache(nr);	1099	prune_dcache(nr);
1080	}	1100	}
1081		1101
1082	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;	1102	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
1083	}	1103	}
1084		1104
1085	static struct shrinker dcache_shrinker = {	1105	static struct shrinker dcache_shrinker = {
1086	.shrink = shrink_dcache_memory,	1106	.shrink = shrink_dcache_memory,
1087	.seeks = DEFAULT_SEEKS,	1107	.seeks = DEFAULT_SEEKS,
1088	};	1108	};
1089		1109
1090	/**	1110	/**
1091	* d_alloc - allocate a dcache entry	1111	* d_alloc - allocate a dcache entry
1092	* @parent: parent of entry to allocate	1112	* @parent: parent of entry to allocate
1093	* @name: qstr of the name	1113	* @name: qstr of the name
1094	*	1114	*
1095	* Allocates a dentry. It returns %NULL if there is insufficient memory	1115	* Allocates a dentry. It returns %NULL if there is insufficient memory
1096	* available. On a success the dentry is returned. The name passed in is	1116	* available. On a success the dentry is returned. The name passed in is
1097	* copied and the copy passed in may be reused after this call.	1117	* copied and the copy passed in may be reused after this call.
1098	*/	1118	*/
1099		1119
1100	struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)	1120	struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1101	{	1121	{
1102	struct dentry *dentry;	1122	struct dentry *dentry;
1103	char *dname;	1123	char *dname;
1104		1124
1105	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);	1125	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1106	if (!dentry)	1126	if (!dentry)
1107	return NULL;	1127	return NULL;
1108		1128
1109	if (name->len > DNAME_INLINE_LEN-1) {	1129	if (name->len > DNAME_INLINE_LEN-1) {
1110	dname = kmalloc(name->len + 1, GFP_KERNEL);	1130	dname = kmalloc(name->len + 1, GFP_KERNEL);
1111	if (!dname) {	1131	if (!dname) {
1112	kmem_cache_free(dentry_cache, dentry);	1132	kmem_cache_free(dentry_cache, dentry);
1113	return NULL;	1133	return NULL;
1114	}	1134	}
1115	} else {	1135	} else {
1116	dname = dentry->d_iname;	1136	dname = dentry->d_iname;
1117	}	1137	}
1118	dentry->d_name.name = dname;	1138	dentry->d_name.name = dname;
1119		1139
1120	dentry->d_name.len = name->len;	1140	dentry->d_name.len = name->len;
1121	dentry->d_name.hash = name->hash;	1141	dentry->d_name.hash = name->hash;
1122	memcpy(dname, name->name, name->len);	1142	memcpy(dname, name->name, name->len);
1123	dname[name->len] = 0;	1143	dname[name->len] = 0;
1124		1144
1125	dentry->d_count = 1;	1145	dentry->d_count = 1;
1126	dentry->d_flags = DCACHE_UNHASHED;	1146	dentry->d_flags = DCACHE_UNHASHED;
1127	spin_lock_init(&dentry->d_lock);	1147	spin_lock_init(&dentry->d_lock);
1128	dentry->d_inode = NULL;	1148	dentry->d_inode = NULL;
1129	dentry->d_parent = NULL;	1149	dentry->d_parent = NULL;
1130	dentry->d_sb = NULL;	1150	dentry->d_sb = NULL;
1131	dentry->d_op = NULL;	1151	dentry->d_op = NULL;
1132	dentry->d_fsdata = NULL;	1152	dentry->d_fsdata = NULL;
1133	dentry->d_mounted = 0;	1153	dentry->d_mounted = 0;
1134	INIT_HLIST_NODE(&dentry->d_hash);	1154	INIT_HLIST_NODE(&dentry->d_hash);
1135	INIT_LIST_HEAD(&dentry->d_lru);	1155	INIT_LIST_HEAD(&dentry->d_lru);
1136	INIT_LIST_HEAD(&dentry->d_subdirs);	1156	INIT_LIST_HEAD(&dentry->d_subdirs);
1137	INIT_LIST_HEAD(&dentry->d_alias);	1157	INIT_LIST_HEAD(&dentry->d_alias);
1138		1158
1139	if (parent) {	1159	if (parent) {
1140	dentry->d_parent = dget(parent);	1160	dentry->d_parent = dget(parent);
1141	dentry->d_sb = parent->d_sb;	1161	dentry->d_sb = parent->d_sb;
1142	} else {	1162	} else {
1143	INIT_LIST_HEAD(&dentry->d_u.d_child);	1163	INIT_LIST_HEAD(&dentry->d_u.d_child);
1144	}	1164	}
1145		1165
1146	spin_lock(&dcache_lock);	1166	spin_lock(&dcache_lock);
1147	if (parent)	1167	if (parent)
1148	list_add(&dentry->d_u.d_child, &parent->d_subdirs);	1168	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1149	spin_unlock(&dcache_lock);	1169	spin_unlock(&dcache_lock);
1150		1170
1151	this_cpu_inc(nr_dentry);	1171	this_cpu_inc(nr_dentry);
1152		1172
1153	return dentry;	1173	return dentry;
1154	}	1174	}
1155	EXPORT_SYMBOL(d_alloc);	1175	EXPORT_SYMBOL(d_alloc);
1156		1176
1157	struct dentry *d_alloc_name(struct dentry *parent, const char *name)	1177	struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1158	{	1178	{
1159	struct qstr q;	1179	struct qstr q;
1160		1180
1161	q.name = name;	1181	q.name = name;
1162	q.len = strlen(name);	1182	q.len = strlen(name);
1163	q.hash = full_name_hash(q.name, q.len);	1183	q.hash = full_name_hash(q.name, q.len);
1164	return d_alloc(parent, &q);	1184	return d_alloc(parent, &q);
1165	}	1185	}
1166	EXPORT_SYMBOL(d_alloc_name);	1186	EXPORT_SYMBOL(d_alloc_name);
1167		1187
1168	/* the caller must hold dcache_lock */	1188	/* the caller must hold dcache_lock */
1169	static void __d_instantiate(struct dentry *dentry, struct inode *inode)	1189	static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1170	{	1190	{
1171	if (inode)	1191	if (inode)
1172	list_add(&dentry->d_alias, &inode->i_dentry);	1192	list_add(&dentry->d_alias, &inode->i_dentry);
1173	dentry->d_inode = inode;	1193	dentry->d_inode = inode;
1174	fsnotify_d_instantiate(dentry, inode);	1194	fsnotify_d_instantiate(dentry, inode);
1175	}	1195	}
1176		1196
1177	/**	1197	/**
1178	* d_instantiate - fill in inode information for a dentry	1198	* d_instantiate - fill in inode information for a dentry
1179	* @entry: dentry to complete	1199	* @entry: dentry to complete
1180	* @inode: inode to attach to this dentry	1200	* @inode: inode to attach to this dentry
1181	*	1201	*
1182	* Fill in inode information in the entry.	1202	* Fill in inode information in the entry.
1183	*	1203	*
1184	* This turns negative dentries into productive full members	1204	* This turns negative dentries into productive full members
1185	* of society.	1205	* of society.
1186	*	1206	*
1187	* NOTE! This assumes that the inode count has been incremented	1207	* NOTE! This assumes that the inode count has been incremented
1188	* (or otherwise set) by the caller to indicate that it is now	1208	* (or otherwise set) by the caller to indicate that it is now
1189	* in use by the dcache.	1209	* in use by the dcache.
1190	*/	1210	*/
1191		1211
1192	void d_instantiate(struct dentry *entry, struct inode * inode)	1212	void d_instantiate(struct dentry *entry, struct inode * inode)
1193	{	1213	{
1194	BUG_ON(!list_empty(&entry->d_alias));	1214	BUG_ON(!list_empty(&entry->d_alias));
1195	spin_lock(&dcache_lock);	1215	spin_lock(&dcache_lock);
1196	__d_instantiate(entry, inode);	1216	__d_instantiate(entry, inode);
1197	spin_unlock(&dcache_lock);	1217	spin_unlock(&dcache_lock);
1198	security_d_instantiate(entry, inode);	1218	security_d_instantiate(entry, inode);
1199	}	1219	}
1200	EXPORT_SYMBOL(d_instantiate);	1220	EXPORT_SYMBOL(d_instantiate);
1201		1221
1202	/**	1222	/**
1203	* d_instantiate_unique - instantiate a non-aliased dentry	1223	* d_instantiate_unique - instantiate a non-aliased dentry
1204	* @entry: dentry to instantiate	1224	* @entry: dentry to instantiate
1205	* @inode: inode to attach to this dentry	1225	* @inode: inode to attach to this dentry
1206	*	1226	*
1207	* Fill in inode information in the entry. On success, it returns NULL.	1227	* Fill in inode information in the entry. On success, it returns NULL.
1208	* If an unhashed alias of "entry" already exists, then we return the	1228	* If an unhashed alias of "entry" already exists, then we return the
1209	* aliased dentry instead and drop one reference to inode.	1229	* aliased dentry instead and drop one reference to inode.
1210	*	1230	*
1211	* Note that in order to avoid conflicts with rename() etc, the caller	1231	* Note that in order to avoid conflicts with rename() etc, the caller
1212	* had better be holding the parent directory semaphore.	1232	* had better be holding the parent directory semaphore.
1213	*	1233	*
1214	* This also assumes that the inode count has been incremented	1234	* This also assumes that the inode count has been incremented
1215	* (or otherwise set) by the caller to indicate that it is now	1235	* (or otherwise set) by the caller to indicate that it is now
1216	* in use by the dcache.	1236	* in use by the dcache.
1217	*/	1237	*/
1218	static struct dentry *__d_instantiate_unique(struct dentry *entry,	1238	static struct dentry *__d_instantiate_unique(struct dentry *entry,
1219	struct inode *inode)	1239	struct inode *inode)
1220	{	1240	{
1221	struct dentry *alias;	1241	struct dentry *alias;
1222	int len = entry->d_name.len;	1242	int len = entry->d_name.len;
1223	const char *name = entry->d_name.name;	1243	const char *name = entry->d_name.name;
1224	unsigned int hash = entry->d_name.hash;	1244	unsigned int hash = entry->d_name.hash;
1225		1245
1226	if (!inode) {	1246	if (!inode) {
1227	__d_instantiate(entry, NULL);	1247	__d_instantiate(entry, NULL);
1228	return NULL;	1248	return NULL;
1229	}	1249	}
1230		1250
1231	list_for_each_entry(alias, &inode->i_dentry, d_alias) {	1251	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1232	struct qstr *qstr = &alias->d_name;	1252	struct qstr *qstr = &alias->d_name;
1233		1253
1234	if (qstr->hash != hash)	1254	if (qstr->hash != hash)
1235	continue;	1255	continue;
1236	if (alias->d_parent != entry->d_parent)	1256	if (alias->d_parent != entry->d_parent)
1237	continue;	1257	continue;
1238	if (qstr->len != len)	1258	if (qstr->len != len)
1239	continue;	1259	continue;
1240	if (memcmp(qstr->name, name, len))	1260	if (memcmp(qstr->name, name, len))
1241	continue;	1261	continue;
1242	dget_locked(alias);	1262	dget_locked(alias);
1243	return alias;	1263	return alias;
1244	}	1264	}
1245		1265
1246	__d_instantiate(entry, inode);	1266	__d_instantiate(entry, inode);
1247	return NULL;	1267	return NULL;
1248	}	1268	}
1249		1269
1250	struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)	1270	struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1251	{	1271	{
1252	struct dentry *result;	1272	struct dentry *result;
1253		1273
1254	BUG_ON(!list_empty(&entry->d_alias));	1274	BUG_ON(!list_empty(&entry->d_alias));
1255		1275
1256	spin_lock(&dcache_lock);	1276	spin_lock(&dcache_lock);
1257	result = __d_instantiate_unique(entry, inode);	1277	result = __d_instantiate_unique(entry, inode);
1258	spin_unlock(&dcache_lock);	1278	spin_unlock(&dcache_lock);
1259		1279
1260	if (!result) {	1280	if (!result) {
1261	security_d_instantiate(entry, inode);	1281	security_d_instantiate(entry, inode);
1262	return NULL;	1282	return NULL;
1263	}	1283	}
1264		1284
1265	BUG_ON(!d_unhashed(result));	1285	BUG_ON(!d_unhashed(result));
1266	iput(inode);	1286	iput(inode);
1267	return result;	1287	return result;
1268	}	1288	}
1269		1289
1270	EXPORT_SYMBOL(d_instantiate_unique);	1290	EXPORT_SYMBOL(d_instantiate_unique);
1271		1291
1272	/**	1292	/**
1273	* d_alloc_root - allocate root dentry	1293	* d_alloc_root - allocate root dentry
1274	* @root_inode: inode to allocate the root for	1294	* @root_inode: inode to allocate the root for
1275	*	1295	*
1276	* Allocate a root ("/") dentry for the inode given. The inode is	1296	* Allocate a root ("/") dentry for the inode given. The inode is
1277	* instantiated and returned. %NULL is returned if there is insufficient	1297	* instantiated and returned. %NULL is returned if there is insufficient
1278	* memory or the inode passed is %NULL.	1298	* memory or the inode passed is %NULL.
1279	*/	1299	*/
1280		1300
1281	struct dentry * d_alloc_root(struct inode * root_inode)	1301	struct dentry * d_alloc_root(struct inode * root_inode)
1282	{	1302	{
1283	struct dentry *res = NULL;	1303	struct dentry *res = NULL;
1284		1304
1285	if (root_inode) {	1305	if (root_inode) {
1286	static const struct qstr name = { .name = "/", .len = 1 };	1306	static const struct qstr name = { .name = "/", .len = 1 };
1287		1307
1288	res = d_alloc(NULL, &name);	1308	res = d_alloc(NULL, &name);
1289	if (res) {	1309	if (res) {
1290	res->d_sb = root_inode->i_sb;	1310	res->d_sb = root_inode->i_sb;
1291	res->d_parent = res;	1311	res->d_parent = res;
1292	d_instantiate(res, root_inode);	1312	d_instantiate(res, root_inode);
1293	}	1313	}
1294	}	1314	}
1295	return res;	1315	return res;
1296	}	1316	}
1297	EXPORT_SYMBOL(d_alloc_root);	1317	EXPORT_SYMBOL(d_alloc_root);
1298		1318
1299	static inline struct hlist_head *d_hash(struct dentry *parent,	1319	static inline struct hlist_head *d_hash(struct dentry *parent,
1300	unsigned long hash)	1320	unsigned long hash)
1301	{	1321	{
1302	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;	1322	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1303	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);	1323	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1304	return dentry_hashtable + (hash & D_HASHMASK);	1324	return dentry_hashtable + (hash & D_HASHMASK);
1305	}	1325	}
1306		1326
1307	/**	1327	/**
1308	* d_obtain_alias - find or allocate a dentry for a given inode	1328	* d_obtain_alias - find or allocate a dentry for a given inode
1309	* @inode: inode to allocate the dentry for	1329	* @inode: inode to allocate the dentry for
1310	*	1330	*
1311	* Obtain a dentry for an inode resulting from NFS filehandle conversion or	1331	* Obtain a dentry for an inode resulting from NFS filehandle conversion or
1312	* similar open by handle operations. The returned dentry may be anonymous,	1332	* similar open by handle operations. The returned dentry may be anonymous,
1313	* or may have a full name (if the inode was already in the cache).	1333	* or may have a full name (if the inode was already in the cache).
1314	*	1334	*
1315	* When called on a directory inode, we must ensure that the inode only ever	1335	* When called on a directory inode, we must ensure that the inode only ever
1316	* has one dentry. If a dentry is found, that is returned instead of	1336	* has one dentry. If a dentry is found, that is returned instead of
1317	* allocating a new one.	1337	* allocating a new one.
1318	*	1338	*
1319	* On successful return, the reference to the inode has been transferred	1339	* On successful return, the reference to the inode has been transferred
1320	* to the dentry. In case of an error the reference on the inode is released.	1340	* to the dentry. In case of an error the reference on the inode is released.
1321	* To make it easier to use in export operations a %NULL or IS_ERR inode may	1341	* To make it easier to use in export operations a %NULL or IS_ERR inode may
1322	* be passed in and will be the error will be propagate to the return value,	1342	* be passed in and will be the error will be propagate to the return value,
1323	* with a %NULL @inode replaced by ERR_PTR(-ESTALE).	1343	* with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1324	*/	1344	*/
1325	struct dentry *d_obtain_alias(struct inode *inode)	1345	struct dentry *d_obtain_alias(struct inode *inode)
1326	{	1346	{
1327	static const struct qstr anonstring = { .name = "" };	1347	static const struct qstr anonstring = { .name = "" };
1328	struct dentry *tmp;	1348	struct dentry *tmp;
1329	struct dentry *res;	1349	struct dentry *res;
1330		1350
1331	if (!inode)	1351	if (!inode)
1332	return ERR_PTR(-ESTALE);	1352	return ERR_PTR(-ESTALE);
1333	if (IS_ERR(inode))	1353	if (IS_ERR(inode))
1334	return ERR_CAST(inode);	1354	return ERR_CAST(inode);
1335		1355
1336	res = d_find_alias(inode);	1356	res = d_find_alias(inode);
1337	if (res)	1357	if (res)
1338	goto out_iput;	1358	goto out_iput;
1339		1359
1340	tmp = d_alloc(NULL, &anonstring);	1360	tmp = d_alloc(NULL, &anonstring);
1341	if (!tmp) {	1361	if (!tmp) {
1342	res = ERR_PTR(-ENOMEM);	1362	res = ERR_PTR(-ENOMEM);
1343	goto out_iput;	1363	goto out_iput;
1344	}	1364	}
1345	tmp->d_parent = tmp; /* make sure dput doesn't croak */	1365	tmp->d_parent = tmp; /* make sure dput doesn't croak */
1346		1366
1347	spin_lock(&dcache_lock);	1367	spin_lock(&dcache_lock);
1348	res = __d_find_alias(inode, 0);	1368	res = __d_find_alias(inode, 0);
1349	if (res) {	1369	if (res) {
1350	spin_unlock(&dcache_lock);	1370	spin_unlock(&dcache_lock);
1351	dput(tmp);	1371	dput(tmp);
1352	goto out_iput;	1372	goto out_iput;
1353	}	1373	}
1354		1374
1355	/* attach a disconnected dentry */	1375	/* attach a disconnected dentry */
1356	spin_lock(&tmp->d_lock);	1376	spin_lock(&tmp->d_lock);
1357	tmp->d_sb = inode->i_sb;	1377	tmp->d_sb = inode->i_sb;
1358	tmp->d_inode = inode;	1378	tmp->d_inode = inode;
1359	tmp->d_flags |= DCACHE_DISCONNECTED;	1379	tmp->d_flags |= DCACHE_DISCONNECTED;
1360	tmp->d_flags &= ~DCACHE_UNHASHED;	1380	tmp->d_flags &= ~DCACHE_UNHASHED;
1361	list_add(&tmp->d_alias, &inode->i_dentry);	1381	list_add(&tmp->d_alias, &inode->i_dentry);
1362	spin_lock(&dcache_hash_lock);	1382	spin_lock(&dcache_hash_lock);
1363	hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);	1383	hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
1364	spin_unlock(&dcache_hash_lock);	1384	spin_unlock(&dcache_hash_lock);
1365	spin_unlock(&tmp->d_lock);	1385	spin_unlock(&tmp->d_lock);
1366		1386
1367	spin_unlock(&dcache_lock);	1387	spin_unlock(&dcache_lock);
1368	return tmp;	1388	return tmp;
1369		1389
1370	out_iput:	1390	out_iput:
1371	iput(inode);	1391	iput(inode);
1372	return res;	1392	return res;
1373	}	1393	}
1374	EXPORT_SYMBOL(d_obtain_alias);	1394	EXPORT_SYMBOL(d_obtain_alias);
1375		1395
1376	/**	1396	/**
1377	* d_splice_alias - splice a disconnected dentry into the tree if one exists	1397	* d_splice_alias - splice a disconnected dentry into the tree if one exists
1378	* @inode: the inode which may have a disconnected dentry	1398	* @inode: the inode which may have a disconnected dentry
1379	* @dentry: a negative dentry which we want to point to the inode.	1399	* @dentry: a negative dentry which we want to point to the inode.
1380	*	1400	*
1381	* If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and	1401	* If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1382	* DCACHE_DISCONNECTED), then d_move that in place of the given dentry	1402	* DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1383	* and return it, else simply d_add the inode to the dentry and return NULL.	1403	* and return it, else simply d_add the inode to the dentry and return NULL.
1384	*	1404	*
1385	* This is needed in the lookup routine of any filesystem that is exportable	1405	* This is needed in the lookup routine of any filesystem that is exportable
1386	* (via knfsd) so that we can build dcache paths to directories effectively.	1406	* (via knfsd) so that we can build dcache paths to directories effectively.
1387	*	1407	*
1388	* If a dentry was found and moved, then it is returned. Otherwise NULL	1408	* If a dentry was found and moved, then it is returned. Otherwise NULL
1389	* is returned. This matches the expected return value of ->lookup.	1409	* is returned. This matches the expected return value of ->lookup.
1390	*	1410	*
1391	*/	1411	*/
1392	struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)	1412	struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1393	{	1413	{
1394	struct dentry *new = NULL;	1414	struct dentry *new = NULL;
1395		1415
1396	if (inode && S_ISDIR(inode->i_mode)) {	1416	if (inode && S_ISDIR(inode->i_mode)) {
1397	spin_lock(&dcache_lock);	1417	spin_lock(&dcache_lock);
1398	new = __d_find_alias(inode, 1);	1418	new = __d_find_alias(inode, 1);
1399	if (new) {	1419	if (new) {
1400	BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));	1420	BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1401	spin_unlock(&dcache_lock);	1421	spin_unlock(&dcache_lock);
1402	security_d_instantiate(new, inode);	1422	security_d_instantiate(new, inode);
1403	d_move(new, dentry);	1423	d_move(new, dentry);
1404	iput(inode);	1424	iput(inode);
1405	} else {	1425	} else {
1406	/* already taking dcache_lock, so d_add() by hand */	1426	/* already taking dcache_lock, so d_add() by hand */
1407	__d_instantiate(dentry, inode);	1427	__d_instantiate(dentry, inode);
1408	spin_unlock(&dcache_lock);	1428	spin_unlock(&dcache_lock);
1409	security_d_instantiate(dentry, inode);	1429	security_d_instantiate(dentry, inode);
1410	d_rehash(dentry);	1430	d_rehash(dentry);
1411	}	1431	}
1412	} else	1432	} else
1413	d_add(dentry, inode);	1433	d_add(dentry, inode);
1414	return new;	1434	return new;
1415	}	1435	}
1416	EXPORT_SYMBOL(d_splice_alias);	1436	EXPORT_SYMBOL(d_splice_alias);
1417		1437
1418	/**	1438	/**
1419	* d_add_ci - lookup or allocate new dentry with case-exact name	1439	* d_add_ci - lookup or allocate new dentry with case-exact name
1420	* @inode: the inode case-insensitive lookup has found	1440	* @inode: the inode case-insensitive lookup has found
1421	* @dentry: the negative dentry that was passed to the parent's lookup func	1441	* @dentry: the negative dentry that was passed to the parent's lookup func
1422	* @name: the case-exact name to be associated with the returned dentry	1442	* @name: the case-exact name to be associated with the returned dentry
1423	*	1443	*
1424	* This is to avoid filling the dcache with case-insensitive names to the	1444	* This is to avoid filling the dcache with case-insensitive names to the
1425	* same inode, only the actual correct case is stored in the dcache for	1445	* same inode, only the actual correct case is stored in the dcache for
1426	* case-insensitive filesystems.	1446	* case-insensitive filesystems.
1427	*	1447	*
1428	* For a case-insensitive lookup match and if the the case-exact dentry	1448	* For a case-insensitive lookup match and if the the case-exact dentry
1429	* already exists in in the dcache, use it and return it.	1449	* already exists in in the dcache, use it and return it.
1430	*	1450	*
1431	* If no entry exists with the exact case name, allocate new dentry with	1451	* If no entry exists with the exact case name, allocate new dentry with
1432	* the exact case, and return the spliced entry.	1452	* the exact case, and return the spliced entry.
1433	*/	1453	*/
1434	struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,	1454	struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1435	struct qstr *name)	1455	struct qstr *name)
1436	{	1456	{
1437	int error;	1457	int error;
1438	struct dentry *found;	1458	struct dentry *found;
1439	struct dentry *new;	1459	struct dentry *new;
1440		1460
1441	/*	1461	/*
1442	* First check if a dentry matching the name already exists,	1462	* First check if a dentry matching the name already exists,
1443	* if not go ahead and create it now.	1463	* if not go ahead and create it now.
1444	*/	1464	*/
1445	found = d_hash_and_lookup(dentry->d_parent, name);	1465	found = d_hash_and_lookup(dentry->d_parent, name);
1446	if (!found) {	1466	if (!found) {
1447	new = d_alloc(dentry->d_parent, name);	1467	new = d_alloc(dentry->d_parent, name);
1448	if (!new) {	1468	if (!new) {
1449	error = -ENOMEM;	1469	error = -ENOMEM;
1450	goto err_out;	1470	goto err_out;
1451	}	1471	}
1452		1472
1453	found = d_splice_alias(inode, new);	1473	found = d_splice_alias(inode, new);
1454	if (found) {	1474	if (found) {
1455	dput(new);	1475	dput(new);
1456	return found;	1476	return found;
1457	}	1477	}
1458	return new;	1478	return new;
1459	}	1479	}
1460		1480
1461	/*	1481	/*
1462	* If a matching dentry exists, and it's not negative use it.	1482	* If a matching dentry exists, and it's not negative use it.
1463	*	1483	*
1464	* Decrement the reference count to balance the iget() done	1484	* Decrement the reference count to balance the iget() done
1465	* earlier on.	1485	* earlier on.
1466	*/	1486	*/
1467	if (found->d_inode) {	1487	if (found->d_inode) {
1468	if (unlikely(found->d_inode != inode)) {	1488	if (unlikely(found->d_inode != inode)) {
1469	/* This can't happen because bad inodes are unhashed. */	1489	/* This can't happen because bad inodes are unhashed. */
1470	BUG_ON(!is_bad_inode(inode));	1490	BUG_ON(!is_bad_inode(inode));
1471	BUG_ON(!is_bad_inode(found->d_inode));	1491	BUG_ON(!is_bad_inode(found->d_inode));
1472	}	1492	}
1473	iput(inode);	1493	iput(inode);
1474	return found;	1494	return found;
1475	}	1495	}
1476		1496
1477	/*	1497	/*
1478	* Negative dentry: instantiate it unless the inode is a directory and	1498	* Negative dentry: instantiate it unless the inode is a directory and
1479	* already has a dentry.	1499	* already has a dentry.
1480	*/	1500	*/
1481	spin_lock(&dcache_lock);	1501	spin_lock(&dcache_lock);
1482	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {	1502	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
1483	__d_instantiate(found, inode);	1503	__d_instantiate(found, inode);
1484	spin_unlock(&dcache_lock);	1504	spin_unlock(&dcache_lock);
1485	security_d_instantiate(found, inode);	1505	security_d_instantiate(found, inode);
1486	return found;	1506	return found;
1487	}	1507	}
1488		1508
1489	/*	1509	/*
1490	* In case a directory already has a (disconnected) entry grab a	1510	* In case a directory already has a (disconnected) entry grab a
1491	* reference to it, move it in place and use it.	1511	* reference to it, move it in place and use it.
1492	*/	1512	*/
1493	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);	1513	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1494	dget_locked(new);	1514	dget_locked(new);
1495	spin_unlock(&dcache_lock);	1515	spin_unlock(&dcache_lock);
1496	security_d_instantiate(found, inode);	1516	security_d_instantiate(found, inode);
1497	d_move(new, found);	1517	d_move(new, found);
1498	iput(inode);	1518	iput(inode);
1499	dput(found);	1519	dput(found);
1500	return new;	1520	return new;
1501		1521
1502	err_out:	1522	err_out:
1503	iput(inode);	1523	iput(inode);
1504	return ERR_PTR(error);	1524	return ERR_PTR(error);
1505	}	1525	}
1506	EXPORT_SYMBOL(d_add_ci);	1526	EXPORT_SYMBOL(d_add_ci);
1507		1527
1508	/**	1528	/**
1509	* d_lookup - search for a dentry	1529	* d_lookup - search for a dentry
1510	* @parent: parent dentry	1530	* @parent: parent dentry
1511	* @name: qstr of name we wish to find	1531	* @name: qstr of name we wish to find
1512	* Returns: dentry, or NULL	1532	* Returns: dentry, or NULL
1513	*	1533	*
1514	* d_lookup searches the children of the parent dentry for the name in	1534	* d_lookup searches the children of the parent dentry for the name in
1515	* question. If the dentry is found its reference count is incremented and the	1535	* question. If the dentry is found its reference count is incremented and the
1516	* dentry is returned. The caller must use dput to free the entry when it has	1536	* dentry is returned. The caller must use dput to free the entry when it has
1517	* finished using it. %NULL is returned if the dentry does not exist.	1537	* finished using it. %NULL is returned if the dentry does not exist.
1518	*/	1538	*/
1519	struct dentry * d_lookup(struct dentry * parent, struct qstr * name)	1539	struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1520	{	1540	{
1521	struct dentry * dentry = NULL;	1541	struct dentry * dentry = NULL;
1522	unsigned long seq;	1542	unsigned long seq;
1523		1543
1524	do {	1544	do {
1525	seq = read_seqbegin(&rename_lock);	1545	seq = read_seqbegin(&rename_lock);
1526	dentry = __d_lookup(parent, name);	1546	dentry = __d_lookup(parent, name);
1527	if (dentry)	1547	if (dentry)
1528	break;	1548	break;
1529	} while (read_seqretry(&rename_lock, seq));	1549	} while (read_seqretry(&rename_lock, seq));
1530	return dentry;	1550	return dentry;
1531	}	1551	}
1532	EXPORT_SYMBOL(d_lookup);	1552	EXPORT_SYMBOL(d_lookup);
1533		1553
1534	/*	1554	/*
1535	* __d_lookup - search for a dentry (racy)	1555	* __d_lookup - search for a dentry (racy)
1536	* @parent: parent dentry	1556	* @parent: parent dentry
1537	* @name: qstr of name we wish to find	1557	* @name: qstr of name we wish to find
1538	* Returns: dentry, or NULL	1558	* Returns: dentry, or NULL
1539	*	1559	*
1540	* __d_lookup is like d_lookup, however it may (rarely) return a	1560	* __d_lookup is like d_lookup, however it may (rarely) return a
1541	* false-negative result due to unrelated rename activity.	1561	* false-negative result due to unrelated rename activity.
1542	*	1562	*
1543	* __d_lookup is slightly faster by avoiding rename_lock read seqlock,	1563	* __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1544	* however it must be used carefully, eg. with a following d_lookup in	1564	* however it must be used carefully, eg. with a following d_lookup in
1545	* the case of failure.	1565	* the case of failure.
1546	*	1566	*
1547	* __d_lookup callers must be commented.	1567	* __d_lookup callers must be commented.
1548	*/	1568	*/
1549	struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)	1569	struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1550	{	1570	{
1551	unsigned int len = name->len;	1571	unsigned int len = name->len;
1552	unsigned int hash = name->hash;	1572	unsigned int hash = name->hash;
1553	const unsigned char *str = name->name;	1573	const unsigned char *str = name->name;
1554	struct hlist_head *head = d_hash(parent,hash);	1574	struct hlist_head *head = d_hash(parent,hash);
1555	struct dentry *found = NULL;	1575	struct dentry *found = NULL;
1556	struct hlist_node *node;	1576	struct hlist_node *node;
1557	struct dentry *dentry;	1577	struct dentry *dentry;
1558		1578
1559	/*	1579	/*
1560	* The hash list is protected using RCU.	1580	* The hash list is protected using RCU.
1561	*	1581	*
1562	* Take d_lock when comparing a candidate dentry, to avoid races	1582	* Take d_lock when comparing a candidate dentry, to avoid races
1563	* with d_move().	1583	* with d_move().
1564	*	1584	*
1565	* It is possible that concurrent renames can mess up our list	1585	* It is possible that concurrent renames can mess up our list
1566	* walk here and result in missing our dentry, resulting in the	1586	* walk here and result in missing our dentry, resulting in the
1567	* false-negative result. d_lookup() protects against concurrent	1587	* false-negative result. d_lookup() protects against concurrent
1568	* renames using rename_lock seqlock.	1588	* renames using rename_lock seqlock.
1569	*	1589	*
1570	* See Documentation/vfs/dcache-locking.txt for more details.	1590	* See Documentation/vfs/dcache-locking.txt for more details.
1571	*/	1591	*/
1572	rcu_read_lock();	1592	rcu_read_lock();
1573		1593
1574	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {	1594	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
1575	struct qstr *qstr;	1595	struct qstr *qstr;
1576		1596
1577	if (dentry->d_name.hash != hash)	1597	if (dentry->d_name.hash != hash)
1578	continue;	1598	continue;
1579	if (dentry->d_parent != parent)	1599	if (dentry->d_parent != parent)
1580	continue;	1600	continue;
1581		1601
1582	spin_lock(&dentry->d_lock);	1602	spin_lock(&dentry->d_lock);
1583		1603
1584	/*	1604	/*
1585	* Recheck the dentry after taking the lock - d_move may have	1605	* Recheck the dentry after taking the lock - d_move may have
1586	* changed things. Don't bother checking the hash because	1606	* changed things. Don't bother checking the hash because
1587	* we're about to compare the whole name anyway.	1607	* we're about to compare the whole name anyway.
1588	*/	1608	*/
1589	if (dentry->d_parent != parent)	1609	if (dentry->d_parent != parent)
1590	goto next;	1610	goto next;
1591		1611
1592	/* non-existing due to RCU? */	1612	/* non-existing due to RCU? */
1593	if (d_unhashed(dentry))	1613	if (d_unhashed(dentry))
1594	goto next;	1614	goto next;
1595		1615
1596	/*	1616	/*
1597	* It is safe to compare names since d_move() cannot	1617	* It is safe to compare names since d_move() cannot
1598	* change the qstr (protected by d_lock).	1618	* change the qstr (protected by d_lock).
1599	*/	1619	*/
1600	qstr = &dentry->d_name;	1620	qstr = &dentry->d_name;
1601	if (parent->d_op && parent->d_op->d_compare) {	1621	if (parent->d_op && parent->d_op->d_compare) {
1602	if (parent->d_op->d_compare(parent, parent->d_inode,	1622	if (parent->d_op->d_compare(parent, parent->d_inode,
1603	dentry, dentry->d_inode,	1623	dentry, dentry->d_inode,
1604	qstr->len, qstr->name, name))	1624	qstr->len, qstr->name, name))
1605	goto next;	1625	goto next;
1606	} else {	1626	} else {
1607	if (qstr->len != len)	1627	if (qstr->len != len)
1608	goto next;	1628	goto next;
1609	if (memcmp(qstr->name, str, len))	1629	if (memcmp(qstr->name, str, len))
1610	goto next;	1630	goto next;
1611	}	1631	}
1612		1632
1613	dentry->d_count++;	1633	dentry->d_count++;
1614	found = dentry;	1634	found = dentry;
1615	spin_unlock(&dentry->d_lock);	1635	spin_unlock(&dentry->d_lock);
1616	break;	1636	break;
1617	next:	1637	next:
1618	spin_unlock(&dentry->d_lock);	1638	spin_unlock(&dentry->d_lock);
1619	}	1639	}
1620	rcu_read_unlock();	1640	rcu_read_unlock();
1621		1641
1622	return found;	1642	return found;
1623	}	1643	}
1624		1644
1625	/**	1645	/**
1626	* d_hash_and_lookup - hash the qstr then search for a dentry	1646	* d_hash_and_lookup - hash the qstr then search for a dentry
1627	* @dir: Directory to search in	1647	* @dir: Directory to search in
1628	* @name: qstr of name we wish to find	1648	* @name: qstr of name we wish to find
1629	*	1649	*
1630	* On hash failure or on lookup failure NULL is returned.	1650	* On hash failure or on lookup failure NULL is returned.
1631	*/	1651	*/
1632	struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)	1652	struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1633	{	1653	{
1634	struct dentry *dentry = NULL;	1654	struct dentry *dentry = NULL;
1635		1655
1636	/*	1656	/*
1637	* Check for a fs-specific hash function. Note that we must	1657	* Check for a fs-specific hash function. Note that we must
1638	* calculate the standard hash first, as the d_op->d_hash()	1658	* calculate the standard hash first, as the d_op->d_hash()
1639	* routine may choose to leave the hash value unchanged.	1659	* routine may choose to leave the hash value unchanged.
1640	*/	1660	*/
1641	name->hash = full_name_hash(name->name, name->len);	1661	name->hash = full_name_hash(name->name, name->len);
1642	if (dir->d_op && dir->d_op->d_hash) {	1662	if (dir->d_op && dir->d_op->d_hash) {
1643	if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)	1663	if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1644	goto out;	1664	goto out;
1645	}	1665	}
1646	dentry = d_lookup(dir, name);	1666	dentry = d_lookup(dir, name);
1647	out:	1667	out:
1648	return dentry;	1668	return dentry;
1649	}	1669	}
1650		1670
1651	/**	1671	/**
1652	* d_validate - verify dentry provided from insecure source (deprecated)	1672	* d_validate - verify dentry provided from insecure source (deprecated)
1653	* @dentry: The dentry alleged to be valid child of @dparent	1673	* @dentry: The dentry alleged to be valid child of @dparent
1654	* @dparent: The parent dentry (known to be valid)	1674	* @dparent: The parent dentry (known to be valid)
1655	*	1675	*
1656	* An insecure source has sent us a dentry, here we verify it and dget() it.	1676	* An insecure source has sent us a dentry, here we verify it and dget() it.
1657	* This is used by ncpfs in its readdir implementation.	1677	* This is used by ncpfs in its readdir implementation.
1658	* Zero is returned in the dentry is invalid.	1678	* Zero is returned in the dentry is invalid.
1659	*	1679	*
1660	* This function is slow for big directories, and deprecated, do not use it.	1680	* This function is slow for big directories, and deprecated, do not use it.
1661	*/	1681	*/
1662	int d_validate(struct dentry *dentry, struct dentry *dparent)	1682	int d_validate(struct dentry *dentry, struct dentry *dparent)
1663	{	1683	{
1664	struct dentry *child;	1684	struct dentry *child;
1665		1685
1666	spin_lock(&dcache_lock);	1686	spin_lock(&dcache_lock);
1667	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {	1687	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1668	if (dentry == child) {	1688	if (dentry == child) {
1669	__dget_locked(dentry);	1689	__dget_locked(dentry);
1670	spin_unlock(&dcache_lock);	1690	spin_unlock(&dcache_lock);
1671	return 1;	1691	return 1;
1672	}	1692	}
1673	}	1693	}
1674	spin_unlock(&dcache_lock);	1694	spin_unlock(&dcache_lock);
1675		1695
1676	return 0;	1696	return 0;
1677	}	1697	}
1678	EXPORT_SYMBOL(d_validate);	1698	EXPORT_SYMBOL(d_validate);
1679		1699
1680	/*	1700	/*
1681	* When a file is deleted, we have two options:	1701	* When a file is deleted, we have two options:
1682	* - turn this dentry into a negative dentry	1702	* - turn this dentry into a negative dentry
1683	* - unhash this dentry and free it.	1703	* - unhash this dentry and free it.
1684	*	1704	*
1685	* Usually, we want to just turn this into	1705	* Usually, we want to just turn this into
1686	* a negative dentry, but if anybody else is	1706	* a negative dentry, but if anybody else is
1687	* currently using the dentry or the inode	1707	* currently using the dentry or the inode
1688	* we can't do that and we fall back on removing	1708	* we can't do that and we fall back on removing
1689	* it from the hash queues and waiting for	1709	* it from the hash queues and waiting for
1690	* it to be deleted later when it has no users	1710	* it to be deleted later when it has no users
1691	*/	1711	*/
1692		1712
1693	/**	1713	/**
1694	* d_delete - delete a dentry	1714	* d_delete - delete a dentry
1695	* @dentry: The dentry to delete	1715	* @dentry: The dentry to delete
1696	*	1716	*
1697	* Turn the dentry into a negative dentry if possible, otherwise	1717	* Turn the dentry into a negative dentry if possible, otherwise
1698	* remove it from the hash queues so it can be deleted later	1718	* remove it from the hash queues so it can be deleted later
1699	*/	1719	*/
1700		1720
1701	void d_delete(struct dentry * dentry)	1721	void d_delete(struct dentry * dentry)
1702	{	1722	{
1703	int isdir = 0;	1723	int isdir = 0;
1704	/*	1724	/*
1705	* Are we the only user?	1725	* Are we the only user?
1706	*/	1726	*/
1707	spin_lock(&dcache_lock);	1727	spin_lock(&dcache_lock);
1708	spin_lock(&dentry->d_lock);	1728	spin_lock(&dentry->d_lock);
1709	isdir = S_ISDIR(dentry->d_inode->i_mode);	1729	isdir = S_ISDIR(dentry->d_inode->i_mode);
1710	if (dentry->d_count == 1) {	1730	if (dentry->d_count == 1) {
1711	dentry->d_flags &= ~DCACHE_CANT_MOUNT;	1731	dentry->d_flags &= ~DCACHE_CANT_MOUNT;
1712	dentry_iput(dentry);	1732	dentry_iput(dentry);
1713	fsnotify_nameremove(dentry, isdir);	1733	fsnotify_nameremove(dentry, isdir);
1714	return;	1734	return;
1715	}	1735	}
1716		1736
1717	if (!d_unhashed(dentry))	1737	if (!d_unhashed(dentry))
1718	__d_drop(dentry);	1738	__d_drop(dentry);
1719		1739
1720	spin_unlock(&dentry->d_lock);	1740	spin_unlock(&dentry->d_lock);
1721	spin_unlock(&dcache_lock);	1741	spin_unlock(&dcache_lock);
1722		1742
1723	fsnotify_nameremove(dentry, isdir);	1743	fsnotify_nameremove(dentry, isdir);
1724	}	1744	}
1725	EXPORT_SYMBOL(d_delete);	1745	EXPORT_SYMBOL(d_delete);
1726		1746
1727	static void __d_rehash(struct dentry * entry, struct hlist_head *list)	1747	static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1728	{	1748	{
1729		1749
1730	entry->d_flags &= ~DCACHE_UNHASHED;	1750	entry->d_flags &= ~DCACHE_UNHASHED;
1731	hlist_add_head_rcu(&entry->d_hash, list);	1751	hlist_add_head_rcu(&entry->d_hash, list);
1732	}	1752	}
1733		1753
1734	static void _d_rehash(struct dentry * entry)	1754	static void _d_rehash(struct dentry * entry)
1735	{	1755	{
1736	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));	1756	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
1737	}	1757	}
1738		1758
1739	/**	1759	/**
1740	* d_rehash - add an entry back to the hash	1760	* d_rehash - add an entry back to the hash
1741	* @entry: dentry to add to the hash	1761	* @entry: dentry to add to the hash
1742	*	1762	*
1743	* Adds a dentry to the hash according to its name.	1763	* Adds a dentry to the hash according to its name.
1744	*/	1764	*/
1745		1765
1746	void d_rehash(struct dentry * entry)	1766	void d_rehash(struct dentry * entry)
1747	{	1767	{
1748	spin_lock(&dcache_lock);	1768	spin_lock(&dcache_lock);
1749	spin_lock(&entry->d_lock);	1769	spin_lock(&entry->d_lock);
1750	spin_lock(&dcache_hash_lock);	1770	spin_lock(&dcache_hash_lock);
1751	_d_rehash(entry);	1771	_d_rehash(entry);
1752	spin_unlock(&dcache_hash_lock);	1772	spin_unlock(&dcache_hash_lock);
1753	spin_unlock(&entry->d_lock);	1773	spin_unlock(&entry->d_lock);
1754	spin_unlock(&dcache_lock);	1774	spin_unlock(&dcache_lock);
1755	}	1775	}
1756	EXPORT_SYMBOL(d_rehash);	1776	EXPORT_SYMBOL(d_rehash);
1757		1777
1758	/**	1778	/**
1759	* dentry_update_name_case - update case insensitive dentry with a new name	1779	* dentry_update_name_case - update case insensitive dentry with a new name
1760	* @dentry: dentry to be updated	1780	* @dentry: dentry to be updated
1761	* @name: new name	1781	* @name: new name
1762	*	1782	*
1763	* Update a case insensitive dentry with new case of name.	1783	* Update a case insensitive dentry with new case of name.
1764	*	1784	*
1765	* dentry must have been returned by d_lookup with name @name. Old and new	1785	* dentry must have been returned by d_lookup with name @name. Old and new
1766	* name lengths must match (ie. no d_compare which allows mismatched name	1786	* name lengths must match (ie. no d_compare which allows mismatched name
1767	* lengths).	1787	* lengths).
1768	*	1788	*
1769	* Parent inode i_mutex must be held over d_lookup and into this call (to	1789	* Parent inode i_mutex must be held over d_lookup and into this call (to
1770	* keep renames and concurrent inserts, and readdir(2) away).	1790	* keep renames and concurrent inserts, and readdir(2) away).
1771	*/	1791	*/
1772	void dentry_update_name_case(struct dentry *dentry, struct qstr *name)	1792	void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
1773	{	1793	{
1774	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));	1794	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
1775	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */	1795	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
1776		1796
1777	spin_lock(&dcache_lock);	1797	spin_lock(&dcache_lock);
1778	spin_lock(&dentry->d_lock);	1798	spin_lock(&dentry->d_lock);
1779	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);	1799	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
1780	spin_unlock(&dentry->d_lock);	1800	spin_unlock(&dentry->d_lock);
1781	spin_unlock(&dcache_lock);	1801	spin_unlock(&dcache_lock);
1782	}	1802	}
1783	EXPORT_SYMBOL(dentry_update_name_case);	1803	EXPORT_SYMBOL(dentry_update_name_case);
1784		1804
1785	/*	1805	/*
1786	* When switching names, the actual string doesn't strictly have to	1806	* When switching names, the actual string doesn't strictly have to
1787	* be preserved in the target - because we're dropping the target	1807	* be preserved in the target - because we're dropping the target
1788	* anyway. As such, we can just do a simple memcpy() to copy over	1808	* anyway. As such, we can just do a simple memcpy() to copy over
1789	* the new name before we switch.	1809	* the new name before we switch.
1790	*	1810	*
1791	* Note that we have to be a lot more careful about getting the hash	1811	* Note that we have to be a lot more careful about getting the hash
1792	* switched - we have to switch the hash value properly even if it	1812	* switched - we have to switch the hash value properly even if it
1793	* then no longer matches the actual (corrupted) string of the target.	1813	* then no longer matches the actual (corrupted) string of the target.
1794	* The hash value has to match the hash queue that the dentry is on..	1814	* The hash value has to match the hash queue that the dentry is on..
1795	*/	1815	*/
1796	static void switch_names(struct dentry *dentry, struct dentry *target)	1816	static void switch_names(struct dentry *dentry, struct dentry *target)
1797	{	1817	{
1798	if (dname_external(target)) {	1818	if (dname_external(target)) {
1799	if (dname_external(dentry)) {	1819	if (dname_external(dentry)) {
1800	/*	1820	/*
1801	* Both external: swap the pointers	1821	* Both external: swap the pointers
1802	*/	1822	*/
1803	swap(target->d_name.name, dentry->d_name.name);	1823	swap(target->d_name.name, dentry->d_name.name);
1804	} else {	1824	} else {
1805	/*	1825	/*
1806	* dentry:internal, target:external. Steal target's	1826	* dentry:internal, target:external. Steal target's
1807	* storage and make target internal.	1827	* storage and make target internal.
1808	*/	1828	*/
1809	memcpy(target->d_iname, dentry->d_name.name,	1829	memcpy(target->d_iname, dentry->d_name.name,
1810	dentry->d_name.len + 1);	1830	dentry->d_name.len + 1);
1811	dentry->d_name.name = target->d_name.name;	1831	dentry->d_name.name = target->d_name.name;
1812	target->d_name.name = target->d_iname;	1832	target->d_name.name = target->d_iname;
1813	}	1833	}
1814	} else {	1834	} else {
1815	if (dname_external(dentry)) {	1835	if (dname_external(dentry)) {
1816	/*	1836	/*
1817	* dentry:external, target:internal. Give dentry's	1837	* dentry:external, target:internal. Give dentry's
1818	* storage to target and make dentry internal	1838	* storage to target and make dentry internal
1819	*/	1839	*/
1820	memcpy(dentry->d_iname, target->d_name.name,	1840	memcpy(dentry->d_iname, target->d_name.name,
1821	target->d_name.len + 1);	1841	target->d_name.len + 1);
1822	target->d_name.name = dentry->d_name.name;	1842	target->d_name.name = dentry->d_name.name;
1823	dentry->d_name.name = dentry->d_iname;	1843	dentry->d_name.name = dentry->d_iname;
1824	} else {	1844	} else {
1825	/*	1845	/*
1826	* Both are internal. Just copy target to dentry	1846	* Both are internal. Just copy target to dentry
1827	*/	1847	*/
1828	memcpy(dentry->d_iname, target->d_name.name,	1848	memcpy(dentry->d_iname, target->d_name.name,
1829	target->d_name.len + 1);	1849	target->d_name.len + 1);
1830	dentry->d_name.len = target->d_name.len;	1850	dentry->d_name.len = target->d_name.len;
1831	return;	1851	return;
1832	}	1852	}
1833	}	1853	}
1834	swap(dentry->d_name.len, target->d_name.len);	1854	swap(dentry->d_name.len, target->d_name.len);
1835	}	1855	}
1836		1856
1837	/*	1857	/*
1838	* We cannibalize "target" when moving dentry on top of it,	1858	* We cannibalize "target" when moving dentry on top of it,
1839	* because it's going to be thrown away anyway. We could be more	1859	* because it's going to be thrown away anyway. We could be more
1840	* polite about it, though.	1860	* polite about it, though.
1841	*	1861	*
1842	* This forceful removal will result in ugly /proc output if	1862	* This forceful removal will result in ugly /proc output if
1843	* somebody holds a file open that got deleted due to a rename.	1863	* somebody holds a file open that got deleted due to a rename.
1844	* We could be nicer about the deleted file, and let it show	1864	* We could be nicer about the deleted file, and let it show
1845	* up under the name it had before it was deleted rather than	1865	* up under the name it had before it was deleted rather than
1846	* under the original name of the file that was moved on top of it.	1866	* under the original name of the file that was moved on top of it.
1847	*/	1867	*/
1848		1868
1849	/*	1869	/*
1850	* d_move_locked - move a dentry	1870	* d_move_locked - move a dentry
1851	* @dentry: entry to move	1871	* @dentry: entry to move
1852	* @target: new dentry	1872	* @target: new dentry
1853	*	1873	*
1854	* Update the dcache to reflect the move of a file name. Negative	1874	* Update the dcache to reflect the move of a file name. Negative
1855	* dcache entries should not be moved in this way.	1875	* dcache entries should not be moved in this way.
1856	*/	1876	*/
1857	static void d_move_locked(struct dentry * dentry, struct dentry * target)	1877	static void d_move_locked(struct dentry * dentry, struct dentry * target)
1858	{	1878	{
1859	if (!dentry->d_inode)	1879	if (!dentry->d_inode)
1860	printk(KERN_WARNING "VFS: moving negative dcache entry\n");	1880	printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1861		1881
1862	write_seqlock(&rename_lock);	1882	write_seqlock(&rename_lock);
1863	/*	1883	/*
1864	* XXXX: do we really need to take target->d_lock?	1884	* XXXX: do we really need to take target->d_lock?
1865	*/	1885	*/
1866	if (target < dentry) {	1886	if (d_ancestor(dentry, target)) {
		1887	spin_lock(&dentry->d_lock);
		1888	spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
		1889	} else if (d_ancestor(target, dentry) || target < dentry) {
1867	spin_lock(&target->d_lock);	1890	spin_lock(&target->d_lock);
1868	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);	1891	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1869	} else {	1892	} else {
1870	spin_lock(&dentry->d_lock);	1893	spin_lock(&dentry->d_lock);
1871	spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);	1894	spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
1872	}	1895	}
1873		1896
1874	/* Move the dentry to the target hash queue, if on different bucket */	1897	/* Move the dentry to the target hash queue, if on different bucket */
1875	spin_lock(&dcache_hash_lock);	1898	spin_lock(&dcache_hash_lock);
1876	if (!d_unhashed(dentry))	1899	if (!d_unhashed(dentry))
1877	hlist_del_rcu(&dentry->d_hash);	1900	hlist_del_rcu(&dentry->d_hash);
1878	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));	1901	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
1879	spin_unlock(&dcache_hash_lock);	1902	spin_unlock(&dcache_hash_lock);
1880		1903
1881	/* Unhash the target: dput() will then get rid of it */	1904	/* Unhash the target: dput() will then get rid of it */
1882	__d_drop(target);	1905	__d_drop(target);
1883		1906
1884	list_del(&dentry->d_u.d_child);	1907	list_del(&dentry->d_u.d_child);
1885	list_del(&target->d_u.d_child);	1908	list_del(&target->d_u.d_child);
1886		1909
1887	/* Switch the names.. */	1910	/* Switch the names.. */
1888	switch_names(dentry, target);	1911	switch_names(dentry, target);
1889	swap(dentry->d_name.hash, target->d_name.hash);	1912	swap(dentry->d_name.hash, target->d_name.hash);
1890		1913
1891	/* ... and switch the parents */	1914	/* ... and switch the parents */
1892	if (IS_ROOT(dentry)) {	1915	if (IS_ROOT(dentry)) {
1893	dentry->d_parent = target->d_parent;	1916	dentry->d_parent = target->d_parent;
1894	target->d_parent = target;	1917	target->d_parent = target;
1895	INIT_LIST_HEAD(&target->d_u.d_child);	1918	INIT_LIST_HEAD(&target->d_u.d_child);
1896	} else {	1919	} else {
1897	swap(dentry->d_parent, target->d_parent);	1920	swap(dentry->d_parent, target->d_parent);
1898		1921
1899	/* And add them back to the (new) parent lists */	1922	/* And add them back to the (new) parent lists */
1900	list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);	1923	list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
1901	}	1924	}
1902		1925
1903	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);	1926	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
1904	spin_unlock(&target->d_lock);	1927	spin_unlock(&target->d_lock);
1905	fsnotify_d_move(dentry);	1928	fsnotify_d_move(dentry);
1906	spin_unlock(&dentry->d_lock);	1929	spin_unlock(&dentry->d_lock);
1907	write_sequnlock(&rename_lock);	1930	write_sequnlock(&rename_lock);
1908	}	1931	}
1909		1932
1910	/**	1933	/**
1911	* d_move - move a dentry	1934	* d_move - move a dentry
1912	* @dentry: entry to move	1935	* @dentry: entry to move
1913	* @target: new dentry	1936	* @target: new dentry
1914	*	1937	*
1915	* Update the dcache to reflect the move of a file name. Negative	1938	* Update the dcache to reflect the move of a file name. Negative
1916	* dcache entries should not be moved in this way.	1939	* dcache entries should not be moved in this way.
1917	*/	1940	*/
1918		1941
1919	void d_move(struct dentry * dentry, struct dentry * target)	1942	void d_move(struct dentry * dentry, struct dentry * target)
1920	{	1943	{
1921	spin_lock(&dcache_lock);	1944	spin_lock(&dcache_lock);
1922	d_move_locked(dentry, target);	1945	d_move_locked(dentry, target);
1923	spin_unlock(&dcache_lock);	1946	spin_unlock(&dcache_lock);
1924	}	1947	}
1925	EXPORT_SYMBOL(d_move);	1948	EXPORT_SYMBOL(d_move);
1926		1949
1927	/**	1950	/**
1928	* d_ancestor - search for an ancestor	1951	* d_ancestor - search for an ancestor
1929	* @p1: ancestor dentry	1952	* @p1: ancestor dentry
1930	* @p2: child dentry	1953	* @p2: child dentry
1931	*	1954	*
1932	* Returns the ancestor dentry of p2 which is a child of p1, if p1 is	1955	* Returns the ancestor dentry of p2 which is a child of p1, if p1 is
1933	* an ancestor of p2, else NULL.	1956	* an ancestor of p2, else NULL.
1934	*/	1957	*/
1935	struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)	1958	struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1936	{	1959	{
1937	struct dentry *p;	1960	struct dentry *p;
1938		1961
1939	for (p = p2; !IS_ROOT(p); p = p->d_parent) {	1962	for (p = p2; !IS_ROOT(p); p = p->d_parent) {
1940	if (p->d_parent == p1)	1963	if (p->d_parent == p1)
1941	return p;	1964	return p;
1942	}	1965	}
1943	return NULL;	1966	return NULL;
1944	}	1967	}
1945		1968
1946	/*	1969	/*
1947	* This helper attempts to cope with remotely renamed directories	1970	* This helper attempts to cope with remotely renamed directories
1948	*	1971	*
1949	* It assumes that the caller is already holding	1972	* It assumes that the caller is already holding
1950	* dentry->d_parent->d_inode->i_mutex and the dcache_lock	1973	* dentry->d_parent->d_inode->i_mutex and the dcache_lock
1951	*	1974	*
1952	* Note: If ever the locking in lock_rename() changes, then please	1975	* Note: If ever the locking in lock_rename() changes, then please
1953	* remember to update this too...	1976	* remember to update this too...
1954	*/	1977	*/
1955	static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)	1978	static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1956	__releases(dcache_lock)	1979	__releases(dcache_lock)
1957	{	1980	{
1958	struct mutex *m1 = NULL, *m2 = NULL;	1981	struct mutex *m1 = NULL, *m2 = NULL;
1959	struct dentry *ret;	1982	struct dentry *ret;
1960		1983
1961	/* If alias and dentry share a parent, then no extra locks required */	1984	/* If alias and dentry share a parent, then no extra locks required */
1962	if (alias->d_parent == dentry->d_parent)	1985	if (alias->d_parent == dentry->d_parent)
1963	goto out_unalias;	1986	goto out_unalias;
1964		1987
1965	/* Check for loops */	1988	/* Check for loops */
1966	ret = ERR_PTR(-ELOOP);	1989	ret = ERR_PTR(-ELOOP);
1967	if (d_ancestor(alias, dentry))	1990	if (d_ancestor(alias, dentry))
1968	goto out_err;	1991	goto out_err;
1969		1992
1970	/* See lock_rename() */	1993	/* See lock_rename() */
1971	ret = ERR_PTR(-EBUSY);	1994	ret = ERR_PTR(-EBUSY);
1972	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))	1995	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
1973	goto out_err;	1996	goto out_err;
1974	m1 = &dentry->d_sb->s_vfs_rename_mutex;	1997	m1 = &dentry->d_sb->s_vfs_rename_mutex;
1975	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))	1998	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
1976	goto out_err;	1999	goto out_err;
1977	m2 = &alias->d_parent->d_inode->i_mutex;	2000	m2 = &alias->d_parent->d_inode->i_mutex;
1978	out_unalias:	2001	out_unalias:
1979	d_move_locked(alias, dentry);	2002	d_move_locked(alias, dentry);
1980	ret = alias;	2003	ret = alias;
1981	out_err:	2004	out_err:
1982	spin_unlock(&dcache_lock);	2005	spin_unlock(&dcache_lock);
1983	if (m2)	2006	if (m2)
1984	mutex_unlock(m2);	2007	mutex_unlock(m2);
1985	if (m1)	2008	if (m1)
1986	mutex_unlock(m1);	2009	mutex_unlock(m1);
1987	return ret;	2010	return ret;
1988	}	2011	}
1989		2012
1990	/*	2013	/*
1991	* Prepare an anonymous dentry for life in the superblock's dentry tree as a	2014	* Prepare an anonymous dentry for life in the superblock's dentry tree as a
1992	* named dentry in place of the dentry to be replaced.	2015	* named dentry in place of the dentry to be replaced.
1993	*/	2016	*/
1994	static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)	2017	static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1995	{	2018	{
1996	struct dentry *dparent, *aparent;	2019	struct dentry *dparent, *aparent;
1997		2020
1998	switch_names(dentry, anon);	2021	switch_names(dentry, anon);
1999	swap(dentry->d_name.hash, anon->d_name.hash);	2022	swap(dentry->d_name.hash, anon->d_name.hash);
2000		2023
2001	dparent = dentry->d_parent;	2024	dparent = dentry->d_parent;
2002	aparent = anon->d_parent;	2025	aparent = anon->d_parent;
2003		2026
2004	dentry->d_parent = (aparent == anon) ? dentry : aparent;	2027	dentry->d_parent = (aparent == anon) ? dentry : aparent;
2005	list_del(&dentry->d_u.d_child);	2028	list_del(&dentry->d_u.d_child);
2006	if (!IS_ROOT(dentry))	2029	if (!IS_ROOT(dentry))
2007	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);	2030	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2008	else	2031	else
2009	INIT_LIST_HEAD(&dentry->d_u.d_child);	2032	INIT_LIST_HEAD(&dentry->d_u.d_child);
2010		2033
2011	anon->d_parent = (dparent == dentry) ? anon : dparent;	2034	anon->d_parent = (dparent == dentry) ? anon : dparent;
2012	list_del(&anon->d_u.d_child);	2035	list_del(&anon->d_u.d_child);
2013	if (!IS_ROOT(anon))	2036	if (!IS_ROOT(anon))
2014	list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);	2037	list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2015	else	2038	else
2016	INIT_LIST_HEAD(&anon->d_u.d_child);	2039	INIT_LIST_HEAD(&anon->d_u.d_child);
2017		2040
2018	anon->d_flags &= ~DCACHE_DISCONNECTED;	2041	anon->d_flags &= ~DCACHE_DISCONNECTED;
2019	}	2042	}
2020		2043
2021	/**	2044	/**
2022	* d_materialise_unique - introduce an inode into the tree	2045	* d_materialise_unique - introduce an inode into the tree
2023	* @dentry: candidate dentry	2046	* @dentry: candidate dentry
2024	* @inode: inode to bind to the dentry, to which aliases may be attached	2047	* @inode: inode to bind to the dentry, to which aliases may be attached
2025	*	2048	*
2026	* Introduces an dentry into the tree, substituting an extant disconnected	2049	* Introduces an dentry into the tree, substituting an extant disconnected
2027	* root directory alias in its place if there is one	2050	* root directory alias in its place if there is one
2028	*/	2051	*/
2029	struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)	2052	struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2030	{	2053	{
2031	struct dentry *actual;	2054	struct dentry *actual;
2032		2055
2033	BUG_ON(!d_unhashed(dentry));	2056	BUG_ON(!d_unhashed(dentry));
2034		2057
2035	spin_lock(&dcache_lock);	2058	spin_lock(&dcache_lock);
2036		2059
2037	if (!inode) {	2060	if (!inode) {
2038	actual = dentry;	2061	actual = dentry;
2039	__d_instantiate(dentry, NULL);	2062	__d_instantiate(dentry, NULL);
2040	goto found_lock;	2063	goto found_lock;
2041	}	2064	}
2042		2065
2043	if (S_ISDIR(inode->i_mode)) {	2066	if (S_ISDIR(inode->i_mode)) {
2044	struct dentry *alias;	2067	struct dentry *alias;
2045		2068
2046	/* Does an aliased dentry already exist? */	2069	/* Does an aliased dentry already exist? */
2047	alias = __d_find_alias(inode, 0);	2070	alias = __d_find_alias(inode, 0);
2048	if (alias) {	2071	if (alias) {
2049	actual = alias;	2072	actual = alias;
2050	/* Is this an anonymous mountpoint that we could splice	2073	/* Is this an anonymous mountpoint that we could splice
2051	* into our tree? */	2074	* into our tree? */
2052	if (IS_ROOT(alias)) {	2075	if (IS_ROOT(alias)) {
2053	spin_lock(&alias->d_lock);	2076	spin_lock(&alias->d_lock);
2054	__d_materialise_dentry(dentry, alias);	2077	__d_materialise_dentry(dentry, alias);
2055	__d_drop(alias);	2078	__d_drop(alias);
2056	goto found;	2079	goto found;
2057	}	2080	}
2058	/* Nope, but we must(!) avoid directory aliasing */	2081	/* Nope, but we must(!) avoid directory aliasing */
2059	actual = __d_unalias(dentry, alias);	2082	actual = __d_unalias(dentry, alias);
2060	if (IS_ERR(actual))	2083	if (IS_ERR(actual))
2061	dput(alias);	2084	dput(alias);
2062	goto out_nolock;	2085	goto out_nolock;
2063	}	2086	}
2064	}	2087	}
2065		2088
2066	/* Add a unique reference */	2089	/* Add a unique reference */
2067	actual = __d_instantiate_unique(dentry, inode);	2090	actual = __d_instantiate_unique(dentry, inode);
2068	if (!actual)	2091	if (!actual)
2069	actual = dentry;	2092	actual = dentry;
2070	else if (unlikely(!d_unhashed(actual)))	2093	else if (unlikely(!d_unhashed(actual)))
2071	goto shouldnt_be_hashed;	2094	goto shouldnt_be_hashed;
2072		2095
2073	found_lock:	2096	found_lock:
2074	spin_lock(&actual->d_lock);	2097	spin_lock(&actual->d_lock);
2075	found:	2098	found:
2076	spin_lock(&dcache_hash_lock);	2099	spin_lock(&dcache_hash_lock);
2077	_d_rehash(actual);	2100	_d_rehash(actual);
2078	spin_unlock(&dcache_hash_lock);	2101	spin_unlock(&dcache_hash_lock);
2079	spin_unlock(&actual->d_lock);	2102	spin_unlock(&actual->d_lock);
2080	spin_unlock(&dcache_lock);	2103	spin_unlock(&dcache_lock);
2081	out_nolock:	2104	out_nolock:
2082	if (actual == dentry) {	2105	if (actual == dentry) {
2083	security_d_instantiate(dentry, inode);	2106	security_d_instantiate(dentry, inode);
2084	return NULL;	2107	return NULL;
2085	}	2108	}
2086		2109
2087	iput(inode);	2110	iput(inode);
2088	return actual;	2111	return actual;
2089		2112
2090	shouldnt_be_hashed:	2113	shouldnt_be_hashed:
2091	spin_unlock(&dcache_lock);	2114	spin_unlock(&dcache_lock);
2092	BUG();	2115	BUG();
2093	}	2116	}
2094	EXPORT_SYMBOL_GPL(d_materialise_unique);	2117	EXPORT_SYMBOL_GPL(d_materialise_unique);
2095		2118
2096	static int prepend(char **buffer, int *buflen, const char *str, int namelen)	2119	static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2097	{	2120	{
2098	*buflen -= namelen;	2121	*buflen -= namelen;
2099	if (*buflen < 0)	2122	if (*buflen < 0)
2100	return -ENAMETOOLONG;	2123	return -ENAMETOOLONG;
2101	*buffer -= namelen;	2124	*buffer -= namelen;
2102	memcpy(*buffer, str, namelen);	2125	memcpy(*buffer, str, namelen);
2103	return 0;	2126	return 0;
2104	}	2127	}
2105		2128
2106	static int prepend_name(char **buffer, int *buflen, struct qstr *name)	2129	static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2107	{	2130	{
2108	return prepend(buffer, buflen, name->name, name->len);	2131	return prepend(buffer, buflen, name->name, name->len);
2109	}	2132	}
2110		2133
2111	/**	2134	/**
2112	* Prepend path string to a buffer	2135	* Prepend path string to a buffer
2113	*	2136	*
2114	* @path: the dentry/vfsmount to report	2137	* @path: the dentry/vfsmount to report
2115	* @root: root vfsmnt/dentry (may be modified by this function)	2138	* @root: root vfsmnt/dentry (may be modified by this function)
2116	* @buffer: pointer to the end of the buffer	2139	* @buffer: pointer to the end of the buffer
2117	* @buflen: pointer to buffer length	2140	* @buflen: pointer to buffer length
2118	*	2141	*
2119	* Caller holds the dcache_lock.	2142	* Caller holds the dcache_lock.
2120	*	2143	*
2121	* If path is not reachable from the supplied root, then the value of	2144	* If path is not reachable from the supplied root, then the value of
2122	* root is changed (without modifying refcounts).	2145	* root is changed (without modifying refcounts).
2123	*/	2146	*/
2124	static int prepend_path(const struct path *path, struct path *root,	2147	static int prepend_path(const struct path *path, struct path *root,
2125	char **buffer, int *buflen)	2148	char **buffer, int *buflen)
2126	{	2149	{
2127	struct dentry *dentry = path->dentry;	2150	struct dentry *dentry = path->dentry;
2128	struct vfsmount *vfsmnt = path->mnt;	2151	struct vfsmount *vfsmnt = path->mnt;
2129	bool slash = false;	2152	bool slash = false;
2130	int error = 0;	2153	int error = 0;
2131		2154
2132	br_read_lock(vfsmount_lock);	2155	br_read_lock(vfsmount_lock);
2133	while (dentry != root->dentry || vfsmnt != root->mnt) {	2156	while (dentry != root->dentry || vfsmnt != root->mnt) {
2134	struct dentry * parent;	2157	struct dentry * parent;
2135		2158
2136	if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {	2159	if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2137	/* Global root? */	2160	/* Global root? */
2138	if (vfsmnt->mnt_parent == vfsmnt) {	2161	if (vfsmnt->mnt_parent == vfsmnt) {
2139	goto global_root;	2162	goto global_root;
2140	}	2163	}
2141	dentry = vfsmnt->mnt_mountpoint;	2164	dentry = vfsmnt->mnt_mountpoint;
2142	vfsmnt = vfsmnt->mnt_parent;	2165	vfsmnt = vfsmnt->mnt_parent;
2143	continue;	2166	continue;
2144	}	2167	}
2145	parent = dentry->d_parent;	2168	parent = dentry->d_parent;
2146	prefetch(parent);	2169	prefetch(parent);
2147	error = prepend_name(buffer, buflen, &dentry->d_name);	2170	error = prepend_name(buffer, buflen, &dentry->d_name);
2148	if (!error)	2171	if (!error)
2149	error = prepend(buffer, buflen, "/", 1);	2172	error = prepend(buffer, buflen, "/", 1);
2150	if (error)	2173	if (error)
2151	break;	2174	break;
2152		2175
2153	slash = true;	2176	slash = true;
2154	dentry = parent;	2177	dentry = parent;
2155	}	2178	}
2156		2179
2157	out:	2180	out:
2158	if (!error && !slash)	2181	if (!error && !slash)
2159	error = prepend(buffer, buflen, "/", 1);	2182	error = prepend(buffer, buflen, "/", 1);
2160		2183
2161	br_read_unlock(vfsmount_lock);	2184	br_read_unlock(vfsmount_lock);
2162	return error;	2185	return error;
2163		2186
2164	global_root:	2187	global_root:
2165	/*	2188	/*
2166	* Filesystems needing to implement special "root names"	2189	* Filesystems needing to implement special "root names"
2167	* should do so with ->d_dname()	2190	* should do so with ->d_dname()
2168	*/	2191	*/
2169	if (IS_ROOT(dentry) &&	2192	if (IS_ROOT(dentry) &&
2170	(dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {	2193	(dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2171	WARN(1, "Root dentry has weird name <%.*s>\n",	2194	WARN(1, "Root dentry has weird name <%.*s>\n",
2172	(int) dentry->d_name.len, dentry->d_name.name);	2195	(int) dentry->d_name.len, dentry->d_name.name);
2173	}	2196	}
2174	root->mnt = vfsmnt;	2197	root->mnt = vfsmnt;
2175	root->dentry = dentry;	2198	root->dentry = dentry;
2176	goto out;	2199	goto out;
2177	}	2200	}
2178		2201
2179	/**	2202	/**
2180	* __d_path - return the path of a dentry	2203	* __d_path - return the path of a dentry
2181	* @path: the dentry/vfsmount to report	2204	* @path: the dentry/vfsmount to report
2182	* @root: root vfsmnt/dentry (may be modified by this function)	2205	* @root: root vfsmnt/dentry (may be modified by this function)
2183	* @buf: buffer to return value in	2206	* @buf: buffer to return value in
2184	* @buflen: buffer length	2207	* @buflen: buffer length
2185	*	2208	*
2186	* Convert a dentry into an ASCII path name.	2209	* Convert a dentry into an ASCII path name.
2187	*	2210	*
2188	* Returns a pointer into the buffer or an error code if the	2211	* Returns a pointer into the buffer or an error code if the
2189	* path was too long.	2212	* path was too long.
2190	*	2213	*
2191	* "buflen" should be positive.	2214	* "buflen" should be positive.
2192	*	2215	*
2193	* If path is not reachable from the supplied root, then the value of	2216	* If path is not reachable from the supplied root, then the value of
2194	* root is changed (without modifying refcounts).	2217	* root is changed (without modifying refcounts).
2195	*/	2218	*/
2196	char *__d_path(const struct path *path, struct path *root,	2219	char *__d_path(const struct path *path, struct path *root,
2197	char *buf, int buflen)	2220	char *buf, int buflen)
2198	{	2221	{
2199	char *res = buf + buflen;	2222	char *res = buf + buflen;
2200	int error;	2223	int error;
2201		2224
2202	prepend(&res, &buflen, "\0", 1);	2225	prepend(&res, &buflen, "\0", 1);
2203	spin_lock(&dcache_lock);	2226	spin_lock(&dcache_lock);
2204	error = prepend_path(path, root, &res, &buflen);	2227	error = prepend_path(path, root, &res, &buflen);
2205	spin_unlock(&dcache_lock);	2228	spin_unlock(&dcache_lock);
2206		2229
2207	if (error)	2230	if (error)
2208	return ERR_PTR(error);	2231	return ERR_PTR(error);
2209	return res;	2232	return res;
2210	}	2233	}
2211		2234
2212	/*	2235	/*
2213	* same as __d_path but appends "(deleted)" for unlinked files.	2236	* same as __d_path but appends "(deleted)" for unlinked files.
2214	*/	2237	*/
2215	static int path_with_deleted(const struct path *path, struct path *root,	2238	static int path_with_deleted(const struct path *path, struct path *root,
2216	char **buf, int *buflen)	2239	char **buf, int *buflen)
2217	{	2240	{
2218	prepend(buf, buflen, "\0", 1);	2241	prepend(buf, buflen, "\0", 1);
2219	if (d_unlinked(path->dentry)) {	2242	if (d_unlinked(path->dentry)) {
2220	int error = prepend(buf, buflen, " (deleted)", 10);	2243	int error = prepend(buf, buflen, " (deleted)", 10);
2221	if (error)	2244	if (error)
2222	return error;	2245	return error;
2223	}	2246	}
2224		2247
2225	return prepend_path(path, root, buf, buflen);	2248	return prepend_path(path, root, buf, buflen);
2226	}	2249	}
2227		2250
2228	static int prepend_unreachable(char **buffer, int *buflen)	2251	static int prepend_unreachable(char **buffer, int *buflen)
2229	{	2252	{
2230	return prepend(buffer, buflen, "(unreachable)", 13);	2253	return prepend(buffer, buflen, "(unreachable)", 13);
2231	}	2254	}
2232		2255
2233	/**	2256	/**
2234	* d_path - return the path of a dentry	2257	* d_path - return the path of a dentry
2235	* @path: path to report	2258	* @path: path to report
2236	* @buf: buffer to return value in	2259	* @buf: buffer to return value in
2237	* @buflen: buffer length	2260	* @buflen: buffer length
2238	*	2261	*
2239	* Convert a dentry into an ASCII path name. If the entry has been deleted	2262	* Convert a dentry into an ASCII path name. If the entry has been deleted
2240	* the string " (deleted)" is appended. Note that this is ambiguous.	2263	* the string " (deleted)" is appended. Note that this is ambiguous.
2241	*	2264	*
2242	* Returns a pointer into the buffer or an error code if the path was	2265	* Returns a pointer into the buffer or an error code if the path was
2243	* too long. Note: Callers should use the returned pointer, not the passed	2266	* too long. Note: Callers should use the returned pointer, not the passed
2244	* in buffer, to use the name! The implementation often starts at an offset	2267	* in buffer, to use the name! The implementation often starts at an offset
2245	* into the buffer, and may leave 0 bytes at the start.	2268	* into the buffer, and may leave 0 bytes at the start.
2246	*	2269	*
2247	* "buflen" should be positive.	2270	* "buflen" should be positive.
2248	*/	2271	*/
2249	char *d_path(const struct path *path, char *buf, int buflen)	2272	char *d_path(const struct path *path, char *buf, int buflen)
2250	{	2273	{
2251	char *res = buf + buflen;	2274	char *res = buf + buflen;
2252	struct path root;	2275	struct path root;
2253	struct path tmp;	2276	struct path tmp;
2254	int error;	2277	int error;
2255		2278
2256	/*	2279	/*
2257	* We have various synthetic filesystems that never get mounted. On	2280	* We have various synthetic filesystems that never get mounted. On
2258	* these filesystems dentries are never used for lookup purposes, and	2281	* these filesystems dentries are never used for lookup purposes, and
2259	* thus don't need to be hashed. They also don't need a name until a	2282	* thus don't need to be hashed. They also don't need a name until a
2260	* user wants to identify the object in /proc/pid/fd/. The little hack	2283	* user wants to identify the object in /proc/pid/fd/. The little hack
2261	* below allows us to generate a name for these objects on demand:	2284	* below allows us to generate a name for these objects on demand:
2262	*/	2285	*/
2263	if (path->dentry->d_op && path->dentry->d_op->d_dname)	2286	if (path->dentry->d_op && path->dentry->d_op->d_dname)
2264	return path->dentry->d_op->d_dname(path->dentry, buf, buflen);	2287	return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2265		2288
2266	get_fs_root(current->fs, &root);	2289	get_fs_root(current->fs, &root);
2267	spin_lock(&dcache_lock);	2290	spin_lock(&dcache_lock);
2268	tmp = root;	2291	tmp = root;
2269	error = path_with_deleted(path, &tmp, &res, &buflen);	2292	error = path_with_deleted(path, &tmp, &res, &buflen);
2270	if (error)	2293	if (error)
2271	res = ERR_PTR(error);	2294	res = ERR_PTR(error);
2272	spin_unlock(&dcache_lock);	2295	spin_unlock(&dcache_lock);
2273	path_put(&root);	2296	path_put(&root);
2274	return res;	2297	return res;
2275	}	2298	}
2276	EXPORT_SYMBOL(d_path);	2299	EXPORT_SYMBOL(d_path);
2277		2300
2278	/**	2301	/**
2279	* d_path_with_unreachable - return the path of a dentry	2302	* d_path_with_unreachable - return the path of a dentry
2280	* @path: path to report	2303	* @path: path to report
2281	* @buf: buffer to return value in	2304	* @buf: buffer to return value in
2282	* @buflen: buffer length	2305	* @buflen: buffer length
2283	*	2306	*
2284	* The difference from d_path() is that this prepends "(unreachable)"	2307	* The difference from d_path() is that this prepends "(unreachable)"
2285	* to paths which are unreachable from the current process' root.	2308	* to paths which are unreachable from the current process' root.
2286	*/	2309	*/
2287	char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)	2310	char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2288	{	2311	{
2289	char *res = buf + buflen;	2312	char *res = buf + buflen;
2290	struct path root;	2313	struct path root;
2291	struct path tmp;	2314	struct path tmp;
2292	int error;	2315	int error;
2293		2316
2294	if (path->dentry->d_op && path->dentry->d_op->d_dname)	2317	if (path->dentry->d_op && path->dentry->d_op->d_dname)
2295	return path->dentry->d_op->d_dname(path->dentry, buf, buflen);	2318	return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2296		2319
2297	get_fs_root(current->fs, &root);	2320	get_fs_root(current->fs, &root);
2298	spin_lock(&dcache_lock);	2321	spin_lock(&dcache_lock);
2299	tmp = root;	2322	tmp = root;
2300	error = path_with_deleted(path, &tmp, &res, &buflen);	2323	error = path_with_deleted(path, &tmp, &res, &buflen);
2301	if (!error && !path_equal(&tmp, &root))	2324	if (!error && !path_equal(&tmp, &root))
2302	error = prepend_unreachable(&res, &buflen);	2325	error = prepend_unreachable(&res, &buflen);
2303	spin_unlock(&dcache_lock);	2326	spin_unlock(&dcache_lock);
2304	path_put(&root);	2327	path_put(&root);
2305	if (error)	2328	if (error)
2306	res = ERR_PTR(error);	2329	res = ERR_PTR(error);
2307		2330
2308	return res;	2331	return res;
2309	}	2332	}
2310		2333
2311	/*	2334	/*
2312	* Helper function for dentry_operations.d_dname() members	2335	* Helper function for dentry_operations.d_dname() members
2313	*/	2336	*/
2314	char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,	2337	char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2315	const char *fmt, ...)	2338	const char *fmt, ...)
2316	{	2339	{
2317	va_list args;	2340	va_list args;
2318	char temp[64];	2341	char temp[64];
2319	int sz;	2342	int sz;
2320		2343
2321	va_start(args, fmt);	2344	va_start(args, fmt);
2322	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;	2345	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2323	va_end(args);	2346	va_end(args);
2324		2347
2325	if (sz > sizeof(temp) || sz > buflen)	2348	if (sz > sizeof(temp) || sz > buflen)
2326	return ERR_PTR(-ENAMETOOLONG);	2349	return ERR_PTR(-ENAMETOOLONG);
2327		2350
2328	buffer += buflen - sz;	2351	buffer += buflen - sz;
2329	return memcpy(buffer, temp, sz);	2352	return memcpy(buffer, temp, sz);
2330	}	2353	}
2331		2354
2332	/*	2355	/*
2333	* Write full pathname from the root of the filesystem into the buffer.	2356	* Write full pathname from the root of the filesystem into the buffer.
2334	*/	2357	*/
2335	static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)	2358	static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2336	{	2359	{
2337	char *end = buf + buflen;	2360	char *end = buf + buflen;
2338	char *retval;	2361	char *retval;
2339		2362
2340	prepend(&end, &buflen, "\0", 1);	2363	prepend(&end, &buflen, "\0", 1);
2341	if (buflen < 1)	2364	if (buflen < 1)
2342	goto Elong;	2365	goto Elong;
2343	/* Get '/' right */	2366	/* Get '/' right */
2344	retval = end-1;	2367	retval = end-1;
2345	*retval = '/';	2368	*retval = '/';
2346		2369
2347	while (!IS_ROOT(dentry)) {	2370	while (!IS_ROOT(dentry)) {
2348	struct dentry *parent = dentry->d_parent;	2371	struct dentry *parent = dentry->d_parent;
2349		2372
2350	prefetch(parent);	2373	prefetch(parent);
2351	if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||	2374	if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
2352	(prepend(&end, &buflen, "/", 1) != 0))	2375	(prepend(&end, &buflen, "/", 1) != 0))
2353	goto Elong;	2376	goto Elong;
2354		2377
2355	retval = end;	2378	retval = end;
2356	dentry = parent;	2379	dentry = parent;
2357	}	2380	}
2358	return retval;	2381	return retval;
2359	Elong:	2382	Elong:
2360	return ERR_PTR(-ENAMETOOLONG);	2383	return ERR_PTR(-ENAMETOOLONG);
2361	}	2384	}
2362		2385
2363	char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)	2386	char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2364	{	2387	{
2365	char *retval;	2388	char *retval;
2366		2389
2367	spin_lock(&dcache_lock);	2390	spin_lock(&dcache_lock);
2368	retval = __dentry_path(dentry, buf, buflen);	2391	retval = __dentry_path(dentry, buf, buflen);
2369	spin_unlock(&dcache_lock);	2392	spin_unlock(&dcache_lock);
2370		2393
2371	return retval;	2394	return retval;
2372	}	2395	}
2373	EXPORT_SYMBOL(dentry_path_raw);	2396	EXPORT_SYMBOL(dentry_path_raw);
2374		2397
2375	char *dentry_path(struct dentry *dentry, char *buf, int buflen)	2398	char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2376	{	2399	{
2377	char *p = NULL;	2400	char *p = NULL;
2378	char *retval;	2401	char *retval;
2379		2402
2380	spin_lock(&dcache_lock);	2403	spin_lock(&dcache_lock);
2381	if (d_unlinked(dentry)) {	2404	if (d_unlinked(dentry)) {
2382	p = buf + buflen;	2405	p = buf + buflen;
2383	if (prepend(&p, &buflen, "//deleted", 10) != 0)	2406	if (prepend(&p, &buflen, "//deleted", 10) != 0)
2384	goto Elong;	2407	goto Elong;
2385	buflen++;	2408	buflen++;
2386	}	2409	}
2387	retval = __dentry_path(dentry, buf, buflen);	2410	retval = __dentry_path(dentry, buf, buflen);
2388	spin_unlock(&dcache_lock);	2411	spin_unlock(&dcache_lock);
2389	if (!IS_ERR(retval) && p)	2412	if (!IS_ERR(retval) && p)
2390	*p = '/'; /* restore '/' overriden with '\0' */	2413	*p = '/'; /* restore '/' overriden with '\0' */
2391	return retval;	2414	return retval;
2392	Elong:	2415	Elong:
2393	spin_unlock(&dcache_lock);	2416	spin_unlock(&dcache_lock);
2394	return ERR_PTR(-ENAMETOOLONG);	2417	return ERR_PTR(-ENAMETOOLONG);
2395	}	2418	}
2396		2419
2397	/*	2420	/*
2398	* NOTE! The user-level library version returns a	2421	* NOTE! The user-level library version returns a
2399	* character pointer. The kernel system call just	2422	* character pointer. The kernel system call just
2400	* returns the length of the buffer filled (which	2423	* returns the length of the buffer filled (which
2401	* includes the ending '\0' character), or a negative	2424	* includes the ending '\0' character), or a negative
2402	* error value. So libc would do something like	2425	* error value. So libc would do something like
2403	*	2426	*
2404	* char *getcwd(char *buf, size_t size)	2427	* char *getcwd(char *buf, size_t size)
2405	* {	2428	* {
2406	* int retval;	2429	* int retval;
2407	*	2430	*
2408	* retval = sys_getcwd(buf, size);	2431	* retval = sys_getcwd(buf, size);
2409	* if (retval >= 0)	2432	* if (retval >= 0)
2410	* return buf;	2433	* return buf;
2411	* errno = -retval;	2434	* errno = -retval;
2412	* return NULL;	2435	* return NULL;
2413	* }	2436	* }
2414	*/	2437	*/
2415	SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)	2438	SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2416	{	2439	{
2417	int error;	2440	int error;
2418	struct path pwd, root;	2441	struct path pwd, root;
2419	char *page = (char *) __get_free_page(GFP_USER);	2442	char *page = (char *) __get_free_page(GFP_USER);
2420		2443
2421	if (!page)	2444	if (!page)
2422	return -ENOMEM;	2445	return -ENOMEM;
2423		2446
2424	get_fs_root_and_pwd(current->fs, &root, &pwd);	2447	get_fs_root_and_pwd(current->fs, &root, &pwd);
2425		2448
2426	error = -ENOENT;	2449	error = -ENOENT;
2427	spin_lock(&dcache_lock);	2450	spin_lock(&dcache_lock);
2428	if (!d_unlinked(pwd.dentry)) {	2451	if (!d_unlinked(pwd.dentry)) {
2429	unsigned long len;	2452	unsigned long len;
2430	struct path tmp = root;	2453	struct path tmp = root;
2431	char *cwd = page + PAGE_SIZE;	2454	char *cwd = page + PAGE_SIZE;
2432	int buflen = PAGE_SIZE;	2455	int buflen = PAGE_SIZE;
2433		2456
2434	prepend(&cwd, &buflen, "\0", 1);	2457	prepend(&cwd, &buflen, "\0", 1);
2435	error = prepend_path(&pwd, &tmp, &cwd, &buflen);	2458	error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2436	spin_unlock(&dcache_lock);	2459	spin_unlock(&dcache_lock);
2437		2460
2438	if (error)	2461	if (error)
2439	goto out;	2462	goto out;
2440		2463
2441	/* Unreachable from current root */	2464	/* Unreachable from current root */
2442	if (!path_equal(&tmp, &root)) {	2465	if (!path_equal(&tmp, &root)) {
2443	error = prepend_unreachable(&cwd, &buflen);	2466	error = prepend_unreachable(&cwd, &buflen);
2444	if (error)	2467	if (error)
2445	goto out;	2468	goto out;
2446	}	2469	}
2447		2470
2448	error = -ERANGE;	2471	error = -ERANGE;
2449	len = PAGE_SIZE + page - cwd;	2472	len = PAGE_SIZE + page - cwd;
2450	if (len <= size) {	2473	if (len <= size) {
2451	error = len;	2474	error = len;
2452	if (copy_to_user(buf, cwd, len))	2475	if (copy_to_user(buf, cwd, len))
2453	error = -EFAULT;	2476	error = -EFAULT;
2454	}	2477	}
2455	} else	2478	} else
2456	spin_unlock(&dcache_lock);	2479	spin_unlock(&dcache_lock);
2457		2480
2458	out:	2481	out:
2459	path_put(&pwd);	2482	path_put(&pwd);
2460	path_put(&root);	2483	path_put(&root);
2461	free_page((unsigned long) page);	2484	free_page((unsigned long) page);
2462	return error;	2485	return error;
2463	}	2486	}
2464		2487
2465	/*	2488	/*
2466	* Test whether new_dentry is a subdirectory of old_dentry.	2489	* Test whether new_dentry is a subdirectory of old_dentry.
2467	*	2490	*
2468	* Trivially implemented using the dcache structure	2491	* Trivially implemented using the dcache structure
2469	*/	2492	*/
2470		2493
2471	/**	2494	/**
2472	* is_subdir - is new dentry a subdirectory of old_dentry	2495	* is_subdir - is new dentry a subdirectory of old_dentry
2473	* @new_dentry: new dentry	2496	* @new_dentry: new dentry
2474	* @old_dentry: old dentry	2497	* @old_dentry: old dentry
2475	*	2498	*
2476	* Returns 1 if new_dentry is a subdirectory of the parent (at any depth).	2499	* Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2477	* Returns 0 otherwise.	2500	* Returns 0 otherwise.
2478	* Caller must ensure that "new_dentry" is pinned before calling is_subdir()	2501	* Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2479	*/	2502	*/
2480		2503
2481	int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)	2504	int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2482	{	2505	{
2483	int result;	2506	int result;
2484	unsigned long seq;	2507	unsigned long seq;
2485		2508
2486	if (new_dentry == old_dentry)	2509	if (new_dentry == old_dentry)
2487	return 1;	2510	return 1;
2488		2511
2489	/*	2512	/*
2490	* Need rcu_readlock to protect against the d_parent trashing	2513	* Need rcu_readlock to protect against the d_parent trashing
2491	* due to d_move	2514	* due to d_move
2492	*/	2515	*/
2493	rcu_read_lock();	2516	rcu_read_lock();
2494	do {	2517	do {
2495	/* for restarting inner loop in case of seq retry */	2518	/* for restarting inner loop in case of seq retry */
2496	seq = read_seqbegin(&rename_lock);	2519	seq = read_seqbegin(&rename_lock);
2497	if (d_ancestor(old_dentry, new_dentry))	2520	if (d_ancestor(old_dentry, new_dentry))
2498	result = 1;	2521	result = 1;
2499	else	2522	else
2500	result = 0;	2523	result = 0;
2501	} while (read_seqretry(&rename_lock, seq));	2524	} while (read_seqretry(&rename_lock, seq));
2502	rcu_read_unlock();	2525	rcu_read_unlock();
2503		2526
2504	return result;	2527	return result;
2505	}	2528	}
2506		2529
2507	int path_is_under(struct path *path1, struct path *path2)	2530	int path_is_under(struct path *path1, struct path *path2)
2508	{	2531	{
2509	struct vfsmount *mnt = path1->mnt;	2532	struct vfsmount *mnt = path1->mnt;
2510	struct dentry *dentry = path1->dentry;	2533	struct dentry *dentry = path1->dentry;
2511	int res;	2534	int res;
2512		2535
2513	br_read_lock(vfsmount_lock);	2536	br_read_lock(vfsmount_lock);
2514	if (mnt != path2->mnt) {	2537	if (mnt != path2->mnt) {
2515	for (;;) {	2538	for (;;) {
2516	if (mnt->mnt_parent == mnt) {	2539	if (mnt->mnt_parent == mnt) {
2517	br_read_unlock(vfsmount_lock);	2540	br_read_unlock(vfsmount_lock);
2518	return 0;	2541	return 0;
2519	}	2542	}
2520	if (mnt->mnt_parent == path2->mnt)	2543	if (mnt->mnt_parent == path2->mnt)
2521	break;	2544	break;
2522	mnt = mnt->mnt_parent;	2545	mnt = mnt->mnt_parent;
2523	}	2546	}
2524	dentry = mnt->mnt_mountpoint;	2547	dentry = mnt->mnt_mountpoint;
2525	}	2548	}
2526	res = is_subdir(dentry, path2->dentry);	2549	res = is_subdir(dentry, path2->dentry);
2527	br_read_unlock(vfsmount_lock);	2550	br_read_unlock(vfsmount_lock);
2528	return res;	2551	return res;
2529	}	2552	}
2530	EXPORT_SYMBOL(path_is_under);	2553	EXPORT_SYMBOL(path_is_under);
2531		2554
2532	void d_genocide(struct dentry *root)	2555	void d_genocide(struct dentry *root)
2533	{	2556	{
2534	struct dentry *this_parent = root;	2557	struct dentry *this_parent = root;
2535	struct list_head *next;	2558	struct list_head *next;
2536		2559
2537	spin_lock(&dcache_lock);	2560	spin_lock(&dcache_lock);
2538	repeat:	2561	repeat:
2539	next = this_parent->d_subdirs.next;	2562	next = this_parent->d_subdirs.next;
2540	resume:	2563	resume:
2541	while (next != &this_parent->d_subdirs) {	2564	while (next != &this_parent->d_subdirs) {
2542	struct list_head *tmp = next;	2565	struct list_head *tmp = next;
2543	struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);	2566	struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2544	next = tmp->next;	2567	next = tmp->next;
2545	if (d_unhashed(dentry)||!dentry->d_inode)	2568	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		2569	if (d_unhashed(dentry) || !dentry->d_inode) {
		2570	spin_unlock(&dentry->d_lock);
2546	continue;	2571	continue;
		2572	}
2547	if (!list_empty(&dentry->d_subdirs)) {	2573	if (!list_empty(&dentry->d_subdirs)) {
		2574	spin_unlock(&dentry->d_lock);
2548	this_parent = dentry;	2575	this_parent = dentry;
2549	goto repeat;	2576	goto repeat;
2550	}	2577	}
2551	spin_lock(&dentry->d_lock);
2552	dentry->d_count--;	2578	dentry->d_count--;
2553	spin_unlock(&dentry->d_lock);	2579	spin_unlock(&dentry->d_lock);
2554	}	2580	}
2555	if (this_parent != root) {	2581	if (this_parent != root) {
2556	next = this_parent->d_u.d_child.next;	2582	next = this_parent->d_u.d_child.next;
2557	spin_lock(&this_parent->d_lock);	2583	spin_lock(&this_parent->d_lock);
2558	this_parent->d_count--;	2584	this_parent->d_count--;
2559	spin_unlock(&this_parent->d_lock);	2585	spin_unlock(&this_parent->d_lock);
2560	this_parent = this_parent->d_parent;	2586	this_parent = this_parent->d_parent;
2561	goto resume;	2587	goto resume;
2562	}	2588	}
2563	spin_unlock(&dcache_lock);	2589	spin_unlock(&dcache_lock);
2564	}	2590	}
2565		2591
2566	/**	2592	/**
2567	* find_inode_number - check for dentry with name	2593	* find_inode_number - check for dentry with name
2568	* @dir: directory to check	2594	* @dir: directory to check
2569	* @name: Name to find.	2595	* @name: Name to find.
2570	*	2596	*
2571	* Check whether a dentry already exists for the given name,	2597	* Check whether a dentry already exists for the given name,
2572	* and return the inode number if it has an inode. Otherwise	2598	* and return the inode number if it has an inode. Otherwise
2573	* 0 is returned.	2599	* 0 is returned.
2574	*	2600	*
2575	* This routine is used to post-process directory listings for	2601	* This routine is used to post-process directory listings for
2576	* filesystems using synthetic inode numbers, and is necessary	2602	* filesystems using synthetic inode numbers, and is necessary
2577	* to keep getcwd() working.	2603	* to keep getcwd() working.
2578	*/	2604	*/
2579		2605
2580	ino_t find_inode_number(struct dentry *dir, struct qstr *name)	2606	ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2581	{	2607	{
2582	struct dentry * dentry;	2608	struct dentry * dentry;
2583	ino_t ino = 0;	2609	ino_t ino = 0;
2584		2610
2585	dentry = d_hash_and_lookup(dir, name);	2611	dentry = d_hash_and_lookup(dir, name);
2586	if (dentry) {	2612	if (dentry) {
2587	if (dentry->d_inode)	2613	if (dentry->d_inode)
2588	ino = dentry->d_inode->i_ino;	2614	ino = dentry->d_inode->i_ino;
2589	dput(dentry);	2615	dput(dentry);
2590	}	2616	}
2591	return ino;	2617	return ino;
2592	}	2618	}
2593	EXPORT_SYMBOL(find_inode_number);	2619	EXPORT_SYMBOL(find_inode_number);
2594		2620
2595	static __initdata unsigned long dhash_entries;	2621	static __initdata unsigned long dhash_entries;
2596	static int __init set_dhash_entries(char *str)	2622	static int __init set_dhash_entries(char *str)
2597	{	2623	{
2598	if (!str)	2624	if (!str)
2599	return 0;	2625	return 0;
2600	dhash_entries = simple_strtoul(str, &str, 0);	2626	dhash_entries = simple_strtoul(str, &str, 0);
2601	return 1;	2627	return 1;
2602	}	2628	}
2603	__setup("dhash_entries=", set_dhash_entries);	2629	__setup("dhash_entries=", set_dhash_entries);
2604		2630
2605	static void __init dcache_init_early(void)	2631	static void __init dcache_init_early(void)
2606	{	2632	{
2607	int loop;	2633	int loop;
2608		2634
2609	/* If hashes are distributed across NUMA nodes, defer	2635	/* If hashes are distributed across NUMA nodes, defer
2610	* hash allocation until vmalloc space is available.	2636	* hash allocation until vmalloc space is available.
2611	*/	2637	*/
2612	if (hashdist)	2638	if (hashdist)
2613	return;	2639	return;
2614		2640
2615	dentry_hashtable =	2641	dentry_hashtable =
2616	alloc_large_system_hash("Dentry cache",	2642	alloc_large_system_hash("Dentry cache",
2617	sizeof(struct hlist_head),	2643	sizeof(struct hlist_head),
2618	dhash_entries,	2644	dhash_entries,
2619	13,	2645	13,
2620	HASH_EARLY,	2646	HASH_EARLY,
2621	&d_hash_shift,	2647	&d_hash_shift,
2622	&d_hash_mask,	2648	&d_hash_mask,
2623	0);	2649	0);
2624		2650
2625	for (loop = 0; loop < (1 << d_hash_shift); loop++)	2651	for (loop = 0; loop < (1 << d_hash_shift); loop++)
2626	INIT_HLIST_HEAD(&dentry_hashtable[loop]);	2652	INIT_HLIST_HEAD(&dentry_hashtable[loop]);
2627	}	2653	}
2628		2654
2629	static void __init dcache_init(void)	2655	static void __init dcache_init(void)
2630	{	2656	{
2631	int loop;	2657	int loop;
2632		2658
2633	/*	2659	/*
2634	* A constructor could be added for stable state like the lists,	2660	* A constructor could be added for stable state like the lists,
2635	* but it is probably not worth it because of the cache nature	2661	* but it is probably not worth it because of the cache nature
2636	* of the dcache.	2662	* of the dcache.
2637	*/	2663	*/
2638	dentry_cache = KMEM_CACHE(dentry,	2664	dentry_cache = KMEM_CACHE(dentry,
2639	SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);	2665	SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
2640		2666
2641	register_shrinker(&dcache_shrinker);	2667	register_shrinker(&dcache_shrinker);
2642		2668
2643	/* Hash may have been set up in dcache_init_early */	2669	/* Hash may have been set up in dcache_init_early */
2644	if (!hashdist)	2670	if (!hashdist)
2645	return;	2671	return;
2646		2672
2647	dentry_hashtable =	2673	dentry_hashtable =
2648	alloc_large_system_hash("Dentry cache",	2674	alloc_large_system_hash("Dentry cache",
2649	sizeof(struct hlist_head),	2675	sizeof(struct hlist_head),
2650	dhash_entries,	2676	dhash_entries,
2651	13,	2677	13,
2652	0,	2678	0,
2653	&d_hash_shift,	2679	&d_hash_shift,
2654	&d_hash_mask,	2680	&d_hash_mask,
2655	0);	2681	0);
2656		2682
2657	for (loop = 0; loop < (1 << d_hash_shift); loop++)	2683	for (loop = 0; loop < (1 << d_hash_shift); loop++)
2658	INIT_HLIST_HEAD(&dentry_hashtable[loop]);	2684	INIT_HLIST_HEAD(&dentry_hashtable[loop]);
2659	}	2685	}
2660		2686
2661	/* SLAB cache for __getname() consumers */	2687	/* SLAB cache for __getname() consumers */
2662	struct kmem_cache *names_cachep __read_mostly;	2688	struct kmem_cache *names_cachep __read_mostly;
2663	EXPORT_SYMBOL(names_cachep);	2689	EXPORT_SYMBOL(names_cachep);
2664		2690
2665	EXPORT_SYMBOL(d_genocide);	2691	EXPORT_SYMBOL(d_genocide);
2666		2692
2667	void __init vfs_caches_init_early(void)	2693	void __init vfs_caches_init_early(void)
2668	{	2694	{
2669	dcache_init_early();	2695	dcache_init_early();
2670	inode_init_early();	2696	inode_init_early();
2671	}	2697	}
2672		2698
2673	void __init vfs_caches_init(unsigned long mempages)	2699	void __init vfs_caches_init(unsigned long mempages)
2674	{	2700	{
2675	unsigned long reserve;	2701	unsigned long reserve;
2676		2702
2677	/* Base hash sizes on available memory, with a reserve equal to	2703	/* Base hash sizes on available memory, with a reserve equal to
2678	150% of current kernel size */	2704	150% of current kernel size */
2679		2705
2680	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);	2706	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
2681	mempages -= reserve;	2707	mempages -= reserve;
2682		2708
2683	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,	2709	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
2684	SLAB_HWCACHE_ALIGN\|SLAB_PANIC, NULL);	2710	SLAB_HWCACHE_ALIGN\|SLAB_PANIC, NULL);

fs/libfs.c

Diff comments View file @ da50295

 /*
  *	fs/libfs.c
  *	Library for filesystems writers.
  */
 #include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/vfs.h>
 #include <linux/quotaops.h>
 #include <linux/mutex.h>
 #include <linux/exportfs.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>
 #include <asm/uaccess.h>
+static inline int simple_positive(struct dentry *dentry)
+{
+	return dentry->d_inode && !d_unhashed(dentry);
+}
 /*
  * Fill @stat for @dentry's inode via generic_fillattr(), then set
  * stat->blocks to the mapping's page count scaled to 512-byte units
  * (a left shift by PAGE_CACHE_SHIFT - 9).  Always returns 0.
  */
 int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		   struct kstat *stat)
 {
 	struct inode *host = dentry->d_inode;
 	unsigned long cached_pages;
 	generic_fillattr(host, stat);
 	cached_pages = host->i_mapping->nrpages;
 	stat->blocks = cached_pages << (PAGE_CACHE_SHIFT - 9);
 	return 0;
 }
 int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	buf->f_type = dentry->d_sb->s_magic;
 	buf->f_bsize = PAGE_CACHE_SIZE;
 	buf->f_namelen = NAME_MAX;
 	return 0;
 }
 /*
  * Retaining negative dentries for an in-memory filesystem just wastes
  * memory and lookup time: arrange for them to be deleted immediately.
  *
  * Installed as ->d_delete by simple_lookup().  Unconditionally returns 1
  * and never inspects @dentry.
  */
 static int simple_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
 /*
  * Trivial ->lookup.  Getting here means no dentry existed for the name,
  * so the result is always negative: the dentry is given a d_op whose
  * ->d_delete is simple_delete_dentry() and is added with a NULL inode.
  * Names longer than NAME_MAX are refused with -ENAMETOOLONG; otherwise
  * NULL is returned.
  */
 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	static const struct dentry_operations simple_dentry_operations = {
 		.d_delete = simple_delete_dentry,
 	};
 	if (dentry->d_name.len <= NAME_MAX) {
 		dentry->d_op = &simple_dentry_operations;
 		d_add(dentry, NULL);
 		return NULL;
 	}
 	return ERR_PTR(-ENAMETOOLONG);
 }
 /*
  * Open a directory: allocate a dentry named "." under the directory's
  * dentry and keep it in file->private_data, where dcache_dir_lseek() and
  * dcache_readdir() pick it up as their cursor.  Returns 0, or -ENOMEM
  * when d_alloc() returned NULL.
  */
 int dcache_dir_open(struct inode *inode, struct file *file)
 {
 	static struct qstr cursor_name = {.len = 1, .name = "."};
 	struct dentry *cursor = d_alloc(file->f_path.dentry, &cursor_name);
 	file->private_data = cursor;
 	if (!cursor)
 		return -ENOMEM;
 	return 0;
 }
 /*
  * Release counterpart of dcache_dir_open(): drops the reference on the
  * dentry kept in file->private_data.  Always returns 0.
  */
 int dcache_dir_close(struct inode *inode, struct file *file)
 {
 	dput(file->private_data);
 	return 0;
 }
 /*
  * llseek for directories whose contents live in the dcache.
  *
  * Only origins 0 and 1 are accepted (1 is relative to the current f_pos);
  * any other origin, or a negative resulting offset, yields -EINVAL.
  * When the position actually changes to a value >= 2, the cursor dentry
  * from file->private_data is unlinked and re-inserted into the parent's
  * d_subdirs list after skipping (offset - 2) positive children.
  * The whole operation runs under the directory inode's i_mutex.
  * Returns the new offset on success.
  */
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 {
 	mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += file->f_pos;
 			/* fallthrough */
 		case 0:
 			if (offset >= 0)
 				break;
 			/* fallthrough: a negative offset is rejected below */
 		default:
 			mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
 		file->f_pos = offset;
 		/* positions 0 and 1 are "." and ".." in dcache_readdir() */
 		if (file->f_pos >= 2) {
 			struct list_head *p;
 			struct dentry *cursor = file->private_data;
 			loff_t n = file->f_pos - 2;
 			spin_lock(&dcache_lock);
 			list_del(&cursor->d_u.d_child);
 			p = file->f_path.dentry->d_subdirs.next;
 			/* walk children, counting only the positive ones */
 			while (n && p != &file->f_path.dentry->d_subdirs) {
 				struct dentry *next;
 				next = list_entry(p, struct dentry, d_u.d_child);
-				if (!d_unhashed(next) && next->d_inode)
+				spin_lock(&next->d_lock);
+				if (simple_positive(next))
 					n--;
+				spin_unlock(&next->d_lock);
 				p = p->next;
 			}
 			/* park the cursor just before the entry we stopped at */
 			list_add_tail(&cursor->d_u.d_child, p);
 			spin_unlock(&dcache_lock);
 		}
 	}
 	mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
 	return offset;
 }
 /*
  * Relationship between i_mode and the DT_xxx types: the value passed to
  * filldir is the four bits of i_mode starting at bit 12.
  */
 static inline unsigned char dt_type(struct inode *inode)
 {
 	return (inode->i_mode >> 12) & 15;
 }
 /*
  * Directory is locked and all positive dentries in it are safe, since
  * for ramfs-type trees they can't go away without unlink() or rmdir(),
  * both impossible due to the lock on directory.
  *
  * Emits "." at position 0 and ".." at position 1, then walks the parent's
  * d_subdirs list starting after the cursor dentry (filp->private_data),
  * calling filldir for every positive child and moving the cursor past
  * each entry that was emitted.  dcache_lock is dropped around each
  * filldir call.  Always returns 0; a negative filldir result just stops
  * the walk.
  */
 int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct dentry *cursor = filp->private_data;
 	struct list_head *p, *q = &cursor->d_u.d_child;
 	ino_t ino;
 	/* NOTE(review): f_pos is narrowed to int here - confirm that is intended */
 	int i = filp->f_pos;
 	switch (i) {
 		case 0:
 			ino = dentry->d_inode->i_ino;
 			if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
 				break;
 			filp->f_pos++;
 			i++;
 			/* fallthrough */
 		case 1:
 			ino = parent_ino(dentry);
 			if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
 				break;
 			filp->f_pos++;
 			i++;
 			/* fallthrough */
 		default:
 			spin_lock(&dcache_lock);
 			/* first real entry: start the cursor at the list head */
 			if (filp->f_pos == 2)
 				list_move(q, &dentry->d_subdirs);
 			for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
 				struct dentry *next;
 				next = list_entry(p, struct dentry, d_u.d_child);
-				if (d_unhashed(next) || !next->d_inode)
+				spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
+				if (!simple_positive(next)) {
+					spin_unlock(&next->d_lock);
 					continue;
+				}
+				spin_unlock(&next->d_lock);
 				/* filldir is called without dcache_lock held */
 				spin_unlock(&dcache_lock);
 				if (filldir(dirent, next->d_name.name,
 					    next->d_name.len, filp->f_pos,
 					    next->d_inode->i_ino,
 					    dt_type(next->d_inode)) < 0)
 					return 0;
 				spin_lock(&dcache_lock);
 				/* next is still alive */
 				list_move(q, p);
 				p = q;
 				filp->f_pos++;
 			}
 			spin_unlock(&dcache_lock);
 	}
 	return 0;
 }
 /*
  * ->read for directories: always fails with -EISDIR and touches none of
  * its arguments.
  */
 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
 {
 	return -EISDIR;
 }
 /*
  * Directory file operations assembled from the dcache_dir_* helpers,
  * dcache_readdir(), generic_read_dir() and noop_fsync() in this file.
  */
 const struct file_operations simple_dir_operations = {
 	.open		= dcache_dir_open,
 	.release	= dcache_dir_close,
 	.llseek		= dcache_dir_lseek,
 	.read		= generic_read_dir,
 	.readdir	= dcache_readdir,
 	.fsync		= noop_fsync,
 };
 /* Directory inode operations providing only ->lookup (simple_lookup()). */
 const struct inode_operations simple_dir_inode_operations = {
 	.lookup		= simple_lookup,
 };
 /*
  * Default super_operations providing only ->statfs.  Used by
  * mount_pseudo() when the caller passes no ops, and by simple_fill_super().
  */
 static const struct super_operations simple_super_operations = {
 	.statfs		= simple_statfs,
 };
 /*
  * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
  * will never be mountable)
  */
 struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
 	const struct super_operations *ops, unsigned long magic)
 {
 	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
 	struct dentry *dentry;
 	struct inode *root;
 	struct qstr d_name = {.name = name, .len = strlen(name)};
 	if (IS_ERR(s))
 		return ERR_CAST(s);
 	s->s_flags = MS_NOUSER;
 	s->s_maxbytes = MAX_LFS_FILESIZE;
 	s->s_blocksize = PAGE_SIZE;
 	s->s_blocksize_bits = PAGE_SHIFT;
 	s->s_magic = magic;
 	s->s_op = ops ? ops : &simple_super_operations;
 	s->s_time_gran = 1;
 	root = new_inode(s);
 	if (!root)
 		goto Enomem;
 	/*
 	 * since this is the first inode, make it number 1. New inodes created
 	 * after this must take care not to collide with it (by passing
 	 * max_reserved of 1 to iunique).
 	 */
 	root->i_ino = 1;
 	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
 	root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
 	dentry = d_alloc(NULL, &d_name);
 	if (!dentry) {
 		iput(root);
 		goto Enomem;
 	}
 	dentry->d_sb = s;
 	dentry->d_parent = dentry;
 	d_instantiate(dentry, root);
 	s->s_root = dentry;
 	s->s_flags |= MS_ACTIVE;
 	return dget(s->s_root);
 Enomem:
 	deactivate_locked_super(s);
 	return ERR_PTR(-ENOMEM);
 }
 /*
  * Link @old_dentry's inode at @dentry inside @dir: refresh the inode's
  * ctime and the directory's ctime/mtime, bump the link count, take an
  * inode reference and an extra dentry reference, then instantiate
  * @dentry with the inode.  Always returns 0.
  */
 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
 	struct inode *target = old_dentry->d_inode;
 	dir->i_mtime = dir->i_ctime = target->i_ctime = CURRENT_TIME;
 	inc_nlink(target);
 	ihold(target);
 	dget(dentry);
 	d_instantiate(dentry, target);
 	return 0;
 }
-static inline int simple_positive(struct dentry *dentry)
-{
-	return dentry->d_inode && !d_unhashed(dentry);
-}
 /*
  * Returns 1 when @dentry has no positive child (one that has an inode and
  * is hashed, see simple_positive()), 0 as soon as one is found.  The
  * d_subdirs list is walked with dcache_lock held.
  */
 int simple_empty(struct dentry *dentry)
 {
 	struct dentry *child;
 	int ret = 0;
 	spin_lock(&dcache_lock);
-	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
+	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
-		if (simple_positive(child))
+		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+		if (simple_positive(child)) {
+			spin_unlock(&child->d_lock);
 			goto out;
+		}
+		spin_unlock(&child->d_lock);
+	}
 	ret = 1;
 out:
 	spin_unlock(&dcache_lock);
 	return ret;
 }
 /*
  * Unlink @dentry from @dir: refresh the inode's ctime and the directory's
  * ctime/mtime, drop one link from the inode and put the dentry reference.
  * Always returns 0.
  */
 int simple_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *victim = dentry->d_inode;
 	dir->i_mtime = dir->i_ctime = victim->i_ctime = CURRENT_TIME;
 	drop_nlink(victim);
 	dput(dentry);
 	return 0;
 }
 /*
  * Remove directory @dentry from @dir.  Fails with -ENOTEMPTY while
  * simple_empty() reports positive children; otherwise drops one link from
  * the directory being removed, unlinks it via simple_unlink() (which drops
  * another), and drops one link from the parent.  Returns 0 on success.
  */
 int simple_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	if (simple_empty(dentry)) {
 		drop_nlink(dentry->d_inode);
 		simple_unlink(dir, dentry);
 		drop_nlink(dir);
 		return 0;
 	}
 	return -ENOTEMPTY;
 }
 int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
 		struct inode *new_dir, struct dentry *new_dentry)
 {
 	struct inode *inode = old_dentry->d_inode;
 	int they_are_dirs = S_ISDIR(old_dentry->d_inode->i_mode);
 	if (!simple_empty(new_dentry))
 		return -ENOTEMPTY;
 	if (new_dentry->d_inode) {
 		simple_unlink(new_dir, new_dentry);
 		if (they_are_dirs)
 			drop_nlink(old_dir);
 	} else if (they_are_dirs) {
 		drop_nlink(old_dir);
 		inc_nlink(new_dir);
 	}
 	old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
 		new_dir->i_mtime = inode->i_ctime = CURRENT_TIME;
 	return 0;
 }
 /**
  * simple_setattr - setattr for simple filesystem
  * @dentry: dentry
  * @iattr: iattr structure
  *
  * Returns 0 on success, -error on failure.
  *
  * simple_setattr is a simple ->setattr implementation without a proper
  * implementation of size changes.
  *
  * It can either be used for in-memory filesystems or special files
  * on simple regular filesystems.  Anything that needs to change on-disk
  * or wire state on size changes needs its own setattr method.
  */
 int simple_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *target = dentry->d_inode;
 	int err;
 	/* warn once if the inode still carries a ->truncate method */
 	WARN_ON_ONCE(target->i_op->truncate);
 	err = inode_change_ok(target, iattr);
 	if (!err) {
 		/* size changes go through truncate_setsize() before the copy */
 		if (iattr->ia_valid & ATTR_SIZE)
 			truncate_setsize(target, iattr->ia_size);
 		setattr_copy(target, iattr);
 		mark_inode_dirty(target);
 	}
 	return err;
 }
 EXPORT_SYMBOL(simple_setattr);
 /*
  * ->readpage that hands back an all-zero page: the page is cleared,
  * flushed from the data cache, marked uptodate and unlocked.
  * @file is not used.  Always returns 0.
  */
 int simple_readpage(struct file *file, struct page *page)
 {
 	clear_highpage(page);
 	flush_dcache_page(page);
 	SetPageUptodate(page);
 	unlock_page(page);
 	return 0;
 }
 /*
  * ->write_begin helper: grab the page cache page covering @pos and hand
  * it back through @pagep.  When the page is not uptodate and the write
  * does not cover a whole page, the parts of the page outside
  * [from, from + len) are zeroed.  Returns 0, or -ENOMEM when no page
  * could be obtained.
  */
 int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
 	pgoff_t idx = pos >> PAGE_CACHE_SHIFT;
 	struct page *pg = grab_cache_page_write_begin(mapping, idx, flags);
 	if (!pg)
 		return -ENOMEM;
 	*pagep = pg;
 	if (!PageUptodate(pg) && (len != PAGE_CACHE_SIZE)) {
 		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
 		unsigned end = start + len;
 		zero_user_segments(pg, 0, start, end, PAGE_CACHE_SIZE);
 	}
 	return 0;
 }
 /**
  * simple_write_end - .write_end helper for non-block-device FSes
  * @file: See .write_end of address_space_operations
  * @mapping: 		"
  * @pos: 		"
  * @len: 		"
  * @copied: 		"
  * @page: 		"
  * @fsdata: 		"
  *
  * simple_write_end does the minimum needed for updating a page after writing is
  * done. It has the same API signature as the .write_end of
  * address_space_operations vector. So it can just be set onto .write_end for
  * FSes that don't need any other processing. i_mutex is assumed to be held.
  * Block based filesystems should use generic_write_end().
  * NOTE: Even though i_size might get updated by this function, mark_inode_dirty
  * is not called, so a filesystem that actually does store data in .write_inode
  * should extend on what's done here with a call to mark_inode_dirty() in the
  * case that i_size has changed.
  *
  * Returns @copied.
  */
 int simple_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
 {
 	struct inode *inode = page->mapping->host;
 	loff_t last_pos = pos + copied;
 	/* zero the stale part of the page if we did a short copy */
 	if (copied < len) {
 		unsigned from = pos & (PAGE_CACHE_SIZE - 1);
 		zero_user(page, from + copied, len - copied);
 	}
 	if (!PageUptodate(page))
 		SetPageUptodate(page);
 	/*
 	 * No need to use i_size_read() here, the i_size
 	 * cannot change under us because we hold the i_mutex.
 	 */
 	if (last_pos > inode->i_size)
 		i_size_write(inode, last_pos);
 	set_page_dirty(page);
 	unlock_page(page);
 	/* drop the page reference; the page is not touched after this */
 	page_cache_release(page);
 	return copied;
 }
 /*
  * the inodes created here are not hashed. If you use iunique to generate
  * unique inode values later for this filesystem, then you must take care
  * to pass it an appropriate max_reserved value to avoid collisions.
  *
  * Fills in superblock @s with magic @magic, creates a root directory
  * (inode number 1) and one regular file per entry of @files.  The array
  * index of an entry becomes its inode number; entries with a NULL name
  * are skipped and the array ends at the first entry whose name is the
  * empty string.  Returns 0 on success or -ENOMEM, in which case
  * everything created so far is torn down.
  */
 int simple_fill_super(struct super_block *s, unsigned long magic,
 		      struct tree_descr *files)
 {
 	struct inode *inode;
 	struct dentry *root;
 	struct dentry *dentry;
 	int i;
 	s->s_blocksize = PAGE_CACHE_SIZE;
 	s->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	s->s_magic = magic;
 	s->s_op = &simple_super_operations;
 	s->s_time_gran = 1;
 	inode = new_inode(s);
 	if (!inode)
 		return -ENOMEM;
 	/*
 	 * because the root inode is 1, the files array must not contain an
 	 * entry at index 1
 	 */
 	inode->i_ino = 1;
 	inode->i_mode = S_IFDIR | 0755;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 	inode->i_nlink = 2;
 	root = d_alloc_root(inode);
 	if (!root) {
 		iput(inode);
 		return -ENOMEM;
 	}
 	for (i = 0; !files->name || files->name[0]; i++, files++) {
 		if (!files->name)
 			continue;
 		/* warn if it tries to conflict with the root inode */
 		if (unlikely(i == 1))
 			printk(KERN_WARNING "%s: %s passed in a files array "
 				"with an index of 1!\n", __func__,
 				s->s_type->name);
 		dentry = d_alloc_name(root, files->name);
 		if (!dentry)
 			goto out;
 		inode = new_inode(s);
 		if (!inode) {
 			/*
 			 * This dentry has not been d_add()ed yet, so drop
 			 * the reference from d_alloc_name() here instead of
 			 * relying on the teardown below to find it.
 			 */
 			dput(dentry);
 			goto out;
 		}
 		inode->i_mode = S_IFREG | files->mode;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		inode->i_fop = files->ops;
 		inode->i_ino = i;
 		d_add(dentry, inode);
 	}
 	s->s_root = root;
 	return 0;
 out:
 	d_genocide(root);
 	dput(root);
 	return -ENOMEM;
 }
 static DEFINE_SPINLOCK(pin_fs_lock);
 /*
  * Take a counted pin on an internal mount of filesystem @type.
  *
  * If *@mount is not set yet, pin_fs_lock is dropped, a mount is created
  * with vfs_kern_mount(), and after retaking the lock it is installed
  * unless *@mount got set in the meantime.  A reference is then taken on
  * *@mount and *@count is incremented, both under pin_fs_lock.
  * Returns 0, or the error from vfs_kern_mount().
  */
 int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count)
 {
 	struct vfsmount *mnt = NULL;
 	spin_lock(&pin_fs_lock);
 	if (unlikely(!*mount)) {
 		/* the lock is not held across vfs_kern_mount() */
 		spin_unlock(&pin_fs_lock);
 		mnt = vfs_kern_mount(type, 0, type->name, NULL);
 		if (IS_ERR(mnt))
 			return PTR_ERR(mnt);
 		spin_lock(&pin_fs_lock);
 		/* re-check: *mount may have been set while the lock was dropped */
 		if (!*mount)
 			*mount = mnt;
 	}
 	mntget(*mount);
 	++*count;
 	spin_unlock(&pin_fs_lock);
 	/*
 	 * mnt is still NULL when no mount was created here; otherwise this
 	 * puts the reference returned by vfs_kern_mount().
 	 */
 	mntput(mnt);
 	return 0;
 }
 /*
  * Undo one simple_pin_fs(): decrement *@count under pin_fs_lock, clear
  * *@mount when the count reaches zero, and put one reference on the
  * mount that was installed on entry (after the lock is released).
  */
 void simple_release_fs(struct vfsmount **mount, int *count)
 {
 	struct vfsmount *pinned;
 	spin_lock(&pin_fs_lock);
 	pinned = *mount;
 	*count -= 1;
 	if (*count == 0)
 		*mount = NULL;
 	spin_unlock(&pin_fs_lock);
 	mntput(pinned);
 }
 /**
  * simple_read_from_buffer - copy data from the buffer to user space
  * @to: the user space buffer to read to
  * @count: the maximum number of bytes to read
  * @ppos: the current position in the buffer
  * @from: the buffer to read from
  * @available: the size of the buffer
  *
  * The simple_read_from_buffer() function reads up to @count bytes from the
  * buffer @from at offset @ppos into the user space address starting at @to.
  *
  * On success, the number of bytes read is returned and the offset @ppos is
  * advanced by this number, or negative value is returned on error.
  **/
 ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
 				const void *from, size_t available)
 {
 	loff_t start = *ppos;
 	size_t uncopied;
 	if (start < 0)
 		return -EINVAL;
 	/* nothing to do at or past the end, or for an empty request */
 	if (start >= available || !count)
 		return 0;
 	/* clamp the request to what is left in the buffer */
 	if (count > available - start)
 		count = available - start;
 	uncopied = copy_to_user(to, from + start, count);
 	/* -EFAULT only when not a single byte made it across */
 	if (uncopied == count)
 		return -EFAULT;
 	count -= uncopied;
 	*ppos = start + count;
 	return count;
 }
 /**
  * simple_write_to_buffer - copy data from user space to the buffer
  * @to: the buffer to write to
  * @available: the size of the buffer
  * @ppos: the current position in the buffer
  * @from: the user space buffer to read from
  * @count: the maximum number of bytes to read
  *
  * The simple_write_to_buffer() function reads up to @count bytes from the user
  * space address starting at @from into the buffer @to at offset @ppos.
  *
  * On success, the number of bytes written is returned and the offset @ppos is
  * advanced by this number, or negative value is returned on error.
  **/
 ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
 		const void __user *from, size_t count)
 {
 	loff_t start = *ppos;
 	size_t uncopied;
 	if (start < 0)
 		return -EINVAL;
 	/* nothing to do at or past the end, or for an empty request */
 	if (start >= available || !count)
 		return 0;
 	/* clamp the request to the room left in the buffer */
 	if (count > available - start)
 		count = available - start;
 	uncopied = copy_from_user(to + start, from, count);
 	/* -EFAULT only when not a single byte made it across */
 	if (uncopied == count)
 		return -EFAULT;
 	count -= uncopied;
 	*ppos = start + count;
 	return count;
 }
 /**
  * memory_read_from_buffer - copy data from the buffer
  * @to: the kernel space buffer to read to
  * @count: the maximum number of bytes to read
  * @ppos: the current position in the buffer
  * @from: the buffer to read from
  * @available: the size of the buffer
  *
  * The memory_read_from_buffer() function reads up to @count bytes from the
  * buffer @from at offset @ppos into the kernel space address starting at @to.
  *
  * On success, the number of bytes read is returned and the offset @ppos is
  * advanced by this number, or negative value is returned on error.
  **/
 ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
 				const void *from, size_t available)
 {
 	loff_t start = *ppos;
 	size_t remaining;
 	if (start < 0)
 		return -EINVAL;
 	if (start >= available)
 		return 0;
 	/* never copy past the end of the source buffer */
 	remaining = available - start;
 	if (count > remaining)
 		count = remaining;
 	memcpy(to, from + start, count);
 	*ppos = start + count;
 	return count;
 }
 /*
  * Transaction based IO.
  * The file expects a single write which triggers the transaction, and then
  * possibly a read which collects the result - which is stored in a
  * file-local buffer.
  */
 /*
  * Record that the transaction buffer attached to @file holds @n bytes of
  * response.  @n larger than SIMPLE_TRANSACTION_LIMIT is a BUG.
  */
 void simple_transaction_set(struct file *file, size_t n)
 {
 	struct simple_transaction_argresp *ar = file->private_data;
 	BUG_ON(n > SIMPLE_TRANSACTION_LIMIT);
 	/*
 	 * The barrier ensures that ar->size will really remain zero until
 	 * ar->data is ready for reading.
 	 */
 	smp_mb();
 	ar->size = n;
 }
 /*
  * Start a transaction on @file: allocate a zeroed page, attach it as
  * file->private_data and copy @size bytes of the request from @buf into
  * its data area.  Returns a pointer to that data, or an ERR_PTR:
  * -EFBIG if @size exceeds SIMPLE_TRANSACTION_LIMIT - 1, -ENOMEM if no
  * page was available, -EBUSY if the file already has a buffer attached,
  * -EFAULT if the copy from user space failed.
  */
 char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
 {
 	static DEFINE_SPINLOCK(simple_transaction_lock);
 	struct simple_transaction_argresp *page;
 	int busy;
 	if (size > SIMPLE_TRANSACTION_LIMIT - 1)
 		return ERR_PTR(-EFBIG);
 	page = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 	/* only one write allowed per open */
 	spin_lock(&simple_transaction_lock);
 	busy = file->private_data != NULL;
 	if (!busy)
 		file->private_data = page;
 	spin_unlock(&simple_transaction_lock);
 	if (busy) {
 		free_page((unsigned long)page);
 		return ERR_PTR(-EBUSY);
 	}
 	/* on failure the page stays attached to the file */
 	if (copy_from_user(page->data, buf, size))
 		return ERR_PTR(-EFAULT);
 	return page->data;
 }
 /*
  * Read back the transaction response: returns 0 when no buffer is
  * attached to @file, otherwise copies from the buffer's data (ar->size
  * bytes available) via simple_read_from_buffer().
  */
 ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
 {
 	struct simple_transaction_argresp *resp = file->private_data;
 	return resp ? simple_read_from_buffer(buf, size, pos, resp->data, resp->size) : 0;
 }
 /*
  * ->release for transaction files: frees the page kept in
  * file->private_data.  Always returns 0.
  */
 int simple_transaction_release(struct inode *inode, struct file *file)
 {
 	free_page((unsigned long)file->private_data);
 	return 0;
 }
 /*
  * Simple attribute files
  *
  * Per-open state set up by simple_attr_open(): the get/set callbacks,
  * the opaque pointer passed to them, the format string used for reads,
  * and one text buffer for each direction.
  */
 struct simple_attr {
 	int (*get)(void *, u64 *);	/* fetch the value; may be NULL */
 	int (*set)(void *, u64);	/* store a value; may be NULL */
 	char get_buf[24];	/* enough to store a u64 and "\n\0" */
 	char set_buf[24];
 	void *data;		/* first argument to get/set (inode->i_private) */
 	const char *fmt;	/* format for read operation */
 	struct mutex mutex;	/* protects access to these buffers */
 };
 /*
  * simple_attr_open is called by an actual attribute open file operation
  * to set the attribute specific access operations.
  *
  * Allocates the per-open struct simple_attr, records @get, @set, @fmt and
  * the inode's i_private pointer in it, and stores it in
  * file->private_data.  Returns -ENOMEM if the allocation fails, otherwise
  * the result of nonseekable_open().
  */
 int simple_attr_open(struct inode *inode, struct file *file,
 		     int (*get)(void *, u64 *), int (*set)(void *, u64),
 		     const char *fmt)
 {
 	struct simple_attr *state = kmalloc(sizeof(*state), GFP_KERNEL);
 	if (!state)
 		return -ENOMEM;
 	mutex_init(&state->mutex);
 	state->fmt = fmt;
 	state->data = inode->i_private;
 	state->set = set;
 	state->get = get;
 	file->private_data = state;
 	return nonseekable_open(inode, file);
 }
 /*
  * ->release for attribute files: frees the struct simple_attr allocated
  * by simple_attr_open().  Always returns 0.
  */
 int simple_attr_release(struct inode *inode, struct file *file)
 {
 	kfree(file->private_data);
 	return 0;
 }
 /*
  * Read from the buffer that is filled with the get function.
  *
  * On the first read (*ppos == 0) the value is fetched through ->get and
  * formatted into get_buf; later reads reuse whatever text is already
  * there.  Returns -EACCES without a ->get callback, the error from an
  * interrupted mutex wait or from ->get, or the result of
  * simple_read_from_buffer().
  */
 ssize_t simple_attr_read(struct file *file, char __user *buf,
 			 size_t len, loff_t *ppos)
 {
 	struct simple_attr *attr = file->private_data;
 	size_t text_len;
 	ssize_t ret;
 	if (!attr->get)
 		return -EACCES;
 	ret = mutex_lock_interruptible(&attr->mutex);
 	if (ret)
 		return ret;
 	if (!*ppos) {		/* first read */
 		u64 val;
 		ret = attr->get(attr->data, &val);
 		if (ret)
 			goto out;
 		text_len = scnprintf(attr->get_buf, sizeof(attr->get_buf),
 				     attr->fmt, (unsigned long long)val);
 	} else {		/* continued read */
 		text_len = strlen(attr->get_buf);
 	}
 	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, text_len);
 out:
 	mutex_unlock(&attr->mutex);
 	return ret;
 }
 /*
  * Interpret the buffer as a number to call the set function with.
  *
  * At most sizeof(set_buf) - 1 bytes of @buf are copied and parsed (base
  * auto-detected); the rest of the input is ignored.  Returns -EACCES
  * without a ->set callback, the error from an interrupted mutex wait,
  * -EFAULT if the copy from user space fails, the error from ->set, or
  * @len on success.
  */
 ssize_t simple_attr_write(struct file *file, const char __user *buf,
 			  size_t len, loff_t *ppos)
 {
 	struct simple_attr *attr;
 	u64 val;
 	size_t size;
 	ssize_t ret;
 	attr = file->private_data;
 	if (!attr->set)
 		return -EACCES;
 	ret = mutex_lock_interruptible(&attr->mutex);
 	if (ret)
 		return ret;
 	ret = -EFAULT;
 	size = min(sizeof(attr->set_buf) - 1, len);
 	if (copy_from_user(attr->set_buf, buf, size))
 		goto out;
 	attr->set_buf[size] = '\0';
 	/*
 	 * Parse as long long: val is a u64, and a plain long would drop the
 	 * upper 32 bits on machines where long is 32 bits wide.
 	 */
 	val = simple_strtoll(attr->set_buf, NULL, 0);
 	ret = attr->set(attr->data, val);
 	if (ret == 0)
 		ret = len; /* on success, claim we got the whole input */
 out:
 	mutex_unlock(&attr->mutex);
 	return ret;
 }
 /**
  * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
  * @sb:		filesystem to do the file handle conversion on
  * @fid:	file handle to convert
  * @fh_len:	length of the file handle in bytes
  * @fh_type:	type of file handle
  * @get_inode:	filesystem callback to retrieve inode
  *
  * This function decodes @fid as long as it has one of the well-known
  * Linux filehandle types and calls @get_inode on it to retrieve the
  * inode for the object specified in the file handle.
  */
 struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
 		int fh_len, int fh_type, struct inode *(*get_inode)
 			(struct super_block *sb, u64 ino, u32 gen))
 {
 	struct inode *found = NULL;
 	if (fh_len < 2)
 		return NULL;
 	/* both 32-bit ino/generation handle types carry the object itself */
 	if (fh_type == FILEID_INO32_GEN ||
 	    fh_type == FILEID_INO32_GEN_PARENT)
 		found = get_inode(sb, fid->i32.ino, fid->i32.gen);
 	/* for any other handle type d_obtain_alias() is handed NULL */
 	return d_obtain_alias(found);
 }
 EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
 /**
  * generic_fh_to_parent - generic helper for the fh_to_parent export operation
  * @sb:		filesystem to do the file handle conversion on
  * @fid:	file handle to convert
  * @fh_len:	length of the file handle in bytes
  * @fh_type:	type of file handle
  * @get_inode:	filesystem callback to retrieve inode
  *
  * This function decodes @fid as long as it has one of the well-known
  * Linux filehandle types and calls @get_inode on it to retrieve the
  * inode for the _parent_ object specified in the file handle if it
  * is specified in the file handle, or NULL otherwise.
  */
 struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
 		int fh_len, int fh_type, struct inode *(*get_inode)
 			(struct super_block *sb, u64 ino, u32 gen))
 {
 	struct inode *inode = NULL;
 	if (fh_len <= 2)
 		return NULL;
 	switch (fh_type) {
 	case FILEID_INO32_GEN_PARENT:
 		/* the parent generation is only read when fh_len > 3 */
 		inode = get_inode(sb, fid->i32.parent_ino,
 				  (fh_len > 3 ? fid->i32.parent_gen : 0));
 		break;
 	}
 	return d_obtain_alias(inode);
 }
 EXPORT_SYMBOL_GPL(generic_fh_to_parent);
 /**
  * generic_file_fsync - generic fsync implementation for simple filesystems
  * @file:	file to synchronize
  * @datasync:	only synchronize essential metadata if true
  *
  * This is a generic implementation of the fsync method for simple
  * filesystems which track all non-inode metadata in the buffers list
  * hanging off the address_space structure.
  */
 int generic_file_fsync(struct file *file, int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	int buffers_err = sync_mapping_buffers(inode->i_mapping);
 	int meta_err;
 	/* a clean inode needs no metadata writeout */
 	if (!(inode->i_state & I_DIRTY))
 		return buffers_err;
 	/* fdatasync: skip the inode unless datasync-relevant state is dirty */
 	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
 		return buffers_err;
 	meta_err = sync_inode_metadata(inode, 1);
 	/* the buffer sync error, if any, takes precedence */
 	return buffers_err ? buffers_err : meta_err;
 }
 EXPORT_SYMBOL(generic_file_fsync);
 /**
  * generic_check_addressable - Check addressability of file system
  * @blocksize_bits:	log of file system block size
  * @num_blocks:		number of blocks in file system
  *
  * Determine whether a file system with @num_blocks blocks (and a
  * block size of 2**@blocksize_bits) is addressable by the sector_t
  * and page cache of the system.  Return 0 if so and -EFBIG otherwise.
  * An empty file system (@num_blocks == 0) is always addressable.
  * Return -EINVAL if @blocksize_bits is smaller than 9 or larger than
  * PAGE_CACHE_SHIFT.
  */
 int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
 {
 	u64 last_fs_block;
 	u64 last_fs_page;
 	if (unlikely(num_blocks == 0))
 		return 0;
 	if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT))
 		return -EINVAL;
 	/*
 	 * Only compute the shifts once blocksize_bits is known to be in
 	 * range; an out-of-range value would give an invalid shift count.
 	 */
 	last_fs_block = num_blocks - 1;
 	last_fs_page = last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits);
 	if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
 	    (last_fs_page > (pgoff_t)(~0ULL))) {
 		return -EFBIG;
 	}
 	return 0;
 }
 EXPORT_SYMBOL(generic_check_addressable);
 /*
  * No-op implementation of ->fsync for in-memory filesystems.
  * Ignores both arguments and always returns 0.
  */
 int noop_fsync(struct file *file, int datasync)
 {
 	return 0;
 }
 EXPORT_SYMBOL(dcache_dir_close);
 EXPORT_SYMBOL(dcache_dir_lseek);
 EXPORT_SYMBOL(dcache_dir_open);
 EXPORT_SYMBOL(dcache_readdir);
 EXPORT_SYMBOL(generic_read_dir);
 EXPORT_SYMBOL(mount_pseudo);
 EXPORT_SYMBOL(simple_write_begin);
 EXPORT_SYMBOL(simple_write_end);
 EXPORT_SYMBOL(simple_dir_inode_operations);
 EXPORT_SYMBOL(simple_dir_operations);
 EXPORT_SYMBOL(simple_empty);
 EXPORT_SYMBOL(simple_fill_super);
 EXPORT_SYMBOL(simple_getattr);
 EXPORT_SYMBOL(simple_link);
 EXPORT_SYMBOL(simple_lookup);
 EXPORT_SYMBOL(simple_pin_fs);
 EXPORT_SYMBOL(simple_readpage);
 EXPORT_SYMBOL(simple_release_fs);
 EXPORT_SYMBOL(simple_rename);
 EXPORT_SYMBOL(simple_rmdir);
 EXPORT_SYMBOL(simple_statfs);
 EXPORT_SYMBOL(noop_fsync);
 EXPORT_SYMBOL(simple_unlink);
 EXPORT_SYMBOL(simple_read_from_buffer);
 EXPORT_SYMBOL(simple_write_to_buffer);
 EXPORT_SYMBOL(memory_read_from_buffer);
 EXPORT_SYMBOL(simple_transaction_set);
 EXPORT_SYMBOL(simple_transaction_get);
 EXPORT_SYMBOL(simple_transaction_read);
 EXPORT_SYMBOL(simple_transaction_release);

fs/ocfs2/dcache.c

Diff comments View file @ da50295

 /* -*- mode: c; c-basic-offset: 8; -*-
  * vim: noexpandtab sw=8 ts=8 sts=0:
  *
  * dcache.c
  *
  * dentry cache handling code
  *
  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this program; if not, write to the
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 021110-1307, USA.
  */
 #include <linux/fs.h>
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/namei.h>
 #define MLOG_MASK_PREFIX ML_DCACHE
 #include <cluster/masklog.h>
 #include "ocfs2.h"
 #include "alloc.h"
 #include "dcache.h"
 #include "dlmglue.h"
 #include "file.h"
 #include "inode.h"
 #include "super.h"
 void ocfs2_dentry_attach_gen(struct dentry *dentry)
 {
 	unsigned long gen =
 		OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
 	BUG_ON(dentry->d_inode);
 	dentry->d_fsdata = (void *)gen;
 }
 /*
  * Decide whether a cached dentry may still be used.  Returns 1 when it
  * is considered valid and 0 otherwise.
  *
  * A negative dentry is valid only while the generation stored in its
  * d_fsdata equals the parent directory's ip_dir_lock_gen.  A positive
  * dentry is rejected when its inode is the root inode or a bad inode,
  * is flagged OCFS2_INODE_DELETED, has a zero link count, or when the
  * dentry has no d_fsdata attached.
  */
 static int ocfs2_dentry_revalidate(struct dentry *dentry,
 				   struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
 	int ret = 0;    /* if all else fails, just return false */
 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 	mlog_entry("(0x%p, '%.*s')\n", dentry,
 		   dentry->d_name.len, dentry->d_name.name);
 	/* For a negative dentry -
 	 * check the generation number of the parent and compare with the
 	 * one stored in the inode.
 	 */
 	if (inode == NULL) {
 		unsigned long gen = (unsigned long) dentry->d_fsdata;
 		unsigned long pgen =
 			OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
 		mlog(0, "negative dentry: %.*s parent gen: %lu "
 			"dentry gen: %lu\n",
 			dentry->d_name.len, dentry->d_name.name, pgen, gen);
 		if (gen != pgen)
 			goto bail;
 		goto valid;
 	}
 	BUG_ON(!osb);
 	if (inode == osb->root_inode || is_bad_inode(inode))
 		goto bail;
 	/* ip_flags is sampled under ip_lock */
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	/* did we or someone else delete this inode? */
 	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
 		spin_unlock(&OCFS2_I(inode)->ip_lock);
 		mlog(0, "inode (%llu) deleted, returning false\n",
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		goto bail;
 	}
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 	/*
 	 * We don't need a cluster lock to test this because once an
 	 * inode nlink hits zero, it never goes back.
 	 */
 	if (inode->i_nlink == 0) {
 		mlog(0, "Inode %llu orphaned, returning false "
 		     "dir = %d\n",
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 		     S_ISDIR(inode->i_mode));
 		goto bail;
 	}
 	/*
 	 * If the last lookup failed to create dentry lock, let us
 	 * redo it.
 	 */
 	if (!dentry->d_fsdata) {
 		mlog(0, "Inode %llu doesn't have dentry lock, "
 		     "returning false\n",
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 		goto bail;
 	}
 valid:
 	ret = 1;
 bail:
 	mlog_exit(ret);
 	return ret;
 }
 /*
  * Returns 1 when @dentry has lock data attached (d_fsdata), has a parent
  * whose inode's block number is @parent_blkno, and - if @skip_unhashed is
  * set - is still hashed.  Returns 0 in every other case.
  */
 static int ocfs2_match_dentry(struct dentry *dentry,
 			      u64 parent_blkno,
 			      int skip_unhashed)
 {
 	struct inode *dir;
 	/*
 	 * ocfs2_lookup() does a d_splice_alias() _before_ attaching
 	 * to the lock data, so we skip those here, otherwise
 	 * ocfs2_dentry_attach_lock() will get its original dentry
 	 * back.
 	 */
 	if (!dentry->d_fsdata || !dentry->d_parent)
 		return 0;
 	if (skip_unhashed && d_unhashed(dentry))
 		return 0;
 	dir = dentry->d_parent->d_inode;
 	/* Negative parent dentry? */
 	if (!dir)
 		return 0;
 	/* A different block number means the name is in another directory. */
 	return OCFS2_I(dir)->ip_blkno == parent_blkno;
 }
 /*
  * Walk the inode alias list, and find a dentry which has a given
  * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
  * is looking for a dentry_lock reference. The downconvert thread is
  * looking to unhash aliases, so we allow it to skip any that already
  * have that property.
  *
  * The list is walked under dcache_lock.  Returns the first alias
  * accepted by ocfs2_match_dentry(), after the dget call below has been
  * made on it, or NULL when no alias matches.
  */
 struct dentry *ocfs2_find_local_alias(struct inode *inode,
 				      u64 parent_blkno,
 				      int skip_unhashed)
 {
 	struct list_head *p;
 	struct dentry *dentry = NULL;
 	spin_lock(&dcache_lock);
 	list_for_each(p, &inode->i_dentry) {
 		dentry = list_entry(p, struct dentry, d_alias);
+		spin_lock(&dentry->d_lock);
 		if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
 			mlog(0, "dentry found: %.*s\n",
 			     dentry->d_name.len, dentry->d_name.name);
-			dget_locked(dentry);
+			dget_locked_dlock(dentry);
+			spin_unlock(&dentry->d_lock);
 			break;
 		}
+		spin_unlock(&dentry->d_lock);
 		/* no match: make sure a stale candidate is not returned */
 		dentry = NULL;
 	}
 	spin_unlock(&dcache_lock);
 	return dentry;
 }
 /*
  * Held in this file while dentry->d_fsdata is assigned or cleared and
  * while a dentry lock's dl_count is incremented or decremented.
  */
 DEFINE_SPINLOCK(dentry_attach_lock);
 /*
  * Attach this dentry to a cluster lock.
  *
  * Dentry locks cover all links in a given directory to a particular
  * inode. We do this so that ocfs2 can build a lock name which all
  * nodes in the cluster can agree on at all times. Shoving full names
  * in the cluster lock won't work due to size restrictions. Covering
  * links inside of a directory is a good compromise because it still
  * allows us to use the parent directory lock to synchronize
  * operations.
  *
  * Call this function with the parent dir semaphore and the parent dir
  * cluster lock held.
  *
  * The dir semaphore will protect us from having to worry about
  * concurrent processes on our node trying to attach a lock at the
  * same time.
  *
  * The dir cluster lock (held at either PR or EX mode) protects us
  * from unlink and rename on other nodes.
  *
  * A dput() can happen asynchronously due to pruning, so we cover
  * attaching and detaching the dentry lock with a
  * dentry_attach_lock.
  *
  * A node which has done lookup on a name retains a protected read
  * lock until final dput. If the user requests an unlink or rename,
  * the protected read is upgraded to an exclusive lock. Other nodes
  * who have seen the dentry will then be informed that they need to
  * downgrade their lock, which will involve d_delete on the
  * dentry. This happens in ocfs2_dentry_convert_worker().
  *
  * Returns 0 when @inode is NULL (negative dentry), when the dentry
  * already carries a dentry lock, or when the lock was attached and
  * ocfs2_dentry_lock() succeeded. Returns -ENOMEM when a new dentry
  * lock cannot be allocated, or the error from ocfs2_dentry_lock().
  */
 int ocfs2_dentry_attach_lock(struct dentry *dentry,
 			     struct inode *inode,
 			     u64 parent_blkno)
 {
 	int ret;
 	struct dentry *alias;
 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
 	mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n",
 	     dentry->d_name.len, dentry->d_name.name,
 	     (unsigned long long)parent_blkno, dl);
 	/*
 	 * Negative dentry. We ignore these for now.
 	 *
 	 * XXX: Could we improve ocfs2_dentry_revalidate() by
 	 * tracking these?
 	 */
 	if (!inode)
 		return 0;
 	if (!dentry->d_inode && dentry->d_fsdata) {
 		/* Converting a negative dentry to positive
 		   Clear dentry->d_fsdata */
 		dentry->d_fsdata = dl = NULL;
 	}
 	if (dl) {
 		/*
 		 * Already attached. "old" is the parent recorded in the
 		 * existing lock, "new" is the parent requested now.
 		 */
 		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
 				" \"%.*s\": old parent: %llu, new: %llu\n",
 				dentry->d_name.len, dentry->d_name.name,
 				(unsigned long long)dl->dl_parent_blkno,
 				(unsigned long long)parent_blkno);
 		return 0;
 	}
 	alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
 	if (alias) {
 		/*
 		 * Great, an alias exists, which means we must have a
 		 * dentry lock already. We can just grab the lock off
 		 * the alias and add it to the list.
 		 *
 		 * We're depending here on the fact that this dentry
 		 * was found and exists in the dcache and so must have
 		 * a reference to the dentry_lock because we can't
 		 * race creates. Final dput() cannot happen on it
 		 * since we have it pinned, so our reference is safe.
 		 */
 		dl = alias->d_fsdata;
 		mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
 				(unsigned long long)parent_blkno,
 				(unsigned long long)OCFS2_I(inode)->ip_blkno);
 		/* Same label order as above: stored parent first, requested second. */
 		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
 				" \"%.*s\": old parent: %llu, new: %llu\n",
 				dentry->d_name.len, dentry->d_name.name,
 				(unsigned long long)dl->dl_parent_blkno,
 				(unsigned long long)parent_blkno);
 		mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
 		goto out_attach;
 	}
 	/*
 	 * There are no other aliases
 	 */
 	dl = kmalloc(sizeof(*dl), GFP_NOFS);
 	if (!dl) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
 		return ret;
 	}
 	dl->dl_count = 0;
 	/*
 	 * Does this have to happen below, for all attaches, in case
 	 * the struct inode gets blown away by the downconvert thread?
 	 */
 	dl->dl_inode = igrab(inode);
 	dl->dl_parent_blkno = parent_blkno;
 	ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
 out_attach:
 	/* Publish the lock on the dentry and count this dentry as a user. */
 	spin_lock(&dentry_attach_lock);
 	dentry->d_fsdata = dl;
 	dl->dl_count++;
 	spin_unlock(&dentry_attach_lock);
 	/*
 	 * This actually gets us our PRMODE level lock. From now on,
 	 * we'll have a notification if one of these names is
 	 * destroyed on another node.
 	 */
 	ret = ocfs2_dentry_lock(dentry, 0);
 	if (!ret)
 		ocfs2_dentry_unlock(dentry, 0);
 	else
 		mlog_errno(ret);
 	/*
 	 * In case of error, manually free the allocation and do the iput().
 	 * We need to do this because error here means no d_instantiate(),
 	 * which means iput() will not be called during dput(dentry).
 	 * Only a lock allocated above (no alias) is torn down here; a
 	 * lock borrowed from an alias is left alone.
 	 */
 	if (ret < 0 && !alias) {
 		ocfs2_lock_res_free(&dl->dl_lockres);
 		BUG_ON(dl->dl_count != 1);
 		spin_lock(&dentry_attach_lock);
 		dentry->d_fsdata = NULL;
 		spin_unlock(&dentry_attach_lock);
 		kfree(dl);
 		iput(inode);
 	}
 	/* alias is NULL when no alias was found. */
 	dput(alias);
 	return ret;
 }
 /*
  * Held in this file whenever osb->dentry_lock_list is read or modified,
  * including the decision to queue dentry_lock_work.
  */
 DEFINE_SPINLOCK(dentry_list_lock);
 /* We limit the number of dentry locks to drop in one go. We have
  * this limit so that we don't starve other users of ocfs2_wq. */
 #define DL_INODE_DROP_COUNT 64
 /*
  * Drop inode references from dentry locks.
  *
  * Pops entries off osb->dentry_lock_list, releasing the inode reference
  * and freeing each one. A negative @drop_count drains the whole list;
  * otherwise at most @drop_count entries are processed.
  */
 static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
 {
 	struct ocfs2_dentry_lock *victim;
 	spin_lock(&dentry_list_lock);
 	for (;;) {
 		victim = osb->dentry_lock_list;
 		if (!victim)
 			break;
 		/* A non-negative budget is used up one entry at a time. */
 		if (drop_count >= 0 && drop_count-- == 0)
 			break;
 		osb->dentry_lock_list = victim->dl_next;
 		/* iput() and kfree() run with the list lock released. */
 		spin_unlock(&dentry_list_lock);
 		iput(victim->dl_inode);
 		kfree(victim);
 		spin_lock(&dentry_list_lock);
 	}
 	spin_unlock(&dentry_list_lock);
 }
 /*
  * Work handler: drop up to DL_INODE_DROP_COUNT queued dentry locks and
  * requeue itself if entries remain.
  */
 void ocfs2_drop_dl_inodes(struct work_struct *work)
 {
 	struct ocfs2_super *osb;
 	osb = container_of(work, struct ocfs2_super, dentry_lock_work);
 	__ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
 	/*
 	 * Don't queue dropping if umount is in progress. We flush the
 	 * list in ocfs2_dismount_volume
 	 */
 	spin_lock(&dentry_list_lock);
 	if (osb->dentry_lock_list) {
 		if (!ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
 			queue_work(ocfs2_wq, &osb->dentry_lock_work);
 	}
 	spin_unlock(&dentry_list_lock);
 }
 /*
  * Flush the whole work queue: the negative count makes
  * __ocfs2_drop_dl_inodes() keep going until osb->dentry_lock_list is
  * empty instead of stopping after a fixed number of entries.
  */
 void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
 {
 	__ocfs2_drop_dl_inodes(osb, -1);
 }
 /*
  * ocfs2_dentry_iput() and friends.
  *
  * At this point, our particular dentry is detached from the inodes
  * alias list, so there's no way that the locking code can find it.
  *
  * The interesting stuff happens when we determine that our lock needs
  * to go away because this is the last subdir alias in the
  * system. This function needs to handle a couple things:
  *
  * 1) Synchronizing lock shutdown with the downconvert threads. This
  *    is already handled for us via the lockres release drop function
  *    called in ocfs2_release_dentry_lock()
  *
  * 2) A race may occur when we're doing our lock shutdown and
  *    another process wants to create a new dentry lock. Right now we
  *    let them race, which means that for a very short while, this
  *    node might have two locks on a lock resource. This should not be a
  *    problem though because one of them is in the process of being
  *    thrown out.
  */
 /*
  * Shut down and free the lock resource embedded in @dl, then push @dl
  * onto osb->dentry_lock_list so its inode reference is dropped later.
  */
 static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
 				   struct ocfs2_dentry_lock *dl)
 {
 	int kick_worker;
 	ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
 	ocfs2_lock_res_free(&dl->dl_lockres);
 	/* We leave dropping of inode reference to ocfs2_wq as that can
 	 * possibly lead to inode deletion which gets tricky */
 	spin_lock(&dentry_list_lock);
 	/* Work is queued only when the list goes from empty to non-empty. */
 	kick_worker = !osb->dentry_lock_list &&
 		      !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
 	if (kick_worker)
 		queue_work(ocfs2_wq, &osb->dentry_lock_work);
 	dl->dl_next = osb->dentry_lock_list;
 	osb->dentry_lock_list = dl;
 	spin_unlock(&dentry_list_lock);
 }
 /*
  * Drop one reference on @dl. When the count reaches zero the dentry
  * lock is handed to ocfs2_drop_dentry_lock().
  */
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 			   struct ocfs2_dentry_lock *dl)
 {
 	int last_ref;
 	BUG_ON(dl->dl_count == 0);
 	spin_lock(&dentry_attach_lock);
 	last_ref = (--dl->dl_count == 0);
 	spin_unlock(&dentry_attach_lock);
 	if (!last_ref)
 		return;
 	ocfs2_drop_dentry_lock(osb, dl);
 }
 /*
  * d_iput hook: release this dentry's reference on its dentry lock (if
  * it has one) and then iput() the inode.
  */
 static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
 {
 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
 	if (dl) {
 		mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
 				dentry->d_name.len, dentry->d_name.name,
 				dl->dl_count);
 		ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
 	} else if (!(dentry->d_flags & DCACHE_DISCONNECTED) &&
 		   !d_unhashed(dentry)) {
 		/*
 		 * No dentry lock is ok if we're disconnected or
 		 * unhashed; anything else is reported.
 		 */
 		unsigned long long ino = 0ULL;
 		if (inode)
 			ino = (unsigned long long)OCFS2_I(inode)->ip_blkno;
 		mlog(ML_ERROR, "Dentry is missing cluster lock. "
 		     "inode: %llu, d_flags: 0x%x, d_name: %.*s\n",
 		     ino, dentry->d_flags, dentry->d_name.len,
 		     dentry->d_name.name);
 	}
 	iput(inode);
 }
 /*
  * d_move(), but keep the locks in sync.
  *
  * When we are done, "dentry" will have the parent dir and name of
  * "target", which will be thrown away.
  *
  * We manually update the lock of "dentry" if need be.
  *
  * "target" doesn't have its dentry lock touched - we allow the later
  * dput() to handle this for us.
  *
  * This is called during ocfs2_rename(), while holding parent
  * directory locks. The dentries have already been deleted on other
  * nodes via ocfs2_remote_dentry_delete().
  *
  * Normally, the VFS handles the d_move() for the file system, after
  * the ->rename() callback. OCFS2 wants to handle this internally, so
  * the new lock can be created atomically with respect to the cluster.
  */
 void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
 		       struct inode *old_dir, struct inode *new_dir)
 {
 	struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
 	struct inode *inode = dentry->d_inode;
 	/*
 	 * A move within the same directory leaves the lock info as it
 	 * is, so only a cross-directory move swaps the dentry lock.
 	 *
 	 * XXX: Is there any advantage to dropping the lock here?
 	 */
 	if (old_dir != new_dir) {
 		int status;
 		ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
 		dentry->d_fsdata = NULL;
 		status = ocfs2_dentry_attach_lock(dentry, inode,
 						  OCFS2_I(new_dir)->ip_blkno);
 		if (status)
 			mlog_errno(status);
 	}
 	d_move(dentry, target);
 }
 /*
  * Dentry operations table: installs ocfs2_dentry_revalidate() as the
  * d_revalidate hook and ocfs2_dentry_iput() as the d_iput hook.
  */
 const struct dentry_operations ocfs2_dentry_ops = {
 	.d_revalidate		= ocfs2_dentry_revalidate,
 	.d_iput			= ocfs2_dentry_iput,
 };

security/tomoyo/realpath.c

Diff comments View file @ da50295

 /*
  * security/tomoyo/realpath.c
  *
  * Pathname calculation functions for TOMOYO.
  *
  * Copyright (C) 2005-2010  NTT DATA CORPORATION
  */
 #include <linux/types.h>
 #include <linux/mount.h>
 #include <linux/mnt_namespace.h>
 #include <linux/fs_struct.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
 #include <net/sock.h>
 #include "common.h"
+#include "../../fs/internal.h"
 /**
  * tomoyo_encode: Convert binary string to ascii string.
  *
  * @str: String in binary format.
  *
  * Returns pointer to @str in ascii format on success, NULL otherwise.
  *
  * Bytes in the range 0x20 < c < 0x7F are copied as they are, except
  * that a backslash becomes "\\"; every other byte becomes a \ooo
  * octal escape.
  *
  * This function uses kzalloc(), so caller must kfree() if this function
  * didn't return NULL.
  */
 char *tomoyo_encode(const char *str)
 {
 	const unsigned char *src;
 	char *out;
 	char *dst;
 	int need = 1;	/* terminating NUL */
 	if (!str)
 		return NULL;
 	/* First pass: work out how long the encoded form is. */
 	for (src = (const unsigned char *)str; *src; src++) {
 		if (*src == '\\')
 			need += 2;
 		else if (*src > ' ' && *src < 127)
 			need += 1;
 		else
 			need += 4;
 	}
 	/* Reserve space for appending "/". */
 	out = kzalloc(need + 10, GFP_NOFS);
 	if (!out)
 		return NULL;
 	/* Second pass: emit the encoded bytes; kzalloc() supplies the NUL. */
 	dst = out;
 	for (src = (const unsigned char *)str; *src; src++) {
 		const unsigned char c = *src;
 		if (c == '\\') {
 			*dst++ = '\\';
 			*dst++ = '\\';
 		} else if (c > ' ' && c < 127) {
 			*dst++ = c;
 		} else {
 			*dst++ = '\\';
 			*dst++ = (c >> 6) + '0';
 			*dst++ = ((c >> 3) & 7) + '0';
 			*dst++ = (c & 7) + '0';
 		}
 	}
 	return out;
 }
 /**
  * tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root.
  *
  * @path: Pointer to "struct path".
  *
  * Returns the realpath of the given @path on success, NULL otherwise.
  *
  * If dentry is a directory, trailing '/' is appended.
  * Characters out of 0x20 < c < 0x7F range are converted to
  * \ooo style octal string.
  * Character \ is converted to \\ string.
  *
  * The name is built in a temporary buffer that starts at PAGE_SIZE
  * bytes and is doubled on every retry; the loop ends when a name has
  * been produced, when the buffer cannot be allocated, or when @path
  * has no vfsmount.
  *
  * These functions use kzalloc(), so the caller must call kfree()
  * if these functions didn't return NULL.
  */
 char *tomoyo_realpath_from_path(struct path *path)
 {
 	char *buf = NULL;
 	char *name = NULL;
 	/* Doubled at the top of the loop, so the first buffer is PAGE_SIZE. */
 	unsigned int buf_len = PAGE_SIZE / 2;
 	struct dentry *dentry = path->dentry;
 	bool is_dir;
 	if (!dentry)
 		return NULL;
 	is_dir = dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode);
 	while (1) {
 		struct path ns_root = { .mnt = NULL, .dentry = NULL };
 		char *pos;
 		/* Grow the buffer and discard the one from the previous attempt. */
 		buf_len <<= 1;
 		kfree(buf);
 		buf = kmalloc(buf_len, GFP_NOFS);
 		if (!buf)
 			break;
 		/* Get better name for socket. */
 		if (dentry->d_sb && dentry->d_sb->s_magic == SOCKFS_MAGIC) {
 			struct inode *inode = dentry->d_inode;
 			struct socket *sock = inode ? SOCKET_I(inode) : NULL;
 			struct sock *sk = sock ? sock->sk : NULL;
 			if (sk) {
 				snprintf(buf, buf_len - 1, "socket:[family=%u:"
 					 "type=%u:protocol=%u]", sk->sk_family,
 					 sk->sk_type, sk->sk_protocol);
 			} else {
 				snprintf(buf, buf_len - 1, "socket:[unknown]");
 			}
 			name = tomoyo_encode(buf);
 			break;
 		}
 		/* For "socket:[\$]" and "pipe:[\$]". */
 		if (dentry->d_op && dentry->d_op->d_dname) {
 			pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1);
 			/* On error, go round again with a larger buffer. */
 			if (IS_ERR(pos))
 				continue;
 			name = tomoyo_encode(pos);
 			break;
 		}
 		/* If we don't have a vfsmount, we can't calculate. */
 		if (!path->mnt)
 			break;
 		/* go to whatever namespace root we are under */
 		pos = __d_path(path, &ns_root, buf, buf_len);
 		/* Prepend "/proc" prefix if using internal proc vfs mount. */
 		if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) &&
 		    (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) {
 			/*
 			 * Step back 5 bytes to make room for "/proc"; if that
 			 * would start before buf, report -ENOMEM so the loop
 			 * retries with a larger buffer.
 			 */
 			pos -= 5;
 			if (pos >= buf)
 				memcpy(pos, "/proc", 5);
 			else
 				pos = ERR_PTR(-ENOMEM);
 		}
 		/* Any error here also leads to a retry with a larger buffer. */
 		if (IS_ERR(pos))
 			continue;
 		name = tomoyo_encode(pos);
 		break;
 	}
 	kfree(buf);
 	/* Every path that leaves name NULL is reported through tomoyo_warn_oom(). */
 	if (!name)
 		tomoyo_warn_oom(__func__);
 	else if (is_dir && *name) {
 		/* Append trailing '/' if dentry is a directory. */
 		char *pos = name + strlen(name) - 1;
 		if (*pos != '/')
 			/*
 			 * This is OK because tomoyo_encode() reserves space
 			 * for appending "/".
 			 */
 			*++pos = '/';
 	}
 	return name;
 }
 /**
  * tomoyo_realpath_nofollow - Get realpath of a pathname.
  *
  * @pathname: The pathname to solve.
  *
  * Looks @pathname up with kern_path() using no lookup flags and passes
  * the result to tomoyo_realpath_from_path().
  *
  * Returns the realpath of @pathname on success, NULL otherwise.
  */
 char *tomoyo_realpath_nofollow(const char *pathname)
 {
 	struct path path;
 	char *resolved;
 	if (!pathname)
 		return NULL;
 	if (kern_path(pathname, 0, &path) != 0)
 		return NULL;
 	resolved = tomoyo_realpath_from_path(&path);
 	/* Drop the reference kern_path() took once the name has been built. */
 	path_put(&path);
 	return resolved;
 }