Commit ce8d2cdf3d2b73e346c82e6f0a46da331df6364c

Authored by Dave Hansen
Committed by Linus Torvalds
1 parent 348366b963

r/o bind mounts: filesystem helpers for custom 'struct file's

Why do we need r/o bind mounts?

This feature allows a read-only view into a read-write filesystem.  In the
process of doing that, it also provides infrastructure for keeping track of
the number of writers to any given mount.

This has a number of uses.  It allows chroots to have parts of filesystems
writable.  It will be useful for containers in the future because users may
have root inside a container, but should not be allowed to write to
some filesystems.  This also replaces patches that vserver has had out of the
tree for several years.

It allows security enhancement by making sure that parts of your filesystem
are read-only (such as when you don't trust your FTP server), when you don't want
to have entire new filesystems mounted, or when you want atime selectively
updated.  I've been using the following script to test that the feature is
working as desired.  It takes a directory and makes a regular bind and a r/o
bind mount of it.  It then performs some normal filesystem operations on the
three directories, including ones that are expected to fail, like creating a
file on the r/o mount.

This patch:

Some filesystems forgo the vfs and may_open() and create their own 'struct
file's.

This patch creates a couple of helper functions which can be used by these
filesystems, and will provide a unified place which the r/o bind mount code
may patch.

Also, rename an existing, static-scope init_file() to a less generic name.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 8 changed files with 104 additions and 49 deletions Side-by-side Diff

... ... @@ -142,7 +142,7 @@
142 142 return 0;
143 143 }
144 144  
145   -static int init_file(struct inode * inode)
  145 +static int configfs_init_file(struct inode * inode)
146 146 {
147 147 inode->i_size = PAGE_SIZE;
148 148 inode->i_fop = &configfs_file_operations;
... ... @@ -283,7 +283,8 @@
283 283  
284 284 dentry->d_fsdata = configfs_get(sd);
285 285 sd->s_dentry = dentry;
286   - error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, init_file);
  286 + error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
  287 + configfs_init_file);
287 288 if (error) {
288 289 configfs_put(sd);
289 290 return error;
... ... @@ -137,6 +137,66 @@
137 137  
138 138 EXPORT_SYMBOL(get_empty_filp);
139 139  
  140 +/**
  141 + * alloc_file - allocate and initialize a 'struct file'
  142 + * @mnt: the vfsmount on which the file will reside
  143 + * @dentry: the dentry representing the new file
  144 + * @mode: the mode with which the new file will be opened
  145 + * @fop: the 'struct file_operations' for the new file
  146 + *
  147 + * Use this instead of get_empty_filp() to get a new
  148 + * 'struct file'. Doing so avoids the same initialization
  149 + * pitfalls that are listed for init_file(). This is the
  150 + * preferred interface; use it rather than init_file().
  151 + *
  152 + * If all the callers of init_file() are eliminated, its
  153 + * code should be moved into this function.
  154 + */
  155 +struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
  156 + mode_t mode, const struct file_operations *fop)
  157 +{
  158 + struct file *file;
  159 + struct path; /* NOTE(review): declares only a struct tag; nothing below uses it */
  160 +
  161 + file = get_empty_filp();
  162 + if (!file)
  163 + return NULL; /* no 'struct file' available; caller picks the errno */
  164 +
  165 + init_file(file, mnt, dentry, mode, fop); /* return value ignored; init_file() as written always returns 0 */
  166 + return file;
  167 +}
  168 +EXPORT_SYMBOL(alloc_file);
  169 +
  170 +/**
  171 + * init_file - initialize a 'struct file'
  172 + * @file: the already allocated 'struct file' to initialize
  173 + * @mnt: the vfsmount on which the file resides
  174 + * @dentry: the dentry representing this file
  175 + * @mode: the mode the file is opened with
  176 + * @fop: the 'struct file_operations' for this file
  177 + *
  178 + * Use this instead of setting the members directly. Doing so
  179 + * avoids making mistakes like forgetting the mntget() or
  180 + * forgetting to take a write on the mnt.
  181 + *
  182 + * Note: This is a crappy interface. It is here to make
  183 + * merging with the existing users of get_empty_filp()
  184 + * who have complex failure logic easier. All users
  185 + * of this should be moving to alloc_file().
  186 + */
  187 +int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
  188 + mode_t mode, const struct file_operations *fop)
  189 +{
  190 + int error = 0; /* never reassigned below, so this function always returns 0 */
  191 + file->f_path.dentry = dentry;
  192 + file->f_path.mnt = mntget(mnt); /* the mntget() callers used to have to remember */
  193 + file->f_mapping = dentry->d_inode->i_mapping; /* dereferences dentry->d_inode, so dentry must already have an inode */
  194 + file->f_mode = mode;
  195 + file->f_op = fop;
  196 + return error;
  197 +}
  198 +EXPORT_SYMBOL(init_file);
  199 +
140 200 void fastcall fput(struct file *file)
141 201 {
142 202 if (atomic_dec_and_test(&file->f_count))
fs/hugetlbfs/inode.c
... ... @@ -933,16 +933,11 @@
933 933 if (!dentry)
934 934 goto out_shm_unlock;
935 935  
936   - error = -ENFILE;
937   - file = get_empty_filp();
938   - if (!file)
939   - goto out_dentry;
940   -
941 936 error = -ENOSPC;
942 937 inode = hugetlbfs_get_inode(root->d_sb, current->fsuid,
943 938 current->fsgid, S_IFREG | S_IRWXUGO, 0);
944 939 if (!inode)
945   - goto out_file;
  940 + goto out_dentry;
946 941  
947 942 error = -ENOMEM;
948 943 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
949 944  
... ... @@ -951,17 +946,18 @@
951 946 d_instantiate(dentry, inode);
952 947 inode->i_size = size;
953 948 inode->i_nlink = 0;
954   - file->f_path.mnt = mntget(hugetlbfs_vfsmount);
955   - file->f_path.dentry = dentry;
956   - file->f_mapping = inode->i_mapping;
957   - file->f_op = &hugetlbfs_file_operations;
958   - file->f_mode = FMODE_WRITE | FMODE_READ;
  949 +
  950 + error = -ENFILE;
  951 + file = alloc_file(hugetlbfs_vfsmount, dentry,
  952 + FMODE_WRITE | FMODE_READ,
  953 + &hugetlbfs_file_operations);
  954 + if (!file)
  955 + goto out_inode;
  956 +
959 957 return file;
960 958  
961 959 out_inode:
962 960 iput(inode);
963   -out_file:
964   - put_filp(file);
965 961 out_dentry:
966 962 dput(dentry);
967 963 out_shm_unlock:
include/linux/file.h
... ... @@ -62,6 +62,15 @@
62 62 extern void FASTCALL(__fput(struct file *));
63 63 extern void FASTCALL(fput(struct file *));
64 64  
  65 +struct file_operations;
  66 +struct vfsmount;
  67 +struct dentry;
  68 +extern int init_file(struct file *, struct vfsmount *mnt,
  69 + struct dentry *dentry, mode_t mode,
  70 + const struct file_operations *fop);
  71 +extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry,
  72 + mode_t mode, const struct file_operations *fop);
  73 +
65 74 static inline void fput_light(struct file *file, int fput_needed)
66 75 {
67 76 if (unlikely(fput_needed))
... ... @@ -907,7 +907,7 @@
907 907 goto out_unlock;
908 908  
909 909 path.dentry = dget(shp->shm_file->f_path.dentry);
910   - path.mnt = mntget(shp->shm_file->f_path.mnt);
  910 + path.mnt = shp->shm_file->f_path.mnt;
911 911 shp->shm_nattch++;
912 912 size = i_size_read(path.dentry->d_inode);
913 913 shm_unlock(shp);
914 914  
915 915  
916 916  
917 917  
... ... @@ -915,18 +915,16 @@
915 915 err = -ENOMEM;
916 916 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
917 917 if (!sfd)
918   - goto out_put_path;
  918 + goto out_put_dentry;
919 919  
920 920 err = -ENOMEM;
921   - file = get_empty_filp();
  921 +
  922 + file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
922 923 if (!file)
923 924 goto out_free;
924 925  
925   - file->f_op = &shm_file_operations;
926 926 file->private_data = sfd;
927   - file->f_path = path;
928 927 file->f_mapping = shp->shm_file->f_mapping;
929   - file->f_mode = f_mode;
930 928 sfd->id = shp->id;
931 929 sfd->ns = get_ipc_ns(ns);
932 930 sfd->file = shp->shm_file;
933 931  
... ... @@ -977,9 +975,8 @@
977 975  
978 976 out_free:
979 977 kfree(sfd);
980   -out_put_path:
  978 +out_put_dentry:
981 979 dput(path.dentry);
982   - mntput(path.mnt);
983 980 goto out_nattch;
984 981 }
985 982  
... ... @@ -2543,11 +2543,8 @@
2543 2543 d_instantiate(dentry, inode);
2544 2544 inode->i_size = size;
2545 2545 inode->i_nlink = 0; /* It is unlinked */
2546   - file->f_path.mnt = mntget(shm_mnt);
2547   - file->f_path.dentry = dentry;
2548   - file->f_mapping = inode->i_mapping;
2549   - file->f_op = &shmem_file_operations;
2550   - file->f_mode = FMODE_WRITE | FMODE_READ;
  2546 + init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
  2547 + &shmem_file_operations);
2551 2548 return file;
2552 2549  
2553 2550 close_file:
... ... @@ -66,24 +66,19 @@
66 66 if (!dentry)
67 67 goto put_memory;
68 68  
69   - error = -ENFILE;
70   - file = get_empty_filp();
71   - if (!file)
72   - goto put_dentry;
73   -
74 69 error = -ENOSPC;
75 70 inode = ramfs_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
76 71 if (!inode)
77   - goto close_file;
  72 + goto put_dentry;
78 73  
79 74 d_instantiate(dentry, inode);
80   - inode->i_nlink = 0; /* It is unlinked */
  75 + error = -ENFILE;
  76 + file = alloc_file(shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
  77 + &ramfs_file_operations);
  78 + if (!file)
  79 + goto put_dentry;
81 80  
82   - file->f_path.mnt = mntget(shm_mnt);
83   - file->f_path.dentry = dentry;
84   - file->f_mapping = inode->i_mapping;
85   - file->f_op = &ramfs_file_operations;
86   - file->f_mode = FMODE_WRITE | FMODE_READ;
  81 + inode->i_nlink = 0; /* It is unlinked */
87 82  
88 83 /* notify everyone as to the change of file size */
89 84 error = do_truncate(dentry, size, 0, file);
... ... @@ -364,26 +364,26 @@
364 364  
365 365 static int sock_attach_fd(struct socket *sock, struct file *file)
366 366 {
  367 + struct dentry *dentry;
367 368 struct qstr name = { .name = "" };
368 369  
369   - file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
370   - if (unlikely(!file->f_path.dentry))
  370 + dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
  371 + if (unlikely(!dentry))
371 372 return -ENOMEM;
372 373  
373   - file->f_path.dentry->d_op = &sockfs_dentry_operations;
  374 + dentry->d_op = &sockfs_dentry_operations;
374 375 /*
375 376 * We dont want to push this dentry into global dentry hash table.
376 377 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
377 378 * This permits a working /proc/$pid/fd/XXX on sockets
378 379 */
379   - file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
380   - d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
381   - file->f_path.mnt = mntget(sock_mnt);
382   - file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
  380 + dentry->d_flags &= ~DCACHE_UNHASHED;
  381 + d_instantiate(dentry, SOCK_INODE(sock));
383 382  
384 383 sock->file = file;
385   - file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
386   - file->f_mode = FMODE_READ | FMODE_WRITE;
  384 + init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
  385 + &socket_file_ops);
  386 + SOCK_INODE(sock)->i_fop = &socket_file_ops;
387 387 file->f_flags = O_RDWR;
388 388 file->f_pos = 0;
389 389 file->private_data = sock;