Commit 3064c3563ba4c23e2c7a47254ec056ed9ba0098a

Authored by Al Viro
1 parent 8fa1f1c2bd

death to mnt_pinned

Rather than playing silly buggers with vfsmount refcounts, just have
acct_on() ask fs/namespace.c for an internal clone of file->f_path.mnt
and replace file->f_path.mnt with said clone.  Then attach the pin to
the original vfsmount.  Voila - the clone will be alive until the file
gets closed, making sure that the underlying superblock remains active,
etc., and we can drop the original vfsmount, so that it's not kept busy.
If the file lives until the final mntput() of the original vfsmount,
we'll notice that there's an fs_pin (the one in the bsd_acct_struct that
holds that file) and mnt_pin_kill() will take it out.  Since
->kill() is synchronous, we won't proceed past that point until
these files are closed (and the private clones of our vfsmount are
gone), so we get the same ordering guarantees we used to get.

mnt_pin()/mnt_unpin()/->mnt_pinned are gone now, and good riddance -
the mechanism never became usable outside of kernel/acct.c (and was
racy wrt umount even there).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 4 changed files with 30 additions and 34 deletions Side-by-side Diff

... ... @@ -55,7 +55,6 @@
55 55 int mnt_id; /* mount identifier */
56 56 int mnt_group_id; /* peer group identifier */
57 57 int mnt_expiry_mark; /* true if marked for expiry */
58   - int mnt_pinned;
59 58 struct hlist_head mnt_pins;
60 59 struct path mnt_ex_mountpoint;
61 60 };
... ... @@ -937,7 +937,6 @@
937 937  
938 938 static void mntput_no_expire(struct mount *mnt)
939 939 {
940   -put_again:
941 940 rcu_read_lock();
942 941 mnt_add_count(mnt, -1);
943 942 if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
... ... @@ -950,14 +949,6 @@
950 949 unlock_mount_hash();
951 950 return;
952 951 }
953   - if (unlikely(mnt->mnt_pinned)) {
954   - mnt_add_count(mnt, mnt->mnt_pinned + 1);
955   - mnt->mnt_pinned = 0;
956   - rcu_read_unlock();
957   - unlock_mount_hash();
958   - mnt_pin_kill(mnt);
959   - goto put_again;
960   - }
961 952 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
962 953 rcu_read_unlock();
963 954 unlock_mount_hash();
... ... @@ -980,6 +971,8 @@
980 971 * so mnt_get_writers() below is safe.
981 972 */
982 973 WARN_ON(mnt_get_writers(mnt));
  974 + if (unlikely(mnt->mnt_pins.first))
  975 + mnt_pin_kill(mnt);
983 976 fsnotify_vfsmount_delete(&mnt->mnt);
984 977 dput(mnt->mnt.mnt_root);
985 978 deactivate_super(mnt->mnt.mnt_sb);
986 979  
987 980  
... ... @@ -1007,25 +1000,15 @@
1007 1000 }
1008 1001 EXPORT_SYMBOL(mntget);
1009 1002  
1010   -void mnt_pin(struct vfsmount *mnt)
  1003 +struct vfsmount *mnt_clone_internal(struct path *path)
1011 1004 {
1012   - lock_mount_hash();
1013   - real_mount(mnt)->mnt_pinned++;
1014   - unlock_mount_hash();
  1005 + struct mount *p;
  1006 + p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
  1007 + if (IS_ERR(p))
  1008 + return ERR_CAST(p);
  1009 + p->mnt.mnt_flags |= MNT_INTERNAL;
  1010 + return &p->mnt;
1015 1011 }
1016   -EXPORT_SYMBOL(mnt_pin);
1017   -
1018   -void mnt_unpin(struct vfsmount *m)
1019   -{
1020   - struct mount *mnt = real_mount(m);
1021   - lock_mount_hash();
1022   - if (mnt->mnt_pinned) {
1023   - mnt_add_count(mnt, 1);
1024   - mnt->mnt_pinned--;
1025   - }
1026   - unlock_mount_hash();
1027   -}
1028   -EXPORT_SYMBOL(mnt_unpin);
1029 1012  
1030 1013 static inline void mangle(struct seq_file *m, const char *s)
1031 1014 {
include/linux/mount.h
... ... @@ -62,6 +62,7 @@
62 62 };
63 63  
64 64 struct file; /* forward dec */
  65 +struct path;
65 66  
66 67 extern int mnt_want_write(struct vfsmount *mnt);
67 68 extern int mnt_want_write_file(struct file *file);
... ... @@ -70,8 +71,7 @@
70 71 extern void mnt_drop_write_file(struct file *file);
71 72 extern void mntput(struct vfsmount *mnt);
72 73 extern struct vfsmount *mntget(struct vfsmount *mnt);
73   -extern void mnt_pin(struct vfsmount *mnt);
74   -extern void mnt_unpin(struct vfsmount *mnt);
  74 +extern struct vfsmount *mnt_clone_internal(struct path *path);
75 75 extern int __mnt_is_readonly(struct vfsmount *mnt);
76 76  
77 77 struct file_system_type;
... ... @@ -154,7 +154,6 @@
154 154 {
155 155 struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
156 156 struct file *file = acct->file;
157   - mnt_unpin(file->f_path.mnt);
158 157 if (file->f_op->flush)
159 158 file->f_op->flush(file, NULL);
160 159 __fput_sync(file);
161 160  
... ... @@ -196,9 +195,10 @@
196 195 static int acct_on(struct filename *pathname)
197 196 {
198 197 struct file *file;
199   - struct vfsmount *mnt;
  198 + struct vfsmount *mnt, *internal;
200 199 struct pid_namespace *ns = task_active_pid_ns(current);
201 200 struct bsd_acct_struct *acct, *old;
  201 + int err;
202 202  
203 203 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
204 204 if (!acct)
... ... @@ -222,6 +222,21 @@
222 222 filp_close(file, NULL);
223 223 return -EIO;
224 224 }
  225 + internal = mnt_clone_internal(&file->f_path);
  226 + if (IS_ERR(internal)) {
  227 + kfree(acct);
  228 + filp_close(file, NULL);
  229 + return PTR_ERR(internal);
  230 + }
  231 + err = mnt_want_write(internal);
  232 + if (err) {
  233 + mntput(internal);
  234 + kfree(acct);
  235 + filp_close(file, NULL);
  236 + return err;
  237 + }
  238 + mnt = file->f_path.mnt;
  239 + file->f_path.mnt = internal;
225 240  
226 241 atomic_long_set(&acct->pin.count, 1);
227 242 acct->pin.kill = acct_pin_kill;
... ... @@ -229,8 +244,6 @@
229 244 acct->needcheck = jiffies;
230 245 acct->ns = ns;
231 246 mutex_init(&acct->lock);
232   - mnt = file->f_path.mnt;
233   - mnt_pin(mnt);
234 247 mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
235 248 pin_insert(&acct->pin, mnt);
236 249  
... ... @@ -240,7 +253,8 @@
240 253 else
241 254 ns->bacct = acct;
242 255 mutex_unlock(&acct->lock);
243   - mntput(mnt); /* it's pinned, now give up active reference */
  256 + mnt_drop_write(mnt);
  257 + mntput(mnt);
244 258 return 0;
245 259 }
246 260