Blame view

fs/namespace.c 57.2 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
  /*
   *  linux/fs/namespace.c
   *
   * (C) Copyright Al Viro 2000, 2001
   *	Released under GPL v2.
   *
   * Based on code from fs/super.c, copyright Linus Torvalds and others.
   * Heavily rewritten.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
11
12
13
14
  #include <linux/syscalls.h>
  #include <linux/slab.h>
  #include <linux/sched.h>
  #include <linux/smp_lock.h>
  #include <linux/init.h>
15a67dd8c   Randy Dunlap   [PATCH] fs/namesp...
15
  #include <linux/kernel.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
  #include <linux/acct.h>
16f7e0fe2   Randy Dunlap   [PATCH] capable/c...
17
  #include <linux/capability.h>
3d733633a   Dave Hansen   [PATCH] r/o bind ...
18
  #include <linux/cpumask.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
  #include <linux/module.h>
f20a9ead0   Andrew Morton   sysfs: add proper...
20
  #include <linux/sysfs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
  #include <linux/seq_file.h>
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
22
  #include <linux/mnt_namespace.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
25
  #include <linux/namei.h>
  #include <linux/security.h>
  #include <linux/mount.h>
07f3f05c1   David Howells   [PATCH] BLOCK: Mo...
26
  #include <linux/ramfs.h>
13f14b4d8   Eric Dumazet   Use ilog2() in fs...
27
  #include <linux/log2.h>
73cd49ecd   Miklos Szeredi   [patch 3/7] vfs: ...
28
  #include <linux/idr.h>
5ad4e53bd   Al Viro   Get rid of indire...
29
  #include <linux/fs_struct.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
31
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
07b20889e   Ram Pai   [PATCH] beginning...
32
  #include "pnode.h"
948730b0e   Adrian Bunk   fs/namespace.c sh...
33
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34

13f14b4d8   Eric Dumazet   Use ilog2() in fs...
35
36
  #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
  #define HASH_SIZE (1UL << HASH_SHIFT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
  /* spinlock for vfsmount-related operations, in place of dcache_lock */
5addc5dd8   Al Viro   [PATCH] make /pro...
38
39
40
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
  
  static int event;
73cd49ecd   Miklos Szeredi   [patch 3/7] vfs: ...
41
  static DEFINE_IDA(mnt_id_ida);
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
42
  static DEFINE_IDA(mnt_group_ida);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43

fa3536cc1   Eric Dumazet   [PATCH] Use __rea...
44
  static struct list_head *mount_hashtable __read_mostly;
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
45
  static struct kmem_cache *mnt_cache __read_mostly;
390c68436   Ram Pai   [PATCH] making na...
46
  static struct rw_semaphore namespace_sem;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
47

f87fd4c2a   Miklos Szeredi   [PATCH] add /sys/fs
48
  /* /sys/fs */
00d266662   Greg Kroah-Hartman   kobject: convert ...
49
50
  struct kobject *fs_kobj;
  EXPORT_SYMBOL_GPL(fs_kobj);
f87fd4c2a   Miklos Szeredi   [PATCH] add /sys/fs
51

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
53
  static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
  {
b58fed8b1   Ram Pai   [PATCH] lindent f...
54
55
  	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
  	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
13f14b4d8   Eric Dumazet   Use ilog2() in fs...
56
57
  	tmp = tmp + (tmp >> HASH_SHIFT);
  	return tmp & (HASH_SIZE - 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
58
  }
3d733633a   Dave Hansen   [PATCH] r/o bind ...
59
  #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
73cd49ecd   Miklos Szeredi   [patch 3/7] vfs: ...
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
  /* allocation is serialized by namespace_sem */
  static int mnt_alloc_id(struct vfsmount *mnt)
  {
  	int res;
  
  retry:
  	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
  	spin_lock(&vfsmount_lock);
  	res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
  	spin_unlock(&vfsmount_lock);
  	if (res == -EAGAIN)
  		goto retry;
  
  	return res;
  }
  
  /*
   * Give the ID stored in mnt->mnt_id back to mnt_id_ida.
   * The removal is done with vfsmount_lock held, the same lock
   * mnt_alloc_id() takes around ida_get_new().
   */
  static void mnt_free_id(struct vfsmount *mnt)
  {
  	spin_lock(&vfsmount_lock);
  	ida_remove(&mnt_id_ida, mnt->mnt_id);
  	spin_unlock(&vfsmount_lock);
  }
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  /*
   * Hand out a new peer group ID and store it in mnt->mnt_group_id.
   *
   * IDs are requested from 1 upwards; 0 is the value
   * mnt_release_group_id() leaves behind for "no group".
   * mnt_group_ida is protected by namespace_sem.
   *
   * Returns -ENOMEM when the IDA cannot be preloaded, otherwise the
   * result of ida_get_new_above().
   */
  static int mnt_alloc_group_id(struct vfsmount *mnt)
  {
  	int preloaded = ida_pre_get(&mnt_group_ida, GFP_KERNEL);
  
  	if (preloaded)
  		return ida_get_new_above(&mnt_group_ida, 1,
  					 &mnt->mnt_group_id);
  	return -ENOMEM;
  }
  
  /*
   * Release a peer group ID
   */
  void mnt_release_group_id(struct vfsmount *mnt)
  {
  	ida_remove(&mnt_group_ida, mnt->mnt_group_id);
  	mnt->mnt_group_id = 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
104
  struct vfsmount *alloc_vfsmnt(const char *name)
  {
c37622296   Robert P. J. Day   [PATCH] Transform...
105
  	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
106
  	if (mnt) {
73cd49ecd   Miklos Szeredi   [patch 3/7] vfs: ...
107
108
109
  		int err;
  
  		err = mnt_alloc_id(mnt);
88b387824   Li Zefan   [PATCH] vfs: use ...
110
111
112
113
114
115
116
  		if (err)
  			goto out_free_cache;
  
  		if (name) {
  			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
  			if (!mnt->mnt_devname)
  				goto out_free_id;
73cd49ecd   Miklos Szeredi   [patch 3/7] vfs: ...
117
  		}
b58fed8b1   Ram Pai   [PATCH] lindent f...
118
  		atomic_set(&mnt->mnt_count, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
119
120
121
122
  		INIT_LIST_HEAD(&mnt->mnt_hash);
  		INIT_LIST_HEAD(&mnt->mnt_child);
  		INIT_LIST_HEAD(&mnt->mnt_mounts);
  		INIT_LIST_HEAD(&mnt->mnt_list);
55e700b92   Miklos Szeredi   [PATCH] namespace...
123
  		INIT_LIST_HEAD(&mnt->mnt_expire);
03e06e68f   Ram Pai   [PATCH] introduce...
124
  		INIT_LIST_HEAD(&mnt->mnt_share);
a58b0eb8e   Ram Pai   [PATCH] introduce...
125
126
  		INIT_LIST_HEAD(&mnt->mnt_slave_list);
  		INIT_LIST_HEAD(&mnt->mnt_slave);
3d733633a   Dave Hansen   [PATCH] r/o bind ...
127
  		atomic_set(&mnt->__mnt_writers, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
129
  	}
  	return mnt;
88b387824   Li Zefan   [PATCH] vfs: use ...
130
131
132
133
134
135
  
  out_free_id:
  	mnt_free_id(mnt);
  out_free_cache:
  	kmem_cache_free(mnt_cache, mnt);
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
136
  }
8366025eb   Dave Hansen   [PATCH] r/o bind ...
137
138
139
140
141
142
143
144
  /*
   * Most r/o checks on a fs are for operations that take
   * discrete amounts of time, like a write() or unlink().
   * We must keep track of when those operations start
   * (for permission checks) and when they end, so that
   * we can determine when writes are able to occur to
   * a filesystem.
   */
3d733633a   Dave Hansen   [PATCH] r/o bind ...
145
146
147
148
149
150
151
152
153
154
155
156
157
  /*
   * __mnt_is_readonly: check whether a mount is read-only
   * @mnt: the mount to check for its write status
   *
   * This shouldn't be used directly outside of the VFS.
   * It does not guarantee that the filesystem will stay
   * r/w, just that it is right *now*.  This can not and
   * should not be used in place of IS_RDONLY(inode).
   * mnt_want/drop_write() will _keep_ the filesystem
   * r/w.
   */
  int __mnt_is_readonly(struct vfsmount *mnt)
  {
2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
158
159
160
161
162
  	if (mnt->mnt_flags & MNT_READONLY)
  		return 1;
  	if (mnt->mnt_sb->s_flags & MS_RDONLY)
  		return 1;
  	return 0;
3d733633a   Dave Hansen   [PATCH] r/o bind ...
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
  }
  EXPORT_SYMBOL_GPL(__mnt_is_readonly);
  
  /*
   * Per-cpu slot that caches write references for one mount at a time.
   * mnt_want_write() bumps ->count under ->lock; __clear_mnt_count()
   * folds ->count into the mount's __mnt_writers.
   */
  struct mnt_writer {
  	/*
  	 * If holding multiple instances of this lock, they
  	 * must be ordered by cpu number.
  	 */
  	spinlock_t lock;
  	struct lock_class_key lock_class; /* compiles out with !lockdep */
  	/* writers counted on this cpu for ->mnt, not yet added to __mnt_writers */
  	unsigned long count;
  	/* mount that ->count currently refers to, or NULL */
  	struct vfsmount *mnt;
  } ____cacheline_aligned_in_smp;
  /* one slot per cpu */
  static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
  
  /*
   * Initcall that prepares the per-cpu mnt_writers slots: every possible
   * cpu's slot gets an initialized spinlock with its own lockdep class
   * and a count of zero.  Always returns 0.
   */
  static int __init init_mnt_writers(void)
  {
  	struct mnt_writer *slot;
  	int i;
  
  	for_each_possible_cpu(i) {
  		slot = &per_cpu(mnt_writers, i);
  		slot->count = 0;
  		spin_lock_init(&slot->lock);
  		lockdep_set_class(&slot->lock, &slot->lock_class);
  	}
  	return 0;
  }
  fs_initcall(init_mnt_writers);
  
  /*
   * Drop the lock of every possible cpu's mnt_writers slot; the
   * counterpart of lock_mnt_writers().
   */
  static void unlock_mnt_writers(void)
  {
  	int i;
  
  	for_each_possible_cpu(i) {
  		struct mnt_writer *slot = &per_cpu(mnt_writers, i);
  
  		spin_unlock(&slot->lock);
  	}
  }
  
  static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
  {
  	if (!cpu_writer->mnt)
  		return;
  	/*
  	 * This is in case anyone ever leaves an invalid,
  	 * old ->mnt and a count of 0.
  	 */
  	if (!cpu_writer->count)
  		return;
  	atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
  	cpu_writer->count = 0;
  }
   /*
    * Make @cpu_writer track @mnt.  When the slot currently belongs to a
    * different mount, its pending count is flushed to that mount first.
    *
    * The caller must hold cpu_writer->lock.
    */
  static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
  					  struct vfsmount *mnt)
  {
  	if (cpu_writer->mnt != mnt) {
  		__clear_mnt_count(cpu_writer);
  		cpu_writer->mnt = mnt;
  	}
  }
  
  /*
   * Most r/o checks on a fs are for operations that take
   * discrete amounts of time, like a write() or unlink().
   * We must keep track of when those operations start
   * (for permission checks) and when they end, so that
   * we can determine when writes are able to occur to
   * a filesystem.
   */
8366025eb   Dave Hansen   [PATCH] r/o bind ...
235
236
237
238
239
240
241
242
243
244
245
246
  /**
   * mnt_want_write - get write access to a mount
   * @mnt: the mount on which to take a write
   *
   * This tells the low-level filesystem that a write is
   * about to be performed to it, and makes sure that
   * writes are allowed before returning success.  When
   * the write operation is finished, mnt_drop_write()
   * must be called.  This is effectively a refcount.
   */
  int mnt_want_write(struct vfsmount *mnt)
  {
3d733633a   Dave Hansen   [PATCH] r/o bind ...
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
  	int ret = 0;
  	struct mnt_writer *cpu_writer;
  
  	cpu_writer = &get_cpu_var(mnt_writers);
  	spin_lock(&cpu_writer->lock);
  	if (__mnt_is_readonly(mnt)) {
  		ret = -EROFS;
  		goto out;
  	}
  	use_cpu_writer_for_mount(cpu_writer, mnt);
  	cpu_writer->count++;
  out:
  	spin_unlock(&cpu_writer->lock);
  	put_cpu_var(mnt_writers);
  	return ret;
8366025eb   Dave Hansen   [PATCH] r/o bind ...
262
263
  }
  EXPORT_SYMBOL_GPL(mnt_want_write);
3d733633a   Dave Hansen   [PATCH] r/o bind ...
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
  /*
   * Take the lock of every possible cpu's mnt_writers slot, in
   * for_each_possible_cpu() order.  With each lock held, the slot's
   * pending count is flushed through __clear_mnt_count() and its ->mnt
   * is reset to NULL.  All locks are still held on return;
   * unlock_mnt_writers() releases them.
   */
  static void lock_mnt_writers(void)
  {
  	int i;
  
  	for_each_possible_cpu(i) {
  		struct mnt_writer *slot = &per_cpu(mnt_writers, i);
  
  		spin_lock(&slot->lock);
  		__clear_mnt_count(slot);
  		slot->mnt = NULL;
  	}
  }
  
  /*
   * These per-cpu write counts are not guaranteed to have
   * matched increments and decrements on any given cpu.
   * A file open()ed for write on one cpu and close()d on
   * another cpu will imbalance this count.  Make sure it
   * does not get too far out of whack.
   */
  static void handle_write_count_underflow(struct vfsmount *mnt)
  {
  	if (atomic_read(&mnt->__mnt_writers) >=
  	    MNT_WRITER_UNDERFLOW_LIMIT)
  		return;
  	/*
  	 * It isn't necessary to hold all of the locks
  	 * at the same time, but doing it this way makes
  	 * us share a lot more code.
  	 */
  	lock_mnt_writers();
  	/*
  	 * vfsmount_lock is for mnt_flags.
  	 */
  	spin_lock(&vfsmount_lock);
  	/*
  	 * If coalescing the per-cpu writer counts did not
  	 * get us back to a positive writer count, we have
  	 * a bug.
  	 */
  	if ((atomic_read(&mnt->__mnt_writers) < 0) &&
  	    !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
5c752ad9f   Arjan van de Ven   Use WARN() in fs/
306
  		WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
3d733633a   Dave Hansen   [PATCH] r/o bind ...
307
308
309
  				"count: %d
  ",
  			mnt, atomic_read(&mnt->__mnt_writers));
3d733633a   Dave Hansen   [PATCH] r/o bind ...
310
311
312
313
314
315
  		/* use the flag to keep the dmesg spam down */
  		mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
  	}
  	spin_unlock(&vfsmount_lock);
  	unlock_mnt_writers();
  }
8366025eb   Dave Hansen   [PATCH] r/o bind ...
316
317
318
319
320
321
322
323
324
325
  /**
   * mnt_drop_write - give up write access to a mount
   * @mnt: the mount on which to give up write access
   *
   * Tells the low-level filesystem that we are done
   * performing writes to it.  Must be matched with
   * mnt_want_write() call above.
   */
  void mnt_drop_write(struct vfsmount *mnt)
  {
3d733633a   Dave Hansen   [PATCH] r/o bind ...
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
  	int must_check_underflow = 0;
  	struct mnt_writer *cpu_writer;
  
  	cpu_writer = &get_cpu_var(mnt_writers);
  	spin_lock(&cpu_writer->lock);
  
  	use_cpu_writer_for_mount(cpu_writer, mnt);
  	if (cpu_writer->count > 0) {
  		cpu_writer->count--;
  	} else {
  		must_check_underflow = 1;
  		atomic_dec(&mnt->__mnt_writers);
  	}
  
  	spin_unlock(&cpu_writer->lock);
  	/*
  	 * Logically, we could call this each time,
  	 * but the __mnt_writers cacheline tends to
  	 * be cold, and makes this expensive.
  	 */
  	if (must_check_underflow)
  		handle_write_count_underflow(mnt);
  	/*
  	 * This could be done right after the spinlock
  	 * is taken because the spinlock keeps us on
  	 * the cpu, and disables preemption.  However,
  	 * putting it here bounds the amount that
  	 * __mnt_writers can underflow.  Without it,
  	 * we could theoretically wrap __mnt_writers.
  	 */
  	put_cpu_var(mnt_writers);
8366025eb   Dave Hansen   [PATCH] r/o bind ...
357
358
  }
  EXPORT_SYMBOL_GPL(mnt_drop_write);
2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
359
  static int mnt_make_readonly(struct vfsmount *mnt)
8366025eb   Dave Hansen   [PATCH] r/o bind ...
360
  {
3d733633a   Dave Hansen   [PATCH] r/o bind ...
361
362
363
364
365
366
367
368
369
370
371
  	int ret = 0;
  
  	lock_mnt_writers();
  	/*
  	 * With all the locks held, this value is stable
  	 */
  	if (atomic_read(&mnt->__mnt_writers) > 0) {
  		ret = -EBUSY;
  		goto out;
  	}
  	/*
2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
372
373
  	 * nobody can do a successful mnt_want_write() with all
  	 * of the counts in MNT_DENIED_WRITE and the locks held.
3d733633a   Dave Hansen   [PATCH] r/o bind ...
374
  	 */
2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
375
376
377
378
  	spin_lock(&vfsmount_lock);
  	if (!ret)
  		mnt->mnt_flags |= MNT_READONLY;
  	spin_unlock(&vfsmount_lock);
3d733633a   Dave Hansen   [PATCH] r/o bind ...
379
380
381
  out:
  	unlock_mnt_writers();
  	return ret;
8366025eb   Dave Hansen   [PATCH] r/o bind ...
382
  }
8366025eb   Dave Hansen   [PATCH] r/o bind ...
383

2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
384
385
386
387
388
389
  /*
   * Clear MNT_READONLY in mnt->mnt_flags.  The flag word is modified
   * with vfsmount_lock held.
   */
  static void __mnt_unmake_readonly(struct vfsmount *mnt)
  {
  	spin_lock(&vfsmount_lock);
  	mnt->mnt_flags &= ~MNT_READONLY;
  	spin_unlock(&vfsmount_lock);
  }
a3ec947c8   Sukadev Bhattiprolu   vfs: simple_set_m...
390
  void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
454e2398b   David Howells   [PATCH] VFS: Perm...
391
392
393
  {
  	mnt->mnt_sb = sb;
  	mnt->mnt_root = dget(sb->s_root);
454e2398b   David Howells   [PATCH] VFS: Perm...
394
395
396
  }
  
  EXPORT_SYMBOL(simple_set_mnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
397
398
399
  void free_vfsmnt(struct vfsmount *mnt)
  {
  	kfree(mnt->mnt_devname);
73cd49ecd   Miklos Szeredi   [patch 3/7] vfs: ...
400
  	mnt_free_id(mnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
401
402
403
404
  	kmem_cache_free(mnt_cache, mnt);
  }
  
  /*
a05964f39   Ram Pai   [PATCH] shared mo...
405
406
   * find the first or last mount at @dentry on vfsmount @mnt depending on
   * @dir. If @dir is set return the first mount else return the last mount.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
407
   */
a05964f39   Ram Pai   [PATCH] shared mo...
408
409
  struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
  			      int dir)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
410
  {
b58fed8b1   Ram Pai   [PATCH] lindent f...
411
412
  	struct list_head *head = mount_hashtable + hash(mnt, dentry);
  	struct list_head *tmp = head;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
413
  	struct vfsmount *p, *found = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
414
  	for (;;) {
a05964f39   Ram Pai   [PATCH] shared mo...
415
  		tmp = dir ? tmp->next : tmp->prev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
416
417
418
419
420
  		p = NULL;
  		if (tmp == head)
  			break;
  		p = list_entry(tmp, struct vfsmount, mnt_hash);
  		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
a05964f39   Ram Pai   [PATCH] shared mo...
421
  			found = p;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
422
423
424
  			break;
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425
426
  	return found;
  }
a05964f39   Ram Pai   [PATCH] shared mo...
427
428
429
430
431
432
433
434
435
436
437
438
439
  /*
   * lookup_mnt increments the ref count before returning
   * the vfsmount struct.
   */
  struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
  {
  	struct vfsmount *child_mnt;
  	spin_lock(&vfsmount_lock);
  	if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
  		mntget(child_mnt);
  	spin_unlock(&vfsmount_lock);
  	return child_mnt;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
440
441
  static inline int check_mnt(struct vfsmount *mnt)
  {
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
442
  	return mnt->mnt_ns == current->nsproxy->mnt_ns;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
443
  }
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
444
  static void touch_mnt_namespace(struct mnt_namespace *ns)
5addc5dd8   Al Viro   [PATCH] make /pro...
445
446
447
448
449
450
  {
  	if (ns) {
  		ns->event = ++event;
  		wake_up_interruptible(&ns->poll);
  	}
  }
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
451
  static void __touch_mnt_namespace(struct mnt_namespace *ns)
5addc5dd8   Al Viro   [PATCH] make /pro...
452
453
454
455
456
457
  {
  	if (ns && ns->event != event) {
  		ns->event = event;
  		wake_up_interruptible(&ns->poll);
  	}
  }
1a3906895   Al Viro   [PATCH] reduce st...
458
  static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
459
  {
1a3906895   Al Viro   [PATCH] reduce st...
460
461
  	old_path->dentry = mnt->mnt_mountpoint;
  	old_path->mnt = mnt->mnt_parent;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
462
463
464
465
  	mnt->mnt_parent = mnt;
  	mnt->mnt_mountpoint = mnt->mnt_root;
  	list_del_init(&mnt->mnt_child);
  	list_del_init(&mnt->mnt_hash);
1a3906895   Al Viro   [PATCH] reduce st...
466
  	old_path->dentry->d_mounted--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
467
  }
b90fa9ae8   Ram Pai   [PATCH] shared mo...
468
469
470
471
472
473
474
  /*
   * Record @mnt / @dentry as the parent mount and mountpoint of
   * @child_mnt.
   *
   * child_mnt->mnt_parent is set to the result of mntget(mnt) and
   * child_mnt->mnt_mountpoint to the result of dget(dentry);
   * dentry->d_mounted is incremented (detach_mnt() does the matching
   * decrement).  Only these fields are touched here -- adding the child
   * to the mount hash and to the parent's mnt_mounts list is done by
   * callers such as attach_mnt().
   */
  void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
  			struct vfsmount *child_mnt)
  {
  	child_mnt->mnt_parent = mntget(mnt);
  	child_mnt->mnt_mountpoint = dget(dentry);
  	dentry->d_mounted++;
  }
1a3906895   Al Viro   [PATCH] reduce st...
475
  static void attach_mnt(struct vfsmount *mnt, struct path *path)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
476
  {
1a3906895   Al Viro   [PATCH] reduce st...
477
  	mnt_set_mountpoint(path->mnt, path->dentry, mnt);
b90fa9ae8   Ram Pai   [PATCH] shared mo...
478
  	list_add_tail(&mnt->mnt_hash, mount_hashtable +
1a3906895   Al Viro   [PATCH] reduce st...
479
480
  			hash(path->mnt, path->dentry));
  	list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
b90fa9ae8   Ram Pai   [PATCH] shared mo...
481
482
483
484
485
486
487
488
489
490
  }
  
  /*
   * the caller must hold vfsmount_lock
   */
  static void commit_tree(struct vfsmount *mnt)
  {
  	struct vfsmount *parent = mnt->mnt_parent;
  	struct vfsmount *m;
  	LIST_HEAD(head);
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
491
  	struct mnt_namespace *n = parent->mnt_ns;
b90fa9ae8   Ram Pai   [PATCH] shared mo...
492
493
494
495
496
  
  	BUG_ON(parent == mnt);
  
  	list_add_tail(&head, &mnt->mnt_list);
  	list_for_each_entry(m, &head, mnt_list)
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
497
  		m->mnt_ns = n;
b90fa9ae8   Ram Pai   [PATCH] shared mo...
498
499
500
501
502
  	list_splice(&head, n->list.prev);
  
  	list_add_tail(&mnt->mnt_hash, mount_hashtable +
  				hash(parent, mnt->mnt_mountpoint));
  	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
503
  	touch_mnt_namespace(n);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
  }
  
  /*
   * Step to the mount that follows @p in a walk of the mount tree
   * rooted at @root: the first child of @p if it has one, otherwise
   * the next sibling of @p or of the closest ancestor that has one.
   * Returns NULL once the walk climbs back to @root with nothing left.
   */
  static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
  {
  	struct list_head *pos = p->mnt_mounts.next;
  
  	/* @p has children: descend to the first one */
  	if (pos != &p->mnt_mounts)
  		return list_entry(pos, struct vfsmount, mnt_child);
  
  	/* otherwise climb until some level still has a sibling to visit */
  	for (; p != root; p = p->mnt_parent) {
  		pos = p->mnt_child.next;
  		if (pos != &p->mnt_parent->mnt_mounts)
  			return list_entry(pos, struct vfsmount, mnt_child);
  	}
  	return NULL;
  }
9676f0c63   Ram Pai   [PATCH] unbindabl...
521
522
523
524
525
526
527
528
529
  /*
   * Follow the last entry of each mnt_mounts list, starting at @p, until
   * a mount with an empty mnt_mounts list is reached, and return that
   * mount (which is @p itself when @p has no children).
   */
  static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
  {
  	for (;;) {
  		struct list_head *last = p->mnt_mounts.prev;
  
  		if (last == &p->mnt_mounts)
  			return p;
  		p = list_entry(last, struct vfsmount, mnt_child);
  	}
  }
36341f645   Ram Pai   [PATCH] mount exp...
530
531
  static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
  					int flag)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
532
533
534
535
536
  {
  	struct super_block *sb = old->mnt_sb;
  	struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
  
  	if (mnt) {
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
537
538
539
540
541
542
543
544
545
546
  		if (flag & (CL_SLAVE | CL_PRIVATE))
  			mnt->mnt_group_id = 0; /* not a peer of original */
  		else
  			mnt->mnt_group_id = old->mnt_group_id;
  
  		if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
  			int err = mnt_alloc_group_id(mnt);
  			if (err)
  				goto out_free;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
547
548
549
550
551
552
  		mnt->mnt_flags = old->mnt_flags;
  		atomic_inc(&sb->s_active);
  		mnt->mnt_sb = sb;
  		mnt->mnt_root = dget(root);
  		mnt->mnt_mountpoint = mnt->mnt_root;
  		mnt->mnt_parent = mnt;
b90fa9ae8   Ram Pai   [PATCH] shared mo...
553

5afe00221   Ram Pai   [PATCH] handling ...
554
555
556
557
  		if (flag & CL_SLAVE) {
  			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
  			mnt->mnt_master = old;
  			CLEAR_MNT_SHARED(mnt);
8aec08094   Al Viro   [PATCH] new helpe...
558
  		} else if (!(flag & CL_PRIVATE)) {
5afe00221   Ram Pai   [PATCH] handling ...
559
560
561
562
563
564
  			if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
  				list_add(&mnt->mnt_share, &old->mnt_share);
  			if (IS_MNT_SLAVE(old))
  				list_add(&mnt->mnt_slave, &old->mnt_slave);
  			mnt->mnt_master = old->mnt_master;
  		}
b90fa9ae8   Ram Pai   [PATCH] shared mo...
565
566
  		if (flag & CL_MAKE_SHARED)
  			set_mnt_shared(mnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567
568
569
  
  		/* stick the duplicate mount on the same expiry list
  		 * as the original if that was on one */
36341f645   Ram Pai   [PATCH] mount exp...
570
  		if (flag & CL_EXPIRE) {
36341f645   Ram Pai   [PATCH] mount exp...
571
572
  			if (!list_empty(&old->mnt_expire))
  				list_add(&mnt->mnt_expire, &old->mnt_expire);
36341f645   Ram Pai   [PATCH] mount exp...
573
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
574
575
  	}
  	return mnt;
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
576
577
578
579
  
   out_free:
  	free_vfsmnt(mnt);
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
580
  }
7b7b1ace2   Al Viro   [PATCH] saner han...
581
  static inline void __mntput(struct vfsmount *mnt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
582
  {
3d733633a   Dave Hansen   [PATCH] r/o bind ...
583
  	int cpu;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
584
  	struct super_block *sb = mnt->mnt_sb;
3d733633a   Dave Hansen   [PATCH] r/o bind ...
585
586
587
588
589
590
591
592
  	/*
  	 * We don't have to hold all of the locks at the
  	 * same time here because we know that we're the
  	 * last reference to mnt and that no new writers
  	 * can come in.
  	 */
  	for_each_possible_cpu(cpu) {
  		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
3d733633a   Dave Hansen   [PATCH] r/o bind ...
593
  		spin_lock(&cpu_writer->lock);
1a88b5364   Al Viro   Fix incomplete __...
594
595
596
597
  		if (cpu_writer->mnt != mnt) {
  			spin_unlock(&cpu_writer->lock);
  			continue;
  		}
3d733633a   Dave Hansen   [PATCH] r/o bind ...
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
  		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
  		cpu_writer->count = 0;
  		/*
  		 * Might as well do this so that no one
  		 * ever sees the pointer and expects
  		 * it to be valid.
  		 */
  		cpu_writer->mnt = NULL;
  		spin_unlock(&cpu_writer->lock);
  	}
  	/*
  	 * This probably indicates that somebody messed
  	 * up a mnt_want/drop_write() pair.  If this
  	 * happens, the filesystem was probably unable
  	 * to make r/w->r/o transitions.
  	 */
  	WARN_ON(atomic_read(&mnt->__mnt_writers));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
615
616
617
618
  	dput(mnt->mnt_root);
  	free_vfsmnt(mnt);
  	deactivate_super(sb);
  }
7b7b1ace2   Al Viro   [PATCH] saner han...
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
  /*
   * Drop one reference on @mnt.
   *
   * When the count reaches zero (atomic_dec_and_lock() then returns
   * with vfsmount_lock held) and the mount is not pinned, the lock is
   * released and the mount is torn down through __mntput().
   *
   * When it is pinned, mnt_pinned + 1 is added back to mnt_count,
   * mnt_pinned is cleared, acct_auto_close_mnt() and
   * security_sb_umount_close() are called, and the drop is attempted
   * again.
   */
  void mntput_no_expire(struct vfsmount *mnt)
  {
  	while (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
  		if (likely(!mnt->mnt_pinned)) {
  			spin_unlock(&vfsmount_lock);
  			__mntput(mnt);
  			return;
  		}
  		atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
  		mnt->mnt_pinned = 0;
  		spin_unlock(&vfsmount_lock);
  		acct_auto_close_mnt(mnt);
  		security_sb_umount_close(mnt);
  	}
  }
  
  EXPORT_SYMBOL(mntput_no_expire);
  
  /*
   * Increment mnt->mnt_pinned under vfsmount_lock.  A non-zero
   * mnt_pinned makes mntput_no_expire() take its pinned path instead
   * of freeing the mount when the last reference is dropped.
   */
  void mnt_pin(struct vfsmount *mnt)
  {
  	spin_lock(&vfsmount_lock);
  	mnt->mnt_pinned++;
  	spin_unlock(&vfsmount_lock);
  }
  
  EXPORT_SYMBOL(mnt_pin);
  
  /*
   * Turn one pin on @mnt back into a reference: if mnt_pinned is
   * non-zero, mnt_count is incremented and mnt_pinned decremented, all
   * under vfsmount_lock.  Does nothing when the mount is not pinned.
   */
  void mnt_unpin(struct vfsmount *mnt)
  {
  	spin_lock(&vfsmount_lock);
  	if (mnt->mnt_pinned) {
  		atomic_inc(&mnt->mnt_count);
  		mnt->mnt_pinned--;
  	}
  	spin_unlock(&vfsmount_lock);
  }
  
  EXPORT_SYMBOL(mnt_unpin);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
659

b3b304a23   Miklos Szeredi   mount options: ad...
660
661
662
663
664
665
666
667
668
669
670
671
672
673
  /*
   * Write string @s to seq_file @m through seq_escape(), escaping
   * space, tab, newline and backslash.
   */
  static inline void mangle(struct seq_file *m, const char *s)
  {
  	seq_escape(m, s, " \t\n\\");
  }
  
  /*
   * Simple .show_options callback for filesystems which don't want to
   * implement more complex mount option showing.
   *
   * See also save_mount_options().
   */
  int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
  {
2a32cebd6   Al Viro   Fix races around ...
674
675
676
677
  	const char *options;
  
  	rcu_read_lock();
  	options = rcu_dereference(mnt->mnt_sb->s_options);
b3b304a23   Miklos Szeredi   mount options: ad...
678
679
680
681
682
  
  	if (options != NULL && options[0]) {
  		seq_putc(m, ',');
  		mangle(m, options);
  	}
2a32cebd6   Al Viro   Fix races around ...
683
  	rcu_read_unlock();
b3b304a23   Miklos Szeredi   mount options: ad...
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
  
  	return 0;
  }
  EXPORT_SYMBOL(generic_show_options);
  
  /*
   * If filesystem uses generic_show_options(), this function should be
   * called from the fill_super() callback.
   *
   * The .remount_fs callback usually needs to be handled in a special
   * way, to make sure, that previous options are not overwritten if the
   * remount fails.
   *
   * Also note, that if the filesystem's .remount_fs function doesn't
   * reset all options to their default value, but changes only newly
   * given options, then the displayed options will not reflect reality
   * any more.
   */
  void save_mount_options(struct super_block *sb, char *options)
  {
2a32cebd6   Al Viro   Fix races around ...
704
705
  	BUG_ON(sb->s_options);
  	rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
b3b304a23   Miklos Szeredi   mount options: ad...
706
707
  }
  EXPORT_SYMBOL(save_mount_options);
2a32cebd6   Al Viro   Fix races around ...
708
709
710
711
712
713
714
715
716
717
  /*
   * Publish @options as the new sb->s_options string and dispose of the
   * previous one.  The old string, if there was one, is kfree()d only
   * after synchronize_rcu(), so readers that picked it up inside
   * rcu_read_lock() (as generic_show_options() does) are finished with it.
   */
  void replace_mount_options(struct super_block *sb, char *options)
  {
  	char *previous = sb->s_options;
  
  	rcu_assign_pointer(sb->s_options, options);
  	if (!previous)
  		return;
  
  	synchronize_rcu();
  	kfree(previous);
  }
  EXPORT_SYMBOL(replace_mount_options);
a1a2c409b   Miklos Szeredi   [patch 5/7] vfs: ...
718
  #ifdef CONFIG_PROC_FS
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
719
720
721
  /* iterator */
  static void *m_start(struct seq_file *m, loff_t *pos)
  {
a1a2c409b   Miklos Szeredi   [patch 5/7] vfs: ...
722
  	struct proc_mounts *p = m->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
723

390c68436   Ram Pai   [PATCH] making na...
724
  	down_read(&namespace_sem);
a1a2c409b   Miklos Szeredi   [patch 5/7] vfs: ...
725
  	return seq_list_start(&p->ns->list, *pos);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
726
727
728
729
  }
  
  static void *m_next(struct seq_file *m, void *v, loff_t *pos)
  {
a1a2c409b   Miklos Szeredi   [patch 5/7] vfs: ...
730
  	struct proc_mounts *p = m->private;
b0765fb85   Pavel Emelianov   Make /proc/self/m...
731

a1a2c409b   Miklos Szeredi   [patch 5/7] vfs: ...
732
  	return seq_list_next(v, &p->ns->list, pos);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
733
734
735
736
  }
  
  static void m_stop(struct seq_file *m, void *v)
  {
390c68436   Ram Pai   [PATCH] making na...
737
  	up_read(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
738
  }
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
739
740
741
742
  /*
   * One row of a flag-to-text table used when showing mount options.
   * Tables of these are terminated by an entry whose flag is 0.
   */
  struct proc_fs_info {
  	int flag;		/* bit tested against s_flags / mnt_flags */
  	const char *str;	/* text written with seq_puts() when the bit is set */
  };
2069f4578   Eric Paris   LSM/SELinux: show...
743
  static int show_sb_opts(struct seq_file *m, struct super_block *sb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
744
  {
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
745
  	static const struct proc_fs_info fs_info[] = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
746
747
748
  		{ MS_SYNCHRONOUS, ",sync" },
  		{ MS_DIRSYNC, ",dirsync" },
  		{ MS_MANDLOCK, ",mand" },
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
749
750
  		{ 0, NULL }
  	};
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
751
752
753
754
755
756
  	const struct proc_fs_info *fs_infop;
  
  	for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
  		if (sb->s_flags & fs_infop->flag)
  			seq_puts(m, fs_infop->str);
  	}
2069f4578   Eric Paris   LSM/SELinux: show...
757
758
  
  	return security_sb_show_options(m, sb);
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
759
760
761
762
763
  }
  
  static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
  {
  	static const struct proc_fs_info mnt_info[] = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
764
765
766
  		{ MNT_NOSUID, ",nosuid" },
  		{ MNT_NODEV, ",nodev" },
  		{ MNT_NOEXEC, ",noexec" },
fc33a7bb9   Christoph Hellwig   [PATCH] per-mount...
767
768
  		{ MNT_NOATIME, ",noatime" },
  		{ MNT_NODIRATIME, ",nodiratime" },
47ae32d6a   Valerie Henson   [PATCH] relative ...
769
  		{ MNT_RELATIME, ",relatime" },
d0adde574   Matthew Garrett   Add a strictatime...
770
  		{ MNT_STRICTATIME, ",strictatime" },
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
771
772
  		{ 0, NULL }
  	};
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
  	const struct proc_fs_info *fs_infop;
  
  	for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
  		if (mnt->mnt_flags & fs_infop->flag)
  			seq_puts(m, fs_infop->str);
  	}
  }
  
  /*
   * Write the filesystem type of @sb to @m through mangle().  When the
   * superblock has a non-empty subtype, the output is "<type>.<subtype>".
   */
  static void show_type(struct seq_file *m, struct super_block *sb)
  {
  	mangle(m, sb->s_type->name);

  	/* Nothing more to print without a non-empty subtype. */
  	if (!sb->s_subtype || !sb->s_subtype[0])
  		return;

  	seq_putc(m, '.');
  	mangle(m, sb->s_subtype);
  }
  
  static int show_vfsmnt(struct seq_file *m, void *v)
  {
  	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
  	int err = 0;
c32c2f63a   Jan Blunck   d_path: Make seq_...
794
  	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
795
796
797
  
  	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
  	seq_putc(m, ' ');
c32c2f63a   Jan Blunck   d_path: Make seq_...
798
799
  	seq_path(m, &mnt_path, " \t
  \\");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
800
  	seq_putc(m, ' ');
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
801
  	show_type(m, mnt->mnt_sb);
2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
802
  	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
2069f4578   Eric Paris   LSM/SELinux: show...
803
804
805
  	err = show_sb_opts(m, mnt->mnt_sb);
  	if (err)
  		goto out;
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
806
  	show_mnt_opts(m, mnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
807
808
809
810
  	if (mnt->mnt_sb->s_op->show_options)
  		err = mnt->mnt_sb->s_op->show_options(m, mnt);
  	seq_puts(m, " 0 0
  ");
2069f4578   Eric Paris   LSM/SELinux: show...
811
  out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
812
813
  	return err;
  }
a1a2c409b   Miklos Szeredi   [patch 5/7] vfs: ...
814
  const struct seq_operations mounts_op = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
815
816
817
818
819
  	.start	= m_start,
  	.next	= m_next,
  	.stop	= m_stop,
  	.show	= show_vfsmnt
  };
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
  static int show_mountinfo(struct seq_file *m, void *v)
  {
  	struct proc_mounts *p = m->private;
  	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
  	struct super_block *sb = mnt->mnt_sb;
  	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
  	struct path root = p->root;
  	int err = 0;
  
  	seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
  		   MAJOR(sb->s_dev), MINOR(sb->s_dev));
  	seq_dentry(m, mnt->mnt_root, " \t
  \\");
  	seq_putc(m, ' ');
  	seq_path_root(m, &mnt_path, &root, " \t
  \\");
  	if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
  		/*
  		 * Mountpoint is outside root, discard that one.  Ugly,
  		 * but less so than trying to do that in iterator in a
  		 * race-free way (due to renames).
  		 */
  		return SEQ_SKIP;
  	}
  	seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
  	show_mnt_opts(m, mnt);
  
  	/* Tagged fields ("foo:X" or "bar") */
  	if (IS_MNT_SHARED(mnt))
  		seq_printf(m, " shared:%i", mnt->mnt_group_id);
97e7e0f71   Miklos Szeredi   [patch 7/7] vfs: ...
850
851
852
853
854
855
856
  	if (IS_MNT_SLAVE(mnt)) {
  		int master = mnt->mnt_master->mnt_group_id;
  		int dom = get_dominating_id(mnt, &p->root);
  		seq_printf(m, " master:%i", master);
  		if (dom && dom != master)
  			seq_printf(m, " propagate_from:%i", dom);
  	}
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
857
858
859
860
861
862
863
864
865
  	if (IS_MNT_UNBINDABLE(mnt))
  		seq_puts(m, " unbindable");
  
  	/* Filesystem specific data */
  	seq_puts(m, " - ");
  	show_type(m, sb);
  	seq_putc(m, ' ');
  	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
  	seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
2069f4578   Eric Paris   LSM/SELinux: show...
866
867
868
  	err = show_sb_opts(m, sb);
  	if (err)
  		goto out;
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
869
870
871
872
  	if (sb->s_op->show_options)
  		err = sb->s_op->show_options(m, mnt);
  	seq_putc(m, '
  ');
2069f4578   Eric Paris   LSM/SELinux: show...
873
  out:
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
874
875
876
877
878
879
880
881
882
  	return err;
  }
  
  /* seq_file operations that render each mount with show_mountinfo(). */
  const struct seq_operations mountinfo_op = {
  	.show	= show_mountinfo,
  	.start	= m_start,
  	.next	= m_next,
  	.stop	= m_stop,
  };
b4629fe2f   Chuck Lever   VFS: New /proc fi...
883
884
  static int show_vfsstat(struct seq_file *m, void *v)
  {
b0765fb85   Pavel Emelianov   Make /proc/self/m...
885
  	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
c32c2f63a   Jan Blunck   d_path: Make seq_...
886
  	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
b4629fe2f   Chuck Lever   VFS: New /proc fi...
887
888
889
890
891
892
893
894
895
896
897
  	int err = 0;
  
  	/* device */
  	if (mnt->mnt_devname) {
  		seq_puts(m, "device ");
  		mangle(m, mnt->mnt_devname);
  	} else
  		seq_puts(m, "no device");
  
  	/* mount point */
  	seq_puts(m, " mounted on ");
c32c2f63a   Jan Blunck   d_path: Make seq_...
898
899
  	seq_path(m, &mnt_path, " \t
  \\");
b4629fe2f   Chuck Lever   VFS: New /proc fi...
900
901
902
903
  	seq_putc(m, ' ');
  
  	/* file system type */
  	seq_puts(m, "with fstype ");
2d4d4864a   Ram Pai   [patch 6/7] vfs: ...
904
  	show_type(m, mnt->mnt_sb);
b4629fe2f   Chuck Lever   VFS: New /proc fi...
905
906
907
908
909
910
911
912
913
914
915
  
  	/* optional statistics */
  	if (mnt->mnt_sb->s_op->show_stats) {
  		seq_putc(m, ' ');
  		err = mnt->mnt_sb->s_op->show_stats(m, mnt);
  	}
  
  	seq_putc(m, '
  ');
  	return err;
  }
a1a2c409b   Miklos Szeredi   [patch 5/7] vfs: ...
916
  const struct seq_operations mountstats_op = {
b4629fe2f   Chuck Lever   VFS: New /proc fi...
917
918
919
920
921
  	.start	= m_start,
  	.next	= m_next,
  	.stop	= m_stop,
  	.show	= show_vfsstat,
  };
a1a2c409b   Miklos Szeredi   [patch 5/7] vfs: ...
922
  #endif  /* CONFIG_PROC_FS */
b4629fe2f   Chuck Lever   VFS: New /proc fi...
923

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
924
925
926
927
928
929
930
931
932
933
  /**
   * may_umount_tree - check if a mount tree is busy
   * @mnt: root of mount tree
   *
   * This is called to check if a tree of mounts has any
   * open files, pwds, chroots or sub mounts that are
   * busy.
   */
  int may_umount_tree(struct vfsmount *mnt)
  {
36341f645   Ram Pai   [PATCH] mount exp...
934
935
936
  	int actual_refs = 0;
  	int minimum_refs = 0;
  	struct vfsmount *p;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
937
938
  
  	spin_lock(&vfsmount_lock);
36341f645   Ram Pai   [PATCH] mount exp...
939
  	for (p = mnt; p; p = next_mnt(p, mnt)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
940
941
  		actual_refs += atomic_read(&p->mnt_count);
  		minimum_refs += 2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
942
943
944
945
  	}
  	spin_unlock(&vfsmount_lock);
  
  	if (actual_refs > minimum_refs)
e3474a8eb   Ian Kent   [PATCH] autofs4: ...
946
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
947

e3474a8eb   Ian Kent   [PATCH] autofs4: ...
948
  	return 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
  }
  
  EXPORT_SYMBOL(may_umount_tree);
  
  /**
   * may_umount - check if a mount point is busy
   * @mnt: root of mount
   *
   * This is called to check if a mount point has any
   * open files, pwds, chroots or sub mounts. If the
   * mount has sub mounts this will return busy
   * regardless of whether the sub mounts are busy.
   *
   * Doesn't take quota and stuff into account. IOW, in some cases it will
   * give false negatives. The main reason why it's here is that we need
   * a non-destructive way to look for easily umountable filesystems.
   */
  int may_umount(struct vfsmount *mnt)
  {
e3474a8eb   Ian Kent   [PATCH] autofs4: ...
968
  	int ret = 1;
a05964f39   Ram Pai   [PATCH] shared mo...
969
970
  	spin_lock(&vfsmount_lock);
  	if (propagate_mount_busy(mnt, 2))
e3474a8eb   Ian Kent   [PATCH] autofs4: ...
971
  		ret = 0;
a05964f39   Ram Pai   [PATCH] shared mo...
972
973
  	spin_unlock(&vfsmount_lock);
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
974
975
976
  }
  
  EXPORT_SYMBOL(may_umount);
b90fa9ae8   Ram Pai   [PATCH] shared mo...
977
  void release_mounts(struct list_head *head)
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
978
979
  {
  	struct vfsmount *mnt;
bf066c7db   Miklos Szeredi   [PATCH] shared mo...
980
  	while (!list_empty(head)) {
b5e618181   Pavel Emelianov   Introduce a handy...
981
  		mnt = list_first_entry(head, struct vfsmount, mnt_hash);
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
982
983
984
985
986
987
988
989
990
  		list_del_init(&mnt->mnt_hash);
  		if (mnt->mnt_parent != mnt) {
  			struct dentry *dentry;
  			struct vfsmount *m;
  			spin_lock(&vfsmount_lock);
  			dentry = mnt->mnt_mountpoint;
  			m = mnt->mnt_parent;
  			mnt->mnt_mountpoint = mnt->mnt_root;
  			mnt->mnt_parent = mnt;
7c4b93d82   Al Viro   [PATCH] count gho...
991
  			m->mnt_ghosts--;
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
992
993
994
995
996
997
998
  			spin_unlock(&vfsmount_lock);
  			dput(dentry);
  			mntput(m);
  		}
  		mntput(mnt);
  	}
  }
a05964f39   Ram Pai   [PATCH] shared mo...
999
  void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1000
1001
  {
  	struct vfsmount *p;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1002

1bfba4e8e   Akinobu Mita   [PATCH] core: use...
1003
1004
  	for (p = mnt; p; p = next_mnt(p, mnt))
  		list_move(&p->mnt_hash, kill);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1005

a05964f39   Ram Pai   [PATCH] shared mo...
1006
1007
  	if (propagate)
  		propagate_umount(kill);
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
1008
1009
1010
  	list_for_each_entry(p, kill, mnt_hash) {
  		list_del_init(&p->mnt_expire);
  		list_del_init(&p->mnt_list);
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1011
1012
  		__touch_mnt_namespace(p->mnt_ns);
  		p->mnt_ns = NULL;
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
1013
  		list_del_init(&p->mnt_child);
7c4b93d82   Al Viro   [PATCH] count gho...
1014
1015
  		if (p->mnt_parent != p) {
  			p->mnt_parent->mnt_ghosts++;
f30ac319f   Al Viro   [PATCH] umount_tr...
1016
  			p->mnt_mountpoint->d_mounted--;
7c4b93d82   Al Viro   [PATCH] count gho...
1017
  		}
a05964f39   Ram Pai   [PATCH] shared mo...
1018
  		change_mnt_propagation(p, MS_PRIVATE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1019
1020
  	}
  }
c35038bec   Al Viro   [PATCH] do shrink...
1021
  static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1022
1023
  static int do_umount(struct vfsmount *mnt, int flags)
  {
b58fed8b1   Ram Pai   [PATCH] lindent f...
1024
  	struct super_block *sb = mnt->mnt_sb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1025
  	int retval;
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
1026
  	LIST_HEAD(umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
  
  	retval = security_sb_umount(mnt, flags);
  	if (retval)
  		return retval;
  
  	/*
  	 * Allow userspace to request a mountpoint be expired rather than
  	 * unmounting unconditionally. Unmount only happens if:
  	 *  (1) the mark is already set (the mark is cleared by mntput())
  	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
  	 */
  	if (flags & MNT_EXPIRE) {
6ac08c39a   Jan Blunck   Use struct path i...
1039
  		if (mnt == current->fs->root.mnt ||
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
  		    flags & (MNT_FORCE | MNT_DETACH))
  			return -EINVAL;
  
  		if (atomic_read(&mnt->mnt_count) != 2)
  			return -EBUSY;
  
  		if (!xchg(&mnt->mnt_expiry_mark, 1))
  			return -EAGAIN;
  	}
  
  	/*
  	 * If we may have to abort operations to get out of this
  	 * mount, and they will themselves hold resources we must
  	 * allow the fs to do things. In the Unix tradition of
  	 * 'Gee thats tricky lets do it in userspace' the umount_begin
  	 * might fail to complete on the first run through as other tasks
  	 * must return, and the like. Thats for the mount program to worry
  	 * about for the moment.
  	 */
42faad996   Al Viro   [PATCH] restore s...
1059
  	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
42faad996   Al Viro   [PATCH] restore s...
1060
  		sb->s_op->umount_begin(sb);
42faad996   Al Viro   [PATCH] restore s...
1061
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
  
  	/*
  	 * No sense to grab the lock for this test, but test itself looks
  	 * somewhat bogus. Suggestions for better replacement?
  	 * Ho-hum... In principle, we might treat that as umount + switch
  	 * to rootfs. GC would eventually take care of the old vfsmount.
  	 * Actually it makes sense, especially if rootfs would contain a
  	 * /reboot - static binary that would close all descriptors and
  	 * call reboot(9). Then init(8) could umount root and exec /reboot.
  	 */
6ac08c39a   Jan Blunck   Use struct path i...
1072
  	if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1073
1074
1075
1076
1077
1078
1079
  		/*
  		 * Special case for "unmounting" root ...
  		 * we just try to remount it readonly.
  		 */
  		down_write(&sb->s_umount);
  		if (!(sb->s_flags & MS_RDONLY)) {
  			lock_kernel();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1080
1081
1082
1083
1084
1085
  			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
  			unlock_kernel();
  		}
  		up_write(&sb->s_umount);
  		return retval;
  	}
390c68436   Ram Pai   [PATCH] making na...
1086
  	down_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1087
  	spin_lock(&vfsmount_lock);
5addc5dd8   Al Viro   [PATCH] make /pro...
1088
  	event++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1089

c35038bec   Al Viro   [PATCH] do shrink...
1090
1091
  	if (!(flags & MNT_DETACH))
  		shrink_submounts(mnt, &umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1092
  	retval = -EBUSY;
a05964f39   Ram Pai   [PATCH] shared mo...
1093
  	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1094
  		if (!list_empty(&mnt->mnt_list))
a05964f39   Ram Pai   [PATCH] shared mo...
1095
  			umount_tree(mnt, 1, &umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1096
1097
1098
1099
1100
  		retval = 0;
  	}
  	spin_unlock(&vfsmount_lock);
  	if (retval)
  		security_sb_umount_busy(mnt);
390c68436   Ram Pai   [PATCH] making na...
1101
  	up_write(&namespace_sem);
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
1102
  	release_mounts(&umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
  	return retval;
  }
  
  /*
   * Now umount can handle mount points as well as block devices.
   * This is important for filesystems which use unnamed block devices.
   *
   * We now support a flag for forced unmount like the other 'big iron'
   * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
   */
bdc480e3b   Heiko Carstens   [CVE-2009-0029] S...
1113
  /*
   * umount(2) entry point.
   *
   * Resolves the user-supplied @name, then fails with -EINVAL unless the
   * resolved dentry is the root of its mount and check_mnt() accepts the
   * mount, and with -EPERM unless the caller has CAP_SYS_ADMIN.
   * Otherwise returns the result of do_umount().
   */
  SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
  {
  	struct path path;
  	int retval;

  	retval = user_path(name, &path);
  	if (retval)
  		goto out;
  	retval = -EINVAL;
  	if (path.dentry != path.mnt->mnt_root)
  		goto dput_and_out;
  	if (!check_mnt(path.mnt))
  		goto dput_and_out;

  	retval = -EPERM;
  	if (!capable(CAP_SYS_ADMIN))
  		goto dput_and_out;

  	retval = do_umount(path.mnt, flags);
  dput_and_out:
  	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
  	dput(path.dentry);
  	mntput_no_expire(path.mnt);
  out:
  	return retval;
  }
  
  #ifdef __ARCH_WANT_SYS_OLDUMOUNT

  /*
   *	The 2.0 compatible umount. No flags.
   *	Simply forwards @name to sys_umount() with flags == 0.
   */
  SYSCALL_DEFINE1(oldumount, char __user *, name)
  {
  	return sys_umount(name, 0);
  }

  #endif
2d92ab3c6   Al Viro   [PATCH] finally g...
1149
  static int mount_is_safe(struct path *path)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1150
1151
1152
1153
1154
  {
  	if (capable(CAP_SYS_ADMIN))
  		return 0;
  	return -EPERM;
  #ifdef notyet
2d92ab3c6   Al Viro   [PATCH] finally g...
1155
  	if (S_ISLNK(path->dentry->d_inode->i_mode))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1156
  		return -EPERM;
2d92ab3c6   Al Viro   [PATCH] finally g...
1157
  	if (path->dentry->d_inode->i_mode & S_ISVTX) {
da9592ede   David Howells   CRED: Wrap task c...
1158
  		if (current_uid() != path->dentry->d_inode->i_uid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1159
1160
  			return -EPERM;
  	}
2d92ab3c6   Al Viro   [PATCH] finally g...
1161
  	if (inode_permission(path->dentry->d_inode, MAY_WRITE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1162
1163
1164
1165
  		return -EPERM;
  	return 0;
  #endif
  }
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1166
  struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
36341f645   Ram Pai   [PATCH] mount exp...
1167
  					int flag)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1168
1169
  {
  	struct vfsmount *res, *p, *q, *r, *s;
1a3906895   Al Viro   [PATCH] reduce st...
1170
  	struct path path;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1171

9676f0c63   Ram Pai   [PATCH] unbindabl...
1172
1173
  	if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
  		return NULL;
36341f645   Ram Pai   [PATCH] mount exp...
1174
  	res = q = clone_mnt(mnt, dentry, flag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1175
1176
1177
1178
1179
  	if (!q)
  		goto Enomem;
  	q->mnt_mountpoint = mnt->mnt_mountpoint;
  
  	p = mnt;
fdadd65fb   Domen Puncer   [PATCH] janitor: ...
1180
  	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
7ec02ef15   Jan Blunck   vfs: remove lives...
1181
  		if (!is_subdir(r->mnt_mountpoint, dentry))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1182
1183
1184
  			continue;
  
  		for (s = r; s; s = next_mnt(s, r)) {
9676f0c63   Ram Pai   [PATCH] unbindabl...
1185
1186
1187
1188
  			if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
  				s = skip_mnt_tree(s);
  				continue;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1189
1190
1191
1192
1193
  			while (p != s->mnt_parent) {
  				p = p->mnt_parent;
  				q = q->mnt_parent;
  			}
  			p = s;
1a3906895   Al Viro   [PATCH] reduce st...
1194
1195
  			path.mnt = q;
  			path.dentry = p->mnt_mountpoint;
36341f645   Ram Pai   [PATCH] mount exp...
1196
  			q = clone_mnt(p, p->mnt_root, flag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1197
1198
1199
1200
  			if (!q)
  				goto Enomem;
  			spin_lock(&vfsmount_lock);
  			list_add_tail(&q->mnt_list, &res->mnt_list);
1a3906895   Al Viro   [PATCH] reduce st...
1201
  			attach_mnt(q, &path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1202
1203
1204
1205
  			spin_unlock(&vfsmount_lock);
  		}
  	}
  	return res;
b58fed8b1   Ram Pai   [PATCH] lindent f...
1206
  Enomem:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1207
  	if (res) {
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
1208
  		LIST_HEAD(umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1209
  		spin_lock(&vfsmount_lock);
a05964f39   Ram Pai   [PATCH] shared mo...
1210
  		umount_tree(res, 0, &umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1211
  		spin_unlock(&vfsmount_lock);
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
1212
  		release_mounts(&umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1213
1214
1215
  	}
  	return NULL;
  }
8aec08094   Al Viro   [PATCH] new helpe...
1216
1217
1218
  struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
  {
  	struct vfsmount *tree;
1a60a2807   Al Viro   [PATCH] lock excl...
1219
  	down_write(&namespace_sem);
8aec08094   Al Viro   [PATCH] new helpe...
1220
  	tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
1a60a2807   Al Viro   [PATCH] lock excl...
1221
  	up_write(&namespace_sem);
8aec08094   Al Viro   [PATCH] new helpe...
1222
1223
1224
1225
1226
1227
  	return tree;
  }
  
  void drop_collected_mounts(struct vfsmount *mnt)
  {
  	LIST_HEAD(umount_list);
1a60a2807   Al Viro   [PATCH] lock excl...
1228
  	down_write(&namespace_sem);
8aec08094   Al Viro   [PATCH] new helpe...
1229
1230
1231
  	spin_lock(&vfsmount_lock);
  	umount_tree(mnt, 0, &umount_list);
  	spin_unlock(&vfsmount_lock);
1a60a2807   Al Viro   [PATCH] lock excl...
1232
  	up_write(&namespace_sem);
8aec08094   Al Viro   [PATCH] new helpe...
1233
1234
  	release_mounts(&umount_list);
  }
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
  /*
   * Walk the tree rooted at @mnt in next_mnt() order, stopping when @end
   * is reached (a NULL @end means the whole tree), and release the group
   * id of every mount that has one but is not shared.
   */
  static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
  {
  	struct vfsmount *cur = mnt;

  	while (cur != end) {
  		if (cur->mnt_group_id && !IS_MNT_SHARED(cur))
  			mnt_release_group_id(cur);
  		cur = next_mnt(cur, mnt);
  	}
  }
  
  /*
   * Give a group id to @mnt, and when @recurse is true to every mount in
   * its tree, skipping mounts that already have an id or are shared.
   *
   * On allocation failure the mounts visited before the failing one are
   * handed to cleanup_group_ids() and the error is returned.
   * Returns 0 on success.
   */
  static int invent_group_ids(struct vfsmount *mnt, bool recurse)
  {
  	struct vfsmount *cur = mnt;

  	while (cur) {
  		if (!cur->mnt_group_id && !IS_MNT_SHARED(cur)) {
  			int err = mnt_alloc_group_id(cur);
  			if (err) {
  				cleanup_group_ids(mnt, cur);
  				return err;
  			}
  		}
  		/* non-recursive callers only want the root handled */
  		if (!recurse)
  			break;
  		cur = next_mnt(cur, mnt);
  	}

  	return 0;
  }
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1261
1262
  /*
   *  @source_mnt : mount tree to be attached
214444032   Ram Pai   [PATCH] shared mo...
1263
1264
1265
1266
   *  @path        : place where the mount tree @source_mnt is attached
   *  @parent_path : if non-null, detach the source_mnt from its parent and
   *  		   store the parent mount and mountpoint dentry.
   *  		   (done when source_mnt is moved)
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1267
1268
1269
   *
   *  NOTE: the table below explains the semantics when a source mount
   *  of a given type is attached to a destination mount of a given type.
9676f0c63   Ram Pai   [PATCH] unbindabl...
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
   * ---------------------------------------------------------------------------
   * |         BIND MOUNT OPERATION                                            |
   * |**************************************************************************
   * | source-->| shared        |       private  |       slave    | unbindable |
   * | dest     |               |                |                |            |
   * |   |      |               |                |                |            |
   * |   v      |               |                |                |            |
   * |**************************************************************************
   * |  shared  | shared (++)   |     shared (+) |     shared(+++)|  invalid   |
   * |          |               |                |                |            |
   * |non-shared| shared (+)    |      private   |      slave (*) |  invalid   |
   * ***************************************************************************
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1282
1283
1284
1285
1286
1287
1288
1289
1290
   * A bind operation clones the source mount and mounts the clone on the
   * destination mount.
   *
   * (++)  the cloned mount is propagated to all the mounts in the propagation
   * 	 tree of the destination mount and the cloned mount is added to
   * 	 the peer group of the source mount.
   * (+)   the cloned mount is created under the destination mount and is marked
   *       as shared. The cloned mount is added to the peer group of the source
   *       mount.
5afe00221   Ram Pai   [PATCH] handling ...
1291
1292
1293
1294
1295
1296
1297
   * (+++) the mount is propagated to all the mounts in the propagation tree
   *       of the destination mount and the cloned mount is made slave
   *       of the same master as that of the source mount. The cloned mount
   *       is marked as 'shared and slave'.
   * (*)   the cloned mount is made a slave of the same master as that of the
   * 	 source mount.
   *
9676f0c63   Ram Pai   [PATCH] unbindabl...
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
   * ---------------------------------------------------------------------------
   * |         		MOVE MOUNT OPERATION                                 |
   * |**************************************************************************
   * | source-->| shared        |       private  |       slave    | unbindable |
   * | dest     |               |                |                |            |
   * |   |      |               |                |                |            |
   * |   v      |               |                |                |            |
   * |**************************************************************************
   * |  shared  | shared (+)    |     shared (+) |    shared(+++) |  invalid   |
   * |          |               |                |                |            |
   * |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
   * ***************************************************************************
5afe00221   Ram Pai   [PATCH] handling ...
1310
1311
1312
   *
   * (+)  the mount is moved to the destination. And is then propagated to
   * 	all the mounts in the propagation tree of the destination mount.
214444032   Ram Pai   [PATCH] shared mo...
1313
   * (+*)  the mount is moved to the destination.
5afe00221   Ram Pai   [PATCH] handling ...
1314
1315
1316
1317
   * (+++)  the mount is moved to the destination and is then propagated to
   * 	all the mounts belonging to the destination mount's propagation tree.
   * 	the mount is marked as 'shared and slave'.
   * (*)	the mount continues to be a slave at the new location.
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1318
1319
1320
1321
1322
1323
1324
   *
   * if the source mount is a tree, the operations explained above are
   * applied to each mount in the tree.
   * Must be called without spinlocks held, since this function can sleep
   * in allocations.
   */
  static int attach_recursive_mnt(struct vfsmount *source_mnt,
1a3906895   Al Viro   [PATCH] reduce st...
1325
  			struct path *path, struct path *parent_path)
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1326
1327
  {
  	LIST_HEAD(tree_list);
1a3906895   Al Viro   [PATCH] reduce st...
1328
1329
  	struct vfsmount *dest_mnt = path->mnt;
  	struct dentry *dest_dentry = path->dentry;
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1330
  	struct vfsmount *child, *p;
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
1331
  	int err;
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1332

719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
1333
1334
1335
1336
1337
1338
1339
1340
  	if (IS_MNT_SHARED(dest_mnt)) {
  		err = invent_group_ids(source_mnt, true);
  		if (err)
  			goto out;
  	}
  	err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
  	if (err)
  		goto out_cleanup_ids;
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1341
1342
1343
1344
1345
1346
1347
  
  	if (IS_MNT_SHARED(dest_mnt)) {
  		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
  			set_mnt_shared(p);
  	}
  
  	spin_lock(&vfsmount_lock);
1a3906895   Al Viro   [PATCH] reduce st...
1348
1349
1350
  	if (parent_path) {
  		detach_mnt(source_mnt, parent_path);
  		attach_mnt(source_mnt, path);
e5d67f071   Al Viro   Touch all affecte...
1351
  		touch_mnt_namespace(parent_path->mnt->mnt_ns);
214444032   Ram Pai   [PATCH] shared mo...
1352
1353
1354
1355
  	} else {
  		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
  		commit_tree(source_mnt);
  	}
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1356
1357
1358
1359
1360
1361
1362
  
  	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
  		list_del_init(&child->mnt_hash);
  		commit_tree(child);
  	}
  	spin_unlock(&vfsmount_lock);
  	return 0;
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
1363
1364
1365
1366
1367
1368
  
   out_cleanup_ids:
  	if (IS_MNT_SHARED(dest_mnt))
  		cleanup_group_ids(source_mnt, NULL);
   out:
  	return err;
b90fa9ae8   Ram Pai   [PATCH] shared mo...
1369
  }
8c3ee42e8   Al Viro   [PATCH] get rid o...
1370
  static int graft_tree(struct vfsmount *mnt, struct path *path)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1371
1372
1373
1374
  {
  	int err;
  	if (mnt->mnt_sb->s_flags & MS_NOUSER)
  		return -EINVAL;
8c3ee42e8   Al Viro   [PATCH] get rid o...
1375
  	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1376
1377
1378
1379
  	      S_ISDIR(mnt->mnt_root->d_inode->i_mode))
  		return -ENOTDIR;
  
  	err = -ENOENT;
8c3ee42e8   Al Viro   [PATCH] get rid o...
1380
1381
  	mutex_lock(&path->dentry->d_inode->i_mutex);
  	if (IS_DEADDIR(path->dentry->d_inode))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1382
  		goto out_unlock;
8c3ee42e8   Al Viro   [PATCH] get rid o...
1383
  	err = security_sb_check_sb(mnt, path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1384
1385
1386
1387
  	if (err)
  		goto out_unlock;
  
  	err = -ENOENT;
8c3ee42e8   Al Viro   [PATCH] get rid o...
1388
1389
  	if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry))
  		err = attach_recursive_mnt(mnt, path, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1390
  out_unlock:
8c3ee42e8   Al Viro   [PATCH] get rid o...
1391
  	mutex_unlock(&path->dentry->d_inode->i_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1392
  	if (!err)
8c3ee42e8   Al Viro   [PATCH] get rid o...
1393
  		security_sb_post_addmount(mnt, path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1394
1395
1396
1397
  	return err;
  }
  
  /*
07b20889e   Ram Pai   [PATCH] beginning...
1398
1399
   * recursively change the type of the mountpoint.
   */
0a0d8a467   Al Viro   [PATCH] no need f...
1400
  static int do_change_type(struct path *path, int flag)
07b20889e   Ram Pai   [PATCH] beginning...
1401
  {
2d92ab3c6   Al Viro   [PATCH] finally g...
1402
  	struct vfsmount *m, *mnt = path->mnt;
07b20889e   Ram Pai   [PATCH] beginning...
1403
1404
  	int recurse = flag & MS_REC;
  	int type = flag & ~MS_REC;
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
1405
  	int err = 0;
07b20889e   Ram Pai   [PATCH] beginning...
1406

ee6f95829   Miklos Szeredi   check privileges ...
1407
1408
  	if (!capable(CAP_SYS_ADMIN))
  		return -EPERM;
2d92ab3c6   Al Viro   [PATCH] finally g...
1409
  	if (path->dentry != path->mnt->mnt_root)
07b20889e   Ram Pai   [PATCH] beginning...
1410
1411
1412
  		return -EINVAL;
  
  	down_write(&namespace_sem);
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
1413
1414
1415
1416
1417
  	if (type == MS_SHARED) {
  		err = invent_group_ids(mnt, recurse);
  		if (err)
  			goto out_unlock;
  	}
07b20889e   Ram Pai   [PATCH] beginning...
1418
1419
1420
1421
  	spin_lock(&vfsmount_lock);
  	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
  		change_mnt_propagation(m, type);
  	spin_unlock(&vfsmount_lock);
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
1422
1423
  
   out_unlock:
07b20889e   Ram Pai   [PATCH] beginning...
1424
  	up_write(&namespace_sem);
719f5d7f0   Miklos Szeredi   [patch 4/7] vfs: ...
1425
  	return err;
07b20889e   Ram Pai   [PATCH] beginning...
1426
1427
1428
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1429
1430
   * do loopback mount.
   */
0a0d8a467   Al Viro   [PATCH] no need f...
1431
  static int do_loopback(struct path *path, char *old_name,
2dafe1c4d   Eric Sandeen   reduce large do_m...
1432
  				int recurse)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1433
  {
2d92ab3c6   Al Viro   [PATCH] finally g...
1434
  	struct path old_path;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1435
  	struct vfsmount *mnt = NULL;
2d92ab3c6   Al Viro   [PATCH] finally g...
1436
  	int err = mount_is_safe(path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1437
1438
1439
1440
  	if (err)
  		return err;
  	if (!old_name || !*old_name)
  		return -EINVAL;
2d92ab3c6   Al Viro   [PATCH] finally g...
1441
  	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1442
1443
  	if (err)
  		return err;
390c68436   Ram Pai   [PATCH] making na...
1444
  	down_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1445
  	err = -EINVAL;
2d92ab3c6   Al Viro   [PATCH] finally g...
1446
  	if (IS_MNT_UNBINDABLE(old_path.mnt))
4ac913785   Jan Blunck   Embed a struct pa...
1447
  		goto out;
9676f0c63   Ram Pai   [PATCH] unbindabl...
1448

2d92ab3c6   Al Viro   [PATCH] finally g...
1449
  	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
ccd48bc7f   Al Viro   [PATCH] cleanups ...
1450
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1451

ccd48bc7f   Al Viro   [PATCH] cleanups ...
1452
1453
  	err = -ENOMEM;
  	if (recurse)
2d92ab3c6   Al Viro   [PATCH] finally g...
1454
  		mnt = copy_tree(old_path.mnt, old_path.dentry, 0);
ccd48bc7f   Al Viro   [PATCH] cleanups ...
1455
  	else
2d92ab3c6   Al Viro   [PATCH] finally g...
1456
  		mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
ccd48bc7f   Al Viro   [PATCH] cleanups ...
1457
1458
1459
  
  	if (!mnt)
  		goto out;
2d92ab3c6   Al Viro   [PATCH] finally g...
1460
  	err = graft_tree(mnt, path);
ccd48bc7f   Al Viro   [PATCH] cleanups ...
1461
  	if (err) {
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
1462
  		LIST_HEAD(umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1463
  		spin_lock(&vfsmount_lock);
a05964f39   Ram Pai   [PATCH] shared mo...
1464
  		umount_tree(mnt, 0, &umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1465
  		spin_unlock(&vfsmount_lock);
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
1466
  		release_mounts(&umount_list);
5b83d2c5c   Ram Pai   [PATCH] sanitize ...
1467
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1468

ccd48bc7f   Al Viro   [PATCH] cleanups ...
1469
  out:
390c68436   Ram Pai   [PATCH] making na...
1470
  	up_write(&namespace_sem);
2d92ab3c6   Al Viro   [PATCH] finally g...
1471
  	path_put(&old_path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1472
1473
  	return err;
  }
2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
  /*
   * Bring the read-only state of @mnt in line with MS_RDONLY in @ms_flags.
   * Returns 0 when nothing had to change or the mount was made writable,
   * otherwise the result of mnt_make_readonly().
   */
  static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
  {
  	int want_ro = (ms_flags & MS_RDONLY) ? 1 : 0;

  	/* already in the requested state */
  	if (want_ro == __mnt_is_readonly(mnt))
  		return 0;

  	if (!want_ro) {
  		__mnt_unmake_readonly(mnt);
  		return 0;
  	}
  	return mnt_make_readonly(mnt);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1490
1491
1492
1493
1494
  /*
   * change filesystem flags. dir should be a physical root of filesystem.
   * If you've mounted a non-root directory somewhere and want to do remount
   * on it - tough luck.
   */
0a0d8a467   Al Viro   [PATCH] no need f...
1495
  static int do_remount(struct path *path, int flags, int mnt_flags,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1496
1497
1498
  		      void *data)
  {
  	int err;
2d92ab3c6   Al Viro   [PATCH] finally g...
1499
  	struct super_block *sb = path->mnt->mnt_sb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1500
1501
1502
  
  	if (!capable(CAP_SYS_ADMIN))
  		return -EPERM;
2d92ab3c6   Al Viro   [PATCH] finally g...
1503
  	if (!check_mnt(path->mnt))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1504
  		return -EINVAL;
2d92ab3c6   Al Viro   [PATCH] finally g...
1505
  	if (path->dentry != path->mnt->mnt_root)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1506
1507
1508
  		return -EINVAL;
  
  	down_write(&sb->s_umount);
2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
1509
  	if (flags & MS_BIND)
2d92ab3c6   Al Viro   [PATCH] finally g...
1510
  		err = change_mount_flags(path->mnt, flags);
2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
1511
1512
  	else
  		err = do_remount_sb(sb, flags, data, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1513
  	if (!err)
2d92ab3c6   Al Viro   [PATCH] finally g...
1514
  		path->mnt->mnt_flags = mnt_flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1515
  	up_write(&sb->s_umount);
0e55a7cca   Dan Williams   [RFC PATCH] touch...
1516
  	if (!err) {
2d92ab3c6   Al Viro   [PATCH] finally g...
1517
  		security_sb_post_remount(path->mnt, flags, data);
0e55a7cca   Dan Williams   [RFC PATCH] touch...
1518
1519
1520
1521
1522
  
  		spin_lock(&vfsmount_lock);
  		touch_mnt_namespace(path->mnt->mnt_ns);
  		spin_unlock(&vfsmount_lock);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1523
1524
  	return err;
  }
9676f0c63   Ram Pai   [PATCH] unbindabl...
1525
1526
1527
1528
1529
1530
1531
1532
1533
  /*
   * Return 1 if @mnt or any mount reached from it via next_mnt() is marked
   * unbindable, 0 otherwise.
   */
  static inline int tree_contains_unbindable(struct vfsmount *mnt)
  {
  	struct vfsmount *cur = mnt;

  	while (cur) {
  		if (IS_MNT_UNBINDABLE(cur))
  			return 1;
  		cur = next_mnt(cur, mnt);
  	}
  	return 0;
  }
0a0d8a467   Al Viro   [PATCH] no need f...
1534
  static int do_move_mount(struct path *path, char *old_name)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1535
  {
2d92ab3c6   Al Viro   [PATCH] finally g...
1536
  	struct path old_path, parent_path;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1537
1538
1539
1540
1541
1542
  	struct vfsmount *p;
  	int err = 0;
  	if (!capable(CAP_SYS_ADMIN))
  		return -EPERM;
  	if (!old_name || !*old_name)
  		return -EINVAL;
2d92ab3c6   Al Viro   [PATCH] finally g...
1543
  	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1544
1545
  	if (err)
  		return err;
390c68436   Ram Pai   [PATCH] making na...
1546
  	down_write(&namespace_sem);
2d92ab3c6   Al Viro   [PATCH] finally g...
1547
1548
  	while (d_mountpoint(path->dentry) &&
  	       follow_down(&path->mnt, &path->dentry))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1549
1550
  		;
  	err = -EINVAL;
2d92ab3c6   Al Viro   [PATCH] finally g...
1551
  	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1552
1553
1554
  		goto out;
  
  	err = -ENOENT;
2d92ab3c6   Al Viro   [PATCH] finally g...
1555
1556
  	mutex_lock(&path->dentry->d_inode->i_mutex);
  	if (IS_DEADDIR(path->dentry->d_inode))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1557
  		goto out1;
2d92ab3c6   Al Viro   [PATCH] finally g...
1558
  	if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
214444032   Ram Pai   [PATCH] shared mo...
1559
  		goto out1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1560
1561
  
  	err = -EINVAL;
2d92ab3c6   Al Viro   [PATCH] finally g...
1562
  	if (old_path.dentry != old_path.mnt->mnt_root)
214444032   Ram Pai   [PATCH] shared mo...
1563
  		goto out1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1564

2d92ab3c6   Al Viro   [PATCH] finally g...
1565
  	if (old_path.mnt == old_path.mnt->mnt_parent)
214444032   Ram Pai   [PATCH] shared mo...
1566
  		goto out1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1567

2d92ab3c6   Al Viro   [PATCH] finally g...
1568
1569
  	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
  	      S_ISDIR(old_path.dentry->d_inode->i_mode))
214444032   Ram Pai   [PATCH] shared mo...
1570
1571
1572
1573
  		goto out1;
  	/*
  	 * Don't move a mount residing in a shared parent.
  	 */
2d92ab3c6   Al Viro   [PATCH] finally g...
1574
1575
  	if (old_path.mnt->mnt_parent &&
  	    IS_MNT_SHARED(old_path.mnt->mnt_parent))
214444032   Ram Pai   [PATCH] shared mo...
1576
  		goto out1;
9676f0c63   Ram Pai   [PATCH] unbindabl...
1577
1578
1579
1580
  	/*
  	 * Don't move a mount tree containing unbindable mounts to a destination
  	 * mount which is shared.
  	 */
2d92ab3c6   Al Viro   [PATCH] finally g...
1581
1582
  	if (IS_MNT_SHARED(path->mnt) &&
  	    tree_contains_unbindable(old_path.mnt))
9676f0c63   Ram Pai   [PATCH] unbindabl...
1583
  		goto out1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1584
  	err = -ELOOP;
2d92ab3c6   Al Viro   [PATCH] finally g...
1585
1586
  	for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent)
  		if (p == old_path.mnt)
214444032   Ram Pai   [PATCH] shared mo...
1587
  			goto out1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1588

2d92ab3c6   Al Viro   [PATCH] finally g...
1589
  	err = attach_recursive_mnt(old_path.mnt, path, &parent_path);
4ac913785   Jan Blunck   Embed a struct pa...
1590
  	if (err)
214444032   Ram Pai   [PATCH] shared mo...
1591
  		goto out1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1592
1593
1594
  
  	/* if the mount is moved, it should no longer be expire
  	 * automatically */
2d92ab3c6   Al Viro   [PATCH] finally g...
1595
  	list_del_init(&old_path.mnt->mnt_expire);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1596
  out1:
2d92ab3c6   Al Viro   [PATCH] finally g...
1597
  	mutex_unlock(&path->dentry->d_inode->i_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1598
  out:
390c68436   Ram Pai   [PATCH] making na...
1599
  	up_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1600
  	if (!err)
1a3906895   Al Viro   [PATCH] reduce st...
1601
  		path_put(&parent_path);
2d92ab3c6   Al Viro   [PATCH] finally g...
1602
  	path_put(&old_path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1603
1604
1605
1606
1607
1608
1609
  	return err;
  }
  
  /*
   * create a new mount for userspace and request it to be added into the
   * namespace's tree
   */
0a0d8a467   Al Viro   [PATCH] no need f...
1610
  static int do_new_mount(struct path *path, char *type, int flags,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
  			int mnt_flags, char *name, void *data)
  {
  	struct vfsmount *mnt;
  
  	if (!type || !memchr(type, 0, PAGE_SIZE))
  		return -EINVAL;
  
  	/* we need capabilities... */
  	if (!capable(CAP_SYS_ADMIN))
  		return -EPERM;
  
  	mnt = do_kern_mount(type, flags, name, data);
  	if (IS_ERR(mnt))
  		return PTR_ERR(mnt);
2d92ab3c6   Al Viro   [PATCH] finally g...
1625
  	return do_add_mount(mnt, path, mnt_flags, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1626
1627
1628
1629
1630
1631
  }
  
  /*
   * add a mount into a namespace's mount tree
   * - provide the option of adding the new mount to an expiration list
   */
8d66bf548   Al Viro   [PATCH] pass stru...
1632
  int do_add_mount(struct vfsmount *newmnt, struct path *path,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1633
1634
1635
  		 int mnt_flags, struct list_head *fslist)
  {
  	int err;
390c68436   Ram Pai   [PATCH] making na...
1636
  	down_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1637
  	/* Something was mounted here while we slept */
8d66bf548   Al Viro   [PATCH] pass stru...
1638
1639
  	while (d_mountpoint(path->dentry) &&
  	       follow_down(&path->mnt, &path->dentry))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1640
1641
  		;
  	err = -EINVAL;
8d66bf548   Al Viro   [PATCH] pass stru...
1642
  	if (!check_mnt(path->mnt))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1643
1644
1645
1646
  		goto unlock;
  
  	/* Refuse the same filesystem on the same mount point */
  	err = -EBUSY;
8d66bf548   Al Viro   [PATCH] pass stru...
1647
1648
  	if (path->mnt->mnt_sb == newmnt->mnt_sb &&
  	    path->mnt->mnt_root == path->dentry)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1649
1650
1651
1652
1653
1654
1655
  		goto unlock;
  
  	err = -EINVAL;
  	if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
  		goto unlock;
  
  	newmnt->mnt_flags = mnt_flags;
8d66bf548   Al Viro   [PATCH] pass stru...
1656
  	if ((err = graft_tree(newmnt, path)))
5b83d2c5c   Ram Pai   [PATCH] sanitize ...
1657
  		goto unlock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1658

6758f953d   Al Viro   [PATCH] mnt_expir...
1659
  	if (fslist) /* add to the specified expiration list */
55e700b92   Miklos Szeredi   [PATCH] namespace...
1660
  		list_add_tail(&newmnt->mnt_expire, fslist);
6758f953d   Al Viro   [PATCH] mnt_expir...
1661

390c68436   Ram Pai   [PATCH] making na...
1662
  	up_write(&namespace_sem);
5b83d2c5c   Ram Pai   [PATCH] sanitize ...
1663
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1664
1665
  
  unlock:
390c68436   Ram Pai   [PATCH] making na...
1666
  	up_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1667
1668
1669
1670
1671
  	mntput(newmnt);
  	return err;
  }
  
  EXPORT_SYMBOL_GPL(do_add_mount);
5528f911b   Trond Myklebust   VFS: Add shrink_s...
1672
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1673
1674
1675
1676
1677
1678
   * process a list of expirable mountpoints with the intent of discarding any
   * mountpoints that aren't in use and haven't been touched since last we came
   * here
   */
  void mark_mounts_for_expiry(struct list_head *mounts)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1679
1680
  	struct vfsmount *mnt, *next;
  	LIST_HEAD(graveyard);
bcc5c7d2b   Al Viro   [PATCH] sanitize ...
1681
  	LIST_HEAD(umounts);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1682
1683
1684
  
  	if (list_empty(mounts))
  		return;
bcc5c7d2b   Al Viro   [PATCH] sanitize ...
1685
  	down_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1686
1687
1688
1689
1690
1691
1692
1693
  	spin_lock(&vfsmount_lock);
  
  	/* extract from the expiration list every vfsmount that matches the
  	 * following criteria:
  	 * - only referenced by its parent vfsmount
  	 * - still marked for expiry (marked on the last call here; marks are
  	 *   cleared by mntput())
  	 */
55e700b92   Miklos Szeredi   [PATCH] namespace...
1694
  	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1695
  		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
bcc5c7d2b   Al Viro   [PATCH] sanitize ...
1696
  			propagate_mount_busy(mnt, 1))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1697
  			continue;
55e700b92   Miklos Szeredi   [PATCH] namespace...
1698
  		list_move(&mnt->mnt_expire, &graveyard);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1699
  	}
bcc5c7d2b   Al Viro   [PATCH] sanitize ...
1700
1701
1702
1703
1704
  	while (!list_empty(&graveyard)) {
  		mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire);
  		touch_mnt_namespace(mnt->mnt_ns);
  		umount_tree(mnt, 1, &umounts);
  	}
5528f911b   Trond Myklebust   VFS: Add shrink_s...
1705
  	spin_unlock(&vfsmount_lock);
bcc5c7d2b   Al Viro   [PATCH] sanitize ...
1706
1707
1708
  	up_write(&namespace_sem);
  
  	release_mounts(&umounts);
5528f911b   Trond Myklebust   VFS: Add shrink_s...
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
  }
  
  EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
  
  /*
   * Ripoff of 'select_parent()'
   *
   * search the list of submounts for a given mountpoint, and move any
   * shrinkable submounts to the 'graveyard' list.
   *
   * Returns the number of mounts moved.  The walk is an iterative
   * depth-first traversal of the mnt_mounts/mnt_child lists.
   */
  static int select_submounts(struct vfsmount *parent, struct list_head *graveyard)
  {
  	struct vfsmount *cur = parent;
  	struct list_head *pos;
  	int found = 0;

  descend:
  	pos = cur->mnt_mounts.next;
  scan:
  	while (pos != &cur->mnt_mounts) {
  		struct vfsmount *child;

  		child = list_entry(pos, struct vfsmount, mnt_child);
  		/* advance first: the entry may be moved off to the graveyard */
  		pos = pos->next;

  		if (!(child->mnt_flags & MNT_SHRINKABLE))
  			continue;

  		/*
  		 * Descend a level if the mnt_mounts list is non-empty.
  		 */
  		if (!list_empty(&child->mnt_mounts)) {
  			cur = child;
  			goto descend;
  		}

  		if (!propagate_mount_busy(child, 1)) {
  			list_move_tail(&child->mnt_expire, graveyard);
  			found++;
  		}
  	}

  	/*
  	 * All done at this level ... ascend and resume the search
  	 */
  	if (cur != parent) {
  		pos = cur->mnt_child.next;
  		cur = cur->mnt_parent;
  		goto scan;
  	}
  	return found;
  }
  
  /*
   * process a list of expirable mountpoints with the intent of discarding any
   * submounts of a specific parent mountpoint
   */
c35038bec   Al Viro   [PATCH] do shrink...
1763
  static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
5528f911b   Trond Myklebust   VFS: Add shrink_s...
1764
1765
  {
  	LIST_HEAD(graveyard);
c35038bec   Al Viro   [PATCH] do shrink...
1766
  	struct vfsmount *m;
5528f911b   Trond Myklebust   VFS: Add shrink_s...
1767

5528f911b   Trond Myklebust   VFS: Add shrink_s...
1768
  	/* extract submounts of 'mountpoint' from the expiration list */
c35038bec   Al Viro   [PATCH] do shrink...
1769
  	while (select_submounts(mnt, &graveyard)) {
bcc5c7d2b   Al Viro   [PATCH] sanitize ...
1770
  		while (!list_empty(&graveyard)) {
c35038bec   Al Viro   [PATCH] do shrink...
1771
  			m = list_first_entry(&graveyard, struct vfsmount,
bcc5c7d2b   Al Viro   [PATCH] sanitize ...
1772
  						mnt_expire);
afef80b3d   Eric W. Biederman   vfs: fix shrink_s...
1773
1774
  			touch_mnt_namespace(m->mnt_ns);
  			umount_tree(m, 1, umounts);
bcc5c7d2b   Al Viro   [PATCH] sanitize ...
1775
1776
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1777
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1778
1779
1780
1781
1782
1783
  /*
   * Some copy_from_user() implementations do not return the exact number of
   * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
   * Note that this function differs from copy_from_user() in that it will oops
   * on bad values of `to', rather than returning a short copy.
   */
b58fed8b1   Ram Pai   [PATCH] lindent f...
1784
1785
  static long exact_copy_from_user(void *to, const void __user * from,
  				 unsigned long n)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
  {
  	char *t = to;
  	const char __user *f = from;
  	char c;
  
  	if (!access_ok(VERIFY_READ, from, n))
  		return n;
  
  	while (n) {
  		if (__get_user(c, f)) {
  			memset(t, 0, n);
  			break;
  		}
  		*t++ = c;
  		f++;
  		n--;
  	}
  	return n;
  }
b58fed8b1   Ram Pai   [PATCH] lindent f...
1805
  /*
   * Copy up to one page of mount option data from user space.
   *
   * On success *where holds the address of a freshly allocated page (or 0
   * when @data is NULL) and 0 is returned.  Returns -ENOMEM if no page could
   * be allocated and -EFAULT if not even one byte could be copied.
   */
  int copy_mount_options(const void __user * data, unsigned long *where)
  {
  	unsigned long page;
  	unsigned long size;
  	int copied;

  	*where = 0;
  	if (!data)
  		return 0;

  	page = __get_free_page(GFP_KERNEL);
  	if (!page)
  		return -ENOMEM;

  	/* We only care that *some* data at the address the user
  	 * gave us is valid.  Just in case, we'll zero
  	 * the remainder of the page.
  	 */
  	/* copy_from_user cannot cross TASK_SIZE ! */
  	size = TASK_SIZE - (unsigned long)data;
  	if (size > PAGE_SIZE)
  		size = PAGE_SIZE;

  	copied = size - exact_copy_from_user((void *)page, data, size);
  	if (!copied) {
  		free_page(page);
  		return -EFAULT;
  	}

  	/* clear whatever part of the page was not filled from user space */
  	if (copied != PAGE_SIZE)
  		memset((char *)page + copied, 0, PAGE_SIZE - copied);

  	*where = page;
  	return 0;
  }
  
  /*
   * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
   * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
   *
   * data is a (void *) that can point to any structure up to
   * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
   * information (or be NULL).
   *
   * Pre-0.97 versions of mount() didn't have a flags word.
   * When the flags word was introduced its top half was required
   * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
   * Therefore, if this magic number is present, it carries no information
   * and must be discarded.
   */
b58fed8b1   Ram Pai   [PATCH] lindent f...
1852
  long do_mount(char *dev_name, char *dir_name, char *type_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1853
1854
  		  unsigned long flags, void *data_page)
  {
2d92ab3c6   Al Viro   [PATCH] finally g...
1855
  	struct path path;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
  	int retval = 0;
  	int mnt_flags = 0;
  
  	/* Discard magic */
  	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
  		flags &= ~MS_MGC_MSK;
  
  	/* Basic sanity checks */
  
  	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
  		return -EINVAL;
  	if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
  		return -EINVAL;
  
  	if (data_page)
  		((char *)data_page)[PAGE_SIZE - 1] = 0;
613cbe3d4   Andi Kleen   Don't set relatim...
1872
1873
1874
  	/* Default to relatime unless overriden */
  	if (!(flags & MS_NOATIME))
  		mnt_flags |= MNT_RELATIME;
0a1c01c94   Matthew Garrett   Make relatime def...
1875

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1876
1877
1878
1879
1880
1881
1882
  	/* Separate the per-mountpoint flags */
  	if (flags & MS_NOSUID)
  		mnt_flags |= MNT_NOSUID;
  	if (flags & MS_NODEV)
  		mnt_flags |= MNT_NODEV;
  	if (flags & MS_NOEXEC)
  		mnt_flags |= MNT_NOEXEC;
fc33a7bb9   Christoph Hellwig   [PATCH] per-mount...
1883
1884
1885
1886
  	if (flags & MS_NOATIME)
  		mnt_flags |= MNT_NOATIME;
  	if (flags & MS_NODIRATIME)
  		mnt_flags |= MNT_NODIRATIME;
d0adde574   Matthew Garrett   Add a strictatime...
1887
1888
  	if (flags & MS_STRICTATIME)
  		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
2e4b7fcd9   Dave Hansen   [PATCH] r/o bind ...
1889
1890
  	if (flags & MS_RDONLY)
  		mnt_flags |= MNT_READONLY;
fc33a7bb9   Christoph Hellwig   [PATCH] per-mount...
1891
1892
  
  	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
d0adde574   Matthew Garrett   Add a strictatime...
1893
1894
  		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
  		   MS_STRICTATIME);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1895
1896
  
  	/* ... and get the mountpoint */
2d92ab3c6   Al Viro   [PATCH] finally g...
1897
  	retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1898
1899
  	if (retval)
  		return retval;
2d92ab3c6   Al Viro   [PATCH] finally g...
1900
  	retval = security_sb_mount(dev_name, &path,
b5266eb4c   Al Viro   [PATCH] switch a ...
1901
  				   type_page, flags, data_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1902
1903
1904
1905
  	if (retval)
  		goto dput_out;
  
  	if (flags & MS_REMOUNT)
2d92ab3c6   Al Viro   [PATCH] finally g...
1906
  		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1907
1908
  				    data_page);
  	else if (flags & MS_BIND)
2d92ab3c6   Al Viro   [PATCH] finally g...
1909
  		retval = do_loopback(&path, dev_name, flags & MS_REC);
9676f0c63   Ram Pai   [PATCH] unbindabl...
1910
  	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2d92ab3c6   Al Viro   [PATCH] finally g...
1911
  		retval = do_change_type(&path, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1912
  	else if (flags & MS_MOVE)
2d92ab3c6   Al Viro   [PATCH] finally g...
1913
  		retval = do_move_mount(&path, dev_name);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1914
  	else
2d92ab3c6   Al Viro   [PATCH] finally g...
1915
  		retval = do_new_mount(&path, type_page, flags, mnt_flags,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1916
1917
  				      dev_name, data_page);
  dput_out:
2d92ab3c6   Al Viro   [PATCH] finally g...
1918
  	path_put(&path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1919
1920
  	return retval;
  }
741a29513   JANAK DESAI   [PATCH] unshare s...
1921
1922
1923
1924
  /*
   * Allocate a new namespace structure and populate it with contents
   * copied from the namespace of the passed in task structure.
   */
e3222c4ec   Badari Pulavarty   Merge sys_clone()...
1925
  static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1926
  		struct fs_struct *fs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1927
  {
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1928
  	struct mnt_namespace *new_ns;
7f2da1e7d   Al Viro   [PATCH] kill altroot
1929
  	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1930
  	struct vfsmount *p, *q;
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1931
  	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1932
  	if (!new_ns)
467e9f4b5   Cedric Le Goater   fix create_new_na...
1933
  		return ERR_PTR(-ENOMEM);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1934
1935
  
  	atomic_set(&new_ns->count, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1936
  	INIT_LIST_HEAD(&new_ns->list);
5addc5dd8   Al Viro   [PATCH] make /pro...
1937
1938
  	init_waitqueue_head(&new_ns->poll);
  	new_ns->event = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1939

390c68436   Ram Pai   [PATCH] making na...
1940
  	down_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1941
  	/* First pass: copy the tree topology */
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1942
  	new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
9676f0c63   Ram Pai   [PATCH] unbindabl...
1943
  					CL_COPY_ALL | CL_EXPIRE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1944
  	if (!new_ns->root) {
390c68436   Ram Pai   [PATCH] making na...
1945
  		up_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1946
  		kfree(new_ns);
5cc4a0341   Julia Lawall   fs/namespace.c: d...
1947
  		return ERR_PTR(-ENOMEM);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
  	}
  	spin_lock(&vfsmount_lock);
  	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
  	spin_unlock(&vfsmount_lock);
  
  	/*
  	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
  	 * as belonging to new namespace.  We have already acquired a private
  	 * fs_struct, so tsk->fs->lock is not needed.
  	 */
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1958
  	p = mnt_ns->root;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1959
1960
  	q = new_ns->root;
  	while (p) {
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1961
  		q->mnt_ns = new_ns;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1962
  		if (fs) {
6ac08c39a   Jan Blunck   Use struct path i...
1963
  			if (p == fs->root.mnt) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1964
  				rootmnt = p;
6ac08c39a   Jan Blunck   Use struct path i...
1965
  				fs->root.mnt = mntget(q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1966
  			}
6ac08c39a   Jan Blunck   Use struct path i...
1967
  			if (p == fs->pwd.mnt) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1968
  				pwdmnt = p;
6ac08c39a   Jan Blunck   Use struct path i...
1969
  				fs->pwd.mnt = mntget(q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1970
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1971
  		}
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1972
  		p = next_mnt(p, mnt_ns->root);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1973
1974
  		q = next_mnt(q, new_ns->root);
  	}
390c68436   Ram Pai   [PATCH] making na...
1975
  	up_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1976

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1977
1978
1979
1980
  	if (rootmnt)
  		mntput(rootmnt);
  	if (pwdmnt)
  		mntput(pwdmnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1981

741a29513   JANAK DESAI   [PATCH] unshare s...
1982
1983
  	return new_ns;
  }
213dd266d   Eric W. Biederman   namespace: ensure...
1984
  struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
e3222c4ec   Badari Pulavarty   Merge sys_clone()...
1985
  		struct fs_struct *new_fs)
741a29513   JANAK DESAI   [PATCH] unshare s...
1986
  {
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1987
  	struct mnt_namespace *new_ns;
741a29513   JANAK DESAI   [PATCH] unshare s...
1988

e3222c4ec   Badari Pulavarty   Merge sys_clone()...
1989
  	BUG_ON(!ns);
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1990
  	get_mnt_ns(ns);
741a29513   JANAK DESAI   [PATCH] unshare s...
1991
1992
  
  	if (!(flags & CLONE_NEWNS))
e3222c4ec   Badari Pulavarty   Merge sys_clone()...
1993
  		return ns;
741a29513   JANAK DESAI   [PATCH] unshare s...
1994

e3222c4ec   Badari Pulavarty   Merge sys_clone()...
1995
  	new_ns = dup_mnt_ns(ns, new_fs);
741a29513   JANAK DESAI   [PATCH] unshare s...
1996

6b3286ed1   Kirill Korotaev   [PATCH] rename st...
1997
  	put_mnt_ns(ns);
e3222c4ec   Badari Pulavarty   Merge sys_clone()...
1998
  	return new_ns;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1999
  }
bdc480e3b   Heiko Carstens   [CVE-2009-0029] S...
2000
2001
  /*
   * mount() system call entry: copy the user-supplied arguments into kernel
   * memory, call do_mount() with the kernel lock held, then release the
   * copies in reverse order of acquisition.
   */
  SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
  		char __user *, type, unsigned long, flags, void __user *, data)
  {
  	unsigned long type_page;
  	unsigned long dev_page;
  	unsigned long data_page;
  	char *dir_page;
  	int retval;

  	retval = copy_mount_options(type, &type_page);
  	if (retval < 0)
  		return retval;

  	dir_page = getname(dir_name);
  	if (IS_ERR(dir_page)) {
  		retval = PTR_ERR(dir_page);
  		goto free_type;
  	}

  	retval = copy_mount_options(dev_name, &dev_page);
  	if (retval < 0)
  		goto put_dir;

  	retval = copy_mount_options(data, &data_page);
  	if (retval < 0)
  		goto free_dev;

  	lock_kernel();
  	retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
  			  flags, (void *)data_page);
  	unlock_kernel();
  	free_page(data_page);

  free_dev:
  	free_page(dev_page);
  put_dir:
  	putname(dir_page);
  free_type:
  	free_page(type_page);
  	return retval;
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
   * pivot_root Semantics:
   * Moves the root file system of the current process to the directory put_old,
   * makes new_root as the new root file system of the current process, and sets
   * root/cwd of all processes which had them on the current root to new_root.
   *
   * Restrictions:
   * The new_root and put_old must be directories, and  must not be on the
   * same file  system as the current process root. The put_old  must  be
   * underneath new_root,  i.e. adding a non-zero number of /.. to the string
   * pointed to by put_old must yield the same directory as new_root. No other
   * file system may be mounted on put_old. After all, new_root is a mountpoint.
   *
4a0d11fae   Neil Brown   [PATCH] pivot_roo...
2051
2052
2053
2054
   * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
   * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
   * in this situation.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2055
2056
2057
2058
2059
2060
2061
2062
   * Notes:
   *  - we don't move root/cwd if they are not at the root (reason: if something
   *    cared enough to change them, it's probably wrong to force them elsewhere)
   *  - it's okay to pick a root that isn't the root of a file system, e.g.
   *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
   *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
   *    first.
   */
3480b2574   Heiko Carstens   [CVE-2009-0029] S...
2063
2064
  SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
  		const char __user *, put_old)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2065
2066
  {
  	struct vfsmount *tmp;
2d8f30380   Al Viro   [PATCH] sanitize ...
2067
  	struct path new, old, parent_path, root_parent, root;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2068
2069
2070
2071
  	int error;
  
  	if (!capable(CAP_SYS_ADMIN))
  		return -EPERM;
2d8f30380   Al Viro   [PATCH] sanitize ...
2072
  	error = user_path_dir(new_root, &new);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2073
2074
2075
  	if (error)
  		goto out0;
  	error = -EINVAL;
2d8f30380   Al Viro   [PATCH] sanitize ...
2076
  	if (!check_mnt(new.mnt))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2077
  		goto out1;
2d8f30380   Al Viro   [PATCH] sanitize ...
2078
  	error = user_path_dir(put_old, &old);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2079
2080
  	if (error)
  		goto out1;
2d8f30380   Al Viro   [PATCH] sanitize ...
2081
  	error = security_sb_pivotroot(&old, &new);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2082
  	if (error) {
2d8f30380   Al Viro   [PATCH] sanitize ...
2083
  		path_put(&old);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2084
2085
2086
2087
  		goto out1;
  	}
  
  	read_lock(&current->fs->lock);
8c3ee42e8   Al Viro   [PATCH] get rid o...
2088
  	root = current->fs->root;
6ac08c39a   Jan Blunck   Use struct path i...
2089
  	path_get(&current->fs->root);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2090
  	read_unlock(&current->fs->lock);
390c68436   Ram Pai   [PATCH] making na...
2091
  	down_write(&namespace_sem);
2d8f30380   Al Viro   [PATCH] sanitize ...
2092
  	mutex_lock(&old.dentry->d_inode->i_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2093
  	error = -EINVAL;
2d8f30380   Al Viro   [PATCH] sanitize ...
2094
2095
  	if (IS_MNT_SHARED(old.mnt) ||
  		IS_MNT_SHARED(new.mnt->mnt_parent) ||
8c3ee42e8   Al Viro   [PATCH] get rid o...
2096
  		IS_MNT_SHARED(root.mnt->mnt_parent))
214444032   Ram Pai   [PATCH] shared mo...
2097
  		goto out2;
8c3ee42e8   Al Viro   [PATCH] get rid o...
2098
  	if (!check_mnt(root.mnt))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2099
2100
  		goto out2;
  	error = -ENOENT;
2d8f30380   Al Viro   [PATCH] sanitize ...
2101
  	if (IS_DEADDIR(new.dentry->d_inode))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2102
  		goto out2;
2d8f30380   Al Viro   [PATCH] sanitize ...
2103
  	if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2104
  		goto out2;
2d8f30380   Al Viro   [PATCH] sanitize ...
2105
  	if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2106
2107
  		goto out2;
  	error = -EBUSY;
2d8f30380   Al Viro   [PATCH] sanitize ...
2108
2109
  	if (new.mnt == root.mnt ||
  	    old.mnt == root.mnt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2110
2111
  		goto out2; /* loop, on the same file system  */
  	error = -EINVAL;
8c3ee42e8   Al Viro   [PATCH] get rid o...
2112
  	if (root.mnt->mnt_root != root.dentry)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2113
  		goto out2; /* not a mountpoint */
8c3ee42e8   Al Viro   [PATCH] get rid o...
2114
  	if (root.mnt->mnt_parent == root.mnt)
0bb6fcc13   Miklos Szeredi   [PATCH] pivot_roo...
2115
  		goto out2; /* not attached */
2d8f30380   Al Viro   [PATCH] sanitize ...
2116
  	if (new.mnt->mnt_root != new.dentry)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2117
  		goto out2; /* not a mountpoint */
2d8f30380   Al Viro   [PATCH] sanitize ...
2118
  	if (new.mnt->mnt_parent == new.mnt)
0bb6fcc13   Miklos Szeredi   [PATCH] pivot_roo...
2119
  		goto out2; /* not attached */
4ac913785   Jan Blunck   Embed a struct pa...
2120
  	/* make sure we can reach put_old from new_root */
2d8f30380   Al Viro   [PATCH] sanitize ...
2121
  	tmp = old.mnt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2122
  	spin_lock(&vfsmount_lock);
2d8f30380   Al Viro   [PATCH] sanitize ...
2123
  	if (tmp != new.mnt) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2124
2125
2126
  		for (;;) {
  			if (tmp->mnt_parent == tmp)
  				goto out3; /* already mounted on put_old */
2d8f30380   Al Viro   [PATCH] sanitize ...
2127
  			if (tmp->mnt_parent == new.mnt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2128
2129
2130
  				break;
  			tmp = tmp->mnt_parent;
  		}
2d8f30380   Al Viro   [PATCH] sanitize ...
2131
  		if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2132
  			goto out3;
2d8f30380   Al Viro   [PATCH] sanitize ...
2133
  	} else if (!is_subdir(old.dentry, new.dentry))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2134
  		goto out3;
2d8f30380   Al Viro   [PATCH] sanitize ...
2135
  	detach_mnt(new.mnt, &parent_path);
8c3ee42e8   Al Viro   [PATCH] get rid o...
2136
  	detach_mnt(root.mnt, &root_parent);
4ac913785   Jan Blunck   Embed a struct pa...
2137
  	/* mount old root on put_old */
2d8f30380   Al Viro   [PATCH] sanitize ...
2138
  	attach_mnt(root.mnt, &old);
4ac913785   Jan Blunck   Embed a struct pa...
2139
  	/* mount new_root on / */
2d8f30380   Al Viro   [PATCH] sanitize ...
2140
  	attach_mnt(new.mnt, &root_parent);
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
2141
  	touch_mnt_namespace(current->nsproxy->mnt_ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2142
  	spin_unlock(&vfsmount_lock);
2d8f30380   Al Viro   [PATCH] sanitize ...
2143
2144
  	chroot_fs_refs(&root, &new);
  	security_sb_post_pivotroot(&root, &new);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2145
  	error = 0;
1a3906895   Al Viro   [PATCH] reduce st...
2146
2147
  	path_put(&root_parent);
  	path_put(&parent_path);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2148
  out2:
2d8f30380   Al Viro   [PATCH] sanitize ...
2149
  	mutex_unlock(&old.dentry->d_inode->i_mutex);
390c68436   Ram Pai   [PATCH] making na...
2150
  	up_write(&namespace_sem);
8c3ee42e8   Al Viro   [PATCH] get rid o...
2151
  	path_put(&root);
2d8f30380   Al Viro   [PATCH] sanitize ...
2152
  	path_put(&old);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2153
  out1:
2d8f30380   Al Viro   [PATCH] sanitize ...
2154
  	path_put(&new);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2155
  out0:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2156
2157
2158
2159
2160
2161
2162
2163
2164
  	return error;
  out3:
  	spin_unlock(&vfsmount_lock);
  	goto out2;
  }
  
  /*
   * init_mount_tree - build the initial mount namespace at boot.
   *
   * Mounts "rootfs", allocates a mnt_namespace whose only mount is that
   * rootfs mount, installs it as init_task's namespace (taking an extra
   * reference for it), and points the current task's cwd and root at the
   * root dentry of the new mount.  Either allocation failing is fatal.
   */
  static void __init init_mount_tree(void)
  {
  	struct vfsmount *rootfs_mnt;
  	struct mnt_namespace *init_ns;
  	struct path root;

  	rootfs_mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
  	if (IS_ERR(rootfs_mnt))
  		panic("Can't create rootfs");

  	init_ns = kmalloc(sizeof(*init_ns), GFP_KERNEL);
  	if (!init_ns)
  		panic("Can't allocate initial namespace");

  	/* namespace starts with one reference and an empty mount list */
  	atomic_set(&init_ns->count, 1);
  	INIT_LIST_HEAD(&init_ns->list);
  	init_waitqueue_head(&init_ns->poll);
  	init_ns->event = 0;

  	/* link the rootfs mount and the namespace to each other */
  	list_add(&rootfs_mnt->mnt_list, &init_ns->list);
  	init_ns->root = rootfs_mnt;
  	rootfs_mnt->mnt_ns = init_ns;

  	init_task.nsproxy->mnt_ns = init_ns;
  	get_mnt_ns(init_ns);

  	/* init_ns->root is rootfs_mnt, so use it directly */
  	root.mnt = rootfs_mnt;
  	root.dentry = rootfs_mnt->mnt_root;

  	set_fs_pwd(current->fs, &root);
  	set_fs_root(current->fs, &root);
  }
74bf17cff   Denis Cheng   fs: remove the un...
2190
  /*
   * mnt_init - boot-time initialisation of the mount machinery.
   *
   * In order: initialises namespace_sem, creates the "mnt_cache" slab for
   * struct vfsmount (SLAB_PANIC, so creation failure does not return),
   * allocates one page for mount_hashtable and initialises HASH_SIZE list
   * heads in it, brings up sysfs and the "fs" kobject (failures there are
   * only logged), then registers rootfs and builds the initial mount tree.
   *
   * Failing to get the hash-table page is fatal.
   */
  void __init mnt_init(void)
  {
  	unsigned u;
  	int err;

  	init_rwsem(&namespace_sem);

  	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
  			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

  	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);

  	if (!mount_hashtable)
  		panic("Failed to allocate mount hash table\n");

  	printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);

  	for (u = 0; u < HASH_SIZE; u++)
  		INIT_LIST_HEAD(&mount_hashtable[u]);

  	/* sysfs / kobject problems are reported but do not stop the boot */
  	err = sysfs_init();
  	if (err)
  		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
  			__func__, err);
  	fs_kobj = kobject_create_and_add("fs", NULL);
  	if (!fs_kobj)
  		printk(KERN_WARNING "%s: kobj create error\n", __func__);
  	init_rootfs();
  	init_mount_tree();
  }
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
2222
  void __put_mnt_ns(struct mnt_namespace *ns)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2223
  {
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
2224
  	struct vfsmount *root = ns->root;
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
2225
  	LIST_HEAD(umount_list);
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
2226
  	ns->root = NULL;
1ce88cf46   Miklos Szeredi   [PATCH] namespace...
2227
  	spin_unlock(&vfsmount_lock);
390c68436   Ram Pai   [PATCH] making na...
2228
  	down_write(&namespace_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2229
  	spin_lock(&vfsmount_lock);
a05964f39   Ram Pai   [PATCH] shared mo...
2230
  	umount_tree(root, 0, &umount_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2231
  	spin_unlock(&vfsmount_lock);
390c68436   Ram Pai   [PATCH] making na...
2232
  	up_write(&namespace_sem);
70fbcdf4d   Ram Pai   [PATCH] umount_tr...
2233
  	release_mounts(&umount_list);
6b3286ed1   Kirill Korotaev   [PATCH] rename st...
2234
  	kfree(ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2235
  }