Blame view
fs/namespace.c
70.9 KB
1da177e4c
|
1 2 3 4 5 6 7 8 9 |
/* * linux/fs/namespace.c * * (C) Copyright Al Viro 2000, 2001 * Released under GPL v2. * * Based on code from fs/super.c, copyright Linus Torvalds and others. * Heavily rewritten. */ |
1da177e4c
|
10 |
#include <linux/syscalls.h> |
d10577a8d
|
11 |
#include <linux/export.h> |
16f7e0fe2
|
12 |
#include <linux/capability.h> |
6b3286ed1
|
13 |
#include <linux/mnt_namespace.h> |
771b13716
|
14 |
#include <linux/user_namespace.h> |
1da177e4c
|
15 16 |
#include <linux/namei.h> #include <linux/security.h> |
73cd49ecd
|
17 |
#include <linux/idr.h> |
d10577a8d
|
18 |
#include <linux/acct.h> /* acct_auto_close_mnt */ |
57f150a58
|
19 |
#include <linux/init.h> /* init_rootfs */ |
d10577a8d
|
20 21 22 |
#include <linux/fs_struct.h> /* get_fs_root et.al. */ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ #include <linux/uaccess.h> |
0bb80f240
|
23 |
#include <linux/proc_ns.h> |
20b4fb485
|
24 |
#include <linux/magic.h> |
07b20889e
|
25 |
#include "pnode.h" |
948730b0e
|
26 |
#include "internal.h" |
1da177e4c
|
27 |
|
13f14b4d8
|
28 29 |
#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) #define HASH_SIZE (1UL << HASH_SHIFT) |
5addc5dd8
|
30 |
static int event; |
73cd49ecd
|
31 |
static DEFINE_IDA(mnt_id_ida); |
719f5d7f0
|
32 |
static DEFINE_IDA(mnt_group_ida); |
99b7db7b8
|
33 |
static DEFINE_SPINLOCK(mnt_id_lock); |
f21f62208
|
34 35 |
static int mnt_id_start = 0; static int mnt_group_start = 1; |
1da177e4c
|
36 |
|
fa3536cc1
|
37 |
static struct list_head *mount_hashtable __read_mostly; |
84d17192d
|
38 |
static struct list_head *mountpoint_hashtable __read_mostly; |
e18b890bb
|
39 |
static struct kmem_cache *mnt_cache __read_mostly; |
59aa0da8e
|
40 |
static DECLARE_RWSEM(namespace_sem); |
1da177e4c
|
41 |
|
f87fd4c2a
|
42 |
/* /sys/fs */ |
00d266662
|
43 44 |
struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); |
f87fd4c2a
|
45 |
|
99b7db7b8
|
46 47 48 49 50 51 52 53 |
/* * vfsmount lock may be taken for read to prevent changes to the * vfsmount hash, ie. during mountpoint lookups or walking back * up the tree. * * It should be taken for write in all cases where the vfsmount * tree or hash is modified or when a vfsmount structure is modified. */ |
48a066e72
|
54 |
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); |
99b7db7b8
|
55 |
|
1da177e4c
|
56 57 |
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { |
b58fed8b1
|
58 59 |
unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); |
13f14b4d8
|
60 61 |
tmp = tmp + (tmp >> HASH_SHIFT); return tmp & (HASH_SIZE - 1); |
1da177e4c
|
62 |
} |
99b7db7b8
|
63 64 65 66 |
/* * allocation is serialized by namespace_sem, but we need the spinlock to * serialize with freeing. */ |
b105e270b
|
67 |
static int mnt_alloc_id(struct mount *mnt) |
73cd49ecd
|
68 69 70 71 72 |
{ int res; retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); |
99b7db7b8
|
73 |
spin_lock(&mnt_id_lock); |
15169fe78
|
74 |
res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); |
f21f62208
|
75 |
if (!res) |
15169fe78
|
76 |
mnt_id_start = mnt->mnt_id + 1; |
99b7db7b8
|
77 |
spin_unlock(&mnt_id_lock); |
73cd49ecd
|
78 79 80 81 82 |
if (res == -EAGAIN) goto retry; return res; } |
b105e270b
|
83 |
static void mnt_free_id(struct mount *mnt) |
73cd49ecd
|
84 |
{ |
15169fe78
|
85 |
int id = mnt->mnt_id; |
99b7db7b8
|
86 |
spin_lock(&mnt_id_lock); |
f21f62208
|
87 88 89 |
ida_remove(&mnt_id_ida, id); if (mnt_id_start > id) mnt_id_start = id; |
99b7db7b8
|
90 |
spin_unlock(&mnt_id_lock); |
73cd49ecd
|
91 |
} |
719f5d7f0
|
92 93 94 95 96 |
/* * Allocate a new peer group ID * * mnt_group_ida is protected by namespace_sem */ |
4b8b21f4f
|
97 |
static int mnt_alloc_group_id(struct mount *mnt) |
719f5d7f0
|
98 |
{ |
f21f62208
|
99 |
int res; |
719f5d7f0
|
100 101 |
if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) return -ENOMEM; |
f21f62208
|
102 103 |
res = ida_get_new_above(&mnt_group_ida, mnt_group_start, |
15169fe78
|
104 |
&mnt->mnt_group_id); |
f21f62208
|
105 |
if (!res) |
15169fe78
|
106 |
mnt_group_start = mnt->mnt_group_id + 1; |
f21f62208
|
107 108 |
return res; |
719f5d7f0
|
109 110 111 112 113 |
} /* * Release a peer group ID */ |
4b8b21f4f
|
114 |
void mnt_release_group_id(struct mount *mnt) |
719f5d7f0
|
115 |
{ |
15169fe78
|
116 |
int id = mnt->mnt_group_id; |
f21f62208
|
117 118 119 |
ida_remove(&mnt_group_ida, id); if (mnt_group_start > id) mnt_group_start = id; |
15169fe78
|
120 |
mnt->mnt_group_id = 0; |
719f5d7f0
|
121 |
} |
b3e19d924
|
122 123 124 |
/* * vfsmount lock must be held for read */ |
83adc7532
|
125 |
static inline void mnt_add_count(struct mount *mnt, int n) |
b3e19d924
|
126 127 |
{ #ifdef CONFIG_SMP |
68e8a9fea
|
128 |
this_cpu_add(mnt->mnt_pcp->mnt_count, n); |
b3e19d924
|
129 130 |
#else preempt_disable(); |
68e8a9fea
|
131 |
mnt->mnt_count += n; |
b3e19d924
|
132 133 134 |
preempt_enable(); #endif } |
b3e19d924
|
135 136 137 |
/* * vfsmount lock must be held for write */ |
83adc7532
|
138 |
unsigned int mnt_get_count(struct mount *mnt) |
b3e19d924
|
139 140 |
{ #ifdef CONFIG_SMP |
f03c65993
|
141 |
unsigned int count = 0; |
b3e19d924
|
142 143 144 |
int cpu; for_each_possible_cpu(cpu) { |
68e8a9fea
|
145 |
count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; |
b3e19d924
|
146 147 148 149 |
} return count; #else |
68e8a9fea
|
150 |
return mnt->mnt_count; |
b3e19d924
|
151 152 |
#endif } |
b105e270b
|
153 |
static struct mount *alloc_vfsmnt(const char *name) |
1da177e4c
|
154 |
{ |
c63181e6b
|
155 156 |
struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); if (mnt) { |
73cd49ecd
|
157 |
int err; |
c63181e6b
|
158 |
err = mnt_alloc_id(mnt); |
88b387824
|
159 160 161 162 |
if (err) goto out_free_cache; if (name) { |
c63181e6b
|
163 164 |
mnt->mnt_devname = kstrdup(name, GFP_KERNEL); if (!mnt->mnt_devname) |
88b387824
|
165 |
goto out_free_id; |
73cd49ecd
|
166 |
} |
b3e19d924
|
167 |
#ifdef CONFIG_SMP |
c63181e6b
|
168 169 |
mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); if (!mnt->mnt_pcp) |
b3e19d924
|
170 |
goto out_free_devname; |
c63181e6b
|
171 |
this_cpu_add(mnt->mnt_pcp->mnt_count, 1); |
b3e19d924
|
172 |
#else |
c63181e6b
|
173 174 |
mnt->mnt_count = 1; mnt->mnt_writers = 0; |
b3e19d924
|
175 |
#endif |
c63181e6b
|
176 177 178 179 180 181 182 183 |
INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); INIT_LIST_HEAD(&mnt->mnt_expire); INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); |
2504c5d63
|
184 185 186 |
#ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif |
1da177e4c
|
187 |
} |
c63181e6b
|
188 |
return mnt; |
88b387824
|
189 |
|
d3ef3d735
|
190 191 |
#ifdef CONFIG_SMP out_free_devname: |
c63181e6b
|
192 |
kfree(mnt->mnt_devname); |
d3ef3d735
|
193 |
#endif |
88b387824
|
194 |
out_free_id: |
c63181e6b
|
195 |
mnt_free_id(mnt); |
88b387824
|
196 |
out_free_cache: |
c63181e6b
|
197 |
kmem_cache_free(mnt_cache, mnt); |
88b387824
|
198 |
return NULL; |
1da177e4c
|
199 |
} |
8366025eb
|
200 201 202 203 204 205 206 207 |
/* * Most r/o checks on a fs are for operations that take * discrete amounts of time, like a write() or unlink(). * We must keep track of when those operations start * (for permission checks) and when they end, so that * we can determine when writes are able to occur to * a filesystem. */ |
3d733633a
|
208 209 210 211 212 213 214 215 216 217 218 219 220 |
/* * __mnt_is_readonly: check whether a mount is read-only * @mnt: the mount to check for its write status * * This shouldn't be used directly ouside of the VFS. * It does not guarantee that the filesystem will stay * r/w, just that it is right *now*. This can not and * should not be used in place of IS_RDONLY(inode). * mnt_want/drop_write() will _keep_ the filesystem * r/w. */ int __mnt_is_readonly(struct vfsmount *mnt) { |
2e4b7fcd9
|
221 222 223 224 225 |
if (mnt->mnt_flags & MNT_READONLY) return 1; if (mnt->mnt_sb->s_flags & MS_RDONLY) return 1; return 0; |
3d733633a
|
226 227 |
} EXPORT_SYMBOL_GPL(__mnt_is_readonly); |
83adc7532
|
228 |
static inline void mnt_inc_writers(struct mount *mnt) |
d3ef3d735
|
229 230 |
{ #ifdef CONFIG_SMP |
68e8a9fea
|
231 |
this_cpu_inc(mnt->mnt_pcp->mnt_writers); |
d3ef3d735
|
232 |
#else |
68e8a9fea
|
233 |
mnt->mnt_writers++; |
d3ef3d735
|
234 235 |
#endif } |
3d733633a
|
236 |
|
83adc7532
|
237 |
static inline void mnt_dec_writers(struct mount *mnt) |
3d733633a
|
238 |
{ |
d3ef3d735
|
239 |
#ifdef CONFIG_SMP |
68e8a9fea
|
240 |
this_cpu_dec(mnt->mnt_pcp->mnt_writers); |
d3ef3d735
|
241 |
#else |
68e8a9fea
|
242 |
mnt->mnt_writers--; |
d3ef3d735
|
243 |
#endif |
3d733633a
|
244 |
} |
3d733633a
|
245 |
|
83adc7532
|
246 |
static unsigned int mnt_get_writers(struct mount *mnt) |
3d733633a
|
247 |
{ |
d3ef3d735
|
248 249 |
#ifdef CONFIG_SMP unsigned int count = 0; |
3d733633a
|
250 |
int cpu; |
3d733633a
|
251 252 |
for_each_possible_cpu(cpu) { |
68e8a9fea
|
253 |
count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; |
3d733633a
|
254 |
} |
3d733633a
|
255 |
|
d3ef3d735
|
256 257 258 259 |
return count; #else return mnt->mnt_writers; #endif |
3d733633a
|
260 |
} |
4ed5e82fe
|
261 262 263 264 265 266 267 268 |
/*
 * Like __mnt_is_readonly(), but also reports read-only while the
 * superblock has s_readonly_remount set.
 */
static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (!mnt->mnt_sb->s_readonly_remount) {
		/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
		smp_rmb();
		return __mnt_is_readonly(mnt);
	}
	return 1;
}
3d733633a
|
269 |
/* |
eb04c2828
|
270 271 272 273 |
* Most r/o & frozen checks on a fs are for operations that take discrete * amounts of time, like a write() or unlink(). We must keep track of when * those operations start (for permission checks) and when they end, so that we * can determine when writes are able to occur to a filesystem. |
3d733633a
|
274 |
*/ |
8366025eb
|
275 |
/** |
eb04c2828
|
276 |
* __mnt_want_write - get write access to a mount without freeze protection |
83adc7532
|
277 |
* @m: the mount on which to take a write |
8366025eb
|
278 |
* |
eb04c2828
|
279 280 281 282 283 |
* This tells the low-level filesystem that a write is about to be performed to * it, and makes sure that writes are allowed (mnt it read-write) before * returning success. This operation does not protect against filesystem being * frozen. When the write operation is finished, __mnt_drop_write() must be * called. This is effectively a refcount. |
8366025eb
|
284 |
*/ |
eb04c2828
|
285 |
int __mnt_want_write(struct vfsmount *m) |
8366025eb
|
286 |
{ |
83adc7532
|
287 |
struct mount *mnt = real_mount(m); |
3d733633a
|
288 |
int ret = 0; |
3d733633a
|
289 |
|
d3ef3d735
|
290 |
preempt_disable(); |
c6653a838
|
291 |
mnt_inc_writers(mnt); |
d3ef3d735
|
292 |
/* |
c6653a838
|
293 |
* The store to mnt_inc_writers must be visible before we pass |
d3ef3d735
|
294 295 296 297 |
* MNT_WRITE_HOLD loop below, so that the slowpath can see our * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); |
1e75529e3
|
298 |
while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) |
d3ef3d735
|
299 300 301 302 303 304 305 |
cpu_relax(); /* * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will * be set to match its requirements. So we must not load that until * MNT_WRITE_HOLD is cleared. */ smp_rmb(); |
4ed5e82fe
|
306 |
if (mnt_is_readonly(m)) { |
c6653a838
|
307 |
mnt_dec_writers(mnt); |
3d733633a
|
308 |
ret = -EROFS; |
3d733633a
|
309 |
} |
d3ef3d735
|
310 |
preempt_enable(); |
eb04c2828
|
311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 |
return ret; } /** * mnt_want_write - get write access to a mount * @m: the mount on which to take a write * * This tells the low-level filesystem that a write is about to be performed to * it, and makes sure that writes are allowed (mount is read-write, filesystem * is not frozen) before returning success. When the write operation is * finished, mnt_drop_write() must be called. This is effectively a refcount. */ int mnt_want_write(struct vfsmount *m) { int ret; sb_start_write(m->mnt_sb); ret = __mnt_want_write(m); if (ret) sb_end_write(m->mnt_sb); |
3d733633a
|
332 |
return ret; |
8366025eb
|
333 334 335 336 |
} EXPORT_SYMBOL_GPL(mnt_want_write); /** |
96029c4e0
|
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 |
* mnt_clone_write - get write access to a mount * @mnt: the mount on which to take a write * * This is effectively like mnt_want_write, except * it must only be used to take an extra write reference * on a mountpoint that we already know has a write reference * on it. This allows some optimisation. * * After finished, mnt_drop_write must be called as usual to * drop the reference. */ int mnt_clone_write(struct vfsmount *mnt) { /* superblock may be r/o */ if (__mnt_is_readonly(mnt)) return -EROFS; preempt_disable(); |
83adc7532
|
354 |
mnt_inc_writers(real_mount(mnt)); |
96029c4e0
|
355 356 357 358 359 360 |
preempt_enable(); return 0; } EXPORT_SYMBOL_GPL(mnt_clone_write); /** |
eb04c2828
|
361 |
* __mnt_want_write_file - get write access to a file's mount |
96029c4e0
|
362 363 |
* @file: the file who's mount on which to take a write * |
eb04c2828
|
364 |
* This is like __mnt_want_write, but it takes a file and can |
96029c4e0
|
365 366 |
* do some optimisations if the file is open for write already */ |
eb04c2828
|
367 |
int __mnt_want_write_file(struct file *file) |
96029c4e0
|
368 |
{ |
496ad9aa8
|
369 |
struct inode *inode = file_inode(file); |
eb04c2828
|
370 |
|
2d8dd38a5
|
371 |
if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) |
eb04c2828
|
372 |
return __mnt_want_write(file->f_path.mnt); |
96029c4e0
|
373 374 375 |
else return mnt_clone_write(file->f_path.mnt); } |
eb04c2828
|
376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 |
/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file whose mount on which to take a write
 *
 * This is like mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
int mnt_want_write_file(struct file *file)
{
	int err;

	sb_start_write(file->f_path.mnt->mnt_sb);
	err = __mnt_want_write_file(file);
	if (err)
		sb_end_write(file->f_path.mnt->mnt_sb);	/* no write will happen */
	return err;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);

/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * __mnt_want_write() call above.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	struct mount *m = real_mount(mnt);

	preempt_disable();
	mnt_dec_writers(m);
	preempt_enable();
}
eb04c2828
|
410 411 412 413 414 415 416 417 418 419 420 421 422 423 |
/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it and
 * also allows filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	/* drop the writer count first, then the superblock write section */
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);
eb04c2828
|
425 426 427 428 |
/* Drop the write access held on the mount that @file lives on. */
void __mnt_drop_write_file(struct file *file)
{
	struct vfsmount *m = file->f_path.mnt;

	__mnt_drop_write(m);
}
2a79f17e4
|
429 430 431 432 433 |
/* File-based wrapper: calls mnt_drop_write() on @file's mount. */
void mnt_drop_write_file(struct file *file)
{
	struct vfsmount *m = file->f_path.mnt;

	mnt_drop_write(m);
}
EXPORT_SYMBOL(mnt_drop_write_file);
83adc7532
|
434 |
static int mnt_make_readonly(struct mount *mnt) |
8366025eb
|
435 |
{ |
3d733633a
|
436 |
int ret = 0; |
719ea2fbb
|
437 |
lock_mount_hash(); |
83adc7532
|
438 |
mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; |
3d733633a
|
439 |
/* |
d3ef3d735
|
440 441 |
* After storing MNT_WRITE_HOLD, we'll read the counters. This store * should be visible before we do. |
3d733633a
|
442 |
*/ |
d3ef3d735
|
443 |
smp_mb(); |
3d733633a
|
444 |
/* |
d3ef3d735
|
445 446 447 448 449 450 451 452 453 454 455 456 457 458 |
* With writers on hold, if this value is zero, then there are * definitely no active writers (although held writers may subsequently * increment the count, they'll have to wait, and decrement it after * seeing MNT_READONLY). * * It is OK to have counter incremented on one CPU and decremented on * another: the sum will add up correctly. The danger would be when we * sum up each counter, if we read a counter before it is incremented, * but then read another CPU's count which it has been subsequently * decremented from -- we would see more decrements than we should. * MNT_WRITE_HOLD protects against this scenario, because * mnt_want_write first increments count, then smp_mb, then spins on * MNT_WRITE_HOLD, so it can't be decremented by another CPU while * we're counting up here. |
3d733633a
|
459 |
*/ |
c6653a838
|
460 |
if (mnt_get_writers(mnt) > 0) |
d3ef3d735
|
461 462 |
ret = -EBUSY; else |
83adc7532
|
463 |
mnt->mnt.mnt_flags |= MNT_READONLY; |
d3ef3d735
|
464 465 466 467 468 |
/* * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers * that become unheld will see MNT_READONLY. */ smp_wmb(); |
83adc7532
|
469 |
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; |
719ea2fbb
|
470 |
unlock_mount_hash(); |
3d733633a
|
471 |
return ret; |
8366025eb
|
472 |
} |
8366025eb
|
473 |
|
83adc7532
|
474 |
static void __mnt_unmake_readonly(struct mount *mnt) |
2e4b7fcd9
|
475 |
{ |
719ea2fbb
|
476 |
lock_mount_hash(); |
83adc7532
|
477 |
mnt->mnt.mnt_flags &= ~MNT_READONLY; |
719ea2fbb
|
478 |
unlock_mount_hash(); |
2e4b7fcd9
|
479 |
} |
4ed5e82fe
|
480 481 482 483 |
int sb_prepare_remount_readonly(struct super_block *sb) { struct mount *mnt; int err = 0; |
8e8b87964
|
484 485 486 |
/* Racy optimization. Recheck the counter under MNT_WRITE_HOLD */ if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; |
719ea2fbb
|
487 |
lock_mount_hash(); |
4ed5e82fe
|
488 489 490 491 492 493 494 495 496 497 |
list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; smp_mb(); if (mnt_get_writers(mnt) > 0) { err = -EBUSY; break; } } } |
8e8b87964
|
498 499 |
if (!err && atomic_long_read(&sb->s_remove_count)) err = -EBUSY; |
4ed5e82fe
|
500 501 502 503 504 505 506 507 |
if (!err) { sb->s_readonly_remount = 1; smp_wmb(); } list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } |
719ea2fbb
|
508 |
unlock_mount_hash(); |
4ed5e82fe
|
509 510 511 |
return err; } |
b105e270b
|
512 |
static void free_vfsmnt(struct mount *mnt) |
1da177e4c
|
513 |
{ |
52ba1621d
|
514 |
kfree(mnt->mnt_devname); |
73cd49ecd
|
515 |
mnt_free_id(mnt); |
d3ef3d735
|
516 |
#ifdef CONFIG_SMP |
68e8a9fea
|
517 |
free_percpu(mnt->mnt_pcp); |
d3ef3d735
|
518 |
#endif |
b105e270b
|
519 |
kmem_cache_free(mnt_cache, mnt); |
1da177e4c
|
520 |
} |
48a066e72
|
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 |
/* call under rcu_read_lock */ bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) { struct mount *mnt; if (read_seqretry(&mount_lock, seq)) return false; if (bastard == NULL) return true; mnt = real_mount(bastard); mnt_add_count(mnt, 1); if (likely(!read_seqretry(&mount_lock, seq))) return true; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); return false; } rcu_read_unlock(); mntput(bastard); rcu_read_lock(); return false; } |
1da177e4c
|
542 |
/* |
474279dc0
|
543 |
* find the first mount at @dentry on vfsmount @mnt. |
48a066e72
|
544 |
* call under rcu_read_lock() |
1da177e4c
|
545 |
*/ |
474279dc0
|
546 |
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) |
1da177e4c
|
547 |
{ |
b58fed8b1
|
548 |
struct list_head *head = mount_hashtable + hash(mnt, dentry); |
474279dc0
|
549 |
struct mount *p; |
48a066e72
|
550 |
list_for_each_entry_rcu(p, head, mnt_hash) |
474279dc0
|
551 552 553 554 555 556 557 |
if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) return p; return NULL; } /* * find the last mount at @dentry on vfsmount @mnt. |
48a066e72
|
558 |
* mount_lock must be held. |
474279dc0
|
559 560 561 562 563 564 565 566 567 568 |
*/ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { struct list_head *head = mount_hashtable + hash(mnt, dentry); struct mount *p; list_for_each_entry_reverse(p, head, mnt_hash) if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) return p; return NULL; |
1da177e4c
|
569 |
} |
a05964f39
|
570 |
/* |
f015f1267
|
571 572 573 574 575 576 577 578 579 580 581 582 583 584 |
* lookup_mnt - Return the first child mount mounted at path * * "First" means first mounted chronologically. If you create the * following mounts: * * mount /dev/sda1 /mnt * mount /dev/sda2 /mnt * mount /dev/sda3 /mnt * * Then lookup_mnt() on the base /mnt dentry in the root mount will * return successively the root dentry and vfsmount of /dev/sda1, then * /dev/sda2, then /dev/sda3, then NULL. * * lookup_mnt takes a reference to the found vfsmount. |
a05964f39
|
585 |
*/ |
1c755af4d
|
586 |
struct vfsmount *lookup_mnt(struct path *path) |
a05964f39
|
587 |
{ |
c71053659
|
588 |
struct mount *child_mnt; |
48a066e72
|
589 590 |
struct vfsmount *m; unsigned seq; |
99b7db7b8
|
591 |
|
48a066e72
|
592 593 594 595 596 597 598 599 |
rcu_read_lock(); do { seq = read_seqbegin(&mount_lock); child_mnt = __lookup_mnt(path->mnt, path->dentry); m = child_mnt ? &child_mnt->mnt : NULL; } while (!legitimize_mnt(m, seq)); rcu_read_unlock(); return m; |
a05964f39
|
600 |
} |
84d17192d
|
601 602 603 604 |
static struct mountpoint *new_mountpoint(struct dentry *dentry) { struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry); struct mountpoint *mp; |
eed810076
|
605 |
int ret; |
84d17192d
|
606 607 608 609 610 611 612 613 614 615 616 617 618 619 |
list_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { /* might be worth a WARN_ON() */ if (d_unlinked(dentry)) return ERR_PTR(-ENOENT); mp->m_count++; return mp; } } mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); if (!mp) return ERR_PTR(-ENOMEM); |
eed810076
|
620 621 |
ret = d_set_mounted(dentry); if (ret) { |
84d17192d
|
622 |
kfree(mp); |
eed810076
|
623 |
return ERR_PTR(ret); |
84d17192d
|
624 |
} |
eed810076
|
625 |
|
84d17192d
|
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 |
mp->m_dentry = dentry; mp->m_count = 1; list_add(&mp->m_hash, chain); return mp; } static void put_mountpoint(struct mountpoint *mp) { if (!--mp->m_count) { struct dentry *dentry = mp->m_dentry; spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); list_del(&mp->m_hash); kfree(mp); } } |
143c8c91c
|
643 |
static inline int check_mnt(struct mount *mnt) |
1da177e4c
|
644 |
{ |
6b3286ed1
|
645 |
return mnt->mnt_ns == current->nsproxy->mnt_ns; |
1da177e4c
|
646 |
} |
99b7db7b8
|
647 648 649 |
/* * vfsmount lock must be held for write */ |
6b3286ed1
|
650 |
static void touch_mnt_namespace(struct mnt_namespace *ns) |
5addc5dd8
|
651 652 653 654 655 656 |
{ if (ns) { ns->event = ++event; wake_up_interruptible(&ns->poll); } } |
99b7db7b8
|
657 658 659 |
/* * vfsmount lock must be held for write */ |
6b3286ed1
|
660 |
static void __touch_mnt_namespace(struct mnt_namespace *ns) |
5addc5dd8
|
661 662 663 664 665 666 |
{ if (ns && ns->event != event) { ns->event = event; wake_up_interruptible(&ns->poll); } } |
99b7db7b8
|
667 668 669 |
/* * vfsmount lock must be held for write */ |
419148da6
|
670 671 |
static void detach_mnt(struct mount *mnt, struct path *old_path) { |
a73324da7
|
672 |
old_path->dentry = mnt->mnt_mountpoint; |
0714a5338
|
673 674 |
old_path->mnt = &mnt->mnt_parent->mnt; mnt->mnt_parent = mnt; |
a73324da7
|
675 |
mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
6b41d536f
|
676 |
list_del_init(&mnt->mnt_child); |
1b8e5564b
|
677 |
list_del_init(&mnt->mnt_hash); |
84d17192d
|
678 679 |
put_mountpoint(mnt->mnt_mp); mnt->mnt_mp = NULL; |
1da177e4c
|
680 |
} |
99b7db7b8
|
681 682 683 |
/* * vfsmount lock must be held for write */ |
84d17192d
|
684 685 |
void mnt_set_mountpoint(struct mount *mnt, struct mountpoint *mp, |
44d964d60
|
686 |
struct mount *child_mnt) |
b90fa9ae8
|
687 |
{ |
84d17192d
|
688 |
mp->m_count++; |
3a2393d71
|
689 |
mnt_add_count(mnt, 1); /* essentially, that's mntget */ |
84d17192d
|
690 |
child_mnt->mnt_mountpoint = dget(mp->m_dentry); |
3a2393d71
|
691 |
child_mnt->mnt_parent = mnt; |
84d17192d
|
692 |
child_mnt->mnt_mp = mp; |
b90fa9ae8
|
693 |
} |
99b7db7b8
|
694 695 696 |
/* * vfsmount lock must be held for write */ |
84d17192d
|
697 698 699 |
static void attach_mnt(struct mount *mnt, struct mount *parent, struct mountpoint *mp) |
1da177e4c
|
700 |
{ |
84d17192d
|
701 |
mnt_set_mountpoint(parent, mp, mnt); |
1b8e5564b
|
702 |
list_add_tail(&mnt->mnt_hash, mount_hashtable + |
84d17192d
|
703 704 |
hash(&parent->mnt, mp->m_dentry)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); |
b90fa9ae8
|
705 706 707 |
} /* |
99b7db7b8
|
708 |
* vfsmount lock must be held for write |
b90fa9ae8
|
709 |
*/ |
4b2619a57
|
710 |
static void commit_tree(struct mount *mnt) |
b90fa9ae8
|
711 |
{ |
0714a5338
|
712 |
struct mount *parent = mnt->mnt_parent; |
83adc7532
|
713 |
struct mount *m; |
b90fa9ae8
|
714 |
LIST_HEAD(head); |
143c8c91c
|
715 |
struct mnt_namespace *n = parent->mnt_ns; |
b90fa9ae8
|
716 |
|
0714a5338
|
717 |
BUG_ON(parent == mnt); |
b90fa9ae8
|
718 |
|
1a4eeaf2a
|
719 |
list_add_tail(&head, &mnt->mnt_list); |
f7a99c5b7
|
720 |
list_for_each_entry(m, &head, mnt_list) |
143c8c91c
|
721 |
m->mnt_ns = n; |
f03c65993
|
722 |
|
b90fa9ae8
|
723 |
list_splice(&head, n->list.prev); |
1b8e5564b
|
724 |
list_add_tail(&mnt->mnt_hash, mount_hashtable + |
a73324da7
|
725 |
hash(&parent->mnt, mnt->mnt_mountpoint)); |
6b41d536f
|
726 |
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); |
6b3286ed1
|
727 |
touch_mnt_namespace(n); |
1da177e4c
|
728 |
} |
909b0a88e
|
729 |
static struct mount *next_mnt(struct mount *p, struct mount *root) |
1da177e4c
|
730 |
{ |
6b41d536f
|
731 732 |
struct list_head *next = p->mnt_mounts.next; if (next == &p->mnt_mounts) { |
1da177e4c
|
733 |
while (1) { |
909b0a88e
|
734 |
if (p == root) |
1da177e4c
|
735 |
return NULL; |
6b41d536f
|
736 737 |
next = p->mnt_child.next; if (next != &p->mnt_parent->mnt_mounts) |
1da177e4c
|
738 |
break; |
0714a5338
|
739 |
p = p->mnt_parent; |
1da177e4c
|
740 741 |
} } |
6b41d536f
|
742 |
return list_entry(next, struct mount, mnt_child); |
1da177e4c
|
743 |
} |
315fc83e5
|
744 |
static struct mount *skip_mnt_tree(struct mount *p) |
9676f0c63
|
745 |
{ |
6b41d536f
|
746 747 748 749 |
struct list_head *prev = p->mnt_mounts.prev; while (prev != &p->mnt_mounts) { p = list_entry(prev, struct mount, mnt_child); prev = p->mnt_mounts.prev; |
9676f0c63
|
750 751 752 |
} return p; } |
9d412a43c
|
753 754 755 |
struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { |
b105e270b
|
756 |
struct mount *mnt; |
9d412a43c
|
757 758 759 760 761 762 763 764 765 766 |
struct dentry *root; if (!type) return ERR_PTR(-ENODEV); mnt = alloc_vfsmnt(name); if (!mnt) return ERR_PTR(-ENOMEM); if (flags & MS_KERNMOUNT) |
b105e270b
|
767 |
mnt->mnt.mnt_flags = MNT_INTERNAL; |
9d412a43c
|
768 769 770 771 772 773 |
root = mount_fs(type, flags, name, data); if (IS_ERR(root)) { free_vfsmnt(mnt); return ERR_CAST(root); } |
b105e270b
|
774 775 |
mnt->mnt.mnt_root = root; mnt->mnt.mnt_sb = root->d_sb; |
a73324da7
|
776 |
mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
0714a5338
|
777 |
mnt->mnt_parent = mnt; |
719ea2fbb
|
778 |
lock_mount_hash(); |
39f7c4db1
|
779 |
list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); |
719ea2fbb
|
780 |
unlock_mount_hash(); |
b105e270b
|
781 |
return &mnt->mnt; |
9d412a43c
|
782 783 |
} EXPORT_SYMBOL_GPL(vfs_kern_mount); |
87129cc0e
|
784 |
static struct mount *clone_mnt(struct mount *old, struct dentry *root, |
36341f645
|
785 |
int flag) |
1da177e4c
|
786 |
{ |
87129cc0e
|
787 |
struct super_block *sb = old->mnt.mnt_sb; |
be34d1a3b
|
788 789 |
struct mount *mnt; int err; |
1da177e4c
|
790 |
|
be34d1a3b
|
791 792 793 |
mnt = alloc_vfsmnt(old->mnt_devname); if (!mnt) return ERR_PTR(-ENOMEM); |
719f5d7f0
|
794 |
|
7a472ef4b
|
795 |
if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) |
be34d1a3b
|
796 797 798 |
mnt->mnt_group_id = 0; /* not a peer of original */ else mnt->mnt_group_id = old->mnt_group_id; |
b90fa9ae8
|
799 |
|
be34d1a3b
|
800 801 802 803 |
if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { err = mnt_alloc_group_id(mnt); if (err) goto out_free; |
1da177e4c
|
804 |
} |
be34d1a3b
|
805 806 |
mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; |
132c94e31
|
807 808 809 |
/* Don't allow unprivileged users to change mount flags */ if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; |
5ff9d8a65
|
810 811 812 |
/* Don't allow unprivileged users to reveal what is under a mount */ if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) mnt->mnt.mnt_flags |= MNT_LOCKED; |
be34d1a3b
|
813 814 815 816 817 |
atomic_inc(&sb->s_active); mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; |
719ea2fbb
|
818 |
lock_mount_hash(); |
be34d1a3b
|
819 |
list_add_tail(&mnt->mnt_instance, &sb->s_mounts); |
719ea2fbb
|
820 |
unlock_mount_hash(); |
be34d1a3b
|
821 |
|
7a472ef4b
|
822 823 |
if ((flag & CL_SLAVE) || ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { |
be34d1a3b
|
824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 |
list_add(&mnt->mnt_slave, &old->mnt_slave_list); mnt->mnt_master = old; CLEAR_MNT_SHARED(mnt); } else if (!(flag & CL_PRIVATE)) { if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old)) list_add(&mnt->mnt_share, &old->mnt_share); if (IS_MNT_SLAVE(old)) list_add(&mnt->mnt_slave, &old->mnt_slave); mnt->mnt_master = old->mnt_master; } if (flag & CL_MAKE_SHARED) set_mnt_shared(mnt); /* stick the duplicate mount on the same expiry list * as the original if that was on one */ if (flag & CL_EXPIRE) { if (!list_empty(&old->mnt_expire)) list_add(&mnt->mnt_expire, &old->mnt_expire); } |
cb338d06e
|
843 |
return mnt; |
719f5d7f0
|
844 845 846 |
out_free: free_vfsmnt(mnt); |
be34d1a3b
|
847 |
return ERR_PTR(err); |
1da177e4c
|
848 |
} |
48a066e72
|
849 850 851 852 853 854 855 856 857 |
static void delayed_free(struct rcu_head *head) { struct mount *mnt = container_of(head, struct mount, mnt_rcu); kfree(mnt->mnt_devname); #ifdef CONFIG_SMP free_percpu(mnt->mnt_pcp); #endif kmem_cache_free(mnt_cache, mnt); } |
900148dca
|
858 |
static void mntput_no_expire(struct mount *mnt) |
b3e19d924
|
859 |
{ |
b3e19d924
|
860 |
put_again: |
48a066e72
|
861 862 863 864 |
rcu_read_lock(); mnt_add_count(mnt, -1); if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ rcu_read_unlock(); |
f03c65993
|
865 |
return; |
b3e19d924
|
866 |
} |
719ea2fbb
|
867 |
lock_mount_hash(); |
b3e19d924
|
868 |
if (mnt_get_count(mnt)) { |
48a066e72
|
869 |
rcu_read_unlock(); |
719ea2fbb
|
870 |
unlock_mount_hash(); |
99b7db7b8
|
871 872 |
return; } |
863d684f9
|
873 874 875 |
if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; |
48a066e72
|
876 |
rcu_read_unlock(); |
719ea2fbb
|
877 |
unlock_mount_hash(); |
900148dca
|
878 |
acct_auto_close_mnt(&mnt->mnt); |
b3e19d924
|
879 |
goto put_again; |
7b7b1ace2
|
880 |
} |
48a066e72
|
881 882 883 884 885 886 887 |
if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { rcu_read_unlock(); unlock_mount_hash(); return; } mnt->mnt.mnt_flags |= MNT_DOOMED; rcu_read_unlock(); |
962830df3
|
888 |
|
39f7c4db1
|
889 |
list_del(&mnt->mnt_instance); |
719ea2fbb
|
890 |
unlock_mount_hash(); |
649a795af
|
891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 |
/* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this * happens, the filesystem was probably unable * to make r/w->r/o transitions. */ /* * The locking used to deal with mnt_count decrement provides barriers, * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); fsnotify_vfsmount_delete(&mnt->mnt); dput(mnt->mnt.mnt_root); deactivate_super(mnt->mnt.mnt_sb); |
48a066e72
|
906 907 |
mnt_free_id(mnt); call_rcu(&mnt->mnt_rcu, delayed_free); |
b3e19d924
|
908 |
} |
b3e19d924
|
909 910 911 912 |
void mntput(struct vfsmount *mnt) { if (mnt) { |
863d684f9
|
913 |
struct mount *m = real_mount(mnt); |
b3e19d924
|
914 |
/* avoid cacheline pingpong, hope gcc doesn't get "smart" */ |
863d684f9
|
915 916 917 |
if (unlikely(m->mnt_expiry_mark)) m->mnt_expiry_mark = 0; mntput_no_expire(m); |
b3e19d924
|
918 919 920 921 922 923 924 |
} } EXPORT_SYMBOL(mntput); struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) |
83adc7532
|
925 |
mnt_add_count(real_mount(mnt), 1); |
b3e19d924
|
926 927 928 |
return mnt; } EXPORT_SYMBOL(mntget); |
7b7b1ace2
|
929 930 |
void mnt_pin(struct vfsmount *mnt) { |
719ea2fbb
|
931 |
lock_mount_hash(); |
863d684f9
|
932 |
real_mount(mnt)->mnt_pinned++; |
719ea2fbb
|
933 |
unlock_mount_hash(); |
7b7b1ace2
|
934 |
} |
7b7b1ace2
|
935 |
EXPORT_SYMBOL(mnt_pin); |
863d684f9
|
936 |
void mnt_unpin(struct vfsmount *m) |
7b7b1ace2
|
937 |
{ |
863d684f9
|
938 |
struct mount *mnt = real_mount(m); |
719ea2fbb
|
939 |
lock_mount_hash(); |
7b7b1ace2
|
940 |
if (mnt->mnt_pinned) { |
863d684f9
|
941 |
mnt_add_count(mnt, 1); |
7b7b1ace2
|
942 943 |
mnt->mnt_pinned--; } |
719ea2fbb
|
944 |
unlock_mount_hash(); |
7b7b1ace2
|
945 |
} |
7b7b1ace2
|
946 |
EXPORT_SYMBOL(mnt_unpin); |
1da177e4c
|
947 |
|
b3b304a23
|
948 949 950 951 952 953 954 955 956 957 958 959 |
/*
 * Write @s to seq_file @m, escaping space, tab, newline and backslash.
 *
 * NOTE(review): the escape set previously listed the space character
 * twice and had no newline; newline is escaped here so that an option
 * string cannot introduce a line break into the generated output.
 */
static inline void mangle(struct seq_file *m, const char *s)
{
	seq_escape(m, s, " \t\n\\");
}

/*
 * Simple .show_options callback for filesystems which don't want to
 * implement more complex mount option showing.
 *
 * See also save_mount_options().
 */
34c80b1d9
|
960 |
int generic_show_options(struct seq_file *m, struct dentry *root) |
b3b304a23
|
961 |
{ |
2a32cebd6
|
962 963 964 |
const char *options; rcu_read_lock(); |
34c80b1d9
|
965 |
options = rcu_dereference(root->d_sb->s_options); |
b3b304a23
|
966 967 968 969 970 |
if (options != NULL && options[0]) { seq_putc(m, ','); mangle(m, options); } |
2a32cebd6
|
971 |
rcu_read_unlock(); |
b3b304a23
|
972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 |
return 0; } EXPORT_SYMBOL(generic_show_options); /* * If filesystem uses generic_show_options(), this function should be * called from the fill_super() callback. * * The .remount_fs callback usually needs to be handled in a special * way, to make sure, that previous options are not overwritten if the * remount fails. * * Also note, that if the filesystem's .remount_fs function doesn't * reset all options to their default value, but changes only newly * given options, then the displayed options will not reflect reality * any more. */ void save_mount_options(struct super_block *sb, char *options) { |
2a32cebd6
|
992 993 |
BUG_ON(sb->s_options); rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL)); |
b3b304a23
|
994 995 |
} EXPORT_SYMBOL(save_mount_options); |
2a32cebd6
|
996 997 998 999 1000 1001 1002 1003 1004 1005 |
/*
 * Publish @options as the new @sb->s_options.  @options is stored as is
 * (no copy is made).  A previous non-NULL string is kfree()d after
 * synchronize_rcu() has returned.
 */
void replace_mount_options(struct super_block *sb, char *options)
{
	char *prev = sb->s_options;

	rcu_assign_pointer(sb->s_options, options);
	if (!prev)
		return;

	synchronize_rcu();
	kfree(prev);
}
EXPORT_SYMBOL(replace_mount_options);
a1a2c409b
|
1006 |
#ifdef CONFIG_PROC_FS |
0226f4923
|
1007 |
/* iterator; we want it to have access to namespace_sem, thus here... */ |
1da177e4c
|
1008 1009 |
static void *m_start(struct seq_file *m, loff_t *pos) { |
6ce6e24e7
|
1010 |
struct proc_mounts *p = proc_mounts(m); |
1da177e4c
|
1011 |
|
390c68436
|
1012 |
down_read(&namespace_sem); |
a1a2c409b
|
1013 |
return seq_list_start(&p->ns->list, *pos); |
1da177e4c
|
1014 1015 1016 1017 |
} static void *m_next(struct seq_file *m, void *v, loff_t *pos) { |
6ce6e24e7
|
1018 |
struct proc_mounts *p = proc_mounts(m); |
b0765fb85
|
1019 |
|
a1a2c409b
|
1020 |
return seq_list_next(v, &p->ns->list, pos); |
1da177e4c
|
1021 1022 1023 1024 |
} static void m_stop(struct seq_file *m, void *v) { |
390c68436
|
1025 |
up_read(&namespace_sem); |
1da177e4c
|
1026 |
} |
0226f4923
|
1027 |
static int m_show(struct seq_file *m, void *v) |
2d4d4864a
|
1028 |
{ |
6ce6e24e7
|
1029 |
struct proc_mounts *p = proc_mounts(m); |
1a4eeaf2a
|
1030 |
struct mount *r = list_entry(v, struct mount, mnt_list); |
0226f4923
|
1031 |
return p->show(m, &r->mnt); |
1da177e4c
|
1032 |
} |
a1a2c409b
|
1033 |
const struct seq_operations mounts_op = { |
1da177e4c
|
1034 1035 1036 |
.start = m_start, .next = m_next, .stop = m_stop, |
0226f4923
|
1037 |
.show = m_show, |
b4629fe2f
|
1038 |
}; |
a1a2c409b
|
1039 |
#endif /* CONFIG_PROC_FS */ |
b4629fe2f
|
1040 |
|
1da177e4c
|
1041 1042 1043 1044 1045 1046 1047 1048 |
/** * may_umount_tree - check if a mount tree is busy * @mnt: root of mount tree * * This is called to check if a tree of mounts has any * open files, pwds, chroots or sub mounts that are * busy. */ |
909b0a88e
|
1049 |
int may_umount_tree(struct vfsmount *m) |
1da177e4c
|
1050 |
{ |
909b0a88e
|
1051 |
struct mount *mnt = real_mount(m); |
36341f645
|
1052 1053 |
int actual_refs = 0; int minimum_refs = 0; |
315fc83e5
|
1054 |
struct mount *p; |
909b0a88e
|
1055 |
BUG_ON(!m); |
1da177e4c
|
1056 |
|
b3e19d924
|
1057 |
/* write lock needed for mnt_get_count */ |
719ea2fbb
|
1058 |
lock_mount_hash(); |
909b0a88e
|
1059 |
for (p = mnt; p; p = next_mnt(p, mnt)) { |
83adc7532
|
1060 |
actual_refs += mnt_get_count(p); |
1da177e4c
|
1061 |
minimum_refs += 2; |
1da177e4c
|
1062 |
} |
719ea2fbb
|
1063 |
unlock_mount_hash(); |
1da177e4c
|
1064 1065 |
if (actual_refs > minimum_refs) |
e3474a8eb
|
1066 |
return 0; |
1da177e4c
|
1067 |
|
e3474a8eb
|
1068 |
return 1; |
1da177e4c
|
1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 |
} EXPORT_SYMBOL(may_umount_tree); /** * may_umount - check if a mount point is busy * @mnt: root of mount * * This is called to check if a mount point has any * open files, pwds, chroots or sub mounts. If the * mount has sub mounts this will return busy * regardless of whether the sub mounts are busy. * * Doesn't take quota and stuff into account. IOW, in some cases it will * give false negatives. The main reason why it's here is that we need * a non-destructive way to look for easily umountable filesystems. */ int may_umount(struct vfsmount *mnt) { |
e3474a8eb
|
1088 |
int ret = 1; |
8ad08d8a0
|
1089 |
down_read(&namespace_sem); |
719ea2fbb
|
1090 |
lock_mount_hash(); |
1ab597386
|
1091 |
if (propagate_mount_busy(real_mount(mnt), 2)) |
e3474a8eb
|
1092 |
ret = 0; |
719ea2fbb
|
1093 |
unlock_mount_hash(); |
8ad08d8a0
|
1094 |
up_read(&namespace_sem); |
a05964f39
|
1095 |
return ret; |
1da177e4c
|
1096 1097 1098 |
} EXPORT_SYMBOL(may_umount); |
e3197d83d
|
1099 |
static LIST_HEAD(unmounted); /* protected by namespace_sem */ |
97216be09
|
1100 |
static void namespace_unlock(void) |
70fbcdf4d
|
1101 |
{ |
d5e50f74d
|
1102 |
struct mount *mnt; |
97216be09
|
1103 1104 1105 1106 1107 1108 1109 1110 1111 |
LIST_HEAD(head); if (likely(list_empty(&unmounted))) { up_write(&namespace_sem); return; } list_splice_init(&unmounted, &head); up_write(&namespace_sem); |
48a066e72
|
1112 |
synchronize_rcu(); |
97216be09
|
1113 1114 |
while (!list_empty(&head)) { mnt = list_first_entry(&head, struct mount, mnt_hash); |
1b8e5564b
|
1115 |
list_del_init(&mnt->mnt_hash); |
aba809cf0
|
1116 1117 |
if (mnt->mnt_ex_mountpoint.mnt) path_put(&mnt->mnt_ex_mountpoint); |
d5e50f74d
|
1118 |
mntput(&mnt->mnt); |
70fbcdf4d
|
1119 1120 |
} } |
97216be09
|
1121 |
static inline void namespace_lock(void) |
e3197d83d
|
1122 |
{ |
97216be09
|
1123 |
down_write(&namespace_sem); |
e3197d83d
|
1124 |
} |
99b7db7b8
|
1125 |
/* |
48a066e72
|
1126 |
* mount_lock must be held |
99b7db7b8
|
1127 |
* namespace_sem must be held for write |
48a066e72
|
1128 1129 1130 |
* how = 0 => just this tree, don't propagate * how = 1 => propagate; we know that nobody else has reference to any victims * how = 2 => lazy umount |
99b7db7b8
|
1131 |
*/ |
48a066e72
|
1132 |
void umount_tree(struct mount *mnt, int how) |
1da177e4c
|
1133 |
{ |
7b8a53fd8
|
1134 |
LIST_HEAD(tmp_list); |
315fc83e5
|
1135 |
struct mount *p; |
1da177e4c
|
1136 |
|
909b0a88e
|
1137 |
for (p = mnt; p; p = next_mnt(p, mnt)) |
1b8e5564b
|
1138 |
list_move(&p->mnt_hash, &tmp_list); |
1da177e4c
|
1139 |
|
48a066e72
|
1140 |
if (how) |
7b8a53fd8
|
1141 |
propagate_umount(&tmp_list); |
a05964f39
|
1142 |
|
1b8e5564b
|
1143 |
list_for_each_entry(p, &tmp_list, mnt_hash) { |
6776db3d3
|
1144 |
list_del_init(&p->mnt_expire); |
1a4eeaf2a
|
1145 |
list_del_init(&p->mnt_list); |
143c8c91c
|
1146 1147 |
__touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; |
48a066e72
|
1148 1149 |
if (how < 2) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; |
6b41d536f
|
1150 |
list_del_init(&p->mnt_child); |
676da58df
|
1151 |
if (mnt_has_parent(p)) { |
84d17192d
|
1152 |
put_mountpoint(p->mnt_mp); |
aba809cf0
|
1153 1154 1155 1156 1157 |
/* move the reference to mountpoint into ->mnt_ex_mountpoint */ p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; p->mnt_mountpoint = p->mnt.mnt_root; p->mnt_parent = p; |
84d17192d
|
1158 |
p->mnt_mp = NULL; |
7c4b93d82
|
1159 |
} |
0f0afb1dc
|
1160 |
change_mnt_propagation(p, MS_PRIVATE); |
1da177e4c
|
1161 |
} |
328e6d901
|
1162 |
list_splice(&tmp_list, &unmounted); |
1da177e4c
|
1163 |
} |
b54b9be78
|
1164 |
static void shrink_submounts(struct mount *mnt); |
c35038bec
|
1165 |
|
1ab597386
|
1166 |
static int do_umount(struct mount *mnt, int flags) |
1da177e4c
|
1167 |
{ |
1ab597386
|
1168 |
struct super_block *sb = mnt->mnt.mnt_sb; |
1da177e4c
|
1169 |
int retval; |
1ab597386
|
1170 |
retval = security_sb_umount(&mnt->mnt, flags); |
1da177e4c
|
1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 |
if (retval) return retval; /* * Allow userspace to request a mountpoint be expired rather than * unmounting unconditionally. Unmount only happens if: * (1) the mark is already set (the mark is cleared by mntput()) * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] */ if (flags & MNT_EXPIRE) { |
1ab597386
|
1181 |
if (&mnt->mnt == current->fs->root.mnt || |
1da177e4c
|
1182 1183 |
flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; |
b3e19d924
|
1184 1185 1186 1187 |
/* * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. */ |
719ea2fbb
|
1188 |
lock_mount_hash(); |
83adc7532
|
1189 |
if (mnt_get_count(mnt) != 2) { |
719ea2fbb
|
1190 |
unlock_mount_hash(); |
1da177e4c
|
1191 |
return -EBUSY; |
b3e19d924
|
1192 |
} |
719ea2fbb
|
1193 |
unlock_mount_hash(); |
1da177e4c
|
1194 |
|
863d684f9
|
1195 |
if (!xchg(&mnt->mnt_expiry_mark, 1)) |
1da177e4c
|
1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 |
return -EAGAIN; } /* * If we may have to abort operations to get out of this * mount, and they will themselves hold resources we must * allow the fs to do things. In the Unix tradition of * 'Gee thats tricky lets do it in userspace' the umount_begin * might fail to complete on the first run through as other tasks * must return, and the like. Thats for the mount program to worry * about for the moment. */ |
42faad996
|
1208 |
if (flags & MNT_FORCE && sb->s_op->umount_begin) { |
42faad996
|
1209 |
sb->s_op->umount_begin(sb); |
42faad996
|
1210 |
} |
1da177e4c
|
1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 |
/* * No sense to grab the lock for this test, but test itself looks * somewhat bogus. Suggestions for better replacement? * Ho-hum... In principle, we might treat that as umount + switch * to rootfs. GC would eventually take care of the old vfsmount. * Actually it makes sense, especially if rootfs would contain a * /reboot - static binary that would close all descriptors and * call reboot(9). Then init(8) could umount root and exec /reboot. */ |
1ab597386
|
1221 |
if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) { |
1da177e4c
|
1222 1223 1224 1225 1226 |
/* * Special case for "unmounting" root ... * we just try to remount it readonly. */ down_write(&sb->s_umount); |
4aa98cf76
|
1227 |
if (!(sb->s_flags & MS_RDONLY)) |
1da177e4c
|
1228 |
retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); |
1da177e4c
|
1229 1230 1231 |
up_write(&sb->s_umount); return retval; } |
97216be09
|
1232 |
namespace_lock(); |
719ea2fbb
|
1233 |
lock_mount_hash(); |
5addc5dd8
|
1234 |
event++; |
1da177e4c
|
1235 |
|
48a066e72
|
1236 |
if (flags & MNT_DETACH) { |
1a4eeaf2a
|
1237 |
if (!list_empty(&mnt->mnt_list)) |
48a066e72
|
1238 |
umount_tree(mnt, 2); |
1da177e4c
|
1239 |
retval = 0; |
48a066e72
|
1240 1241 1242 1243 1244 1245 1246 1247 |
} else { shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { if (!list_empty(&mnt->mnt_list)) umount_tree(mnt, 1); retval = 0; } |
1da177e4c
|
1248 |
} |
719ea2fbb
|
1249 |
unlock_mount_hash(); |
e3197d83d
|
1250 |
namespace_unlock(); |
1da177e4c
|
1251 1252 |
return retval; } |
9b40bc90a
|
1253 1254 1255 1256 1257 1258 1259 |
/*
 * Is the caller allowed to modify his namespace?
 *
 * True when the current task has CAP_SYS_ADMIN in the user namespace
 * recorded in its mount namespace.
 */
static inline bool may_mount(void)
{
	struct user_namespace *owner = current->nsproxy->mnt_ns->user_ns;

	return ns_capable(owner, CAP_SYS_ADMIN);
}
1da177e4c
|
1260 1261 1262 1263 1264 1265 1266 |
/* * Now umount can handle mount points as well as block devices. * This is important for filesystems which use unnamed block devices. * * We now support a flag for forced unmount like the other 'big iron' * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD */ |
bdc480e3b
|
1267 |
SYSCALL_DEFINE2(umount, char __user *, name, int, flags) |
1da177e4c
|
1268 |
{ |
2d8f30380
|
1269 |
struct path path; |
900148dca
|
1270 |
struct mount *mnt; |
1da177e4c
|
1271 |
int retval; |
db1f05bb8
|
1272 |
int lookup_flags = 0; |
1da177e4c
|
1273 |
|
db1f05bb8
|
1274 1275 |
if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) return -EINVAL; |
9b40bc90a
|
1276 1277 |
if (!may_mount()) return -EPERM; |
db1f05bb8
|
1278 1279 |
if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; |
197df04c7
|
1280 |
retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path); |
1da177e4c
|
1281 1282 |
if (retval) goto out; |
900148dca
|
1283 |
mnt = real_mount(path.mnt); |
1da177e4c
|
1284 |
retval = -EINVAL; |
2d8f30380
|
1285 |
if (path.dentry != path.mnt->mnt_root) |
1da177e4c
|
1286 |
goto dput_and_out; |
143c8c91c
|
1287 |
if (!check_mnt(mnt)) |
1da177e4c
|
1288 |
goto dput_and_out; |
5ff9d8a65
|
1289 1290 |
if (mnt->mnt.mnt_flags & MNT_LOCKED) goto dput_and_out; |
1da177e4c
|
1291 |
|
900148dca
|
1292 |
retval = do_umount(mnt, flags); |
1da177e4c
|
1293 |
dput_and_out: |
429731b15
|
1294 |
/* we mustn't call path_put() as that would clear mnt_expiry_mark */ |
2d8f30380
|
1295 |
dput(path.dentry); |
900148dca
|
1296 |
mntput_no_expire(mnt); |
1da177e4c
|
1297 1298 1299 1300 1301 1302 1303 |
out: return retval; } #ifdef __ARCH_WANT_SYS_OLDUMOUNT /* |
b58fed8b1
|
1304 |
* The 2.0 compatible umount. No flags. |
1da177e4c
|
1305 |
*/ |
bdc480e3b
|
1306 |
SYSCALL_DEFINE1(oldumount, char __user *, name) |
1da177e4c
|
1307 |
{ |
b58fed8b1
|
1308 |
return sys_umount(name, 0); |
1da177e4c
|
1309 1310 1311 |
} #endif |
4ce5d2b1a
|
1312 |
static bool is_mnt_ns_file(struct dentry *dentry) |
8823c079b
|
1313 |
{ |
4ce5d2b1a
|
1314 1315 |
/* Is this a proxy for a mount namespace? */ struct inode *inode = dentry->d_inode; |
0bb80f240
|
1316 |
struct proc_ns *ei; |
8823c079b
|
1317 1318 1319 |
if (!proc_ns_inode(inode)) return false; |
0bb80f240
|
1320 |
ei = get_proc_ns(inode); |
8823c079b
|
1321 1322 |
if (ei->ns_ops != &mntns_operations) return false; |
4ce5d2b1a
|
1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 |
return true; } static bool mnt_ns_loop(struct dentry *dentry) { /* Could bind mounting the mount namespace inode cause a * mount namespace loop? */ struct mnt_namespace *mnt_ns; if (!is_mnt_ns_file(dentry)) return false; mnt_ns = get_proc_ns(dentry->d_inode)->ns; |
8823c079b
|
1336 1337 |
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; } |
87129cc0e
|
1338 |
struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, |
36341f645
|
1339 |
int flag) |
1da177e4c
|
1340 |
{ |
84d17192d
|
1341 |
struct mount *res, *p, *q, *r, *parent; |
1da177e4c
|
1342 |
|
4ce5d2b1a
|
1343 1344 1345 1346 |
if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt)) return ERR_PTR(-EINVAL); if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry)) |
be34d1a3b
|
1347 |
return ERR_PTR(-EINVAL); |
9676f0c63
|
1348 |
|
36341f645
|
1349 |
res = q = clone_mnt(mnt, dentry, flag); |
be34d1a3b
|
1350 1351 |
if (IS_ERR(q)) return q; |
5ff9d8a65
|
1352 |
q->mnt.mnt_flags &= ~MNT_LOCKED; |
a73324da7
|
1353 |
q->mnt_mountpoint = mnt->mnt_mountpoint; |
1da177e4c
|
1354 1355 |
p = mnt; |
6b41d536f
|
1356 |
list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { |
315fc83e5
|
1357 |
struct mount *s; |
7ec02ef15
|
1358 |
if (!is_subdir(r->mnt_mountpoint, dentry)) |
1da177e4c
|
1359 |
continue; |
909b0a88e
|
1360 |
for (s = r; s; s = next_mnt(s, r)) { |
4ce5d2b1a
|
1361 1362 1363 1364 1365 1366 1367 |
if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { s = skip_mnt_tree(s); continue; } if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(s->mnt.mnt_root)) { |
9676f0c63
|
1368 1369 1370 |
s = skip_mnt_tree(s); continue; } |
0714a5338
|
1371 1372 1373 |
while (p != s->mnt_parent) { p = p->mnt_parent; q = q->mnt_parent; |
1da177e4c
|
1374 |
} |
87129cc0e
|
1375 |
p = s; |
84d17192d
|
1376 |
parent = q; |
87129cc0e
|
1377 |
q = clone_mnt(p, p->mnt.mnt_root, flag); |
be34d1a3b
|
1378 1379 |
if (IS_ERR(q)) goto out; |
719ea2fbb
|
1380 |
lock_mount_hash(); |
1a4eeaf2a
|
1381 |
list_add_tail(&q->mnt_list, &res->mnt_list); |
84d17192d
|
1382 |
attach_mnt(q, parent, p->mnt_mp); |
719ea2fbb
|
1383 |
unlock_mount_hash(); |
1da177e4c
|
1384 1385 1386 |
} } return res; |
be34d1a3b
|
1387 |
out: |
1da177e4c
|
1388 |
if (res) { |
719ea2fbb
|
1389 |
lock_mount_hash(); |
328e6d901
|
1390 |
umount_tree(res, 0); |
719ea2fbb
|
1391 |
unlock_mount_hash(); |
1da177e4c
|
1392 |
} |
be34d1a3b
|
1393 |
return q; |
1da177e4c
|
1394 |
} |
be34d1a3b
|
1395 |
/* Caller should check returned pointer for errors */ |
589ff870e
|
1396 |
struct vfsmount *collect_mounts(struct path *path) |
8aec08094
|
1397 |
{ |
cb338d06e
|
1398 |
struct mount *tree; |
97216be09
|
1399 |
namespace_lock(); |
87129cc0e
|
1400 1401 |
tree = copy_tree(real_mount(path->mnt), path->dentry, CL_COPY_ALL | CL_PRIVATE); |
328e6d901
|
1402 |
namespace_unlock(); |
be34d1a3b
|
1403 |
if (IS_ERR(tree)) |
52e220d35
|
1404 |
return ERR_CAST(tree); |
be34d1a3b
|
1405 |
return &tree->mnt; |
8aec08094
|
1406 1407 1408 1409 |
} void drop_collected_mounts(struct vfsmount *mnt) { |
97216be09
|
1410 |
namespace_lock(); |
719ea2fbb
|
1411 |
lock_mount_hash(); |
328e6d901
|
1412 |
umount_tree(real_mount(mnt), 0); |
719ea2fbb
|
1413 |
unlock_mount_hash(); |
3ab6abee5
|
1414 |
namespace_unlock(); |
8aec08094
|
1415 |
} |
1f707137b
|
1416 1417 1418 |
int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, struct vfsmount *root) { |
1a4eeaf2a
|
1419 |
struct mount *mnt; |
1f707137b
|
1420 1421 1422 |
int res = f(root, arg); if (res) return res; |
1a4eeaf2a
|
1423 1424 |
list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) { res = f(&mnt->mnt, arg); |
1f707137b
|
1425 1426 1427 1428 1429 |
if (res) return res; } return 0; } |
4b8b21f4f
|
1430 |
static void cleanup_group_ids(struct mount *mnt, struct mount *end) |
719f5d7f0
|
1431 |
{ |
315fc83e5
|
1432 |
struct mount *p; |
719f5d7f0
|
1433 |
|
909b0a88e
|
1434 |
for (p = mnt; p != end; p = next_mnt(p, mnt)) { |
fc7be130c
|
1435 |
if (p->mnt_group_id && !IS_MNT_SHARED(p)) |
4b8b21f4f
|
1436 |
mnt_release_group_id(p); |
719f5d7f0
|
1437 1438 |
} } |
4b8b21f4f
|
1439 |
/*
 * Allocate a group id for @mnt - and, when @recurse is set, for every
 * mount below it - wherever a mount has no group id yet and is not
 * shared.
 *
 * Returns 0 on success.  On allocation failure the mounts visited before
 * the failing one are passed to cleanup_group_ids() and the error from
 * mnt_alloc_group_id() is returned.
 */
static int invent_group_ids(struct mount *mnt, bool recurse)
{
	struct mount *cur;

	for (cur = mnt; cur; cur = recurse ? next_mnt(cur, mnt) : NULL) {
		int err;

		if (cur->mnt_group_id || IS_MNT_SHARED(cur))
			continue;

		err = mnt_alloc_group_id(cur);
		if (err) {
			cleanup_group_ids(mnt, cur);
			return err;
		}
	}

	return 0;
}
b90fa9ae8
|
1455 1456 |
/* * @source_mnt : mount tree to be attached |
214444032
|
1457 1458 1459 1460 |
 *  @dest_mnt   : mount under which the mount tree @source_mnt is attached
 *  @dest_mp    : mountpoint at which the mount tree @source_mnt is attached
 *  @parent_path: if non-null, detach the source_mnt from its parent and
 *  		   store the parent mount and mountpoint dentry.
 *  		   (done when source_mnt is moved)
b90fa9ae8
|
1461 1462 1463 |
 *
 *  NOTE: the table below explains the semantics when a source mount
 *  of a given type is attached to a destination mount of a given type.
9676f0c63
|
1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 |
* --------------------------------------------------------------------------- * | BIND MOUNT OPERATION | * |************************************************************************** * | source-->| shared | private | slave | unbindable | * | dest | | | | | * | | | | | | | * | v | | | | | * |************************************************************************** * | shared | shared (++) | shared (+) | shared(+++)| invalid | * | | | | | | * |non-shared| shared (+) | private | slave (*) | invalid | * *************************************************************************** |
b90fa9ae8
|
1476 1477 1478 1479 1480 1481 1482 1483 1484 |
* A bind operation clones the source mount and mounts the clone on the * destination mount. * * (++) the cloned mount is propagated to all the mounts in the propagation * tree of the destination mount and the cloned mount is added to * the peer group of the source mount. * (+) the cloned mount is created under the destination mount and is marked * as shared. The cloned mount is added to the peer group of the source * mount. |
5afe00221
|
1485 1486 1487 1488 1489 1490 1491 |
* (+++) the mount is propagated to all the mounts in the propagation tree * of the destination mount and the cloned mount is made slave * of the same master as that of the source mount. The cloned mount * is marked as 'shared and slave'. * (*) the cloned mount is made a slave of the same master as that of the * source mount. * |
9676f0c63
|
1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 |
* --------------------------------------------------------------------------- * | MOVE MOUNT OPERATION | * |************************************************************************** * | source-->| shared | private | slave | unbindable | * | dest | | | | | * | | | | | | | * | v | | | | | * |************************************************************************** * | shared | shared (+) | shared (+) | shared(+++) | invalid | * | | | | | | * |non-shared| shared (+*) | private | slave (*) | unbindable | * *************************************************************************** |
5afe00221
|
1504 1505 1506 |
* * (+) the mount is moved to the destination. And is then propagated to * all the mounts in the propagation tree of the destination mount. |
214444032
|
1507 |
* (+*) the mount is moved to the destination. |
5afe00221
|
1508 1509 1510 1511 |
* (+++) the mount is moved to the destination and is then propagated to * all the mounts belonging to the destination mount's propagation tree. * the mount is marked as 'shared and slave'. * (*) the mount continues to be a slave at the new location. |
b90fa9ae8
|
1512 1513 1514 1515 1516 1517 |
 *
 * If the source mount is a tree, the operations explained above are
 * applied to each mount in the tree.
 * Must be called without spinlocks held, since this function can sleep
 * in allocations.
 */
0fb54e505
|
1518 |
static int attach_recursive_mnt(struct mount *source_mnt, |
84d17192d
|
1519 1520 1521 |
struct mount *dest_mnt, struct mountpoint *dest_mp, struct path *parent_path) |
b90fa9ae8
|
1522 1523 |
{ LIST_HEAD(tree_list); |
315fc83e5
|
1524 |
struct mount *child, *p; |
719f5d7f0
|
1525 |
int err; |
b90fa9ae8
|
1526 |
|
fc7be130c
|
1527 |
if (IS_MNT_SHARED(dest_mnt)) { |
0fb54e505
|
1528 |
err = invent_group_ids(source_mnt, true); |
719f5d7f0
|
1529 1530 1531 |
if (err) goto out; } |
84d17192d
|
1532 |
err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); |
719f5d7f0
|
1533 1534 |
if (err) goto out_cleanup_ids; |
b90fa9ae8
|
1535 |
|
719ea2fbb
|
1536 |
lock_mount_hash(); |
df1a1ad29
|
1537 |
|
fc7be130c
|
1538 |
if (IS_MNT_SHARED(dest_mnt)) { |
909b0a88e
|
1539 |
for (p = source_mnt; p; p = next_mnt(p, source_mnt)) |
0f0afb1dc
|
1540 |
set_mnt_shared(p); |
b90fa9ae8
|
1541 |
} |
1a3906895
|
1542 |
if (parent_path) { |
0fb54e505
|
1543 |
detach_mnt(source_mnt, parent_path); |
84d17192d
|
1544 |
attach_mnt(source_mnt, dest_mnt, dest_mp); |
143c8c91c
|
1545 |
touch_mnt_namespace(source_mnt->mnt_ns); |
214444032
|
1546 |
} else { |
84d17192d
|
1547 |
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); |
0fb54e505
|
1548 |
commit_tree(source_mnt); |
214444032
|
1549 |
} |
b90fa9ae8
|
1550 |
|
1b8e5564b
|
1551 1552 |
list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { list_del_init(&child->mnt_hash); |
4b2619a57
|
1553 |
commit_tree(child); |
b90fa9ae8
|
1554 |
} |
719ea2fbb
|
1555 |
unlock_mount_hash(); |
99b7db7b8
|
1556 |
|
b90fa9ae8
|
1557 |
return 0; |
719f5d7f0
|
1558 1559 |
out_cleanup_ids: |
fc7be130c
|
1560 |
if (IS_MNT_SHARED(dest_mnt)) |
0fb54e505
|
1561 |
cleanup_group_ids(source_mnt, NULL); |
719f5d7f0
|
1562 1563 |
out: return err; |
b90fa9ae8
|
1564 |
} |
84d17192d
|
1565 |
static struct mountpoint *lock_mount(struct path *path) |
b12cea919
|
1566 1567 |
{ struct vfsmount *mnt; |
84d17192d
|
1568 |
struct dentry *dentry = path->dentry; |
b12cea919
|
1569 |
retry: |
84d17192d
|
1570 1571 1572 1573 |
mutex_lock(&dentry->d_inode->i_mutex); if (unlikely(cant_mount(dentry))) { mutex_unlock(&dentry->d_inode->i_mutex); return ERR_PTR(-ENOENT); |
b12cea919
|
1574 |
} |
97216be09
|
1575 |
namespace_lock(); |
b12cea919
|
1576 |
mnt = lookup_mnt(path); |
84d17192d
|
1577 1578 1579 |
if (likely(!mnt)) { struct mountpoint *mp = new_mountpoint(dentry); if (IS_ERR(mp)) { |
97216be09
|
1580 |
namespace_unlock(); |
84d17192d
|
1581 1582 1583 1584 1585 |
mutex_unlock(&dentry->d_inode->i_mutex); return mp; } return mp; } |
97216be09
|
1586 |
namespace_unlock(); |
b12cea919
|
1587 1588 1589 |
mutex_unlock(&path->dentry->d_inode->i_mutex); path_put(path); path->mnt = mnt; |
84d17192d
|
1590 |
dentry = path->dentry = dget(mnt->mnt_root); |
b12cea919
|
1591 1592 |
goto retry; } |
84d17192d
|
1593 |
static void unlock_mount(struct mountpoint *where) |
b12cea919
|
1594 |
{ |
84d17192d
|
1595 1596 |
struct dentry *dentry = where->m_dentry; put_mountpoint(where); |
328e6d901
|
1597 |
namespace_unlock(); |
84d17192d
|
1598 |
mutex_unlock(&dentry->d_inode->i_mutex); |
b12cea919
|
1599 |
} |
84d17192d
|
1600 |
static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) |
1da177e4c
|
1601 |
{ |
95bc5f25c
|
1602 |
if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) |
1da177e4c
|
1603 |
return -EINVAL; |
84d17192d
|
1604 |
if (S_ISDIR(mp->m_dentry->d_inode->i_mode) != |
95bc5f25c
|
1605 |
S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode)) |
1da177e4c
|
1606 |
return -ENOTDIR; |
84d17192d
|
1607 |
return attach_recursive_mnt(mnt, p, mp, NULL); |
1da177e4c
|
1608 1609 1610 |
} /* |
7a2e8a8fa
|
1611 1612 1613 1614 1615 |
* Sanity check the flags to change_mnt_propagation. */ static int flags_to_propagation_type(int flags) { |
7c6e984df
|
1616 |
int type = flags & ~(MS_REC | MS_SILENT); |
7a2e8a8fa
|
1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 |
/* Fail if any non-propagation flags are set */ if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) return 0; /* Only one propagation flag should be set */ if (!is_power_of_2(type)) return 0; return type; } /* |
07b20889e
|
1628 1629 |
* recursively change the type of the mountpoint. */ |
0a0d8a467
|
1630 |
static int do_change_type(struct path *path, int flag) |
07b20889e
|
1631 |
{ |
315fc83e5
|
1632 |
struct mount *m; |
4b8b21f4f
|
1633 |
struct mount *mnt = real_mount(path->mnt); |
07b20889e
|
1634 |
int recurse = flag & MS_REC; |
7a2e8a8fa
|
1635 |
int type; |
719f5d7f0
|
1636 |
int err = 0; |
07b20889e
|
1637 |
|
2d92ab3c6
|
1638 |
if (path->dentry != path->mnt->mnt_root) |
07b20889e
|
1639 |
return -EINVAL; |
7a2e8a8fa
|
1640 1641 1642 |
type = flags_to_propagation_type(flag); if (!type) return -EINVAL; |
97216be09
|
1643 |
namespace_lock(); |
719f5d7f0
|
1644 1645 1646 1647 1648 |
if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) goto out_unlock; } |
719ea2fbb
|
1649 |
lock_mount_hash(); |
909b0a88e
|
1650 |
for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) |
0f0afb1dc
|
1651 |
change_mnt_propagation(m, type); |
719ea2fbb
|
1652 |
unlock_mount_hash(); |
719f5d7f0
|
1653 1654 |
out_unlock: |
97216be09
|
1655 |
namespace_unlock(); |
719f5d7f0
|
1656 |
return err; |
07b20889e
|
1657 |
} |
5ff9d8a65
|
1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 |
/*
 * Does @mnt have a direct child mount that is mounted at or below
 * @dentry and carries MNT_LOCKED?
 */
static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *kid;

	list_for_each_entry(kid, &mnt->mnt_mounts, mnt_child) {
		if (is_subdir(kid->mnt_mountpoint, dentry) &&
		    (kid->mnt.mnt_flags & MNT_LOCKED))
			return true;
	}

	return false;
}
07b20889e
|
1670 |
/* |
1da177e4c
|
1671 1672 |
* do loopback mount. */ |
808d4e3cf
|
1673 |
static int do_loopback(struct path *path, const char *old_name, |
2dafe1c4d
|
1674 |
int recurse) |
1da177e4c
|
1675 |
{ |
2d92ab3c6
|
1676 |
struct path old_path; |
84d17192d
|
1677 1678 |
struct mount *mnt = NULL, *old, *parent; struct mountpoint *mp; |
57eccb830
|
1679 |
int err; |
1da177e4c
|
1680 1681 |
if (!old_name || !*old_name) return -EINVAL; |
815d405ce
|
1682 |
err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); |
1da177e4c
|
1683 1684 |
if (err) return err; |
8823c079b
|
1685 |
err = -EINVAL; |
4ce5d2b1a
|
1686 |
if (mnt_ns_loop(old_path.dentry)) |
8823c079b
|
1687 |
goto out; |
84d17192d
|
1688 1689 1690 |
mp = lock_mount(path); err = PTR_ERR(mp); if (IS_ERR(mp)) |
b12cea919
|
1691 |
goto out; |
87129cc0e
|
1692 |
old = real_mount(old_path.mnt); |
84d17192d
|
1693 |
parent = real_mount(path->mnt); |
87129cc0e
|
1694 |
|
1da177e4c
|
1695 |
err = -EINVAL; |
fc7be130c
|
1696 |
if (IS_MNT_UNBINDABLE(old)) |
b12cea919
|
1697 |
goto out2; |
9676f0c63
|
1698 |
|
84d17192d
|
1699 |
if (!check_mnt(parent) || !check_mnt(old)) |
b12cea919
|
1700 |
goto out2; |
1da177e4c
|
1701 |
|
5ff9d8a65
|
1702 1703 |
if (!recurse && has_locked_children(old, old_path.dentry)) goto out2; |
ccd48bc7f
|
1704 |
if (recurse) |
4ce5d2b1a
|
1705 |
mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE); |
ccd48bc7f
|
1706 |
else |
87129cc0e
|
1707 |
mnt = clone_mnt(old, old_path.dentry, 0); |
ccd48bc7f
|
1708 |
|
be34d1a3b
|
1709 1710 |
if (IS_ERR(mnt)) { err = PTR_ERR(mnt); |
e9c5d8a56
|
1711 |
goto out2; |
be34d1a3b
|
1712 |
} |
ccd48bc7f
|
1713 |
|
5ff9d8a65
|
1714 |
mnt->mnt.mnt_flags &= ~MNT_LOCKED; |
84d17192d
|
1715 |
err = graft_tree(mnt, parent, mp); |
ccd48bc7f
|
1716 |
if (err) { |
719ea2fbb
|
1717 |
lock_mount_hash(); |
328e6d901
|
1718 |
umount_tree(mnt, 0); |
719ea2fbb
|
1719 |
unlock_mount_hash(); |
5b83d2c5c
|
1720 |
} |
b12cea919
|
1721 |
out2: |
84d17192d
|
1722 |
unlock_mount(mp); |
ccd48bc7f
|
1723 |
out: |
2d92ab3c6
|
1724 |
path_put(&old_path); |
1da177e4c
|
1725 1726 |
return err; } |
2e4b7fcd9
|
1727 1728 1729 1730 1731 1732 1733 1734 1735 |
static int change_mount_flags(struct vfsmount *mnt, int ms_flags) { int error = 0; int readonly_request = 0; if (ms_flags & MS_RDONLY) readonly_request = 1; if (readonly_request == __mnt_is_readonly(mnt)) return 0; |
90563b198
|
1736 1737 |
if (mnt->mnt_flags & MNT_LOCK_READONLY) return -EPERM; |
2e4b7fcd9
|
1738 |
if (readonly_request) |
83adc7532
|
1739 |
error = mnt_make_readonly(real_mount(mnt)); |
2e4b7fcd9
|
1740 |
else |
83adc7532
|
1741 |
__mnt_unmake_readonly(real_mount(mnt)); |
2e4b7fcd9
|
1742 1743 |
return error; } |
1da177e4c
|
1744 1745 1746 1747 1748 |
/* * change filesystem flags. dir should be a physical root of filesystem. * If you've mounted a non-root directory somewhere and want to do remount * on it - tough luck. */ |
0a0d8a467
|
1749 |
static int do_remount(struct path *path, int flags, int mnt_flags, |
1da177e4c
|
1750 1751 1752 |
void *data) { int err; |
2d92ab3c6
|
1753 |
struct super_block *sb = path->mnt->mnt_sb; |
143c8c91c
|
1754 |
struct mount *mnt = real_mount(path->mnt); |
1da177e4c
|
1755 |
|
143c8c91c
|
1756 |
if (!check_mnt(mnt)) |
1da177e4c
|
1757 |
return -EINVAL; |
2d92ab3c6
|
1758 |
if (path->dentry != path->mnt->mnt_root) |
1da177e4c
|
1759 |
return -EINVAL; |
ff36fe2c8
|
1760 1761 1762 |
err = security_sb_remount(sb, data); if (err) return err; |
1da177e4c
|
1763 |
down_write(&sb->s_umount); |
2e4b7fcd9
|
1764 |
if (flags & MS_BIND) |
2d92ab3c6
|
1765 |
err = change_mount_flags(path->mnt, flags); |
57eccb830
|
1766 1767 |
else if (!capable(CAP_SYS_ADMIN)) err = -EPERM; |
4aa98cf76
|
1768 |
else |
2e4b7fcd9
|
1769 |
err = do_remount_sb(sb, flags, data, 0); |
7b43a79f3
|
1770 |
if (!err) { |
719ea2fbb
|
1771 |
lock_mount_hash(); |
143c8c91c
|
1772 1773 |
mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; |
143c8c91c
|
1774 |
touch_mnt_namespace(mnt->mnt_ns); |
719ea2fbb
|
1775 |
unlock_mount_hash(); |
0e55a7cca
|
1776 |
} |
6339dab86
|
1777 |
up_write(&sb->s_umount); |
1da177e4c
|
1778 1779 |
return err; } |
cbbe362cd
|
1780 |
/*
 * Walk the mount tree rooted at @mnt with next_mnt() and report whether any
 * mount in it is unbindable.  Returns 1 if one is found, 0 otherwise.
 */
static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *cur = mnt;

	while (cur) {
		if (IS_MNT_UNBINDABLE(cur))
			return 1;
		cur = next_mnt(cur, mnt);
	}
	return 0;
}
808d4e3cf
|
1789 |
static int do_move_mount(struct path *path, const char *old_name) |
1da177e4c
|
1790 |
{ |
2d92ab3c6
|
1791 |
struct path old_path, parent_path; |
676da58df
|
1792 |
struct mount *p; |
0fb54e505
|
1793 |
struct mount *old; |
84d17192d
|
1794 |
struct mountpoint *mp; |
57eccb830
|
1795 |
int err; |
1da177e4c
|
1796 1797 |
if (!old_name || !*old_name) return -EINVAL; |
2d92ab3c6
|
1798 |
err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); |
1da177e4c
|
1799 1800 |
if (err) return err; |
84d17192d
|
1801 1802 1803 |
mp = lock_mount(path); err = PTR_ERR(mp); if (IS_ERR(mp)) |
cc53ce53c
|
1804 |
goto out; |
143c8c91c
|
1805 |
old = real_mount(old_path.mnt); |
fc7be130c
|
1806 |
p = real_mount(path->mnt); |
143c8c91c
|
1807 |
|
1da177e4c
|
1808 |
err = -EINVAL; |
fc7be130c
|
1809 |
if (!check_mnt(p) || !check_mnt(old)) |
1da177e4c
|
1810 |
goto out1; |
5ff9d8a65
|
1811 1812 |
if (old->mnt.mnt_flags & MNT_LOCKED) goto out1; |
1da177e4c
|
1813 |
err = -EINVAL; |
2d92ab3c6
|
1814 |
if (old_path.dentry != old_path.mnt->mnt_root) |
214444032
|
1815 |
goto out1; |
1da177e4c
|
1816 |
|
676da58df
|
1817 |
if (!mnt_has_parent(old)) |
214444032
|
1818 |
goto out1; |
1da177e4c
|
1819 |
|
2d92ab3c6
|
1820 1821 |
if (S_ISDIR(path->dentry->d_inode->i_mode) != S_ISDIR(old_path.dentry->d_inode->i_mode)) |
214444032
|
1822 1823 1824 1825 |
goto out1; /* * Don't move a mount residing in a shared parent. */ |
fc7be130c
|
1826 |
if (IS_MNT_SHARED(old->mnt_parent)) |
214444032
|
1827 |
goto out1; |
9676f0c63
|
1828 1829 1830 1831 |
/* * Don't move a mount tree containing unbindable mounts to a destination * mount which is shared. */ |
fc7be130c
|
1832 |
if (IS_MNT_SHARED(p) && tree_contains_unbindable(old)) |
9676f0c63
|
1833 |
goto out1; |
1da177e4c
|
1834 |
err = -ELOOP; |
fc7be130c
|
1835 |
for (; mnt_has_parent(p); p = p->mnt_parent) |
676da58df
|
1836 |
if (p == old) |
214444032
|
1837 |
goto out1; |
1da177e4c
|
1838 |
|
84d17192d
|
1839 |
err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path); |
4ac913785
|
1840 |
if (err) |
214444032
|
1841 |
goto out1; |
1da177e4c
|
1842 1843 1844 |
/* if the mount is moved, it should no longer be expire * automatically */ |
6776db3d3
|
1845 |
list_del_init(&old->mnt_expire); |
1da177e4c
|
1846 |
out1: |
84d17192d
|
1847 |
unlock_mount(mp); |
1da177e4c
|
1848 |
out: |
1da177e4c
|
1849 |
if (!err) |
1a3906895
|
1850 |
path_put(&parent_path); |
2d92ab3c6
|
1851 |
path_put(&old_path); |
1da177e4c
|
1852 1853 |
return err; } |
9d412a43c
|
1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 |
/*
 * Record the subtype part of @fstype (the text after the first '.') in the
 * superblock of @mnt.  A type name without a '.' stores an empty subtype.
 *
 * Returns @mnt on success.  On failure the reference on @mnt is dropped and
 * an ERR_PTR is returned: -EINVAL when the '.' is not followed by anything,
 * -ENOMEM when the subtype string cannot be duplicated.
 */
static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
{
	const char *dot = strchr(fstype, '.');
	const char *subtype = dot ? dot + 1 : "";
	int err;

	/* "type." with nothing after the dot is malformed. */
	if (dot && !subtype[0]) {
		err = -EINVAL;
		goto drop;
	}

	mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
	if (!mnt->mnt_sb->s_subtype) {
		err = -ENOMEM;
		goto drop;
	}
	return mnt;

drop:
	mntput(mnt);
	return ERR_PTR(err);
}
9d412a43c
|
1876 1877 1878 |
/* * add a mount into a namespace's mount tree */ |
95bc5f25c
|
1879 |
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) |
9d412a43c
|
1880 |
{ |
84d17192d
|
1881 1882 |
struct mountpoint *mp; struct mount *parent; |
9d412a43c
|
1883 |
int err; |
48a066e72
|
1884 |
mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT); |
9d412a43c
|
1885 |
|
84d17192d
|
1886 1887 1888 |
mp = lock_mount(path); if (IS_ERR(mp)) return PTR_ERR(mp); |
9d412a43c
|
1889 |
|
84d17192d
|
1890 |
parent = real_mount(path->mnt); |
9d412a43c
|
1891 |
err = -EINVAL; |
84d17192d
|
1892 |
if (unlikely(!check_mnt(parent))) { |
156cacb1d
|
1893 1894 1895 1896 |
/* that's acceptable only for automounts done in private ns */ if (!(mnt_flags & MNT_SHRINKABLE)) goto unlock; /* ... and for those we'd better have mountpoint still alive */ |
84d17192d
|
1897 |
if (!parent->mnt_ns) |
156cacb1d
|
1898 1899 |
goto unlock; } |
9d412a43c
|
1900 1901 1902 |
/* Refuse the same filesystem on the same mount point */ err = -EBUSY; |
95bc5f25c
|
1903 |
if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && |
9d412a43c
|
1904 1905 1906 1907 |
path->mnt->mnt_root == path->dentry) goto unlock; err = -EINVAL; |
95bc5f25c
|
1908 |
if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode)) |
9d412a43c
|
1909 |
goto unlock; |
95bc5f25c
|
1910 |
newmnt->mnt.mnt_flags = mnt_flags; |
84d17192d
|
1911 |
err = graft_tree(newmnt, parent, mp); |
9d412a43c
|
1912 1913 |
unlock: |
84d17192d
|
1914 |
unlock_mount(mp); |
9d412a43c
|
1915 1916 |
return err; } |
b1e75df45
|
1917 |
|
1da177e4c
|
1918 1919 1920 1921 |
/* * create a new mount for userspace and request it to be added into the * namespace's tree */ |
0c55cfc41
|
1922 |
static int do_new_mount(struct path *path, const char *fstype, int flags, |
808d4e3cf
|
1923 |
int mnt_flags, const char *name, void *data) |
1da177e4c
|
1924 |
{ |
0c55cfc41
|
1925 |
struct file_system_type *type; |
9b40bc90a
|
1926 |
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; |
1da177e4c
|
1927 |
struct vfsmount *mnt; |
15f9a3f3e
|
1928 |
int err; |
1da177e4c
|
1929 |
|
0c55cfc41
|
1930 |
if (!fstype) |
1da177e4c
|
1931 |
return -EINVAL; |
0c55cfc41
|
1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 |
type = get_fs_type(fstype); if (!type) return -ENODEV; if (user_ns != &init_user_ns) { if (!(type->fs_flags & FS_USERNS_MOUNT)) { put_filesystem(type); return -EPERM; } /* Only in special cases allow devices from mounts * created outside the initial user namespace. */ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { flags |= MS_NODEV; mnt_flags |= MNT_NODEV; } } mnt = vfs_kern_mount(type, flags, name, data); if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && !mnt->mnt_sb->s_subtype) mnt = fs_set_subtype(mnt, fstype); put_filesystem(type); |
1da177e4c
|
1956 1957 |
if (IS_ERR(mnt)) return PTR_ERR(mnt); |
95bc5f25c
|
1958 |
err = do_add_mount(real_mount(mnt), path, mnt_flags); |
15f9a3f3e
|
1959 1960 1961 |
if (err) mntput(mnt); return err; |
1da177e4c
|
1962 |
} |
19a167af7
|
1963 1964 |
int finish_automount(struct vfsmount *m, struct path *path) { |
6776db3d3
|
1965 |
struct mount *mnt = real_mount(m); |
19a167af7
|
1966 1967 1968 1969 |
int err; /* The new mount record should have at least 2 refs to prevent it being * expired before we get a chance to add it */ |
6776db3d3
|
1970 |
BUG_ON(mnt_get_count(mnt) < 2); |
19a167af7
|
1971 1972 1973 |
if (m->mnt_sb == path->mnt->mnt_sb && m->mnt_root == path->dentry) { |
b1e75df45
|
1974 1975 |
err = -ELOOP; goto fail; |
19a167af7
|
1976 |
} |
95bc5f25c
|
1977 |
err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE); |
b1e75df45
|
1978 1979 1980 1981 |
if (!err) return 0; fail: /* remove m from any expiration list it may be on */ |
6776db3d3
|
1982 |
if (!list_empty(&mnt->mnt_expire)) { |
97216be09
|
1983 |
namespace_lock(); |
6776db3d3
|
1984 |
list_del_init(&mnt->mnt_expire); |
97216be09
|
1985 |
namespace_unlock(); |
19a167af7
|
1986 |
} |
b1e75df45
|
1987 1988 |
mntput(m); mntput(m); |
19a167af7
|
1989 1990 |
return err; } |
ea5b778a8
|
1991 1992 1993 1994 1995 1996 1997 |
/** * mnt_set_expiry - Put a mount on an expiration list * @mnt: The mount to list. * @expiry_list: The list to add the mount to. */ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { |
97216be09
|
1998 |
namespace_lock(); |
ea5b778a8
|
1999 |
|
6776db3d3
|
2000 |
list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); |
ea5b778a8
|
2001 |
|
97216be09
|
2002 |
namespace_unlock(); |
ea5b778a8
|
2003 2004 2005 2006 |
} EXPORT_SYMBOL(mnt_set_expiry); /* |
1da177e4c
|
2007 2008 2009 2010 2011 2012 |
* process a list of expirable mountpoints with the intent of discarding any * mountpoints that aren't in use and haven't been touched since last we came * here */ void mark_mounts_for_expiry(struct list_head *mounts) { |
761d5c38e
|
2013 |
struct mount *mnt, *next; |
1da177e4c
|
2014 2015 2016 2017 |
LIST_HEAD(graveyard); if (list_empty(mounts)) return; |
97216be09
|
2018 |
namespace_lock(); |
719ea2fbb
|
2019 |
lock_mount_hash(); |
1da177e4c
|
2020 2021 2022 2023 2024 2025 2026 |
/* extract from the expiration list every vfsmount that matches the * following criteria: * - only referenced by its parent vfsmount * - still marked for expiry (marked on the last call here; marks are * cleared by mntput()) */ |
6776db3d3
|
2027 |
list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { |
863d684f9
|
2028 |
if (!xchg(&mnt->mnt_expiry_mark, 1) || |
1ab597386
|
2029 |
propagate_mount_busy(mnt, 1)) |
1da177e4c
|
2030 |
continue; |
6776db3d3
|
2031 |
list_move(&mnt->mnt_expire, &graveyard); |
1da177e4c
|
2032 |
} |
bcc5c7d2b
|
2033 |
while (!list_empty(&graveyard)) { |
6776db3d3
|
2034 |
mnt = list_first_entry(&graveyard, struct mount, mnt_expire); |
143c8c91c
|
2035 |
touch_mnt_namespace(mnt->mnt_ns); |
328e6d901
|
2036 |
umount_tree(mnt, 1); |
bcc5c7d2b
|
2037 |
} |
719ea2fbb
|
2038 |
unlock_mount_hash(); |
3ab6abee5
|
2039 |
namespace_unlock(); |
5528f911b
|
2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 |
} EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); /* * Ripoff of 'select_parent()' * * search the list of submounts for a given mountpoint, and move any * shrinkable submounts to the 'graveyard' list. */ |
692afc312
|
2050 |
/*
 * Returns the number of mounts that were moved onto @graveyard during this
 * walk of the tree below @parent.
 */
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	/* Start scanning the children of the current parent. */
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		/* Advance first: mnt may be moved to the graveyard below. */
		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;
		/*
		 * Descend a level if the mnt_mounts list is non-empty.
		 */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		/* Childless shrinkable mount: collect it unless it is busy. */
		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search
	 */
	if (this_parent != parent) {
		/* Continue with the sibling that follows the subtree we left. */
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 *
 * mount_lock must be held for write
 */
static void shrink_submounts(struct mount *mnt)
{
	LIST_HEAD(graveyard);
	struct mount *m;

	/*
	 * extract submounts of 'mountpoint' from the expiration list
	 *
	 * Repeat until a pass selects nothing.
	 * NOTE(review): the inner loop only terminates if umount_tree()
	 * takes m off the graveyard list - confirm against umount_tree().
	 */
	while (select_submounts(mnt, &graveyard)) {
		while (!list_empty(&graveyard)) {
			m = list_first_entry(&graveyard, struct mount,
						mnt_expire);
			touch_mnt_namespace(m->mnt_ns);
			umount_tree(m, 1);
		}
	}
}
1da177e4c
|
2111 2112 2113 2114 2115 2116 |
/* * Some copy_from_user() implementations do not return the exact number of * bytes remaining to copy on a fault. But copy_mount_options() requires that. * Note that this function differs from copy_from_user() in that it will oops * on bad values of `to', rather than returning a short copy. */ |
b58fed8b1
|
2117 2118 |
static long exact_copy_from_user(void *to, const void __user * from, unsigned long n) |
1da177e4c
|
2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 |
{ char *t = to; const char __user *f = from; char c; if (!access_ok(VERIFY_READ, from, n)) return n; while (n) { if (__get_user(c, f)) { memset(t, 0, n); break; } *t++ = c; f++; n--; } return n; } |
b58fed8b1
|
2138 |
/*
 * Copy up to one page of mount option data from user space.
 *
 * On success *@where holds the address of a freshly allocated page (or 0
 * when @data is NULL) and 0 is returned.  Returns -ENOMEM when no page can
 * be allocated and -EFAULT when not a single byte could be copied.
 */
int copy_mount_options(const void __user * data, unsigned long *where)
{
	unsigned long page;
	unsigned long limit;
	int copied;

	*where = 0;
	if (!data)
		return 0;

	page = __get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/*
	 * We only care that *some* data at the address the user gave us is
	 * valid.  copy_from_user cannot cross TASK_SIZE, so clamp the
	 * length to whichever comes first: TASK_SIZE or a full page.
	 */
	limit = TASK_SIZE - (unsigned long)data;
	limit = (limit > PAGE_SIZE) ? PAGE_SIZE : limit;

	copied = limit - exact_copy_from_user((void *)page, data, limit);
	if (!copied) {
		free_page(page);
		return -EFAULT;
	}

	/* Just in case, zero the remainder of the page. */
	if (copied != PAGE_SIZE)
		memset((char *)page + copied, 0, PAGE_SIZE - copied);

	*where = page;
	return 0;
}
eca6f534e
|
2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 |
/*
 * Duplicate a user-space string of at most PAGE_SIZE bytes into kernel
 * memory.  A NULL @data yields a NULL *@where.
 *
 * Returns 0 on success or the error encoded by strndup_user(); on error
 * *@where is left untouched.
 */
int copy_mount_string(const void __user *data, char **where)
{
	char *copy = NULL;

	if (data) {
		copy = strndup_user(data, PAGE_SIZE);
		if (IS_ERR(copy))
			return PTR_ERR(copy);
	}

	*where = copy;
	return 0;
}
1da177e4c
|
2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 |
/* * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to * be given to the mount() call (ie: read-only, no-dev, no-suid etc). * * data is a (void *) that can point to any structure up to * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent * information (or be NULL). * * Pre-0.97 versions of mount() didn't have a flags word. * When the flags word was introduced its top half was required * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9. * Therefore, if this magic number is present, it carries no information * and must be discarded. */ |
808d4e3cf
|
2200 2201 |
long do_mount(const char *dev_name, const char *dir_name, const char *type_page, unsigned long flags, void *data_page) |
1da177e4c
|
2202 |
{ |
2d92ab3c6
|
2203 |
struct path path; |
1da177e4c
|
2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 |
int retval = 0; int mnt_flags = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; /* Basic sanity checks */ if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; |
1da177e4c
|
2215 2216 2217 |
if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; |
a27ab9f26
|
2218 2219 2220 2221 2222 2223 2224 |
/* ... and get the mountpoint */ retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); if (retval) return retval; retval = security_sb_mount(dev_name, &path, type_page, flags, data_page); |
0d5cadb87
|
2225 2226 |
if (!retval && !may_mount()) retval = -EPERM; |
a27ab9f26
|
2227 2228 |
if (retval) goto dput_out; |
613cbe3d4
|
2229 2230 2231 |
/* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) mnt_flags |= MNT_RELATIME; |
0a1c01c94
|
2232 |
|
1da177e4c
|
2233 2234 2235 2236 2237 2238 2239 |
/* Separate the per-mountpoint flags */ if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; |
fc33a7bb9
|
2240 2241 2242 2243 |
if (flags & MS_NOATIME) mnt_flags |= MNT_NOATIME; if (flags & MS_NODIRATIME) mnt_flags |= MNT_NODIRATIME; |
d0adde574
|
2244 2245 |
if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); |
2e4b7fcd9
|
2246 2247 |
if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; |
fc33a7bb9
|
2248 |
|
7a4dec538
|
2249 |
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | |
d0adde574
|
2250 2251 |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); |
1da177e4c
|
2252 |
|
1da177e4c
|
2253 |
if (flags & MS_REMOUNT) |
2d92ab3c6
|
2254 |
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, |
1da177e4c
|
2255 2256 |
data_page); else if (flags & MS_BIND) |
2d92ab3c6
|
2257 |
retval = do_loopback(&path, dev_name, flags & MS_REC); |
9676f0c63
|
2258 |
else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) |
2d92ab3c6
|
2259 |
retval = do_change_type(&path, flags); |
1da177e4c
|
2260 |
else if (flags & MS_MOVE) |
2d92ab3c6
|
2261 |
retval = do_move_mount(&path, dev_name); |
1da177e4c
|
2262 |
else |
2d92ab3c6
|
2263 |
retval = do_new_mount(&path, type_page, flags, mnt_flags, |
1da177e4c
|
2264 2265 |
dev_name, data_page); dput_out: |
2d92ab3c6
|
2266 |
path_put(&path); |
1da177e4c
|
2267 2268 |
return retval; } |
771b13716
|
2269 2270 |
static void free_mnt_ns(struct mnt_namespace *ns) { |
98f842e67
|
2271 |
proc_free_inum(ns->proc_inum); |
771b13716
|
2272 2273 2274 |
put_user_ns(ns->user_ns); kfree(ns); } |
8823c079b
|
2275 2276 2277 2278 2279 2280 2281 2282 |
/* * Assign a sequence number so we can detect when we attempt to bind * mount a reference to an older mount namespace into the current * mount namespace, preventing reference counting loops. A 64bit * number incrementing at 10Ghz will take 12,427 years to wrap which * is effectively never, so we can ignore the possibility. */ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); |
771b13716
|
2283 |
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) |
cf8d2c11c
|
2284 2285 |
{ struct mnt_namespace *new_ns; |
98f842e67
|
2286 |
int ret; |
cf8d2c11c
|
2287 2288 2289 2290 |
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); if (!new_ns) return ERR_PTR(-ENOMEM); |
98f842e67
|
2291 2292 2293 2294 2295 |
ret = proc_alloc_inum(&new_ns->proc_inum); if (ret) { kfree(new_ns); return ERR_PTR(ret); } |
8823c079b
|
2296 |
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); |
cf8d2c11c
|
2297 2298 2299 2300 2301 |
atomic_set(&new_ns->count, 1); new_ns->root = NULL; INIT_LIST_HEAD(&new_ns->list); init_waitqueue_head(&new_ns->poll); new_ns->event = 0; |
771b13716
|
2302 |
new_ns->user_ns = get_user_ns(user_ns); |
cf8d2c11c
|
2303 2304 |
return new_ns; } |
9559f6891
|
2305 2306 |
/*
 * Produce the mount namespace for a new task.
 *
 * Without CLONE_NEWNS in @flags the existing namespace @ns is shared: a
 * reference is taken and @ns itself is returned.  Otherwise a new namespace
 * owned by @user_ns is allocated, the mount tree of @ns is copied into it
 * and, if @new_fs is given, its root and pwd mounts are switched over to
 * the corresponding copies.
 *
 * Returns the namespace to use, or an ERR_PTR from alloc_mnt_ns() or
 * copy_tree().
 */
struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct user_namespace *user_ns, struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct mount *p, *q;
	struct mount *old;
	struct mount *new;
	int copy_flags;

	BUG_ON(!ns);

	/* Common case: no new namespace requested, just share the old one. */
	if (likely(!(flags & CLONE_NEWNS))) {
		get_mnt_ns(ns);
		return ns;
	}

	old = ns->root;

	new_ns = alloc_mnt_ns(user_ns);
	if (IS_ERR(new_ns))
		return new_ns;

	namespace_lock();
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
	/* Crossing into a different user namespace uses stricter copy flags. */
	if (user_ns != ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		namespace_unlock();
		free_mnt_ns(new_ns);
		return ERR_CAST(new);
	}
	new_ns->root = new;
	list_add_tail(&new_ns->list, &new->mnt_list);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 *
	 * p walks the old tree and q the new one in step.
	 */
	p = old;
	q = new;
	while (p) {
		q->mnt_ns = new_ns;
		if (new_fs) {
			/*
			 * Point root/pwd at the copy and remember the old
			 * mount so its reference can be dropped after the
			 * namespace lock is released.
			 */
			if (&p->mnt == new_fs->root.mnt) {
				new_fs->root.mnt = mntget(&q->mnt);
				rootmnt = &p->mnt;
			}
			if (&p->mnt == new_fs->pwd.mnt) {
				new_fs->pwd.mnt = mntget(&q->mnt);
				pwdmnt = &p->mnt;
			}
		}
		p = next_mnt(p, old);
		q = next_mnt(q, new);
		if (!q)
			break;
		/*
		 * NOTE(review): presumably copy_tree() may have left some
		 * old mounts out of the copy; advance p until it lines up
		 * with q again - confirm against copy_tree().
		 */
		while (p->mnt.mnt_root != q->mnt.mnt_root)
			p = next_mnt(p, old);
	}
	namespace_unlock();

	/* Drop the references that new_fs held on the old mounts. */
	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}
cf8d2c11c
|
2376 2377 2378 2379 |
/** * create_mnt_ns - creates a private namespace and adds a root filesystem * @mnt: pointer to the new root filesystem mountpoint */ |
1a4eeaf2a
|
2380 |
static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) |
cf8d2c11c
|
2381 |
{ |
771b13716
|
2382 |
struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns); |
cf8d2c11c
|
2383 |
if (!IS_ERR(new_ns)) { |
1a4eeaf2a
|
2384 2385 |
struct mount *mnt = real_mount(m); mnt->mnt_ns = new_ns; |
be08d6d26
|
2386 |
new_ns->root = mnt; |
b1983cd89
|
2387 |
list_add(&mnt->mnt_list, &new_ns->list); |
c13344958
|
2388 |
} else { |
1a4eeaf2a
|
2389 |
mntput(m); |
cf8d2c11c
|
2390 2391 2392 |
} return new_ns; } |
cf8d2c11c
|
2393 |
|
ea441d110
|
2394 2395 2396 |
struct dentry *mount_subtree(struct vfsmount *mnt, const char *name) { struct mnt_namespace *ns; |
d31da0f0b
|
2397 |
struct super_block *s; |
ea441d110
|
2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 |
struct path path; int err; ns = create_mnt_ns(mnt); if (IS_ERR(ns)) return ERR_CAST(ns); err = vfs_path_lookup(mnt->mnt_root, mnt, name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); put_mnt_ns(ns); if (err) return ERR_PTR(err); /* trade a vfsmount reference for active sb one */ |
d31da0f0b
|
2414 2415 |
s = path.mnt->mnt_sb; atomic_inc(&s->s_active); |
ea441d110
|
2416 2417 |
mntput(path.mnt); /* lock the sucker */ |
d31da0f0b
|
2418 |
down_write(&s->s_umount); |
ea441d110
|
2419 2420 2421 2422 |
/* ... and return the root of (sub)tree on it */ return path.dentry; } EXPORT_SYMBOL(mount_subtree); |
bdc480e3b
|
2423 2424 |
/*
 * mount(2): copy all user supplied arguments into kernel memory, hand them
 * to do_mount() and release the copies again.
 *
 * Returns the result of do_mount(), or the error from whichever copy
 * failed first.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	struct filename *dir;
	unsigned long opts;
	char *fstype;
	char *dev;
	int ret;

	ret = copy_mount_string(type, &fstype);
	if (ret < 0)
		return ret;

	dir = getname(dir_name);
	if (IS_ERR(dir)) {
		ret = PTR_ERR(dir);
		goto free_type;
	}

	ret = copy_mount_string(dev_name, &dev);
	if (ret < 0)
		goto put_dir;

	ret = copy_mount_options(data, &opts);
	if (ret < 0)
		goto free_dev;

	ret = do_mount(dev, dir->name, fstype, flags, (void *)opts);

	/* Release everything in the reverse order of acquisition. */
	free_page(opts);
free_dev:
	kfree(dev);
put_dir:
	putname(dir);
free_type:
	kfree(fstype);
	return ret;
}

/*
afac7cba7
|
2465 2466 |
* Return true if path is reachable from root * |
48a066e72
|
2467 |
* namespace_sem or mount_lock is held |
afac7cba7
|
2468 |
*/ |
643822b41
|
2469 |
bool is_path_reachable(struct mount *mnt, struct dentry *dentry, |
afac7cba7
|
2470 2471 |
const struct path *root) { |
643822b41
|
2472 |
while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) { |
a73324da7
|
2473 |
dentry = mnt->mnt_mountpoint; |
0714a5338
|
2474 |
mnt = mnt->mnt_parent; |
afac7cba7
|
2475 |
} |
643822b41
|
2476 |
return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry); |
afac7cba7
|
2477 2478 2479 2480 2481 |
} int path_is_under(struct path *path1, struct path *path2) { int res; |
48a066e72
|
2482 |
read_seqlock_excl(&mount_lock); |
643822b41
|
2483 |
res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); |
48a066e72
|
2484 |
read_sequnlock_excl(&mount_lock); |
afac7cba7
|
2485 2486 2487 2488 2489 |
return res; } EXPORT_SYMBOL(path_is_under); /* |
1da177e4c
|
2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 |
* pivot_root Semantics: * Moves the root file system of the current process to the directory put_old, * makes new_root as the new root file system of the current process, and sets * root/cwd of all processes which had them on the current root to new_root. * * Restrictions: * The new_root and put_old must be directories, and must not be on the * same file system as the current process root. The put_old must be * underneath new_root, i.e. adding a non-zero number of /.. to the string * pointed to by put_old must yield the same directory as new_root. No other * file system may be mounted on put_old. After all, new_root is a mountpoint. * |
4a0d11fae
|
2502 2503 2504 2505 |
* Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem. * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives * in this situation. * |
1da177e4c
|
2506 2507 2508 2509 2510 2511 2512 2513 |
* Notes: * - we don't move root/cwd if they are not at the root (reason: if something * cared enough to change them, it's probably wrong to force them elsewhere) * - it's okay to pick a root that isn't the root of a file system, e.g. * /nfs/my_root where /nfs is the mount point. It must be a mountpoint, * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root * first. */ |
3480b2574
|
2514 2515 |
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		const char __user *, put_old)
{
	struct path new, old, parent_path, root_parent, root;
	struct mount *new_mnt, *root_mnt, *old_mnt;
	struct mountpoint *old_mp, *root_mp;
	int error;

	if (!may_mount())
		return -EPERM;

	/* Resolve both user supplied paths; each must name a directory. */
	error = user_path_dir(new_root, &new);
	if (error)
		goto out0;

	error = user_path_dir(put_old, &old);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old, &new);
	if (error)
		goto out2;

	/* Take a reference on the caller's current root. */
	get_fs_root(current->fs, &root);
	old_mp = lock_mount(&old);
	error = PTR_ERR(old_mp);
	if (IS_ERR(old_mp))
		goto out3;

	error = -EINVAL;
	new_mnt = real_mount(new.mnt);
	root_mnt = real_mount(root.mnt);
	old_mnt = real_mount(old.mnt);
	/* None of the mounts involved may sit in shared propagation. */
	if (IS_MNT_SHARED(old_mnt) ||
		IS_MNT_SHARED(new_mnt->mnt_parent) ||
		IS_MNT_SHARED(root_mnt->mnt_parent))
		goto out4;
	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
		goto out4;
	/* A locked mount may not become the new root. */
	if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out4;
	error = -ENOENT;
	if (d_unlinked(new.dentry))
		goto out4;
	error = -EBUSY;
	if (new_mnt == root_mnt || old_mnt == root_mnt)
		goto out4; /* loop, on the same file system  */
	error = -EINVAL;
	if (root.mnt->mnt_root != root.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(root_mnt))
		goto out4; /* not attached */
	root_mp = root_mnt->mnt_mp;
	if (new.mnt->mnt_root != new.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(new_mnt))
		goto out4; /* not attached */
	/* make sure we can reach put_old from new_root */
	if (!is_path_reachable(old_mnt, old.dentry, &new))
		goto out4;
	root_mp->m_count++; /* pin it so it won't go away */
	lock_mount_hash();
	/* Take both mounts out of the tree, remembering their old parents. */
	detach_mnt(new_mnt, &parent_path);
	detach_mnt(root_mnt, &root_parent);
	/* The lock, if any, moves from the old root to the new one. */
	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
	}
	/* mount old root on put_old */
	attach_mnt(root_mnt, old_mnt, old_mp);
	/* mount new_root on / */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	unlock_mount_hash();
	chroot_fs_refs(&root, &new);
	/* Release the pin taken via m_count++ above. */
	put_mountpoint(root_mp);
	error = 0;
out4:
	unlock_mount(old_mp);
	/* The two parent paths are only filled in once detach_mnt() ran. */
	if (!error) {
		path_put(&root_parent);
		path_put(&parent_path);
	}
out3:
	path_put(&root);
out2:
	path_put(&old);
out1:
	path_put(&new);
out0:
	return error;
}

/*
 * Boot-time setup: mount rootfs, wrap it in the initial mount namespace
 * and make it both root and cwd of the current task.  Any failure here
 * panics.
 */
static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;
	struct file_system_type *type;

	type = get_fs_type("rootfs");
	if (!type)
		panic("Can't find rootfs type");
	mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");

	/* init_task holds its own reference on the namespace. */
	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	root.mnt = mnt;
	root.dentry = mnt->mnt_root;

	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}
74bf17cff
|
2630 |
/*
 * Boot-time initialisation of the mount layer.
 *
 * Creates the slab cache for struct mount, allocates and initialises the
 * mount and mountpoint hash tables (one page each, HASH_SIZE list heads),
 * initialises sysfs, creates the "fs" kobject, then registers rootfs and
 * builds the initial mount tree.
 *
 * Hash table allocation failure panics; sysfs and kobject failures are
 * only logged and initialisation continues.
 */
void __init mnt_init(void)
{
	unsigned u;
	int err;

	/*
	 * The result is not checked here; SLAB_PANIC is requested, which
	 * presumably makes a creation failure fatal - confirm.
	 */
	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
	mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);

	if (!mount_hashtable || !mountpoint_hashtable)
		panic("Failed to allocate mount hash table\n");

	printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);

	/* Both tables have HASH_SIZE buckets, so one pass initialises both. */
	for (u = 0; u < HASH_SIZE; u++) {
		INIT_LIST_HEAD(&mount_hashtable[u]);
		INIT_LIST_HEAD(&mountpoint_hashtable[u]);
	}

	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);

	fs_kobj = kobject_create_and_add("fs", NULL);
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);

	init_rootfs();
	init_mount_tree();
}
616511d03
|
2664 |
void put_mnt_ns(struct mnt_namespace *ns) |
1da177e4c
|
2665 |
{ |
d498b25a4
|
2666 |
if (!atomic_dec_and_test(&ns->count)) |
616511d03
|
2667 |
return; |
7b00ed6fe
|
2668 |
drop_collected_mounts(&ns->root->mnt); |
771b13716
|
2669 |
free_mnt_ns(ns); |
1da177e4c
|
2670 |
} |
9d412a43c
|
2671 2672 2673 |
struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) { |
423e0ab08
|
2674 2675 2676 2677 2678 2679 2680 |
struct vfsmount *mnt; mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); if (!IS_ERR(mnt)) { /* * it is a longterm mount, don't release mnt until * we unmount before file sys is unregistered */ |
f7a99c5b7
|
2681 |
real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; |
423e0ab08
|
2682 2683 |
} return mnt; |
9d412a43c
|
2684 2685 |
} EXPORT_SYMBOL_GPL(kern_mount_data); |
423e0ab08
|
2686 2687 2688 2689 2690 |
void kern_unmount(struct vfsmount *mnt) { /* release long term mount so mount point can be released */ if (!IS_ERR_OR_NULL(mnt)) { |
f7a99c5b7
|
2691 |
real_mount(mnt)->mnt_ns = NULL; |
48a066e72
|
2692 |
synchronize_rcu(); /* yecchhh... */ |
423e0ab08
|
2693 2694 2695 2696 |
mntput(mnt); } } EXPORT_SYMBOL(kern_unmount); |
02125a826
|
2697 2698 2699 |
/*
 * Report the result of check_mnt() for the struct mount that contains
 * @mnt.
 */
bool our_mnt(struct vfsmount *mnt)
{
	struct mount *m = real_mount(mnt);

	return check_mnt(m);
}
8823c079b
|
2702 |
|
3151527ee
|
2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 |
bool current_chrooted(void) { /* Does the current process have a non-standard root */ struct path ns_root; struct path fs_root; bool chrooted; /* Find the namespace root */ ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt; ns_root.dentry = ns_root.mnt->mnt_root; path_get(&ns_root); while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) ; get_fs_root(current->fs, &fs_root); chrooted = !path_equal(&fs_root, &ns_root); path_put(&fs_root); path_put(&ns_root); return chrooted; } |
e51db7353
|
2726 |
bool fs_fully_visible(struct file_system_type *type) |
87a8ebd63
|
2727 2728 2729 |
{ struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct mount *mnt; |
e51db7353
|
2730 |
bool visible = false; |
87a8ebd63
|
2731 |
|
e51db7353
|
2732 2733 |
if (unlikely(!ns)) return false; |
44bb4385c
|
2734 |
down_read(&namespace_sem); |
87a8ebd63
|
2735 |
list_for_each_entry(mnt, &ns->list, mnt_list) { |
e51db7353
|
2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 |
struct mount *child; if (mnt->mnt.mnt_sb->s_type != type) continue; /* This mount is not fully visible if there are any child mounts * that cover anything except for empty directories. */ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; if (!S_ISDIR(inode->i_mode)) goto next; if (inode->i_nlink != 2) goto next; |
87a8ebd63
|
2749 |
} |
e51db7353
|
2750 2751 2752 |
visible = true; goto found; next: ; |
87a8ebd63
|
2753 |
} |
e51db7353
|
2754 |
found: |
44bb4385c
|
2755 |
up_read(&namespace_sem); |
e51db7353
|
2756 |
return visible; |
87a8ebd63
|
2757 |
} |
8823c079b
|
2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 |
static void *mntns_get(struct task_struct *task) { struct mnt_namespace *ns = NULL; struct nsproxy *nsproxy; rcu_read_lock(); nsproxy = task_nsproxy(task); if (nsproxy) { ns = nsproxy->mnt_ns; get_mnt_ns(ns); } rcu_read_unlock(); return ns; } static void mntns_put(void *ns) { put_mnt_ns(ns); } static int mntns_install(struct nsproxy *nsproxy, void *ns) { struct fs_struct *fs = current->fs; struct mnt_namespace *mnt_ns = ns; struct path root; |
0c55cfc41
|
2784 |
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || |
c7b96acf1
|
2785 2786 |
!ns_capable(current_user_ns(), CAP_SYS_CHROOT) || !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) |
ae11e0f18
|
2787 |
return -EPERM; |
8823c079b
|
2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 |
if (fs->users != 1) return -EINVAL; get_mnt_ns(mnt_ns); put_mnt_ns(nsproxy->mnt_ns); nsproxy->mnt_ns = mnt_ns; /* Find the root */ root.mnt = &mnt_ns->root->mnt; root.dentry = mnt_ns->root->mnt.mnt_root; path_get(&root); while(d_mountpoint(root.dentry) && follow_down_one(&root)) ; /* Update the pwd and root */ set_fs_pwd(fs, &root); set_fs_root(fs, &root); path_put(&root); return 0; } |
98f842e67
|
2810 2811 2812 2813 2814 |
/* proc_ns "inum" hook: return the proc_inum field of mount namespace @ns. */
static unsigned int mntns_inum(void *ns)
{
	return ((struct mnt_namespace *)ns)->proc_inum;
}
8823c079b
|
2815 2816 2817 2818 2819 2820 |
const struct proc_ns_operations mntns_operations = { .name = "mnt", .type = CLONE_NEWNS, .get = mntns_get, .put = mntns_put, .install = mntns_install, |
98f842e67
|
2821 |
.inum = mntns_inum, |
8823c079b
|
2822 |
}; |