Commit bb4fbf094b440a9209ed88c8681960d4b26eec0f

Authored by Eric W. Biederman
Committed by Greg Kroah-Hartman
1 parent e260db7576

mnt: In propgate_umount handle visiting mounts in any order

commit 99b19d16471e9c3faa85cad38abc9cbbe04c6d55 upstream.

While investigating some poor umount performance I realized that in
the case of overlapping mount trees where some of the mounts are locked
the code has been failing to unmount all of the mounts it should
have been unmounting.

This failure to unmount all of the necessary
mounts can be reproduced with:

$ cat locked_mounts_test.sh

mount -t tmpfs test-base /mnt
mount --make-shared /mnt
mkdir -p /mnt/b

mount -t tmpfs test1 /mnt/b
mount --make-shared /mnt/b
mkdir -p /mnt/b/10

mount -t tmpfs test2 /mnt/b/10
mount --make-shared /mnt/b/10
mkdir -p /mnt/b/10/20

mount --rbind /mnt/b /mnt/b/10/20

unshare -Urm --propagation unchaged /bin/sh -c 'sleep 5; if [ $(grep test /proc/self/mountinfo | wc -l) -eq 1 ] ; then echo SUCCESS ; else echo FAILURE ; fi'
sleep 1
umount -l /mnt/b
wait %%

$ unshare -Urm ./locked_mounts_test.sh

This failure is corrected by removing the prepass that marks mounts
that may be umounted.

A first pass is added that umounts mounts if possible and if not sets
mount mark if they could be unmounted if they weren't locked and adds
them to a list to umount possibilities.  This first pass reconsiders
the mounts parent if it is on the list of umount possibilities, ensuring
that information of umoutability will pass from child to mount parent.

A second pass then walks through all mounts that are umounted and processes
their children unmounting them or marking them for reparenting.

A last pass cleans up the state on the mounts that could not be umounted
and if applicable reparents them to their first parent that remained
mounted.

While a bit longer than the old code this code is much more robust
as it allows information to flow up from the leaves and down
from the trunk making the order in which mounts are encountered
in the umount propgation tree irrelevant.

Fixes: 0c56fe31420c ("mnt: Don't propagate unmounts to locked mounts")
Reviewed-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 3 changed files with 90 additions and 62 deletions Side-by-side Diff

... ... @@ -58,7 +58,7 @@
58 58 struct mnt_namespace *mnt_ns; /* containing namespace */
59 59 struct mountpoint *mnt_mp; /* where is it mounted */
60 60 struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
61   - struct list_head mnt_reparent; /* reparent list entry */
  61 + struct list_head mnt_umounting; /* list entry for umount propagation */
62 62 #ifdef CONFIG_FSNOTIFY
63 63 struct hlist_head mnt_fsnotify_marks;
64 64 __u32 mnt_fsnotify_mask;
... ... @@ -237,7 +237,7 @@
237 237 INIT_LIST_HEAD(&mnt->mnt_slave_list);
238 238 INIT_LIST_HEAD(&mnt->mnt_slave);
239 239 INIT_HLIST_NODE(&mnt->mnt_mp_list);
240   - INIT_LIST_HEAD(&mnt->mnt_reparent);
  240 + INIT_LIST_HEAD(&mnt->mnt_umounting);
241 241 #ifdef CONFIG_FSNOTIFY
242 242 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
243 243 #endif
... ... @@ -415,86 +415,95 @@
415 415 }
416 416 }
417 417  
418   -/*
419   - * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
420   - */
421   -static void mark_umount_candidates(struct mount *mnt)
  418 +static void umount_one(struct mount *mnt, struct list_head *to_umount)
422 419 {
423   - struct mount *parent = mnt->mnt_parent;
424   - struct mount *m;
425   -
426   - BUG_ON(parent == mnt);
427   -
428   - for (m = propagation_next(parent, parent); m;
429   - m = propagation_next(m, parent)) {
430   - struct mount *child = __lookup_mnt(&m->mnt,
431   - mnt->mnt_mountpoint);
432   - if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
433   - continue;
434   - if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
435   - SET_MNT_MARK(child);
436   - }
437   - }
  420 + CLEAR_MNT_MARK(mnt);
  421 + mnt->mnt.mnt_flags |= MNT_UMOUNT;
  422 + list_del_init(&mnt->mnt_child);
  423 + list_del_init(&mnt->mnt_umounting);
  424 + list_move_tail(&mnt->mnt_list, to_umount);
438 425 }
439 426  
440 427 /*
441 428 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
442 429 * parent propagates to.
443 430 */
444   -static void __propagate_umount(struct mount *mnt, struct list_head *to_reparent)
  431 +static bool __propagate_umount(struct mount *mnt,
  432 + struct list_head *to_umount,
  433 + struct list_head *to_restore)
445 434 {
446   - struct mount *parent = mnt->mnt_parent;
447   - struct mount *m;
  435 + bool progress = false;
  436 + struct mount *child;
448 437  
449   - BUG_ON(parent == mnt);
  438 + /*
  439 + * The state of the parent won't change if this mount is
  440 + * already unmounted or marked as without children.
  441 + */
  442 + if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
  443 + goto out;
450 444  
451   - for (m = propagation_next(parent, parent); m;
452   - m = propagation_next(m, parent)) {
453   - struct mount *topper;
454   - struct mount *child = __lookup_mnt(&m->mnt,
455   - mnt->mnt_mountpoint);
456   - /*
457   - * umount the child only if the child has no children
458   - * and the child is marked safe to unmount.
459   - */
460   - if (!child || !IS_MNT_MARKED(child))
  445 + /* Verify topper is the only grandchild that has not been
  446 + * speculatively unmounted.
  447 + */
  448 + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
  449 + if (child->mnt_mountpoint == mnt->mnt.mnt_root)
461 450 continue;
462   - CLEAR_MNT_MARK(child);
  451 + if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
  452 + continue;
  453 + /* Found a mounted child */
  454 + goto children;
  455 + }
463 456  
464   - /* If there is exactly one mount covering all of child
465   - * replace child with that mount.
466   - */
467   - topper = find_topper(child);
468   - if (topper)
469   - list_add_tail(&topper->mnt_reparent, to_reparent);
  457 + /* Mark mounts that can be unmounted if not locked */
  458 + SET_MNT_MARK(mnt);
  459 + progress = true;
470 460  
471   - if (topper || list_empty(&child->mnt_mounts)) {
472   - list_del_init(&child->mnt_child);
473   - list_del_init(&child->mnt_reparent);
474   - child->mnt.mnt_flags |= MNT_UMOUNT;
475   - list_move_tail(&child->mnt_list, &mnt->mnt_list);
  461 + /* If a mount is without children and not locked umount it. */
  462 + if (!IS_MNT_LOCKED(mnt)) {
  463 + umount_one(mnt, to_umount);
  464 + } else {
  465 +children:
  466 + list_move_tail(&mnt->mnt_umounting, to_restore);
  467 + }
  468 +out:
  469 + return progress;
  470 +}
  471 +
  472 +static void umount_list(struct list_head *to_umount,
  473 + struct list_head *to_restore)
  474 +{
  475 + struct mount *mnt, *child, *tmp;
  476 + list_for_each_entry(mnt, to_umount, mnt_list) {
  477 + list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
  478 + /* topper? */
  479 + if (child->mnt_mountpoint == mnt->mnt.mnt_root)
  480 + list_move_tail(&child->mnt_umounting, to_restore);
  481 + else
  482 + umount_one(child, to_umount);
476 483 }
477 484 }
478 485 }
479 486  
480   -static void reparent_mounts(struct list_head *to_reparent)
  487 +static void restore_mounts(struct list_head *to_restore)
481 488 {
482   - while (!list_empty(to_reparent)) {
  489 + /* Restore mounts to a clean working state */
  490 + while (!list_empty(to_restore)) {
483 491 struct mount *mnt, *parent;
484 492 struct mountpoint *mp;
485 493  
486   - mnt = list_first_entry(to_reparent, struct mount, mnt_reparent);
487   - list_del_init(&mnt->mnt_reparent);
  494 + mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
  495 + CLEAR_MNT_MARK(mnt);
  496 + list_del_init(&mnt->mnt_umounting);
488 497  
489   - /* Where should this mount be reparented to? */
  498 + /* Should this mount be reparented? */
490 499 mp = mnt->mnt_mp;
491 500 parent = mnt->mnt_parent;
492 501 while (parent->mnt.mnt_flags & MNT_UMOUNT) {
493 502 mp = parent->mnt_mp;
494 503 parent = parent->mnt_parent;
495 504 }
496   -
497   - mnt_change_mountpoint(parent, mp, mnt);
  505 + if (parent != mnt->mnt_parent)
  506 + mnt_change_mountpoint(parent, mp, mnt);
498 507 }
499 508 }
500 509  
501 510  
502 511  
503 512  
... ... @@ -508,15 +517,34 @@
508 517 int propagate_umount(struct list_head *list)
509 518 {
510 519 struct mount *mnt;
511   - LIST_HEAD(to_reparent);
  520 + LIST_HEAD(to_restore);
  521 + LIST_HEAD(to_umount);
512 522  
513   - list_for_each_entry_reverse(mnt, list, mnt_list)
514   - mark_umount_candidates(mnt);
  523 + list_for_each_entry(mnt, list, mnt_list) {
  524 + struct mount *parent = mnt->mnt_parent;
  525 + struct mount *m;
515 526  
516   - list_for_each_entry(mnt, list, mnt_list)
517   - __propagate_umount(mnt, &to_reparent);
  527 + for (m = propagation_next(parent, parent); m;
  528 + m = propagation_next(m, parent)) {
  529 + struct mount *child = __lookup_mnt(&m->mnt,
  530 + mnt->mnt_mountpoint);
  531 + if (!child)
  532 + continue;
518 533  
519   - reparent_mounts(&to_reparent);
  534 + /* Check the child and parents while progress is made */
  535 + while (__propagate_umount(child,
  536 + &to_umount, &to_restore)) {
  537 + /* Is the parent a umount candidate? */
  538 + child = child->mnt_parent;
  539 + if (list_empty(&child->mnt_umounting))
  540 + break;
  541 + }
  542 + }
  543 + }
  544 +
  545 + umount_list(&to_umount, &to_restore);
  546 + restore_mounts(&to_restore);
  547 + list_splice_tail(&to_umount, list);
520 548  
521 549 return 0;
522 550 }