Commit c72e05756b900b3be24cd73a16de52bab80984c0
1 parent
2ac626955e
kill-the-bkl/reiserfs: acquire the inode mutex safely
While searching a pathname, an inode mutex can be acquired in do_lookup(), which calls reiserfs_lookup(), which in turn acquires the write lock. On the other side, reiserfs_fill_super() can acquire the write lock and then call reiserfs_lookup_privroot(), which can acquire an inode mutex (the root of the mount point). So we theoretically risk an AB-BA lock inversion that could lead to a deadlock. As with other lock dependencies found since the bkl-to-mutex conversion, the fix is to use reiserfs_mutex_lock_safe(), which drops the lock dependency to the write lock. [ Impact: fix a possible deadlock with reiserfs ] Cc: Jeff Mahoney <jeffm@suse.com> Cc: Chris Mason <chris.mason@oracle.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Alexander Beregalov <a.beregalov@gmail.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Showing 3 changed files with 37 additions and 36 deletions Side-by-side Diff
fs/reiserfs/journal.c
... | ... | @@ -537,40 +537,6 @@ |
537 | 537 | journal_hash(table, cn->sb, cn->blocknr) = cn; |
538 | 538 | } |
539 | 539 | |
540 | -/* | |
541 | - * Several mutexes depend on the write lock. | |
542 | - * However sometimes we want to relax the write lock while we hold | |
543 | - * these mutexes, according to the release/reacquire on schedule() | |
544 | - * properties of the Bkl that were used. | |
545 | - * Reiserfs performances and locking were based on this scheme. | |
546 | - * Now that the write lock is a mutex and not the bkl anymore, doing so | |
547 | - * may result in a deadlock: | |
548 | - * | |
549 | - * A acquire write_lock | |
550 | - * A acquire j_commit_mutex | |
551 | - * A release write_lock and wait for something | |
552 | - * B acquire write_lock | |
553 | - * B can't acquire j_commit_mutex and sleep | |
554 | - * A can't acquire write lock anymore | |
555 | - * deadlock | |
556 | - * | |
557 | - * What we do here is avoiding such deadlock by playing the same game | |
558 | - * than the Bkl: if we can't acquire a mutex that depends on the write lock, | |
559 | - * we release the write lock, wait a bit and then retry. | |
560 | - * | |
561 | - * The mutexes concerned by this hack are: | |
562 | - * - The commit mutex of a journal list | |
563 | - * - The flush mutex | |
564 | - * - The journal lock | |
565 | - */ | |
566 | -static inline void reiserfs_mutex_lock_safe(struct mutex *m, | |
567 | - struct super_block *s) | |
568 | -{ | |
569 | - reiserfs_write_unlock(s); | |
570 | - mutex_lock(m); | |
571 | - reiserfs_write_lock(s); | |
572 | -} | |
573 | - | |
574 | 540 | /* lock the current transaction */ |
575 | 541 | static inline void lock_journal(struct super_block *sb) |
576 | 542 | { |
fs/reiserfs/xattr.c
... | ... | @@ -975,7 +975,7 @@ |
975 | 975 | int err = 0; |
976 | 976 | |
977 | 977 | /* If we don't have the privroot located yet - go find it */ |
978 | - mutex_lock(&s->s_root->d_inode->i_mutex); | |
978 | + reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s); | |
979 | 979 | dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, |
980 | 980 | strlen(PRIVROOT_NAME)); |
981 | 981 | if (!IS_ERR(dentry)) { |
... | ... | @@ -1011,7 +1011,7 @@ |
1011 | 1011 | |
1012 | 1012 | if (privroot->d_inode) { |
1013 | 1013 | s->s_xattr = reiserfs_xattr_handlers; |
1014 | - mutex_lock(&privroot->d_inode->i_mutex); | |
1014 | + reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s); | |
1015 | 1015 | if (!REISERFS_SB(s)->xattr_root) { |
1016 | 1016 | struct dentry *dentry; |
1017 | 1017 | dentry = lookup_one_len(XAROOT_NAME, privroot, |
include/linux/reiserfs_fs.h
... | ... | @@ -63,6 +63,41 @@ |
63 | 63 | void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); |
64 | 64 | |
65 | 65 | /* |
66 | + * Several mutexes depend on the write lock. | |
67 | + * However sometimes we want to relax the write lock while we hold | |
68 | + * these mutexes, according to the release/reacquire on schedule() | |
69 | + * properties of the Bkl that were used. | |
70 | + * Reiserfs performances and locking were based on this scheme. | |
71 | + * Now that the write lock is a mutex and not the bkl anymore, doing so | |
72 | + * may result in a deadlock: | |
73 | + * | |
74 | + * A acquire write_lock | |
75 | + * A acquire j_commit_mutex | |
76 | + * A release write_lock and wait for something | |
77 | + * B acquire write_lock | |
78 | + * B can't acquire j_commit_mutex and sleep | |
79 | + * A can't acquire write lock anymore | |
80 | + * deadlock | |
81 | + * | |
82 | + * What we do here is avoiding such deadlock by playing the same game | |
83 | + * than the Bkl: if we can't acquire a mutex that depends on the write lock, | |
84 | + * we release the write lock, wait a bit and then retry. | |
85 | + * | |
86 | + * The mutexes concerned by this hack are: | |
87 | + * - The commit mutex of a journal list | |
88 | + * - The flush mutex | |
89 | + * - The journal lock | |
90 | + * - The inode mutex | |
91 | + */ | |
92 | +static inline void reiserfs_mutex_lock_safe(struct mutex *m, | |
93 | + struct super_block *s) | |
94 | +{ | |
95 | + reiserfs_write_unlock(s); | |
96 | + mutex_lock(m); | |
97 | + reiserfs_write_lock(s); | |
98 | +} | |
99 | + | |
100 | +/* | |
66 | 101 | * When we schedule, we usually want to also release the write lock, |
67 | 102 | * according to the previous bkl based locking scheme of reiserfs. |
68 | 103 | */ |