Commit c72e05756b900b3be24cd73a16de52bab80984c0
1 parent
2ac626955e
kill-the-bkl/reiserfs: acquire the inode mutex safely
While searching a pathname, an inode mutex can be acquired in do_lookup(), which calls reiserfs_lookup(), which in turn acquires the write lock. On the other side, reiserfs_fill_super() can acquire the write lock and then call reiserfs_lookup_privroot(), which can acquire an inode mutex (the root of the mount point). So we theoretically risk an AB-BA lock inversion that could lead to a deadlock. As with other lock dependencies found since the bkl-to-mutex conversion, the fix is to use reiserfs_mutex_lock_safe(), which drops the lock dependency to the write lock. [ Impact: fix a possible deadlock with reiserfs ] Cc: Jeff Mahoney <jeffm@suse.com> Cc: Chris Mason <chris.mason@oracle.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Alexander Beregalov <a.beregalov@gmail.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Showing 3 changed files with 37 additions and 36 deletions Side-by-side Diff
fs/reiserfs/journal.c
... | ... | @@ -537,40 +537,6 @@ |
537 | 537 | journal_hash(table, cn->sb, cn->blocknr) = cn; |
538 | 538 | } |
539 | 539 | |
540 | -/* | |
541 | - * Several mutexes depend on the write lock. | |
542 | - * However sometimes we want to relax the write lock while we hold | |
543 | - * these mutexes, according to the release/reacquire on schedule() | |
544 | - * properties of the Bkl that were used. | |
545 | - * Reiserfs performances and locking were based on this scheme. | |
546 | - * Now that the write lock is a mutex and not the bkl anymore, doing so | |
547 | - * may result in a deadlock: | |
548 | - * | |
549 | - * A acquire write_lock | |
550 | - * A acquire j_commit_mutex | |
551 | - * A release write_lock and wait for something | |
552 | - * B acquire write_lock | |
553 | - * B can't acquire j_commit_mutex and sleep | |
554 | - * A can't acquire write lock anymore | |
555 | - * deadlock | |
556 | - * | |
557 | - * What we do here is avoiding such deadlock by playing the same game | |
558 | - * than the Bkl: if we can't acquire a mutex that depends on the write lock, | |
559 | - * we release the write lock, wait a bit and then retry. | |
560 | - * | |
561 | - * The mutexes concerned by this hack are: | |
562 | - * - The commit mutex of a journal list | |
563 | - * - The flush mutex | |
564 | - * - The journal lock | |
565 | - */ | |
566 | -static inline void reiserfs_mutex_lock_safe(struct mutex *m, | |
567 | - struct super_block *s) | |
568 | -{ | |
569 | - reiserfs_write_unlock(s); | |
570 | - mutex_lock(m); | |
571 | - reiserfs_write_lock(s); | |
572 | -} | |
573 | - | |
574 | 540 | /* lock the current transaction */ |
575 | 541 | static inline void lock_journal(struct super_block *sb) |
576 | 542 | { |
fs/reiserfs/xattr.c
... | ... | @@ -975,7 +975,7 @@ |
975 | 975 | int err = 0; |
976 | 976 | |
977 | 977 | /* If we don't have the privroot located yet - go find it */ |
978 | - mutex_lock(&s->s_root->d_inode->i_mutex); | |
978 | + reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s); | |
979 | 979 | dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, |
980 | 980 | strlen(PRIVROOT_NAME)); |
981 | 981 | if (!IS_ERR(dentry)) { |
... | ... | @@ -1011,7 +1011,7 @@ |
1011 | 1011 | |
1012 | 1012 | if (privroot->d_inode) { |
1013 | 1013 | s->s_xattr = reiserfs_xattr_handlers; |
1014 | - mutex_lock(&privroot->d_inode->i_mutex); | |
1014 | + reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s); | |
1015 | 1015 | if (!REISERFS_SB(s)->xattr_root) { |
1016 | 1016 | struct dentry *dentry; |
1017 | 1017 | dentry = lookup_one_len(XAROOT_NAME, privroot, |
include/linux/reiserfs_fs.h
... | ... | @@ -63,6 +63,41 @@ |
63 | 63 | void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); |
64 | 64 | |
65 | 65 | /* |
66 | + * Several mutexes depend on the write lock. | |
67 | + * However sometimes we want to relax the write lock while we hold | |
68 | + * these mutexes, according to the release/reacquire on schedule() | |
69 | + * properties of the Bkl that were used. | |
70 | + * Reiserfs performances and locking were based on this scheme. | |
71 | + * Now that the write lock is a mutex and not the bkl anymore, doing so | |
72 | + * may result in a deadlock: | |
73 | + * | |
74 | + * A acquire write_lock | |
75 | + * A acquire j_commit_mutex | |
76 | + * A release write_lock and wait for something | |
77 | + * B acquire write_lock | |
78 | + * B can't acquire j_commit_mutex and sleep | |
79 | + * A can't acquire write lock anymore | |
80 | + * deadlock | |
81 | + * | |
82 | + * What we do here is avoiding such deadlock by playing the same game | |
83 | + * than the Bkl: if we can't acquire a mutex that depends on the write lock, | |
84 | + * we release the write lock, wait a bit and then retry. | |
85 | + * | |
86 | + * The mutexes concerned by this hack are: | |
87 | + * - The commit mutex of a journal list | |
88 | + * - The flush mutex | |
89 | + * - The journal lock | |
90 | + * - The inode mutex | |
91 | + */ | |
92 | +static inline void reiserfs_mutex_lock_safe(struct mutex *m, | |
93 | + struct super_block *s) | |
94 | +{ | |
95 | + reiserfs_write_unlock(s); | |
96 | + mutex_lock(m); | |
97 | + reiserfs_write_lock(s); | |
98 | +} | |
99 | + | |
100 | +/* | |
66 | 101 | * When we schedule, we usually want to also release the write lock, |
67 | 102 | * according to the previous bkl based locking scheme of reiserfs. |
68 | 103 | */ |