ext3: Replace lock/unlock_super() with an explicit lock for resizing

Use a separate lock to protect s_groups_count and the other block group descriptors which get changed via an on-line resize operation, so we can stop overloading the use of lock_super().

Port of ext4 commit 32ed5058ce90024efcd811254b4b1de0468099df by Theodore Ts'o <tytso@mit.edu>.

CC: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>

ext3: Replace lock/unlock_super() with an explicit lock for resizing
Use a separate lock to protect s_groups_count and the other block group descriptors which get changed via an on-line resize operation, so we can stop overloading the use of lock_super().

Port of ext4 commit 32ed5058ce90024efcd811254b4b1de0468099df by Theodore Ts'o <tytso@mit.edu>.

CC: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Eric Sandeen · Jan Kara
1 parent b8a052d016
Showing 3 changed files with 20 additions and 17 deletions Side-by-side Diff
fs/ext3/resize.c
fs/ext3/super.c
include/linux/ext3_fs_sb.h
@@ -209,7 +209,7 @@
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
  
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		err = -EBUSY;
 		goto exit_journal;
@@ -324,7 +324,7 @@
 	brelse(bh);
  
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext3_journal_stop(handle)) && !err)
 		err = err2;
  
@@ -662,11 +662,12 @@
  * important part is that the new block and inode counts are in the backup
  * superblocks, and the location of the new group metadata in the GDT backups.
  *
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted.  We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time.  The resize which changed s_groups_count will backup again.
+ * We do not need to take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted.  We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time.  The resize
+ * which changed s_groups_count will backup again.
  */
 static void update_backups(struct super_block *sb,
 			   int blk_off, char *data, int size)
@@ -825,7 +826,7 @@
 		goto exit_put;
 	}
  
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		ext3_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
@@ -856,7 +857,7 @@
 	/*
 	 * OK, now we've set up the new group.  Time to make it active.
 	 *
-	 * Current kernels don't lock all allocations via lock_super(),
+	 * We do not lock all allocations via s_resize_lock
 	 * so we have to be safe wrt. concurrent accesses the group
 	 * data.  So we need to be careful to set all of the relevant
 	 * group descriptor data etc. *before* we enable the group.
  
@@ -900,12 +901,12 @@
 	 *
 	 * The precise rules we use are:
 	 *
-	 * * Writers of s_groups_count *must* hold lock_super
+	 * * Writers of s_groups_count *must* hold s_resize_lock
 	 * AND
 	 * * Writers must perform a smp_wmb() after updating all dependent
 	 *   data and before modifying the groups count
 	 *
-	 * * Readers must hold lock_super() over the access
+	 * * Readers must hold s_resize_lock over the access
 	 * OR
 	 * * Readers must perform an smp_rmb() after reading the groups count
 	 *   and before reading any dependent data.
@@ -936,7 +937,7 @@
 	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
  
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext3_journal_stop(handle)) && !err)
 		err = err2;
 	if (!err) {
@@ -973,7 +974,7 @@
  
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after
-	 * taking lock_super() below. */
+	 * taking the s_resize_lock below. */
 	o_blocks_count = le32_to_cpu(es->s_blocks_count);
 	o_groups_count = EXT3_SB(sb)->s_groups_count;
  
  
@@ -1045,11 +1046,11 @@
 		goto exit_put;
 	}
  
-	lock_super(sb);
+	mutex_lock(&EXT3_SB(sb)->s_resize_lock);
 	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
 		ext3_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
-		unlock_super(sb);
+		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 		ext3_journal_stop(handle);
 		err = -EBUSY;
 		goto exit_put;
  
@@ -1059,13 +1060,13 @@
 						 EXT3_SB(sb)->s_sbh))) {
 		ext3_warning(sb, __func__,
 			     "error %d on journal write access", err);
-		unlock_super(sb);
+		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 		ext3_journal_stop(handle);
 		goto exit_put;
 	}
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
 	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	unlock_super(sb);
+	mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
 		   o_blocks_count + add);
 	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
@@ -1929,6 +1929,7 @@
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
+	mutex_init(&sbi->s_resize_lock);
  
 	sb->s_root = NULL;
  
@@ -73,6 +73,7 @@
 	struct journal_s * s_journal;
 	struct list_head s_orphan;
 	struct mutex s_orphan_lock;
+	struct mutex s_resize_lock;
 	unsigned long s_commit_interval;
 	struct block_device *journal_bdev;
 #ifdef CONFIG_JBD_DEBUG
...	...	@@ -209,7 +209,7 @@
209	209	if (IS_ERR(handle))
210	210	return PTR_ERR(handle);
211	211
212		- lock_super(sb);
	212	+ mutex_lock(&sbi->s_resize_lock);
213	213	if (input->group != sbi->s_groups_count) {
214	214	err = -EBUSY;
215	215	goto exit_journal;
...	...	@@ -324,7 +324,7 @@
324	324	brelse(bh);
325	325
326	326	exit_journal:
327		- unlock_super(sb);
	327	+ mutex_unlock(&sbi->s_resize_lock);
328	328	if ((err2 = ext3_journal_stop(handle)) && !err)
329	329	err = err2;
330	330
...	...	@@ -662,11 +662,12 @@
662	662	* important part is that the new block and inode counts are in the backup
663	663	* superblocks, and the location of the new group metadata in the GDT backups.
664	664	*
665		- * We do not need lock_super() for this, because these blocks are not
666		- * otherwise touched by the filesystem code when it is mounted. We don't
667		- * need to worry about last changing from sbi->s_groups_count, because the
668		- * worst that can happen is that we do not copy the full number of backups
669		- * at this time. The resize which changed s_groups_count will backup again.
	665	+ * We do not need to take the s_resize_lock for this, because these
	666	+ * blocks are not otherwise touched by the filesystem code when it is
	667	+ * mounted. We don't need to worry about last changing from
	668	+ * sbi->s_groups_count, because the worst that can happen is that we
	669	+ * do not copy the full number of backups at this time. The resize
	670	+ * which changed s_groups_count will backup again.
670	671	*/
671	672	static void update_backups(struct super_block *sb,
672	673	int blk_off, char *data, int size)
...	...	@@ -825,7 +826,7 @@
825	826	goto exit_put;
826	827	}
827	828
828		- lock_super(sb);
	829	+ mutex_lock(&sbi->s_resize_lock);
829	830	if (input->group != sbi->s_groups_count) {
830	831	ext3_warning(sb, __func__,
831	832	"multiple resizers run on filesystem!");
...	...	@@ -856,7 +857,7 @@
856	857	/*
857	858	* OK, now we've set up the new group. Time to make it active.
858	859	*
859		- * Current kernels don't lock all allocations via lock_super(),
	860	+ * We do not lock all allocations via s_resize_lock
860	861	* so we have to be safe wrt. concurrent accesses the group
861	862	* data. So we need to be careful to set all of the relevant
862	863	* group descriptor data etc. before we enable the group.
863	864
...	...	@@ -900,12 +901,12 @@
900	901	*
901	902	* The precise rules we use are:
902	903	*
903		- * * Writers of s_groups_count must hold lock_super
	904	+ * * Writers of s_groups_count must hold s_resize_lock
904	905	* AND
905	906	* * Writers must perform a smp_wmb() after updating all dependent
906	907	* data and before modifying the groups count
907	908	*
908		- * * Readers must hold lock_super() over the access
	909	+ * * Readers must hold s_resize_lock over the access
909	910	* OR
910	911	* * Readers must perform an smp_rmb() after reading the groups count
911	912	* and before reading any dependent data.
...	...	@@ -936,7 +937,7 @@
936	937	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
937	938
938	939	exit_journal:
939		- unlock_super(sb);
	940	+ mutex_unlock(&sbi->s_resize_lock);
940	941	if ((err2 = ext3_journal_stop(handle)) && !err)
941	942	err = err2;
942	943	if (!err) {
...	...	@@ -973,7 +974,7 @@
973	974
974	975	/* We don't need to worry about locking wrt other resizers just
975	976	* yet: we're going to revalidate es->s_blocks_count after
976		- * taking lock_super() below. */
	977	+ * taking the s_resize_lock below. */
977	978	o_blocks_count = le32_to_cpu(es->s_blocks_count);
978	979	o_groups_count = EXT3_SB(sb)->s_groups_count;
979	980
980	981
...	...	@@ -1045,11 +1046,11 @@
1045	1046	goto exit_put;
1046	1047	}
1047	1048
1048		- lock_super(sb);
	1049	+ mutex_lock(&EXT3_SB(sb)->s_resize_lock);
1049	1050	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1050	1051	ext3_warning(sb, __func__,
1051	1052	"multiple resizers run on filesystem!");
1052		- unlock_super(sb);
	1053	+ mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1053	1054	ext3_journal_stop(handle);
1054	1055	err = -EBUSY;
1055	1056	goto exit_put;
1056	1057
...	...	@@ -1059,13 +1060,13 @@
1059	1060	EXT3_SB(sb)->s_sbh))) {
1060	1061	ext3_warning(sb, __func__,
1061	1062	"error %d on journal write access", err);
1062		- unlock_super(sb);
	1063	+ mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1063	1064	ext3_journal_stop(handle);
1064	1065	goto exit_put;
1065	1066	}
1066	1067	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1067	1068	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1068		- unlock_super(sb);
	1069	+ mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1069	1070	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
1070	1071	o_blocks_count + add);
1071	1072	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
...	...	@@ -1929,6 +1929,7 @@
1929	1929	#endif
1930	1930	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1931	1931	mutex_init(&sbi->s_orphan_lock);
	1932	+ mutex_init(&sbi->s_resize_lock);
1932	1933
1933	1934	sb->s_root = NULL;
1934	1935
...	...	@@ -73,6 +73,7 @@
73	73	struct journal_s * s_journal;
74	74	struct list_head s_orphan;
75	75	struct mutex s_orphan_lock;
	76	+ struct mutex s_resize_lock;
76	77	unsigned long s_commit_interval;
77	78	struct block_device *journal_bdev;
78	79	#ifdef CONFIG_JBD_DEBUG