Commit 8aa7e847d834ed937a9ad37a0f2ad5b8584c1ab0

Authored by Jens Axboe
Committed by Jens Axboe
1 parent c2cc49a2f8

Fix congestion_wait() sync/async vs read/write confusion

Commit 1faa16d22877f4839bd433547d770c676d1d964c accidentally broke
the bdi congestion wait queue logic, causing us to wait on congestion
for WRITE (== 1) when we really wanted BLK_RW_ASYNC (== 0) instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 16 changed files with 43 additions and 40 deletions (side-by-side diff)

arch/x86/lib/usercopy_32.c
... ... @@ -751,7 +751,7 @@
751 751  
752 752 if (retval == -ENOMEM && is_global_init(current)) {
753 753 up_read(&current->mm->mmap_sem);
754   - congestion_wait(WRITE, HZ/50);
  754 + congestion_wait(BLK_RW_ASYNC, HZ/50);
755 755 goto survive;
756 756 }
757 757  
drivers/block/pktcdvd.c
... ... @@ -1372,8 +1372,10 @@
1372 1372 wakeup = (pd->write_congestion_on > 0
1373 1373 && pd->bio_queue_size <= pd->write_congestion_off);
1374 1374 spin_unlock(&pd->lock);
1375   - if (wakeup)
1376   - clear_bdi_congested(&pd->disk->queue->backing_dev_info, WRITE);
  1375 + if (wakeup) {
  1376 + clear_bdi_congested(&pd->disk->queue->backing_dev_info,
  1377 + BLK_RW_ASYNC);
  1378 + }
1377 1379  
1378 1380 pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
1379 1381 pkt_set_state(pkt, PACKET_WAITING_STATE);
1380 1382  
... ... @@ -2592,10 +2594,10 @@
2592 2594 spin_lock(&pd->lock);
2593 2595 if (pd->write_congestion_on > 0
2594 2596 && pd->bio_queue_size >= pd->write_congestion_on) {
2595   - set_bdi_congested(&q->backing_dev_info, WRITE);
  2597 + set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC);
2596 2598 do {
2597 2599 spin_unlock(&pd->lock);
2598   - congestion_wait(WRITE, HZ);
  2600 + congestion_wait(BLK_RW_ASYNC, HZ);
2599 2601 spin_lock(&pd->lock);
2600 2602 } while(pd->bio_queue_size > pd->write_congestion_off);
2601 2603 }
drivers/md/dm-crypt.c
... ... @@ -776,7 +776,7 @@
776 776 * But don't wait if split was due to the io size restriction
777 777 */
778 778 if (unlikely(out_of_pages))
779   - congestion_wait(WRITE, HZ/100);
  779 + congestion_wait(BLK_RW_ASYNC, HZ/100);
780 780  
781 781 /*
782 782 * With async crypto it is unsafe to share the crypto context
fs/fat/file.c
... ... @@ -134,7 +134,7 @@
134 134 if ((filp->f_mode & FMODE_WRITE) &&
135 135 MSDOS_SB(inode->i_sb)->options.flush) {
136 136 fat_flush_inodes(inode->i_sb, inode, NULL);
137   - congestion_wait(WRITE, HZ/10);
  137 + congestion_wait(BLK_RW_ASYNC, HZ/10);
138 138 }
139 139 return 0;
140 140 }
fs/fuse/dev.c
... ... @@ -286,8 +286,8 @@
286 286 }
287 287 if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
288 288 fc->connected && fc->bdi_initialized) {
289   - clear_bdi_congested(&fc->bdi, READ);
290   - clear_bdi_congested(&fc->bdi, WRITE);
  289 + clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
  290 + clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
291 291 }
292 292 fc->num_background--;
293 293 fc->active_background--;
... ... @@ -414,8 +414,8 @@
414 414 fc->blocked = 1;
415 415 if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
416 416 fc->bdi_initialized) {
417   - set_bdi_congested(&fc->bdi, READ);
418   - set_bdi_congested(&fc->bdi, WRITE);
  417 + set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
  418 + set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
419 419 }
420 420 list_add_tail(&req->list, &fc->bg_queue);
421 421 flush_bg_queue(fc);
fs/nfs/write.c
... ... @@ -202,8 +202,10 @@
202 202 struct nfs_server *nfss = NFS_SERVER(inode);
203 203  
204 204 if (atomic_long_inc_return(&nfss->writeback) >
205   - NFS_CONGESTION_ON_THRESH)
206   - set_bdi_congested(&nfss->backing_dev_info, WRITE);
  205 + NFS_CONGESTION_ON_THRESH) {
  206 + set_bdi_congested(&nfss->backing_dev_info,
  207 + BLK_RW_ASYNC);
  208 + }
207 209 }
208 210 return ret;
209 211 }
... ... @@ -215,7 +217,7 @@
215 217  
216 218 end_page_writeback(page);
217 219 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
218   - clear_bdi_congested(&nfss->backing_dev_info, WRITE);
  220 + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
219 221 }
220 222  
221 223 /*
fs/reiserfs/journal.c
... ... @@ -997,7 +997,7 @@
997 997 DEFINE_WAIT(wait);
998 998 struct reiserfs_journal *j = SB_JOURNAL(s);
999 999 if (atomic_read(&j->j_async_throttle))
1000   - congestion_wait(WRITE, HZ / 10);
  1000 + congestion_wait(BLK_RW_ASYNC, HZ / 10);
1001 1001 return 0;
1002 1002 }
1003 1003  
fs/xfs/linux-2.6/kmem.c
... ... @@ -53,7 +53,7 @@
53 53 printk(KERN_ERR "XFS: possible memory allocation "
54 54 "deadlock in %s (mode:0x%x)\n",
55 55 __func__, lflags);
56   - congestion_wait(WRITE, HZ/50);
  56 + congestion_wait(BLK_RW_ASYNC, HZ/50);
57 57 } while (1);
58 58 }
59 59  
... ... @@ -130,7 +130,7 @@
130 130 printk(KERN_ERR "XFS: possible memory allocation "
131 131 "deadlock in %s (mode:0x%x)\n",
132 132 __func__, lflags);
133   - congestion_wait(WRITE, HZ/50);
  133 + congestion_wait(BLK_RW_ASYNC, HZ/50);
134 134 } while (1);
135 135 }
136 136  
fs/xfs/linux-2.6/xfs_buf.c
... ... @@ -412,7 +412,7 @@
412 412  
413 413 XFS_STATS_INC(xb_page_retries);
414 414 xfsbufd_wakeup(0, gfp_mask);
415   - congestion_wait(WRITE, HZ/50);
  415 + congestion_wait(BLK_RW_ASYNC, HZ/50);
416 416 goto retry;
417 417 }
418 418  
include/linux/backing-dev.h
... ... @@ -229,9 +229,9 @@
229 229 (1 << BDI_async_congested));
230 230 }
231 231  
232   -void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
233   -void set_bdi_congested(struct backing_dev_info *bdi, int rw);
234   -long congestion_wait(int rw, long timeout);
  232 +void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
  233 +void set_bdi_congested(struct backing_dev_info *bdi, int sync);
  234 +long congestion_wait(int sync, long timeout);
235 235  
236 236  
237 237 static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
include/linux/blkdev.h
... ... @@ -779,18 +779,18 @@
779 779 * congested queues, and wake up anyone who was waiting for requests to be
780 780 * put back.
781 781 */
782   -static inline void blk_clear_queue_congested(struct request_queue *q, int rw)
  782 +static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
783 783 {
784   - clear_bdi_congested(&q->backing_dev_info, rw);
  784 + clear_bdi_congested(&q->backing_dev_info, sync);
785 785 }
786 786  
787 787 /*
788 788 * A queue has just entered congestion. Flag that in the queue's VM-visible
789 789 * state flags and increment the global gounter of congested queues.
790 790 */
791   -static inline void blk_set_queue_congested(struct request_queue *q, int rw)
  791 +static inline void blk_set_queue_congested(struct request_queue *q, int sync)
792 792 {
793   - set_bdi_congested(&q->backing_dev_info, rw);
  793 + set_bdi_congested(&q->backing_dev_info, sync);
794 794 }
795 795  
796 796 extern void blk_start_queue(struct request_queue *q);
mm/backing-dev.c
... ... @@ -283,7 +283,6 @@
283 283 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
284 284 };
285 285  
286   -
287 286 void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
288 287 {
289 288 enum bdi_state bit;
290 289  
291 290  
... ... @@ -308,18 +307,18 @@
308 307  
309 308 /**
310 309 * congestion_wait - wait for a backing_dev to become uncongested
311   - * @rw: READ or WRITE
  310 + * @sync: SYNC or ASYNC IO
312 311 * @timeout: timeout in jiffies
313 312 *
314 313 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
315 314 * write congestion. If no backing_devs are congested then just wait for the
316 315 * next write to be completed.
317 316 */
318   -long congestion_wait(int rw, long timeout)
  317 +long congestion_wait(int sync, long timeout)
319 318 {
320 319 long ret;
321 320 DEFINE_WAIT(wait);
322   - wait_queue_head_t *wqh = &congestion_wqh[rw];
  321 + wait_queue_head_t *wqh = &congestion_wqh[sync];
323 322  
324 323 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
325 324 ret = io_schedule_timeout(timeout);
mm/memcontrol.c
... ... @@ -1973,7 +1973,7 @@
1973 1973 if (!progress) {
1974 1974 nr_retries--;
1975 1975 /* maybe some writeback is necessary */
1976   - congestion_wait(WRITE, HZ/10);
  1976 + congestion_wait(BLK_RW_ASYNC, HZ/10);
1977 1977 }
1978 1978  
1979 1979 }
mm/page-writeback.c
... ... @@ -575,7 +575,7 @@
575 575 if (pages_written >= write_chunk)
576 576 break; /* We've done our duty */
577 577  
578   - congestion_wait(WRITE, HZ/10);
  578 + congestion_wait(BLK_RW_ASYNC, HZ/10);
579 579 }
580 580  
581 581 if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
... ... @@ -669,7 +669,7 @@
669 669 if (global_page_state(NR_UNSTABLE_NFS) +
670 670 global_page_state(NR_WRITEBACK) <= dirty_thresh)
671 671 break;
672   - congestion_wait(WRITE, HZ/10);
  672 + congestion_wait(BLK_RW_ASYNC, HZ/10);
673 673  
674 674 /*
675 675 * The caller might hold locks which can prevent IO completion
... ... @@ -715,7 +715,7 @@
715 715 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
716 716 /* Wrote less than expected */
717 717 if (wbc.encountered_congestion || wbc.more_io)
718   - congestion_wait(WRITE, HZ/10);
  718 + congestion_wait(BLK_RW_ASYNC, HZ/10);
719 719 else
720 720 break;
721 721 }
... ... @@ -787,7 +787,7 @@
787 787 writeback_inodes(&wbc);
788 788 if (wbc.nr_to_write > 0) {
789 789 if (wbc.encountered_congestion || wbc.more_io)
790   - congestion_wait(WRITE, HZ/10);
  790 + congestion_wait(BLK_RW_ASYNC, HZ/10);
791 791 else
792 792 break; /* All the old data is written */
793 793 }
mm/page_alloc.c
... ... @@ -1666,7 +1666,7 @@
1666 1666 preferred_zone, migratetype);
1667 1667  
1668 1668 if (!page && gfp_mask & __GFP_NOFAIL)
1669   - congestion_wait(WRITE, HZ/50);
  1669 + congestion_wait(BLK_RW_ASYNC, HZ/50);
1670 1670 } while (!page && (gfp_mask & __GFP_NOFAIL));
1671 1671  
1672 1672 return page;
... ... @@ -1831,7 +1831,7 @@
1831 1831 pages_reclaimed += did_some_progress;
1832 1832 if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
1833 1833 /* Wait for some write requests to complete then retry */
1834   - congestion_wait(WRITE, HZ/50);
  1834 + congestion_wait(BLK_RW_ASYNC, HZ/50);
1835 1835 goto rebalance;
1836 1836 }
1837 1837  
mm/vmscan.c
... ... @@ -1104,7 +1104,7 @@
1104 1104 */
1105 1105 if (nr_freed < nr_taken && !current_is_kswapd() &&
1106 1106 lumpy_reclaim) {
1107   - congestion_wait(WRITE, HZ/10);
  1107 + congestion_wait(BLK_RW_ASYNC, HZ/10);
1108 1108  
1109 1109 /*
1110 1110 * The attempt at page out may have made some
... ... @@ -1721,7 +1721,7 @@
1721 1721  
1722 1722 /* Take a nap, wait for some writeback to complete */
1723 1723 if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
1724   - congestion_wait(WRITE, HZ/10);
  1724 + congestion_wait(BLK_RW_ASYNC, HZ/10);
1725 1725 }
1726 1726 /* top priority shrink_zones still had more to do? don't OOM, then */
1727 1727 if (!sc->all_unreclaimable && scanning_global_lru(sc))
... ... @@ -1960,7 +1960,7 @@
1960 1960 * another pass across the zones.
1961 1961 */
1962 1962 if (total_scanned && priority < DEF_PRIORITY - 2)
1963   - congestion_wait(WRITE, HZ/10);
  1963 + congestion_wait(BLK_RW_ASYNC, HZ/10);
1964 1964  
1965 1965 /*
1966 1966 * We do this so kswapd doesn't build up large priorities for
... ... @@ -2233,7 +2233,7 @@
2233 2233 goto out;
2234 2234  
2235 2235 if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
2236   - congestion_wait(WRITE, HZ / 10);
  2236 + congestion_wait(BLK_RW_ASYNC, HZ / 10);
2237 2237 }
2238 2238 }
2239 2239