Commit 8aa7e847d834ed937a9ad37a0f2ad5b8584c1ab0
Committed by Jens Axboe; 1 parent: c2cc49a2f8
Exists in
master
and in
39 other branches
Fix congestion_wait() sync/async vs read/write confusion
Commit 1faa16d22877f4839bd433547d770c676d1d964c accidentally broke the bdi congestion wait queue logic, causing us to wait on congestion for WRITE (== 1) when we really wanted BLK_RW_ASYNC (== 0) instead. Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Showing 16 changed files with 43 additions and 40 deletions (side-by-side diff)
- arch/x86/lib/usercopy_32.c
- drivers/block/pktcdvd.c
- drivers/md/dm-crypt.c
- fs/fat/file.c
- fs/fuse/dev.c
- fs/nfs/write.c
- fs/reiserfs/journal.c
- fs/xfs/linux-2.6/kmem.c
- fs/xfs/linux-2.6/xfs_buf.c
- include/linux/backing-dev.h
- include/linux/blkdev.h
- mm/backing-dev.c
- mm/memcontrol.c
- mm/page-writeback.c
- mm/page_alloc.c
- mm/vmscan.c
arch/x86/lib/usercopy_32.c
drivers/block/pktcdvd.c
... | ... | @@ -1372,8 +1372,10 @@ |
1372 | 1372 | wakeup = (pd->write_congestion_on > 0 |
1373 | 1373 | && pd->bio_queue_size <= pd->write_congestion_off); |
1374 | 1374 | spin_unlock(&pd->lock); |
1375 | - if (wakeup) | |
1376 | - clear_bdi_congested(&pd->disk->queue->backing_dev_info, WRITE); | |
1375 | + if (wakeup) { | |
1376 | + clear_bdi_congested(&pd->disk->queue->backing_dev_info, | |
1377 | + BLK_RW_ASYNC); | |
1378 | + } | |
1377 | 1379 | |
1378 | 1380 | pkt->sleep_time = max(PACKET_WAIT_TIME, 1); |
1379 | 1381 | pkt_set_state(pkt, PACKET_WAITING_STATE); |
1380 | 1382 | |
... | ... | @@ -2592,10 +2594,10 @@ |
2592 | 2594 | spin_lock(&pd->lock); |
2593 | 2595 | if (pd->write_congestion_on > 0 |
2594 | 2596 | && pd->bio_queue_size >= pd->write_congestion_on) { |
2595 | - set_bdi_congested(&q->backing_dev_info, WRITE); | |
2597 | + set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC); | |
2596 | 2598 | do { |
2597 | 2599 | spin_unlock(&pd->lock); |
2598 | - congestion_wait(WRITE, HZ); | |
2600 | + congestion_wait(BLK_RW_ASYNC, HZ); | |
2599 | 2601 | spin_lock(&pd->lock); |
2600 | 2602 | } while(pd->bio_queue_size > pd->write_congestion_off); |
2601 | 2603 | } |
drivers/md/dm-crypt.c
... | ... | @@ -776,7 +776,7 @@ |
776 | 776 | * But don't wait if split was due to the io size restriction |
777 | 777 | */ |
778 | 778 | if (unlikely(out_of_pages)) |
779 | - congestion_wait(WRITE, HZ/100); | |
779 | + congestion_wait(BLK_RW_ASYNC, HZ/100); | |
780 | 780 | |
781 | 781 | /* |
782 | 782 | * With async crypto it is unsafe to share the crypto context |
fs/fat/file.c
fs/fuse/dev.c
... | ... | @@ -286,8 +286,8 @@ |
286 | 286 | } |
287 | 287 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD && |
288 | 288 | fc->connected && fc->bdi_initialized) { |
289 | - clear_bdi_congested(&fc->bdi, READ); | |
290 | - clear_bdi_congested(&fc->bdi, WRITE); | |
289 | + clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); | |
290 | + clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); | |
291 | 291 | } |
292 | 292 | fc->num_background--; |
293 | 293 | fc->active_background--; |
... | ... | @@ -414,8 +414,8 @@ |
414 | 414 | fc->blocked = 1; |
415 | 415 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD && |
416 | 416 | fc->bdi_initialized) { |
417 | - set_bdi_congested(&fc->bdi, READ); | |
418 | - set_bdi_congested(&fc->bdi, WRITE); | |
417 | + set_bdi_congested(&fc->bdi, BLK_RW_SYNC); | |
418 | + set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); | |
419 | 419 | } |
420 | 420 | list_add_tail(&req->list, &fc->bg_queue); |
421 | 421 | flush_bg_queue(fc); |
fs/nfs/write.c
... | ... | @@ -202,8 +202,10 @@ |
202 | 202 | struct nfs_server *nfss = NFS_SERVER(inode); |
203 | 203 | |
204 | 204 | if (atomic_long_inc_return(&nfss->writeback) > |
205 | - NFS_CONGESTION_ON_THRESH) | |
206 | - set_bdi_congested(&nfss->backing_dev_info, WRITE); | |
205 | + NFS_CONGESTION_ON_THRESH) { | |
206 | + set_bdi_congested(&nfss->backing_dev_info, | |
207 | + BLK_RW_ASYNC); | |
208 | + } | |
207 | 209 | } |
208 | 210 | return ret; |
209 | 211 | } |
... | ... | @@ -215,7 +217,7 @@ |
215 | 217 | |
216 | 218 | end_page_writeback(page); |
217 | 219 | if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) |
218 | - clear_bdi_congested(&nfss->backing_dev_info, WRITE); | |
220 | + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | |
219 | 221 | } |
220 | 222 | |
221 | 223 | /* |
fs/reiserfs/journal.c
fs/xfs/linux-2.6/kmem.c
... | ... | @@ -53,7 +53,7 @@ |
53 | 53 | printk(KERN_ERR "XFS: possible memory allocation " |
54 | 54 | "deadlock in %s (mode:0x%x)\n", |
55 | 55 | __func__, lflags); |
56 | - congestion_wait(WRITE, HZ/50); | |
56 | + congestion_wait(BLK_RW_ASYNC, HZ/50); | |
57 | 57 | } while (1); |
58 | 58 | } |
59 | 59 | |
... | ... | @@ -130,7 +130,7 @@ |
130 | 130 | printk(KERN_ERR "XFS: possible memory allocation " |
131 | 131 | "deadlock in %s (mode:0x%x)\n", |
132 | 132 | __func__, lflags); |
133 | - congestion_wait(WRITE, HZ/50); | |
133 | + congestion_wait(BLK_RW_ASYNC, HZ/50); | |
134 | 134 | } while (1); |
135 | 135 | } |
136 | 136 |
fs/xfs/linux-2.6/xfs_buf.c
include/linux/backing-dev.h
... | ... | @@ -229,9 +229,9 @@ |
229 | 229 | (1 << BDI_async_congested)); |
230 | 230 | } |
231 | 231 | |
232 | -void clear_bdi_congested(struct backing_dev_info *bdi, int rw); | |
233 | -void set_bdi_congested(struct backing_dev_info *bdi, int rw); | |
234 | -long congestion_wait(int rw, long timeout); | |
232 | +void clear_bdi_congested(struct backing_dev_info *bdi, int sync); | |
233 | +void set_bdi_congested(struct backing_dev_info *bdi, int sync); | |
234 | +long congestion_wait(int sync, long timeout); | |
235 | 235 | |
236 | 236 | |
237 | 237 | static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) |
include/linux/blkdev.h
... | ... | @@ -779,18 +779,18 @@ |
779 | 779 | * congested queues, and wake up anyone who was waiting for requests to be |
780 | 780 | * put back. |
781 | 781 | */ |
782 | -static inline void blk_clear_queue_congested(struct request_queue *q, int rw) | |
782 | +static inline void blk_clear_queue_congested(struct request_queue *q, int sync) | |
783 | 783 | { |
784 | - clear_bdi_congested(&q->backing_dev_info, rw); | |
784 | + clear_bdi_congested(&q->backing_dev_info, sync); | |
785 | 785 | } |
786 | 786 | |
787 | 787 | /* |
788 | 788 | * A queue has just entered congestion. Flag that in the queue's VM-visible |
789 | 789 | * state flags and increment the global counter of congested queues. |
790 | 790 | */ |
791 | -static inline void blk_set_queue_congested(struct request_queue *q, int rw) | |
791 | +static inline void blk_set_queue_congested(struct request_queue *q, int sync) | |
792 | 792 | { |
793 | - set_bdi_congested(&q->backing_dev_info, rw); | |
793 | + set_bdi_congested(&q->backing_dev_info, sync); | |
794 | 794 | } |
795 | 795 | |
796 | 796 | extern void blk_start_queue(struct request_queue *q); |
mm/backing-dev.c
... | ... | @@ -283,7 +283,6 @@ |
283 | 283 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) |
284 | 284 | }; |
285 | 285 | |
286 | - | |
287 | 286 | void clear_bdi_congested(struct backing_dev_info *bdi, int sync) |
288 | 287 | { |
289 | 288 | enum bdi_state bit; |
290 | 289 | |
291 | 290 | |
... | ... | @@ -308,18 +307,18 @@ |
308 | 307 | |
309 | 308 | /** |
310 | 309 | * congestion_wait - wait for a backing_dev to become uncongested |
311 | - * @rw: READ or WRITE | |
310 | + * @sync: SYNC or ASYNC IO | |
312 | 311 | * @timeout: timeout in jiffies |
313 | 312 | * |
314 | 313 | * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit |
315 | 314 | * write congestion. If no backing_devs are congested then just wait for the |
316 | 315 | * next write to be completed. |
317 | 316 | */ |
318 | -long congestion_wait(int rw, long timeout) | |
317 | +long congestion_wait(int sync, long timeout) | |
319 | 318 | { |
320 | 319 | long ret; |
321 | 320 | DEFINE_WAIT(wait); |
322 | - wait_queue_head_t *wqh = &congestion_wqh[rw]; | |
321 | + wait_queue_head_t *wqh = &congestion_wqh[sync]; | |
323 | 322 | |
324 | 323 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); |
325 | 324 | ret = io_schedule_timeout(timeout); |
mm/memcontrol.c
mm/page-writeback.c
... | ... | @@ -575,7 +575,7 @@ |
575 | 575 | if (pages_written >= write_chunk) |
576 | 576 | break; /* We've done our duty */ |
577 | 577 | |
578 | - congestion_wait(WRITE, HZ/10); | |
578 | + congestion_wait(BLK_RW_ASYNC, HZ/10); | |
579 | 579 | } |
580 | 580 | |
581 | 581 | if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && |
... | ... | @@ -669,7 +669,7 @@ |
669 | 669 | if (global_page_state(NR_UNSTABLE_NFS) + |
670 | 670 | global_page_state(NR_WRITEBACK) <= dirty_thresh) |
671 | 671 | break; |
672 | - congestion_wait(WRITE, HZ/10); | |
672 | + congestion_wait(BLK_RW_ASYNC, HZ/10); | |
673 | 673 | |
674 | 674 | /* |
675 | 675 | * The caller might hold locks which can prevent IO completion |
... | ... | @@ -715,7 +715,7 @@ |
715 | 715 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { |
716 | 716 | /* Wrote less than expected */ |
717 | 717 | if (wbc.encountered_congestion || wbc.more_io) |
718 | - congestion_wait(WRITE, HZ/10); | |
718 | + congestion_wait(BLK_RW_ASYNC, HZ/10); | |
719 | 719 | else |
720 | 720 | break; |
721 | 721 | } |
... | ... | @@ -787,7 +787,7 @@ |
787 | 787 | writeback_inodes(&wbc); |
788 | 788 | if (wbc.nr_to_write > 0) { |
789 | 789 | if (wbc.encountered_congestion || wbc.more_io) |
790 | - congestion_wait(WRITE, HZ/10); | |
790 | + congestion_wait(BLK_RW_ASYNC, HZ/10); | |
791 | 791 | else |
792 | 792 | break; /* All the old data is written */ |
793 | 793 | } |
mm/page_alloc.c
... | ... | @@ -1666,7 +1666,7 @@ |
1666 | 1666 | preferred_zone, migratetype); |
1667 | 1667 | |
1668 | 1668 | if (!page && gfp_mask & __GFP_NOFAIL) |
1669 | - congestion_wait(WRITE, HZ/50); | |
1669 | + congestion_wait(BLK_RW_ASYNC, HZ/50); | |
1670 | 1670 | } while (!page && (gfp_mask & __GFP_NOFAIL)); |
1671 | 1671 | |
1672 | 1672 | return page; |
... | ... | @@ -1831,7 +1831,7 @@ |
1831 | 1831 | pages_reclaimed += did_some_progress; |
1832 | 1832 | if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { |
1833 | 1833 | /* Wait for some write requests to complete then retry */ |
1834 | - congestion_wait(WRITE, HZ/50); | |
1834 | + congestion_wait(BLK_RW_ASYNC, HZ/50); | |
1835 | 1835 | goto rebalance; |
1836 | 1836 | } |
1837 | 1837 |
mm/vmscan.c
... | ... | @@ -1104,7 +1104,7 @@ |
1104 | 1104 | */ |
1105 | 1105 | if (nr_freed < nr_taken && !current_is_kswapd() && |
1106 | 1106 | lumpy_reclaim) { |
1107 | - congestion_wait(WRITE, HZ/10); | |
1107 | + congestion_wait(BLK_RW_ASYNC, HZ/10); | |
1108 | 1108 | |
1109 | 1109 | /* |
1110 | 1110 | * The attempt at page out may have made some |
... | ... | @@ -1721,7 +1721,7 @@ |
1721 | 1721 | |
1722 | 1722 | /* Take a nap, wait for some writeback to complete */ |
1723 | 1723 | if (sc->nr_scanned && priority < DEF_PRIORITY - 2) |
1724 | - congestion_wait(WRITE, HZ/10); | |
1724 | + congestion_wait(BLK_RW_ASYNC, HZ/10); | |
1725 | 1725 | } |
1726 | 1726 | /* top priority shrink_zones still had more to do? don't OOM, then */ |
1727 | 1727 | if (!sc->all_unreclaimable && scanning_global_lru(sc)) |
... | ... | @@ -1960,7 +1960,7 @@ |
1960 | 1960 | * another pass across the zones. |
1961 | 1961 | */ |
1962 | 1962 | if (total_scanned && priority < DEF_PRIORITY - 2) |
1963 | - congestion_wait(WRITE, HZ/10); | |
1963 | + congestion_wait(BLK_RW_ASYNC, HZ/10); | |
1964 | 1964 | |
1965 | 1965 | /* |
1966 | 1966 | * We do this so kswapd doesn't build up large priorities for |
... | ... | @@ -2233,7 +2233,7 @@ |
2233 | 2233 | goto out; |
2234 | 2234 | |
2235 | 2235 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) |
2236 | - congestion_wait(WRITE, HZ / 10); | |
2236 | + congestion_wait(BLK_RW_ASYNC, HZ / 10); | |
2237 | 2237 | } |
2238 | 2238 | } |
2239 | 2239 |