Commit 8aa7e847d834ed937a9ad37a0f2ad5b8584c1ab0

Authored by Jens Axboe
Committed by Jens Axboe
1 parent c2cc49a2f8

Fix congestion_wait() sync/async vs read/write confusion

Commit 1faa16d22877f4839bd433547d770c676d1d964c accidentally broke
the bdi congestion wait queue logic, causing us to wait on congestion
for WRITE (== 1) when we really wanted BLK_RW_ASYNC (== 0) instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 16 changed files with 43 additions and 40 deletions (side-by-side diff)

arch/x86/lib/usercopy_32.c
... ... @@ -751,7 +751,7 @@
751 751  
752 752 if (retval == -ENOMEM && is_global_init(current)) {
753 753 up_read(&current->mm->mmap_sem);
754   - congestion_wait(WRITE, HZ/50);
  754 + congestion_wait(BLK_RW_ASYNC, HZ/50);
755 755 goto survive;
756 756 }
757 757  
drivers/block/pktcdvd.c
... ... @@ -1372,8 +1372,10 @@
1372 1372 wakeup = (pd->write_congestion_on > 0
1373 1373 && pd->bio_queue_size <= pd->write_congestion_off);
1374 1374 spin_unlock(&pd->lock);
1375   - if (wakeup)
1376   - clear_bdi_congested(&pd->disk->queue->backing_dev_info, WRITE);
  1375 + if (wakeup) {
  1376 + clear_bdi_congested(&pd->disk->queue->backing_dev_info,
  1377 + BLK_RW_ASYNC);
  1378 + }
1377 1379  
1378 1380 pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
1379 1381 pkt_set_state(pkt, PACKET_WAITING_STATE);
1380 1382  
... ... @@ -2592,10 +2594,10 @@
2592 2594 spin_lock(&pd->lock);
2593 2595 if (pd->write_congestion_on > 0
2594 2596 && pd->bio_queue_size >= pd->write_congestion_on) {
2595   - set_bdi_congested(&q->backing_dev_info, WRITE);
  2597 + set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC);
2596 2598 do {
2597 2599 spin_unlock(&pd->lock);
2598   - congestion_wait(WRITE, HZ);
  2600 + congestion_wait(BLK_RW_ASYNC, HZ);
2599 2601 spin_lock(&pd->lock);
2600 2602 } while(pd->bio_queue_size > pd->write_congestion_off);
2601 2603 }
drivers/md/dm-crypt.c
... ... @@ -776,7 +776,7 @@
776 776 * But don't wait if split was due to the io size restriction
777 777 */
778 778 if (unlikely(out_of_pages))
779   - congestion_wait(WRITE, HZ/100);
  779 + congestion_wait(BLK_RW_ASYNC, HZ/100);
780 780  
781 781 /*
782 782 * With async crypto it is unsafe to share the crypto context
fs/fat/file.c
... ... @@ -134,7 +134,7 @@
134 134 if ((filp->f_mode & FMODE_WRITE) &&
135 135 MSDOS_SB(inode->i_sb)->options.flush) {
136 136 fat_flush_inodes(inode->i_sb, inode, NULL);
137   - congestion_wait(WRITE, HZ/10);
  137 + congestion_wait(BLK_RW_ASYNC, HZ/10);
138 138 }
139 139 return 0;
140 140 }
fs/fuse/dev.c
... ... @@ -286,8 +286,8 @@
286 286 }
287 287 if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
288 288 fc->connected && fc->bdi_initialized) {
289   - clear_bdi_congested(&fc->bdi, READ);
290   - clear_bdi_congested(&fc->bdi, WRITE);
  289 + clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
  290 + clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
291 291 }
292 292 fc->num_background--;
293 293 fc->active_background--;
... ... @@ -414,8 +414,8 @@
414 414 fc->blocked = 1;
415 415 if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
416 416 fc->bdi_initialized) {
417   - set_bdi_congested(&fc->bdi, READ);
418   - set_bdi_congested(&fc->bdi, WRITE);
  417 + set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
  418 + set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
419 419 }
420 420 list_add_tail(&req->list, &fc->bg_queue);
421 421 flush_bg_queue(fc);
fs/nfs/write.c
... ... @@ -202,8 +202,10 @@
202 202 struct nfs_server *nfss = NFS_SERVER(inode);
203 203  
204 204 if (atomic_long_inc_return(&nfss->writeback) >
205   - NFS_CONGESTION_ON_THRESH)
206   - set_bdi_congested(&nfss->backing_dev_info, WRITE);
  205 + NFS_CONGESTION_ON_THRESH) {
  206 + set_bdi_congested(&nfss->backing_dev_info,
  207 + BLK_RW_ASYNC);
  208 + }
207 209 }
208 210 return ret;
209 211 }
... ... @@ -215,7 +217,7 @@
215 217  
216 218 end_page_writeback(page);
217 219 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
218   - clear_bdi_congested(&nfss->backing_dev_info, WRITE);
  220 + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
219 221 }
220 222  
221 223 /*
fs/reiserfs/journal.c
... ... @@ -997,7 +997,7 @@
997 997 DEFINE_WAIT(wait);
998 998 struct reiserfs_journal *j = SB_JOURNAL(s);
999 999 if (atomic_read(&j->j_async_throttle))
1000   - congestion_wait(WRITE, HZ / 10);
  1000 + congestion_wait(BLK_RW_ASYNC, HZ / 10);
1001 1001 return 0;
1002 1002 }
1003 1003  
fs/xfs/linux-2.6/kmem.c
... ... @@ -53,7 +53,7 @@
53 53 printk(KERN_ERR "XFS: possible memory allocation "
54 54 "deadlock in %s (mode:0x%x)\n",
55 55 __func__, lflags);
56   - congestion_wait(WRITE, HZ/50);
  56 + congestion_wait(BLK_RW_ASYNC, HZ/50);
57 57 } while (1);
58 58 }
59 59  
... ... @@ -130,7 +130,7 @@
130 130 printk(KERN_ERR "XFS: possible memory allocation "
131 131 "deadlock in %s (mode:0x%x)\n",
132 132 __func__, lflags);
133   - congestion_wait(WRITE, HZ/50);
  133 + congestion_wait(BLK_RW_ASYNC, HZ/50);
134 134 } while (1);
135 135 }
136 136  
fs/xfs/linux-2.6/xfs_buf.c
... ... @@ -412,7 +412,7 @@
412 412  
413 413 XFS_STATS_INC(xb_page_retries);
414 414 xfsbufd_wakeup(0, gfp_mask);
415   - congestion_wait(WRITE, HZ/50);
  415 + congestion_wait(BLK_RW_ASYNC, HZ/50);
416 416 goto retry;
417 417 }
418 418  
include/linux/backing-dev.h
... ... @@ -229,9 +229,9 @@
229 229 (1 << BDI_async_congested));
230 230 }
231 231  
232   -void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
233   -void set_bdi_congested(struct backing_dev_info *bdi, int rw);
234   -long congestion_wait(int rw, long timeout);
  232 +void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
  233 +void set_bdi_congested(struct backing_dev_info *bdi, int sync);
  234 +long congestion_wait(int sync, long timeout);
235 235  
236 236  
237 237 static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
include/linux/blkdev.h
... ... @@ -779,18 +779,18 @@
779 779 * congested queues, and wake up anyone who was waiting for requests to be
780 780 * put back.
781 781 */
782   -static inline void blk_clear_queue_congested(struct request_queue *q, int rw)
  782 +static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
783 783 {
784   - clear_bdi_congested(&q->backing_dev_info, rw);
  784 + clear_bdi_congested(&q->backing_dev_info, sync);
785 785 }
786 786  
787 787 /*
788 788 * A queue has just entered congestion. Flag that in the queue's VM-visible
789 789 * state flags and increment the global gounter of congested queues.
790 790 */
791   -static inline void blk_set_queue_congested(struct request_queue *q, int rw)
  791 +static inline void blk_set_queue_congested(struct request_queue *q, int sync)
792 792 {
793   - set_bdi_congested(&q->backing_dev_info, rw);
  793 + set_bdi_congested(&q->backing_dev_info, sync);
794 794 }
795 795  
796 796 extern void blk_start_queue(struct request_queue *q);
mm/backing-dev.c
... ... @@ -283,7 +283,6 @@
283 283 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
284 284 };
285 285  
286   -
287 286 void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
288 287 {
289 288 enum bdi_state bit;
290 289  
291 290  
... ... @@ -308,18 +307,18 @@
308 307  
309 308 /**
310 309 * congestion_wait - wait for a backing_dev to become uncongested
311   - * @rw: READ or WRITE
  310 + * @sync: SYNC or ASYNC IO
312 311 * @timeout: timeout in jiffies
313 312 *
314 313 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
315 314 * write congestion. If no backing_devs are congested then just wait for the
316 315 * next write to be completed.
317 316 */
318   -long congestion_wait(int rw, long timeout)
  317 +long congestion_wait(int sync, long timeout)
319 318 {
320 319 long ret;
321 320 DEFINE_WAIT(wait);
322   - wait_queue_head_t *wqh = &congestion_wqh[rw];
  321 + wait_queue_head_t *wqh = &congestion_wqh[sync];
323 322  
324 323 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
325 324 ret = io_schedule_timeout(timeout);
mm/memcontrol.c
... ... @@ -1973,7 +1973,7 @@
1973 1973 if (!progress) {
1974 1974 nr_retries--;
1975 1975 /* maybe some writeback is necessary */
1976   - congestion_wait(WRITE, HZ/10);
  1976 + congestion_wait(BLK_RW_ASYNC, HZ/10);
1977 1977 }
1978 1978  
1979 1979 }
mm/page-writeback.c
... ... @@ -575,7 +575,7 @@
575 575 if (pages_written >= write_chunk)
576 576 break; /* We've done our duty */
577 577  
578   - congestion_wait(WRITE, HZ/10);
  578 + congestion_wait(BLK_RW_ASYNC, HZ/10);
579 579 }
580 580  
581 581 if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
... ... @@ -669,7 +669,7 @@
669 669 if (global_page_state(NR_UNSTABLE_NFS) +
670 670 global_page_state(NR_WRITEBACK) <= dirty_thresh)
671 671 break;
672   - congestion_wait(WRITE, HZ/10);
  672 + congestion_wait(BLK_RW_ASYNC, HZ/10);
673 673  
674 674 /*
675 675 * The caller might hold locks which can prevent IO completion
... ... @@ -715,7 +715,7 @@
715 715 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
716 716 /* Wrote less than expected */
717 717 if (wbc.encountered_congestion || wbc.more_io)
718   - congestion_wait(WRITE, HZ/10);
  718 + congestion_wait(BLK_RW_ASYNC, HZ/10);
719 719 else
720 720 break;
721 721 }
... ... @@ -787,7 +787,7 @@
787 787 writeback_inodes(&wbc);
788 788 if (wbc.nr_to_write > 0) {
789 789 if (wbc.encountered_congestion || wbc.more_io)
790   - congestion_wait(WRITE, HZ/10);
  790 + congestion_wait(BLK_RW_ASYNC, HZ/10);
791 791 else
792 792 break; /* All the old data is written */
793 793 }
mm/page_alloc.c
... ... @@ -1666,7 +1666,7 @@
1666 1666 preferred_zone, migratetype);
1667 1667  
1668 1668 if (!page && gfp_mask & __GFP_NOFAIL)
1669   - congestion_wait(WRITE, HZ/50);
  1669 + congestion_wait(BLK_RW_ASYNC, HZ/50);
1670 1670 } while (!page && (gfp_mask & __GFP_NOFAIL));
1671 1671  
1672 1672 return page;
... ... @@ -1831,7 +1831,7 @@
1831 1831 pages_reclaimed += did_some_progress;
1832 1832 if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
1833 1833 /* Wait for some write requests to complete then retry */
1834   - congestion_wait(WRITE, HZ/50);
  1834 + congestion_wait(BLK_RW_ASYNC, HZ/50);
1835 1835 goto rebalance;
1836 1836 }
1837 1837  
mm/vmscan.c
... ... @@ -1104,7 +1104,7 @@
1104 1104 */
1105 1105 if (nr_freed < nr_taken && !current_is_kswapd() &&
1106 1106 lumpy_reclaim) {
1107   - congestion_wait(WRITE, HZ/10);
  1107 + congestion_wait(BLK_RW_ASYNC, HZ/10);
1108 1108  
1109 1109 /*
1110 1110 * The attempt at page out may have made some
... ... @@ -1721,7 +1721,7 @@
1721 1721  
1722 1722 /* Take a nap, wait for some writeback to complete */
1723 1723 if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
1724   - congestion_wait(WRITE, HZ/10);
  1724 + congestion_wait(BLK_RW_ASYNC, HZ/10);
1725 1725 }
1726 1726 /* top priority shrink_zones still had more to do? don't OOM, then */
1727 1727 if (!sc->all_unreclaimable && scanning_global_lru(sc))
... ... @@ -1960,7 +1960,7 @@
1960 1960 * another pass across the zones.
1961 1961 */
1962 1962 if (total_scanned && priority < DEF_PRIORITY - 2)
1963   - congestion_wait(WRITE, HZ/10);
  1963 + congestion_wait(BLK_RW_ASYNC, HZ/10);
1964 1964  
1965 1965 /*
1966 1966 * We do this so kswapd doesn't build up large priorities for
... ... @@ -2233,7 +2233,7 @@
2233 2233 goto out;
2234 2234  
2235 2235 if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
2236   - congestion_wait(WRITE, HZ / 10);
  2236 + congestion_wait(BLK_RW_ASYNC, HZ / 10);
2237 2237 }
2238 2238 }
2239 2239