Commit f758eeabeb96f878c860e8f110f94ec8820822a9
Committed by
Wu Fengguang
1 parent
424b351fe1
Exists in
master
and in
4 other branches
writeback: split inode_wb_list_lock into bdi_writeback.list_lock
Split the global inode_wb_list_lock into a per-bdi_writeback list_lock,
as it's currently the most contended lock in the system for metadata
heavy workloads.  It won't help for single-filesystem workloads for
which we'll need the I/O-less balance_dirty_pages, but at least we can
dedicate a cpu to spinning on each bdi now for larger systems.

Based on earlier patches from Nick Piggin and Dave Chinner.

It reduces lock contentions to 1/4 in this test case:
10 HDD JBOD, 100 dd on each disk, XFS, 6GB ram

lock_stat version 0.3
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
class name    con-bounces    contentions   waittime-min   waittime-max waittime-total    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

vanilla 2.6.39-rc3:
  inode_wb_list_lock:         42590          44433           0.12         147.74      144127.35         252274         886792           0.08         121.34      917211.23
  ------------------
  inode_wb_list_lock              2          [<ffffffff81165da5>] bdev_inode_switch_bdi+0x29/0x85
  inode_wb_list_lock             34          [<ffffffff8115bd0b>] inode_wb_list_del+0x22/0x49
  inode_wb_list_lock          12893          [<ffffffff8115bb53>] __mark_inode_dirty+0x170/0x1d0
  inode_wb_list_lock          10702          [<ffffffff8115afef>] writeback_single_inode+0x16d/0x20a
  ------------------
  inode_wb_list_lock              2          [<ffffffff81165da5>] bdev_inode_switch_bdi+0x29/0x85
  inode_wb_list_lock             19          [<ffffffff8115bd0b>] inode_wb_list_del+0x22/0x49
  inode_wb_list_lock           5550          [<ffffffff8115bb53>] __mark_inode_dirty+0x170/0x1d0
  inode_wb_list_lock           8511          [<ffffffff8115b4ad>] writeback_sb_inodes+0x10f/0x157

2.6.39-rc3 + patch:
  &(&wb->list_lock)->rlock:         11383          11657           0.14         151.69       40429.51          90825         527918           0.11         145.90      556843.37
  ------------------------
  &(&wb->list_lock)->rlock             10          [<ffffffff8115b189>] inode_wb_list_del+0x5f/0x86
  &(&wb->list_lock)->rlock           1493          [<ffffffff8115b1ed>] writeback_inodes_wb+0x3d/0x150
  &(&wb->list_lock)->rlock           3652          [<ffffffff8115a8e9>] writeback_sb_inodes+0x123/0x16f
  &(&wb->list_lock)->rlock           1412          [<ffffffff8115a38e>] writeback_single_inode+0x17f/0x223
  ------------------------
  &(&wb->list_lock)->rlock              3          [<ffffffff8110b5af>] bdi_lock_two+0x46/0x4b
  &(&wb->list_lock)->rlock              6          [<ffffffff8115b189>] inode_wb_list_del+0x5f/0x86
  &(&wb->list_lock)->rlock           2061          [<ffffffff8115af97>] __mark_inode_dirty+0x173/0x1cf
  &(&wb->list_lock)->rlock           2629          [<ffffffff8115a8e9>] writeback_sb_inodes+0x123/0x16f

hughd@google.com: fix recursive lock when bdi_lock_two() is called with new the same as old
akpm@linux-foundation.org: cleanup bdev_inode_switch_bdi() comment

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Showing 8 changed files with 85 additions and 68 deletions Side-by-side Diff
fs/block_dev.c
... | ... | @@ -44,24 +44,28 @@ |
44 | 44 | { |
45 | 45 | return &BDEV_I(inode)->bdev; |
46 | 46 | } |
47 | - | |
48 | 47 | EXPORT_SYMBOL(I_BDEV); |
49 | 48 | |
50 | 49 | /* |
51 | - * move the inode from it's current bdi to the a new bdi. if the inode is dirty | |
52 | - * we need to move it onto the dirty list of @dst so that the inode is always | |
53 | - * on the right list. | |
50 | + * Move the inode from its current bdi to a new bdi. If the inode is dirty we | |
51 | + * need to move it onto the dirty list of @dst so that the inode is always on | |
52 | + * the right list. | |
54 | 53 | */ |
55 | 54 | static void bdev_inode_switch_bdi(struct inode *inode, |
56 | 55 | struct backing_dev_info *dst) |
57 | 56 | { |
58 | - spin_lock(&inode_wb_list_lock); | |
57 | + struct backing_dev_info *old = inode->i_data.backing_dev_info; | |
58 | + | |
59 | + if (unlikely(dst == old)) /* deadlock avoidance */ | |
60 | + return; | |
61 | + bdi_lock_two(&old->wb, &dst->wb); | |
59 | 62 | spin_lock(&inode->i_lock); |
60 | 63 | inode->i_data.backing_dev_info = dst; |
61 | 64 | if (inode->i_state & I_DIRTY) |
62 | 65 | list_move(&inode->i_wb_list, &dst->wb.b_dirty); |
63 | 66 | spin_unlock(&inode->i_lock); |
64 | - spin_unlock(&inode_wb_list_lock); | |
67 | + spin_unlock(&old->wb.list_lock); | |
68 | + spin_unlock(&dst->wb.list_lock); | |
65 | 69 | } |
66 | 70 | |
67 | 71 | static sector_t max_block(struct block_device *bdev) |
fs/fs-writeback.c
... | ... | @@ -181,12 +181,13 @@ |
181 | 181 | */ |
182 | 182 | void inode_wb_list_del(struct inode *inode) |
183 | 183 | { |
184 | - spin_lock(&inode_wb_list_lock); | |
184 | + struct backing_dev_info *bdi = inode_to_bdi(inode); | |
185 | + | |
186 | + spin_lock(&bdi->wb.list_lock); | |
185 | 187 | list_del_init(&inode->i_wb_list); |
186 | - spin_unlock(&inode_wb_list_lock); | |
188 | + spin_unlock(&bdi->wb.list_lock); | |
187 | 189 | } |
188 | 190 | |
189 | - | |
190 | 191 | /* |
191 | 192 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the |
192 | 193 | * furthest end of its superblock's dirty-inode list. |
193 | 194 | |
... | ... | @@ -196,11 +197,9 @@ |
196 | 197 | * the case then the inode must have been redirtied while it was being written |
197 | 198 | * out and we don't reset its dirtied_when. |
198 | 199 | */ |
199 | -static void redirty_tail(struct inode *inode) | |
200 | +static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) | |
200 | 201 | { |
201 | - struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | |
202 | - | |
203 | - assert_spin_locked(&inode_wb_list_lock); | |
202 | + assert_spin_locked(&wb->list_lock); | |
204 | 203 | if (!list_empty(&wb->b_dirty)) { |
205 | 204 | struct inode *tail; |
206 | 205 | |
207 | 206 | |
... | ... | @@ -214,11 +213,9 @@ |
214 | 213 | /* |
215 | 214 | * requeue inode for re-scanning after bdi->b_io list is exhausted. |
216 | 215 | */ |
217 | -static void requeue_io(struct inode *inode) | |
216 | +static void requeue_io(struct inode *inode, struct bdi_writeback *wb) | |
218 | 217 | { |
219 | - struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | |
220 | - | |
221 | - assert_spin_locked(&inode_wb_list_lock); | |
218 | + assert_spin_locked(&wb->list_lock); | |
222 | 219 | list_move(&inode->i_wb_list, &wb->b_more_io); |
223 | 220 | } |
224 | 221 | |
... | ... | @@ -226,7 +223,7 @@ |
226 | 223 | { |
227 | 224 | /* |
228 | 225 | * Prevent speculative execution through |
229 | - * spin_unlock(&inode_wb_list_lock); | |
226 | + * spin_unlock(&wb->list_lock); | |
230 | 227 | */ |
231 | 228 | |
232 | 229 | smp_mb(); |
... | ... | @@ -302,7 +299,7 @@ |
302 | 299 | */ |
303 | 300 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
304 | 301 | { |
305 | - assert_spin_locked(&inode_wb_list_lock); | |
302 | + assert_spin_locked(&wb->list_lock); | |
306 | 303 | list_splice_init(&wb->b_more_io, &wb->b_io); |
307 | 304 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
308 | 305 | } |
... | ... | @@ -317,7 +314,8 @@ |
317 | 314 | /* |
318 | 315 | * Wait for writeback on an inode to complete. |
319 | 316 | */ |
320 | -static void inode_wait_for_writeback(struct inode *inode) | |
317 | +static void inode_wait_for_writeback(struct inode *inode, | |
318 | + struct bdi_writeback *wb) | |
321 | 319 | { |
322 | 320 | DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); |
323 | 321 | wait_queue_head_t *wqh; |
324 | 322 | |
325 | 323 | |
... | ... | @@ -325,15 +323,15 @@ |
325 | 323 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
326 | 324 | while (inode->i_state & I_SYNC) { |
327 | 325 | spin_unlock(&inode->i_lock); |
328 | - spin_unlock(&inode_wb_list_lock); | |
326 | + spin_unlock(&wb->list_lock); | |
329 | 327 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); |
330 | - spin_lock(&inode_wb_list_lock); | |
328 | + spin_lock(&wb->list_lock); | |
331 | 329 | spin_lock(&inode->i_lock); |
332 | 330 | } |
333 | 331 | } |
334 | 332 | |
335 | 333 | /* |
336 | - * Write out an inode's dirty pages. Called under inode_wb_list_lock and | |
334 | + * Write out an inode's dirty pages. Called under wb->list_lock and | |
337 | 335 | * inode->i_lock. Either the caller has an active reference on the inode or |
338 | 336 | * the inode has I_WILL_FREE set. |
339 | 337 | * |
340 | 338 | |
... | ... | @@ -344,13 +342,14 @@ |
344 | 342 | * livelocks, etc. |
345 | 343 | */ |
346 | 344 | static int |
347 | -writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |
345 | +writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, | |
346 | + struct writeback_control *wbc) | |
348 | 347 | { |
349 | 348 | struct address_space *mapping = inode->i_mapping; |
350 | 349 | unsigned dirty; |
351 | 350 | int ret; |
352 | 351 | |
353 | - assert_spin_locked(&inode_wb_list_lock); | |
352 | + assert_spin_locked(&wb->list_lock); | |
354 | 353 | assert_spin_locked(&inode->i_lock); |
355 | 354 | |
356 | 355 | if (!atomic_read(&inode->i_count)) |
357 | 356 | |
... | ... | @@ -368,14 +367,14 @@ |
368 | 367 | * completed a full scan of b_io. |
369 | 368 | */ |
370 | 369 | if (wbc->sync_mode != WB_SYNC_ALL) { |
371 | - requeue_io(inode); | |
370 | + requeue_io(inode, wb); | |
372 | 371 | return 0; |
373 | 372 | } |
374 | 373 | |
375 | 374 | /* |
376 | 375 | * It's a data-integrity sync. We must wait. |
377 | 376 | */ |
378 | - inode_wait_for_writeback(inode); | |
377 | + inode_wait_for_writeback(inode, wb); | |
379 | 378 | } |
380 | 379 | |
381 | 380 | BUG_ON(inode->i_state & I_SYNC); |
... | ... | @@ -384,7 +383,7 @@ |
384 | 383 | inode->i_state |= I_SYNC; |
385 | 384 | inode->i_state &= ~I_DIRTY_PAGES; |
386 | 385 | spin_unlock(&inode->i_lock); |
387 | - spin_unlock(&inode_wb_list_lock); | |
386 | + spin_unlock(&wb->list_lock); | |
388 | 387 | |
389 | 388 | ret = do_writepages(mapping, wbc); |
390 | 389 | |
... | ... | @@ -415,7 +414,7 @@ |
415 | 414 | ret = err; |
416 | 415 | } |
417 | 416 | |
418 | - spin_lock(&inode_wb_list_lock); | |
417 | + spin_lock(&wb->list_lock); | |
419 | 418 | spin_lock(&inode->i_lock); |
420 | 419 | inode->i_state &= ~I_SYNC; |
421 | 420 | if (!(inode->i_state & I_FREEING)) { |
... | ... | @@ -438,7 +437,7 @@ |
438 | 437 | /* |
439 | 438 | * slice used up: queue for next turn |
440 | 439 | */ |
441 | - requeue_io(inode); | |
440 | + requeue_io(inode, wb); | |
442 | 441 | } else { |
443 | 442 | /* |
444 | 443 | * Writeback blocked by something other than |
... | ... | @@ -447,7 +446,7 @@ |
447 | 446 | * retrying writeback of the dirty page/inode |
448 | 447 | * that cannot be performed immediately. |
449 | 448 | */ |
450 | - redirty_tail(inode); | |
449 | + redirty_tail(inode, wb); | |
451 | 450 | } |
452 | 451 | } else if (inode->i_state & I_DIRTY) { |
453 | 452 | /* |
... | ... | @@ -456,7 +455,7 @@ |
456 | 455 | * submission or metadata updates after data IO |
457 | 456 | * completion. |
458 | 457 | */ |
459 | - redirty_tail(inode); | |
458 | + redirty_tail(inode, wb); | |
460 | 459 | } else { |
461 | 460 | /* |
462 | 461 | * The inode is clean. At this point we either have |
... | ... | @@ -521,7 +520,7 @@ |
521 | 520 | * superblock, move all inodes not belonging |
522 | 521 | * to it back onto the dirty list. |
523 | 522 | */ |
524 | - redirty_tail(inode); | |
523 | + redirty_tail(inode, wb); | |
525 | 524 | continue; |
526 | 525 | } |
527 | 526 | |
... | ... | @@ -541,7 +540,7 @@ |
541 | 540 | spin_lock(&inode->i_lock); |
542 | 541 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { |
543 | 542 | spin_unlock(&inode->i_lock); |
544 | - requeue_io(inode); | |
543 | + requeue_io(inode, wb); | |
545 | 544 | continue; |
546 | 545 | } |
547 | 546 | |
548 | 547 | |
549 | 548 | |
550 | 549 | |
... | ... | @@ -557,19 +556,19 @@ |
557 | 556 | __iget(inode); |
558 | 557 | |
559 | 558 | pages_skipped = wbc->pages_skipped; |
560 | - writeback_single_inode(inode, wbc); | |
559 | + writeback_single_inode(inode, wb, wbc); | |
561 | 560 | if (wbc->pages_skipped != pages_skipped) { |
562 | 561 | /* |
563 | 562 | * writeback is not making progress due to locked |
564 | 563 | * buffers. Skip this inode for now. |
565 | 564 | */ |
566 | - redirty_tail(inode); | |
565 | + redirty_tail(inode, wb); | |
567 | 566 | } |
568 | 567 | spin_unlock(&inode->i_lock); |
569 | - spin_unlock(&inode_wb_list_lock); | |
568 | + spin_unlock(&wb->list_lock); | |
570 | 569 | iput(inode); |
571 | 570 | cond_resched(); |
572 | - spin_lock(&inode_wb_list_lock); | |
571 | + spin_lock(&wb->list_lock); | |
573 | 572 | if (wbc->nr_to_write <= 0) { |
574 | 573 | wbc->more_io = 1; |
575 | 574 | return 1; |
... | ... | @@ -588,7 +587,7 @@ |
588 | 587 | |
589 | 588 | if (!wbc->wb_start) |
590 | 589 | wbc->wb_start = jiffies; /* livelock avoidance */ |
591 | - spin_lock(&inode_wb_list_lock); | |
590 | + spin_lock(&wb->list_lock); | |
592 | 591 | |
593 | 592 | if (list_empty(&wb->b_io)) |
594 | 593 | queue_io(wb, wbc->older_than_this); |
... | ... | @@ -598,7 +597,7 @@ |
598 | 597 | struct super_block *sb = inode->i_sb; |
599 | 598 | |
600 | 599 | if (!pin_sb_for_writeback(sb)) { |
601 | - requeue_io(inode); | |
600 | + requeue_io(inode, wb); | |
602 | 601 | continue; |
603 | 602 | } |
604 | 603 | ret = writeback_sb_inodes(sb, wb, wbc, false); |
... | ... | @@ -607,7 +606,7 @@ |
607 | 606 | if (ret) |
608 | 607 | break; |
609 | 608 | } |
610 | - spin_unlock(&inode_wb_list_lock); | |
609 | + spin_unlock(&wb->list_lock); | |
611 | 610 | /* Leave any unwritten inodes on b_io */ |
612 | 611 | } |
613 | 612 | |
614 | 613 | |
... | ... | @@ -616,11 +615,11 @@ |
616 | 615 | { |
617 | 616 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
618 | 617 | |
619 | - spin_lock(&inode_wb_list_lock); | |
618 | + spin_lock(&wb->list_lock); | |
620 | 619 | if (list_empty(&wb->b_io)) |
621 | 620 | queue_io(wb, wbc->older_than_this); |
622 | 621 | writeback_sb_inodes(sb, wb, wbc, true); |
623 | - spin_unlock(&inode_wb_list_lock); | |
622 | + spin_unlock(&wb->list_lock); | |
624 | 623 | } |
625 | 624 | |
626 | 625 | /* |
627 | 626 | |
628 | 627 | |
... | ... | @@ -762,15 +761,15 @@ |
762 | 761 | * become available for writeback. Otherwise |
763 | 762 | * we'll just busyloop. |
764 | 763 | */ |
765 | - spin_lock(&inode_wb_list_lock); | |
764 | + spin_lock(&wb->list_lock); | |
766 | 765 | if (!list_empty(&wb->b_more_io)) { |
767 | 766 | inode = wb_inode(wb->b_more_io.prev); |
768 | 767 | trace_wbc_writeback_wait(&wbc, wb->bdi); |
769 | 768 | spin_lock(&inode->i_lock); |
770 | - inode_wait_for_writeback(inode); | |
769 | + inode_wait_for_writeback(inode, wb); | |
771 | 770 | spin_unlock(&inode->i_lock); |
772 | 771 | } |
773 | - spin_unlock(&inode_wb_list_lock); | |
772 | + spin_unlock(&wb->list_lock); | |
774 | 773 | } |
775 | 774 | |
776 | 775 | return wrote; |
777 | 776 | |
... | ... | @@ -1104,10 +1103,10 @@ |
1104 | 1103 | } |
1105 | 1104 | |
1106 | 1105 | spin_unlock(&inode->i_lock); |
1107 | - spin_lock(&inode_wb_list_lock); | |
1106 | + spin_lock(&bdi->wb.list_lock); | |
1108 | 1107 | inode->dirtied_when = jiffies; |
1109 | 1108 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); |
1110 | - spin_unlock(&inode_wb_list_lock); | |
1109 | + spin_unlock(&bdi->wb.list_lock); | |
1111 | 1110 | |
1112 | 1111 | if (wakeup_bdi) |
1113 | 1112 | bdi_wakeup_thread_delayed(bdi); |
... | ... | @@ -1309,6 +1308,7 @@ |
1309 | 1308 | */ |
1310 | 1309 | int write_inode_now(struct inode *inode, int sync) |
1311 | 1310 | { |
1311 | + struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | |
1312 | 1312 | int ret; |
1313 | 1313 | struct writeback_control wbc = { |
1314 | 1314 | .nr_to_write = LONG_MAX, |
1315 | 1315 | |
1316 | 1316 | |
... | ... | @@ -1321,11 +1321,11 @@ |
1321 | 1321 | wbc.nr_to_write = 0; |
1322 | 1322 | |
1323 | 1323 | might_sleep(); |
1324 | - spin_lock(&inode_wb_list_lock); | |
1324 | + spin_lock(&wb->list_lock); | |
1325 | 1325 | spin_lock(&inode->i_lock); |
1326 | - ret = writeback_single_inode(inode, &wbc); | |
1326 | + ret = writeback_single_inode(inode, wb, &wbc); | |
1327 | 1327 | spin_unlock(&inode->i_lock); |
1328 | - spin_unlock(&inode_wb_list_lock); | |
1328 | + spin_unlock(&wb->list_lock); | |
1329 | 1329 | if (sync) |
1330 | 1330 | inode_sync_wait(inode); |
1331 | 1331 | return ret; |
1332 | 1332 | |
1333 | 1333 | |
1334 | 1334 | |
... | ... | @@ -1345,13 +1345,14 @@ |
1345 | 1345 | */ |
1346 | 1346 | int sync_inode(struct inode *inode, struct writeback_control *wbc) |
1347 | 1347 | { |
1348 | + struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | |
1348 | 1349 | int ret; |
1349 | 1350 | |
1350 | - spin_lock(&inode_wb_list_lock); | |
1351 | + spin_lock(&wb->list_lock); | |
1351 | 1352 | spin_lock(&inode->i_lock); |
1352 | - ret = writeback_single_inode(inode, wbc); | |
1353 | + ret = writeback_single_inode(inode, wb, wbc); | |
1353 | 1354 | spin_unlock(&inode->i_lock); |
1354 | - spin_unlock(&inode_wb_list_lock); | |
1355 | + spin_unlock(&wb->list_lock); | |
1355 | 1356 | return ret; |
1356 | 1357 | } |
1357 | 1358 | EXPORT_SYMBOL(sync_inode); |
fs/inode.c
... | ... | @@ -37,7 +37,7 @@ |
37 | 37 | * inode_lru, inode->i_lru |
38 | 38 | * inode_sb_list_lock protects: |
39 | 39 | * sb->s_inodes, inode->i_sb_list |
40 | - * inode_wb_list_lock protects: | |
40 | + * bdi->wb.list_lock protects: | |
41 | 41 | * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list |
42 | 42 | * inode_hash_lock protects: |
43 | 43 | * inode_hashtable, inode->i_hash |
... | ... | @@ -48,7 +48,7 @@ |
48 | 48 | * inode->i_lock |
49 | 49 | * inode_lru_lock |
50 | 50 | * |
51 | - * inode_wb_list_lock | |
51 | + * bdi->wb.list_lock | |
52 | 52 | * inode->i_lock |
53 | 53 | * |
54 | 54 | * inode_hash_lock |
... | ... | @@ -68,7 +68,6 @@ |
68 | 68 | static DEFINE_SPINLOCK(inode_lru_lock); |
69 | 69 | |
70 | 70 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); |
71 | -__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); | |
72 | 71 | |
73 | 72 | /* |
74 | 73 | * iprune_sem provides exclusion between the icache shrinking and the |
include/linux/backing-dev.h
... | ... | @@ -57,6 +57,7 @@ |
57 | 57 | struct list_head b_dirty; /* dirty inodes */ |
58 | 58 | struct list_head b_io; /* parked for writeback */ |
59 | 59 | struct list_head b_more_io; /* parked for more writeback */ |
60 | + spinlock_t list_lock; /* protects the b_* lists */ | |
60 | 61 | }; |
61 | 62 | |
62 | 63 | struct backing_dev_info { |
... | ... | @@ -106,6 +107,7 @@ |
106 | 107 | int bdi_has_dirty_io(struct backing_dev_info *bdi); |
107 | 108 | void bdi_arm_supers_timer(void); |
108 | 109 | void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); |
110 | +void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); | |
109 | 111 | |
110 | 112 | extern spinlock_t bdi_lock; |
111 | 113 | extern struct list_head bdi_list; |
include/linux/writeback.h
mm/backing-dev.c
... | ... | @@ -45,6 +45,17 @@ |
45 | 45 | static int bdi_sync_supers(void *); |
46 | 46 | static void sync_supers_timer_fn(unsigned long); |
47 | 47 | |
48 | +void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) | |
49 | +{ | |
50 | + if (wb1 < wb2) { | |
51 | + spin_lock(&wb1->list_lock); | |
52 | + spin_lock_nested(&wb2->list_lock, 1); | |
53 | + } else { | |
54 | + spin_lock(&wb2->list_lock); | |
55 | + spin_lock_nested(&wb1->list_lock, 1); | |
56 | + } | |
57 | +} | |
58 | + | |
48 | 59 | #ifdef CONFIG_DEBUG_FS |
49 | 60 | #include <linux/debugfs.h> |
50 | 61 | #include <linux/seq_file.h> |
51 | 62 | |
... | ... | @@ -67,14 +78,14 @@ |
67 | 78 | struct inode *inode; |
68 | 79 | |
69 | 80 | nr_dirty = nr_io = nr_more_io = 0; |
70 | - spin_lock(&inode_wb_list_lock); | |
81 | + spin_lock(&wb->list_lock); | |
71 | 82 | list_for_each_entry(inode, &wb->b_dirty, i_wb_list) |
72 | 83 | nr_dirty++; |
73 | 84 | list_for_each_entry(inode, &wb->b_io, i_wb_list) |
74 | 85 | nr_io++; |
75 | 86 | list_for_each_entry(inode, &wb->b_more_io, i_wb_list) |
76 | 87 | nr_more_io++; |
77 | - spin_unlock(&inode_wb_list_lock); | |
88 | + spin_unlock(&wb->list_lock); | |
78 | 89 | |
79 | 90 | global_dirty_limits(&background_thresh, &dirty_thresh); |
80 | 91 | bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); |
... | ... | @@ -628,6 +639,7 @@ |
628 | 639 | INIT_LIST_HEAD(&wb->b_dirty); |
629 | 640 | INIT_LIST_HEAD(&wb->b_io); |
630 | 641 | INIT_LIST_HEAD(&wb->b_more_io); |
642 | + spin_lock_init(&wb->list_lock); | |
631 | 643 | setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi); |
632 | 644 | } |
633 | 645 | |
634 | 646 | |
... | ... | @@ -676,11 +688,12 @@ |
676 | 688 | if (bdi_has_dirty_io(bdi)) { |
677 | 689 | struct bdi_writeback *dst = &default_backing_dev_info.wb; |
678 | 690 | |
679 | - spin_lock(&inode_wb_list_lock); | |
691 | + bdi_lock_two(&bdi->wb, dst); | |
680 | 692 | list_splice(&bdi->wb.b_dirty, &dst->b_dirty); |
681 | 693 | list_splice(&bdi->wb.b_io, &dst->b_io); |
682 | 694 | list_splice(&bdi->wb.b_more_io, &dst->b_more_io); |
683 | - spin_unlock(&inode_wb_list_lock); | |
695 | + spin_unlock(&bdi->wb.list_lock); | |
696 | + spin_unlock(&dst->list_lock); | |
684 | 697 | } |
685 | 698 | |
686 | 699 | bdi_unregister(bdi); |
mm/filemap.c
... | ... | @@ -81,7 +81,7 @@ |
81 | 81 | * ->i_mutex |
82 | 82 | * ->i_alloc_sem (various) |
83 | 83 | * |
84 | - * inode_wb_list_lock | |
84 | + * bdi->wb.list_lock | |
85 | 85 | * sb_lock (fs/fs-writeback.c) |
86 | 86 | * ->mapping->tree_lock (__sync_single_inode) |
87 | 87 | * |
88 | 88 | |
... | ... | @@ -99,9 +99,9 @@ |
99 | 99 | * ->zone.lru_lock (check_pte_range->isolate_lru_page) |
100 | 100 | * ->private_lock (page_remove_rmap->set_page_dirty) |
101 | 101 | * ->tree_lock (page_remove_rmap->set_page_dirty) |
102 | - * inode_wb_list_lock (page_remove_rmap->set_page_dirty) | |
102 | + * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) | |
103 | 103 | * ->inode->i_lock (page_remove_rmap->set_page_dirty) |
104 | - * inode_wb_list_lock (zap_pte_range->set_page_dirty) | |
104 | + * bdi.wb->list_lock (zap_pte_range->set_page_dirty) | |
105 | 105 | * ->inode->i_lock (zap_pte_range->set_page_dirty) |
106 | 106 | * ->private_lock (zap_pte_range->__set_page_dirty_buffers) |
107 | 107 | * |
mm/rmap.c
... | ... | @@ -32,11 +32,11 @@ |
32 | 32 | * mmlist_lock (in mmput, drain_mmlist and others) |
33 | 33 | * mapping->private_lock (in __set_page_dirty_buffers) |
34 | 34 | * inode->i_lock (in set_page_dirty's __mark_inode_dirty) |
35 | - * inode_wb_list_lock (in set_page_dirty's __mark_inode_dirty) | |
35 | + * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty) | |
36 | 36 | * sb_lock (within inode_lock in fs/fs-writeback.c) |
37 | 37 | * mapping->tree_lock (widely used, in set_page_dirty, |
38 | 38 | * in arch-dependent flush_dcache_mmap_lock, |
39 | - * within inode_wb_list_lock in __sync_single_inode) | |
39 | + * within bdi.wb->list_lock in __sync_single_inode) | |
40 | 40 | * |
41 | 41 | * (code doesn't rely on that order so it could be switched around) |
42 | 42 | * ->tasklist_lock |