Commit cfc4ba5365449cb6b5c9f68d755a142f17da1e47
1 parent f11fcae840
Exists in master and in 20 other branches
writeback: use RCU to protect bdi_list
Now that bdi_writeback_all() no longer handles integrity writeback, it doesn't have to block anymore. This means that we can switch bdi_list reader side protection to RCU.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
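For context, the sketch below summarizes the locking scheme this patch moves to: readers walk the list inside an RCU read-side critical section and must not block there, while updaters still serialize on the spinlock, use the _rcu list helpers, and wait for a grace period before an unlinked entry's list_head may be reused or freed. It is a minimal illustration with hypothetical names (struct item, item_list, item_lock), not code taken from the patch.

/*
 * Minimal sketch of the RCU-protected list pattern (hypothetical names,
 * not part of this patch).
 */
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head node;
	struct rcu_head rcu_head;
};

static LIST_HEAD(item_list);
static DEFINE_SPINLOCK(item_lock);

/* Reader side: no spinlock, but must not sleep inside the critical section. */
static void walk_items(void (*fn)(struct item *))
{
	struct item *it;

	rcu_read_lock();
	list_for_each_entry_rcu(it, &item_list, node)
		fn(it);
	rcu_read_unlock();
}

/* Update side: still serialized by the spinlock, using the _rcu helpers. */
static void add_item(struct item *it)
{
	spin_lock_bh(&item_lock);
	list_add_tail_rcu(&it->node, &item_list);
	spin_unlock_bh(&item_lock);
}

static void remove_item(struct item *it)
{
	spin_lock_bh(&item_lock);
	list_del_rcu(&it->node);
	spin_unlock_bh(&item_lock);

	synchronize_rcu();	/* wait for readers before reusing or freeing */
	kfree(it);
}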
Showing 4 changed files with 63 additions and 28 deletions
fs/fs-writeback.c
@@ -868,16 +868,16 @@
 
 	WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
 
-	spin_lock(&bdi_lock);
+	rcu_read_lock();
 
-	list_for_each_entry(bdi, &bdi_list, bdi_list) {
+	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
 
 		bdi_alloc_queue_work(bdi, wbc);
 	}
 
-	spin_unlock(&bdi_lock);
+	rcu_read_unlock();
 }
 
 /*
include/linux/backing-dev.h
@@ -59,6 +59,7 @@
 
 struct backing_dev_info {
 	struct list_head bdi_list;
+	struct rcu_head rcu_head;
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
mm/backing-dev.c
@@ -26,6 +26,12 @@
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 static struct class *bdi_class;
+
+/*
+ * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
+ * reader side protection for bdi_pending_list. bdi_list has RCU reader side
+ * locking.
+ */
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 LIST_HEAD(bdi_pending_list);
@@ -284,9 +290,9 @@
 	/*
 	 * Add us to the active bdi_list
 	 */
-	spin_lock(&bdi_lock);
-	list_add(&bdi->bdi_list, &bdi_list);
-	spin_unlock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
+	list_add_rcu(&bdi->bdi_list, &bdi_list);
+	spin_unlock_bh(&bdi_lock);
 
 	bdi_task_init(bdi, wb);
 
@@ -389,7 +395,7 @@
 		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
 			wb_do_writeback(me, 0);
 
-		spin_lock(&bdi_lock);
+		spin_lock_bh(&bdi_lock);
 
 		/*
 		 * Check if any existing bdi's have dirty data without
@@ -410,7 +416,7 @@
 		if (list_empty(&bdi_pending_list)) {
 			unsigned long wait;
 
-			spin_unlock(&bdi_lock);
+			spin_unlock_bh(&bdi_lock);
 			wait = msecs_to_jiffies(dirty_writeback_interval * 10);
 			schedule_timeout(wait);
 			try_to_freeze();
@@ -426,7 +432,7 @@
 		bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
				 bdi_list);
 		list_del_init(&bdi->bdi_list);
-		spin_unlock(&bdi_lock);
+		spin_unlock_bh(&bdi_lock);
 
 		wb = &bdi->wb;
 		wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
@@ -445,9 +451,9 @@
 			 * a chance to flush other bdi's to free
 			 * memory.
 			 */
-			spin_lock(&bdi_lock);
+			spin_lock_bh(&bdi_lock);
 			list_add_tail(&bdi->bdi_list, &bdi_pending_list);
-			spin_unlock(&bdi_lock);
+			spin_unlock_bh(&bdi_lock);
 
 			bdi_flush_io(bdi);
 		}
@@ -456,6 +462,24 @@
 	return 0;
 }
 
+static void bdi_add_to_pending(struct rcu_head *head)
+{
+	struct backing_dev_info *bdi;
+
+	bdi = container_of(head, struct backing_dev_info, rcu_head);
+	INIT_LIST_HEAD(&bdi->bdi_list);
+
+	spin_lock(&bdi_lock);
+	list_add_tail(&bdi->bdi_list, &bdi_pending_list);
+	spin_unlock(&bdi_lock);
+
+	/*
+	 * We are now on the pending list, wake up bdi_forker_task()
+	 * to finish the job and add us back to the active bdi_list
+	 */
+	wake_up_process(default_backing_dev_info.wb.task);
+}
+
 /*
  * Add the default flusher task that gets created for any bdi
  * that has dirty data pending writeout
@@ -478,16 +502,29 @@
 	 * waiting for previous additions to finish.
 	 */
 	if (!test_and_set_bit(BDI_pending, &bdi->state)) {
-		list_move_tail(&bdi->bdi_list, &bdi_pending_list);
+		list_del_rcu(&bdi->bdi_list);
 
 		/*
-		 * We are now on the pending list, wake up bdi_forker_task()
-		 * to finish the job and add us back to the active bdi_list
+		 * We must wait for the current RCU period to end before
+		 * moving to the pending list. So schedule that operation
+		 * from an RCU callback.
 		 */
-		wake_up_process(default_backing_dev_info.wb.task);
+		call_rcu(&bdi->rcu_head, bdi_add_to_pending);
 	}
 }
 
+/*
+ * Remove bdi from bdi_list, and ensure that it is no longer visible
+ */
+static void bdi_remove_from_list(struct backing_dev_info *bdi)
+{
+	spin_lock_bh(&bdi_lock);
+	list_del_rcu(&bdi->bdi_list);
+	spin_unlock_bh(&bdi_lock);
+
+	synchronize_rcu();
+}
+
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		 const char *fmt, ...)
 {
@@ -506,9 +543,9 @@
 		goto exit;
 	}
 
-	spin_lock(&bdi_lock);
-	list_add_tail(&bdi->bdi_list, &bdi_list);
-	spin_unlock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
+	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
+	spin_unlock_bh(&bdi_lock);
 
 	bdi->dev = dev;
 
@@ -526,9 +563,7 @@
 			wb->task = NULL;
 			ret = -ENOMEM;
 
-			spin_lock(&bdi_lock);
-			list_del(&bdi->bdi_list);
-			spin_unlock(&bdi_lock);
+			bdi_remove_from_list(bdi);
 			goto exit;
 		}
 	}
@@ -565,9 +600,7 @@
 	/*
 	 * Make sure nobody finds us on the bdi_list anymore
 	 */
-	spin_lock(&bdi_lock);
-	list_del(&bdi->bdi_list);
-	spin_unlock(&bdi_lock);
+	bdi_remove_from_list(bdi);
 
 	/*
 	 * Finally, kill the kernel threads. We don't need to be RCU
@@ -599,6 +632,7 @@
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
 	spin_lock_init(&bdi->wb_lock);
+	INIT_RCU_HEAD(&bdi->rcu_head);
 	INIT_LIST_HEAD(&bdi->bdi_list);
 	INIT_LIST_HEAD(&bdi->wb_list);
 	INIT_LIST_HEAD(&bdi->work_list);
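Two details in the mm/backing-dev.c changes are worth spelling out. First, a bdi being handed to the forker thread cannot simply be moved from bdi_list onto bdi_pending_list: RCU readers may still be walking the entry, so the patch unlinks it with list_del_rcu() and defers the re-add to bdi_add_to_pending() via call_rcu(). Second, that callback runs in softirq context and takes bdi_lock there; to avoid deadlocking against it, the process-context acquisitions of bdi_lock switch to the _bh variants (including the ones in mm/page-writeback.c below). A hedged sketch of the same deferred-move pattern, reusing the hypothetical struct item and item_lock from the earlier example:

/*
 * Sketch of the deferred-move pattern (hypothetical names, reusing
 * struct item and item_lock from the earlier sketch).  The list_head
 * cannot be spliced onto another list until a grace period has elapsed,
 * so the move is finished from an RCU callback.
 */
static LIST_HEAD(pending_list);

static void move_to_pending_rcu(struct rcu_head *head)
{
	struct item *it = container_of(head, struct item, rcu_head);

	INIT_LIST_HEAD(&it->node);	/* grace period is over, safe to reuse */

	spin_lock(&item_lock);		/* callback already runs in softirq */
	list_add_tail(&it->node, &pending_list);
	spin_unlock(&item_lock);
}

static void start_move_to_pending(struct item *it)
{
	spin_lock_bh(&item_lock);
	list_del_rcu(&it->node);	/* unlink from the RCU-read list */
	spin_unlock_bh(&item_lock);

	call_rcu(&it->rcu_head, move_to_pending_rcu);
}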
mm/page-writeback.c
@@ -315,7 +315,7 @@
 {
 	int ret = 0;
 
-	spin_lock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
 	} else {
@@ -327,7 +327,7 @@
 			ret = -EINVAL;
 		}
 	}
-	spin_unlock(&bdi_lock);
+	spin_unlock_bh(&bdi_lock);
 
 	return ret;
 }
 
@@ -339,14 +339,14 @@
 	if (max_ratio > 100)
 		return -EINVAL;
 
-	spin_lock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
 	} else {
 		bdi->max_ratio = max_ratio;
 		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
 	}
-	spin_unlock(&bdi_lock);
+	spin_unlock_bh(&bdi_lock);
 
 	return ret;
 }