Commit cfc4ba5365449cb6b5c9f68d755a142f17da1e47

Authored by Jens Axboe
1 parent f11fcae840

writeback: use RCU to protect bdi_list

Now that bdi_writeback_all() no longer handles integrity writeback,
it doesn't have to block anymore. This means that we can switch
bdi_list reader side protection to RCU.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
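
The core of the change is the standard RCU-protected list pattern: readers walk bdi_list under rcu_read_lock() with list_for_each_entry_rcu(), while writers keep serializing on bdi_lock, switch to the _rcu list primitives, and let a grace period (synchronize_rcu() or call_rcu()) pass before an unlinked entry may be reused. Below is a minimal sketch of that pattern using a hypothetical "item" list (struct item, item_list, item_lock and the item_* functions are illustrative, not the bdi code itself). The _bh lock variants mirror the patch: bdi_lock can now also be taken from an RCU callback, i.e. softirq context, so other holders block bottom halves to avoid deadlock.

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head list;
	struct rcu_head rcu_head;
	int data;
};

static LIST_HEAD(item_list);
static DEFINE_SPINLOCK(item_lock);	/* writers only; readers use RCU */

/* Reader side: no lock, may run concurrently with add/remove */
static int item_sum(void)
{
	struct item *it;
	int sum = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(it, &item_list, list)
		sum += it->data;
	rcu_read_unlock();

	return sum;
}

/* Writer side: additions still serialize on the spinlock */
static void item_add(struct item *it)
{
	spin_lock_bh(&item_lock);
	list_add_tail_rcu(&it->list, &item_list);
	spin_unlock_bh(&item_lock);
}

/* Removal when the caller may block: wait out the grace period inline */
static void item_del_sync(struct item *it)
{
	spin_lock_bh(&item_lock);
	list_del_rcu(&it->list);
	spin_unlock_bh(&item_lock);

	synchronize_rcu();	/* no reader can still see 'it' */
	kfree(it);
}

/* Removal when the caller cannot block: defer the cleanup to a callback */
static void item_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct item, rcu_head));
}

static void item_del_async(struct item *it)
{
	spin_lock_bh(&item_lock);
	list_del_rcu(&it->list);
	spin_unlock_bh(&item_lock);

	call_rcu(&it->rcu_head, item_free_rcu);
}

The patch uses both removal idioms: bdi_remove_from_list() blocks in synchronize_rcu(), while the move to bdi_pending_list is deferred through call_rcu() via bdi_add_to_pending().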

Showing 4 changed files with 63 additions and 28 deletions

fs/fs-writeback.c
@@ -868,16 +868,16 @@
 
 	WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
 
-	spin_lock(&bdi_lock);
+	rcu_read_lock();
 
-	list_for_each_entry(bdi, &bdi_list, bdi_list) {
+	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
 
 		bdi_alloc_queue_work(bdi, wbc);
 	}
 
-	spin_unlock(&bdi_lock);
+	rcu_read_unlock();
 }
 
 /*
include/linux/backing-dev.h
@@ -59,6 +59,7 @@
 
 struct backing_dev_info {
 	struct list_head bdi_list;
+	struct rcu_head rcu_head;
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
mm/backing-dev.c
@@ -26,6 +26,12 @@
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 static struct class *bdi_class;
+
+/*
+ * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
+ * reader side protection for bdi_pending_list. bdi_list has RCU reader side
+ * locking.
+ */
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 LIST_HEAD(bdi_pending_list);
@@ -284,9 +290,9 @@
 	/*
 	 * Add us to the active bdi_list
 	 */
-	spin_lock(&bdi_lock);
-	list_add(&bdi->bdi_list, &bdi_list);
-	spin_unlock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
+	list_add_rcu(&bdi->bdi_list, &bdi_list);
+	spin_unlock_bh(&bdi_lock);
 
 	bdi_task_init(bdi, wb);
 
@@ -389,7 +395,7 @@
 		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
 			wb_do_writeback(me, 0);
 
-		spin_lock(&bdi_lock);
+		spin_lock_bh(&bdi_lock);
 
 		/*
 		 * Check if any existing bdi's have dirty data without
@@ -410,7 +416,7 @@
 		if (list_empty(&bdi_pending_list)) {
 			unsigned long wait;
 
-			spin_unlock(&bdi_lock);
+			spin_unlock_bh(&bdi_lock);
 			wait = msecs_to_jiffies(dirty_writeback_interval * 10);
 			schedule_timeout(wait);
 			try_to_freeze();
@@ -426,7 +432,7 @@
 		bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
				 bdi_list);
 		list_del_init(&bdi->bdi_list);
-		spin_unlock(&bdi_lock);
+		spin_unlock_bh(&bdi_lock);
 
 		wb = &bdi->wb;
 		wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
@@ -445,9 +451,9 @@
 			 * a chance to flush other bdi's to free
 			 * memory.
 			 */
-			spin_lock(&bdi_lock);
+			spin_lock_bh(&bdi_lock);
 			list_add_tail(&bdi->bdi_list, &bdi_pending_list);
-			spin_unlock(&bdi_lock);
+			spin_unlock_bh(&bdi_lock);
 
 			bdi_flush_io(bdi);
 		}
@@ -456,6 +462,24 @@
 	return 0;
 }
 
+static void bdi_add_to_pending(struct rcu_head *head)
+{
+	struct backing_dev_info *bdi;
+
+	bdi = container_of(head, struct backing_dev_info, rcu_head);
+	INIT_LIST_HEAD(&bdi->bdi_list);
+
+	spin_lock(&bdi_lock);
+	list_add_tail(&bdi->bdi_list, &bdi_pending_list);
+	spin_unlock(&bdi_lock);
+
+	/*
+	 * We are now on the pending list, wake up bdi_forker_task()
+	 * to finish the job and add us back to the active bdi_list
+	 */
+	wake_up_process(default_backing_dev_info.wb.task);
+}
+
 /*
  * Add the default flusher task that gets created for any bdi
  * that has dirty data pending writeout
@@ -478,16 +502,29 @@
 	 * waiting for previous additions to finish.
 	 */
 	if (!test_and_set_bit(BDI_pending, &bdi->state)) {
-		list_move_tail(&bdi->bdi_list, &bdi_pending_list);
+		list_del_rcu(&bdi->bdi_list);
 
 		/*
-		 * We are now on the pending list, wake up bdi_forker_task()
-		 * to finish the job and add us back to the active bdi_list
+		 * We must wait for the current RCU period to end before
+		 * moving to the pending list. So schedule that operation
+		 * from an RCU callback.
 		 */
-		wake_up_process(default_backing_dev_info.wb.task);
+		call_rcu(&bdi->rcu_head, bdi_add_to_pending);
 	}
 }
 
+/*
+ * Remove bdi from bdi_list, and ensure that it is no longer visible
+ */
+static void bdi_remove_from_list(struct backing_dev_info *bdi)
+{
+	spin_lock_bh(&bdi_lock);
+	list_del_rcu(&bdi->bdi_list);
+	spin_unlock_bh(&bdi_lock);
+
+	synchronize_rcu();
+}
+
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...)
 {
@@ -506,9 +543,9 @@
 		goto exit;
 	}
 
-	spin_lock(&bdi_lock);
-	list_add_tail(&bdi->bdi_list, &bdi_list);
-	spin_unlock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
+	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
+	spin_unlock_bh(&bdi_lock);
 
 	bdi->dev = dev;
 
@@ -526,9 +563,7 @@
 			wb->task = NULL;
 			ret = -ENOMEM;
 
-			spin_lock(&bdi_lock);
-			list_del(&bdi->bdi_list);
-			spin_unlock(&bdi_lock);
+			bdi_remove_from_list(bdi);
 			goto exit;
 		}
 	}
@@ -565,9 +600,7 @@
 	/*
 	 * Make sure nobody finds us on the bdi_list anymore
 	 */
-	spin_lock(&bdi_lock);
-	list_del(&bdi->bdi_list);
-	spin_unlock(&bdi_lock);
+	bdi_remove_from_list(bdi);
 
 	/*
 	 * Finally, kill the kernel threads. We don't need to be RCU
@@ -599,6 +632,7 @@
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
 	spin_lock_init(&bdi->wb_lock);
+	INIT_RCU_HEAD(&bdi->rcu_head);
 	INIT_LIST_HEAD(&bdi->bdi_list);
 	INIT_LIST_HEAD(&bdi->wb_list);
 	INIT_LIST_HEAD(&bdi->work_list);
mm/page-writeback.c
@@ -315,7 +315,7 @@
 {
 	int ret = 0;
 
-	spin_lock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
 	} else {
@@ -327,7 +327,7 @@
 			ret = -EINVAL;
 		}
 	}
-	spin_unlock(&bdi_lock);
+	spin_unlock_bh(&bdi_lock);
 
 	return ret;
 }
@@ -339,14 +339,14 @@
 	if (max_ratio > 100)
 		return -EINVAL;
 
-	spin_lock(&bdi_lock);
+	spin_lock_bh(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
 	} else {
 		bdi->max_ratio = max_ratio;
 		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
 	}
-	spin_unlock(&bdi_lock);
+	spin_unlock_bh(&bdi_lock);
 
 	return ret;
 }