Commit 9f9c19ec1a59422c7687b11847ed3408128aa0d6

Authored by Christoph Hellwig
Committed by Ben Myers
1 parent c29f7d457a

xfs: fix the logspace waiting algorithm

Apply the scheme used in log_regrant_write_log_space to wake up any other
threads waiting for log space before the newly added one to
log_regrant_write_log_space as well, and factor the code into readable
helpers.  For each of the queues we have add two helpers:

 - one to try to wake up all waiting threads.  This helper will also be
   usable by xfs_log_move_tail once we remove the current opportunistic
   wakeups in it.
 - one to sleep on t_wait until enough log space is available, loosely
   modelled after Linux waitqueues.

And use them to reimplement the guts of log_regrant_write_log_space and
log_regrant_write_log_space.  These two function now use one and the same
algorithm for waiting on log space instead of subtly different ones before,
with an option to completely unify them in the near future.

Also move the filesystem shutdown handling to the common caller given
that we had to touch it anyway.

Based on hard debugging and an earlier patch from
Chandra Seetharaman <sekharan@us.ibm.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chandra Seetharaman <sekharan@us.ibm.com>
Tested-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

Showing 2 changed files with 177 additions and 183 deletions Side-by-side Diff

... ... @@ -150,6 +150,117 @@
150 150 } while (head_val != old);
151 151 }
152 152  
  153 +STATIC bool
  154 +xlog_reserveq_wake(
  155 + struct log *log,
  156 + int *free_bytes)
  157 +{
  158 + struct xlog_ticket *tic;
  159 + int need_bytes;
  160 +
  161 + list_for_each_entry(tic, &log->l_reserveq, t_queue) {
  162 + if (tic->t_flags & XLOG_TIC_PERM_RESERV)
  163 + need_bytes = tic->t_unit_res * tic->t_cnt;
  164 + else
  165 + need_bytes = tic->t_unit_res;
  166 +
  167 + if (*free_bytes < need_bytes)
  168 + return false;
  169 + *free_bytes -= need_bytes;
  170 +
  171 + trace_xfs_log_grant_wake_up(log, tic);
  172 + wake_up(&tic->t_wait);
  173 + }
  174 +
  175 + return true;
  176 +}
  177 +
  178 +STATIC bool
  179 +xlog_writeq_wake(
  180 + struct log *log,
  181 + int *free_bytes)
  182 +{
  183 + struct xlog_ticket *tic;
  184 + int need_bytes;
  185 +
  186 + list_for_each_entry(tic, &log->l_writeq, t_queue) {
  187 + ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
  188 +
  189 + need_bytes = tic->t_unit_res;
  190 +
  191 + if (*free_bytes < need_bytes)
  192 + return false;
  193 + *free_bytes -= need_bytes;
  194 +
  195 + trace_xfs_log_regrant_write_wake_up(log, tic);
  196 + wake_up(&tic->t_wait);
  197 + }
  198 +
  199 + return true;
  200 +}
  201 +
  202 +STATIC int
  203 +xlog_reserveq_wait(
  204 + struct log *log,
  205 + struct xlog_ticket *tic,
  206 + int need_bytes)
  207 +{
  208 + list_add_tail(&tic->t_queue, &log->l_reserveq);
  209 +
  210 + do {
  211 + if (XLOG_FORCED_SHUTDOWN(log))
  212 + goto shutdown;
  213 + xlog_grant_push_ail(log, need_bytes);
  214 +
  215 + XFS_STATS_INC(xs_sleep_logspace);
  216 + trace_xfs_log_grant_sleep(log, tic);
  217 +
  218 + xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
  219 + trace_xfs_log_grant_wake(log, tic);
  220 +
  221 + spin_lock(&log->l_grant_reserve_lock);
  222 + if (XLOG_FORCED_SHUTDOWN(log))
  223 + goto shutdown;
  224 + } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes);
  225 +
  226 + list_del_init(&tic->t_queue);
  227 + return 0;
  228 +shutdown:
  229 + list_del_init(&tic->t_queue);
  230 + return XFS_ERROR(EIO);
  231 +}
  232 +
  233 +STATIC int
  234 +xlog_writeq_wait(
  235 + struct log *log,
  236 + struct xlog_ticket *tic,
  237 + int need_bytes)
  238 +{
  239 + list_add_tail(&tic->t_queue, &log->l_writeq);
  240 +
  241 + do {
  242 + if (XLOG_FORCED_SHUTDOWN(log))
  243 + goto shutdown;
  244 + xlog_grant_push_ail(log, need_bytes);
  245 +
  246 + XFS_STATS_INC(xs_sleep_logspace);
  247 + trace_xfs_log_regrant_write_sleep(log, tic);
  248 +
  249 + xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
  250 + trace_xfs_log_regrant_write_wake(log, tic);
  251 +
  252 + spin_lock(&log->l_grant_write_lock);
  253 + if (XLOG_FORCED_SHUTDOWN(log))
  254 + goto shutdown;
  255 + } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes);
  256 +
  257 + list_del_init(&tic->t_queue);
  258 + return 0;
  259 +shutdown:
  260 + list_del_init(&tic->t_queue);
  261 + return XFS_ERROR(EIO);
  262 +}
  263 +
153 264 static void
154 265 xlog_tic_reset_res(xlog_ticket_t *tic)
155 266 {
156 267  
... ... @@ -350,8 +461,19 @@
350 461 retval = xlog_grant_log_space(log, internal_ticket);
351 462 }
352 463  
  464 + if (unlikely(retval)) {
  465 + /*
  466 + * If we are failing, make sure the ticket doesn't have any
  467 + * current reservations. We don't want to add this back
  468 + * when the ticket/ transaction gets cancelled.
  469 + */
  470 + internal_ticket->t_curr_res = 0;
  471 + /* ungrant will give back unit_res * t_cnt. */
  472 + internal_ticket->t_cnt = 0;
  473 + }
  474 +
353 475 return retval;
354   -} /* xfs_log_reserve */
  476 +}
355 477  
356 478  
357 479 /*
... ... @@ -2481,8 +2603,8 @@
2481 2603 /*
2482 2604 * Atomically get the log space required for a log ticket.
2483 2605 *
2484   - * Once a ticket gets put onto the reserveq, it will only return after
2485   - * the needed reservation is satisfied.
  2606 + * Once a ticket gets put onto the reserveq, it will only return after the
  2607 + * needed reservation is satisfied.
2486 2608 *
2487 2609 * This function is structured so that it has a lock free fast path. This is
2488 2610 * necessary because every new transaction reservation will come through this
2489 2611  
2490 2612  
2491 2613  
2492 2614  
2493 2615  
2494 2616  
2495 2617  
2496 2618  
2497 2619  
2498 2620  
2499 2621  
... ... @@ -2490,114 +2612,54 @@
2490 2612 * every pass.
2491 2613 *
2492 2614 * As tickets are only ever moved on and off the reserveq under the
2493   - * l_grant_reserve_lock, we only need to take that lock if we are going
2494   - * to add the ticket to the queue and sleep. We can avoid taking the lock if the
2495   - * ticket was never added to the reserveq because the t_queue list head will be
2496   - * empty and we hold the only reference to it so it can safely be checked
2497   - * unlocked.
  2615 + * l_grant_reserve_lock, we only need to take that lock if we are going to add
  2616 + * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
  2617 + * was never added to the reserveq because the t_queue list head will be empty
  2618 + * and we hold the only reference to it so it can safely be checked unlocked.
2498 2619 */
2499 2620 STATIC int
2500   -xlog_grant_log_space(xlog_t *log,
2501   - xlog_ticket_t *tic)
  2621 +xlog_grant_log_space(
  2622 + struct log *log,
  2623 + struct xlog_ticket *tic)
2502 2624 {
2503   - int free_bytes;
2504   - int need_bytes;
  2625 + int free_bytes, need_bytes;
  2626 + int error = 0;
2505 2627  
2506   -#ifdef DEBUG
2507   - if (log->l_flags & XLOG_ACTIVE_RECOVERY)
2508   - panic("grant Recovery problem");
2509   -#endif
  2628 + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2510 2629  
2511 2630 trace_xfs_log_grant_enter(log, tic);
2512 2631  
  2632 + /*
  2633 + * If there are other waiters on the queue then give them a chance at
  2634 + * logspace before us. Wake up the first waiters, if we do not wake
  2635 + * up all the waiters then go to sleep waiting for more free space,
  2636 + * otherwise try to get some space for this transaction.
  2637 + */
2513 2638 need_bytes = tic->t_unit_res;
2514 2639 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2515 2640 need_bytes *= tic->t_ocnt;
2516   -
2517   - /* something is already sleeping; insert new transaction at end */
  2641 + free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2518 2642 if (!list_empty_careful(&log->l_reserveq)) {
2519 2643 spin_lock(&log->l_grant_reserve_lock);
2520   - /* recheck the queue now we are locked */
2521   - if (list_empty(&log->l_reserveq)) {
2522   - spin_unlock(&log->l_grant_reserve_lock);
2523   - goto redo;
2524   - }
2525   - list_add_tail(&tic->t_queue, &log->l_reserveq);
2526   -
2527   - trace_xfs_log_grant_sleep1(log, tic);
2528   -
2529   - /*
2530   - * Gotta check this before going to sleep, while we're
2531   - * holding the grant lock.
2532   - */
2533   - if (XLOG_FORCED_SHUTDOWN(log))
2534   - goto error_return;
2535   -
2536   - XFS_STATS_INC(xs_sleep_logspace);
2537   - xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2538   -
2539   - /*
2540   - * If we got an error, and the filesystem is shutting down,
2541   - * we'll catch it down below. So just continue...
2542   - */
2543   - trace_xfs_log_grant_wake1(log, tic);
2544   - }
2545   -
2546   -redo:
2547   - if (XLOG_FORCED_SHUTDOWN(log))
2548   - goto error_return_unlocked;
2549   -
2550   - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2551   - if (free_bytes < need_bytes) {
  2644 + if (!xlog_reserveq_wake(log, &free_bytes) ||
  2645 + free_bytes < need_bytes)
  2646 + error = xlog_reserveq_wait(log, tic, need_bytes);
  2647 + spin_unlock(&log->l_grant_reserve_lock);
  2648 + } else if (free_bytes < need_bytes) {
2552 2649 spin_lock(&log->l_grant_reserve_lock);
2553   - if (list_empty(&tic->t_queue))
2554   - list_add_tail(&tic->t_queue, &log->l_reserveq);
2555   -
2556   - trace_xfs_log_grant_sleep2(log, tic);
2557   -
2558   - if (XLOG_FORCED_SHUTDOWN(log))
2559   - goto error_return;
2560   -
2561   - xlog_grant_push_ail(log, need_bytes);
2562   -
2563   - XFS_STATS_INC(xs_sleep_logspace);
2564   - xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2565   -
2566   - trace_xfs_log_grant_wake2(log, tic);
2567   - goto redo;
2568   - }
2569   -
2570   - if (!list_empty(&tic->t_queue)) {
2571   - spin_lock(&log->l_grant_reserve_lock);
2572   - list_del_init(&tic->t_queue);
  2650 + error = xlog_reserveq_wait(log, tic, need_bytes);
2573 2651 spin_unlock(&log->l_grant_reserve_lock);
2574 2652 }
  2653 + if (error)
  2654 + return error;
2575 2655  
2576   - /* we've got enough space */
2577 2656 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
2578 2657 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2579 2658 trace_xfs_log_grant_exit(log, tic);
2580 2659 xlog_verify_grant_tail(log);
2581 2660 return 0;
  2661 +}
2582 2662  
2583   -error_return_unlocked:
2584   - spin_lock(&log->l_grant_reserve_lock);
2585   -error_return:
2586   - list_del_init(&tic->t_queue);
2587   - spin_unlock(&log->l_grant_reserve_lock);
2588   - trace_xfs_log_grant_error(log, tic);
2589   -
2590   - /*
2591   - * If we are failing, make sure the ticket doesn't have any
2592   - * current reservations. We don't want to add this back when
2593   - * the ticket/transaction gets cancelled.
2594   - */
2595   - tic->t_curr_res = 0;
2596   - tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2597   - return XFS_ERROR(EIO);
2598   -} /* xlog_grant_log_space */
2599   -
2600   -
2601 2663 /*
2602 2664 * Replenish the byte reservation required by moving the grant write head.
2603 2665 *
2604 2666  
... ... @@ -2605,10 +2667,12 @@
2605 2667 * free fast path.
2606 2668 */
2607 2669 STATIC int
2608   -xlog_regrant_write_log_space(xlog_t *log,
2609   - xlog_ticket_t *tic)
  2670 +xlog_regrant_write_log_space(
  2671 + struct log *log,
  2672 + struct xlog_ticket *tic)
2610 2673 {
2611   - int free_bytes, need_bytes;
  2674 + int free_bytes, need_bytes;
  2675 + int error = 0;
2612 2676  
2613 2677 tic->t_curr_res = tic->t_unit_res;
2614 2678 xlog_tic_reset_res(tic);
2615 2679  
2616 2680  
2617 2681  
2618 2682  
2619 2683  
2620 2684  
2621 2685  
2622 2686  
... ... @@ -2616,104 +2680,38 @@
2616 2680 if (tic->t_cnt > 0)
2617 2681 return 0;
2618 2682  
2619   -#ifdef DEBUG
2620   - if (log->l_flags & XLOG_ACTIVE_RECOVERY)
2621   - panic("regrant Recovery problem");
2622   -#endif
  2683 + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2623 2684  
2624 2685 trace_xfs_log_regrant_write_enter(log, tic);
2625   - if (XLOG_FORCED_SHUTDOWN(log))
2626   - goto error_return_unlocked;
2627 2686  
2628   - /* If there are other waiters on the queue then give them a
2629   - * chance at logspace before us. Wake up the first waiters,
2630   - * if we do not wake up all the waiters then go to sleep waiting
2631   - * for more free space, otherwise try to get some space for
2632   - * this transaction.
  2687 + /*
  2688 + * If there are other waiters on the queue then give them a chance at
  2689 + * logspace before us. Wake up the first waiters, if we do not wake
  2690 + * up all the waiters then go to sleep waiting for more free space,
  2691 + * otherwise try to get some space for this transaction.
2633 2692 */
2634 2693 need_bytes = tic->t_unit_res;
  2694 + free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2635 2695 if (!list_empty_careful(&log->l_writeq)) {
2636   - struct xlog_ticket *ntic;
2637   -
2638 2696 spin_lock(&log->l_grant_write_lock);
2639   - free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2640   - list_for_each_entry(ntic, &log->l_writeq, t_queue) {
2641   - ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
2642   -
2643   - if (free_bytes < ntic->t_unit_res)
2644   - break;
2645   - free_bytes -= ntic->t_unit_res;
2646   - wake_up(&ntic->t_wait);
2647   - }
2648   -
2649   - if (ntic != list_first_entry(&log->l_writeq,
2650   - struct xlog_ticket, t_queue)) {
2651   - if (list_empty(&tic->t_queue))
2652   - list_add_tail(&tic->t_queue, &log->l_writeq);
2653   - trace_xfs_log_regrant_write_sleep1(log, tic);
2654   -
2655   - xlog_grant_push_ail(log, need_bytes);
2656   -
2657   - XFS_STATS_INC(xs_sleep_logspace);
2658   - xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2659   - trace_xfs_log_regrant_write_wake1(log, tic);
2660   - } else
2661   - spin_unlock(&log->l_grant_write_lock);
2662   - }
2663   -
2664   -redo:
2665   - if (XLOG_FORCED_SHUTDOWN(log))
2666   - goto error_return_unlocked;
2667   -
2668   - free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2669   - if (free_bytes < need_bytes) {
  2697 + if (!xlog_writeq_wake(log, &free_bytes) ||
  2698 + free_bytes < need_bytes)
  2699 + error = xlog_writeq_wait(log, tic, need_bytes);
  2700 + spin_unlock(&log->l_grant_write_lock);
  2701 + } else if (free_bytes < need_bytes) {
2670 2702 spin_lock(&log->l_grant_write_lock);
2671   - if (list_empty(&tic->t_queue))
2672   - list_add_tail(&tic->t_queue, &log->l_writeq);
2673   -
2674   - if (XLOG_FORCED_SHUTDOWN(log))
2675   - goto error_return;
2676   -
2677   - xlog_grant_push_ail(log, need_bytes);
2678   -
2679   - XFS_STATS_INC(xs_sleep_logspace);
2680   - trace_xfs_log_regrant_write_sleep2(log, tic);
2681   - xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2682   -
2683   - trace_xfs_log_regrant_write_wake2(log, tic);
2684   - goto redo;
2685   - }
2686   -
2687   - if (!list_empty(&tic->t_queue)) {
2688   - spin_lock(&log->l_grant_write_lock);
2689   - list_del_init(&tic->t_queue);
  2703 + error = xlog_writeq_wait(log, tic, need_bytes);
2690 2704 spin_unlock(&log->l_grant_write_lock);
2691 2705 }
2692 2706  
2693   - /* we've got enough space */
  2707 + if (error)
  2708 + return error;
  2709 +
2694 2710 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2695 2711 trace_xfs_log_regrant_write_exit(log, tic);
2696 2712 xlog_verify_grant_tail(log);
2697 2713 return 0;
2698   -
2699   -
2700   - error_return_unlocked:
2701   - spin_lock(&log->l_grant_write_lock);
2702   - error_return:
2703   - list_del_init(&tic->t_queue);
2704   - spin_unlock(&log->l_grant_write_lock);
2705   - trace_xfs_log_regrant_write_error(log, tic);
2706   -
2707   - /*
2708   - * If we are failing, make sure the ticket doesn't have any
2709   - * current reservations. We don't want to add this back when
2710   - * the ticket/transaction gets cancelled.
2711   - */
2712   - tic->t_curr_res = 0;
2713   - tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2714   - return XFS_ERROR(EIO);
2715   -} /* xlog_regrant_write_log_space */
2716   -
  2714 +}
2717 2715  
2718 2716 /* The first cnt-1 times through here we don't need to
2719 2717 * move the grant write head because the permanent
... ... @@ -834,18 +834,14 @@
834 834 DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
835 835 DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
836 836 DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
837   -DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
838   -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
839   -DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
840   -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
  837 +DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
  838 +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
841 839 DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
842 840 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
843 841 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
844 842 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
845   -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
846   -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
847   -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
848   -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
  843 +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep);
  844 +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake);
849 845 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
850 846 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
851 847 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);