Commit 4a1e001d2bb75c47a9cdbbfb66ae51daff1ddcba

Authored by Ingo Molnar

Merge branch 'rcu/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/urgent

Merge RCU fixes from Paul E. McKenney:

 " This series has four patches, the major point of which is to eliminate
   some slowdowns (including boot-time slowdowns) resulting from some
   RCU_FAST_NO_HZ changes.  The issue with the changes is that posting timers
   from the idle loop has no effect if the CPU has entered dyntick-idle
   mode because the CPU has already computed its wakeup time, and posting
   a timer does not cause it to be recomputed.  The short-term fix is for
   RCU to precompute the timeout value so that the CPU's calculation is
   correct. "

Signed-off-by: Ingo Molnar <mingo@kernel.org>
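
The diff below implements this by giving rcu_needs_cpu() an out-parameter through which RCU reports how many jiffies may pass before it needs the CPU again, and by letting tick_nohz_stop_sched_tick() clamp its wakeup time to that value before the CPU enters dyntick-idle. A minimal sketch of the resulting calling convention, condensed from the patch (dyntick_next_wakeup() is a hypothetical helper used only for illustration, not part of the kernel):

	/*
	 * Hedged sketch, condensed from the diff below; dyntick_next_wakeup()
	 * is a hypothetical helper, not part of the patch.
	 */
	static unsigned long dyntick_next_wakeup(int cpu, unsigned long last_jiffies)
	{
		unsigned long delta_jiffies, rcu_delta_jiffies;

		/* RCU needs the CPU right now: keep the scheduling-clock tick. */
		if (rcu_needs_cpu(cpu, &rcu_delta_jiffies))
			return last_jiffies + 1;

		/* Sleep until the earlier of the timer-wheel deadline and RCU's
		 * precomputed deadline, so any timer RCU posts can actually fire. */
		delta_jiffies = get_next_timer_interrupt(last_jiffies) - last_jiffies;
		if (rcu_delta_jiffies < delta_jiffies)
			delta_jiffies = rcu_delta_jiffies;
		return last_jiffies + delta_jiffies;
	}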

Showing 7 changed files

include/linux/rcutiny.h
... ... @@ -87,8 +87,9 @@
87 87  
88 88 #ifdef CONFIG_TINY_RCU
89 89  
90   -static inline int rcu_needs_cpu(int cpu)
  90 +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
91 91 {
  92 + *delta_jiffies = ULONG_MAX;
92 93 return 0;
93 94 }
94 95  
... ... @@ -96,8 +97,9 @@
96 97  
97 98 int rcu_preempt_needs_cpu(void);
98 99  
99   -static inline int rcu_needs_cpu(int cpu)
  100 +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
100 101 {
  102 + *delta_jiffies = ULONG_MAX;
101 103 return rcu_preempt_needs_cpu();
102 104 }
103 105  
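
For the TINY_RCU stubs above, ULONG_MAX effectively means "RCU imposes no wakeup deadline": the clamp added to tick_nohz_stop_sched_tick() in kernel/time/tick-sched.c below only shortens the sleep when RCU's value is smaller than the timer-wheel delta, so a ULONG_MAX report leaves the CPU's own wakeup time untouched.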
include/linux/rcutree.h
... ... @@ -32,7 +32,7 @@
32 32  
33 33 extern void rcu_init(void);
34 34 extern void rcu_note_context_switch(int cpu);
35   -extern int rcu_needs_cpu(int cpu);
  35 +extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies);
36 36 extern void rcu_cpu_stall_reset(void);
37 37  
38 38 /*
include/trace/events/rcu.h
... ... @@ -289,6 +289,7 @@
289 289 * "In holdoff": Nothing to do, holding off after unsuccessful attempt.
290 290 * "Begin holdoff": Attempt failed, don't retry until next jiffy.
291 291 * "Dyntick with callbacks": Entering dyntick-idle despite callbacks.
  292 + * "Dyntick with lazy callbacks": Entering dyntick-idle w/lazy callbacks.
292 293 * "More callbacks": Still more callbacks, try again to clear them out.
293 294 * "Callbacks drained": All callbacks processed, off to dyntick idle!
294 295 * "Timer": Timer fired to cause CPU to continue processing callbacks.
kernel/rcutree.c
... ... @@ -1397,6 +1397,8 @@
1397 1397 rdp->qlen_lazy += rsp->qlen_lazy;
1398 1398 rdp->qlen += rsp->qlen;
1399 1399 rdp->n_cbs_adopted += rsp->qlen;
  1400 + if (rsp->qlen_lazy != rsp->qlen)
  1401 + rcu_idle_count_callbacks_posted();
1400 1402 rsp->qlen_lazy = 0;
1401 1403 rsp->qlen = 0;
1402 1404  
kernel/rcutree.h
... ... @@ -84,6 +84,20 @@
84 84 /* Process level is worth LLONG_MAX/2. */
85 85 int dynticks_nmi_nesting; /* Track NMI nesting level. */
86 86 atomic_t dynticks; /* Even value for idle, else odd. */
  87 +#ifdef CONFIG_RCU_FAST_NO_HZ
  88 + int dyntick_drain; /* Prepare-for-idle state variable. */
  89 + unsigned long dyntick_holdoff;
  90 + /* No retries for the jiffy of failure. */
  91 + struct timer_list idle_gp_timer;
  92 + /* Wake up CPU sleeping with callbacks. */
  93 + unsigned long idle_gp_timer_expires;
  94 + /* When to wake up CPU (for repost). */
  95 + bool idle_first_pass; /* First pass of attempt to go idle? */
  96 + unsigned long nonlazy_posted;
  97 + /* # times non-lazy CBs posted to CPU. */
  98 + unsigned long nonlazy_posted_snap;
  99 + /* idle-period nonlazy_posted snapshot. */
  100 +#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
87 101 };
88 102  
89 103 /* RCU's kthread states for tracing. */
kernel/rcutree_plugin.h
... ... @@ -1886,8 +1886,9 @@
1886 1886 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1887 1887 * any flavor of RCU.
1888 1888 */
1889   -int rcu_needs_cpu(int cpu)
  1889 +int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
1890 1890 {
  1891 + *delta_jiffies = ULONG_MAX;
1891 1892 return rcu_cpu_has_callbacks(cpu);
1892 1893 }
1893 1894  
... ... @@ -1962,42 +1963,7 @@
1962 1963 #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
1963 1964 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1964 1965  
1965   -/* Loop counter for rcu_prepare_for_idle(). */
1966   -static DEFINE_PER_CPU(int, rcu_dyntick_drain);
1967   -/* If rcu_dyntick_holdoff==jiffies, don't try to enter dyntick-idle mode. */
1968   -static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1969   -/* Timer to awaken the CPU if it enters dyntick-idle mode with callbacks. */
1970   -static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer);
1971   -/* Scheduled expiry time for rcu_idle_gp_timer to allow reposting. */
1972   -static DEFINE_PER_CPU(unsigned long, rcu_idle_gp_timer_expires);
1973   -/* Enable special processing on first attempt to enter dyntick-idle mode. */
1974   -static DEFINE_PER_CPU(bool, rcu_idle_first_pass);
1975   -/* Running count of non-lazy callbacks posted, never decremented. */
1976   -static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted);
1977   -/* Snapshot of rcu_nonlazy_posted to detect meaningful exits from idle. */
1978   -static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap);
1979   -
1980 1966 /*
1981   - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
1982   - * callbacks on this CPU, (2) this CPU has not yet attempted to enter
1983   - * dyntick-idle mode, or (3) this CPU is in the process of attempting to
1984   - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
1985   - * to enter dyntick-idle mode, we refuse to try to enter it. After all,
1986   - * it is better to incur scheduling-clock interrupts than to spin
1987   - * continuously for the same time duration!
1988   - */
1989   -int rcu_needs_cpu(int cpu)
1990   -{
1991   - /* Flag a new idle sojourn to the idle-entry state machine. */
1992   - per_cpu(rcu_idle_first_pass, cpu) = 1;
1993   - /* If no callbacks, RCU doesn't need the CPU. */
1994   - if (!rcu_cpu_has_callbacks(cpu))
1995   - return 0;
1996   - /* Otherwise, RCU needs the CPU only if it recently tried and failed. */
1997   - return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies;
1998   -}
1999   -
2000   -/*
2001 1967 * Does the specified flavor of RCU have non-lazy callbacks pending on
2002 1968 * the specified CPU? Both RCU flavor and CPU are specified by the
2003 1969 * rcu_data structure.
... ... @@ -2040,6 +2006,47 @@
2040 2006 }
2041 2007  
2042 2008 /*
  2009 + * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
  2010 + * callbacks on this CPU, (2) this CPU has not yet attempted to enter
  2011 + * dyntick-idle mode, or (3) this CPU is in the process of attempting to
  2012 + * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
  2013 + * to enter dyntick-idle mode, we refuse to try to enter it. After all,
  2014 + * it is better to incur scheduling-clock interrupts than to spin
  2015 + * continuously for the same time duration!
  2016 + *
  2017 + * The delta_jiffies argument is used to store the time when RCU is
  2018 + * going to need the CPU again if it still has callbacks. The reason
  2019 + * for this is that rcu_prepare_for_idle() might need to post a timer,
  2020 + * but if so, it will do so after tick_nohz_stop_sched_tick() has set
  2021 + * the wakeup time for this CPU. This means that RCU's timer can be
  2022 + * delayed until the wakeup time, which defeats the purpose of posting
  2023 + * a timer.
  2024 + */
  2025 +int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
  2026 +{
  2027 + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
  2028 +
  2029 + /* Flag a new idle sojourn to the idle-entry state machine. */
  2030 + rdtp->idle_first_pass = 1;
  2031 + /* If no callbacks, RCU doesn't need the CPU. */
  2032 + if (!rcu_cpu_has_callbacks(cpu)) {
  2033 + *delta_jiffies = ULONG_MAX;
  2034 + return 0;
  2035 + }
  2036 + if (rdtp->dyntick_holdoff == jiffies) {
  2037 + /* RCU recently tried and failed, so don't try again. */
  2038 + *delta_jiffies = 1;
  2039 + return 1;
  2040 + }
  2041 + /* Set up for the possibility that RCU will post a timer. */
  2042 + if (rcu_cpu_has_nonlazy_callbacks(cpu))
  2043 + *delta_jiffies = RCU_IDLE_GP_DELAY;
  2044 + else
  2045 + *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY;
  2046 + return 0;
  2047 +}
  2048 +
  2049 +/*
2043 2050 * Handler for smp_call_function_single(). The only point of this
2044 2051 * handler is to wake the CPU up, so the handler does only tracing.
2045 2052 */
... ... @@ -2075,21 +2082,24 @@
2075 2082 */
2076 2083 static void rcu_prepare_for_idle_init(int cpu)
2077 2084 {
2078   - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
2079   - setup_timer(&per_cpu(rcu_idle_gp_timer, cpu),
2080   - rcu_idle_gp_timer_func, cpu);
2081   - per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies - 1;
2082   - per_cpu(rcu_idle_first_pass, cpu) = 1;
  2085 + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
  2086 +
  2087 + rdtp->dyntick_holdoff = jiffies - 1;
  2088 + setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
  2089 + rdtp->idle_gp_timer_expires = jiffies - 1;
  2090 + rdtp->idle_first_pass = 1;
2083 2091 }
2084 2092  
2085 2093 /*
2086 2094 * Clean up for exit from idle. Because we are exiting from idle, there
2087   - * is no longer any point to rcu_idle_gp_timer, so cancel it. This will
  2095 + * is no longer any point to ->idle_gp_timer, so cancel it. This will
2088 2096 * do nothing if this timer is not active, so just cancel it unconditionally.
2089 2097 */
2090 2098 static void rcu_cleanup_after_idle(int cpu)
2091 2099 {
2092   - del_timer(&per_cpu(rcu_idle_gp_timer, cpu));
  2100 + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
  2101 +
  2102 + del_timer(&rdtp->idle_gp_timer);
2093 2103 trace_rcu_prep_idle("Cleanup after idle");
2094 2104 }
2095 2105  
... ... @@ -2108,42 +2118,41 @@
2108 2118 * Because it is not legal to invoke rcu_process_callbacks() with irqs
2109 2119 * disabled, we do one pass of force_quiescent_state(), then do a
2110 2120 * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
2111   - * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
  2121 + * later. The ->dyntick_drain field controls the sequencing.
2112 2122 *
2113 2123 * The caller must have disabled interrupts.
2114 2124 */
2115 2125 static void rcu_prepare_for_idle(int cpu)
2116 2126 {
2117 2127 struct timer_list *tp;
  2128 + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
2118 2129  
2119 2130 /*
2120 2131 * If this is an idle re-entry, for example, due to use of
2121 2132 * RCU_NONIDLE() or the new idle-loop tracing API within the idle
2122 2133 * loop, then don't take any state-machine actions, unless the
2123 2134 * momentary exit from idle queued additional non-lazy callbacks.
2124   - * Instead, repost the rcu_idle_gp_timer if this CPU has callbacks
  2135 + * Instead, repost the ->idle_gp_timer if this CPU has callbacks
2125 2136 * pending.
2126 2137 */
2127   - if (!per_cpu(rcu_idle_first_pass, cpu) &&
2128   - (per_cpu(rcu_nonlazy_posted, cpu) ==
2129   - per_cpu(rcu_nonlazy_posted_snap, cpu))) {
  2138 + if (!rdtp->idle_first_pass &&
  2139 + (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
2130 2140 if (rcu_cpu_has_callbacks(cpu)) {
2131   - tp = &per_cpu(rcu_idle_gp_timer, cpu);
2132   - mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu));
  2141 + tp = &rdtp->idle_gp_timer;
  2142 + mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
2133 2143 }
2134 2144 return;
2135 2145 }
2136   - per_cpu(rcu_idle_first_pass, cpu) = 0;
2137   - per_cpu(rcu_nonlazy_posted_snap, cpu) =
2138   - per_cpu(rcu_nonlazy_posted, cpu) - 1;
  2146 + rdtp->idle_first_pass = 0;
  2147 + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
2139 2148  
2140 2149 /*
2141 2150 * If there are no callbacks on this CPU, enter dyntick-idle mode.
2142 2151 * Also reset state to avoid prejudicing later attempts.
2143 2152 */
2144 2153 if (!rcu_cpu_has_callbacks(cpu)) {
2145   - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
2146   - per_cpu(rcu_dyntick_drain, cpu) = 0;
  2154 + rdtp->dyntick_holdoff = jiffies - 1;
  2155 + rdtp->dyntick_drain = 0;
2147 2156 trace_rcu_prep_idle("No callbacks");
2148 2157 return;
2149 2158 }
... ... @@ -2152,36 +2161,37 @@
2152 2161 * If in holdoff mode, just return. We will presumably have
2153 2162 * refrained from disabling the scheduling-clock tick.
2154 2163 */
2155   - if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) {
  2164 + if (rdtp->dyntick_holdoff == jiffies) {
2156 2165 trace_rcu_prep_idle("In holdoff");
2157 2166 return;
2158 2167 }
2159 2168  
2160   - /* Check and update the rcu_dyntick_drain sequencing. */
2161   - if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
  2169 + /* Check and update the ->dyntick_drain sequencing. */
  2170 + if (rdtp->dyntick_drain <= 0) {
2162 2171 /* First time through, initialize the counter. */
2163   - per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES;
2164   - } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES &&
  2172 + rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
  2173 + } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
2165 2174 !rcu_pending(cpu) &&
2166 2175 !local_softirq_pending()) {
2167 2176 /* Can we go dyntick-idle despite still having callbacks? */
2168   - trace_rcu_prep_idle("Dyntick with callbacks");
2169   - per_cpu(rcu_dyntick_drain, cpu) = 0;
2170   - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
2171   - if (rcu_cpu_has_nonlazy_callbacks(cpu))
2172   - per_cpu(rcu_idle_gp_timer_expires, cpu) =
  2177 + rdtp->dyntick_drain = 0;
  2178 + rdtp->dyntick_holdoff = jiffies;
  2179 + if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
  2180 + trace_rcu_prep_idle("Dyntick with callbacks");
  2181 + rdtp->idle_gp_timer_expires =
2173 2182 jiffies + RCU_IDLE_GP_DELAY;
2174   - else
2175   - per_cpu(rcu_idle_gp_timer_expires, cpu) =
  2183 + } else {
  2184 + rdtp->idle_gp_timer_expires =
2176 2185 jiffies + RCU_IDLE_LAZY_GP_DELAY;
2177   - tp = &per_cpu(rcu_idle_gp_timer, cpu);
2178   - mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu));
2179   - per_cpu(rcu_nonlazy_posted_snap, cpu) =
2180   - per_cpu(rcu_nonlazy_posted, cpu);
  2186 + trace_rcu_prep_idle("Dyntick with lazy callbacks");
  2187 + }
  2188 + tp = &rdtp->idle_gp_timer;
  2189 + mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
  2190 + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
2181 2191 return; /* Nothing more to do immediately. */
2182   - } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
  2192 + } else if (--(rdtp->dyntick_drain) <= 0) {
2183 2193 /* We have hit the limit, so time to give up. */
2184   - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
  2194 + rdtp->dyntick_holdoff = jiffies;
2185 2195 trace_rcu_prep_idle("Begin holdoff");
2186 2196 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
2187 2197 return;
... ... @@ -2227,7 +2237,7 @@
2227 2237 */
2228 2238 static void rcu_idle_count_callbacks_posted(void)
2229 2239 {
2230   - __this_cpu_add(rcu_nonlazy_posted, 1);
  2240 + __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
2231 2241 }
2232 2242  
2233 2243 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
... ... @@ -2238,11 +2248,12 @@
2238 2248  
2239 2249 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2240 2250 {
2241   - struct timer_list *tltp = &per_cpu(rcu_idle_gp_timer, cpu);
  2251 + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
  2252 + struct timer_list *tltp = &rdtp->idle_gp_timer;
2242 2253  
2243 2254 sprintf(cp, "drain=%d %c timer=%lu",
2244   - per_cpu(rcu_dyntick_drain, cpu),
2245   - per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.',
  2255 + rdtp->dyntick_drain,
  2256 + rdtp->dyntick_holdoff == jiffies ? 'H' : '.',
2246 2257 timer_pending(tltp) ? tltp->expires - jiffies : -1);
2247 2258 }
2248 2259  
kernel/time/tick-sched.c
... ... @@ -274,6 +274,7 @@
274 274 static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
275 275 {
276 276 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
  277 + unsigned long rcu_delta_jiffies;
277 278 ktime_t last_update, expires, now;
278 279 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
279 280 u64 time_delta;
... ... @@ -322,7 +323,7 @@
322 323 time_delta = timekeeping_max_deferment();
323 324 } while (read_seqretry(&xtime_lock, seq));
324 325  
325   - if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
  326 + if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
326 327 arch_needs_cpu(cpu)) {
327 328 next_jiffies = last_jiffies + 1;
328 329 delta_jiffies = 1;
... ... @@ -330,6 +331,10 @@
330 331 /* Get the next timer wheel timer */
331 332 next_jiffies = get_next_timer_interrupt(last_jiffies);
332 333 delta_jiffies = next_jiffies - last_jiffies;
  334 + if (rcu_delta_jiffies < delta_jiffies) {
  335 + next_jiffies = last_jiffies + rcu_delta_jiffies;
  336 + delta_jiffies = rcu_delta_jiffies;
  337 + }
333 338 }
334 339 /*
335 340 * Do not stop the tick, if we are only one off
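
To make the effect of this clamp concrete with hypothetical numbers (say HZ=1000): if the next timer-wheel event is 1000 jiffies away but the CPU still has non-lazy RCU callbacks queued, rcu_needs_cpu() reports rcu_delta_jiffies = RCU_IDLE_GP_DELAY = 6, so the tick is stopped for only 6 jiffies; the ->idle_gp_timer that rcu_prepare_for_idle() posts with the same precomputed expiry can then wake the CPU on time instead of being deferred to the CPU's original 1000-jiffy wakeup. With only lazy callbacks pending, the sleep is instead capped at RCU_IDLE_LAZY_GP_DELAY = 6 * HZ, roughly six seconds.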