Commit 4a1e001d2bb75c47a9cdbbfb66ae51daff1ddcba
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
Merge branch 'rcu/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/urgent

Merge RCU fixes from Paul E. McKenney:

 "This series has four patches, the major point of which is to eliminate
  some slowdowns (including boot-time slowdowns) resulting from some
  RCU_FAST_NO_HZ changes. The issue with the changes is that posting
  timers from the idle loop has no effect if the CPU has entered
  dyntick-idle mode because the CPU has already computed its wakeup
  time, and posting a timer does not cause it to be recomputed. The
  short-term fix is for RCU to precompute the timeout value so that the
  CPU's calculation is correct."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
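The heart of the fix is visible in the kernel/time/tick-sched.c hunk at the bottom: rcu_needs_cpu() now reports, through a new delta_jiffies argument, how soon RCU will need the CPU again, and the tick layer folds that bound into the wakeup time before the dyntick-idle deadline is committed to hardware. A rough user-space sketch of that contract follows; mock_rcu_needs_cpu() and the hard-coded jiffies values are illustrative stand-ins, not the kernel's code:

#include <stdio.h>
#include <limits.h>

#define RCU_IDLE_GP_DELAY 6	/* Roughly one grace period, as in the patch. */

/* Stand-in for rcu_needs_cpu(): report how soon RCU needs this CPU. */
static int mock_rcu_needs_cpu(int has_callbacks, unsigned long *delta_jiffies)
{
	if (!has_callbacks) {
		*delta_jiffies = ULONG_MAX;	/* No RCU-imposed deadline. */
		return 0;
	}
	*delta_jiffies = RCU_IDLE_GP_DELAY;	/* Wake me in ~one grace period. */
	return 0;				/* The tick may still stop. */
}

int main(void)
{
	unsigned long last_jiffies = 1000;
	unsigned long next_jiffies = last_jiffies + 500; /* next timer-wheel event */
	unsigned long delta_jiffies = next_jiffies - last_jiffies;
	unsigned long rcu_delta_jiffies;

	mock_rcu_needs_cpu(1, &rcu_delta_jiffies);
	if (rcu_delta_jiffies < delta_jiffies) {	/* the new clamp */
		next_jiffies = last_jiffies + rcu_delta_jiffies;
		delta_jiffies = rcu_delta_jiffies;
	}
	printf("sleep until jiffy %lu (delta %lu)\n", next_jiffies, delta_jiffies);
	return 0;
}

Run against these made-up numbers it prints "sleep until jiffy 1006 (delta 6)": the 500-jiffy timer-wheel sleep is clamped to RCU's 6-jiffy bound before the CPU programs its wakeup, so the timer that rcu_prepare_for_idle() posts later no longer arrives too late to matter.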
7 changed files
include/linux/rcutiny.h
@@ -87,8 +87,9 @@
 
 #ifdef CONFIG_TINY_RCU
 
-static inline int rcu_needs_cpu(int cpu)
+static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return 0;
 }
 
@@ -96,8 +97,9 @@
 
 int rcu_preempt_needs_cpu(void);
 
-static inline int rcu_needs_cpu(int cpu)
+static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return rcu_preempt_needs_cpu();
 }
 
include/linux/rcutree.h
include/trace/events/rcu.h
@@ -289,6 +289,7 @@
  * "In holdoff": Nothing to do, holding off after unsuccessful attempt.
  * "Begin holdoff": Attempt failed, don't retry until next jiffy.
  * "Dyntick with callbacks": Entering dyntick-idle despite callbacks.
+ * "Dyntick with lazy callbacks": Entering dyntick-idle w/lazy callbacks.
  * "More callbacks": Still more callbacks, try again to clear them out.
  * "Callbacks drained": All callbacks processed, off to dyntick idle!
  * "Timer": Timer fired to cause CPU to continue processing callbacks.
kernel/rcutree.c
kernel/rcutree.h
@@ -84,6 +84,20 @@
 				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
+#ifdef CONFIG_RCU_FAST_NO_HZ
+	int dyntick_drain;	    /* Prepare-for-idle state variable. */
+	unsigned long dyntick_holdoff;
+				    /* No retries for the jiffy of failure. */
+	struct timer_list idle_gp_timer;
+				    /* Wake up CPU sleeping with callbacks. */
+	unsigned long idle_gp_timer_expires;
+				    /* When to wake up CPU (for repost). */
+	bool idle_first_pass;	    /* First pass of attempt to go idle? */
+	unsigned long nonlazy_posted;
+				    /* # times non-lazy CBs posted to CPU. */
+	unsigned long nonlazy_posted_snap;
+				    /* idle-period nonlazy_posted snapshot. */
+#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 };
 
 /* RCU's kthread states for tracing. */
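The pattern to notice in this hunk: the seven RCU_FAST_NO_HZ per-CPU variables deleted from kernel/rcutree_plugin.h below reappear here as fields of the existing per-CPU rcu_dynticks structure, so related state travels together and each function performs one per-CPU lookup instead of seven. A minimal stand-alone sketch of the before/after access pattern; the mock per_cpu() macro and trimmed-down struct are illustrative, not the kernel's:

#define NR_CPUS 4
#define per_cpu(var, cpu) ((var)[(cpu)])	/* mock of the kernel macro */

/* Before: one DEFINE_PER_CPU()-style variable per piece of state. */
static int rcu_dyntick_drain[NR_CPUS];
static unsigned long rcu_dyntick_holdoff[NR_CPUS];

/* After: the same state gathered into one per-CPU structure. */
struct rcu_dynticks_sketch {
	int dyntick_drain;
	unsigned long dyntick_holdoff;
};
static struct rcu_dynticks_sketch rcu_dynticks[NR_CPUS];

static void reset_state(int cpu, unsigned long jiffies)
{
	/* Old style: a separate per_cpu() lookup for each variable. */
	per_cpu(rcu_dyntick_drain, cpu) = 0;
	per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;

	/* New style: one lookup, then ordinary field accesses. */
	struct rcu_dynticks_sketch *rdtp = &per_cpu(rcu_dynticks, cpu);
	rdtp->dyntick_drain = 0;
	rdtp->dyntick_holdoff = jiffies - 1;
}

int main(void)
{
	reset_state(0, 1000);
	return 0;
}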
kernel/rcutree_plugin.h
@@ -1886,8 +1886,9 @@
  * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
  * any flavor of RCU.
  */
-int rcu_needs_cpu(int cpu)
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return rcu_cpu_has_callbacks(cpu);
 }
 
@@ -1962,42 +1963,7 @@
 #define RCU_IDLE_GP_DELAY 6		/* Roughly one grace period. */
 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */
 
-/* Loop counter for rcu_prepare_for_idle(). */
-static DEFINE_PER_CPU(int, rcu_dyntick_drain);
-/* If rcu_dyntick_holdoff==jiffies, don't try to enter dyntick-idle mode. */
-static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
-/* Timer to awaken the CPU if it enters dyntick-idle mode with callbacks. */
-static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer);
-/* Scheduled expiry time for rcu_idle_gp_timer to allow reposting. */
-static DEFINE_PER_CPU(unsigned long, rcu_idle_gp_timer_expires);
-/* Enable special processing on first attempt to enter dyntick-idle mode. */
-static DEFINE_PER_CPU(bool, rcu_idle_first_pass);
-/* Running count of non-lazy callbacks posted, never decremented. */
-static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted);
-/* Snapshot of rcu_nonlazy_posted to detect meaningful exits from idle. */
-static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap);
-
 /*
- * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
- * callbacks on this CPU, (2) this CPU has not yet attempted to enter
- * dyntick-idle mode, or (3) this CPU is in the process of attempting to
- * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
- * to enter dyntick-idle mode, we refuse to try to enter it. After all,
- * it is better to incur scheduling-clock interrupts than to spin
- * continuously for the same time duration!
- */
-int rcu_needs_cpu(int cpu)
-{
-	/* Flag a new idle sojourn to the idle-entry state machine. */
-	per_cpu(rcu_idle_first_pass, cpu) = 1;
-	/* If no callbacks, RCU doesn't need the CPU. */
-	if (!rcu_cpu_has_callbacks(cpu))
-		return 0;
-	/* Otherwise, RCU needs the CPU only if it recently tried and failed. */
-	return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies;
-}
-
-/*
  * Does the specified flavor of RCU have non-lazy callbacks pending on
  * the specified CPU? Both RCU flavor and CPU are specified by the
  * rcu_data structure.
@@ -2040,6 +2006,47 @@
 }
 
 /*
+ * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
+ * callbacks on this CPU, (2) this CPU has not yet attempted to enter
+ * dyntick-idle mode, or (3) this CPU is in the process of attempting to
+ * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
+ * to enter dyntick-idle mode, we refuse to try to enter it. After all,
+ * it is better to incur scheduling-clock interrupts than to spin
+ * continuously for the same time duration!
+ *
+ * The delta_jiffies argument is used to store the time when RCU is
+ * going to need the CPU again if it still has callbacks. The reason
+ * for this is that rcu_prepare_for_idle() might need to post a timer,
+ * but if so, it will do so after tick_nohz_stop_sched_tick() has set
+ * the wakeup time for this CPU. This means that RCU's timer can be
+ * delayed until the wakeup time, which defeats the purpose of posting
+ * a timer.
+ */
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
+{
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	/* Flag a new idle sojourn to the idle-entry state machine. */
+	rdtp->idle_first_pass = 1;
+	/* If no callbacks, RCU doesn't need the CPU. */
+	if (!rcu_cpu_has_callbacks(cpu)) {
+		*delta_jiffies = ULONG_MAX;
+		return 0;
+	}
+	if (rdtp->dyntick_holdoff == jiffies) {
+		/* RCU recently tried and failed, so don't try again. */
+		*delta_jiffies = 1;
+		return 1;
+	}
+	/* Set up for the possibility that RCU will post a timer. */
+	if (rcu_cpu_has_nonlazy_callbacks(cpu))
+		*delta_jiffies = RCU_IDLE_GP_DELAY;
+	else
+		*delta_jiffies = RCU_IDLE_LAZY_GP_DELAY;
+	return 0;
+}
+
+/*
  * Handler for smp_call_function_single(). The only point of this
  * handler is to wake the CPU up, so the handler does only tracing.
  */
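For quick reference, the replacement function packs two pieces of information into one call: the return value says whether the scheduling-clock tick must stay on, while *delta_jiffies bounds how long the CPU may sleep. Summarizing the four paths in the hunk above:

  CPU state                        return   *delta_jiffies
  no callbacks                     0        ULONG_MAX (no RCU deadline)
  holdoff after a recent failure   1        1 (keep the tick, retry next jiffy)
  non-lazy callbacks pending       0        RCU_IDLE_GP_DELAY (~one grace period)
  only lazy callbacks pending      0        RCU_IDLE_LAZY_GP_DELAY (~six seconds)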
@@ -2075,21 +2082,24 @@
  */
 static void rcu_prepare_for_idle_init(int cpu)
 {
-	per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
-	setup_timer(&per_cpu(rcu_idle_gp_timer, cpu),
-		    rcu_idle_gp_timer_func, cpu);
-	per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies - 1;
-	per_cpu(rcu_idle_first_pass, cpu) = 1;
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	rdtp->dyntick_holdoff = jiffies - 1;
+	setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
+	rdtp->idle_gp_timer_expires = jiffies - 1;
+	rdtp->idle_first_pass = 1;
 }
 
 /*
  * Clean up for exit from idle. Because we are exiting from idle, there
- * is no longer any point to rcu_idle_gp_timer, so cancel it. This will
+ * is no longer any point to ->idle_gp_timer, so cancel it. This will
  * do nothing if this timer is not active, so just cancel it unconditionally.
  */
 static void rcu_cleanup_after_idle(int cpu)
 {
-	del_timer(&per_cpu(rcu_idle_gp_timer, cpu));
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	del_timer(&rdtp->idle_gp_timer);
 	trace_rcu_prep_idle("Cleanup after idle");
 }
@@ -2108,42 +2118,41 @@
  * Because it is not legal to invoke rcu_process_callbacks() with irqs
  * disabled, we do one pass of force_quiescent_state(), then do a
  * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
- * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
+ * later. The ->dyntick_drain field controls the sequencing.
  *
  * The caller must have disabled interrupts.
  */
 static void rcu_prepare_for_idle(int cpu)
 {
 	struct timer_list *tp;
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
 	/*
 	 * If this is an idle re-entry, for example, due to use of
 	 * RCU_NONIDLE() or the new idle-loop tracing API within the idle
 	 * loop, then don't take any state-machine actions, unless the
 	 * momentary exit from idle queued additional non-lazy callbacks.
-	 * Instead, repost the rcu_idle_gp_timer if this CPU has callbacks
+	 * Instead, repost the ->idle_gp_timer if this CPU has callbacks
 	 * pending.
 	 */
-	if (!per_cpu(rcu_idle_first_pass, cpu) &&
-	    (per_cpu(rcu_nonlazy_posted, cpu) ==
-	     per_cpu(rcu_nonlazy_posted_snap, cpu))) {
+	if (!rdtp->idle_first_pass &&
+	    (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
 		if (rcu_cpu_has_callbacks(cpu)) {
-			tp = &per_cpu(rcu_idle_gp_timer, cpu);
-			mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu));
+			tp = &rdtp->idle_gp_timer;
+			mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
 		}
 		return;
 	}
-	per_cpu(rcu_idle_first_pass, cpu) = 0;
-	per_cpu(rcu_nonlazy_posted_snap, cpu) =
-		per_cpu(rcu_nonlazy_posted, cpu) - 1;
+	rdtp->idle_first_pass = 0;
+	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
 
 	/*
 	 * If there are no callbacks on this CPU, enter dyntick-idle mode.
 	 * Also reset state to avoid prejudicing later attempts.
 	 */
 	if (!rcu_cpu_has_callbacks(cpu)) {
-		per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
-		per_cpu(rcu_dyntick_drain, cpu) = 0;
+		rdtp->dyntick_holdoff = jiffies - 1;
+		rdtp->dyntick_drain = 0;
 		trace_rcu_prep_idle("No callbacks");
 		return;
 	}
@@ -2152,36 +2161,37 @@
 	 * If in holdoff mode, just return. We will presumably have
 	 * refrained from disabling the scheduling-clock tick.
 	 */
-	if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) {
+	if (rdtp->dyntick_holdoff == jiffies) {
 		trace_rcu_prep_idle("In holdoff");
 		return;
 	}
 
-	/* Check and update the rcu_dyntick_drain sequencing. */
-	if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+	/* Check and update the ->dyntick_drain sequencing. */
+	if (rdtp->dyntick_drain <= 0) {
 		/* First time through, initialize the counter. */
-		per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES;
-	} else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES &&
+		rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
+	} else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
 		   !rcu_pending(cpu) &&
 		   !local_softirq_pending()) {
 		/* Can we go dyntick-idle despite still having callbacks? */
-		trace_rcu_prep_idle("Dyntick with callbacks");
-		per_cpu(rcu_dyntick_drain, cpu) = 0;
-		per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
-		if (rcu_cpu_has_nonlazy_callbacks(cpu))
-			per_cpu(rcu_idle_gp_timer_expires, cpu) =
+		rdtp->dyntick_drain = 0;
+		rdtp->dyntick_holdoff = jiffies;
+		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+			trace_rcu_prep_idle("Dyntick with callbacks");
+			rdtp->idle_gp_timer_expires =
 				jiffies + RCU_IDLE_GP_DELAY;
-		else
-			per_cpu(rcu_idle_gp_timer_expires, cpu) =
+		} else {
+			rdtp->idle_gp_timer_expires =
 				jiffies + RCU_IDLE_LAZY_GP_DELAY;
-		tp = &per_cpu(rcu_idle_gp_timer, cpu);
-		mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu));
-		per_cpu(rcu_nonlazy_posted_snap, cpu) =
-			per_cpu(rcu_nonlazy_posted, cpu);
+			trace_rcu_prep_idle("Dyntick with lazy callbacks");
+		}
+		tp = &rdtp->idle_gp_timer;
+		mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+		rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
 		return; /* Nothing more to do immediately. */
-	} else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+	} else if (--(rdtp->dyntick_drain) <= 0) {
 		/* We have hit the limit, so time to give up. */
-		per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
+		rdtp->dyntick_holdoff = jiffies;
 		trace_rcu_prep_idle("Begin holdoff");
 		invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
 		return;
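The two expiry horizons chosen above differ by two orders of magnitude, which is the point of distinguishing lazy callbacks (typically kfree()s that can wait) from non-lazy ones. A throwaway sketch converting both to wall-clock time; HZ=100 is an assumed configuration, real kernels use 100, 250, 300, or 1000:

#include <stdio.h>

#define HZ 100					/* assumed, varies by config */
#define RCU_IDLE_GP_DELAY 6			/* roughly one grace period  */
#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)		/* roughly six seconds       */

int main(void)
{
	/* Convert the two timer horizons from jiffies to milliseconds. */
	printf("non-lazy wakeup: %d jiffies = %d ms\n",
	       RCU_IDLE_GP_DELAY, RCU_IDLE_GP_DELAY * 1000 / HZ);
	printf("lazy wakeup:     %d jiffies = %d ms\n",
	       RCU_IDLE_LAZY_GP_DELAY, RCU_IDLE_LAZY_GP_DELAY * 1000 / HZ);
	return 0;
}

With HZ=100 this prints 60 ms versus 6000 ms: a CPU holding only lazy callbacks may sleep a hundred times longer before RCU pulls it back.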
@@ -2227,7 +2237,7 @@
  */
 static void rcu_idle_count_callbacks_posted(void)
 {
-	__this_cpu_add(rcu_nonlazy_posted, 1);
+	__this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
 }
 
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 
@@ -2238,11 +2248,12 @@
 
 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 {
-	struct timer_list *tltp = &per_cpu(rcu_idle_gp_timer, cpu);
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+	struct timer_list *tltp = &rdtp->idle_gp_timer;
 
 	sprintf(cp, "drain=%d %c timer=%lu",
-		per_cpu(rcu_dyntick_drain, cpu),
-		per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.',
+		rdtp->dyntick_drain,
+		rdtp->dyntick_holdoff == jiffies ? 'H' : '.',
 		timer_pending(tltp) ? tltp->expires - jiffies : -1);
 }
 
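The string built here is appended to RCU CPU stall warnings. A tiny sketch of what the format yields for hypothetical values (drain counter 0, currently in holdoff, timer pending 5 jiffies out):

#include <stdio.h>

int main(void)
{
	char cp[32];
	int dyntick_drain = 0;		/* hypothetical value */
	int in_holdoff = 1;		/* hypothetical value */
	unsigned long timer_delta = 5;	/* hypothetical value */

	sprintf(cp, "drain=%d %c timer=%lu",
		dyntick_drain, in_holdoff ? 'H' : '.', timer_delta);
	puts(cp);	/* prints: drain=0 H timer=5 */
	return 0;
}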
kernel/time/tick-sched.c
@@ -274,6 +274,7 @@
 static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
 {
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
+	unsigned long rcu_delta_jiffies;
 	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	u64 time_delta;
@@ -322,7 +323,7 @@
 		time_delta = timekeeping_max_deferment();
 	} while (read_seqretry(&xtime_lock, seq));
 
-	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
+	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
 	    arch_needs_cpu(cpu)) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
@@ -330,6 +331,10 @@
 		/* Get the next timer wheel timer */
 		next_jiffies = get_next_timer_interrupt(last_jiffies);
 		delta_jiffies = next_jiffies - last_jiffies;
+		if (rcu_delta_jiffies < delta_jiffies) {
+			next_jiffies = last_jiffies + rcu_delta_jiffies;
+			delta_jiffies = rcu_delta_jiffies;
+		}
 	}
 	/*
 	 * Do not stop the tick, if we are only one off
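To see why the clamp lives here rather than inside RCU: by the time rcu_prepare_for_idle() posts ->idle_gp_timer, this function has already called get_next_timer_interrupt() and committed a wakeup time, so a timer posted from the idle path cannot shorten the sleep. Suppose the timer wheel's next event is 500 jiffies out but the CPU still has non-lazy callbacks: rcu_needs_cpu() reports rcu_delta_jiffies = 6, the new block trims next_jiffies to last_jiffies + 6, and when rcu_prepare_for_idle() later sets its timer for jiffies + RCU_IDLE_GP_DELAY, the programmed hardware wakeup already agrees with it.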