Commit 40393f525fe698e2b639cf1851ef0a40e7e158a8

Authored by Paul E. McKenney

Merge branches 'doctorture.2013.01.29a', 'fixes.2013.01.26a', 'tagcb.2013.01.24a' and 'tiny.2013.01.29b' into HEAD

doctorture.2013.01.11a: Changes to rcutorture and to RCU documentation.

fixes.2013.01.26a: Miscellaneous fixes.

tagcb.2013.01.24a: Tag RCU callbacks with grace-period number to
	simplify callback advancement.

tiny.2013.01.29b: Enhancements to uniprocessor handling in tiny RCU.

12 changed files

include/linux/rcupdate.h
... ... @@ -756,7 +756,7 @@
756 756 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
757 757 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
758 758 * be preempted, but explicit blocking is illegal. Finally, in preemptible
759   - * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
  759 + * RCU implementations in real-time (with -rt patchset) kernel builds,
760 760 * RCU read-side critical sections may be preempted and they may also
761 761 * block, but only when acquiring spinlocks that are subject to priority
762 762 * inheritance.
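
A minimal reader/updater sketch may help ground the reader rules described in the comment above. The toy_cfg structure and helper names below are hypothetical; only the RCU, spinlock, and allocation calls are real kernel APIs, and the fragment is a kernel-context sketch rather than a standalone program.

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct toy_cfg {
	int threshold;
};

static struct toy_cfg __rcu *toy_cfg_p;		/* RCU-protected pointer */
static DEFINE_SPINLOCK(toy_cfg_lock);		/* serializes updaters only */

static int toy_read_threshold(void)
{
	struct toy_cfg *cfg;
	int val = -1;

	rcu_read_lock();			/* may be preempted, must not block */
	cfg = rcu_dereference(toy_cfg_p);
	if (cfg)
		val = cfg->threshold;
	rcu_read_unlock();
	return val;
}

static void toy_update_threshold(int new_threshold)
{
	struct toy_cfg *newcfg = kmalloc(sizeof(*newcfg), GFP_KERNEL);
	struct toy_cfg *old;

	if (!newcfg)
		return;
	newcfg->threshold = new_threshold;
	spin_lock(&toy_cfg_lock);
	old = rcu_dereference_protected(toy_cfg_p,
					lockdep_is_held(&toy_cfg_lock));
	rcu_assign_pointer(toy_cfg_p, newcfg);
	spin_unlock(&toy_cfg_lock);
	synchronize_rcu();			/* wait for pre-existing readers */
	kfree(old);
}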
include/trace/events/rcu.h
... ... @@ -44,8 +44,10 @@
44 44 * of a new grace period or the end of an old grace period ("cpustart"
45 45 * and "cpuend", respectively), a CPU passing through a quiescent
46 46 * state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
47   - * and "cpuofl", respectively), and a CPU being kicked for being too
48   - * long in dyntick-idle mode ("kick").
  47 + * and "cpuofl", respectively), a CPU being kicked for being too
  48 + * long in dyntick-idle mode ("kick"), a CPU accelerating its new
  49 + * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU
  50 + * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB").
49 51 */
50 52 TRACE_EVENT(rcu_grace_period,
51 53  
... ... @@ -393,7 +395,7 @@
393 395 */
394 396 TRACE_EVENT(rcu_batch_start,
395 397  
396   - TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit),
  398 + TP_PROTO(char *rcuname, long qlen_lazy, long qlen, long blimit),
397 399  
398 400 TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
399 401  
... ... @@ -401,7 +403,7 @@
401 403 __field(char *, rcuname)
402 404 __field(long, qlen_lazy)
403 405 __field(long, qlen)
404   - __field(int, blimit)
  406 + __field(long, blimit)
405 407 ),
406 408  
407 409 TP_fast_assign(
... ... @@ -411,7 +413,7 @@
411 413 __entry->blimit = blimit;
412 414 ),
413 415  
414   - TP_printk("%s CBs=%ld/%ld bl=%d",
  416 + TP_printk("%s CBs=%ld/%ld bl=%ld",
415 417 __entry->rcuname, __entry->qlen_lazy, __entry->qlen,
416 418 __entry->blimit)
417 419 );
init/Kconfig
... ... @@ -453,7 +453,7 @@
453 453  
454 454 config TREE_PREEMPT_RCU
455 455 bool "Preemptible tree-based hierarchical RCU"
456   - depends on PREEMPT && SMP
  456 + depends on PREEMPT
457 457 help
458 458 This option selects the RCU implementation that is
459 459 designed for very large SMP systems with hundreds or
... ... @@ -461,6 +461,8 @@
461 461 is also required. It also scales down nicely to
462 462 smaller systems.
463 463  
  464 + Select this option if you are unsure.
  465 +
464 466 config TINY_RCU
465 467 bool "UP-only small-memory-footprint RCU"
466 468 depends on !PREEMPT && !SMP
... ... @@ -485,6 +487,14 @@
485 487 help
486 488 This option enables preemptible-RCU code that is common between
487 489 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
  490 +
  491 +config RCU_STALL_COMMON
  492 + def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
  493 + help
  494 + This option enables RCU CPU stall code that is common between
  495 + the TINY and TREE variants of RCU. The purpose is to allow
  496 + the tiny variants to disable RCU CPU stall warnings, while
  497 + making these warnings mandatory for the tree variants.
488 498  
489 499 config CONTEXT_TRACKING
490 500 bool
kernel/context_tracking.c
  1 +/*
  2 + * Context tracking: Probe on high level context boundaries such as kernel
  3 + * and userspace. This includes syscalls and exceptions entry/exit.
  4 + *
  5 + * This is used by RCU to remove its dependency on the timer tick while a CPU
  6 + * runs in userspace.
  7 + *
  8 + * Started by Frederic Weisbecker:
  9 + *
  10 + * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
  11 + *
  12 + * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
  13 + * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
  14 + *
  15 + */
  16 +
1 17 #include <linux/context_tracking.h>
2 18 #include <linux/rcupdate.h>
3 19 #include <linux/sched.h>
... ... @@ -6,8 +22,8 @@
6 22  
7 23 struct context_tracking {
8 24 /*
9   - * When active is false, hooks are not set to
10   - * minimize overhead: TIF flags are cleared
  25 + * When active is false, probes are unset in order
  26 + * to minimize overhead: TIF flags are cleared
11 27 * and calls to user_enter/exit are ignored. This
12 28 * may be further optimized using static keys.
13 29 */
... ... @@ -24,6 +40,15 @@
24 40 #endif
25 41 };
26 42  
  43 +/**
  44 + * user_enter - Inform the context tracking that the CPU is going to
  45 + * enter userspace mode.
  46 + *
  47 + * This function must be called right before we switch from the kernel
  48 + * to userspace, when it's guaranteed the remaining kernel instructions
  49 + * to execute won't use any RCU read side critical section because this
  50 + * function sets RCU in extended quiescent state.
  51 + */
27 52 void user_enter(void)
28 53 {
29 54 unsigned long flags;
30 55  
31 56  
32 57  
33 58  
34 59  
... ... @@ -39,40 +64,70 @@
39 64 if (in_interrupt())
40 65 return;
41 66  
  67 + /* Kernel threads aren't supposed to go to userspace */
42 68 WARN_ON_ONCE(!current->mm);
43 69  
44 70 local_irq_save(flags);
45 71 if (__this_cpu_read(context_tracking.active) &&
46 72 __this_cpu_read(context_tracking.state) != IN_USER) {
47 73 __this_cpu_write(context_tracking.state, IN_USER);
  74 + /*
  75 + * At this stage, only low level arch entry code remains and
  76 + * then we'll run in userspace. We can assume there won't be
  77 + * any RCU read-side critical section until the next call to
  78 + * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
  79 + * on the tick.
  80 + */
48 81 rcu_user_enter();
49 82 }
50 83 local_irq_restore(flags);
51 84 }
52 85  
  86 +
  87 +/**
  88 + * user_exit - Inform the context tracking that the CPU is
  89 + * exiting userspace mode and entering the kernel.
  90 + *
  91 + * This function must be called after we entered the kernel from userspace
  92 + * before any use of RCU read side critical section. This potentially include
  93 + * any high level kernel code like syscalls, exceptions, signal handling, etc...
  94 + *
  95 + * This call supports re-entrancy. This way it can be called from any exception
  96 + * handler without needing to know if we came from userspace or not.
  97 + */
53 98 void user_exit(void)
54 99 {
55 100 unsigned long flags;
56 101  
57   - /*
58   - * Some contexts may involve an exception occuring in an irq,
59   - * leading to that nesting:
60   - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
61   - * This would mess up the dyntick_nesting count though. And rcu_irq_*()
62   - * helpers are enough to protect RCU uses inside the exception. So
63   - * just return immediately if we detect we are in an IRQ.
64   - */
65 102 if (in_interrupt())
66 103 return;
67 104  
68 105 local_irq_save(flags);
69 106 if (__this_cpu_read(context_tracking.state) == IN_USER) {
70 107 __this_cpu_write(context_tracking.state, IN_KERNEL);
  108 + /*
  109 + * We are going to run code that may use RCU. Inform
  110 + * RCU core about that (ie: we may need the tick again).
  111 + */
71 112 rcu_user_exit();
72 113 }
73 114 local_irq_restore(flags);
74 115 }
75 116  
  117 +
  118 +/**
  119 + * context_tracking_task_switch - context switch the syscall callbacks
  120 + * @prev: the task that is being switched out
  121 + * @next: the task that is being switched in
  122 + *
  123 + * The context tracking uses the syscall slow path to implement its user-kernel
  124 + * boundaries probes on syscalls. This way it doesn't impact the syscall fast
  125 + * path on CPUs that don't do context tracking.
  126 + *
  127 + * But we need to clear the flag on the previous task because it may later
  128 + * migrate to some CPU that doesn't do the context tracking. As such the TIF
  129 + * flag may not be desired there.
  130 + */
76 131 void context_tracking_task_switch(struct task_struct *prev,
77 132 struct task_struct *next)
78 133 {
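
For context, a hedged sketch of how an architecture's syscall slow path might use the probes documented above: the function names and surrounding entry code are hypothetical, and only user_exit() and user_enter() come from <linux/context_tracking.h>.

#include <linux/context_tracking.h>
#include <linux/ptrace.h>

/* Hypothetical arch hook run on syscall entry, once on the kernel stack:
 * leave the RCU extended quiescent state before any RCU usage. */
void toy_syscall_trace_enter(struct pt_regs *regs)
{
	user_exit();
	/* ... tracing, seccomp, audit, and other slow-path work ... */
}

/* Hypothetical arch hook run just before returning to userspace: after
 * user_enter(), no RCU read-side critical sections may run until the
 * next user_exit() or rcu_irq_enter(). */
void toy_syscall_trace_leave(struct pt_regs *regs)
{
	/* ... other slow-path work ... */
	user_enter();
}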
kernel/rcu.h
... ... @@ -111,5 +111,12 @@
111 111  
112 112 extern int rcu_expedited;
113 113  
  114 +#ifdef CONFIG_RCU_STALL_COMMON
  115 +
  116 +extern int rcu_cpu_stall_suppress;
  117 +int rcu_jiffies_till_stall_check(void);
  118 +
  119 +#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
  120 +
114 121 #endif /* __LINUX_RCU_H */
kernel/rcupdate.c
... ... @@ -415,4 +415,55 @@
415 415 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
416 416 do { } while (0)
417 417 #endif
  418 +
  419 +#ifdef CONFIG_RCU_STALL_COMMON
  420 +
  421 +#ifdef CONFIG_PROVE_RCU
  422 +#define RCU_STALL_DELAY_DELTA (5 * HZ)
  423 +#else
  424 +#define RCU_STALL_DELAY_DELTA 0
  425 +#endif
  426 +
  427 +int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
  428 +int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
  429 +
  430 +module_param(rcu_cpu_stall_suppress, int, 0644);
  431 +module_param(rcu_cpu_stall_timeout, int, 0644);
  432 +
  433 +int rcu_jiffies_till_stall_check(void)
  434 +{
  435 + int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
  436 +
  437 + /*
  438 + * Limit check must be consistent with the Kconfig limits
  439 + * for CONFIG_RCU_CPU_STALL_TIMEOUT.
  440 + */
  441 + if (till_stall_check < 3) {
  442 + ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
  443 + till_stall_check = 3;
  444 + } else if (till_stall_check > 300) {
  445 + ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
  446 + till_stall_check = 300;
  447 + }
  448 + return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
  449 +}
  450 +
  451 +static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
  452 +{
  453 + rcu_cpu_stall_suppress = 1;
  454 + return NOTIFY_DONE;
  455 +}
  456 +
  457 +static struct notifier_block rcu_panic_block = {
  458 + .notifier_call = rcu_panic,
  459 +};
  460 +
  461 +static int __init check_cpu_stall_init(void)
  462 +{
  463 + atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
  464 + return 0;
  465 +}
  466 +early_initcall(check_cpu_stall_init);
  467 +
  468 +#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
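
As a quick illustration of the clamping in rcu_jiffies_till_stall_check() above, here is a standalone userspace sketch of the same logic. TOY_HZ and the test values are assumptions; the 3..300 range mirrors the Kconfig limits for CONFIG_RCU_CPU_STALL_TIMEOUT.

#include <stdio.h>

#define TOY_HZ 1000			/* assumption: a common HZ value */
#define TOY_STALL_DELAY_DELTA 0		/* nonzero only under PROVE_RCU */

static int toy_stall_timeout = 21;	/* mirrors the Kconfig default */

static int toy_jiffies_till_stall_check(void)
{
	int till_stall_check = toy_stall_timeout;

	/* Same clamp as above: keep within the Kconfig range 3..300. */
	if (till_stall_check < 3) {
		toy_stall_timeout = 3;
		till_stall_check = 3;
	} else if (till_stall_check > 300) {
		toy_stall_timeout = 300;
		till_stall_check = 300;
	}
	return till_stall_check * TOY_HZ + TOY_STALL_DELAY_DELTA;
}

int main(void)
{
	int vals[] = { 0, 21, 1000 };
	int i;

	for (i = 0; i < 3; i++) {
		toy_stall_timeout = vals[i];
		printf("timeout=%4d s -> %d jiffies\n",
		       vals[i], toy_jiffies_till_stall_check());
	}
	return 0;
}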
kernel/rcutiny.c
... ... @@ -51,10 +51,10 @@
51 51 void (*func)(struct rcu_head *rcu),
52 52 struct rcu_ctrlblk *rcp);
53 53  
54   -#include "rcutiny_plugin.h"
55   -
56 54 static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
57 55  
  56 +#include "rcutiny_plugin.h"
  57 +
58 58 /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
59 59 static void rcu_idle_enter_common(long long newval)
60 60 {
... ... @@ -193,7 +193,7 @@
193 193 * interrupts don't count, we must be running at the first interrupt
194 194 * level.
195 195 */
196   -int rcu_is_cpu_rrupt_from_idle(void)
  196 +static int rcu_is_cpu_rrupt_from_idle(void)
197 197 {
198 198 return rcu_dynticks_nesting <= 1;
199 199 }
... ... @@ -205,6 +205,7 @@
205 205 */
206 206 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
207 207 {
  208 + reset_cpu_stall_ticks(rcp);
208 209 if (rcp->rcucblist != NULL &&
209 210 rcp->donetail != rcp->curtail) {
210 211 rcp->donetail = rcp->curtail;
... ... @@ -251,6 +252,7 @@
251 252 */
252 253 void rcu_check_callbacks(int cpu, int user)
253 254 {
  255 + check_cpu_stalls();
254 256 if (user || rcu_is_cpu_rrupt_from_idle())
255 257 rcu_sched_qs(cpu);
256 258 else if (!in_softirq())
kernel/rcutiny_plugin.h
... ... @@ -33,6 +33,9 @@
33 33 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
34 34 struct rcu_head **curtail; /* ->next pointer of last CB. */
35 35 RCU_TRACE(long qlen); /* Number of pending CBs. */
  36 + RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
  37 + RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
  38 + RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
36 39 RCU_TRACE(char *name); /* Name of RCU type. */
37 40 };
38 41  
... ... @@ -54,6 +57,51 @@
54 57 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
55 58 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
56 59  
  60 +#ifdef CONFIG_RCU_TRACE
  61 +
  62 +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
  63 +{
  64 + unsigned long j;
  65 + unsigned long js;
  66 +
  67 + if (rcu_cpu_stall_suppress)
  68 + return;
  69 + rcp->ticks_this_gp++;
  70 + j = jiffies;
  71 + js = rcp->jiffies_stall;
  72 + if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
  73 + pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
  74 + rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
  75 + jiffies - rcp->gp_start, rcp->qlen);
  76 + dump_stack();
  77 + }
  78 + if (*rcp->curtail && ULONG_CMP_GE(j, js))
  79 + rcp->jiffies_stall = jiffies +
  80 + 3 * rcu_jiffies_till_stall_check() + 3;
  81 + else if (ULONG_CMP_GE(j, js))
  82 + rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
  83 +}
  84 +
  85 +static void check_cpu_stall_preempt(void);
  86 +
  87 +#endif /* #ifdef CONFIG_RCU_TRACE */
  88 +
  89 +static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
  90 +{
  91 +#ifdef CONFIG_RCU_TRACE
  92 + rcp->ticks_this_gp = 0;
  93 + rcp->gp_start = jiffies;
  94 + rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
  95 +#endif /* #ifdef CONFIG_RCU_TRACE */
  96 +}
  97 +
  98 +static void check_cpu_stalls(void)
  99 +{
  100 + RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
  101 + RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
  102 + RCU_TRACE(check_cpu_stall_preempt());
  103 +}
  104 +
57 105 #ifdef CONFIG_TINY_PREEMPT_RCU
58 106  
59 107 #include <linux/delay.h>
... ... @@ -448,6 +496,7 @@
448 496 /* Official start of GP. */
449 497 rcu_preempt_ctrlblk.gpnum++;
450 498 RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
  499 + reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
451 500  
452 501 /* Any blocked RCU readers block new GP. */
453 502 if (rcu_preempt_blocked_readers_any())
... ... @@ -1053,6 +1102,13 @@
1053 1102 MODULE_AUTHOR("Paul E. McKenney");
1054 1103 MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
1055 1104 MODULE_LICENSE("GPL");
  1105 +
  1106 +static void check_cpu_stall_preempt(void)
  1107 +{
  1108 +#ifdef CONFIG_TINY_PREEMPT_RCU
  1109 + check_cpu_stall(&rcu_preempt_ctrlblk.rcb);
  1110 +#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */
  1111 +}
1056 1112  
1057 1113 #endif /* #ifdef CONFIG_RCU_TRACE */
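
The stall check above leans on ULONG_CMP_GE() to compare jiffies values that may wrap. A standalone sketch of that comparison follows; the macro body is copied from the kernel's definition, and the jiffies values are made up for illustration.

#include <limits.h>
#include <stdio.h>

#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

int main(void)
{
	unsigned long js = ULONG_MAX - 5;	/* stall deadline just before wrap */
	unsigned long j = js + 10;		/* "now", after jiffies wrapped */

	/* A plain >= gives the wrong answer once the counter wraps... */
	printf("naive  j >= js      : %d\n", j >= js);
	/* ...but the modular comparison still sees j as past the deadline. */
	printf("ULONG_CMP_GE(j, js) : %d\n", ULONG_CMP_GE(j, js));
	return 0;
}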
kernel/rcutorture.c
... ... @@ -1782,7 +1782,7 @@
1782 1782 barrier_cbs_wq =
1783 1783 kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
1784 1784 GFP_KERNEL);
1785   - if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0)
  1785 + if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
1786 1786 return -ENOMEM;
1787 1787 for (i = 0; i < n_barrier_cbs; i++) {
1788 1788 init_waitqueue_head(&barrier_cbs_wq[i]);
kernel/rcutree.c
... ... @@ -105,7 +105,7 @@
105 105 * The rcu_scheduler_active variable transitions from zero to one just
106 106 * before the first task is spawned. So when this variable is zero, RCU
107 107 * can assume that there is but one task, allowing RCU to (for example)
108   - * optimized synchronize_sched() to a simple barrier(). When this variable
  108 + * optimize synchronize_sched() to a simple barrier(). When this variable
109 109 * is one, RCU must actually do all the hard work required to detect real
110 110 * grace periods. This variable is also used to suppress boot-time false
111 111 * positives from lockdep-RCU error checking.
... ... @@ -217,12 +217,6 @@
217 217 module_param(qhimark, long, 0444);
218 218 module_param(qlowmark, long, 0444);
219 219  
220   -int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
221   -int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
222   -
223   -module_param(rcu_cpu_stall_suppress, int, 0644);
224   -module_param(rcu_cpu_stall_timeout, int, 0644);
225   -
226 220 static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
227 221 static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
228 222  
229 223  
230 224  
... ... @@ -305,17 +299,27 @@
305 299 }
306 300  
307 301 /*
308   - * Does the current CPU require a yet-as-unscheduled grace period?
  302 + * Does the current CPU require a not-yet-started grace period?
  303 + * The caller must have disabled interrupts to prevent races with
  304 + * normal callback registry.
309 305 */
310 306 static int
311 307 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
312 308 {
313   - struct rcu_head **ntp;
  309 + int i;
314 310  
315   - ntp = rdp->nxttail[RCU_DONE_TAIL +
316   - (ACCESS_ONCE(rsp->completed) != rdp->completed)];
317   - return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
318   - !rcu_gp_in_progress(rsp);
  311 + if (rcu_gp_in_progress(rsp))
  312 + return 0; /* No, a grace period is already in progress. */
  313 + if (!rdp->nxttail[RCU_NEXT_TAIL])
  314 + return 0; /* No, this is a no-CBs (or offline) CPU. */
  315 + if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
  316 + return 1; /* Yes, this CPU has newly registered callbacks. */
  317 + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
  318 + if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
  319 + ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
  320 + rdp->nxtcompleted[i]))
  321 + return 1; /* Yes, CBs for future grace period. */
  322 + return 0; /* No grace period needed. */
319 323 }
320 324  
321 325 /*
... ... @@ -336,7 +340,7 @@
336 340 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
337 341 bool user)
338 342 {
339   - trace_rcu_dyntick("Start", oldval, 0);
  343 + trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
340 344 if (!user && !is_idle_task(current)) {
341 345 struct task_struct *idle = idle_task(smp_processor_id());
342 346  
... ... @@ -727,7 +731,7 @@
727 731 * interrupt from idle, return true. The caller must have at least
728 732 * disabled preemption.
729 733 */
730   -int rcu_is_cpu_rrupt_from_idle(void)
  734 +static int rcu_is_cpu_rrupt_from_idle(void)
731 735 {
732 736 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
733 737 }
734 738  
... ... @@ -793,28 +797,10 @@
793 797 return 0;
794 798 }
795 799  
796   -static int jiffies_till_stall_check(void)
797   -{
798   - int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
799   -
800   - /*
801   - * Limit check must be consistent with the Kconfig limits
802   - * for CONFIG_RCU_CPU_STALL_TIMEOUT.
803   - */
804   - if (till_stall_check < 3) {
805   - ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
806   - till_stall_check = 3;
807   - } else if (till_stall_check > 300) {
808   - ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
809   - till_stall_check = 300;
810   - }
811   - return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
812   -}
813   -
814 800 static void record_gp_stall_check_time(struct rcu_state *rsp)
815 801 {
816 802 rsp->gp_start = jiffies;
817   - rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
  803 + rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
818 804 }
819 805  
820 806 /*
... ... @@ -857,7 +843,7 @@
857 843 raw_spin_unlock_irqrestore(&rnp->lock, flags);
858 844 return;
859 845 }
860   - rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
  846 + rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
861 847 raw_spin_unlock_irqrestore(&rnp->lock, flags);
862 848  
863 849 /*
... ... @@ -935,7 +921,7 @@
935 921 raw_spin_lock_irqsave(&rnp->lock, flags);
936 922 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
937 923 rsp->jiffies_stall = jiffies +
938   - 3 * jiffies_till_stall_check() + 3;
  924 + 3 * rcu_jiffies_till_stall_check() + 3;
939 925 raw_spin_unlock_irqrestore(&rnp->lock, flags);
940 926  
941 927 set_need_resched(); /* kick ourselves to get things going. */
... ... @@ -966,12 +952,6 @@
966 952 }
967 953 }
968 954  
969   -static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
970   -{
971   - rcu_cpu_stall_suppress = 1;
972   - return NOTIFY_DONE;
973   -}
974   -
975 955 /**
976 956 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
977 957 *
... ... @@ -989,15 +969,6 @@
989 969 rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
990 970 }
991 971  
992   -static struct notifier_block rcu_panic_block = {
993   - .notifier_call = rcu_panic,
994   -};
995   -
996   -static void __init check_cpu_stall_init(void)
997   -{
998   - atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
999   -}
1000   -
1001 972 /*
1002 973 * Update CPU-local rcu_data state to record the newly noticed grace period.
1003 974 * This is used both when we started the grace period and when we notice
... ... @@ -1071,6 +1042,145 @@
1071 1042 }
1072 1043  
1073 1044 /*
  1045 + * Determine the value that ->completed will have at the end of the
  1046 + * next subsequent grace period. This is used to tag callbacks so that
  1047 + * a CPU can invoke callbacks in a timely fashion even if that CPU has
  1048 + * been dyntick-idle for an extended period with callbacks under the
  1049 + * influence of RCU_FAST_NO_HZ.
  1050 + *
  1051 + * The caller must hold rnp->lock with interrupts disabled.
  1052 + */
  1053 +static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
  1054 + struct rcu_node *rnp)
  1055 +{
  1056 + /*
  1057 + * If RCU is idle, we just wait for the next grace period.
  1058 + * But we can only be sure that RCU is idle if we are looking
  1059 + * at the root rcu_node structure -- otherwise, a new grace
  1060 + * period might have started, but just not yet gotten around
  1061 + * to initializing the current non-root rcu_node structure.
  1062 + */
  1063 + if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
  1064 + return rnp->completed + 1;
  1065 +
  1066 + /*
  1067 + * Otherwise, wait for a possible partial grace period and
  1068 + * then the subsequent full grace period.
  1069 + */
  1070 + return rnp->completed + 2;
  1071 +}
  1072 +
  1073 +/*
  1074 + * If there is room, assign a ->completed number to any callbacks on
  1075 + * this CPU that have not already been assigned. Also accelerate any
  1076 + * callbacks that were previously assigned a ->completed number that has
  1077 + * since proven to be too conservative, which can happen if callbacks get
  1078 + * assigned a ->completed number while RCU is idle, but with reference to
  1079 + * a non-root rcu_node structure. This function is idempotent, so it does
  1080 + * not hurt to call it repeatedly.
  1081 + *
  1082 + * The caller must hold rnp->lock with interrupts disabled.
  1083 + */
  1084 +static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
  1085 + struct rcu_data *rdp)
  1086 +{
  1087 + unsigned long c;
  1088 + int i;
  1089 +
  1090 + /* If the CPU has no callbacks, nothing to do. */
  1091 + if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
  1092 + return;
  1093 +
  1094 + /*
  1095 + * Starting from the sublist containing the callbacks most
  1096 + * recently assigned a ->completed number and working down, find the
  1097 + * first sublist that is not assignable to an upcoming grace period.
  1098 + * Such a sublist has something in it (first two tests) and has
  1099 + * a ->completed number assigned that will complete sooner than
  1100 + * the ->completed number for newly arrived callbacks (last test).
  1101 + *
  1102 + * The key point is that any later sublist can be assigned the
  1103 + * same ->completed number as the newly arrived callbacks, which
  1104 + * means that the callbacks in any of these later sublist can be
  1105 + * grouped into a single sublist, whether or not they have already
  1106 + * been assigned a ->completed number.
  1107 + */
  1108 + c = rcu_cbs_completed(rsp, rnp);
  1109 + for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
  1110 + if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
  1111 + !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
  1112 + break;
  1113 +
  1114 + /*
  1115 + * If there are no sublist for unassigned callbacks, leave.
  1116 + * At the same time, advance "i" one sublist, so that "i" will
  1117 + * index into the sublist where all the remaining callbacks should
  1118 + * be grouped into.
  1119 + */
  1120 + if (++i >= RCU_NEXT_TAIL)
  1121 + return;
  1122 +
  1123 + /*
  1124 + * Assign all subsequent callbacks' ->completed number to the next
  1125 + * full grace period and group them all in the sublist initially
  1126 + * indexed by "i".
  1127 + */
  1128 + for (; i <= RCU_NEXT_TAIL; i++) {
  1129 + rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
  1130 + rdp->nxtcompleted[i] = c;
  1131 + }
  1132 +
  1133 + /* Trace depending on how much we were able to accelerate. */
  1134 + if (!*rdp->nxttail[RCU_WAIT_TAIL])
  1135 + trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
  1136 + else
  1137 + trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
  1138 +}
  1139 +
  1140 +/*
  1141 + * Move any callbacks whose grace period has completed to the
  1142 + * RCU_DONE_TAIL sublist, then compact the remaining sublists and
  1143 + * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
  1144 + * sublist. This function is idempotent, so it does not hurt to
  1145 + * invoke it repeatedly. As long as it is not invoked -too- often...
  1146 + *
  1147 + * The caller must hold rnp->lock with interrupts disabled.
  1148 + */
  1149 +static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
  1150 + struct rcu_data *rdp)
  1151 +{
  1152 + int i, j;
  1153 +
  1154 + /* If the CPU has no callbacks, nothing to do. */
  1155 + if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
  1156 + return;
  1157 +
  1158 + /*
  1159 + * Find all callbacks whose ->completed numbers indicate that they
  1160 + * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
  1161 + */
  1162 + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
  1163 + if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
  1164 + break;
  1165 + rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
  1166 + }
  1167 + /* Clean up any sublist tail pointers that were misordered above. */
  1168 + for (j = RCU_WAIT_TAIL; j < i; j++)
  1169 + rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
  1170 +
  1171 + /* Copy down callbacks to fill in empty sublists. */
  1172 + for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
  1173 + if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
  1174 + break;
  1175 + rdp->nxttail[j] = rdp->nxttail[i];
  1176 + rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
  1177 + }
  1178 +
  1179 + /* Classify any remaining callbacks. */
  1180 + rcu_accelerate_cbs(rsp, rnp, rdp);
  1181 +}
  1182 +
  1183 +/*
1074 1184 * Advance this CPU's callbacks, but only if the current grace period
1075 1185 * has ended. This may be called only from the CPU to whom the rdp
1076 1186 * belongs. In addition, the corresponding leaf rcu_node structure's
1077 1187  
1078 1188  
... ... @@ -1080,13 +1190,16 @@
1080 1190 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1081 1191 {
1082 1192 /* Did another grace period end? */
1083   - if (rdp->completed != rnp->completed) {
  1193 + if (rdp->completed == rnp->completed) {
1084 1194  
1085   - /* Advance callbacks. No harm if list empty. */
1086   - rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
1087   - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
1088   - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
  1195 + /* No, so just accelerate recent callbacks. */
  1196 + rcu_accelerate_cbs(rsp, rnp, rdp);
1089 1197  
  1198 + } else {
  1199 +
  1200 + /* Advance callbacks. */
  1201 + rcu_advance_cbs(rsp, rnp, rdp);
  1202 +
1090 1203 /* Remember that we saw this grace-period completion. */
1091 1204 rdp->completed = rnp->completed;
1092 1205 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
1093 1206  
... ... @@ -1392,17 +1505,10 @@
1392 1505 /*
1393 1506 * Because there is no grace period in progress right now,
1394 1507 * any callbacks we have up to this point will be satisfied
1395   - * by the next grace period. So promote all callbacks to be
1396   - * handled after the end of the next grace period. If the
1397   - * CPU is not yet aware of the end of the previous grace period,
1398   - * we need to allow for the callback advancement that will
1399   - * occur when it does become aware. Deadlock prevents us from
1400   - * making it aware at this point: We cannot acquire a leaf
1401   - * rcu_node ->lock while holding the root rcu_node ->lock.
  1508 + * by the next grace period. So this is a good place to
  1509 + * assign a grace period number to recently posted callbacks.
1402 1510 */
1403   - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1404   - if (rdp->completed == rsp->completed)
1405   - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
  1511 + rcu_accelerate_cbs(rsp, rnp, rdp);
1406 1512  
1407 1513 rsp->gp_flags = RCU_GP_FLAG_INIT;
1408 1514 raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
... ... @@ -1527,7 +1633,7 @@
1527 1633 * This GP can't end until cpu checks in, so all of our
1528 1634 * callbacks can be processed during the next GP.
1529 1635 */
1530   - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
  1636 + rcu_accelerate_cbs(rsp, rnp, rdp);
1531 1637  
1532 1638 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1533 1639 }
... ... @@ -1779,7 +1885,7 @@
1779 1885 long bl, count, count_lazy;
1780 1886 int i;
1781 1887  
1782   - /* If no callbacks are ready, just return.*/
  1888 + /* If no callbacks are ready, just return. */
1783 1889 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1784 1890 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
1785 1891 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
1786 1892  
1787 1893  
1788 1894  
... ... @@ -2008,19 +2114,19 @@
2008 2114  
2009 2115 WARN_ON_ONCE(rdp->beenonline == 0);
2010 2116  
2011   - /*
2012   - * Advance callbacks in response to end of earlier grace
2013   - * period that some other CPU ended.
2014   - */
  2117 + /* Handle the end of a grace period that some other CPU ended. */
2015 2118 rcu_process_gp_end(rsp, rdp);
2016 2119  
2017 2120 /* Update RCU state based on any recent quiescent states. */
2018 2121 rcu_check_quiescent_state(rsp, rdp);
2019 2122  
2020 2123 /* Does this CPU require a not-yet-started grace period? */
  2124 + local_irq_save(flags);
2021 2125 if (cpu_needs_another_gp(rsp, rdp)) {
2022   - raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
  2126 + raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
2023 2127 rcu_start_gp(rsp, flags); /* releases above lock */
  2128 + } else {
  2129 + local_irq_restore(flags);
2024 2130 }
2025 2131  
2026 2132 /* If there are callbacks ready, invoke them. */
... ... @@ -2719,9 +2825,6 @@
2719 2825 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
2720 2826 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
2721 2827 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
2722   -#ifdef CONFIG_RCU_USER_QS
2723   - WARN_ON_ONCE(rdp->dynticks->in_user);
2724   -#endif
2725 2828 rdp->cpu = cpu;
2726 2829 rdp->rsp = rsp;
2727 2830 rcu_boot_init_nocb_percpu_data(rdp);
... ... @@ -2938,6 +3041,10 @@
2938 3041  
2939 3042 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
2940 3043  
  3044 + /* Silence gcc 4.8 warning about array index out of range. */
  3045 + if (rcu_num_lvls > RCU_NUM_LVLS)
  3046 + panic("rcu_init_one: rcu_num_lvls overflow");
  3047 +
2941 3048 /* Initialize the level-tracking arrays. */
2942 3049  
2943 3050 for (i = 0; i < rcu_num_lvls; i++)
... ... @@ -3074,7 +3181,6 @@
3074 3181 cpu_notifier(rcu_cpu_notify, 0);
3075 3182 for_each_online_cpu(cpu)
3076 3183 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
3077   - check_cpu_stall_init();
3078 3184 }
3079 3185  
3080 3186 #include "rcutree_plugin.h"
kernel/rcutree.h
... ... @@ -102,10 +102,6 @@
102 102 /* idle-period nonlazy_posted snapshot. */
103 103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
104 104 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
105   -#ifdef CONFIG_RCU_USER_QS
106   - bool ignore_user_qs; /* Treat userspace as extended QS or not */
107   - bool in_user; /* Is the CPU in userland from RCU POV? */
108   -#endif
109 105 };
110 106  
111 107 /* RCU's kthread states for tracing. */
... ... @@ -282,6 +278,8 @@
282 278 */
283 279 struct rcu_head *nxtlist;
284 280 struct rcu_head **nxttail[RCU_NEXT_SIZE];
  281 + unsigned long nxtcompleted[RCU_NEXT_SIZE];
  282 + /* grace periods for sublists. */
285 283 long qlen_lazy; /* # of lazy queued callbacks */
286 284 long qlen; /* # of queued callbacks, incl lazy */
287 285 long qlen_last_fqs_check;
... ... @@ -343,11 +341,6 @@
343 341  
344 342 #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
345 343  
346   -#ifdef CONFIG_PROVE_RCU
347   -#define RCU_STALL_DELAY_DELTA (5 * HZ)
348   -#else
349   -#define RCU_STALL_DELAY_DELTA 0
350   -#endif
351 344 #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
352 345 /* to take at least one */
353 346 /* scheduling clock irq */
lib/Kconfig.debug
... ... @@ -605,61 +605,6 @@
605 605  
606 606 For more details, see Documentation/lockdep-design.txt.
607 607  
608   -config PROVE_RCU
609   - bool "RCU debugging: prove RCU correctness"
610   - depends on PROVE_LOCKING
611   - default n
612   - help
613   - This feature enables lockdep extensions that check for correct
614   - use of RCU APIs. This is currently under development. Say Y
615   - if you want to debug RCU usage or help work on the PROVE_RCU
616   - feature.
617   -
618   - Say N if you are unsure.
619   -
620   -config PROVE_RCU_REPEATEDLY
621   - bool "RCU debugging: don't disable PROVE_RCU on first splat"
622   - depends on PROVE_RCU
623   - default n
624   - help
625   - By itself, PROVE_RCU will disable checking upon issuing the
626   - first warning (or "splat"). This feature prevents such
627   - disabling, allowing multiple RCU-lockdep warnings to be printed
628   - on a single reboot.
629   -
630   - Say Y to allow multiple RCU-lockdep warnings per boot.
631   -
632   - Say N if you are unsure.
633   -
634   -config PROVE_RCU_DELAY
635   - bool "RCU debugging: preemptible RCU race provocation"
636   - depends on DEBUG_KERNEL && PREEMPT_RCU
637   - default n
638   - help
639   - There is a class of races that involve an unlikely preemption
640   - of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
641   - been set to INT_MIN. This feature inserts a delay at that
642   - point to increase the probability of these races.
643   -
644   - Say Y to increase probability of preemption of __rcu_read_unlock().
645   -
646   - Say N if you are unsure.
647   -
648   -config SPARSE_RCU_POINTER
649   - bool "RCU debugging: sparse-based checks for pointer usage"
650   - default n
651   - help
652   - This feature enables the __rcu sparse annotation for
653   - RCU-protected pointers. This annotation will cause sparse
654   - to flag any non-RCU used of annotated pointers. This can be
655   - helpful when debugging RCU usage. Please note that this feature
656   - is not intended to enforce code cleanliness; it is instead merely
657   - a debugging aid.
658   -
659   - Say Y to make sparse flag questionable use of RCU-protected pointers
660   -
661   - Say N if you are unsure.
662   -
663 608 config LOCKDEP
664 609 bool
665 610 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
... ... @@ -937,6 +882,63 @@
937 882 BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect
938 883 what it believes to be lockup conditions.
939 884  
  885 +menu "RCU Debugging"
  886 +
  887 +config PROVE_RCU
  888 + bool "RCU debugging: prove RCU correctness"
  889 + depends on PROVE_LOCKING
  890 + default n
  891 + help
  892 + This feature enables lockdep extensions that check for correct
  893 + use of RCU APIs. This is currently under development. Say Y
  894 + if you want to debug RCU usage or help work on the PROVE_RCU
  895 + feature.
  896 +
  897 + Say N if you are unsure.
  898 +
  899 +config PROVE_RCU_REPEATEDLY
  900 + bool "RCU debugging: don't disable PROVE_RCU on first splat"
  901 + depends on PROVE_RCU
  902 + default n
  903 + help
  904 + By itself, PROVE_RCU will disable checking upon issuing the
  905 + first warning (or "splat"). This feature prevents such
  906 + disabling, allowing multiple RCU-lockdep warnings to be printed
  907 + on a single reboot.
  908 +
  909 + Say Y to allow multiple RCU-lockdep warnings per boot.
  910 +
  911 + Say N if you are unsure.
  912 +
  913 +config PROVE_RCU_DELAY
  914 + bool "RCU debugging: preemptible RCU race provocation"
  915 + depends on DEBUG_KERNEL && PREEMPT_RCU
  916 + default n
  917 + help
  918 + There is a class of races that involve an unlikely preemption
  919 + of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
  920 + been set to INT_MIN. This feature inserts a delay at that
  921 + point to increase the probability of these races.
  922 +
  923 + Say Y to increase probability of preemption of __rcu_read_unlock().
  924 +
  925 + Say N if you are unsure.
  926 +
  927 +config SPARSE_RCU_POINTER
  928 + bool "RCU debugging: sparse-based checks for pointer usage"
  929 + default n
  930 + help
  931 + This feature enables the __rcu sparse annotation for
  932 + RCU-protected pointers. This annotation will cause sparse
  933 + to flag any non-RCU used of annotated pointers. This can be
  934 + helpful when debugging RCU usage. Please note that this feature
  935 + is not intended to enforce code cleanliness; it is instead merely
  936 + a debugging aid.
  937 +
  938 + Say Y to make sparse flag questionable use of RCU-protected pointers
  939 +
  940 + Say N if you are unsure.
  941 +
940 942 config RCU_TORTURE_TEST
941 943 tristate "torture tests for RCU"
942 944 depends on DEBUG_KERNEL
... ... @@ -970,7 +972,7 @@
970 972  
971 973 config RCU_CPU_STALL_TIMEOUT
972 974 int "RCU CPU stall timeout in seconds"
973   - depends on TREE_RCU || TREE_PREEMPT_RCU
  975 + depends on RCU_STALL_COMMON
974 976 range 3 300
975 977 default 21
976 978 help
... ... @@ -1015,6 +1017,8 @@
1015 1017  
1016 1018 Say Y here if you want to enable RCU tracing
1017 1019 Say N if you are unsure.
  1020 +
  1021 +endmenu # "RCU Debugging"
1018 1022  
1019 1023 config KPROBES_SANITY_TEST
1020 1024 bool "Kprobes sanity tests"