Commit 6d87669357936bffa1e8fea7a4e7743e76905736
Merge branches 'doc.2013.03.12a', 'fixes.2013.03.13a' and 'idlenocb.2013.03.26b' into HEAD
doc.2013.03.12a: Documentation changes. fixes.2013.03.13a: Miscellaneous fixes. idlenocb.2013.03.26b: Remove restrictions on no-CBs CPUs, make RCU_FAST_NO_HZ take advantage of numbered callbacks, add callback acceleration based on numbered callbacks.
11 changed files
Documentation/RCU/stallwarn.txt
... | ... | @@ -92,14 +92,14 @@ |
92 | 92 | more information is printed with the stall-warning message, for example: |
93 | 93 | |
94 | 94 | INFO: rcu_preempt detected stall on CPU |
95 | - 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 | |
95 | + 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 softirq=82/543 | |
96 | 96 | (t=65000 jiffies) |
97 | 97 | |
98 | 98 | In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is |
99 | 99 | printed: |
100 | 100 | |
101 | 101 | INFO: rcu_preempt detected stall on CPU |
102 | - 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer not pending | |
102 | + 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 nonlazy_posted: 25 .D | |
103 | 103 | (t=65000 jiffies) |
104 | 104 | |
105 | 105 | The "(64628 ticks this GP)" indicates that this CPU has taken more |
... | ... | @@ -116,13 +116,28 @@ |
116 | 116 | be a small positive number if in the idle loop and a very large positive |
117 | 117 | number (as shown above) otherwise. |
118 | 118 | |
119 | -For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the CPU is | |
120 | -not in the process of trying to force itself into dyntick-idle state, the | |
121 | -"." indicates that the CPU has not given up forcing RCU into dyntick-idle | |
122 | -mode (it would be "H" otherwise), and the "timer not pending" indicates | |
123 | -that the CPU has not recently forced RCU into dyntick-idle mode (it | |
124 | -would otherwise indicate the number of microseconds remaining in this | |
125 | -forced state). | |
119 | +The "softirq=" portion of the message tracks the number of RCU softirq | |
120 | +handlers that the stalled CPU has executed. The number before the "/" | |
121 | +is the number that had executed since boot at the time that this CPU | |
122 | +last noted the beginning of a grace period, which might be the current | |
123 | +(stalled) grace period, or it might be some earlier grace period (for | |
124 | +example, if the CPU has been in dyntick-idle mode for an extended | |
125 | +time period). The number after the "/" is the number that have executed | |
126 | +since boot until the current time. If this latter number stays constant | |
127 | +across repeated stall-warning messages, it is possible that RCU's softirq | |
128 | +handlers are no longer able to execute on this CPU. This can happen if | |
129 | +the stalled CPU is spinning with interrupts disabled, or, in -rt | |
130 | +kernels, if a high-priority process is starving RCU's softirq handler. | |
131 | + | |
132 | +For CONFIG_RCU_FAST_NO_HZ kernels, the "last_accelerate:" prints the | |
133 | +low-order 16 bits (in hex) of the jiffies counter when this CPU last | |
134 | +invoked rcu_try_advance_all_cbs() from rcu_needs_cpu() or last invoked | |
135 | +rcu_accelerate_cbs() from rcu_prepare_for_idle(). The "nonlazy_posted:" | |
136 | +prints the number of non-lazy callbacks posted since the last call to | |
137 | +rcu_needs_cpu(). Finally, an "L" indicates that there are currently | |
138 | +no non-lazy callbacks ("." is printed otherwise, as shown above) and | |
139 | +"D" indicates that dyntick-idle processing is enabled ("." is printed | |
140 | +otherwise, for example, if disabled via the "nohz=" kernel boot parameter). | |
126 | 141 | |
127 | 142 | |
128 | 143 | Multiple Warnings From One Stall |
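The "softirq=82/543" pair is, in effect, a before/after snapshot of the per-CPU
RCU_SOFTIRQ count. A minimal sketch of that bookkeeping, assuming the
kstat_softirqs_cpu() accessor from <linux/kernel_stat.h> and the softirq_snap
field that this series adds to the RCU-internal struct rcu_data (the helper
names below are illustrative, not the actual stall-print code):

    #include <linux/kernel.h>       /* pr_cont() */
    #include <linux/interrupt.h>    /* RCU_SOFTIRQ */
    #include <linux/kernel_stat.h>  /* kstat_softirqs_cpu() */

    /* Record the softirq count when this CPU notices a new grace period. */
    static void rcu_snapshot_softirqs(struct rcu_data *rdp, int cpu)
    {
            rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, cpu);
    }

    /* At stall-warning time, print "softirq=<at GP start>/<now>". */
    static void rcu_print_softirq_progress(struct rcu_data *rdp, int cpu)
    {
            pr_cont(" softirq=%u/%u",
                    rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu));
    }

If the second number stops advancing between warnings, RCU's softirq handler is
not getting a chance to run on that CPU, as described above.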
Documentation/kernel-parameters.txt
... | ... | @@ -2461,9 +2461,12 @@ |
2461 | 2461 | In kernels built with CONFIG_RCU_NOCB_CPU=y, set |
2462 | 2462 | the specified list of CPUs to be no-callback CPUs. |
2463 | 2463 | Invocation of these CPUs' RCU callbacks will |
2464 | - be offloaded to "rcuoN" kthreads created for | |
2465 | - that purpose. This reduces OS jitter on the | |
2464 | + be offloaded to "rcuox/N" kthreads created for | |
2465 | + that purpose, where "x" is "b" for RCU-bh, "p" | |
2466 | + for RCU-preempt, and "s" for RCU-sched, and "N" | |
2467 | + is the CPU number. This reduces OS jitter on the | |
2466 | 2468 | offloaded CPUs, which can be useful for HPC and |
2469 | + | |
2467 | 2470 | real-time workloads. It can also improve energy |
2468 | 2471 | efficiency for asymmetric multiprocessors. |
2469 | 2472 | |
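For example (an illustrative configuration, not part of this patch), booting a
CONFIG_RCU_NOCB_CPU=y kernel with "rcu_nocbs=1-3" offloads CPUs 1-3; the
callbacks of, say, CPU 2 are then invoked by kthreads named "rcuob/2" and
"rcuos/2", plus "rcuop/2" on CONFIG_TREE_PREEMPT_RCU=y kernels.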
... | ... | @@ -2487,6 +2490,17 @@ |
2487 | 2490 | leaf rcu_node structure. Useful for very large |
2488 | 2491 | systems. |
2489 | 2492 | |
2493 | + rcutree.jiffies_till_first_fqs= [KNL,BOOT] | |
2494 | + Set delay from grace-period initialization to | |
2495 | + first attempt to force quiescent states. | |
2496 | + Units are jiffies, minimum value is zero, | |
2497 | + and maximum value is HZ. | |
2498 | + | |
2499 | + rcutree.jiffies_till_next_fqs= [KNL,BOOT] | |
2500 | + Set delay between subsequent attempts to force | |
2501 | + quiescent states. Units are jiffies, minimum | |
2502 | + value is one, and maximum value is HZ. | |
2503 | + | |
2490 | 2504 | rcutree.qhimark= [KNL,BOOT] |
2491 | 2505 | Set threshold of queued |
2492 | 2506 | RCU callbacks over which batch limiting is disabled. |
2493 | 2507 | |
... | ... | @@ -2501,16 +2515,15 @@ |
2501 | 2515 | rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] |
2502 | 2516 | Set timeout for RCU CPU stall warning messages. |
2503 | 2517 | |
2504 | - rcutree.jiffies_till_first_fqs= [KNL,BOOT] | |
2505 | - Set delay from grace-period initialization to | |
2506 | - first attempt to force quiescent states. | |
2507 | - Units are jiffies, minimum value is zero, | |
2508 | - and maximum value is HZ. | |
2518 | + rcutree.rcu_idle_gp_delay= [KNL,BOOT] | |
2519 | + Set wakeup interval for idle CPUs that have | |
2520 | + RCU callbacks (RCU_FAST_NO_HZ=y). | |
2509 | 2521 | |
2510 | - rcutree.jiffies_till_next_fqs= [KNL,BOOT] | |
2511 | - Set delay between subsequent attempts to force | |
2512 | - quiescent states. Units are jiffies, minimum | |
2513 | - value is one, and maximum value is HZ. | |
2522 | + rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT] | |
2523 | + Set wakeup interval for idle CPUs that have | |
2524 | + only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y). | |
2525 | + Lazy RCU callbacks are those which RCU can | |
2526 | + prove do nothing more than free memory. | |
2514 | 2527 | |
2515 | 2528 | rcutorture.fqs_duration= [KNL,BOOT] |
2516 | 2529 | Set duration of force_quiescent_state bursts. |
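As a rough worked example (assuming the defaults introduced by this series and
HZ=1000): rcutree.rcu_idle_gp_delay defaults to 4 jiffies, so an idle CPU
holding non-lazy callbacks is awakened within about 4 ms, while
rcutree.rcu_idle_lazy_gp_delay defaults to 6*HZ, allowing a CPU with only lazy
(memory-freeing) callbacks to sleep for roughly six seconds.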
include/linux/list_bl.h
... | ... | @@ -125,6 +125,11 @@ |
125 | 125 | __bit_spin_unlock(0, (unsigned long *)b); |
126 | 126 | } |
127 | 127 | |
128 | +static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) | |
129 | +{ | |
130 | + return bit_spin_is_locked(0, (unsigned long *)b); | |
131 | +} | |
132 | + | |
128 | 133 | /** |
129 | 134 | * hlist_bl_for_each_entry - iterate over list of given type |
130 | 135 | * @tpos: the type * to use as a loop cursor. |
include/linux/rculist_bl.h
... | ... | @@ -20,7 +20,7 @@ |
20 | 20 | static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) |
21 | 21 | { |
22 | 22 | return (struct hlist_bl_node *) |
23 | - ((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK); | |
23 | + ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); | |
24 | 24 | } |
25 | 25 | |
26 | 26 | /** |
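The hlist_bl_is_locked() condition above widens the lockdep check so that
hlist_bl_first_rcu() may also be called by a writer holding the list's bit
lock, not only from within an RCU read-side critical section. A small
illustrative caller, using only the helpers visible in list_bl.h (the function
name is made up for the example):

    #include <linux/rculist_bl.h>

    static bool list_empty_under_bit_lock(struct hlist_bl_head *h)
    {
            bool empty;

            hlist_bl_lock(h);               /* sets bit 0: hlist_bl_is_locked(h) is true */
            empty = !hlist_bl_first_rcu(h); /* no rcu_read_lock() needed while locked */
            hlist_bl_unlock(h);
            return empty;
    }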
include/linux/rcupdate.h
... | ... | @@ -80,6 +80,7 @@ |
80 | 80 | #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) |
81 | 81 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) |
82 | 82 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) |
83 | +#define ulong2long(a) (*(long *)(&(a))) | |
83 | 84 | |
84 | 85 | /* Exported common interfaces */ |
85 | 86 |
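ulong2long() simply reinterprets an unsigned long lvalue as a signed long. One
illustrative use (a hypothetical helper, not taken from this commit) is turning
the difference of two wrapping counters, such as grace-period numbers, into a
signed distance so that ordering survives counter wrap:

    #include <linux/rcupdate.h>     /* ulong2long(), ULONG_CMP_*() */

    /* Positive if "newer" is ahead of "older", even across wraparound. */
    static inline long counter_distance(unsigned long newer, unsigned long older)
    {
            unsigned long delta = newer - older;

            return ulong2long(delta);
    }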
include/trace/events/rcu.h
... | ... | @@ -72,6 +72,58 @@ |
72 | 72 | ); |
73 | 73 | |
74 | 74 | /* |
75 | + * Tracepoint for future grace-period events, including those for no-callbacks | |
76 | + * CPUs. The caller should pull the data from the rcu_node structure, | |
77 | + * other than rcuname, which comes from the rcu_state structure, and event, | |
78 | + * which is one of the following: | |
79 | + * | |
80 | + * "Startleaf": Request a nocb grace period based on leaf-node data. | |
81 | + * "Startedleaf": Leaf-node start proved sufficient. | |
82 | + * "Startedleafroot": Leaf-node start proved sufficient after checking root. | |
83 | + * "Startedroot": Requested a nocb grace period based on root-node data. | |
84 | + * "StartWait": Start waiting for the requested grace period. | |
85 | + * "ResumeWait": Resume waiting after signal. | |
86 | + * "EndWait": Complete wait. | |
87 | + * "Cleanup": Clean up rcu_node structure after previous GP. | |
88 | + * "CleanupMore": Clean up, and another no-CB GP is needed. | |
89 | + */ | |
90 | +TRACE_EVENT(rcu_future_grace_period, | |
91 | + | |
92 | + TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed, | |
93 | + unsigned long c, u8 level, int grplo, int grphi, | |
94 | + char *gpevent), | |
95 | + | |
96 | + TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent), | |
97 | + | |
98 | + TP_STRUCT__entry( | |
99 | + __field(char *, rcuname) | |
100 | + __field(unsigned long, gpnum) | |
101 | + __field(unsigned long, completed) | |
102 | + __field(unsigned long, c) | |
103 | + __field(u8, level) | |
104 | + __field(int, grplo) | |
105 | + __field(int, grphi) | |
106 | + __field(char *, gpevent) | |
107 | + ), | |
108 | + | |
109 | + TP_fast_assign( | |
110 | + __entry->rcuname = rcuname; | |
111 | + __entry->gpnum = gpnum; | |
112 | + __entry->completed = completed; | |
113 | + __entry->c = c; | |
114 | + __entry->level = level; | |
115 | + __entry->grplo = grplo; | |
116 | + __entry->grphi = grphi; | |
117 | + __entry->gpevent = gpevent; | |
118 | + ), | |
119 | + | |
120 | + TP_printk("%s %lu %lu %lu %u %d %d %s", | |
121 | + __entry->rcuname, __entry->gpnum, __entry->completed, | |
122 | + __entry->c, __entry->level, __entry->grplo, __entry->grphi, | |
123 | + __entry->gpevent) | |
124 | +); | |
125 | + | |
126 | +/* | |
75 | 127 | * Tracepoint for grace-period-initialization events. These are |
76 | 128 | * distinguished by the type of RCU, the new grace-period number, the |
77 | 129 | * rcu_node structure level, the starting and ending CPU covered by the |
... | ... | @@ -601,6 +653,9 @@ |
601 | 653 | #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) |
602 | 654 | #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ |
603 | 655 | qsmask) do { } while (0) |
656 | +#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \ | |
657 | + level, grplo, grphi, event) \ | |
658 | + do { } while (0) | |
604 | 659 | #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) |
605 | 660 | #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) |
606 | 661 | #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ |
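Given the TP_printk() format "%s %lu %lu %lu %u %d %d %s", a line emitted by
the new rcu_future_grace_period tracepoint would look something like (values
invented for illustration):

    rcu_sched 21 20 22 0 0 15 Startleaf

that is: flavor name, the rcu_node's ->gpnum and ->completed, the requested
future grace-period number c, the node's level, the grplo/grphi CPU range, and
one of the event strings listed in the comment above.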
init/Kconfig
... | ... | @@ -582,13 +582,16 @@ |
582 | 582 | depends on NO_HZ && SMP |
583 | 583 | default n |
584 | 584 | help |
585 | - This option causes RCU to attempt to accelerate grace periods in | |
586 | - order to allow CPUs to enter dynticks-idle state more quickly. | |
587 | - On the other hand, this option increases the overhead of the | |
588 | - dynticks-idle checking, thus degrading scheduling latency. | |
585 | + This option permits CPUs to enter dynticks-idle state even if | |
586 | + they have RCU callbacks queued, and prevents RCU from waking | |
587 | + these CPUs up more than roughly once every four jiffies (by | |
588 | + default; you can adjust this using the rcutree.rcu_idle_gp_delay | |
589 | + parameter), thus improving energy efficiency. On the other | |
590 | + hand, this option increases the duration of RCU grace periods, | |
591 | + for example, slowing down synchronize_rcu(). | |
589 | 592 | |
590 | - Say Y if energy efficiency is critically important, and you don't | |
591 | - care about real-time response. | |
593 | + Say Y if energy efficiency is critically important, and you | |
594 | + don't care about increased grace-period durations. | |
592 | 595 | |
593 | 596 | Say N if you are unsure. |
594 | 597 | |
... | ... | @@ -655,7 +658,7 @@ |
655 | 658 | Accept the default if unsure. |
656 | 659 | |
657 | 660 | config RCU_NOCB_CPU |
658 | - bool "Offload RCU callback processing from boot-selected CPUs" | |
661 | + bool "Offload RCU callback processing from boot-selected CPUs (EXPERIMENTAL" | |
659 | 662 | depends on TREE_RCU || TREE_PREEMPT_RCU |
660 | 663 | default n |
661 | 664 | help |
662 | 665 | |
663 | 666 | |
... | ... | @@ -666,15 +669,55 @@ |
666 | 669 | |
667 | 670 | This option offloads callback invocation from the set of |
668 | 671 | CPUs specified at boot time by the rcu_nocbs parameter. |
669 | - For each such CPU, a kthread ("rcuoN") will be created to | |
670 | - invoke callbacks, where the "N" is the CPU being offloaded. | |
671 | - Nothing prevents this kthread from running on the specified | |
672 | - CPUs, but (1) the kthreads may be preempted between each | |
673 | - callback, and (2) affinity or cgroups can be used to force | |
674 | - the kthreads to run on whatever set of CPUs is desired. | |
672 | + For each such CPU, a kthread ("rcuox/N") will be created to | |
673 | + invoke callbacks, where the "N" is the CPU being offloaded, | |
674 | + and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and | |
675 | + "s" for RCU-sched. Nothing prevents this kthread from running | |
676 | + on the specified CPUs, but (1) the kthreads may be preempted | |
677 | + between each callback, and (2) affinity or cgroups can be used | |
678 | + to force the kthreads to run on whatever set of CPUs is desired. | |
675 | 679 | |
676 | - Say Y here if you want reduced OS jitter on selected CPUs. | |
680 | + Say Y here if you want to help debug reduced OS jitter. | |
677 | 681 | Say N here if you are unsure. |
682 | + | |
683 | +choice | |
684 | + prompt "Build-forced no-CBs CPUs" | |
685 | + default RCU_NOCB_CPU_NONE | |
686 | + help | |
687 | + This option allows no-CBs CPUs to be specified at build time. | |
688 | + Additional no-CBs CPUs may be specified by the rcu_nocbs= | |
689 | + boot parameter. | |
690 | + | |
691 | +config RCU_NOCB_CPU_NONE | |
692 | + bool "No build_forced no-CBs CPUs" | |
693 | + depends on RCU_NOCB_CPU | |
694 | + help | |
695 | + This option does not force any of the CPUs to be no-CBs CPUs. | |
696 | + Only CPUs designated by the rcu_nocbs= boot parameter will be | |
697 | + no-CBs CPUs. | |
698 | + | |
699 | +config RCU_NOCB_CPU_ZERO | |
700 | + bool "CPU 0 is a build_forced no-CBs CPU" | |
701 | + depends on RCU_NOCB_CPU | |
702 | + help | |
703 | + This option forces CPU 0 to be a no-CBs CPU. Additional CPUs | |
704 | + may be designated as no-CBs CPUs using the rcu_nocbs= boot | |
705 | + parameter. | |
706 | + | |
707 | + Select this if CPU 0 needs to be a no-CBs CPU for real-time | |
708 | + or energy-efficiency reasons. | |
709 | + | |
710 | +config RCU_NOCB_CPU_ALL | |
711 | + bool "All CPUs are build_forced no-CBs CPUs" | |
712 | + depends on RCU_NOCB_CPU | |
713 | + help | |
714 | + This option forces all CPUs to be no-CBs CPUs. The rcu_nocbs= | |
715 | + boot parameter will be ignored. | |
716 | + | |
717 | + Select this if all CPUs need to be no-CBs CPUs for real-time | |
718 | + or energy-efficiency reasons. | |
719 | + | |
720 | +endchoice | |
678 | 721 | |
679 | 722 | endmenu # "RCU Subsystem" |
680 | 723 |
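For instance (an illustrative configuration): building with
CONFIG_RCU_NOCB_CPU=y and CONFIG_RCU_NOCB_CPU_ALL=y offloads every CPU at build
time and causes the rcu_nocbs= boot parameter to be ignored, whereas
CONFIG_RCU_NOCB_CPU_NONE=y leaves the choice entirely to rcu_nocbs=, and
CONFIG_RCU_NOCB_CPU_ZERO=y forces only CPU 0 and lets rcu_nocbs= add more.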
kernel/rcutree.c
... | ... | @@ -64,7 +64,7 @@ |
64 | 64 | static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; |
65 | 65 | static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; |
66 | 66 | |
67 | -#define RCU_STATE_INITIALIZER(sname, cr) { \ | |
67 | +#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \ | |
68 | 68 | .level = { &sname##_state.node[0] }, \ |
69 | 69 | .call = cr, \ |
70 | 70 | .fqs_state = RCU_GP_IDLE, \ |
71 | 71 | |
72 | 72 | |
... | ... | @@ -76,13 +76,14 @@ |
76 | 76 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
77 | 77 | .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ |
78 | 78 | .name = #sname, \ |
79 | + .abbr = sabbr, \ | |
79 | 80 | } |
80 | 81 | |
81 | 82 | struct rcu_state rcu_sched_state = |
82 | - RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); | |
83 | + RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); | |
83 | 84 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); |
84 | 85 | |
85 | -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); | |
86 | +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); | |
86 | 87 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
87 | 88 | |
88 | 89 | static struct rcu_state *rcu_state; |
... | ... | @@ -223,6 +224,8 @@ |
223 | 224 | module_param(jiffies_till_first_fqs, ulong, 0644); |
224 | 225 | module_param(jiffies_till_next_fqs, ulong, 0644); |
225 | 226 | |
227 | +static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, | |
228 | + struct rcu_data *rdp); | |
226 | 229 | static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)); |
227 | 230 | static void force_quiescent_state(struct rcu_state *rsp); |
228 | 231 | static int rcu_pending(int cpu); |
... | ... | @@ -310,6 +313,8 @@ |
310 | 313 | |
311 | 314 | if (rcu_gp_in_progress(rsp)) |
312 | 315 | return 0; /* No, a grace period is already in progress. */ |
316 | + if (rcu_nocb_needs_gp(rsp)) | |
317 | + return 1; /* Yes, a no-CBs CPU needs one. */ | |
313 | 318 | if (!rdp->nxttail[RCU_NEXT_TAIL]) |
314 | 319 | return 0; /* No, this is a no-CBs (or offline) CPU. */ |
315 | 320 | if (*rdp->nxttail[RCU_NEXT_READY_TAIL]) |
316 | 321 | |
... | ... | @@ -1035,10 +1040,11 @@ |
1035 | 1040 | { |
1036 | 1041 | int i; |
1037 | 1042 | |
1043 | + if (init_nocb_callback_list(rdp)) | |
1044 | + return; | |
1038 | 1045 | rdp->nxtlist = NULL; |
1039 | 1046 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
1040 | 1047 | rdp->nxttail[i] = &rdp->nxtlist; |
1041 | - init_nocb_callback_list(rdp); | |
1042 | 1048 | } |
1043 | 1049 | |
1044 | 1050 | /* |
... | ... | @@ -1071,6 +1077,120 @@ |
1071 | 1077 | } |
1072 | 1078 | |
1073 | 1079 | /* |
1080 | + * Trace-event helper function for rcu_start_future_gp() and | |
1081 | + * rcu_nocb_wait_gp(). | |
1082 | + */ | |
1083 | +static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, | |
1084 | + unsigned long c, char *s) | |
1085 | +{ | |
1086 | + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, | |
1087 | + rnp->completed, c, rnp->level, | |
1088 | + rnp->grplo, rnp->grphi, s); | |
1089 | +} | |
1090 | + | |
1091 | +/* | |
1092 | + * Start some future grace period, as needed to handle newly arrived | |
1093 | + * callbacks. The required future grace periods are recorded in each | |
1094 | + * rcu_node structure's ->need_future_gp field. | |
1095 | + * | |
1096 | + * The caller must hold the specified rcu_node structure's ->lock. | |
1097 | + */ | |
1098 | +static unsigned long __maybe_unused | |
1099 | +rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) | |
1100 | +{ | |
1101 | + unsigned long c; | |
1102 | + int i; | |
1103 | + struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); | |
1104 | + | |
1105 | + /* | |
1106 | + * Pick up grace-period number for new callbacks. If this | |
1107 | + * grace period is already marked as needed, return to the caller. | |
1108 | + */ | |
1109 | + c = rcu_cbs_completed(rdp->rsp, rnp); | |
1110 | + trace_rcu_future_gp(rnp, rdp, c, "Startleaf"); | |
1111 | + if (rnp->need_future_gp[c & 0x1]) { | |
1112 | + trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf"); | |
1113 | + return c; | |
1114 | + } | |
1115 | + | |
1116 | + /* | |
1117 | + * If either this rcu_node structure or the root rcu_node structure | |
1118 | + * believes that a grace period is in progress, then we must wait | |
1119 | + * for the one following, which is in "c". Because our request | |
1120 | + * will be noticed at the end of the current grace period, we don't | |
1121 | + * need to explicitly start one. | |
1122 | + */ | |
1123 | + if (rnp->gpnum != rnp->completed || | |
1124 | + ACCESS_ONCE(rnp_root->gpnum) != ACCESS_ONCE(rnp_root->completed)) { | |
1125 | + rnp->need_future_gp[c & 0x1]++; | |
1126 | + trace_rcu_future_gp(rnp, rdp, c, "Startedleaf"); | |
1127 | + return c; | |
1128 | + } | |
1129 | + | |
1130 | + /* | |
1131 | + * There might be no grace period in progress. If we don't already | |
1132 | + * hold it, acquire the root rcu_node structure's lock in order to | |
1133 | + * start one (if needed). | |
1134 | + */ | |
1135 | + if (rnp != rnp_root) | |
1136 | + raw_spin_lock(&rnp_root->lock); | |
1137 | + | |
1138 | + /* | |
1139 | + * Get a new grace-period number. If there really is no grace | |
1140 | + * period in progress, it will be smaller than the one we obtained | |
1141 | + * earlier. Adjust callbacks as needed. Note that even no-CBs | |
1142 | + * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed. | |
1143 | + */ | |
1144 | + c = rcu_cbs_completed(rdp->rsp, rnp_root); | |
1145 | + for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++) | |
1146 | + if (ULONG_CMP_LT(c, rdp->nxtcompleted[i])) | |
1147 | + rdp->nxtcompleted[i] = c; | |
1148 | + | |
1149 | + /* | |
1150 | + * If the need for the required grace period is already | |
1151 | + * recorded, trace and leave. | |
1152 | + */ | |
1153 | + if (rnp_root->need_future_gp[c & 0x1]) { | |
1154 | + trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot"); | |
1155 | + goto unlock_out; | |
1156 | + } | |
1157 | + | |
1158 | + /* Record the need for the future grace period. */ | |
1159 | + rnp_root->need_future_gp[c & 0x1]++; | |
1160 | + | |
1161 | + /* If a grace period is not already in progress, start one. */ | |
1162 | + if (rnp_root->gpnum != rnp_root->completed) { | |
1163 | + trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot"); | |
1164 | + } else { | |
1165 | + trace_rcu_future_gp(rnp, rdp, c, "Startedroot"); | |
1166 | + rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); | |
1167 | + } | |
1168 | +unlock_out: | |
1169 | + if (rnp != rnp_root) | |
1170 | + raw_spin_unlock(&rnp_root->lock); | |
1171 | + return c; | |
1172 | +} | |
1173 | + | |
1174 | +/* | |
1175 | + * Clean up any old requests for the just-ended grace period. Also return | |
1176 | + * whether any additional grace periods have been requested. Also invoke | |
1177 | + * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads | |
1178 | + * waiting for this grace period to complete. | |
1179 | + */ | |
1180 | +static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) | |
1181 | +{ | |
1182 | + int c = rnp->completed; | |
1183 | + int needmore; | |
1184 | + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | |
1185 | + | |
1186 | + rcu_nocb_gp_cleanup(rsp, rnp); | |
1187 | + rnp->need_future_gp[c & 0x1] = 0; | |
1188 | + needmore = rnp->need_future_gp[(c + 1) & 0x1]; | |
1189 | + trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup"); | |
1190 | + return needmore; | |
1191 | +} | |
1192 | + | |
1193 | +/* | |
1074 | 1194 | * If there is room, assign a ->completed number to any callbacks on |
1075 | 1195 | * this CPU that have not already been assigned. Also accelerate any |
1076 | 1196 | * callbacks that were previously assigned a ->completed number that has |
... | ... | @@ -1129,6 +1249,8 @@ |
1129 | 1249 | rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL]; |
1130 | 1250 | rdp->nxtcompleted[i] = c; |
1131 | 1251 | } |
1252 | + /* Record any needed additional grace periods. */ | |
1253 | + rcu_start_future_gp(rnp, rdp); | |
1132 | 1254 | |
1133 | 1255 | /* Trace depending on how much we were able to accelerate. */ |
1134 | 1256 | if (!*rdp->nxttail[RCU_WAIT_TAIL]) |
1135 | 1257 | |
... | ... | @@ -1308,9 +1430,9 @@ |
1308 | 1430 | rdp = this_cpu_ptr(rsp->rda); |
1309 | 1431 | rcu_preempt_check_blocked_tasks(rnp); |
1310 | 1432 | rnp->qsmask = rnp->qsmaskinit; |
1311 | - rnp->gpnum = rsp->gpnum; | |
1433 | + ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; | |
1312 | 1434 | WARN_ON_ONCE(rnp->completed != rsp->completed); |
1313 | - rnp->completed = rsp->completed; | |
1435 | + ACCESS_ONCE(rnp->completed) = rsp->completed; | |
1314 | 1436 | if (rnp == rdp->mynode) |
1315 | 1437 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
1316 | 1438 | rcu_preempt_boost_start_gp(rnp); |
... | ... | @@ -1319,7 +1441,8 @@ |
1319 | 1441 | rnp->grphi, rnp->qsmask); |
1320 | 1442 | raw_spin_unlock_irq(&rnp->lock); |
1321 | 1443 | #ifdef CONFIG_PROVE_RCU_DELAY |
1322 | - if ((random32() % (rcu_num_nodes * 8)) == 0) | |
1444 | + if ((random32() % (rcu_num_nodes * 8)) == 0 && | |
1445 | + system_state == SYSTEM_RUNNING) | |
1323 | 1446 | schedule_timeout_uninterruptible(2); |
1324 | 1447 | #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ |
1325 | 1448 | cond_resched(); |
... | ... | @@ -1361,6 +1484,7 @@ |
1361 | 1484 | static void rcu_gp_cleanup(struct rcu_state *rsp) |
1362 | 1485 | { |
1363 | 1486 | unsigned long gp_duration; |
1487 | + int nocb = 0; | |
1364 | 1488 | struct rcu_data *rdp; |
1365 | 1489 | struct rcu_node *rnp = rcu_get_root(rsp); |
1366 | 1490 | |
1367 | 1491 | |
1368 | 1492 | |
... | ... | @@ -1390,17 +1514,23 @@ |
1390 | 1514 | */ |
1391 | 1515 | rcu_for_each_node_breadth_first(rsp, rnp) { |
1392 | 1516 | raw_spin_lock_irq(&rnp->lock); |
1393 | - rnp->completed = rsp->gpnum; | |
1517 | + ACCESS_ONCE(rnp->completed) = rsp->gpnum; | |
1518 | + rdp = this_cpu_ptr(rsp->rda); | |
1519 | + if (rnp == rdp->mynode) | |
1520 | + __rcu_process_gp_end(rsp, rnp, rdp); | |
1521 | + nocb += rcu_future_gp_cleanup(rsp, rnp); | |
1394 | 1522 | raw_spin_unlock_irq(&rnp->lock); |
1395 | 1523 | cond_resched(); |
1396 | 1524 | } |
1397 | 1525 | rnp = rcu_get_root(rsp); |
1398 | 1526 | raw_spin_lock_irq(&rnp->lock); |
1527 | + rcu_nocb_gp_set(rnp, nocb); | |
1399 | 1528 | |
1400 | 1529 | rsp->completed = rsp->gpnum; /* Declare grace period done. */ |
1401 | 1530 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); |
1402 | 1531 | rsp->fqs_state = RCU_GP_IDLE; |
1403 | 1532 | rdp = this_cpu_ptr(rsp->rda); |
1533 | + rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ | |
1404 | 1534 | if (cpu_needs_another_gp(rsp, rdp)) |
1405 | 1535 | rsp->gp_flags = 1; |
1406 | 1536 | raw_spin_unlock_irq(&rnp->lock); |
1407 | 1537 | |
1408 | 1538 | |
1409 | 1539 | |
1410 | 1540 | |
1411 | 1541 | |
1412 | 1542 | |
1413 | 1543 | |
1414 | 1544 | |
... | ... | @@ -1476,57 +1606,62 @@ |
1476 | 1606 | /* |
1477 | 1607 | * Start a new RCU grace period if warranted, re-initializing the hierarchy |
1478 | 1608 | * in preparation for detecting the next grace period. The caller must hold |
1479 | - * the root node's ->lock, which is released before return. Hard irqs must | |
1480 | - * be disabled. | |
1609 | + * the root node's ->lock and hard irqs must be disabled. | |
1481 | 1610 | * |
1482 | 1611 | * Note that it is legal for a dying CPU (which is marked as offline) to |
1483 | 1612 | * invoke this function. This can happen when the dying CPU reports its |
1484 | 1613 | * quiescent state. |
1485 | 1614 | */ |
1486 | 1615 | static void |
1487 | -rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |
1488 | - __releases(rcu_get_root(rsp)->lock) | |
1616 | +rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, | |
1617 | + struct rcu_data *rdp) | |
1489 | 1618 | { |
1490 | - struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | |
1491 | - struct rcu_node *rnp = rcu_get_root(rsp); | |
1492 | - | |
1493 | - if (!rsp->gp_kthread || | |
1494 | - !cpu_needs_another_gp(rsp, rdp)) { | |
1619 | + if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) { | |
1495 | 1620 | /* |
1496 | 1621 | * Either we have not yet spawned the grace-period |
1497 | 1622 | * task, this CPU does not need another grace period, |
1498 | 1623 | * or a grace period is already in progress. |
1499 | 1624 | * Either way, don't start a new grace period. |
1500 | 1625 | */ |
1501 | - raw_spin_unlock_irqrestore(&rnp->lock, flags); | |
1502 | 1626 | return; |
1503 | 1627 | } |
1504 | - | |
1505 | - /* | |
1506 | - * Because there is no grace period in progress right now, | |
1507 | - * any callbacks we have up to this point will be satisfied | |
1508 | - * by the next grace period. So this is a good place to | |
1509 | - * assign a grace period number to recently posted callbacks. | |
1510 | - */ | |
1511 | - rcu_accelerate_cbs(rsp, rnp, rdp); | |
1512 | - | |
1513 | 1628 | rsp->gp_flags = RCU_GP_FLAG_INIT; |
1514 | - raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ | |
1515 | 1629 | |
1516 | - /* Ensure that CPU is aware of completion of last grace period. */ | |
1517 | - rcu_process_gp_end(rsp, rdp); | |
1518 | - local_irq_restore(flags); | |
1519 | - | |
1520 | 1630 | /* Wake up rcu_gp_kthread() to start the grace period. */ |
1521 | 1631 | wake_up(&rsp->gp_wq); |
1522 | 1632 | } |
1523 | 1633 | |
1524 | 1634 | /* |
1635 | + * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's | |
1636 | + * callbacks. Note that rcu_start_gp_advanced() cannot do this because it | |
1637 | + * is invoked indirectly from rcu_advance_cbs(), which would result in | |
1638 | + * endless recursion -- or would do so if it wasn't for the self-deadlock | |
1639 | + * that is encountered beforehand. | |
1640 | + */ | |
1641 | +static void | |
1642 | +rcu_start_gp(struct rcu_state *rsp) | |
1643 | +{ | |
1644 | + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | |
1645 | + struct rcu_node *rnp = rcu_get_root(rsp); | |
1646 | + | |
1647 | + /* | |
1648 | + * If there is no grace period in progress right now, any | |
1649 | + * callbacks we have up to this point will be satisfied by the | |
1650 | + * next grace period. Also, advancing the callbacks reduces the | |
1651 | + * probability of false positives from cpu_needs_another_gp() | |
1652 | + * resulting in pointless grace periods. So, advance callbacks | |
1653 | + * then start the grace period! | |
1654 | + */ | |
1655 | + rcu_advance_cbs(rsp, rnp, rdp); | |
1656 | + rcu_start_gp_advanced(rsp, rnp, rdp); | |
1657 | +} | |
1658 | + | |
1659 | +/* | |
1525 | 1660 | * Report a full set of quiescent states to the specified rcu_state |
1526 | 1661 | * data structure. This involves cleaning up after the prior grace |
1527 | 1662 | * period and letting rcu_start_gp() start up the next grace period |
1528 | - * if one is needed. Note that the caller must hold rnp->lock, as | |
1529 | - * required by rcu_start_gp(), which will release it. | |
1663 | + * if one is needed. Note that the caller must hold rnp->lock, which | |
1664 | + * is released before return. | |
1530 | 1665 | */ |
1531 | 1666 | static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) |
1532 | 1667 | __releases(rcu_get_root(rsp)->lock) |
... | ... | @@ -2124,7 +2259,8 @@ |
2124 | 2259 | local_irq_save(flags); |
2125 | 2260 | if (cpu_needs_another_gp(rsp, rdp)) { |
2126 | 2261 | raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ |
2127 | - rcu_start_gp(rsp, flags); /* releases above lock */ | |
2262 | + rcu_start_gp(rsp); | |
2263 | + raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); | |
2128 | 2264 | } else { |
2129 | 2265 | local_irq_restore(flags); |
2130 | 2266 | } |
... | ... | @@ -2169,7 +2305,8 @@ |
2169 | 2305 | |
2170 | 2306 | static void invoke_rcu_core(void) |
2171 | 2307 | { |
2172 | - raise_softirq(RCU_SOFTIRQ); | |
2308 | + if (cpu_online(smp_processor_id())) | |
2309 | + raise_softirq(RCU_SOFTIRQ); | |
2173 | 2310 | } |
2174 | 2311 | |
2175 | 2312 | /* |
2176 | 2313 | |
... | ... | @@ -2204,11 +2341,11 @@ |
2204 | 2341 | |
2205 | 2342 | /* Start a new grace period if one not already started. */ |
2206 | 2343 | if (!rcu_gp_in_progress(rsp)) { |
2207 | - unsigned long nestflag; | |
2208 | 2344 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
2209 | 2345 | |
2210 | - raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | |
2211 | - rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ | |
2346 | + raw_spin_lock(&rnp_root->lock); | |
2347 | + rcu_start_gp(rsp); | |
2348 | + raw_spin_unlock(&rnp_root->lock); | |
2212 | 2349 | } else { |
2213 | 2350 | /* Give the grace period a kick. */ |
2214 | 2351 | rdp->blimit = LONG_MAX; |
2215 | 2352 | |
2216 | 2353 | |
2217 | 2354 | |
... | ... | @@ -2628,19 +2765,27 @@ |
2628 | 2765 | } |
2629 | 2766 | |
2630 | 2767 | /* |
2631 | - * Check to see if any future RCU-related work will need to be done | |
2632 | - * by the current CPU, even if none need be done immediately, returning | |
2633 | - * 1 if so. | |
2768 | + * Return true if the specified CPU has any callback. If all_lazy is | |
2769 | + * non-NULL, store an indication of whether all callbacks are lazy. | |
2770 | + * (If there are no callbacks, all of them are deemed to be lazy.) | |
2634 | 2771 | */ |
2635 | -static int rcu_cpu_has_callbacks(int cpu) | |
2772 | +static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) | |
2636 | 2773 | { |
2774 | + bool al = true; | |
2775 | + bool hc = false; | |
2776 | + struct rcu_data *rdp; | |
2637 | 2777 | struct rcu_state *rsp; |
2638 | 2778 | |
2639 | - /* RCU callbacks either ready or pending? */ | |
2640 | - for_each_rcu_flavor(rsp) | |
2641 | - if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) | |
2642 | - return 1; | |
2643 | - return 0; | |
2779 | + for_each_rcu_flavor(rsp) { | |
2780 | + rdp = per_cpu_ptr(rsp->rda, cpu); | |
2781 | + if (rdp->qlen != rdp->qlen_lazy) | |
2782 | + al = false; | |
2783 | + if (rdp->nxtlist) | |
2784 | + hc = true; | |
2785 | + } | |
2786 | + if (all_lazy) | |
2787 | + *all_lazy = al; | |
2788 | + return hc; | |
2644 | 2789 | } |
2645 | 2790 | |
2646 | 2791 | /* |
... | ... | @@ -2859,7 +3004,6 @@ |
2859 | 3004 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; |
2860 | 3005 | atomic_set(&rdp->dynticks->dynticks, |
2861 | 3006 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); |
2862 | - rcu_prepare_for_idle_init(cpu); | |
2863 | 3007 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
2864 | 3008 | |
2865 | 3009 | /* Add CPU to rcu_node bitmasks. */ |
... | ... | @@ -2909,7 +3053,6 @@ |
2909 | 3053 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
2910 | 3054 | struct rcu_node *rnp = rdp->mynode; |
2911 | 3055 | struct rcu_state *rsp; |
2912 | - int ret = NOTIFY_OK; | |
2913 | 3056 | |
2914 | 3057 | trace_rcu_utilization("Start CPU hotplug"); |
2915 | 3058 | switch (action) { |
2916 | 3059 | |
2917 | 3060 | |
... | ... | @@ -2923,21 +3066,12 @@ |
2923 | 3066 | rcu_boost_kthread_setaffinity(rnp, -1); |
2924 | 3067 | break; |
2925 | 3068 | case CPU_DOWN_PREPARE: |
2926 | - if (nocb_cpu_expendable(cpu)) | |
2927 | - rcu_boost_kthread_setaffinity(rnp, cpu); | |
2928 | - else | |
2929 | - ret = NOTIFY_BAD; | |
3069 | + rcu_boost_kthread_setaffinity(rnp, cpu); | |
2930 | 3070 | break; |
2931 | 3071 | case CPU_DYING: |
2932 | 3072 | case CPU_DYING_FROZEN: |
2933 | - /* | |
2934 | - * The whole machine is "stopped" except this CPU, so we can | |
2935 | - * touch any data without introducing corruption. We send the | |
2936 | - * dying CPU's callbacks to an arbitrarily chosen online CPU. | |
2937 | - */ | |
2938 | 3073 | for_each_rcu_flavor(rsp) |
2939 | 3074 | rcu_cleanup_dying_cpu(rsp); |
2940 | - rcu_cleanup_after_idle(cpu); | |
2941 | 3075 | break; |
2942 | 3076 | case CPU_DEAD: |
2943 | 3077 | case CPU_DEAD_FROZEN: |
... | ... | @@ -2950,7 +3084,7 @@ |
2950 | 3084 | break; |
2951 | 3085 | } |
2952 | 3086 | trace_rcu_utilization("End CPU hotplug"); |
2953 | - return ret; | |
3087 | + return NOTIFY_OK; | |
2954 | 3088 | } |
2955 | 3089 | |
2956 | 3090 | /* |
... | ... | @@ -3085,6 +3219,7 @@ |
3085 | 3219 | } |
3086 | 3220 | rnp->level = i; |
3087 | 3221 | INIT_LIST_HEAD(&rnp->blkd_tasks); |
3222 | + rcu_init_one_nocb(rnp); | |
3088 | 3223 | } |
3089 | 3224 | } |
3090 | 3225 | |
... | ... | @@ -3170,8 +3305,7 @@ |
3170 | 3305 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
3171 | 3306 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
3172 | 3307 | __rcu_init_preempt(); |
3173 | - rcu_init_nocb(); | |
3174 | - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | |
3308 | + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | |
3175 | 3309 | |
3176 | 3310 | /* |
3177 | 3311 | * We don't need protection against CPU-hotplug here because |
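The two-element need_future_gp[] array indexed by "c & 0x1" works because, as
the calls above suggest, rcu_cbs_completed() only ever requests the grace
period one or two beyond ->completed, so at most two distinct future grace
periods can be outstanding per rcu_node structure at any given time. A sketch
of the test performed by rcu_start_future_gp(), assuming that invariant (the
helper name is hypothetical):

    /* True if the future grace period "c" has already been requested. */
    static bool future_gp_already_requested(struct rcu_node *rnp, unsigned long c)
    {
            return rnp->need_future_gp[c & 0x1] != 0;
    }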
kernel/rcutree.h
... | ... | @@ -88,18 +88,13 @@ |
88 | 88 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ |
89 | 89 | atomic_t dynticks; /* Even value for idle, else odd. */ |
90 | 90 | #ifdef CONFIG_RCU_FAST_NO_HZ |
91 | - int dyntick_drain; /* Prepare-for-idle state variable. */ | |
92 | - unsigned long dyntick_holdoff; | |
93 | - /* No retries for the jiffy of failure. */ | |
94 | - struct timer_list idle_gp_timer; | |
95 | - /* Wake up CPU sleeping with callbacks. */ | |
96 | - unsigned long idle_gp_timer_expires; | |
97 | - /* When to wake up CPU (for repost). */ | |
98 | - bool idle_first_pass; /* First pass of attempt to go idle? */ | |
91 | + bool all_lazy; /* Are all CPU's CBs lazy? */ | |
99 | 92 | unsigned long nonlazy_posted; |
100 | 93 | /* # times non-lazy CBs posted to CPU. */ |
101 | 94 | unsigned long nonlazy_posted_snap; |
102 | 95 | /* idle-period nonlazy_posted snapshot. */ |
96 | + unsigned long last_accelerate; | |
97 | + /* Last jiffy CBs were accelerated. */ | |
103 | 98 | int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ |
104 | 99 | #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ |
105 | 100 | }; |
... | ... | @@ -134,9 +129,6 @@ |
134 | 129 | /* elements that need to drain to allow the */ |
135 | 130 | /* current expedited grace period to */ |
136 | 131 | /* complete (only for TREE_PREEMPT_RCU). */ |
137 | - atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */ | |
138 | - /* Since this has meaning only for leaf */ | |
139 | - /* rcu_node structures, 32 bits suffices. */ | |
140 | 132 | unsigned long qsmaskinit; |
141 | 133 | /* Per-GP initial value for qsmask & expmask. */ |
142 | 134 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ |
... | ... | @@ -196,6 +188,12 @@ |
196 | 188 | /* Refused to boost: not sure why, though. */ |
197 | 189 | /* This can happen due to race conditions. */ |
198 | 190 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
191 | +#ifdef CONFIG_RCU_NOCB_CPU | |
192 | + wait_queue_head_t nocb_gp_wq[2]; | |
193 | + /* Place for rcu_nocb_kthread() to wait GP. */ | |
194 | +#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ | |
195 | + int need_future_gp[2]; | |
196 | + /* Counts of upcoming no-CB GP requests. */ | |
199 | 197 | raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; |
200 | 198 | } ____cacheline_internodealigned_in_smp; |
201 | 199 | |
... | ... | @@ -328,6 +326,11 @@ |
328 | 326 | struct task_struct *nocb_kthread; |
329 | 327 | #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ |
330 | 328 | |
329 | + /* 8) RCU CPU stall data. */ | |
330 | +#ifdef CONFIG_RCU_CPU_STALL_INFO | |
331 | + unsigned int softirq_snap; /* Snapshot of softirq activity. */ | |
332 | +#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ | |
333 | + | |
331 | 334 | int cpu; |
332 | 335 | struct rcu_state *rsp; |
333 | 336 | }; |
... | ... | @@ -375,12 +378,6 @@ |
375 | 378 | struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ |
376 | 379 | void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ |
377 | 380 | void (*func)(struct rcu_head *head)); |
378 | -#ifdef CONFIG_RCU_NOCB_CPU | |
379 | - void (*call_remote)(struct rcu_head *head, | |
380 | - void (*func)(struct rcu_head *head)); | |
381 | - /* call_rcu() flavor, but for */ | |
382 | - /* placing on remote CPU. */ | |
383 | -#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ | |
384 | 381 | |
385 | 382 | /* The following fields are guarded by the root rcu_node's lock. */ |
386 | 383 | |
... | ... | @@ -443,6 +440,7 @@ |
443 | 440 | unsigned long gp_max; /* Maximum GP duration in */ |
444 | 441 | /* jiffies. */ |
445 | 442 | char *name; /* Name of structure. */ |
443 | + char abbr; /* Abbreviated name. */ | |
446 | 444 | struct list_head flavors; /* List of RCU flavors. */ |
447 | 445 | }; |
448 | 446 | |
... | ... | @@ -520,7 +518,6 @@ |
520 | 518 | struct rcu_node *rnp); |
521 | 519 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
522 | 520 | static void __cpuinit rcu_prepare_kthreads(int cpu); |
523 | -static void rcu_prepare_for_idle_init(int cpu); | |
524 | 521 | static void rcu_cleanup_after_idle(int cpu); |
525 | 522 | static void rcu_prepare_for_idle(int cpu); |
526 | 523 | static void rcu_idle_count_callbacks_posted(void); |
527 | 524 | |
528 | 525 | |
... | ... | @@ -529,16 +526,18 @@ |
529 | 526 | static void print_cpu_stall_info_end(void); |
530 | 527 | static void zero_cpu_stall_ticks(struct rcu_data *rdp); |
531 | 528 | static void increment_cpu_stall_ticks(void); |
529 | +static int rcu_nocb_needs_gp(struct rcu_state *rsp); | |
530 | +static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); | |
531 | +static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); | |
532 | +static void rcu_init_one_nocb(struct rcu_node *rnp); | |
532 | 533 | static bool is_nocb_cpu(int cpu); |
533 | 534 | static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, |
534 | 535 | bool lazy); |
535 | 536 | static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, |
536 | 537 | struct rcu_data *rdp); |
537 | -static bool nocb_cpu_expendable(int cpu); | |
538 | 538 | static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); |
539 | 539 | static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); |
540 | -static void init_nocb_callback_list(struct rcu_data *rdp); | |
541 | -static void __init rcu_init_nocb(void); | |
540 | +static bool init_nocb_callback_list(struct rcu_data *rdp); | |
542 | 541 | |
543 | 542 | #endif /* #ifndef RCU_TREE_NONCORE */ |
544 | 543 |
kernel/rcutree_plugin.h
... | ... | @@ -85,11 +85,21 @@ |
85 | 85 | if (nr_cpu_ids != NR_CPUS) |
86 | 86 | printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); |
87 | 87 | #ifdef CONFIG_RCU_NOCB_CPU |
88 | +#ifndef CONFIG_RCU_NOCB_CPU_NONE | |
89 | + if (!have_rcu_nocb_mask) { | |
90 | + alloc_bootmem_cpumask_var(&rcu_nocb_mask); | |
91 | + have_rcu_nocb_mask = true; | |
92 | + } | |
93 | +#ifdef CONFIG_RCU_NOCB_CPU_ZERO | |
94 | + pr_info("\tExperimental no-CBs CPU 0\n"); | |
95 | + cpumask_set_cpu(0, rcu_nocb_mask); | |
96 | +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ | |
97 | +#ifdef CONFIG_RCU_NOCB_CPU_ALL | |
98 | + pr_info("\tExperimental no-CBs for all CPUs\n"); | |
99 | + cpumask_setall(rcu_nocb_mask); | |
100 | +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ | |
101 | +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ | |
88 | 102 | if (have_rcu_nocb_mask) { |
89 | - if (cpumask_test_cpu(0, rcu_nocb_mask)) { | |
90 | - cpumask_clear_cpu(0, rcu_nocb_mask); | |
91 | - pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n"); | |
92 | - } | |
93 | 103 | cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); |
94 | 104 | pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); |
95 | 105 | if (rcu_nocb_poll) |
... | ... | @@ -101,7 +111,7 @@ |
101 | 111 | #ifdef CONFIG_TREE_PREEMPT_RCU |
102 | 112 | |
103 | 113 | struct rcu_state rcu_preempt_state = |
104 | - RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); | |
114 | + RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); | |
105 | 115 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
106 | 116 | static struct rcu_state *rcu_state = &rcu_preempt_state; |
107 | 117 | |
108 | 118 | |
... | ... | @@ -1533,17 +1543,10 @@ |
1533 | 1543 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) |
1534 | 1544 | { |
1535 | 1545 | *delta_jiffies = ULONG_MAX; |
1536 | - return rcu_cpu_has_callbacks(cpu); | |
1546 | + return rcu_cpu_has_callbacks(cpu, NULL); | |
1537 | 1547 | } |
1538 | 1548 | |
1539 | 1549 | /* |
1540 | - * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. | |
1541 | - */ | |
1542 | -static void rcu_prepare_for_idle_init(int cpu) | |
1543 | -{ | |
1544 | -} | |
1545 | - | |
1546 | -/* | |
1547 | 1550 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up |
1548 | 1551 | * after it. |
1549 | 1552 | */ |
... | ... | @@ -1577,16 +1580,6 @@ |
1577 | 1580 | * |
1578 | 1581 | * The following three proprocessor symbols control this state machine: |
1579 | 1582 | * |
1580 | - * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt | |
1581 | - * to satisfy RCU. Beyond this point, it is better to incur a periodic | |
1582 | - * scheduling-clock interrupt than to loop through the state machine | |
1583 | - * at full power. | |
1584 | - * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are | |
1585 | - * optional if RCU does not need anything immediately from this | |
1586 | - * CPU, even if this CPU still has RCU callbacks queued. The first | |
1587 | - * times through the state machine are mandatory: we need to give | |
1588 | - * the state machine a chance to communicate a quiescent state | |
1589 | - * to the RCU core. | |
1590 | 1583 | * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted |
1591 | 1584 | * to sleep in dyntick-idle mode with RCU callbacks pending. This |
1592 | 1585 | * is sized to be roughly one RCU grace period. Those energy-efficiency |
1593 | 1586 | |
1594 | 1587 | |
1595 | 1588 | |
1596 | 1589 | |
1597 | 1590 | |
1598 | 1591 | |
1599 | 1592 | |
1600 | 1593 | |
1601 | 1594 | |
1602 | 1595 | |
1603 | 1596 | |
1604 | 1597 | |
1605 | 1598 | |
1606 | 1599 | |
1607 | 1600 | |
1608 | 1601 | |
1609 | 1602 | |
1610 | 1603 | |
1611 | 1604 | |
1612 | 1605 | |
1613 | 1606 | |
... | ... | @@ -1602,186 +1595,108 @@ |
1602 | 1595 | * adjustment, they can be converted into kernel config parameters, though |
1603 | 1596 | * making the state machine smarter might be a better option. |
1604 | 1597 | */ |
1605 | -#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ | |
1606 | -#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ | |
1607 | 1598 | #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ |
1608 | 1599 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ |
1609 | 1600 | |
1601 | +static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; | |
1602 | +module_param(rcu_idle_gp_delay, int, 0644); | |
1603 | +static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; | |
1604 | +module_param(rcu_idle_lazy_gp_delay, int, 0644); | |
1605 | + | |
1610 | 1606 | extern int tick_nohz_enabled; |
1611 | 1607 | |
1612 | 1608 | /* |
1613 | - * Does the specified flavor of RCU have non-lazy callbacks pending on | |
1614 | - * the specified CPU? Both RCU flavor and CPU are specified by the | |
1615 | - * rcu_data structure. | |
1609 | + * Try to advance callbacks for all flavors of RCU on the current CPU. | |
1610 | + * Afterwards, if there are any callbacks ready for immediate invocation, | |
1611 | + * return true. | |
1616 | 1612 | */ |
1617 | -static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) | |
1613 | +static bool rcu_try_advance_all_cbs(void) | |
1618 | 1614 | { |
1619 | - return rdp->qlen != rdp->qlen_lazy; | |
1620 | -} | |
1615 | + bool cbs_ready = false; | |
1616 | + struct rcu_data *rdp; | |
1617 | + struct rcu_node *rnp; | |
1618 | + struct rcu_state *rsp; | |
1621 | 1619 | |
1622 | -#ifdef CONFIG_TREE_PREEMPT_RCU | |
1620 | + for_each_rcu_flavor(rsp) { | |
1621 | + rdp = this_cpu_ptr(rsp->rda); | |
1622 | + rnp = rdp->mynode; | |
1623 | 1623 | |
1624 | -/* | |
1625 | - * Are there non-lazy RCU-preempt callbacks? (There cannot be if there | |
1626 | - * is no RCU-preempt in the kernel.) | |
1627 | - */ | |
1628 | -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) | |
1629 | -{ | |
1630 | - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | |
1624 | + /* | |
1625 | + * Don't bother checking unless a grace period has | |
1626 | + * completed since we last checked and there are | |
1627 | + * callbacks not yet ready to invoke. | |
1628 | + */ | |
1629 | + if (rdp->completed != rnp->completed && | |
1630 | + rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) | |
1631 | + rcu_process_gp_end(rsp, rdp); | |
1631 | 1632 | |
1632 | - return __rcu_cpu_has_nonlazy_callbacks(rdp); | |
1633 | + if (cpu_has_callbacks_ready_to_invoke(rdp)) | |
1634 | + cbs_ready = true; | |
1635 | + } | |
1636 | + return cbs_ready; | |
1633 | 1637 | } |
1634 | 1638 | |
1635 | -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | |
1636 | - | |
1637 | -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) | |
1638 | -{ | |
1639 | - return 0; | |
1640 | -} | |
1641 | - | |
1642 | -#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ | |
1643 | - | |
1644 | 1639 | /* |
1645 | - * Does any flavor of RCU have non-lazy callbacks on the specified CPU? | |
1646 | - */ | |
1647 | -static bool rcu_cpu_has_nonlazy_callbacks(int cpu) | |
1648 | -{ | |
1649 | - return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || | |
1650 | - __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || | |
1651 | - rcu_preempt_cpu_has_nonlazy_callbacks(cpu); | |
1652 | -} | |
1653 | - | |
1654 | -/* | |
1655 | - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | |
1656 | - * callbacks on this CPU, (2) this CPU has not yet attempted to enter | |
1657 | - * dyntick-idle mode, or (3) this CPU is in the process of attempting to | |
1658 | - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed | |
1659 | - * to enter dyntick-idle mode, we refuse to try to enter it. After all, | |
1660 | - * it is better to incur scheduling-clock interrupts than to spin | |
1661 | - * continuously for the same time duration! | |
1640 | + * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready | |
1641 | + * to invoke. If the CPU has callbacks, try to advance them. Tell the | |
1642 | + * caller to set the timeout based on whether or not there are non-lazy | |
1643 | + * callbacks. | |
1662 | 1644 | * |
1663 | - * The delta_jiffies argument is used to store the time when RCU is | |
1664 | - * going to need the CPU again if it still has callbacks. The reason | |
1665 | - * for this is that rcu_prepare_for_idle() might need to post a timer, | |
1666 | - * but if so, it will do so after tick_nohz_stop_sched_tick() has set | |
1667 | - * the wakeup time for this CPU. This means that RCU's timer can be | |
1668 | - * delayed until the wakeup time, which defeats the purpose of posting | |
1669 | - * a timer. | |
1645 | + * The caller must have disabled interrupts. | |
1670 | 1646 | */ |
1671 | -int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) | |
1647 | +int rcu_needs_cpu(int cpu, unsigned long *dj) | |
1672 | 1648 | { |
1673 | 1649 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
1674 | 1650 | |
1675 | - /* Flag a new idle sojourn to the idle-entry state machine. */ | |
1676 | - rdtp->idle_first_pass = 1; | |
1651 | + /* Snapshot to detect later posting of non-lazy callback. */ | |
1652 | + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; | |
1653 | + | |
1677 | 1654 | /* If no callbacks, RCU doesn't need the CPU. */ |
1678 | - if (!rcu_cpu_has_callbacks(cpu)) { | |
1679 | - *delta_jiffies = ULONG_MAX; | |
1655 | + if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { | |
1656 | + *dj = ULONG_MAX; | |
1680 | 1657 | return 0; |
1681 | 1658 | } |
1682 | - if (rdtp->dyntick_holdoff == jiffies) { | |
1683 | - /* RCU recently tried and failed, so don't try again. */ | |
1684 | - *delta_jiffies = 1; | |
1659 | + | |
1660 | + /* Attempt to advance callbacks. */ | |
1661 | + if (rcu_try_advance_all_cbs()) { | |
1662 | + /* Some ready to invoke, so initiate later invocation. */ | |
1663 | + invoke_rcu_core(); | |
1685 | 1664 | return 1; |
1686 | 1665 | } |
1687 | - /* Set up for the possibility that RCU will post a timer. */ | |
1688 | - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { | |
1689 | - *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies, | |
1690 | - RCU_IDLE_GP_DELAY) - jiffies; | |
1666 | + rdtp->last_accelerate = jiffies; | |
1667 | + | |
1668 | + /* Request timer delay depending on laziness, and round. */ | |
1669 | + if (!rdtp->all_lazy) { | |
1670 | + *dj = round_up(rcu_idle_gp_delay + jiffies, | |
1671 | + rcu_idle_gp_delay) - jiffies; | |
1691 | 1672 | } else { |
1692 | - *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY; | |
1693 | - *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; | |
1673 | + *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; | |
1694 | 1674 | } |
1695 | 1675 | return 0; |
1696 | 1676 | } |
1697 | 1677 | |
1698 | 1678 | /* |
1699 | - * Handler for smp_call_function_single(). The only point of this | |
1700 | - * handler is to wake the CPU up, so the handler does only tracing. | |
1701 | - */ | |
1702 | -void rcu_idle_demigrate(void *unused) | |
1703 | -{ | |
1704 | - trace_rcu_prep_idle("Demigrate"); | |
1705 | -} | |
1706 | - | |
1707 | -/* | |
1708 | - * Timer handler used to force CPU to start pushing its remaining RCU | |
1709 | - * callbacks in the case where it entered dyntick-idle mode with callbacks | |
1710 | - * pending. The hander doesn't really need to do anything because the | |
1711 | - * real work is done upon re-entry to idle, or by the next scheduling-clock | |
1712 | - * interrupt should idle not be re-entered. | |
1679 | + * Prepare a CPU for idle from an RCU perspective. The first major task | |
1680 | + * is to sense whether nohz mode has been enabled or disabled via sysfs. | |
1681 | + * The second major task is to check to see if a non-lazy callback has | |
1682 | + * arrived at a CPU that previously had only lazy callbacks. The third | |
1683 | + * major task is to accelerate (that is, assign grace-period numbers to) | |
1684 | + * any recently arrived callbacks. | |
1713 | 1685 | * |
1714 | - * One special case: the timer gets migrated without awakening the CPU | |
1715 | - * on which the timer was scheduled on. In this case, we must wake up | |
1716 | - * that CPU. We do so with smp_call_function_single(). | |
1717 | - */ | |
1718 | -static void rcu_idle_gp_timer_func(unsigned long cpu_in) | |
1719 | -{ | |
1720 | - int cpu = (int)cpu_in; | |
1721 | - | |
1722 | - trace_rcu_prep_idle("Timer"); | |
1723 | - if (cpu != smp_processor_id()) | |
1724 | - smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); | |
1725 | - else | |
1726 | - WARN_ON_ONCE(1); /* Getting here can hang the system... */ | |
1727 | -} | |
1728 | - | |
1729 | -/* | |
1730 | - * Initialize the timer used to pull CPUs out of dyntick-idle mode. | |
1731 | - */ | |
1732 | -static void rcu_prepare_for_idle_init(int cpu) | |
1733 | -{ | |
1734 | - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | |
1735 | - | |
1736 | - rdtp->dyntick_holdoff = jiffies - 1; | |
1737 | - setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); | |
1738 | - rdtp->idle_gp_timer_expires = jiffies - 1; | |
1739 | - rdtp->idle_first_pass = 1; | |
1740 | -} | |
1741 | - | |
1742 | -/* | |
1743 | - * Clean up for exit from idle. Because we are exiting from idle, there | |
1744 | - * is no longer any point to ->idle_gp_timer, so cancel it. This will | |
1745 | - * do nothing if this timer is not active, so just cancel it unconditionally. | |
1746 | - */ | |
1747 | -static void rcu_cleanup_after_idle(int cpu) | |
1748 | -{ | |
1749 | - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | |
1750 | - | |
1751 | - del_timer(&rdtp->idle_gp_timer); | |
1752 | - trace_rcu_prep_idle("Cleanup after idle"); | |
1753 | - rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); | |
1754 | -} | |
1755 | - | |
1756 | -/* | |
1757 | - * Check to see if any RCU-related work can be done by the current CPU, | |
1758 | - * and if so, schedule a softirq to get it done. This function is part | |
1759 | - * of the RCU implementation; it is -not- an exported member of the RCU API. | |
1760 | - * | |
1761 | - * The idea is for the current CPU to clear out all work required by the | |
1762 | - * RCU core for the current grace period, so that this CPU can be permitted | |
1763 | - * to enter dyntick-idle mode. In some cases, it will need to be awakened | |
1764 | - * at the end of the grace period by whatever CPU ends the grace period. | |
1765 | - * This allows CPUs to go dyntick-idle more quickly, and to reduce the | |
1766 | - * number of wakeups by a modest integer factor. | |
1767 | - * | |
1768 | - * Because it is not legal to invoke rcu_process_callbacks() with irqs | |
1769 | - * disabled, we do one pass of force_quiescent_state(), then do an | |
1770 | - * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked | |
1771 | - * later. The ->dyntick_drain field controls the sequencing. | |
1772 | - * | |
1773 | 1686 | * The caller must have disabled interrupts. |
1774 | 1687 | */ |
1775 | 1688 | static void rcu_prepare_for_idle(int cpu) |
1776 | 1689 | { |
1777 | - struct timer_list *tp; | |
1690 | + struct rcu_data *rdp; | |
1778 | 1691 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
1692 | + struct rcu_node *rnp; | |
1693 | + struct rcu_state *rsp; | |
1779 | 1694 | int tne; |
1780 | 1695 | |
1781 | 1696 | /* Handle nohz enablement switches conservatively. */ |
1782 | 1697 | tne = ACCESS_ONCE(tick_nohz_enabled); |
1783 | 1698 | if (tne != rdtp->tick_nohz_enabled_snap) { |
1784 | - if (rcu_cpu_has_callbacks(cpu)) | |
1699 | + if (rcu_cpu_has_callbacks(cpu, NULL)) | |
1785 | 1700 | invoke_rcu_core(); /* force nohz to see update. */ |
1786 | 1701 | rdtp->tick_nohz_enabled_snap = tne; |
1787 | 1702 | return; |
... | ... | @@ -1789,126 +1704,57 @@ |
1789 | 1704 | if (!tne) |
1790 | 1705 | return; |
1791 | 1706 | |
1792 | - /* Adaptive-tick mode, where usermode execution is idle to RCU. */ | |
1793 | - if (!is_idle_task(current)) { | |
1794 | - rdtp->dyntick_holdoff = jiffies - 1; | |
1795 | - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { | |
1796 | - trace_rcu_prep_idle("User dyntick with callbacks"); | |
1797 | - rdtp->idle_gp_timer_expires = | |
1798 | - round_up(jiffies + RCU_IDLE_GP_DELAY, | |
1799 | - RCU_IDLE_GP_DELAY); | |
1800 | - } else if (rcu_cpu_has_callbacks(cpu)) { | |
1801 | - rdtp->idle_gp_timer_expires = | |
1802 | - round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); | |
1803 | - trace_rcu_prep_idle("User dyntick with lazy callbacks"); | |
1804 | - } else { | |
1805 | - return; | |
1806 | - } | |
1807 | - tp = &rdtp->idle_gp_timer; | |
1808 | - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); | |
1707 | + /* If this is a no-CBs CPU, no callbacks, just return. */ | |
1708 | + if (is_nocb_cpu(cpu)) | |
1809 | 1709 | return; |
1810 | - } | |
1811 | 1710 | |
1812 | 1711 | /* |
1813 | - * If this is an idle re-entry, for example, due to use of | |
1814 | - * RCU_NONIDLE() or the new idle-loop tracing API within the idle | |
1815 | - * loop, then don't take any state-machine actions, unless the | |
1816 | - * momentary exit from idle queued additional non-lazy callbacks. | |
1817 | - * Instead, repost the ->idle_gp_timer if this CPU has callbacks | |
1818 | - * pending. | |
1712 | + * If a non-lazy callback arrived at a CPU having only lazy | |
1713 | + * callbacks, invoke RCU core for the side-effect of recalculating | |
1714 | + * idle duration on re-entry to idle. | |
1819 | 1715 | */ |
1820 | - if (!rdtp->idle_first_pass && | |
1821 | - (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { | |
1822 | - if (rcu_cpu_has_callbacks(cpu)) { | |
1823 | - tp = &rdtp->idle_gp_timer; | |
1824 | - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); | |
1825 | - } | |
1716 | + if (rdtp->all_lazy && | |
1717 | + rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { | |
1718 | + invoke_rcu_core(); | |
1826 | 1719 | return; |
1827 | 1720 | } |
1828 | - rdtp->idle_first_pass = 0; | |
1829 | - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; | |
1830 | 1721 | |
1831 | 1722 | /* |
1832 | - * If there are no callbacks on this CPU, enter dyntick-idle mode. | |
1833 | - * Also reset state to avoid prejudicing later attempts. | |
1723 | + * If we have not yet accelerated this jiffy, accelerate all | |
1724 | + * callbacks on this CPU. | |
1834 | 1725 | */ |
1835 | - if (!rcu_cpu_has_callbacks(cpu)) { | |
1836 | - rdtp->dyntick_holdoff = jiffies - 1; | |
1837 | - rdtp->dyntick_drain = 0; | |
1838 | - trace_rcu_prep_idle("No callbacks"); | |
1726 | + if (rdtp->last_accelerate == jiffies) | |
1839 | 1727 | return; |
1728 | + rdtp->last_accelerate = jiffies; | |
1729 | + for_each_rcu_flavor(rsp) { | |
1730 | + rdp = per_cpu_ptr(rsp->rda, cpu); | |
1731 | + if (!*rdp->nxttail[RCU_DONE_TAIL]) | |
1732 | + continue; | |
1733 | + rnp = rdp->mynode; | |
1734 | + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | |
1735 | + rcu_accelerate_cbs(rsp, rnp, rdp); | |
1736 | + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | |
1840 | 1737 | } |
1738 | +} | |
1841 | 1739 | |
1842 | - /* | |
1843 | - * If in holdoff mode, just return. We will presumably have | |
1844 | - * refrained from disabling the scheduling-clock tick. | |
1845 | - */ | |
1846 | - if (rdtp->dyntick_holdoff == jiffies) { | |
1847 | - trace_rcu_prep_idle("In holdoff"); | |
1848 | - return; | |
1849 | - } | |
1740 | +/* | |
1741 | + * Clean up for exit from idle. Attempt to advance callbacks based on | |
1742 | + * any grace periods that elapsed while the CPU was idle, and if any | |
1743 | + * callbacks are now ready to invoke, initiate invocation. | |
1744 | + */ | |
1745 | +static void rcu_cleanup_after_idle(int cpu) | |
1746 | +{ | |
1747 | + struct rcu_data *rdp; | |
1748 | + struct rcu_state *rsp; | |
1850 | 1749 | |
1851 | - /* Check and update the ->dyntick_drain sequencing. */ | |
1852 | - if (rdtp->dyntick_drain <= 0) { | |
1853 | - /* First time through, initialize the counter. */ | |
1854 | - rdtp->dyntick_drain = RCU_IDLE_FLUSHES; | |
1855 | - } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES && | |
1856 | - !rcu_pending(cpu) && | |
1857 | - !local_softirq_pending()) { | |
1858 | - /* Can we go dyntick-idle despite still having callbacks? */ | |
1859 | - rdtp->dyntick_drain = 0; | |
1860 | - rdtp->dyntick_holdoff = jiffies; | |
1861 | - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { | |
1862 | - trace_rcu_prep_idle("Dyntick with callbacks"); | |
1863 | - rdtp->idle_gp_timer_expires = | |
1864 | - round_up(jiffies + RCU_IDLE_GP_DELAY, | |
1865 | - RCU_IDLE_GP_DELAY); | |
1866 | - } else { | |
1867 | - rdtp->idle_gp_timer_expires = | |
1868 | - round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); | |
1869 | - trace_rcu_prep_idle("Dyntick with lazy callbacks"); | |
1870 | - } | |
1871 | - tp = &rdtp->idle_gp_timer; | |
1872 | - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); | |
1873 | - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; | |
1874 | - return; /* Nothing more to do immediately. */ | |
1875 | - } else if (--(rdtp->dyntick_drain) <= 0) { | |
1876 | - /* We have hit the limit, so time to give up. */ | |
1877 | - rdtp->dyntick_holdoff = jiffies; | |
1878 | - trace_rcu_prep_idle("Begin holdoff"); | |
1879 | - invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ | |
1750 | + if (is_nocb_cpu(cpu)) | |
1880 | 1751 | return; |
1752 | + rcu_try_advance_all_cbs(); | |
1753 | + for_each_rcu_flavor(rsp) { | |
1754 | + rdp = per_cpu_ptr(rsp->rda, cpu); | |
1755 | + if (cpu_has_callbacks_ready_to_invoke(rdp)) | |
1756 | + invoke_rcu_core(); | |
1881 | 1757 | } |
1882 | - | |
1883 | - /* | |
1884 | - * Do one step of pushing the remaining RCU callbacks through | |
1885 | - * the RCU core state machine. | |
1886 | - */ | |
1887 | -#ifdef CONFIG_TREE_PREEMPT_RCU | |
1888 | - if (per_cpu(rcu_preempt_data, cpu).nxtlist) { | |
1889 | - rcu_preempt_qs(cpu); | |
1890 | - force_quiescent_state(&rcu_preempt_state); | |
1891 | - } | |
1892 | -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | |
1893 | - if (per_cpu(rcu_sched_data, cpu).nxtlist) { | |
1894 | - rcu_sched_qs(cpu); | |
1895 | - force_quiescent_state(&rcu_sched_state); | |
1896 | - } | |
1897 | - if (per_cpu(rcu_bh_data, cpu).nxtlist) { | |
1898 | - rcu_bh_qs(cpu); | |
1899 | - force_quiescent_state(&rcu_bh_state); | |
1900 | - } | |
1901 | - | |
1902 | - /* | |
1903 | - * If RCU callbacks are still pending, RCU still needs this CPU. | |
1904 | - * So try forcing the callbacks through the grace period. | |
1905 | - */ | |
1906 | - if (rcu_cpu_has_callbacks(cpu)) { | |
1907 | - trace_rcu_prep_idle("More callbacks"); | |
1908 | - invoke_rcu_core(); | |
1909 | - } else { | |
1910 | - trace_rcu_prep_idle("Callbacks drained"); | |
1911 | - } | |
1912 | 1758 | } |
1913 | 1759 | |
1914 | 1760 | /* |
1915 | 1761 | |
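The rewrite above replaces the multi-pass ->dyntick_drain state machine with a much simpler rule: on each idle entry, accelerate callbacks at most once per jiffy, keyed off the new ->last_accelerate field. The following stand-alone userspace model (not kernel code; all names are illustrative) shows the rate-limit pattern in isolation:

	#include <stdio.h>

	static unsigned long jiffies = 1;	/* stands in for the kernel's jiffies counter */
	static unsigned long last_accelerate;	/* stands in for rdtp->last_accelerate */
	static int accelerations;

	static void prepare_for_idle_model(void)
	{
		if (last_accelerate == jiffies)
			return;			/* already accelerated this jiffy: cheap no-op */
		last_accelerate = jiffies;
		accelerations++;		/* stands in for the rcu_accelerate_cbs() pass */
	}

	int main(void)
	{
		int i;

		for (i = 0; i < 5; i++) {
			prepare_for_idle_model();	/* first idle entry in this jiffy */
			prepare_for_idle_model();	/* idle re-entry in the same jiffy */
			jiffies++;			/* scheduling-clock tick */
		}
		printf("accelerations: %d over 5 jiffies and 10 idle entries\n",
		       accelerations);
		return 0;
	}

Because idle entry and exit can be extremely frequent, repeated entries within the same jiffy become cheap no-ops, which is what the removed ->idle_first_pass bookkeeping was approximating far more elaborately.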
... | ... | @@ -2015,16 +1861,13 @@ |
2015 | 1861 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) |
2016 | 1862 | { |
2017 | 1863 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
2018 | - struct timer_list *tltp = &rdtp->idle_gp_timer; | |
2019 | - char c; | |
1864 | + unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; | |
2020 | 1865 | |
2021 | - c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; | |
2022 | - if (timer_pending(tltp)) | |
2023 | - sprintf(cp, "drain=%d %c timer=%lu", | |
2024 | - rdtp->dyntick_drain, c, tltp->expires - jiffies); | |
2025 | - else | |
2026 | - sprintf(cp, "drain=%d %c timer not pending", | |
2027 | - rdtp->dyntick_drain, c); | |
1866 | + sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", | |
1867 | + rdtp->last_accelerate & 0xffff, jiffies & 0xffff, | |
1868 | + ulong2long(nlpd), | |
1869 | + rdtp->all_lazy ? 'L' : '.', | |
1870 | + rdtp->tick_nohz_enabled_snap ? '.' : 'D'); | |
2028 | 1871 | } |
2029 | 1872 | |
2030 | 1873 | #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ |
2031 | 1874 | |
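The sprintf() added above masks both the ->last_accelerate value and the current jiffies counter with 0xffff, so a reader of the resulting stall warning can recover the elapsed time with 16-bit modular subtraction, provided fewer than 65536 jiffies have passed. A stand-alone sketch with made-up field values:

	#include <stdio.h>

	/* Illustration only: decoding the two 16-bit hex fields (values are hypothetical). */
	int main(void)
	{
		unsigned long last_accelerate = 0xf234;	/* first hex field (hypothetical) */
		unsigned long now = 0x0042;		/* second hex field (hypothetical) */

		/* 16-bit modular subtraction handles the wrap from 0xffff to 0x0000. */
		printf("~%lu jiffies since last acceleration attempt\n",
		       (now - last_accelerate) & 0xffff);
		return 0;
	}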
... | ... | @@ -2070,10 +1913,11 @@ |
2070 | 1913 | ticks_value = rsp->gpnum - rdp->gpnum; |
2071 | 1914 | } |
2072 | 1915 | print_cpu_stall_fast_no_hz(fast_no_hz, cpu); |
2073 | - printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n", | |
1916 | + printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", | |
2074 | 1917 | cpu, ticks_value, ticks_title, |
2075 | 1918 | atomic_read(&rdtp->dynticks) & 0xfff, |
2076 | 1919 | rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, |
1920 | + rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), | |
2077 | 1921 | fast_no_hz); |
2078 | 1922 | } |
2079 | 1923 | |
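The second value in the new "softirq=%u/%u" field comes from kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), which should be the same per-CPU counter that the RCU row of /proc/softirqs reports, so it can be cross-checked from userspace. A small sketch, assuming the usual /proc/softirqs layout:

	#include <stdio.h>
	#include <string.h>

	/* Illustration only: print the header row and the RCU row of /proc/softirqs. */
	int main(void)
	{
		char line[4096];
		int first = 1;
		FILE *fp = fopen("/proc/softirqs", "r");

		if (!fp)
			return 1;
		while (fgets(line, sizeof(line), fp)) {
			if (first || strstr(line, "RCU"))
				fputs(line, stdout);	/* header row and RCU row */
			first = 0;
		}
		fclose(fp);
		return 0;
	}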
... | ... | @@ -2087,6 +1931,7 @@ |
2087 | 1931 | static void zero_cpu_stall_ticks(struct rcu_data *rdp) |
2088 | 1932 | { |
2089 | 1933 | rdp->ticks_this_gp = 0; |
1934 | + rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); | |
2090 | 1935 | } |
2091 | 1936 | |
2092 | 1937 | /* Increment ->ticks_this_gp for all flavors of RCU. */ |
... | ... | @@ -2165,6 +2010,47 @@ |
2165 | 2010 | } |
2166 | 2011 | early_param("rcu_nocb_poll", parse_rcu_nocb_poll); |
2167 | 2012 | |
2013 | +/* | |
2014 | + * Do any no-CBs CPUs need another grace period? | |
2015 | + * | |
2016 | + * Interrupts must be disabled. If the caller does not hold the root | |
2017 | + * rcu_node structure's ->lock, the results are advisory only. | |
2018 | + */ | |
2019 | +static int rcu_nocb_needs_gp(struct rcu_state *rsp) | |
2020 | +{ | |
2021 | + struct rcu_node *rnp = rcu_get_root(rsp); | |
2022 | + | |
2023 | + return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; | |
2024 | +} | |
2025 | + | |
2026 | +/* | |
2027 | + * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended | |
2028 | + * grace period. | |
2029 | + */ | |
2030 | +static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) | |
2031 | +{ | |
2032 | + wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]); | |
2033 | +} | |
2034 | + | |
2035 | +/* | |
2036 | + * Set the root rcu_node structure's ->need_future_gp field | |
2037 | + * based on the sum of those of all rcu_node structures. This does | |
2038 | + * double-count the root rcu_node structure's requests, but this | |
2039 | + * is necessary to handle the possibility of a rcu_nocb_kthread() | |
2040 | + * having awakened during the time that the rcu_node structures | |
2041 | + * were being updated for the end of the previous grace period. | |
2042 | + */ | |
2043 | +static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) | |
2044 | +{ | |
2045 | + rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq; | |
2046 | +} | |
2047 | + | |
2048 | +static void rcu_init_one_nocb(struct rcu_node *rnp) | |
2049 | +{ | |
2050 | + init_waitqueue_head(&rnp->nocb_gp_wq[0]); | |
2051 | + init_waitqueue_head(&rnp->nocb_gp_wq[1]); | |
2052 | +} | |
2053 | + | |
2168 | 2054 | /* Is the specified CPU a no-CBs CPU? */
2169 | 2055 | static bool is_nocb_cpu(int cpu) |
2170 | 2056 | { |
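The ->need_future_gp[] and ->nocb_gp_wq[] arrays added above both have two elements indexed by the low bit of a grace-period number: requests and waiters for grace period "completed + 1" land in one slot, and rcu_nocb_gp_cleanup() later wakes slot "completed & 0x1" once that grace period has ended. Because consecutive grace periods alternate slots, waiters for the next grace period never share a slot with the one currently being cleaned up. A stand-alone illustration (not kernel code; counter values are hypothetical):

	#include <stdio.h>

	int main(void)
	{
		unsigned long completed = 41;		/* last completed grace period */
		unsigned long wanted = completed + 1;	/* GP the no-CBs CPU is waiting for */

		printf("request and wait in slot %lu\n", wanted & 0x1);

		completed = 42;		/* grace period 42 ends; cleanup runs */
		printf("cleanup wakes slot %lu\n", completed & 0x1);
		return 0;
	}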
... | ... | @@ -2227,6 +2113,13 @@ |
2227 | 2113 | if (!is_nocb_cpu(rdp->cpu)) |
2228 | 2114 | return 0; |
2229 | 2115 | __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); |
2116 | + if (__is_kfree_rcu_offset((unsigned long)rhp->func)) | |
2117 | + trace_rcu_kfree_callback(rdp->rsp->name, rhp, | |
2118 | + (unsigned long)rhp->func, | |
2119 | + rdp->qlen_lazy, rdp->qlen); | |
2120 | + else | |
2121 | + trace_rcu_callback(rdp->rsp->name, rhp, | |
2122 | + rdp->qlen_lazy, rdp->qlen); | |
2230 | 2123 | return 1; |
2231 | 2124 | } |
2232 | 2125 | |
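The tracing added above distinguishes kfree_rcu() callbacks by inspecting the callback "function" value: kfree_rcu() encodes the offset of the rcu_head within its enclosing structure where a real function pointer would otherwise go, and __is_kfree_rcu_offset() treats small values as such offsets (below 4096 at the time of this commit, if I read rcupdate.h correctly). A stand-alone illustration with hypothetical values:

	#include <stdio.h>

	/* Illustration only: the 4096 cutoff mirrors my reading of __is_kfree_rcu_offset(). */
	static const char *nocb_trace(unsigned long func_value)
	{
		return func_value < 4096 ? "trace_rcu_kfree_callback"
					 : "trace_rcu_callback";
	}

	int main(void)
	{
		printf("%s\n", nocb_trace(16));			/* small value: an rcu_head offset */
		printf("%s\n", nocb_trace(0x80000000UL));	/* large value: a function address */
		return 0;
	}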
... | ... | @@ -2265,98 +2158,39 @@ |
2265 | 2158 | } |
2266 | 2159 | |
2267 | 2160 | /* |
2268 | - * There must be at least one non-no-CBs CPU in operation at any given | |
2269 | - * time, because no-CBs CPUs are not capable of initiating grace periods | |
2270 | - * independently. This function therefore complains if the specified | |
2271 | - * CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to | |
2272 | - * avoid offlining the last such CPU. (Recursion is a wonderful thing, | |
2273 | - * but you have to have a base case!) | |
2161 | + * If necessary, kick off a new grace period, and either way wait | |
2162 | + * for a subsequent grace period to complete. | |
2274 | 2163 | */ |
2275 | -static bool nocb_cpu_expendable(int cpu) | |
2164 | +static void rcu_nocb_wait_gp(struct rcu_data *rdp) | |
2276 | 2165 | { |
2277 | - cpumask_var_t non_nocb_cpus; | |
2278 | - int ret; | |
2166 | + unsigned long c; | |
2167 | + bool d; | |
2168 | + unsigned long flags; | |
2169 | + struct rcu_node *rnp = rdp->mynode; | |
2279 | 2170 | |
2171 | + raw_spin_lock_irqsave(&rnp->lock, flags); | |
2172 | + c = rcu_start_future_gp(rnp, rdp); | |
2173 | + raw_spin_unlock_irqrestore(&rnp->lock, flags); | |
2174 | + | |
2280 | 2175 | /* |
2281 | - * If there are no no-CB CPUs or if this CPU is not a no-CB CPU, | |
2282 | - * then offlining this CPU is harmless. Let it happen. | |
2176 | + * Wait for the grace period. Do so interruptibly to avoid messing | |
2177 | + * up the load average. | |
2283 | 2178 | */ |
2284 | - if (!have_rcu_nocb_mask || is_nocb_cpu(cpu)) | |
2285 | - return 1; | |
2286 | - | |
2287 | - /* If no memory, play it safe and keep the CPU around. */ | |
2288 | - if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO)) | |
2289 | - return 0; | |
2290 | - cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask); | |
2291 | - cpumask_clear_cpu(cpu, non_nocb_cpus); | |
2292 | - ret = !cpumask_empty(non_nocb_cpus); | |
2293 | - free_cpumask_var(non_nocb_cpus); | |
2294 | - return ret; | |
2179 | + trace_rcu_future_gp(rnp, rdp, c, "StartWait"); | |
2180 | + for (;;) { | |
2181 | + wait_event_interruptible( | |
2182 | + rnp->nocb_gp_wq[c & 0x1], | |
2183 | + (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); | |
2184 | + if (likely(d)) | |
2185 | + break; | |
2186 | + flush_signals(current); | |
2187 | + trace_rcu_future_gp(rnp, rdp, c, "ResumeWait"); | |
2188 | + } | |
2189 | + trace_rcu_future_gp(rnp, rdp, c, "EndWait"); | |
2190 | + smp_mb(); /* Ensure that CB invocation happens after GP end. */ | |
2295 | 2191 | } |
2296 | 2192 | |
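The wait condition in rcu_nocb_wait_gp() above compares grace-period numbers with ULONG_CMP_GE() rather than a plain ">=" so that the test keeps working when the counter wraps. A stand-alone illustration (the macro body mirrors my reading of the rcupdate.h definition; verify against your tree):

	#include <limits.h>
	#include <stdio.h>

	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

	int main(void)
	{
		unsigned long c = ULONG_MAX - 1;	/* GP number handed out just before wrap */
		unsigned long completed = 1;		/* counter has since wrapped past zero */

		printf("plain >=     : %d\n", completed >= c);		/* 0: misses the completed GP */
		printf("ULONG_CMP_GE : %d\n", ULONG_CMP_GE(completed, c)); /* 1: sees it correctly */
		return 0;
	}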
2297 | 2193 | /* |
2298 | - * Helper structure for remote registry of RCU callbacks. | |
2299 | - * This is needed for when a no-CBs CPU needs to start a grace period. | |
2300 | - * If it just invokes call_rcu(), the resulting callback will be queued, | |
2301 | - * which can result in deadlock. | |
2302 | - */ | |
2303 | -struct rcu_head_remote { | |
2304 | - struct rcu_head *rhp; | |
2305 | - call_rcu_func_t *crf; | |
2306 | - void (*func)(struct rcu_head *rhp); | |
2307 | -}; | |
2308 | - | |
2309 | -/* | |
2310 | - * Register a callback as specified by the rcu_head_remote struct. | |
2311 | - * This function is intended to be invoked via smp_call_function_single(). | |
2312 | - */ | |
2313 | -static void call_rcu_local(void *arg) | |
2314 | -{ | |
2315 | - struct rcu_head_remote *rhrp = | |
2316 | - container_of(arg, struct rcu_head_remote, rhp); | |
2317 | - | |
2318 | - rhrp->crf(rhrp->rhp, rhrp->func); | |
2319 | -} | |
2320 | - | |
2321 | -/* | |
2322 | - * Set up an rcu_head_remote structure and then invoke call_rcu_local() | |
2323 | - * on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via | |
2324 | - * smp_call_function_single(). | |
2325 | - */ | |
2326 | -static void invoke_crf_remote(struct rcu_head *rhp, | |
2327 | - void (*func)(struct rcu_head *rhp), | |
2328 | - call_rcu_func_t crf) | |
2329 | -{ | |
2330 | - struct rcu_head_remote rhr; | |
2331 | - | |
2332 | - rhr.rhp = rhp; | |
2333 | - rhr.crf = crf; | |
2334 | - rhr.func = func; | |
2335 | - smp_call_function_single(0, call_rcu_local, &rhr, 1); | |
2336 | -} | |
2337 | - | |
2338 | -/* | |
2339 | - * Helper functions to be passed to wait_rcu_gp(), each of which | |
2340 | - * invokes invoke_crf_remote() to register a callback appropriately. | |
2341 | - */ | |
2342 | -static void __maybe_unused | |
2343 | -call_rcu_preempt_remote(struct rcu_head *rhp, | |
2344 | - void (*func)(struct rcu_head *rhp)) | |
2345 | -{ | |
2346 | - invoke_crf_remote(rhp, func, call_rcu); | |
2347 | -} | |
2348 | -static void call_rcu_bh_remote(struct rcu_head *rhp, | |
2349 | - void (*func)(struct rcu_head *rhp)) | |
2350 | -{ | |
2351 | - invoke_crf_remote(rhp, func, call_rcu_bh); | |
2352 | -} | |
2353 | -static void call_rcu_sched_remote(struct rcu_head *rhp, | |
2354 | - void (*func)(struct rcu_head *rhp)) | |
2355 | -{ | |
2356 | - invoke_crf_remote(rhp, func, call_rcu_sched); | |
2357 | -} | |
2358 | - | |
2359 | -/* | |
2360 | 2194 | * Per-rcu_data kthread, but only for no-CBs CPUs. Each kthread invokes |
2361 | 2195 | * callbacks queued by the corresponding no-CBs CPU. |
2362 | 2196 | */ |
... | ... | @@ -2390,7 +2224,7 @@ |
2390 | 2224 | cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); |
2391 | 2225 | ACCESS_ONCE(rdp->nocb_p_count) += c; |
2392 | 2226 | ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl; |
2393 | - wait_rcu_gp(rdp->rsp->call_remote); | |
2227 | + rcu_nocb_wait_gp(rdp); | |
2394 | 2228 | |
2395 | 2229 | /* Each pass through the following loop invokes a callback. */ |
2396 | 2230 | trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); |
... | ... | @@ -2436,33 +2270,42 @@ |
2436 | 2270 | return; |
2437 | 2271 | for_each_cpu(cpu, rcu_nocb_mask) { |
2438 | 2272 | rdp = per_cpu_ptr(rsp->rda, cpu); |
2439 | - t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu); | |
2273 | + t = kthread_run(rcu_nocb_kthread, rdp, | |
2274 | + "rcuo%c/%d", rsp->abbr, cpu); | |
2440 | 2275 | BUG_ON(IS_ERR(t)); |
2441 | 2276 | ACCESS_ONCE(rdp->nocb_kthread) = t; |
2442 | 2277 | } |
2443 | 2278 | } |
2444 | 2279 | |
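With the "rcuo%c/%d" format above, each flavor's no-CBs kthreads get distinct names in ps output, whereas the removed "rcuo%d" format made all flavors indistinguishable. Assuming the flavor abbreviations added elsewhere in this series are 'b', 'p', and 's' for rcu_bh, rcu_preempt, and rcu_sched, the names come out as in this illustrative sketch (the CPU number is made up):

	#include <stdio.h>

	int main(void)
	{
		const char abbr[] = { 'b', 'p', 's' };	/* assumed ->abbr values */
		int cpu = 3;				/* hypothetical no-CBs CPU */
		int i;

		for (i = 0; i < 3; i++)
			printf("rcuo%c/%d\n", abbr[i], cpu);	/* e.g. rcuob/3, rcuop/3, rcuos/3 */
		return 0;
	}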
2445 | 2280 | /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ |
2446 | -static void init_nocb_callback_list(struct rcu_data *rdp) | |
2281 | +static bool init_nocb_callback_list(struct rcu_data *rdp) | |
2447 | 2282 | { |
2448 | 2283 | if (rcu_nocb_mask == NULL || |
2449 | 2284 | !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask)) |
2450 | - return; | |
2285 | + return false; | |
2451 | 2286 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; |
2287 | + return true; | |
2452 | 2288 | } |
2453 | 2289 | |
2454 | -/* Initialize the ->call_remote fields in the rcu_state structures. */ | |
2455 | -static void __init rcu_init_nocb(void) | |
2290 | +#else /* #ifdef CONFIG_RCU_NOCB_CPU */ | |
2291 | + | |
2292 | +static int rcu_nocb_needs_gp(struct rcu_state *rsp) | |
2456 | 2293 | { |
2457 | -#ifdef CONFIG_PREEMPT_RCU | |
2458 | - rcu_preempt_state.call_remote = call_rcu_preempt_remote; | |
2459 | -#endif /* #ifdef CONFIG_PREEMPT_RCU */ | |
2460 | - rcu_bh_state.call_remote = call_rcu_bh_remote; | |
2461 | - rcu_sched_state.call_remote = call_rcu_sched_remote; | |
2294 | + return 0; | |
2462 | 2295 | } |
2463 | 2296 | |
2464 | -#else /* #ifdef CONFIG_RCU_NOCB_CPU */ | |
2297 | +static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) | |
2298 | +{ | |
2299 | +} | |
2465 | 2300 | |
2301 | +static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) | |
2302 | +{ | |
2303 | +} | |
2304 | + | |
2305 | +static void rcu_init_one_nocb(struct rcu_node *rnp) | |
2306 | +{ | |
2307 | +} | |
2308 | + | |
2466 | 2309 | static bool is_nocb_cpu(int cpu) |
2467 | 2310 | { |
2468 | 2311 | return false; |
... | ... | @@ -2480,11 +2323,6 @@ |
2480 | 2323 | return 0; |
2481 | 2324 | } |
2482 | 2325 | |
2483 | -static bool nocb_cpu_expendable(int cpu) | |
2484 | -{ | |
2485 | - return 1; | |
2486 | -} | |
2487 | - | |
2488 | 2326 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) |
2489 | 2327 | { |
2490 | 2328 | } |
2491 | 2329 | |
... | ... | @@ -2493,12 +2331,9 @@ |
2493 | 2331 | { |
2494 | 2332 | } |
2495 | 2333 | |
2496 | -static void init_nocb_callback_list(struct rcu_data *rdp) | |
2334 | +static bool init_nocb_callback_list(struct rcu_data *rdp) | |
2497 | 2335 | { |
2498 | -} | |
2499 | - | |
2500 | -static void __init rcu_init_nocb(void) | |
2501 | -{ | |
2336 | + return false; | |
2502 | 2337 | } |
2503 | 2338 | |
2504 | 2339 | #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ |
kernel/rcutree_trace.c