Commit d9a3da0699b24a589b27a61e1a5b5bd30d9db669
Committed by: Ingo Molnar
1 parent: cf244dc01b
rcu: Add expedited grace-period support for preemptible RCU
Implement a synchronize_rcu_expedited() for preemptible RCU that actually is expedited. This uses synchronize_sched_expedited() to force all threads currently running in a preemptible-RCU read-side critical section onto the appropriate ->blocked_tasks[] list, then takes a snapshot of all of these lists and waits for them to drain.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1259784616158-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 5 changed files with 260 additions and 27 deletions
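Before the per-file hunks, it may help to see the shape of the new expedited path in one place. The outline below is an illustrative condensation of the synchronize_rcu_expedited() implementation added to kernel/rcutree_plugin.h further down; locking, the mutex/retry fast path, the fallback to synchronize_rcu(), and the singleton-root special case are all omitted, and every helper named here is introduced by this patch.

/*
 * Illustrative outline only -- see kernel/rcutree_plugin.h below for the
 * real implementation with locking and fallback handling.
 */
void synchronize_rcu_expedited_outline(void)
{
	struct rcu_state *rsp = &rcu_preempt_state;
	struct rcu_node *rnp;

	/*
	 * 1. Force every CPU through the scheduler so that any task still
	 *    inside a preemptible-RCU read-side critical section is
	 *    preempted and queued on some rcu_node's ->blocked_tasks[0]
	 *    or ->blocked_tasks[1] list.
	 */
	synchronize_sched_expedited();

	/*
	 * 2. Snapshot the blockers: seed ->expmask from ->qsmaskinit in
	 *    the non-leaf rcu_node structures, then splice each leaf's
	 *    ->blocked_tasks[0/1] lists onto ->blocked_tasks[2/3].
	 */
	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp)
		rnp->expmask = rnp->qsmaskinit;
	rcu_for_each_leaf_node(rsp, rnp)
		sync_rcu_preempt_exp_init(rsp, rnp);

	/*
	 * 3. Wait for the snapshotted lists to drain; the last reader to
	 *    leave propagates completion up the tree via
	 *    rcu_report_exp_rnp() and wakes this waiter.
	 */
	rnp = rcu_get_root(rsp);
	wait_event(sync_rcu_preempt_exp_wq, sync_rcu_preempt_exp_done(rnp));
}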
kernel/rcutorture.c
... | ... | @@ -327,6 +327,11 @@ |
327 | 327 | cur_ops->deferred_free(rp); |
328 | 328 | } |
329 | 329 | |
330 | +static int rcu_no_completed(void) | |
331 | +{ | |
332 | + return 0; | |
333 | +} | |
334 | + | |
330 | 335 | static void rcu_torture_deferred_free(struct rcu_torture *p) |
331 | 336 | { |
332 | 337 | call_rcu(&p->rtort_rcu, rcu_torture_cb); |
... | ... | @@ -388,6 +393,21 @@ |
388 | 393 | .name = "rcu_sync" |
389 | 394 | }; |
390 | 395 | |
396 | +static struct rcu_torture_ops rcu_expedited_ops = { | |
397 | + .init = rcu_sync_torture_init, | |
398 | + .cleanup = NULL, | |
399 | + .readlock = rcu_torture_read_lock, | |
400 | + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | |
401 | + .readunlock = rcu_torture_read_unlock, | |
402 | + .completed = rcu_no_completed, | |
403 | + .deferred_free = rcu_sync_torture_deferred_free, | |
404 | + .sync = synchronize_rcu_expedited, | |
405 | + .cb_barrier = NULL, | |
406 | + .stats = NULL, | |
407 | + .irq_capable = 1, | |
408 | + .name = "rcu_expedited" | |
409 | +}; | |
410 | + | |
391 | 411 | /* |
392 | 412 | * Definitions for rcu_bh torture testing. |
393 | 413 | */ |
... | ... | @@ -581,11 +601,6 @@ |
581 | 601 | preempt_enable(); |
582 | 602 | } |
583 | 603 | |
584 | -static int sched_torture_completed(void) | |
585 | -{ | |
586 | - return 0; | |
587 | -} | |
588 | - | |
589 | 604 | static void rcu_sched_torture_deferred_free(struct rcu_torture *p) |
590 | 605 | { |
591 | 606 | call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); |
... | ... | @@ -602,7 +617,7 @@ |
602 | 617 | .readlock = sched_torture_read_lock, |
603 | 618 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
604 | 619 | .readunlock = sched_torture_read_unlock, |
605 | - .completed = sched_torture_completed, | |
620 | + .completed = rcu_no_completed, | |
606 | 621 | .deferred_free = rcu_sched_torture_deferred_free, |
607 | 622 | .sync = sched_torture_synchronize, |
608 | 623 | .cb_barrier = rcu_barrier_sched, |
... | ... | @@ -617,7 +632,7 @@ |
617 | 632 | .readlock = sched_torture_read_lock, |
618 | 633 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
619 | 634 | .readunlock = sched_torture_read_unlock, |
620 | - .completed = sched_torture_completed, | |
635 | + .completed = rcu_no_completed, | |
621 | 636 | .deferred_free = rcu_sync_torture_deferred_free, |
622 | 637 | .sync = sched_torture_synchronize, |
623 | 638 | .cb_barrier = NULL, |
... | ... | @@ -631,7 +646,7 @@ |
631 | 646 | .readlock = sched_torture_read_lock, |
632 | 647 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
633 | 648 | .readunlock = sched_torture_read_unlock, |
634 | - .completed = sched_torture_completed, | |
649 | + .completed = rcu_no_completed, | |
635 | 650 | .deferred_free = rcu_sync_torture_deferred_free, |
636 | 651 | .sync = synchronize_sched_expedited, |
637 | 652 | .cb_barrier = NULL, |
... | ... | @@ -1116,7 +1131,8 @@ |
1116 | 1131 | int cpu; |
1117 | 1132 | int firsterr = 0; |
1118 | 1133 | static struct rcu_torture_ops *torture_ops[] = |
1119 | - { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, | |
1134 | + { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | |
1135 | + &rcu_bh_ops, &rcu_bh_sync_ops, | |
1120 | 1136 | &srcu_ops, &srcu_expedited_ops, |
1121 | 1137 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
1122 | 1138 |
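The new rcu_expedited_ops vector reuses the generic read-side hooks and routes .sync through synchronize_rcu_expedited(), so every grace-period wait the harness issues via cur_ops->sync() exercises the expedited primitive when this variant is selected. Selection is by name through rcutorture's torture_type module parameter (e.g. modprobe rcutorture torture_type=rcu_expedited); the snippet below is a simplified model of that lookup, not the code in rcu_torture_init(), and reuses only the struct and names shown above.

/*
 * Simplified, illustrative model of rcutorture's by-name ops lookup.
 * Selecting "rcu_expedited" makes cur_ops->sync() call
 * synchronize_rcu_expedited().
 */
static struct rcu_torture_ops *
rcu_torture_find_ops(struct rcu_torture_ops **ops, int n, const char *name)
{
	int i;

	for (i = 0; i < n; i++)
		if (strcmp(name, ops[i]->name) == 0)
			return ops[i];
	return NULL;	/* unknown torture_type */
}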
kernel/rcutree.c
... | ... | @@ -948,7 +948,7 @@ |
948 | 948 | { |
949 | 949 | unsigned long flags; |
950 | 950 | unsigned long mask; |
951 | - int need_quiet = 0; | |
951 | + int need_report = 0; | |
952 | 952 | struct rcu_data *rdp = rsp->rda[cpu]; |
953 | 953 | struct rcu_node *rnp; |
954 | 954 | |
... | ... | @@ -967,7 +967,7 @@ |
967 | 967 | break; |
968 | 968 | } |
969 | 969 | if (rnp == rdp->mynode) |
970 | - need_quiet = rcu_preempt_offline_tasks(rsp, rnp, rdp); | |
970 | + need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); | |
971 | 971 | else |
972 | 972 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
973 | 973 | mask = rnp->grpmask; |
974 | 974 | |
... | ... | @@ -982,10 +982,12 @@ |
982 | 982 | */ |
983 | 983 | spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ |
984 | 984 | rnp = rdp->mynode; |
985 | - if (need_quiet) | |
985 | + if (need_report & RCU_OFL_TASKS_NORM_GP) | |
986 | 986 | rcu_report_unblock_qs_rnp(rnp, flags); |
987 | 987 | else |
988 | 988 | spin_unlock_irqrestore(&rnp->lock, flags); |
989 | + if (need_report & RCU_OFL_TASKS_EXP_GP) | |
990 | + rcu_report_exp_rnp(rsp, rnp); | |
989 | 991 | |
990 | 992 | rcu_adopt_orphan_cbs(rsp); |
991 | 993 | } |
... | ... | @@ -1843,6 +1845,8 @@ |
1843 | 1845 | rnp->level = i; |
1844 | 1846 | INIT_LIST_HEAD(&rnp->blocked_tasks[0]); |
1845 | 1847 | INIT_LIST_HEAD(&rnp->blocked_tasks[1]); |
1848 | + INIT_LIST_HEAD(&rnp->blocked_tasks[2]); | |
1849 | + INIT_LIST_HEAD(&rnp->blocked_tasks[3]); | |
1846 | 1850 | } |
1847 | 1851 | } |
1848 | 1852 | } |
kernel/rcutree.h
... | ... | @@ -104,8 +104,12 @@ |
104 | 104 | /* an rcu_data structure, otherwise, each */ |
105 | 105 | /* bit corresponds to a child rcu_node */ |
106 | 106 | /* structure. */ |
107 | + unsigned long expmask; /* Groups that have ->blocked_tasks[] */ | |
108 | + /* elements that need to drain to allow the */ | |
109 | + /* current expedited grace period to */ | |
110 | + /* complete (only for TREE_PREEMPT_RCU). */ | |
107 | 111 | unsigned long qsmaskinit; |
108 | - /* Per-GP initialization for qsmask. */ | |
112 | + /* Per-GP initial value for qsmask & expmask. */ | |
109 | 113 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ |
110 | 114 | /* Only one bit will be set in this mask. */ |
111 | 115 | int grplo; /* lowest-numbered CPU or group here. */ |
... | ... | @@ -113,7 +117,7 @@ |
113 | 117 | u8 grpnum; /* CPU/group number for next level up. */ |
114 | 118 | u8 level; /* root is at level 0. */ |
115 | 119 | struct rcu_node *parent; |
116 | - struct list_head blocked_tasks[2]; | |
120 | + struct list_head blocked_tasks[4]; | |
117 | 121 | /* Tasks blocked in RCU read-side critsect. */ |
118 | 122 | /* Grace period number (->gpnum) x blocked */ |
119 | 123 | /* by tasks on the (x & 0x1) element of the */ |
... | ... | @@ -128,6 +132,21 @@ |
128 | 132 | for ((rnp) = &(rsp)->node[0]; \ |
129 | 133 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) |
130 | 134 | |
135 | +/* | |
136 | + * Do a breadth-first scan of the non-leaf rcu_node structures for the | |
137 | + * specified rcu_state structure. Note that if there is a singleton | |
138 | + * rcu_node tree with but one rcu_node structure, this loop is a no-op. | |
139 | + */ | |
140 | +#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ | |
141 | + for ((rnp) = &(rsp)->node[0]; \ | |
142 | + (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) | |
143 | + | |
144 | +/* | |
145 | + * Scan the leaves of the rcu_node hierarchy for the specified rcu_state | |
146 | + * structure. Note that if there is a singleton rcu_node tree with but | |
147 | + * one rcu_node structure, this loop -will- visit the rcu_node structure. | |
148 | + * It is still a leaf node, even if it is also the root node. | |
149 | + */ | |
131 | 150 | #define rcu_for_each_leaf_node(rsp, rnp) \ |
132 | 151 | for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ |
133 | 152 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) |
... | ... | @@ -261,7 +280,7 @@ |
261 | 280 | long gpnum; /* Current gp number. */ |
262 | 281 | long completed; /* # of last completed gp. */ |
263 | 282 | |
264 | - /* End of fields guarded by root rcu_node's lock. */ | |
283 | + /* End of fields guarded by root rcu_node's lock. */ | |
265 | 284 | |
266 | 285 | spinlock_t onofflock; /* exclude on/offline and */ |
267 | 286 | /* starting new GP. Also */ |
... | ... | @@ -293,6 +312,13 @@ |
293 | 312 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
294 | 313 | }; |
295 | 314 | |
315 | +/* Return values for rcu_preempt_offline_tasks(). */ | |
316 | + | |
317 | +#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ | |
318 | + /* GP were moved to root. */ | |
319 | +#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ | |
320 | + /* GP were moved to root. */ | |
321 | + | |
296 | 322 | #ifdef RCU_TREE_NONCORE |
297 | 323 | |
298 | 324 | /* |
... | ... | @@ -333,6 +359,9 @@ |
333 | 359 | static void rcu_preempt_check_callbacks(int cpu); |
334 | 360 | static void rcu_preempt_process_callbacks(void); |
335 | 361 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); |
362 | +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) | |
363 | +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); | |
364 | +#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | |
336 | 365 | static int rcu_preempt_pending(int cpu); |
337 | 366 | static int rcu_preempt_needs_cpu(int cpu); |
338 | 367 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); |
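The enlarged blocked_tasks[] array and the new expmask field track two independent conditions per rcu_node: which queued tasks block the current normal grace period and which block the expedited one. The helpers below merely restate the indexing convention used by rcu_preempted_readers() and rcu_preempted_readers_exp() in kernel/rcutree_plugin.h; they are illustrative, not additions to the patch.

/*
 * Illustrative restatement of the ->blocked_tasks[] indexing convention:
 * elements 0 and 1 hold tasks blocking the normal GP, selected by the
 * low-order bit of ->gpnum; elements 2 and 3 receive those same tasks
 * when sync_rcu_preempt_exp_init() splices the lists at the start of an
 * expedited GP.
 */
static int node_blocks_normal_gp(struct rcu_node *rnp)
{
	int phase = rnp->gpnum & 0x1;

	/* Spliced tasks still block the normal GP, hence "phase + 2". */
	return !list_empty(&rnp->blocked_tasks[phase]) ||
	       !list_empty(&rnp->blocked_tasks[phase + 2]);
}

static int node_blocks_expedited_gp(struct rcu_node *rnp)
{
	/* Only tasks captured by the splice block the expedited GP. */
	return !list_empty(&rnp->blocked_tasks[2]) ||
	       !list_empty(&rnp->blocked_tasks[3]);
}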
kernel/rcutree_plugin.h
... | ... | @@ -24,12 +24,15 @@ |
24 | 24 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
25 | 25 | */ |
26 | 26 | |
27 | +#include <linux/delay.h> | |
27 | 28 | |
28 | 29 | #ifdef CONFIG_TREE_PREEMPT_RCU |
29 | 30 | |
30 | 31 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); |
31 | 32 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
32 | 33 | |
34 | +static int rcu_preempted_readers_exp(struct rcu_node *rnp); | |
35 | + | |
33 | 36 | /* |
34 | 37 | * Tell them what RCU they are running. |
35 | 38 | */ |
... | ... | @@ -157,7 +160,10 @@ |
157 | 160 | */ |
158 | 161 | static int rcu_preempted_readers(struct rcu_node *rnp) |
159 | 162 | { |
160 | - return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); | |
163 | + int phase = rnp->gpnum & 0x1; | |
164 | + | |
165 | + return !list_empty(&rnp->blocked_tasks[phase]) || | |
166 | + !list_empty(&rnp->blocked_tasks[phase + 2]); | |
161 | 167 | } |
162 | 168 | |
163 | 169 | /* |
... | ... | @@ -204,6 +210,7 @@ |
204 | 210 | static void rcu_read_unlock_special(struct task_struct *t) |
205 | 211 | { |
206 | 212 | int empty; |
213 | + int empty_exp; | |
207 | 214 | unsigned long flags; |
208 | 215 | struct rcu_node *rnp; |
209 | 216 | int special; |
... | ... | @@ -247,6 +254,8 @@ |
247 | 254 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
248 | 255 | } |
249 | 256 | empty = !rcu_preempted_readers(rnp); |
257 | + empty_exp = !rcu_preempted_readers_exp(rnp); | |
258 | + smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | |
250 | 259 | list_del_init(&t->rcu_node_entry); |
251 | 260 | t->rcu_blocked_node = NULL; |
252 | 261 | |
... | ... | @@ -259,6 +268,13 @@ |
259 | 268 | spin_unlock_irqrestore(&rnp->lock, flags); |
260 | 269 | else |
261 | 270 | rcu_report_unblock_qs_rnp(rnp, flags); |
271 | + | |
272 | + /* | |
273 | + * If this was the last task on the expedited lists, | |
274 | + * then we need to report up the rcu_node hierarchy. | |
275 | + */ | |
276 | + if (!empty_exp && !rcu_preempted_readers_exp(rnp)) | |
277 | + rcu_report_exp_rnp(&rcu_preempt_state, rnp); | |
262 | 278 | } else { |
263 | 279 | local_irq_restore(flags); |
264 | 280 | } |
... | ... | @@ -343,7 +359,7 @@ |
343 | 359 | int i; |
344 | 360 | struct list_head *lp; |
345 | 361 | struct list_head *lp_root; |
346 | - int retval; | |
362 | + int retval = 0; | |
347 | 363 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
348 | 364 | struct task_struct *tp; |
349 | 365 | |
... | ... | @@ -353,7 +369,9 @@ |
353 | 369 | } |
354 | 370 | WARN_ON_ONCE(rnp != rdp->mynode && |
355 | 371 | (!list_empty(&rnp->blocked_tasks[0]) || |
356 | - !list_empty(&rnp->blocked_tasks[1]))); | |
372 | + !list_empty(&rnp->blocked_tasks[1]) || | |
373 | + !list_empty(&rnp->blocked_tasks[2]) || | |
374 | + !list_empty(&rnp->blocked_tasks[3]))); | |
357 | 375 | |
358 | 376 | /* |
359 | 377 | * Move tasks up to root rcu_node. Rely on the fact that the |
... | ... | @@ -361,8 +379,11 @@ |
361 | 379 | * rcu_nodes in terms of gp_num value. This fact allows us to |
362 | 380 | * move the blocked_tasks[] array directly, element by element. |
363 | 381 | */ |
364 | - retval = rcu_preempted_readers(rnp); | |
365 | - for (i = 0; i < 2; i++) { | |
382 | + if (rcu_preempted_readers(rnp)) | |
383 | + retval |= RCU_OFL_TASKS_NORM_GP; | |
384 | + if (rcu_preempted_readers_exp(rnp)) | |
385 | + retval |= RCU_OFL_TASKS_EXP_GP; | |
386 | + for (i = 0; i < 4; i++) { | |
366 | 387 | lp = &rnp->blocked_tasks[i]; |
367 | 388 | lp_root = &rnp_root->blocked_tasks[i]; |
368 | 389 | while (!list_empty(lp)) { |
369 | 390 | |
370 | 391 | |
371 | 392 | |
... | ... | @@ -449,14 +470,159 @@ |
449 | 470 | } |
450 | 471 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
451 | 472 | |
473 | +static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); | |
474 | +static long sync_rcu_preempt_exp_count; | |
475 | +static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | |
476 | + | |
452 | 477 | /* |
453 | - * Wait for an rcu-preempt grace period. We are supposed to expedite the | |
454 | - * grace period, but this is the crude slow compatability hack, so just | |
455 | - * invoke synchronize_rcu(). | |
478 | + * Return non-zero if there are any tasks in RCU read-side critical | |
479 | + * sections blocking the current preemptible-RCU expedited grace period. | |
480 | + * If there is no preemptible-RCU expedited grace period currently in | |
481 | + * progress, returns zero unconditionally. | |
456 | 482 | */ |
483 | +static int rcu_preempted_readers_exp(struct rcu_node *rnp) | |
484 | +{ | |
485 | + return !list_empty(&rnp->blocked_tasks[2]) || | |
486 | + !list_empty(&rnp->blocked_tasks[3]); | |
487 | +} | |
488 | + | |
489 | +/* | |
490 | + * return non-zero if there is no RCU expedited grace period in progress | |
491 | + * for the specified rcu_node structure, in other words, if all CPUs and | |
492 | + * tasks covered by the specified rcu_node structure have done their bit | |
493 | + * for the current expedited grace period. Works only for preemptible | |
494 | + * RCU -- other RCU implementation use other means. | |
495 | + * | |
496 | + * Caller must hold sync_rcu_preempt_exp_mutex. | |
497 | + */ | |
498 | +static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | |
499 | +{ | |
500 | + return !rcu_preempted_readers_exp(rnp) && | |
501 | + ACCESS_ONCE(rnp->expmask) == 0; | |
502 | +} | |
503 | + | |
504 | +/* | |
505 | + * Report the exit from RCU read-side critical section for the last task | |
506 | + * that queued itself during or before the current expedited preemptible-RCU | |
507 | + * grace period. This event is reported either to the rcu_node structure on | |
508 | + * which the task was queued or to one of that rcu_node structure's ancestors, | |
509 | + * recursively up the tree. (Calm down, calm down, we do the recursion | |
510 | + * iteratively!) | |
511 | + * | |
512 | + * Caller must hold sync_rcu_preempt_exp_mutex. | |
513 | + */ | |
514 | +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |
515 | +{ | |
516 | + unsigned long flags; | |
517 | + unsigned long mask; | |
518 | + | |
519 | + spin_lock_irqsave(&rnp->lock, flags); | |
520 | + for (;;) { | |
521 | + if (!sync_rcu_preempt_exp_done(rnp)) | |
522 | + break; | |
523 | + if (rnp->parent == NULL) { | |
524 | + wake_up(&sync_rcu_preempt_exp_wq); | |
525 | + break; | |
526 | + } | |
527 | + mask = rnp->grpmask; | |
528 | + spin_unlock(&rnp->lock); /* irqs remain disabled */ | |
529 | + rnp = rnp->parent; | |
530 | + spin_lock(&rnp->lock); /* irqs already disabled */ | |
531 | + rnp->expmask &= ~mask; | |
532 | + } | |
533 | + spin_unlock_irqrestore(&rnp->lock, flags); | |
534 | +} | |
535 | + | |
536 | +/* | |
537 | + * Snapshot the tasks blocking the newly started preemptible-RCU expedited | |
538 | + * grace period for the specified rcu_node structure. If there are no such | |
539 | + * tasks, report it up the rcu_node hierarchy. | |
540 | + * | |
541 | + * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. | |
542 | + */ | |
543 | +static void | |
544 | +sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |
545 | +{ | |
546 | + int must_wait; | |
547 | + | |
548 | + spin_lock(&rnp->lock); /* irqs already disabled */ | |
549 | + list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); | |
550 | + list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); | |
551 | + must_wait = rcu_preempted_readers_exp(rnp); | |
552 | + spin_unlock(&rnp->lock); /* irqs remain disabled */ | |
553 | + if (!must_wait) | |
554 | + rcu_report_exp_rnp(rsp, rnp); | |
555 | +} | |
556 | + | |
557 | +/* | |
558 | + * Wait for an rcu-preempt grace period, but expedite it. The basic idea | |
559 | + * is to invoke synchronize_sched_expedited() to push all the tasks to | |
560 | + * the ->blocked_tasks[] lists, move all entries from the first set of | |
561 | + * ->blocked_tasks[] lists to the second set, and finally wait for this | |
562 | + * second set to drain. | |
563 | + */ | |
457 | 564 | void synchronize_rcu_expedited(void) |
458 | 565 | { |
459 | - synchronize_rcu(); | |
566 | + unsigned long flags; | |
567 | + struct rcu_node *rnp; | |
568 | + struct rcu_state *rsp = &rcu_preempt_state; | |
569 | + long snap; | |
570 | + int trycount = 0; | |
571 | + | |
572 | + smp_mb(); /* Caller's modifications seen first by other CPUs. */ | |
573 | + snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; | |
574 | + smp_mb(); /* Above access cannot bleed into critical section. */ | |
575 | + | |
576 | + /* | |
577 | + * Acquire lock, falling back to synchronize_rcu() if too many | |
578 | + * lock-acquisition failures. Of course, if someone does the | |
579 | + * expedited grace period for us, just leave. | |
580 | + */ | |
581 | + while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { | |
582 | + if (trycount++ < 10) | |
583 | + udelay(trycount * num_online_cpus()); | |
584 | + else { | |
585 | + synchronize_rcu(); | |
586 | + return; | |
587 | + } | |
588 | + if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | |
589 | + goto mb_ret; /* Others did our work for us. */ | |
590 | + } | |
591 | + if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | |
592 | + goto unlock_mb_ret; /* Others did our work for us. */ | |
593 | + | |
594 | + /* force all RCU readers onto blocked_tasks[]. */ | |
595 | + synchronize_sched_expedited(); | |
596 | + | |
597 | + spin_lock_irqsave(&rsp->onofflock, flags); | |
598 | + | |
599 | + /* Initialize ->expmask for all non-leaf rcu_node structures. */ | |
600 | + rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { | |
601 | + spin_lock(&rnp->lock); /* irqs already disabled. */ | |
602 | + rnp->expmask = rnp->qsmaskinit; | |
603 | + spin_unlock(&rnp->lock); /* irqs remain disabled. */ | |
604 | + } | |
605 | + | |
606 | + /* Snapshot current state of ->blocked_tasks[] lists. */ | |
607 | + rcu_for_each_leaf_node(rsp, rnp) | |
608 | + sync_rcu_preempt_exp_init(rsp, rnp); | |
609 | + if (NUM_RCU_NODES > 1) | |
610 | + sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); | |
611 | + | |
612 | + spin_unlock_irqrestore(&rsp->onofflock, flags); | |
613 | + | |
614 | + /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ | |
615 | + rnp = rcu_get_root(rsp); | |
616 | + wait_event(sync_rcu_preempt_exp_wq, | |
617 | + sync_rcu_preempt_exp_done(rnp)); | |
618 | + | |
619 | + /* Clean up and exit. */ | |
620 | + smp_mb(); /* ensure expedited GP seen before counter increment. */ | |
621 | + ACCESS_ONCE(sync_rcu_preempt_exp_count)++; | |
622 | +unlock_mb_ret: | |
623 | + mutex_unlock(&sync_rcu_preempt_exp_mutex); | |
624 | +mb_ret: | |
625 | + smp_mb(); /* ensure subsequent action seen after grace period. */ | |
460 | 626 | } |
461 | 627 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
462 | 628 | |
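The trylock/snapshot preamble of synchronize_rcu_expedited() above deserves a word. Sampling sync_rcu_preempt_exp_count before contending for the mutex lets a caller notice that some other task has already completed an entire expedited grace period on its behalf and return without doing any work; because a grace period already in flight at snapshot time does not count, the counter must advance by two before the fast path may be taken. The fragment below is an annotated excerpt of the code above, not new code.

	/* Annotated excerpt of the fast path in synchronize_rcu_expedited(). */
	snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
	smp_mb();	/* Order the snapshot before any later GP work. */

	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
		if (trycount++ < 10)
			udelay(trycount * num_online_cpus());	/* back off */
		else {
			synchronize_rcu();	/* too contended: stop expediting */
			return;
		}
		/*
		 * Counter two or more steps past the sampled value: at least
		 * one full expedited GP began and ended after our snapshot,
		 * so someone else already did our work.
		 */
		if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
			goto mb_ret;
	}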
... | ... | @@ -654,6 +820,20 @@ |
654 | 820 | synchronize_sched_expedited(); |
655 | 821 | } |
656 | 822 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
823 | + | |
824 | +#ifdef CONFIG_HOTPLUG_CPU | |
825 | + | |
826 | +/* | |
827 | + * Because preemptable RCU does not exist, there is never any need to | |
828 | + * report on tasks preempted in RCU read-side critical sections during | |
829 | + * expedited RCU grace periods. | |
830 | + */ | |
831 | +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |
832 | +{ | |
833 | + return; | |
834 | +} | |
835 | + | |
836 | +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ | |
657 | 837 | |
658 | 838 | /* |
659 | 839 | * Because preemptable RCU does not exist, it never has any work to do. |
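Returning to the preemptible implementation above: completion of the expedited grace period propagates upward through rcu_report_exp_rnp(). The minimal model below, which is not part of the patch, illustrates only the walk itself; locking, the ->blocked_tasks[] test folded into sync_rcu_preempt_exp_done(), and the wait-queue wake-up are reduced to comments, and it is assumed the function runs when a node's last expedited blocker goes away.

/* Minimal, illustrative model of the upward walk in rcu_report_exp_rnp(). */
struct exp_node {
	unsigned long expmask;		/* children (or CPUs) still blocking */
	unsigned long grpmask;		/* this node's bit in parent->expmask */
	struct exp_node *parent;	/* NULL at the root */
};

static void report_exp_done(struct exp_node *np)
{
	while (np->expmask == 0) {	/* everything below np has drained */
		if (!np->parent) {
			/* Root drained: wake synchronize_rcu_expedited(). */
			break;
		}
		np->parent->expmask &= ~np->grpmask;	/* clear our bit */
		np = np->parent;			/* and re-check there */
	}
}

With two leaves under one root, the leaf that drains first merely clears its bit in the root's expmask and stops; the second leaf's report then finds the root's mask empty and performs the wake-up.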
kernel/rcutree_trace.c
... | ... | @@ -157,6 +157,7 @@ |
157 | 157 | { |
158 | 158 | long gpnum; |
159 | 159 | int level = 0; |
160 | + int phase; | |
160 | 161 | struct rcu_node *rnp; |
161 | 162 | |
162 | 163 | gpnum = rsp->gpnum; |
163 | 164 | |
... | ... | @@ -173,10 +174,13 @@ |
173 | 174 | seq_puts(m, "\n"); |
174 | 175 | level = rnp->level; |
175 | 176 | } |
176 | - seq_printf(m, "%lx/%lx %c>%c %d:%d ^%d ", | |
177 | + phase = gpnum & 0x1; | |
178 | + seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ", | |
177 | 179 | rnp->qsmask, rnp->qsmaskinit, |
178 | - "T."[list_empty(&rnp->blocked_tasks[gpnum & 1])], | |
179 | - "T."[list_empty(&rnp->blocked_tasks[!(gpnum & 1)])], | |
180 | + "T."[list_empty(&rnp->blocked_tasks[phase])], | |
181 | + "E."[list_empty(&rnp->blocked_tasks[phase + 2])], | |
182 | + "T."[list_empty(&rnp->blocked_tasks[!phase])], | |
183 | + "E."[list_empty(&rnp->blocked_tasks[!phase + 2])], | |
180 | 184 | rnp->grplo, rnp->grphi, rnp->grpnum); |
181 | 185 | } |
182 | 186 | seq_puts(m, "\n"); |
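With this change, each rcuhier line prints four blocked-task flags per rcu_node instead of two: 'T' when the normal ->blocked_tasks[] list for that phase is non-empty and 'E' when the corresponding expedited list is, with the current grace-period phase to the left of the '>' and the other phase to the right. A purely hypothetical line under the new format, with all values invented for illustration, might read:

3/3 .E>.. 0:5 ^0

that is, qsmask and qsmaskinit of 0x3; for the current phase the un-spliced list is empty but the expedited snapshot list still holds tasks (which also block the normal grace period); both lists for the other phase are empty; the node covers CPUs 0-5 and is group 0 within its parent.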