Commit 7e8b4c72344e0d904b0e3fa9fd2eb116f04b3d41

Authored by Paul E. McKenney
Committed by Paul E. McKenney
1 parent 203373c81b

rcu: Converge TINY_RCU expedited and normal boosting

This applies a trick from TREE_RCU boosting to TINY_RCU, eliminating
code and adding comments.  The key point is that it is possible for
the booster thread itself to work out whether there is a normal or
expedited boost required based solely on local information.  There
is therefore no need for boost initiation to know or care what type
of boosting is required.  In addition, when boosting is complete for
a given grace period, then by definition there cannot be any more
boosting for that grace period.  This allows eliminating yet more
state and statistics.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>

Showing 1 changed file with 89 additions and 74 deletions (side-by-side diff)

kernel/rcutiny_plugin.h
... ... @@ -106,15 +106,22 @@
106 106 unsigned long n_grace_periods;
107 107 #ifdef CONFIG_RCU_BOOST
108 108 unsigned long n_tasks_boosted;
  109 + /* Total number of tasks boosted. */
109 110 unsigned long n_exp_boosts;
  111 + /* Number of tasks boosted for expedited GP. */
110 112 unsigned long n_normal_boosts;
111   - unsigned long n_normal_balk_blkd_tasks;
112   - unsigned long n_normal_balk_gp_tasks;
113   - unsigned long n_normal_balk_boost_tasks;
114   - unsigned long n_normal_balk_notyet;
115   - unsigned long n_normal_balk_nos;
116   - unsigned long n_exp_balk_blkd_tasks;
117   - unsigned long n_exp_balk_nos;
  113 + /* Number of tasks boosted for normal GP. */
  114 + unsigned long n_balk_blkd_tasks;
  115 + /* Refused to boost: no blocked tasks. */
  116 + unsigned long n_balk_exp_gp_tasks;
  117 + /* Refused to boost: nothing blocking GP. */
  118 + unsigned long n_balk_boost_tasks;
  119 + /* Refused to boost: already boosting. */
  120 + unsigned long n_balk_notyet;
  121 + /* Refused to boost: not yet time. */
  122 + unsigned long n_balk_nos;
  123 + /* Refused to boost: not sure why, though. */
  124 + /* This can happen due to race conditions. */
118 125 #endif /* #ifdef CONFIG_RCU_BOOST */
119 126 #endif /* #ifdef CONFIG_RCU_TRACE */
120 127 };
... ... @@ -199,7 +206,6 @@
199 206  
200 207 #ifdef CONFIG_RCU_BOOST
201 208 static void rcu_initiate_boost_trace(void);
202   -static void rcu_initiate_exp_boost_trace(void);
203 209 #endif /* #ifdef CONFIG_RCU_BOOST */
204 210  
205 211 /*
... ... @@ -225,16 +231,13 @@
225 231 rcu_preempt_ctrlblk.n_normal_boosts,
226 232 (int)(jiffies & 0xffff),
227 233 (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
228   - seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu ny=%lu nos=%lu\n",
229   - "normal balk",
230   - rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks,
231   - rcu_preempt_ctrlblk.n_normal_balk_gp_tasks,
232   - rcu_preempt_ctrlblk.n_normal_balk_boost_tasks,
233   - rcu_preempt_ctrlblk.n_normal_balk_notyet,
234   - rcu_preempt_ctrlblk.n_normal_balk_nos);
235   - seq_printf(m, " exp balk: bt=%lu nos=%lu\n",
236   - rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
237   - rcu_preempt_ctrlblk.n_exp_balk_nos);
  234 + seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n",
  235 + " balk",
  236 + rcu_preempt_ctrlblk.n_balk_blkd_tasks,
  237 + rcu_preempt_ctrlblk.n_balk_exp_gp_tasks,
  238 + rcu_preempt_ctrlblk.n_balk_boost_tasks,
  239 + rcu_preempt_ctrlblk.n_balk_notyet,
  240 + rcu_preempt_ctrlblk.n_balk_nos);
238 241 #endif /* #ifdef CONFIG_RCU_BOOST */
239 242 }
240 243  
241 244  
242 245  
243 246  
244 247  
245 248  
... ... @@ -252,23 +255,59 @@
252 255 {
253 256 unsigned long flags;
254 257 struct rt_mutex mtx;
255   - struct list_head *np;
256 258 struct task_struct *t;
  259 + struct list_head *tb;
257 260  
258   - if (rcu_preempt_ctrlblk.boost_tasks == NULL)
  261 + if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
  262 + rcu_preempt_ctrlblk.exp_tasks == NULL)
259 263 return 0; /* Nothing to boost. */
  264 +
260 265 raw_local_irq_save(flags);
261   - t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
262   - rcu_node_entry);
263   - np = rcu_next_node_entry(t);
  266 +
  267 + /*
  268 + * Recheck with irqs disabled: all tasks in need of boosting
  269 + * might exit their RCU read-side critical sections on their own
  270 + * if we are preempted just before disabling irqs.
  271 + */
  272 + if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
  273 + rcu_preempt_ctrlblk.exp_tasks == NULL) {
  274 + raw_local_irq_restore(flags);
  275 + return 0;
  276 + }
  277 +
  278 + /*
  279 + * Preferentially boost tasks blocking expedited grace periods.
  280 + * This cannot starve the normal grace periods because a second
  281 + * expedited grace period must boost all blocked tasks, including
  282 + * those blocking the pre-existing normal grace period.
  283 + */
  284 + if (rcu_preempt_ctrlblk.exp_tasks != NULL) {
  285 + tb = rcu_preempt_ctrlblk.exp_tasks;
  286 + RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
  287 + } else {
  288 + tb = rcu_preempt_ctrlblk.boost_tasks;
  289 + RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
  290 + }
  291 + RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
  292 +
  293 + /*
  294 + * We boost task t by manufacturing an rt_mutex that appears to
  295 + * be held by task t. We leave a pointer to that rt_mutex where
  296 + * task t can find it, and task t will release the mutex when it
  297 + * exits its outermost RCU read-side critical section. Then
  298 + * simply acquiring this artificial rt_mutex will boost task
  299 + * t's priority. (Thanks to tglx for suggesting this approach!)
  300 + */
  301 + t = container_of(tb, struct task_struct, rcu_node_entry);
264 302 rt_mutex_init_proxy_locked(&mtx, t);
265 303 t->rcu_boost_mutex = &mtx;
266 304 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
267 305 raw_local_irq_restore(flags);
268 306 rt_mutex_lock(&mtx);
269   - RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
270   - rt_mutex_unlock(&mtx);
271   - return rcu_preempt_ctrlblk.boost_tasks != NULL;
  307 + rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
  308 +
  309 + return rcu_preempt_ctrlblk.boost_tasks != NULL ||
  310 + rcu_preempt_ctrlblk.exp_tasks != NULL;
272 311 }
273 312  
274 313 /*
275 314  
276 315  
277 316  
... ... @@ -283,39 +322,24 @@
283 322 */
284 323 static int rcu_initiate_boost(void)
285 324 {
286   - if (!rcu_preempt_blocked_readers_cgp()) {
287   - RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++);
  325 + if (!rcu_preempt_blocked_readers_cgp() &&
  326 + rcu_preempt_ctrlblk.exp_tasks == NULL) {
  327 + RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++);
288 328 return 0;
289 329 }
290   - if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
291   - rcu_preempt_ctrlblk.boost_tasks == NULL &&
292   - ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
293   - rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
  330 + if (rcu_preempt_ctrlblk.exp_tasks != NULL ||
  331 + (rcu_preempt_ctrlblk.gp_tasks != NULL &&
  332 + rcu_preempt_ctrlblk.boost_tasks == NULL &&
  333 + ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) {
  334 + if (rcu_preempt_ctrlblk.exp_tasks == NULL)
  335 + rcu_preempt_ctrlblk.boost_tasks =
  336 + rcu_preempt_ctrlblk.gp_tasks;
294 337 invoke_rcu_kthread();
295   - RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
296 338 } else
297 339 RCU_TRACE(rcu_initiate_boost_trace());
298 340 return 1;
299 341 }
300 342  
301   -/*
302   - * Initiate boosting for an expedited grace period.
303   - */
304   -static void rcu_initiate_expedited_boost(void)
305   -{
306   - unsigned long flags;
307   -
308   - raw_local_irq_save(flags);
309   - if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
310   - rcu_preempt_ctrlblk.boost_tasks =
311   - rcu_preempt_ctrlblk.blkd_tasks.next;
312   - invoke_rcu_kthread();
313   - RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
314   - } else
315   - RCU_TRACE(rcu_initiate_exp_boost_trace());
316   - raw_local_irq_restore(flags);
317   -}
318   -
319 343 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
320 344  
321 345 /*
... ... @@ -347,13 +371,6 @@
347 371 }
348 372  
349 373 /*
350   - * If there is no RCU priority boosting, we don't initiate expedited boosting.
351   - */
352   -static void rcu_initiate_expedited_boost(void)
353   -{
354   -}
355   -
356   -/*
357 374 * If there is no RCU priority boosting, nothing to do at grace-period start.
358 375 */
359 376 static void rcu_preempt_boost_start_gp(void)
360 377  
361 378  
... ... @@ -786,13 +803,16 @@
786 803 rpcp->exp_tasks = rpcp->blkd_tasks.next;
787 804 if (rpcp->exp_tasks == &rpcp->blkd_tasks)
788 805 rpcp->exp_tasks = NULL;
789   - local_irq_restore(flags);
790 806  
791 807 /* Wait for tail of ->blkd_tasks list to drain. */
792   - if (rcu_preempted_readers_exp())
793   - rcu_initiate_expedited_boost();
  808 + if (!rcu_preempted_readers_exp())
  809 + local_irq_restore(flags);
  810 + else {
  811 + rcu_initiate_boost();
  812 + local_irq_restore(flags);
794 813 wait_event(sync_rcu_preempt_exp_wq,
795 814 !rcu_preempted_readers_exp());
  815 + }
796 816  
797 817 /* Clean up and exit. */
798 818 barrier(); /* ensure expedited GP seen before counter increment. */
799 819  
800 820  
801 821  
... ... @@ -905,22 +925,17 @@
905 925  
906 926 static void rcu_initiate_boost_trace(void)
907 927 {
908   - if (rcu_preempt_ctrlblk.gp_tasks == NULL)
909   - rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++;
  928 + if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
  929 + rcu_preempt_ctrlblk.n_balk_blkd_tasks++;
  930 + else if (rcu_preempt_ctrlblk.gp_tasks == NULL &&
  931 + rcu_preempt_ctrlblk.exp_tasks == NULL)
  932 + rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++;
910 933 else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
911   - rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++;
  934 + rcu_preempt_ctrlblk.n_balk_boost_tasks++;
912 935 else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
913   - rcu_preempt_ctrlblk.n_normal_balk_notyet++;
  936 + rcu_preempt_ctrlblk.n_balk_notyet++;
914 937 else
915   - rcu_preempt_ctrlblk.n_normal_balk_nos++;
916   -}
917   -
918   -static void rcu_initiate_exp_boost_trace(void)
919   -{
920   - if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
921   - rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++;
922   - else
923   - rcu_preempt_ctrlblk.n_exp_balk_nos++;
  938 + rcu_preempt_ctrlblk.n_balk_nos++;
924 939 }
925 940  
926 941 #endif /* #ifdef CONFIG_RCU_BOOST */