Commit 37c72e56f6b234ea7387ba530434a80abf2658d8

Authored by Paul E. McKenney
Committed by Ingo Molnar
1 parent 2bc872036e

rcu: Prevent RCU IPI storms in presence of high call_rcu() load

As the number of callbacks on a given CPU rises, invoke
force_quiescent_state() only every blimit number of callbacks
(defaults to 10,000), and even then only if no other CPU has
invoked force_quiescent_state() in the meantime.

This should fix the performance regression reported by Nick.

Reported-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: jens.axboe@oracle.com
LKML-Reference: <12555405592133-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 2 changed files with 28 additions and 5 deletions (side-by-side diff)

... ... @@ -958,7 +958,7 @@
958 958 * Invoke any RCU callbacks that have made it to the end of their grace
959 959 * period. Throttle as specified by rdp->blimit.
960 960 */
961   -static void rcu_do_batch(struct rcu_data *rdp)
  961 +static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
962 962 {
963 963 unsigned long flags;
964 964 struct rcu_head *next, *list, **tail;
... ... @@ -1011,6 +1011,13 @@
1011 1011 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
1012 1012 rdp->blimit = blimit;
1013 1013  
  1014 + /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
  1015 + if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
  1016 + rdp->qlen_last_fqs_check = 0;
  1017 + rdp->n_force_qs_snap = rsp->n_force_qs;
  1018 + } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
  1019 + rdp->qlen_last_fqs_check = rdp->qlen;
  1020 +
1014 1021 local_irq_restore(flags);
1015 1022  
1016 1023 /* Re-raise the RCU softirq if there are callbacks remaining. */
... ... @@ -1224,7 +1231,7 @@
1224 1231 }
1225 1232  
1226 1233 /* If there are callbacks ready, invoke them. */
1227   - rcu_do_batch(rdp);
  1234 + rcu_do_batch(rsp, rdp);
1228 1235 }
1229 1236  
1230 1237 /*
1231 1238  
... ... @@ -1288,10 +1295,20 @@
1288 1295 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
1289 1296 }
1290 1297  
1291   - /* Force the grace period if too many callbacks or too long waiting. */
1292   - if (unlikely(++rdp->qlen > qhimark)) {
  1298 + /*
  1299 + * Force the grace period if too many callbacks or too long waiting.
  1300 + * Enforce hysteresis, and don't invoke force_quiescent_state()
  1301 + * if some other CPU has recently done so. Also, don't bother
  1302 + * invoking force_quiescent_state() if the newly enqueued callback
  1303 + * is the only one waiting for a grace period to complete.
  1304 + */
  1305 + if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1293 1306 rdp->blimit = LONG_MAX;
1294   - force_quiescent_state(rsp, 0);
  1307 + if (rsp->n_force_qs == rdp->n_force_qs_snap &&
  1308 + *rdp->nxttail[RCU_DONE_TAIL] != head)
  1309 + force_quiescent_state(rsp, 0);
  1310 + rdp->n_force_qs_snap = rsp->n_force_qs;
  1311 + rdp->qlen_last_fqs_check = rdp->qlen;
1295 1312 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1296 1313 force_quiescent_state(rsp, 1);
1297 1314 local_irq_restore(flags);
... ... @@ -1523,6 +1540,8 @@
1523 1540 rdp->beenonline = 1; /* We have now been online. */
1524 1541 rdp->preemptable = preemptable;
1525 1542 rdp->passed_quiesc_completed = lastcomp - 1;
  1543 + rdp->qlen_last_fqs_check = 0;
  1544 + rdp->n_force_qs_snap = rsp->n_force_qs;
1526 1545 rdp->blimit = blimit;
1527 1546 spin_unlock(&rnp->lock); /* irqs remain disabled. */
1528 1547  
... ... @@ -167,6 +167,10 @@
167 167 struct rcu_head *nxtlist;
168 168 struct rcu_head **nxttail[RCU_NEXT_SIZE];
169 169 long qlen; /* # of queued callbacks */
  170 + long qlen_last_fqs_check;
  171 + /* qlen at last check for QS forcing */
  172 + unsigned long n_force_qs_snap;
  173 + /* did other CPU force QS recently? */
170 174 long blimit; /* Upper limit on a processed batch */
171 175  
172 176 #ifdef CONFIG_NO_HZ