Commit 37c72e56f6b234ea7387ba530434a80abf2658d8
Committed by
Ingo Molnar
1 parent
2bc872036e
rcu: Prevent RCU IPI storms in presence of high call_rcu() load
As the number of callbacks on a given CPU rises, invoke force_quiescent_state() only every qhimark number of callbacks (defaults to 10,000), and even then only if no other CPU has invoked force_quiescent_state() in the meantime. This should fix the performance regression reported by Nick.

Reported-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: jens.axboe@oracle.com
LKML-Reference: <12555405592133-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 2 changed files with 28 additions and 5 deletions Side-by-side Diff
kernel/rcutree.c
... | ... | @@ -958,7 +958,7 @@ |
958 | 958 | * Invoke any RCU callbacks that have made it to the end of their grace |
959 | 959 | * period. Throttle as specified by rdp->blimit. |
960 | 960 | */ |
961 | -static void rcu_do_batch(struct rcu_data *rdp) | |
961 | +static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |
962 | 962 | { |
963 | 963 | unsigned long flags; |
964 | 964 | struct rcu_head *next, *list, **tail; |
... | ... | @@ -1011,6 +1011,13 @@ |
1011 | 1011 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) |
1012 | 1012 | rdp->blimit = blimit; |
1013 | 1013 | |
1014 | + /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ | |
1015 | + if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { | |
1016 | + rdp->qlen_last_fqs_check = 0; | |
1017 | + rdp->n_force_qs_snap = rsp->n_force_qs; | |
1018 | + } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) | |
1019 | + rdp->qlen_last_fqs_check = rdp->qlen; | |
1020 | + | |
1014 | 1021 | local_irq_restore(flags); |
1015 | 1022 | |
1016 | 1023 | /* Re-raise the RCU softirq if there are callbacks remaining. */ |
... | ... | @@ -1224,7 +1231,7 @@ |
1224 | 1231 | } |
1225 | 1232 | |
1226 | 1233 | /* If there are callbacks ready, invoke them. */ |
1227 | - rcu_do_batch(rdp); | |
1234 | + rcu_do_batch(rsp, rdp); | |
1228 | 1235 | } |
1229 | 1236 | |
1230 | 1237 | /* |
1231 | 1238 | |
... | ... | @@ -1288,10 +1295,20 @@ |
1288 | 1295 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ |
1289 | 1296 | } |
1290 | 1297 | |
1291 | - /* Force the grace period if too many callbacks or too long waiting. */ | |
1292 | - if (unlikely(++rdp->qlen > qhimark)) { | |
1298 | + /* | |
1299 | + * Force the grace period if too many callbacks or too long waiting. | |
1300 | + * Enforce hysteresis, and don't invoke force_quiescent_state() | |
1301 | + * if some other CPU has recently done so. Also, don't bother | |
1302 | + * invoking force_quiescent_state() if the newly enqueued callback | |
1303 | + * is the only one waiting for a grace period to complete. | |
1304 | + */ | |
1305 | + if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | |
1293 | 1306 | rdp->blimit = LONG_MAX; |
1294 | - force_quiescent_state(rsp, 0); | |
1307 | + if (rsp->n_force_qs == rdp->n_force_qs_snap && | |
1308 | + *rdp->nxttail[RCU_DONE_TAIL] != head) | |
1309 | + force_quiescent_state(rsp, 0); | |
1310 | + rdp->n_force_qs_snap = rsp->n_force_qs; | |
1311 | + rdp->qlen_last_fqs_check = rdp->qlen; | |
1295 | 1312 | } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) |
1296 | 1313 | force_quiescent_state(rsp, 1); |
1297 | 1314 | local_irq_restore(flags); |
... | ... | @@ -1523,6 +1540,8 @@ |
1523 | 1540 | rdp->beenonline = 1; /* We have now been online. */ |
1524 | 1541 | rdp->preemptable = preemptable; |
1525 | 1542 | rdp->passed_quiesc_completed = lastcomp - 1; |
1543 | + rdp->qlen_last_fqs_check = 0; | |
1544 | + rdp->n_force_qs_snap = rsp->n_force_qs; | |
1526 | 1545 | rdp->blimit = blimit; |
1527 | 1546 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1528 | 1547 |
kernel/rcutree.h
... | ... | @@ -167,6 +167,10 @@ |
167 | 167 | struct rcu_head *nxtlist; |
168 | 168 | struct rcu_head **nxttail[RCU_NEXT_SIZE]; |
169 | 169 | long qlen; /* # of queued callbacks */ |
170 | + long qlen_last_fqs_check; | |
171 | + /* qlen at last check for QS forcing */ | |
172 | + unsigned long n_force_qs_snap; | |
173 | + /* did other CPU force QS recently? */ | |
170 | 174 | long blimit; /* Upper limit on a processed batch */ |
171 | 175 | |
172 | 176 | #ifdef CONFIG_NO_HZ |