Commit ef631b0ca01655d24e9ca7e199262c4a46416a26

Authored by Paul E. McKenney
Committed by Ingo Molnar
1 parent 27b19565fe

rcu: Make hierarchical RCU less IPI-happy

This patch fixes a hierarchical-RCU performance bug located by Anton
Blanchard.  The problem stems from a misguided attempt to provide a
work-around for jiffies-counter failure.  This work-around uses a per-CPU
n_rcu_pending counter, which is incremented on each call to rcu_pending(),
which in turn is called from each scheduling-clock interrupt.  Each CPU
then treats this counter as a surrogate for the jiffies counter, so
that if the jiffies counter fails to advance, the per-CPU n_rcu_pending
counter will cause RCU to invoke force_quiescent_state(), which in turn
will (among other things) send resched IPIs to CPUs that have thus far
failed to pass through an RCU quiescent state.

Unfortunately, each CPU resets only its own counter after sending a
batch of IPIs.  This means that the other CPUs will also (needlessly)
send -another- round of IPIs, for a full N-squared set of IPIs in the
worst case every three scheduling-clock ticks until the grace period
finally ends.  It is not reasonable for a given CPU to reset each and
every n_rcu_pending for all the other CPUs, so this patch instead simply
disables the jiffies-counter "training wheels", thus eliminating the
excessive IPIs.

Note that the IPIs triggered by the jiffies-based check do not have
this problem, due to the fact that the jiffies counter is global, so
that the CPU sending the IPIs can easily reset things, thus preventing
the other CPUs from sending redundant IPIs.

Note also that the n_rcu_pending counter remains, as it will continue to
be used for tracing.  It may also see use to update the jiffies counter,
should an appropriate kick-the-jiffies-counter API appear.

Located-by: Anton Blanchard <anton@au1.ibm.com>
Tested-by: Anton Blanchard <anton@au1.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: anton@samba.org
Cc: akpm@linux-foundation.org
Cc: dipankar@in.ibm.com
Cc: manfred@colorfullife.com
Cc: cl@linux-foundation.org
Cc: josht@linux.vnet.ibm.com
Cc: schamp@sgi.com
Cc: niv@us.ibm.com
Cc: dvhltc@us.ibm.com
Cc: ego@in.ibm.com
Cc: laijs@cn.fujitsu.com
Cc: rostedt@goodmis.org
Cc: peterz@infradead.org
Cc: penberg@cs.helsinki.fi
Cc: andi@firstfloor.org
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
LKML-Reference: <12396834793575-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 3 changed files with 10 additions and 26 deletions Side-by-side Diff

include/linux/rcutree.h
... ... @@ -161,9 +161,8 @@
161 161 unsigned long offline_fqs; /* Kicked due to being offline. */
162 162 unsigned long resched_ipi; /* Sent a resched IPI. */
163 163  
164   - /* 5) state to allow this CPU to force_quiescent_state on others */
  164 + /* 5) For future __rcu_pending statistics. */
165 165 long n_rcu_pending; /* rcu_pending() calls since boot. */
166   - long n_rcu_pending_force_qs; /* when to force quiescent states. */
167 166  
168 167 int cpu;
169 168 };
... ... @@ -530,8 +530,6 @@
530 530 rdp->qs_pending = 1;
531 531 rdp->passed_quiesc = 0;
532 532 rdp->gpnum = rsp->gpnum;
533   - rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
534   - RCU_JIFFIES_TILL_FORCE_QS;
535 533 }
536 534  
537 535 /*
... ... @@ -578,8 +576,6 @@
578 576 rsp->gpnum++;
579 577 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
580 578 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
581   - rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
582   - RCU_JIFFIES_TILL_FORCE_QS;
583 579 record_gp_stall_check_time(rsp);
584 580 dyntick_record_completed(rsp, rsp->completed - 1);
585 581 note_new_gpnum(rsp, rdp);
... ... @@ -1055,7 +1051,6 @@
1055 1051 {
1056 1052 unsigned long flags;
1057 1053 long lastcomp;
1058   - struct rcu_data *rdp = rsp->rda[smp_processor_id()];
1059 1054 struct rcu_node *rnp = rcu_get_root(rsp);
1060 1055 u8 signaled;
1061 1056  
1062 1057  
... ... @@ -1066,16 +1061,13 @@
1066 1061 return; /* Someone else is already on the job. */
1067 1062 }
1068 1063 if (relaxed &&
1069   - (long)(rsp->jiffies_force_qs - jiffies) >= 0 &&
1070   - (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0)
  1064 + (long)(rsp->jiffies_force_qs - jiffies) >= 0)
1071 1065 goto unlock_ret; /* no emergency and done recently. */
1072 1066 rsp->n_force_qs++;
1073 1067 spin_lock(&rnp->lock);
1074 1068 lastcomp = rsp->completed;
1075 1069 signaled = rsp->signaled;
1076 1070 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
1077   - rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
1078   - RCU_JIFFIES_TILL_FORCE_QS;
1079 1071 if (lastcomp == rsp->gpnum) {
1080 1072 rsp->n_force_qs_ngp++;
1081 1073 spin_unlock(&rnp->lock);
... ... @@ -1144,8 +1136,7 @@
1144 1136 * If an RCU GP has gone long enough, go check for dyntick
1145 1137 * idle CPUs and, if needed, send resched IPIs.
1146 1138 */
1147   - if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
1148   - (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
  1139 + if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1149 1140 force_quiescent_state(rsp, 1);
1150 1141  
1151 1142 /*
... ... @@ -1230,8 +1221,7 @@
1230 1221 if (unlikely(++rdp->qlen > qhimark)) {
1231 1222 rdp->blimit = LONG_MAX;
1232 1223 force_quiescent_state(rsp, 0);
1233   - } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
1234   - (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
  1224 + } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1235 1225 force_quiescent_state(rsp, 1);
1236 1226 local_irq_restore(flags);
1237 1227 }
... ... @@ -1290,8 +1280,7 @@
1290 1280  
1291 1281 /* Has an RCU GP gone long enough to send resched IPIs &c? */
1292 1282 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
1293   - ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
1294   - (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0))
  1283 + ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
1295 1284 return 1;
1296 1285  
1297 1286 /* nothing to do */
kernel/rcutree_trace.c
... ... @@ -49,14 +49,12 @@
49 49 {
50 50 if (!rdp->beenonline)
51 51 return;
52   - seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x",
  52 + seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d",
53 53 rdp->cpu,
54 54 cpu_is_offline(rdp->cpu) ? '!' : ' ',
55 55 rdp->completed, rdp->gpnum,
56 56 rdp->passed_quiesc, rdp->passed_quiesc_completed,
57   - rdp->qs_pending,
58   - rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
59   - (int)(rdp->n_rcu_pending & 0xffff));
  57 + rdp->qs_pending);
60 58 #ifdef CONFIG_NO_HZ
61 59 seq_printf(m, " dt=%d/%d dn=%d df=%lu",
62 60 rdp->dynticks->dynticks,
63 61  
... ... @@ -102,14 +100,12 @@
102 100 {
103 101 if (!rdp->beenonline)
104 102 return;
105   - seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld",
  103 + seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d",
106 104 rdp->cpu,
107 105 cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"",
108 106 rdp->completed, rdp->gpnum,
109 107 rdp->passed_quiesc, rdp->passed_quiesc_completed,
110   - rdp->qs_pending,
111   - rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
112   - rdp->n_rcu_pending);
  108 + rdp->qs_pending);
113 109 #ifdef CONFIG_NO_HZ
114 110 seq_printf(m, ",%d,%d,%d,%lu",
115 111 rdp->dynticks->dynticks,
... ... @@ -123,7 +119,7 @@
123 119  
124 120 static int show_rcudata_csv(struct seq_file *m, void *unused)
125 121 {
126   - seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\",");
  122 + seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
127 123 #ifdef CONFIG_NO_HZ
128 124 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
129 125 #endif /* #ifdef CONFIG_NO_HZ */