Commit 09223371deac67d08ca0b70bd18787920284c967

Authored by Shaohua Li
Committed by Paul E. McKenney
1 parent 9a43273690

rcu: Use softirq to address performance regression

Commit a26ac2455ffcf3 (rcu: move TREE_RCU from softirq to kthread)
introduced performance regression. In an AIM7 test, this commit degraded
performance by about 40%.

The commit runs rcu callbacks in a kthread instead of softirq. We observed
a high rate of context switches, which is caused by this. Our test system has
64 CPUs and HZ is 1000, so we saw more than 64k context switches per second
which is caused by RCU's per-CPU kthread.  A trace showed that most of
the time the RCU per-CPU kthread doesn't actually handle any callbacks,
but instead just does a very small amount of work handling grace periods.
This means that RCU's per-CPU kthreads are making the scheduler do quite
a bit of work in order to allow a very small amount of RCU-related
processing to be done.

Alex Shi's analysis determined that this slowdown is due to lock
contention within the scheduler.  Unfortunately, as Peter Zijlstra points
out, the scheduler's real-time semantics require global action, which
means that this contention is inherent in real-time scheduling.  (Yes,
perhaps someone will come up with a workaround -- otherwise, -rt is not
going to do well on large SMP systems -- but this patch will work around
this issue in the meantime.  And "the meantime" might well be forever.)

This patch therefore re-introduces softirq processing to RCU, but only
for core RCU work.  RCU callbacks are still executed in kthread context,
so that only a small amount of RCU work runs in softirq context in the
common case.  This should minimize ksoftirqd execution, allowing us to
skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Tested-by: "Alex,Shi" <alex.shi@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Showing 8 changed files with 35 additions and 6 deletions Side-by-side Diff

Documentation/filesystems/proc.txt
... ... @@ -843,6 +843,7 @@
843 843 TASKLET: 0 0 0 290
844 844 SCHED: 27035 26983 26971 26746
845 845 HRTIMER: 0 0 0 0
  846 + RCU: 1678 1769 2178 2250
846 847  
847 848  
848 849 1.3 IDE devices in /proc/ide
include/linux/interrupt.h
... ... @@ -414,6 +414,7 @@
414 414 TASKLET_SOFTIRQ,
415 415 SCHED_SOFTIRQ,
416 416 HRTIMER_SOFTIRQ,
  417 + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
417 418  
418 419 NR_SOFTIRQS
419 420 };
include/trace/events/irq.h
... ... @@ -20,7 +20,8 @@
20 20 softirq_name(BLOCK_IOPOLL), \
21 21 softirq_name(TASKLET), \
22 22 softirq_name(SCHED), \
23   - softirq_name(HRTIMER))
  23 + softirq_name(HRTIMER), \
  24 + softirq_name(RCU))
24 25  
25 26 /**
26 27 * irq_handler_entry - called immediately before the irq action handler
... ... @@ -100,6 +100,7 @@
100 100  
101 101 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
102 102 static void invoke_rcu_cpu_kthread(void);
  103 +static void __invoke_rcu_cpu_kthread(void);
103 104  
104 105 #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */
105 106  
106 107  
107 108  
... ... @@ -1442,13 +1443,21 @@
1442 1443 }
1443 1444  
1444 1445 /* If there are callbacks ready, invoke them. */
1445   - rcu_do_batch(rsp, rdp);
  1446 + if (cpu_has_callbacks_ready_to_invoke(rdp))
  1447 + __invoke_rcu_cpu_kthread();
1446 1448 }
1447 1449  
  1450 +static void rcu_kthread_do_work(void)
  1451 +{
  1452 + rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
  1453 + rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
  1454 + rcu_preempt_do_callbacks();
  1455 +}
  1456 +
1448 1457 /*
1449 1458 * Do softirq processing for the current CPU.
1450 1459 */
1451   -static void rcu_process_callbacks(void)
  1460 +static void rcu_process_callbacks(struct softirq_action *unused)
1452 1461 {
1453 1462 __rcu_process_callbacks(&rcu_sched_state,
1454 1463 &__get_cpu_var(rcu_sched_data));
... ... @@ -1465,7 +1474,7 @@
1465 1474 * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
1466 1475 * cannot disappear out from under us.
1467 1476 */
1468   -static void invoke_rcu_cpu_kthread(void)
  1477 +static void __invoke_rcu_cpu_kthread(void)
1469 1478 {
1470 1479 unsigned long flags;
1471 1480  
... ... @@ -1479,6 +1488,11 @@
1479 1488 local_irq_restore(flags);
1480 1489 }
1481 1490  
  1491 +static void invoke_rcu_cpu_kthread(void)
  1492 +{
  1493 + raise_softirq(RCU_SOFTIRQ);
  1494 +}
  1495 +
1482 1496 /*
1483 1497 * Wake up the specified per-rcu_node-structure kthread.
1484 1498 * Because the per-rcu_node kthreads are immortal, we don't need
... ... @@ -1613,7 +1627,7 @@
1613 1627 *workp = 0;
1614 1628 local_irq_restore(flags);
1615 1629 if (work)
1616   - rcu_process_callbacks();
  1630 + rcu_kthread_do_work();
1617 1631 local_bh_enable();
1618 1632 if (*workp != 0)
1619 1633 spincnt++;
... ... @@ -2387,6 +2401,7 @@
2387 2401 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
2388 2402 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
2389 2403 __rcu_init_preempt();
  2404 + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
2390 2405  
2391 2406 /*
2392 2407 * We don't need protection against CPU-hotplug here because
... ... @@ -439,6 +439,7 @@
439 439 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
440 440 static void rcu_preempt_check_callbacks(int cpu);
441 441 static void rcu_preempt_process_callbacks(void);
  442 +static void rcu_preempt_do_callbacks(void);
442 443 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
443 444 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
444 445 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
kernel/rcutree_plugin.h
... ... @@ -602,6 +602,11 @@
602 602 &__get_cpu_var(rcu_preempt_data));
603 603 }
604 604  
  605 +static void rcu_preempt_do_callbacks(void)
  606 +{
  607 + rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
  608 +}
  609 +
605 610 /*
606 611 * Queue a preemptible-RCU callback for invocation after a grace period.
607 612 */
... ... @@ -994,6 +999,10 @@
994 999 * to process.
995 1000 */
996 1001 static void rcu_preempt_process_callbacks(void)
  1002 +{
  1003 +}
  1004 +
  1005 +static void rcu_preempt_do_callbacks(void)
997 1006 {
998 1007 }
999 1008  
... ... @@ -58,7 +58,7 @@
58 58  
59 59 char *softirq_to_name[NR_SOFTIRQS] = {
60 60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
61   - "TASKLET", "SCHED", "HRTIMER"
  61 + "TASKLET", "SCHED", "HRTIMER", "RCU"
62 62 };
63 63  
64 64 /*
tools/perf/util/trace-event-parse.c
... ... @@ -2187,6 +2187,7 @@
2187 2187 { "TASKLET_SOFTIRQ", 6 },
2188 2188 { "SCHED_SOFTIRQ", 7 },
2189 2189 { "HRTIMER_SOFTIRQ", 8 },
  2190 + { "RCU_SOFTIRQ", 9 },
2190 2191  
2191 2192 { "HRTIMER_NORESTART", 0 },
2192 2193 { "HRTIMER_RESTART", 1 },