Commit 6897fc22ea01b562b55c6168592bcbd3ee62b006

Authored by Christoph Hellwig
Committed by Linus Torvalds
1 parent 0c692d0784

kernel: use lockless list for smp_call_function_single

Make smp_call_function_single and friends more efficient by using a
lockless list.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
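
The pattern adopted here is the kernel's llist API, which is also what the diff below shows: callers push a csd onto a per-cpu lock-free list with llist_add(), and the IPI handler drains the whole list in one go with llist_del_all(), reversing it with llist_reverse_order() so entries are still handled in submission order. As a rough illustration only (plain C11 atomics standing in for the kernel's llist implementation, with made-up names such as node, pending, push, pop_all and reverse), this is the shape of that push / drain-all scheme:

/*
 * Userspace sketch of the lockless-list pattern, not kernel code:
 * producers push with a lock-free cmpxchg loop, the consumer grabs
 * the whole chain with a single atomic exchange and walks it after
 * reversing it back to FIFO order.
 */
#include <stdatomic.h>
#include <stdio.h>

struct node {
	struct node *next;
	int payload;
};

static _Atomic(struct node *) pending;	/* per-cpu llist_head stand-in */

/*
 * llist_add()-like push; returns 1 if the list was empty before the
 * add, which is what lets the caller kick the consumer only once per
 * batch of queued entries.
 */
static int push(struct node *n)
{
	struct node *first = atomic_load(&pending);

	do {
		n->next = first;
	} while (!atomic_compare_exchange_weak(&pending, &first, n));

	return first == NULL;
}

/* llist_del_all()-like pop of the whole chain in one atomic exchange. */
static struct node *pop_all(void)
{
	return atomic_exchange(&pending, NULL);
}

/*
 * llist_reverse_order()-like helper: entries come off the list
 * newest-first, so reverse them to process in queueing order.
 */
static struct node *reverse(struct node *head)
{
	struct node *prev = NULL;

	while (head) {
		struct node *next = head->next;

		head->next = prev;
		prev = head;
		head = next;
	}
	return prev;
}

int main(void)
{
	struct node a = { .payload = 1 }, b = { .payload = 2 };

	if (push(&a))			/* list was empty: "send the IPI" */
		puts("kick consumer");
	push(&b);			/* already non-empty: no second kick */

	for (struct node *n = reverse(pop_all()); n; n = n->next)
		printf("handling %d\n", n->payload);

	return 0;
}

Because the push reports whether the list was empty beforehand, the producer only needs to send the cross-CPU interrupt for the first entry of a batch; that is the role the llist_add() return value takes over from the old ipi = list_empty() check in the patch below.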

Showing 3 changed files with 19 additions and 43 deletions

include/linux/blkdev.h
... ... @@ -95,10 +95,7 @@
95 95 * as well!
96 96 */
97 97 struct request {
98   - union {
99   - struct list_head queuelist;
100   - struct llist_node ll_list;
101   - };
  98 + struct list_head queuelist;
102 99 union {
103 100 struct call_single_data csd;
104 101 struct work_struct mq_flush_data;

include/linux/smp.h
... ... @@ -11,12 +11,16 @@
11 11 #include <linux/list.h>
12 12 #include <linux/cpumask.h>
13 13 #include <linux/init.h>
  14 +#include <linux/llist.h>
14 15  
15 16 extern void cpu_idle(void);
16 17  
17 18 typedef void (*smp_call_func_t)(void *info);
18 19 struct call_single_data {
19   - struct list_head list;
  20 + union {
  21 + struct list_head list;
  22 + struct llist_node llist;
  23 + };
20 24 smp_call_func_t func;
21 25 void *info;
22 26 u16 flags;

kernel/smp.c
... ... @@ -28,13 +28,8 @@
28 28  
29 29 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
30 30  
31   -struct call_single_queue {
32   - struct list_head list;
33   - raw_spinlock_t lock;
34   -};
  31 +static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
35 32  
36   -static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue);
37   -
38 33 static int
39 34 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
40 35 {
... ... @@ -85,13 +80,9 @@
85 80 void *cpu = (void *)(long)smp_processor_id();
86 81 int i;
87 82  
88   - for_each_possible_cpu(i) {
89   - struct call_single_queue *q = &per_cpu(call_single_queue, i);
  83 + for_each_possible_cpu(i)
  84 + init_llist_head(&per_cpu(call_single_queue, i));
90 85  
91   - raw_spin_lock_init(&q->lock);
92   - INIT_LIST_HEAD(&q->list);
93   - }
94   -
95 86 hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
96 87 register_cpu_notifier(&hotplug_cfd_notifier);
97 88 }
... ... @@ -141,18 +132,9 @@
141 132 */
142 133 static void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
143 134 {
144   - struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
145   - unsigned long flags;
146   - int ipi;
147   -
148 135 if (wait)
149 136 csd->flags |= CSD_FLAG_WAIT;
150 137  
151   - raw_spin_lock_irqsave(&dst->lock, flags);
152   - ipi = list_empty(&dst->list);
153   - list_add_tail(&csd->list, &dst->list);
154   - raw_spin_unlock_irqrestore(&dst->lock, flags);
155   -
156 138 /*
157 139 * The list addition should be visible before sending the IPI
158 140 * handler locks the list to pull the entry off it because of
... ... @@ -164,7 +146,7 @@
164 146 * locking and barrier primitives. Generic code isn't really
165 147 * equipped to do the right thing...
166 148 */
167   - if (ipi)
  149 + if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
168 150 arch_send_call_function_single_ipi(cpu);
169 151  
170 152 if (wait)
... ... @@ -177,27 +159,26 @@
177 159 */
178 160 void generic_smp_call_function_single_interrupt(void)
179 161 {
180   - struct call_single_queue *q = &__get_cpu_var(call_single_queue);
181   - LIST_HEAD(list);
  162 + struct llist_node *entry, *next;
182 163  
183 164 /*
184 165 * Shouldn't receive this interrupt on a cpu that is not yet online.
185 166 */
186 167 WARN_ON_ONCE(!cpu_online(smp_processor_id()));
187 168  
188   - raw_spin_lock(&q->lock);
189   - list_replace_init(&q->list, &list);
190   - raw_spin_unlock(&q->lock);
  169 + entry = llist_del_all(&__get_cpu_var(call_single_queue));
  170 + entry = llist_reverse_order(entry);
191 171  
192   - while (!list_empty(&list)) {
  172 + while (entry) {
193 173 struct call_single_data *csd;
194 174  
195   - csd = list_entry(list.next, struct call_single_data, list);
196   - list_del(&csd->list);
  175 + next = entry->next;
197 176  
  177 + csd = llist_entry(entry, struct call_single_data, llist);
198 178 csd->func(csd->info);
199   -
200 179 csd_unlock(csd);
  180 +
  181 + entry = next;
201 182 }
202 183 }
203 184  
... ... @@ -411,17 +392,11 @@
411 392  
412 393 for_each_cpu(cpu, cfd->cpumask) {
413 394 struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
414   - struct call_single_queue *dst =
415   - &per_cpu(call_single_queue, cpu);
416   - unsigned long flags;
417 395  
418 396 csd_lock(csd);
419 397 csd->func = func;
420 398 csd->info = info;
421   -
422   - raw_spin_lock_irqsave(&dst->lock, flags);
423   - list_add_tail(&csd->list, &dst->list);
424   - raw_spin_unlock_irqrestore(&dst->lock, flags);
  399 + llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
425 400 }
426 401  
427 402 /* Send a message to all CPUs in the map */
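
A couple of points about the conversion, as visible in the hunks above: the per-cpu call_single_queue spinlock and its irq-save/restore pair disappear from the enqueue path entirely, since llist_add() is a single atomic operation; the old ipi = list_empty() decision is replaced by llist_add()'s return value, which is true only when the list was empty before the add, so the IPI is still sent just once per batch; and because entries are pushed onto the head of the lockless list, llist_del_all() returns them newest-first, which is why generic_smp_call_function_single_interrupt() now calls llist_reverse_order() before walking the chain, keeping the callbacks running in the order they were queued.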