Commit 6897fc22ea01b562b55c6168592bcbd3ee62b006
Committed by Linus Torvalds
1 parent 0c692d0784
Exists in master and in 16 other branches
kernel: use lockless list for smp_call_function_single
Make smp_call_function_single and friends more efficient by using a
lockless list.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
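For readers unfamiliar with <linux/llist.h>: the queue becomes a Treiber-style
lock-free stack, where producers push with a compare-and-swap and the consumer
detaches the whole chain with a single atomic exchange. Below is a minimal
userspace model of that pattern using C11 atomics rather than the kernel's
cmpxchg(); the llist_* names are local stand-ins, not the kernel
implementation.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct llist_node { struct llist_node *next; };
struct llist_head { _Atomic(struct llist_node *) first; };

/* Push one node; returns true if the list was empty beforehand,
 * which is the "do we need to send an IPI?" signal used below. */
static bool llist_add(struct llist_node *node, struct llist_head *head)
{
	struct llist_node *first = atomic_load(&head->first);

	do {
		node->next = first;
	} while (!atomic_compare_exchange_weak(&head->first, &first, node));

	return first == NULL;
}

/* Detach the entire list in one shot; entries come back LIFO. */
static struct llist_node *llist_del_all(struct llist_head *head)
{
	return atomic_exchange(&head->first, NULL);
}

int main(void)
{
	struct llist_head head = { NULL };
	struct llist_node nodes[3];

	for (int i = 0; i < 3; i++)
		printf("add %d: was empty? %d\n", i, llist_add(&nodes[i], &head));

	for (struct llist_node *n = llist_del_all(&head); n; n = n->next)
		printf("popped node %td\n", n - nodes);
	return 0;
}

llist_add() reporting "the list was empty" is what lets a caller decide whether
an IPI is needed, as the kernel/smp.c hunks below show.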
Showing 3 changed files with 19 additions and 43 deletions
include/linux/blkdev.h
include/linux/smp.h
@@ -11,12 +11,16 @@
 #include <linux/list.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
+#include <linux/llist.h>
 
 extern void cpu_idle(void);
 
 typedef void (*smp_call_func_t)(void *info);
 struct call_single_data {
-	struct list_head list;
+	union {
+		struct list_head list;
+		struct llist_node llist;
+	};
 	smp_call_func_t func;
 	void *info;
 	u16 flags;
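The union keeps struct call_single_data the same size: an entry is only ever on
one queue at a time, so the list_head and llist_node can share storage, and
llist_entry() (a container_of() wrapper) recovers the enclosing structure from
the embedded node. A small userspace sketch of that recovery, with a
hypothetical struct item standing in for call_single_data:

#include <stddef.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel helpers. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define llist_entry(ptr, type, member) container_of(ptr, type, member)

struct llist_node { struct llist_node *next; };

/* Hypothetical payload playing the role of call_single_data. */
struct item {
	struct llist_node llist;
	int info;
};

int main(void)
{
	struct item it = { .info = 42 };
	struct llist_node *n = &it.llist;

	/* Recover the enclosing struct from the embedded node. */
	printf("%d\n", llist_entry(n, struct item, llist)->info);
	return 0;
}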
kernel/smp.c
@@ -28,13 +28,8 @@
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
 
-struct call_single_queue {
-	struct list_head list;
-	raw_spinlock_t lock;
-};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
 
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue);
-
 static int
 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
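The per-CPU queue shrinks from a struct (doubly linked list head plus raw
spinlock) to a single pointer. A userspace sketch comparing the two footprints,
with stand-in type definitions (illustrative sizes, not the kernel's, which
vary by config):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };
typedef struct { unsigned int slock; } raw_spinlock_t;
struct llist_node { struct llist_node *next; };

struct call_single_queue {	/* old: two pointers plus a lock */
	struct list_head list;
	raw_spinlock_t lock;
};

struct llist_head {		/* new: one pointer */
	struct llist_node *first;
};

int main(void)
{
	printf("old %zu bytes, new %zu bytes\n",
	       sizeof(struct call_single_queue), sizeof(struct llist_head));
	return 0;
}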
@@ -85,13 +80,9 @@
 	void *cpu = (void *)(long)smp_processor_id();
 	int i;
 
-	for_each_possible_cpu(i) {
-		struct call_single_queue *q = &per_cpu(call_single_queue, i);
+	for_each_possible_cpu(i)
+		init_llist_head(&per_cpu(call_single_queue, i));
 
-		raw_spin_lock_init(&q->lock);
-		INIT_LIST_HEAD(&q->list);
-	}
-
 	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
 	register_cpu_notifier(&hotplug_cfd_notifier);
 }
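With no lock to initialize, boot-time setup collapses to pointing each per-CPU
head at NULL; under the single-pointer layout sketched above, init_llist_head()
is plausibly nothing more than:

static inline void init_llist_head(struct llist_head *list)
{
	list->first = NULL;
}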
@@ -141,18 +132,9 @@
  */
 static void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
 {
-	struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
-	unsigned long flags;
-	int ipi;
-
 	if (wait)
 		csd->flags |= CSD_FLAG_WAIT;
 
-	raw_spin_lock_irqsave(&dst->lock, flags);
-	ipi = list_empty(&dst->list);
-	list_add_tail(&csd->list, &dst->list);
-	raw_spin_unlock_irqrestore(&dst->lock, flags);
-
 	/*
 	 * The list addition should be visible before sending the IPI
 	 * handler locks the list to pull the entry off it because of
@@ -164,7 +146,7 @@
 	 * locking and barrier primitives. Generic code isn't really
 	 * equipped to do the right thing...
 	 */
-	if (ipi)
+	if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
 		arch_send_call_function_single_ipi(cpu);
 
 	if (wait)
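Assembled from the two hunks above, the post-patch fast path looks roughly like
this; the trailing csd_lock_wait() call is an assumption filled in from the
surrounding code, which the viewer truncated:

static void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
{
	if (wait)
		csd->flags |= CSD_FLAG_WAIT;

	/*
	 * llist_add() returns true only when the queue was previously
	 * empty, replacing the old "ipi = list_empty()" check done under
	 * the spinlock: still exactly one IPI per burst of queued entries.
	 */
	if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
		arch_send_call_function_single_ipi(cpu);

	if (wait)
		csd_lock_wait(csd);	/* assumed, per the note above */
}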
@@ -177,27 +159,26 @@
  */
 void generic_smp_call_function_single_interrupt(void)
 {
-	struct call_single_queue *q = &__get_cpu_var(call_single_queue);
-	LIST_HEAD(list);
+	struct llist_node *entry, *next;
 
 	/*
 	 * Shouldn't receive this interrupt on a cpu that is not yet online.
 	 */
 	WARN_ON_ONCE(!cpu_online(smp_processor_id()));
 
-	raw_spin_lock(&q->lock);
-	list_replace_init(&q->list, &list);
-	raw_spin_unlock(&q->lock);
+	entry = llist_del_all(&__get_cpu_var(call_single_queue));
+	entry = llist_reverse_order(entry);
 
-	while (!list_empty(&list)) {
+	while (entry) {
 		struct call_single_data *csd;
 
-		csd = list_entry(list.next, struct call_single_data, list);
-		list_del(&csd->list);
+		next = entry->next;
 
+		csd = llist_entry(entry, struct call_single_data, llist);
 		csd->func(csd->info);
-
 		csd_unlock(csd);
+
+		entry = next;
 	}
 }
 
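Because llist_add() pushes at the head, llist_del_all() returns the batch in
LIFO order; the llist_reverse_order() call restores the FIFO execution order
the old spinlocked list provided. A self-contained sketch of that reversal,
assuming the same single-pointer node as before:

#include <stdio.h>

struct llist_node { struct llist_node *next; };

/* In-place reversal of a singly linked chain, as llist_reverse_order()
 * does for the batch returned by llist_del_all(). */
static struct llist_node *llist_reverse_order(struct llist_node *head)
{
	struct llist_node *new_head = NULL;

	while (head) {
		struct llist_node *next = head->next;

		head->next = new_head;
		new_head = head;
		head = next;
	}
	return new_head;
}

int main(void)
{
	struct llist_node n[3] = { { &n[1] }, { &n[2] }, { NULL } };

	/* Chain 0->1->2 comes back as 2->1->0 after reversal of 2->1->0?
	 * No: reversing 0->1->2 yields 2->1->0, printed below. */
	for (struct llist_node *p = llist_reverse_order(&n[0]); p; p = p->next)
		printf("node %td\n", p - n);
	return 0;
}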
@@ -411,17 +392,11 @@
 
 	for_each_cpu(cpu, cfd->cpumask) {
 		struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
-		struct call_single_queue *dst =
-					&per_cpu(call_single_queue, cpu);
-		unsigned long flags;
 
 		csd_lock(csd);
 		csd->func = func;
 		csd->info = info;
-
-		raw_spin_lock_irqsave(&dst->lock, flags);
-		list_add_tail(&csd->list, &dst->list);
-		raw_spin_unlock_irqrestore(&dst->lock, flags);
+		llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
 	}
 
 	/* Send a message to all CPUs in the map */
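The caller-facing API is unchanged; only the queueing underneath got cheaper.
A hedged usage sketch (kernel context; the handler runs in IPI context on the
target CPU, and remote_hello/example are hypothetical names):

#include <linux/smp.h>
#include <linux/printk.h>

static void remote_hello(void *info)
{
	/* Runs on the target CPU, with interrupts disabled. */
	pr_info("hello from cpu %d\n", smp_processor_id());
}

static void example(void)
{
	/* Queue remote_hello on CPU 1's call_single_queue, kick the
	 * IPI only if the queue was empty, and wait for completion. */
	smp_call_function_single(1, remote_hello, NULL, 1);
}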