Commit 5f1a8c1bc724498ff32acbd59ed5263275676b9d
Committed by
Herbert Xu
1 parent
83f619f3c8
Exists in
master
and in
20 other branches
padata: simplify serialization mechanism
We count the number of processed objects on a percpu basis, so we need to go through all the percpu reorder queues to calculate the sequence number of the next object that needs serialization. This patch changes this to count the number of processed objects globally, so we can calculate the sequence number and the percpu reorder queue of the next object that needs serialization without searching through the percpu reorder queues. This avoids some accesses to the memory of foreign cpus. Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Showing 2 changed files with 22 additions and 55 deletions Side-by-side Diff
include/linux/padata.h
... | ... | @@ -67,7 +67,6 @@ |
67 | 67 | * @pwork: work struct for parallelization. |
68 | 68 | * @swork: work struct for serialization. |
69 | 69 | * @pd: Backpointer to the internal control structure. |
70 | - * @num_obj: Number of objects that are processed by this cpu. | |
71 | 70 | * @cpu_index: Index of the cpu. |
72 | 71 | */ |
73 | 72 | struct padata_queue { |
... | ... | @@ -77,7 +76,6 @@ |
77 | 76 | struct work_struct pwork; |
78 | 77 | struct work_struct swork; |
79 | 78 | struct parallel_data *pd; |
80 | - atomic_t num_obj; | |
81 | 79 | int cpu_index; |
82 | 80 | }; |
83 | 81 | |
... | ... | @@ -93,6 +91,7 @@ |
93 | 91 | * @max_seq_nr: Maximal used sequence number. |
94 | 92 | * @cpumask: cpumask in use. |
95 | 93 | * @lock: Reorder lock. |
94 | + * @processed: Number of already processed objects. | |
96 | 95 | * @timer: Reorder timer. |
97 | 96 | */ |
98 | 97 | struct parallel_data { |
... | ... | @@ -103,7 +102,8 @@ |
103 | 102 | atomic_t refcnt; |
104 | 103 | unsigned int max_seq_nr; |
105 | 104 | cpumask_var_t cpumask; |
106 | - spinlock_t lock; | |
105 | + spinlock_t lock ____cacheline_aligned; | |
106 | + unsigned int processed; | |
107 | 107 | struct timer_list timer; |
108 | 108 | }; |
109 | 109 |
kernel/padata.c
... | ... | @@ -170,79 +170,47 @@ |
170 | 170 | */ |
171 | 171 | static struct padata_priv *padata_get_next(struct parallel_data *pd) |
172 | 172 | { |
173 | - int cpu, num_cpus, empty, calc_seq_nr; | |
174 | - int seq_nr, next_nr, overrun, next_overrun; | |
173 | + int cpu, num_cpus; | |
174 | + int next_nr, next_index; | |
175 | 175 | struct padata_queue *queue, *next_queue; |
176 | 176 | struct padata_priv *padata; |
177 | 177 | struct padata_list *reorder; |
178 | 178 | |
179 | - empty = 0; | |
180 | - next_nr = -1; | |
181 | - next_overrun = 0; | |
182 | - next_queue = NULL; | |
183 | - | |
184 | 179 | num_cpus = cpumask_weight(pd->cpumask); |
185 | 180 | |
186 | - for_each_cpu(cpu, pd->cpumask) { | |
187 | - queue = per_cpu_ptr(pd->queue, cpu); | |
188 | - reorder = &queue->reorder; | |
181 | + /* | |
182 | + * Calculate the percpu reorder queue and the sequence | |
183 | + * number of the next object. | |
184 | + */ | |
185 | + next_nr = pd->processed; | |
186 | + next_index = next_nr % num_cpus; | |
187 | + cpu = padata_index_to_cpu(pd, next_index); | |
188 | + next_queue = per_cpu_ptr(pd->queue, cpu); | |
189 | 189 | |
190 | - /* | |
191 | - * Calculate the seq_nr of the object that should be | |
192 | - * next in this reorder queue. | |
193 | - */ | |
194 | - overrun = 0; | |
195 | - calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus) | |
196 | - + queue->cpu_index; | |
197 | - | |
198 | - if (unlikely(calc_seq_nr > pd->max_seq_nr)) { | |
199 | - calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1; | |
200 | - overrun = 1; | |
201 | - } | |
202 | - | |
203 | - if (!list_empty(&reorder->list)) { | |
204 | - padata = list_entry(reorder->list.next, | |
205 | - struct padata_priv, list); | |
206 | - | |
207 | - seq_nr = padata->seq_nr; | |
208 | - BUG_ON(calc_seq_nr != seq_nr); | |
209 | - } else { | |
210 | - seq_nr = calc_seq_nr; | |
211 | - empty++; | |
212 | - } | |
213 | - | |
214 | - if (next_nr < 0 || seq_nr < next_nr | |
215 | - || (next_overrun && !overrun)) { | |
216 | - next_nr = seq_nr; | |
217 | - next_overrun = overrun; | |
218 | - next_queue = queue; | |
219 | - } | |
190 | + if (unlikely(next_nr > pd->max_seq_nr)) { | |
191 | + next_nr = next_nr - pd->max_seq_nr - 1; | |
192 | + next_index = next_nr % num_cpus; | |
193 | + cpu = padata_index_to_cpu(pd, next_index); | |
194 | + next_queue = per_cpu_ptr(pd->queue, cpu); | |
195 | + pd->processed = 0; | |
220 | 196 | } |
221 | 197 | |
222 | 198 | padata = NULL; |
223 | 199 | |
224 | - if (empty == num_cpus) | |
225 | - goto out; | |
226 | - | |
227 | 200 | reorder = &next_queue->reorder; |
228 | 201 | |
229 | 202 | if (!list_empty(&reorder->list)) { |
230 | 203 | padata = list_entry(reorder->list.next, |
231 | 204 | struct padata_priv, list); |
232 | 205 | |
233 | - if (unlikely(next_overrun)) { | |
234 | - for_each_cpu(cpu, pd->cpumask) { | |
235 | - queue = per_cpu_ptr(pd->queue, cpu); | |
236 | - atomic_set(&queue->num_obj, 0); | |
237 | - } | |
238 | - } | |
206 | + BUG_ON(next_nr != padata->seq_nr); | |
239 | 207 | |
240 | 208 | spin_lock(&reorder->lock); |
241 | 209 | list_del_init(&padata->list); |
242 | 210 | atomic_dec(&pd->reorder_objects); |
243 | 211 | spin_unlock(&reorder->lock); |
244 | 212 | |
245 | - atomic_inc(&next_queue->num_obj); | |
213 | + pd->processed++; | |
246 | 214 | |
247 | 215 | goto out; |
248 | 216 | } |
... | ... | @@ -430,7 +398,6 @@ |
430 | 398 | |
431 | 399 | INIT_WORK(&queue->pwork, padata_parallel_worker); |
432 | 400 | INIT_WORK(&queue->swork, padata_serial_worker); |
433 | - atomic_set(&queue->num_obj, 0); | |
434 | 401 | } |
435 | 402 | |
436 | 403 | num_cpus = cpumask_weight(pd->cpumask); |