Commit 5f1a8c1bc724498ff32acbd59ed5263275676b9d

Authored by Steffen Klassert
Committed by Herbert Xu
1 parent 83f619f3c8

padata: simplify serialization mechanism

We count the number of processed objects on a percpu basis, so we
need to search through all the percpu reorder queues to calculate
the sequence number of the next object that needs serialization.
This patch changes this to count the number of processed objects
globally. This way we can calculate the sequence number and the
percpu reorder queue of the next object that needs serialization
directly, without searching through the percpu reorder queues. This
avoids some accesses to the memory of foreign cpus.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
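
In short: objects are parallelized round-robin across the percpu
reorder queues, so the object with sequence number seq always sits
on the queue at index seq % num_cpus. Below is a minimal user-space
sketch of the new lookup (an illustration, not the kernel code; the
names mirror the patch, and the wrap handling mirrors the
padata_get_next() hunk further down):

    /*
     * With a global count of processed objects, the sequence number
     * of the next object to serialize and its percpu reorder queue
     * follow directly from modular arithmetic.
     */
    #include <stdio.h>

    static unsigned int next_queue_index(unsigned int *processed,
                                         unsigned int num_cpus,
                                         unsigned int max_seq_nr)
    {
            unsigned int next_nr = *processed;

            /*
             * Sequence numbers wrap after max_seq_nr; restart the
             * count, as the patch does with pd->processed.
             */
            if (next_nr > max_seq_nr) {
                    next_nr -= max_seq_nr + 1;
                    *processed = 0;
            }

            return next_nr % num_cpus;
    }

    int main(void)
    {
            unsigned int processed = 9;

            /* With 4 cpus, object 9 sits on queue index 9 % 4 = 1. */
            printf("queue index: %u\n",
                   next_queue_index(&processed, 4, 255));
            return 0;
    }

As a worked example of the wrap path: with num_cpus = 4 and
max_seq_nr = 7, once the global count reaches 8 the next sequence
number computes as 8 - 7 - 1 = 0, the queue index as 0 % 4 = 0, and
the count restarts from 0.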

Showing 2 changed files with 22 additions and 55 deletions

include/linux/padata.h
... ... @@ -67,7 +67,6 @@
67 67 * @pwork: work struct for parallelization.
68 68 * @swork: work struct for serialization.
69 69 * @pd: Backpointer to the internal control structure.
70   - * @num_obj: Number of objects that are processed by this cpu.
71 70 * @cpu_index: Index of the cpu.
72 71 */
73 72 struct padata_queue {
... ... @@ -77,7 +76,6 @@
77 76 struct work_struct pwork;
78 77 struct work_struct swork;
79 78 struct parallel_data *pd;
80   - atomic_t num_obj;
81 79 int cpu_index;
82 80 };
83 81  
... ... @@ -93,6 +91,7 @@
93 91 * @max_seq_nr: Maximal used sequence number.
94 92 * @cpumask: cpumask in use.
95 93 * @lock: Reorder lock.
  94 + * @processed: Number of already processed objects.
96 95 * @timer: Reorder timer.
97 96 */
98 97 struct parallel_data {
... ... @@ -103,7 +102,8 @@
103 102 atomic_t refcnt;
104 103 unsigned int max_seq_nr;
105 104 cpumask_var_t cpumask;
106   - spinlock_t lock;
  105 + spinlock_t lock ____cacheline_aligned;
  106 + unsigned int processed;
107 107 struct timer_list timer;
108 108 };
109 109  
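
One more change in the padata.h hunks above: the reorder lock is now
tagged ____cacheline_aligned, which starts the lock (and the new
processed counter behind it) on its own cacheline, presumably to cut
false sharing with the preceding fields on the reorder path. A rough
user-space analogue (assumes a 64-byte cacheline; the kernel macro
uses the architecture's actual cacheline size):

    #include <stdalign.h>
    #include <pthread.h>

    struct parallel_data_like {
            unsigned int max_seq_nr;
            /*
             * alignas(64) stands in for ____cacheline_aligned: the
             * hot reorder state below no longer shares a cacheline
             * with the fields above it.
             */
            alignas(64) pthread_spinlock_t lock;
            unsigned int processed;
    };
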
kernel/padata.c
... ... @@ -170,79 +170,47 @@
170 170 */
171 171 static struct padata_priv *padata_get_next(struct parallel_data *pd)
172 172 {
173   - int cpu, num_cpus, empty, calc_seq_nr;
174   - int seq_nr, next_nr, overrun, next_overrun;
  173 + int cpu, num_cpus;
  174 + int next_nr, next_index;
175 175 struct padata_queue *queue, *next_queue;
176 176 struct padata_priv *padata;
177 177 struct padata_list *reorder;
178 178  
179   - empty = 0;
180   - next_nr = -1;
181   - next_overrun = 0;
182   - next_queue = NULL;
183   -
184 179 num_cpus = cpumask_weight(pd->cpumask);
185 180  
186   - for_each_cpu(cpu, pd->cpumask) {
187   - queue = per_cpu_ptr(pd->queue, cpu);
188   - reorder = &queue->reorder;
  181 + /*
  182 + * Calculate the percpu reorder queue and the sequence
  183 + * number of the next object.
  184 + */
  185 + next_nr = pd->processed;
  186 + next_index = next_nr % num_cpus;
  187 + cpu = padata_index_to_cpu(pd, next_index);
  188 + next_queue = per_cpu_ptr(pd->queue, cpu);
189 189  
190   - /*
191   - * Calculate the seq_nr of the object that should be
192   - * next in this reorder queue.
193   - */
194   - overrun = 0;
195   - calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
196   - + queue->cpu_index;
197   -
198   - if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
199   - calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
200   - overrun = 1;
201   - }
202   -
203   - if (!list_empty(&reorder->list)) {
204   - padata = list_entry(reorder->list.next,
205   - struct padata_priv, list);
206   -
207   - seq_nr = padata->seq_nr;
208   - BUG_ON(calc_seq_nr != seq_nr);
209   - } else {
210   - seq_nr = calc_seq_nr;
211   - empty++;
212   - }
213   -
214   - if (next_nr < 0 || seq_nr < next_nr
215   - || (next_overrun && !overrun)) {
216   - next_nr = seq_nr;
217   - next_overrun = overrun;
218   - next_queue = queue;
219   - }
  190 + if (unlikely(next_nr > pd->max_seq_nr)) {
  191 + next_nr = next_nr - pd->max_seq_nr - 1;
  192 + next_index = next_nr % num_cpus;
  193 + cpu = padata_index_to_cpu(pd, next_index);
  194 + next_queue = per_cpu_ptr(pd->queue, cpu);
  195 + pd->processed = 0;
220 196 }
221 197  
222 198 padata = NULL;
223 199  
224   - if (empty == num_cpus)
225   - goto out;
226   -
227 200 reorder = &next_queue->reorder;
228 201  
229 202 if (!list_empty(&reorder->list)) {
230 203 padata = list_entry(reorder->list.next,
231 204 struct padata_priv, list);
232 205  
233   - if (unlikely(next_overrun)) {
234   - for_each_cpu(cpu, pd->cpumask) {
235   - queue = per_cpu_ptr(pd->queue, cpu);
236   - atomic_set(&queue->num_obj, 0);
237   - }
238   - }
  206 + BUG_ON(next_nr != padata->seq_nr);
239 207  
240 208 spin_lock(&reorder->lock);
241 209 list_del_init(&padata->list);
242 210 atomic_dec(&pd->reorder_objects);
243 211 spin_unlock(&reorder->lock);
244 212  
245   - atomic_inc(&next_queue->num_obj);
  213 + pd->processed++;
246 214  
247 215 goto out;
248 216 }
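
padata_index_to_cpu(), called in the new code above, is not part of
this diff; in kernel/padata.c it resolves a queue index to the cpu
at that position in pd->cpumask, roughly:

    /* Walk the cpumask to the cpu sitting at position cpu_index. */
    static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
    {
            int cpu, target_cpu;

            target_cpu = cpumask_first(pd->cpumask);
            for (cpu = 0; cpu < cpu_index; cpu++)
                    target_cpu = cpumask_next(target_cpu, pd->cpumask);

            return target_cpu;
    }
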
... ... @@ -430,7 +398,6 @@
430 398  
431 399 INIT_WORK(&queue->pwork, padata_parallel_worker);
432 400 INIT_WORK(&queue->swork, padata_serial_worker);
433   - atomic_set(&queue->num_obj, 0);
434 401 }
435 402  
436 403 num_cpus = cpumask_weight(pd->cpumask);