Commit 1f5a5b87f78fade3ae48dfd55e8765d1d622ea4e
1 parent
1318a481fc
Exists in
master
and in
20 other branches
genirq: Implement a sane sparse_irq allocator
The current sparse_irq allocator has several shortcomings due to failures in the design or the lack of it:

- Requires iteration over the number of active irqs to find a free slot (Some architectures have grown their own workarounds for this)
- Removal of entries is not possible
- Racy between create_irq_nr and destroy_irq (plugged by horrible callbacks)
- Migration of active irq descriptors is not possible
- No bulk allocation of irq ranges
- Sprinkled irq_desc references all over the place outside of kernel/irq/ (The previous chip functions series is addressing this issue)

Implement a sane allocator which fixes the above shortcomings (though migration of active descriptors needs a full tree-wide cleanup of the direct and mostly unlocked access to irq_desc).

The new allocator still uses a radix_tree, but uses a bitmap for keeping track of allocated irq numbers. That allows:

- Fast lookup of a free slot
- Removal of descriptors
- Prevention of the create/destroy race
- Bulk allocation of consecutive irq ranges
- A basic design that is ready for migration of live descriptors after further cleanups

The bitmap is also used in the SPARSE_IRQ=n case for lookup and raceless (de)allocation of irq numbers, so it removes the requirement for looping through the descriptor array to find slots.

Right now it uses sparse_irq_lock to protect the bitmap and the radix tree, but after cleaning up all users we should be able to convert that to a mutex and to switch the radix_tree and descriptor allocations to GFP_KERNEL.

[ Folded in a bugfix from Yinghai Lu ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Showing 2 changed files with 246 additions and 8 deletions Side-by-side Diff
include/linux/irq.h
... | ... | @@ -398,6 +398,29 @@ |
398 | 398 | } |
399 | 399 | #endif |
400 | 400 | |
401 | +int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); | |
402 | +void irq_free_descs(unsigned int irq, unsigned int cnt); | |
403 | + | |
404 | +static inline int irq_alloc_desc(int node) | |
405 | +{ | |
406 | + return irq_alloc_descs(-1, 0, 1, node); | |
407 | +} | |
408 | + | |
409 | +static inline int irq_alloc_desc_at(unsigned int at, int node) | |
410 | +{ | |
411 | + return irq_alloc_descs(at, at, 1, node); | |
412 | +} | |
413 | + | |
414 | +static inline int irq_alloc_desc_from(unsigned int from, int node) | |
415 | +{ | |
416 | + return irq_alloc_descs(-1, from, 1, node); | |
417 | +} | |
418 | + | |
419 | +static inline void irq_free_desc(unsigned int irq) | |
420 | +{ | |
421 | + irq_free_descs(irq, 1); | |
422 | +} | |
423 | + | |
401 | 424 | #endif /* CONFIG_GENERIC_HARDIRQS */ |
402 | 425 | |
403 | 426 | #endif /* !CONFIG_S390 */ |
kernel/irq/irqdesc.c
... | ... | @@ -13,6 +13,7 @@ |
13 | 13 | #include <linux/interrupt.h> |
14 | 14 | #include <linux/kernel_stat.h> |
15 | 15 | #include <linux/radix-tree.h> |
16 | +#include <linux/bitmap.h> | |
16 | 17 | |
17 | 18 | #include "internals.h" |
18 | 19 | |
19 | 20 | |
... | ... | @@ -33,9 +34,54 @@ |
33 | 34 | } |
34 | 35 | #endif |
35 | 36 | |
37 | +#ifdef CONFIG_SMP | |
38 | +static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) | |
39 | +{ | |
40 | + if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) | |
41 | + return -ENOMEM; | |
42 | + | |
43 | +#ifdef CONFIG_GENERIC_PENDING_IRQ | |
44 | + if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { | |
45 | + free_cpumask_var(desc->irq_data.affinity); | |
46 | + return -ENOMEM; | |
47 | + } | |
48 | +#endif | |
49 | + return 0; | |
50 | +} | |
51 | + | |
52 | +static void desc_smp_init(struct irq_desc *desc, int node) | |
53 | +{ | |
54 | + desc->node = node; | |
55 | + cpumask_copy(desc->irq_data.affinity, irq_default_affinity); | |
56 | +} | |
57 | + | |
58 | +#else | |
59 | +static inline int | |
60 | +alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; } | |
61 | +static inline void desc_smp_init(struct irq_desc *desc, int node) { } | |
62 | +#endif | |
63 | + | |
64 | +static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | |
65 | +{ | |
66 | + desc->irq_data.irq = irq; | |
67 | + desc->irq_data.chip = &no_irq_chip; | |
68 | + desc->irq_data.chip_data = NULL; | |
69 | + desc->irq_data.handler_data = NULL; | |
70 | + desc->irq_data.msi_desc = NULL; | |
71 | + desc->status = IRQ_DEFAULT_INIT_FLAGS; | |
72 | + desc->handle_irq = handle_bad_irq; | |
73 | + desc->depth = 1; | |
74 | + desc->name = NULL; | |
75 | + memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); | |
76 | + desc_smp_init(desc, node); | |
77 | +} | |
78 | + | |
36 | 79 | int nr_irqs = NR_IRQS; |
37 | 80 | EXPORT_SYMBOL_GPL(nr_irqs); |
38 | 81 | |
82 | +DEFINE_RAW_SPINLOCK(sparse_irq_lock); | |
83 | +static DECLARE_BITMAP(allocated_irqs, NR_IRQS); | |
84 | + | |
39 | 85 | #ifdef CONFIG_SPARSE_IRQ |
40 | 86 | |
41 | 87 | static struct irq_desc irq_desc_init = { |
42 | 88 | |
... | ... | @@ -85,14 +131,9 @@ |
85 | 131 | arch_init_chip_data(desc, node); |
86 | 132 | } |
87 | 133 | |
88 | -/* | |
89 | - * Protect the sparse_irqs: | |
90 | - */ | |
91 | -DEFINE_RAW_SPINLOCK(sparse_irq_lock); | |
92 | - | |
93 | 134 | static RADIX_TREE(irq_desc_tree, GFP_ATOMIC); |
94 | 135 | |
95 | -static void set_irq_desc(unsigned int irq, struct irq_desc *desc) | |
136 | +static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) | |
96 | 137 | { |
97 | 138 | radix_tree_insert(&irq_desc_tree, irq, desc); |
98 | 139 | } |
... | ... | @@ -111,6 +152,94 @@ |
111 | 152 | radix_tree_replace_slot(ptr, desc); |
112 | 153 | } |
113 | 154 | |
155 | +static void delete_irq_desc(unsigned int irq) | |
156 | +{ | |
157 | + radix_tree_delete(&irq_desc_tree, irq); | |
158 | +} | |
159 | + | |
160 | +#ifdef CONFIG_SMP | |
161 | +static void free_masks(struct irq_desc *desc) | |
162 | +{ | |
163 | +#ifdef CONFIG_GENERIC_PENDING_IRQ | |
164 | + free_cpumask_var(desc->pending_mask); | |
165 | +#endif | |
166 | + free_cpumask_var(desc->affinity); | |
167 | +} | |
168 | +#else | |
169 | +static inline void free_masks(struct irq_desc *desc) { } | |
170 | +#endif | |
171 | + | |
172 | +static struct irq_desc *alloc_desc(int irq, int node) | |
173 | +{ | |
174 | + struct irq_desc *desc; | |
175 | + gfp_t gfp = GFP_KERNEL; | |
176 | + | |
177 | + desc = kzalloc_node(sizeof(*desc), gfp, node); | |
178 | + if (!desc) | |
179 | + return NULL; | |
180 | + /* allocate based on nr_cpu_ids */ | |
181 | + desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs), | |
182 | + gfp, node); | |
183 | + if (!desc->kstat_irqs) | |
184 | + goto err_desc; | |
185 | + | |
186 | + if (alloc_masks(desc, gfp, node)) | |
187 | + goto err_kstat; | |
188 | + | |
189 | + raw_spin_lock_init(&desc->lock); | |
190 | + lockdep_set_class(&desc->lock, &irq_desc_lock_class); | |
191 | + | |
192 | + desc_set_defaults(irq, desc, node); | |
193 | + | |
194 | + return desc; | |
195 | + | |
196 | +err_kstat: | |
197 | + kfree(desc->kstat_irqs); | |
198 | +err_desc: | |
199 | + kfree(desc); | |
200 | + return NULL; | |
201 | +} | |
202 | + | |
203 | +static void free_desc(unsigned int irq) | |
204 | +{ | |
205 | + struct irq_desc *desc = irq_to_desc(irq); | |
206 | + unsigned long flags; | |
207 | + | |
208 | + raw_spin_lock_irqsave(&sparse_irq_lock, flags); | |
209 | + delete_irq_desc(irq); | |
210 | + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | |
211 | + | |
212 | + free_masks(desc); | |
213 | + kfree(desc->kstat_irqs); | |
214 | + kfree(desc); | |
215 | +} | |
216 | + | |
217 | +static int alloc_descs(unsigned int start, unsigned int cnt, int node) | |
218 | +{ | |
219 | + struct irq_desc *desc; | |
220 | + unsigned long flags; | |
221 | + int i; | |
222 | + | |
223 | + for (i = 0; i < cnt; i++) { | |
224 | + desc = alloc_desc(start + i, node); | |
225 | + if (!desc) | |
226 | + goto err; | |
227 | + raw_spin_lock_irqsave(&sparse_irq_lock, flags); | |
228 | + irq_insert_desc(start + i, desc); | |
229 | + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | |
230 | + } | |
231 | + return start; | |
232 | + | |
233 | +err: | |
234 | + for (i--; i >= 0; i--) | |
235 | + free_desc(start + i); | |
236 | + | |
237 | + raw_spin_lock_irqsave(&sparse_irq_lock, flags); | |
238 | + bitmap_clear(allocated_irqs, start, cnt); | |
239 | + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | |
240 | + return -ENOMEM; | |
241 | +} | |
242 | + | |
114 | 243 | static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { |
115 | 244 | [0 ... NR_IRQS_LEGACY-1] = { |
116 | 245 | .status = IRQ_DEFAULT_INIT_FLAGS, |
... | ... | @@ -155,7 +284,7 @@ |
155 | 284 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); |
156 | 285 | alloc_desc_masks(&desc[i], node, true); |
157 | 286 | init_desc_masks(&desc[i]); |
158 | - set_irq_desc(i, &desc[i]); | |
287 | + irq_insert_desc(i, &desc[i]); | |
159 | 288 | } |
160 | 289 | |
161 | 290 | return arch_early_irq_init(); |
... | ... | @@ -192,7 +321,7 @@ |
192 | 321 | } |
193 | 322 | init_one_irq_desc(irq, desc, node); |
194 | 323 | |
195 | - set_irq_desc(irq, desc); | |
324 | + irq_insert_desc(irq, desc); | |
196 | 325 | |
197 | 326 | out_unlock: |
198 | 327 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); |
199 | 328 | |
... | ... | @@ -245,8 +374,94 @@ |
245 | 374 | { |
246 | 375 | return irq_to_desc(irq); |
247 | 376 | } |
377 | + | |
378 | +#ifdef CONFIG_SMP | |
379 | +static inline int desc_node(struct irq_desc *desc) | |
380 | +{ | |
381 | + return desc->irq_data.node; | |
382 | +} | |
383 | +#else | |
384 | +static inline int desc_node(struct irq_desc *desc) { return 0; } | |
385 | +#endif | |
386 | + | |
387 | +static void free_desc(unsigned int irq) | |
388 | +{ | |
389 | + struct irq_desc *desc = irq_to_desc(irq); | |
390 | + unsigned long flags; | |
391 | + | |
392 | + raw_spin_lock_irqsave(&desc->lock, flags); | |
393 | + desc_set_defaults(irq, desc, desc_node(desc)); | |
394 | + raw_spin_unlock_irqrestore(&desc->lock, flags); | |
395 | +} | |
396 | + | |
397 | +static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) | |
398 | +{ | |
399 | + return start; | |
400 | +} | |
248 | 401 | #endif /* !CONFIG_SPARSE_IRQ */ |
249 | 402 | |
403 | +/* Dynamic interrupt handling */ | |
404 | + | |
405 | +/** | |
406 | + * irq_free_descs - free irq descriptors | |
407 | + * @from: Start of descriptor range | |
408 | + * @cnt: Number of consecutive irqs to free | |
409 | + */ | |
410 | +void irq_free_descs(unsigned int from, unsigned int cnt) | |
411 | +{ | |
412 | + unsigned long flags; | |
413 | + int i; | |
414 | + | |
415 | + if (from >= nr_irqs || (from + cnt) > nr_irqs) | |
416 | + return; | |
417 | + | |
418 | + for (i = 0; i < cnt; i++) | |
419 | + free_desc(from + i); | |
420 | + | |
421 | + raw_spin_lock_irqsave(&sparse_irq_lock, flags); | |
422 | + bitmap_clear(allocated_irqs, from, cnt); | |
423 | + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | |
424 | +} | |
425 | + | |
426 | +/** | |
427 | + * irq_alloc_descs - allocate and initialize a range of irq descriptors | |
428 | + * @irq: Allocate for specific irq number if irq >= 0 | |
429 | + * @from: Start the search from this irq number | |
430 | + * @cnt: Number of consecutive irqs to allocate. | |
431 | + * @node: Preferred node on which the irq descriptor should be allocated | |
432 | + * | |
433 | + * Returns the first irq number or error code | |
434 | + */ | |
435 | +int __ref | |
436 | +irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | |
437 | +{ | |
438 | + unsigned long flags; | |
439 | + int start, ret; | |
440 | + | |
441 | + if (!cnt) | |
442 | + return -EINVAL; | |
443 | + | |
444 | + raw_spin_lock_irqsave(&sparse_irq_lock, flags); | |
445 | + | |
446 | + start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0); | |
447 | + ret = -EEXIST; | |
448 | + if (irq >=0 && start != irq) | |
449 | + goto err; | |
450 | + | |
451 | + ret = -ENOMEM; | |
452 | + if (start >= nr_irqs) | |
453 | + goto err; | |
454 | + | |
455 | + bitmap_set(allocated_irqs, start, cnt); | |
456 | + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | |
457 | + return alloc_descs(start, cnt, node); | |
458 | + | |
459 | +err: | |
460 | + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | |
461 | + return ret; | |
462 | +} | |
463 | + | |
464 | +/* Statistics access */ | |
250 | 465 | void clear_kstat_irqs(struct irq_desc *desc) |
251 | 466 | { |
252 | 467 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); |
-
mentioned in commit 5666a3
-
mentioned in commit 5666a3
-
mentioned in commit 5666a3
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee
-
mentioned in commit c291ee