Commit 1f5a5b87f78fade3ae48dfd55e8765d1d622ea4e

Authored by Thomas Gleixner
1 parent 1318a481fc

genirq: Implement a sane sparse_irq allocator

The current sparse_irq allocator has several shortcomings due to
failures in the design, or the lack thereof:

 - Requires iteration over the number of active irqs to find a free slot
   (Some architectures have grown their own workarounds for this)
 - Removal of entries is not possible
 - Races between create_irq_nr() and destroy_irq() (plugged by horrible
   callbacks)
 - Migration of active irq descriptors is not possible
 - No bulk allocation of irq ranges
 - Sprinkled irq_desc references all over the place outside of kernel/irq/
   (The previous chip functions series is addressing this issue)

Implement a sane allocator which fixes the above shortcomings (though
migration of active descriptors still needs a full tree-wide cleanup of
the direct and mostly unlocked access to irq_desc).

The new allocator still uses a radix_tree, but uses a bitmap for
keeping track of the allocated irq numbers. That allows (a usage
sketch of the new interface follows this list):

 - Fast lookup of a free slot
 - Removal of descriptors
 - Prevention of the create/destroy race
 - Bulk allocation of consecutive irq ranges
 - A basic design which is ready for migration of live descriptors
   after further cleanups
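
As a usage illustration only (not part of this patch), a caller would
allocate and release a block of interrupt numbers through the new
interface roughly as follows. The function name example_alloc_irq_block()
and the concrete numbers are made up for the sketch; irq_alloc_descs()
and irq_free_descs() are the helpers declared in the include/linux/irq.h
hunk below:

    #include <linux/irq.h>

    /* Sketch: grab four consecutive irq numbers at or above 32 on node 0 */
    static int example_alloc_irq_block(void)
    {
            int base = irq_alloc_descs(-1, 32, 4, 0);

            if (base < 0)
                    return base;    /* -EEXIST or -ENOMEM from the allocator */

            /* ... bind irqs base .. base + 3 to the hardware here ... */

            irq_free_descs(base, 4);
            return 0;
    }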

The bitmap is also used in the SPARSE_IRQ=n case for lookup and
raceless (de)allocation of irq numbers, so it removes the need to loop
through the descriptor array to find a free slot.
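
Condensed to its core (a simplified sketch of the irq_alloc_descs()
path added in kernel/irq/irqdesc.c below, with the irq/range error
checks omitted), an allocation is a single bitmap search plus a bitmap
update under sparse_irq_lock; only the back end differs between the
two configurations:

    raw_spin_lock_irqsave(&sparse_irq_lock, flags);
    start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0);
    bitmap_set(allocated_irqs, start, cnt);
    raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);

    /*
     * SPARSE_IRQ=y: allocate the descriptors and insert them into the
     * radix tree.  SPARSE_IRQ=n: alloc_descs() simply returns start.
     */
    return alloc_descs(start, cnt, node);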

Right now it uses sparse_irq_lock to protect the bitmap and the radix
tree, but after cleaning up all users we should be able to convert that
to a mutex and to switch the radix_tree and descriptor allocations to
GFP_KERNEL.

[ Folded in a bugfix from Yinghai Lu ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@elte.hu>

Showing 2 changed files with 246 additions and 8 deletions

include/linux/irq.h
... ... @@ -398,6 +398,29 @@
398 398 }
399 399 #endif
400 400  
  401 +int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
  402 +void irq_free_descs(unsigned int irq, unsigned int cnt);
  403 +
  404 +static inline int irq_alloc_desc(int node)
  405 +{
  406 + return irq_alloc_descs(-1, 0, 1, node);
  407 +}
  408 +
  409 +static inline int irq_alloc_desc_at(unsigned int at, int node)
  410 +{
  411 + return irq_alloc_descs(at, at, 1, node);
  412 +}
  413 +
  414 +static inline int irq_alloc_desc_from(unsigned int from, int node)
  415 +{
  416 + return irq_alloc_descs(-1, from, 1, node);
  417 +}
  418 +
  419 +static inline void irq_free_desc(unsigned int irq)
  420 +{
  421 + irq_free_descs(irq, 1);
  422 +}
  423 +
401 424 #endif /* CONFIG_GENERIC_HARDIRQS */
402 425  
403 426 #endif /* !CONFIG_S390 */
kernel/irq/irqdesc.c
... ... @@ -13,6 +13,7 @@
13 13 #include <linux/interrupt.h>
14 14 #include <linux/kernel_stat.h>
15 15 #include <linux/radix-tree.h>
  16 +#include <linux/bitmap.h>
16 17  
17 18 #include "internals.h"
18 19  
19 20  
... ... @@ -33,9 +34,54 @@
33 34 }
34 35 #endif
35 36  
  37 +#ifdef CONFIG_SMP
  38 +static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
  39 +{
  40 + if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node))
  41 + return -ENOMEM;
  42 +
  43 +#ifdef CONFIG_GENERIC_PENDING_IRQ
  44 + if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
  45 + free_cpumask_var(desc->irq_data.affinity);
  46 + return -ENOMEM;
  47 + }
  48 +#endif
  49 + return 0;
  50 +}
  51 +
  52 +static void desc_smp_init(struct irq_desc *desc, int node)
  53 +{
  54 + desc->node = node;
  55 + cpumask_copy(desc->irq_data.affinity, irq_default_affinity);
  56 +}
  57 +
  58 +#else
  59 +static inline int
  60 +alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; }
  61 +static inline void desc_smp_init(struct irq_desc *desc, int node) { }
  62 +#endif
  63 +
  64 +static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
  65 +{
  66 + desc->irq_data.irq = irq;
  67 + desc->irq_data.chip = &no_irq_chip;
  68 + desc->irq_data.chip_data = NULL;
  69 + desc->irq_data.handler_data = NULL;
  70 + desc->irq_data.msi_desc = NULL;
  71 + desc->status = IRQ_DEFAULT_INIT_FLAGS;
  72 + desc->handle_irq = handle_bad_irq;
  73 + desc->depth = 1;
  74 + desc->name = NULL;
  75 + memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
  76 + desc_smp_init(desc, node);
  77 +}
  78 +
36 79 int nr_irqs = NR_IRQS;
37 80 EXPORT_SYMBOL_GPL(nr_irqs);
38 81  
  82 +DEFINE_RAW_SPINLOCK(sparse_irq_lock);
  83 +static DECLARE_BITMAP(allocated_irqs, NR_IRQS);
  84 +
39 85 #ifdef CONFIG_SPARSE_IRQ
40 86  
41 87 static struct irq_desc irq_desc_init = {
42 88  
... ... @@ -85,14 +131,9 @@
85 131 arch_init_chip_data(desc, node);
86 132 }
87 133  
88   -/*
89   - * Protect the sparse_irqs:
90   - */
91   -DEFINE_RAW_SPINLOCK(sparse_irq_lock);
92   -
93 134 static RADIX_TREE(irq_desc_tree, GFP_ATOMIC);
94 135  
95   -static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
  136 +static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
96 137 {
97 138 radix_tree_insert(&irq_desc_tree, irq, desc);
98 139 }
... ... @@ -111,6 +152,94 @@
111 152 radix_tree_replace_slot(ptr, desc);
112 153 }
113 154  
  155 +static void delete_irq_desc(unsigned int irq)
  156 +{
  157 + radix_tree_delete(&irq_desc_tree, irq);
  158 +}
  159 +
  160 +#ifdef CONFIG_SMP
  161 +static void free_masks(struct irq_desc *desc)
  162 +{
  163 +#ifdef CONFIG_GENERIC_PENDING_IRQ
  164 + free_cpumask_var(desc->pending_mask);
  165 +#endif
  166 + free_cpumask_var(desc->irq_data.affinity);
  167 +}
  168 +#else
  169 +static inline void free_masks(struct irq_desc *desc) { }
  170 +#endif
  171 +
  172 +static struct irq_desc *alloc_desc(int irq, int node)
  173 +{
  174 + struct irq_desc *desc;
  175 + gfp_t gfp = GFP_KERNEL;
  176 +
  177 + desc = kzalloc_node(sizeof(*desc), gfp, node);
  178 + if (!desc)
  179 + return NULL;
  180 + /* allocate based on nr_cpu_ids */
  181 + desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs),
  182 + gfp, node);
  183 + if (!desc->kstat_irqs)
  184 + goto err_desc;
  185 +
  186 + if (alloc_masks(desc, gfp, node))
  187 + goto err_kstat;
  188 +
  189 + raw_spin_lock_init(&desc->lock);
  190 + lockdep_set_class(&desc->lock, &irq_desc_lock_class);
  191 +
  192 + desc_set_defaults(irq, desc, node);
  193 +
  194 + return desc;
  195 +
  196 +err_kstat:
  197 + kfree(desc->kstat_irqs);
  198 +err_desc:
  199 + kfree(desc);
  200 + return NULL;
  201 +}
  202 +
  203 +static void free_desc(unsigned int irq)
  204 +{
  205 + struct irq_desc *desc = irq_to_desc(irq);
  206 + unsigned long flags;
  207 +
  208 + raw_spin_lock_irqsave(&sparse_irq_lock, flags);
  209 + delete_irq_desc(irq);
  210 + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
  211 +
  212 + free_masks(desc);
  213 + kfree(desc->kstat_irqs);
  214 + kfree(desc);
  215 +}
  216 +
  217 +static int alloc_descs(unsigned int start, unsigned int cnt, int node)
  218 +{
  219 + struct irq_desc *desc;
  220 + unsigned long flags;
  221 + int i;
  222 +
  223 + for (i = 0; i < cnt; i++) {
  224 + desc = alloc_desc(start + i, node);
  225 + if (!desc)
  226 + goto err;
  227 + raw_spin_lock_irqsave(&sparse_irq_lock, flags);
  228 + irq_insert_desc(start + i, desc);
  229 + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
  230 + }
  231 + return start;
  232 +
  233 +err:
  234 + for (i--; i >= 0; i--)
  235 + free_desc(start + i);
  236 +
  237 + raw_spin_lock_irqsave(&sparse_irq_lock, flags);
  238 + bitmap_clear(allocated_irqs, start, cnt);
  239 + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
  240 + return -ENOMEM;
  241 +}
  242 +
114 243 static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
115 244 [0 ... NR_IRQS_LEGACY-1] = {
116 245 .status = IRQ_DEFAULT_INIT_FLAGS,
... ... @@ -155,7 +284,7 @@
155 284 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
156 285 alloc_desc_masks(&desc[i], node, true);
157 286 init_desc_masks(&desc[i]);
158   - set_irq_desc(i, &desc[i]);
  287 + irq_insert_desc(i, &desc[i]);
159 288 }
160 289  
161 290 return arch_early_irq_init();
... ... @@ -192,7 +321,7 @@
192 321 }
193 322 init_one_irq_desc(irq, desc, node);
194 323  
195   - set_irq_desc(irq, desc);
  324 + irq_insert_desc(irq, desc);
196 325  
197 326 out_unlock:
198 327 raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
199 328  
... ... @@ -245,8 +374,94 @@
245 374 {
246 375 return irq_to_desc(irq);
247 376 }
  377 +
  378 +#ifdef CONFIG_SMP
  379 +static inline int desc_node(struct irq_desc *desc)
  380 +{
  381 + return desc->irq_data.node;
  382 +}
  383 +#else
  384 +static inline int desc_node(struct irq_desc *desc) { return 0; }
  385 +#endif
  386 +
  387 +static void free_desc(unsigned int irq)
  388 +{
  389 + struct irq_desc *desc = irq_to_desc(irq);
  390 + unsigned long flags;
  391 +
  392 + raw_spin_lock_irqsave(&desc->lock, flags);
  393 + desc_set_defaults(irq, desc, desc_node(desc));
  394 + raw_spin_unlock_irqrestore(&desc->lock, flags);
  395 +}
  396 +
  397 +static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
  398 +{
  399 + return start;
  400 +}
248 401 #endif /* !CONFIG_SPARSE_IRQ */
249 402  
  403 +/* Dynamic interrupt handling */
  404 +
  405 +/**
  406 + * irq_free_descs - free irq descriptors
  407 + * @from: Start of descriptor range
  408 + * @cnt: Number of consecutive irqs to free
  409 + */
  410 +void irq_free_descs(unsigned int from, unsigned int cnt)
  411 +{
  412 + unsigned long flags;
  413 + int i;
  414 +
  415 + if (from >= nr_irqs || (from + cnt) > nr_irqs)
  416 + return;
  417 +
  418 + for (i = 0; i < cnt; i++)
  419 + free_desc(from + i);
  420 +
  421 + raw_spin_lock_irqsave(&sparse_irq_lock, flags);
  422 + bitmap_clear(allocated_irqs, from, cnt);
  423 + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
  424 +}
  425 +
  426 +/**
  427 + * irq_alloc_descs - allocate and initialize a range of irq descriptors
  428 + * @irq: Allocate for specific irq number if irq >= 0
  429 + * @from: Start the search from this irq number
  430 + * @cnt: Number of consecutive irqs to allocate.
  431 + * @node: Preferred node on which the irq descriptor should be allocated
  432 + *
  433 + * Returns the first irq number or error code
  434 + */
  435 +int __ref
  436 +irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
  437 +{
  438 + unsigned long flags;
  439 + int start, ret;
  440 +
  441 + if (!cnt)
  442 + return -EINVAL;
  443 +
  444 + raw_spin_lock_irqsave(&sparse_irq_lock, flags);
  445 +
  446 + start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0);
  447 + ret = -EEXIST;
  448 + if (irq >= 0 && start != irq)
  449 + goto err;
  450 +
  451 + ret = -ENOMEM;
  452 + if (start >= nr_irqs)
  453 + goto err;
  454 +
  455 + bitmap_set(allocated_irqs, start, cnt);
  456 + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
  457 + return alloc_descs(start, cnt, node);
  458 +
  459 +err:
  460 + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
  461 + return ret;
  462 +}
  463 +
  464 +/* Statistics access */
250 465 void clear_kstat_irqs(struct irq_desc *desc)
251 466 {
252 467 memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));