Commit c291ee622165cb2c8d4e7af63fffd499354a23be

Authored by Thomas Gleixner
1 parent 3a5dc1fafb

genirq: Prevent proc race against freeing of irq descriptors

Since the rework of the sparse interrupt code to actually free the
unused interrupt descriptors there exists a race between the /proc
interfaces to the irq subsystem and the code which frees the interrupt
descriptor.

CPU0				CPU1
				show_interrupts()
				  desc = irq_to_desc(X);
free_desc(desc)
  remove_from_radix_tree();
  kfree(desc);
				  raw_spinlock_irq(&desc->lock);

/proc/interrupts is the only interface which can actively corrupt
kernel memory via the lock access. /proc/stat can only read from freed
memory. Extremly hard to trigger, but possible.

The interfaces in /proc/irq/N/ are not affected by this because the
removal of the proc file is serialized in procfs against concurrent
readers/writers. The removal happens before the descriptor is freed.

For architectures which have CONFIG_SPARSE_IRQ=n this is a non issue
as the descriptor is never freed. It's merely cleared out with the irq
descriptor lock held. So any concurrent proc access will either see
the old correct value or the cleared out ones.

Protect the lookup and access to the irq descriptor in
show_interrupts() with the sparse_irq_lock.

Provide kstat_irqs_usr() which is protecting the lookup and access
with sparse_irq_lock and switch /proc/stat to use it.

Document the existing kstat_irqs interfaces so it's clear that the
caller needs to take care about protection. The users of these
interfaces are either not affected due to SPARSE_IRQ=n or already
protected against removal.

Fixes: 1f5a5b87f78f "genirq: Implement a sane sparse_irq allocator"
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org

Showing 5 changed files with 79 additions and 2 deletions Side-by-side Diff

... ... @@ -159,7 +159,7 @@
159 159  
160 160 /* sum again ? it could be updated? */
161 161 for_each_irq_nr(j)
162   - seq_put_decimal_ull(p, ' ', kstat_irqs(j));
  162 + seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
163 163  
164 164 seq_printf(p,
165 165 "\nctxt %llu\n"
include/linux/kernel_stat.h
... ... @@ -68,6 +68,7 @@
68 68 * Number of interrupts per specific IRQ source, since bootup
69 69 */
70 70 extern unsigned int kstat_irqs(unsigned int irq);
  71 +extern unsigned int kstat_irqs_usr(unsigned int irq);
71 72  
72 73 /*
73 74 * Number of interrupts per cpu, since bootup
kernel/irq/internals.h
... ... @@ -78,8 +78,12 @@
78 78  
79 79 #ifdef CONFIG_SPARSE_IRQ
80 80 static inline void irq_mark_irq(unsigned int irq) { }
  81 +extern void irq_lock_sparse(void);
  82 +extern void irq_unlock_sparse(void);
81 83 #else
82 84 extern void irq_mark_irq(unsigned int irq);
  85 +static inline void irq_lock_sparse(void) { }
  86 +static inline void irq_unlock_sparse(void) { }
83 87 #endif
84 88  
85 89 extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
kernel/irq/irqdesc.c
... ... @@ -132,6 +132,16 @@
132 132 static inline void free_masks(struct irq_desc *desc) { }
133 133 #endif
134 134  
  135 +void irq_lock_sparse(void)
  136 +{
  137 + mutex_lock(&sparse_irq_lock);
  138 +}
  139 +
  140 +void irq_unlock_sparse(void)
  141 +{
  142 + mutex_unlock(&sparse_irq_lock);
  143 +}
  144 +
135 145 static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
136 146 {
137 147 struct irq_desc *desc;
... ... @@ -168,6 +178,12 @@
168 178  
169 179 unregister_irq_proc(irq, desc);
170 180  
  181 + /*
  182 + * sparse_irq_lock protects also show_interrupts() and
  183 + * kstat_irq_usr(). Once we deleted the descriptor from the
  184 + * sparse tree we can free it. Access in proc will fail to
  185 + * lookup the descriptor.
  186 + */
171 187 mutex_lock(&sparse_irq_lock);
172 188 delete_irq_desc(irq);
173 189 mutex_unlock(&sparse_irq_lock);
... ... @@ -574,6 +590,15 @@
574 590 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
575 591 }
576 592  
  593 +/**
  594 + * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu
  595 + * @irq: The interrupt number
  596 + * @cpu: The cpu number
  597 + *
  598 + * Returns the sum of interrupt counts on @cpu since boot for
  599 + * @irq. The caller must ensure that the interrupt is not removed
  600 + * concurrently.
  601 + */
577 602 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
578 603 {
579 604 struct irq_desc *desc = irq_to_desc(irq);
... ... @@ -582,6 +607,14 @@
582 607 *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
583 608 }
584 609  
  610 +/**
  611 + * kstat_irqs - Get the statistics for an interrupt
  612 + * @irq: The interrupt number
  613 + *
  614 + * Returns the sum of interrupt counts on all cpus since boot for
  615 + * @irq. The caller must ensure that the interrupt is not removed
  616 + * concurrently.
  617 + */
585 618 unsigned int kstat_irqs(unsigned int irq)
586 619 {
587 620 struct irq_desc *desc = irq_to_desc(irq);
... ... @@ -592,6 +625,25 @@
592 625 return 0;
593 626 for_each_possible_cpu(cpu)
594 627 sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
  628 + return sum;
  629 +}
  630 +
  631 +/**
  632 + * kstat_irqs_usr - Get the statistics for an interrupt
  633 + * @irq: The interrupt number
  634 + *
  635 + * Returns the sum of interrupt counts on all cpus since boot for
  636 + * @irq. Contrary to kstat_irqs() this can be called from any
  637 + * preemptible context. It's protected against concurrent removal of
  638 + * an interrupt descriptor when sparse irqs are enabled.
  639 + */
  640 +unsigned int kstat_irqs_usr(unsigned int irq)
  641 +{
  642 + int sum;
  643 +
  644 + irq_lock_sparse();
  645 + sum = kstat_irqs(irq);
  646 + irq_unlock_sparse();
595 647 return sum;
596 648 }
... ... @@ -15,6 +15,23 @@
15 15  
16 16 #include "internals.h"
17 17  
  18 +/*
  19 + * Access rules:
  20 + *
  21 + * procfs protects read/write of /proc/irq/N/ files against a
  22 + * concurrent free of the interrupt descriptor. remove_proc_entry()
  23 + * immediately prevents new read/writes to happen and waits for
  24 + * already running read/write functions to complete.
  25 + *
  26 + * We remove the proc entries first and then delete the interrupt
  27 + * descriptor from the radix tree and free it. So it is guaranteed
  28 + * that irq_to_desc(N) is valid as long as the read/writes are
  29 + * permitted by procfs.
  30 + *
  31 + * The read from /proc/interrupts is a different problem because there
  32 + * is no protection. So the lookup and the access to irqdesc
  33 + * information must be protected by sparse_irq_lock.
  34 + */
18 35 static struct proc_dir_entry *root_irq_dir;
19 36  
20 37 #ifdef CONFIG_SMP
21 38  
... ... @@ -437,9 +454,10 @@
437 454 seq_putc(p, '\n');
438 455 }
439 456  
  457 + irq_lock_sparse();
440 458 desc = irq_to_desc(i);
441 459 if (!desc)
442   - return 0;
  460 + goto outsparse;
443 461  
444 462 raw_spin_lock_irqsave(&desc->lock, flags);
445 463 for_each_online_cpu(j)
... ... @@ -479,6 +497,8 @@
479 497 seq_putc(p, '\n');
480 498 out:
481 499 raw_spin_unlock_irqrestore(&desc->lock, flags);
  500 +outsparse:
  501 + irq_unlock_sparse();
482 502 return 0;
483 503 }
484 504 #endif