Commit 5666a3de7ab455e889cdcecd2128bc316f842df3

Authored by Thomas Gleixner
Committed by Greg Kroah-Hartman
1 parent 7177a581fe

genirq: Prevent proc race against freeing of irq descriptors

commit c291ee622165cb2c8d4e7af63fffd499354a23be upstream.

Since the rework of the sparse interrupt code to actually free the
unused interrupt descriptors there exists a race between the /proc
interfaces to the irq subsystem and the code which frees the interrupt
descriptor.

CPU0				CPU1
				show_interrupts()
				  desc = irq_to_desc(X);
free_desc(desc)
  remove_from_radix_tree();
  kfree(desc);
				  raw_spinlock_irq(&desc->lock);

/proc/interrupts is the only interface which can actively corrupt
kernel memory via the lock access. /proc/stat can only read from freed
memory. Extremely hard to trigger, but possible.

The interfaces in /proc/irq/N/ are not affected by this because the
removal of the proc file is serialized in procfs against concurrent
readers/writers. The removal happens before the descriptor is freed.

For architectures which have CONFIG_SPARSE_IRQ=n this is a non issue
as the descriptor is never freed. It's merely cleared out with the irq
descriptor lock held. So any concurrent proc access will either see
the old correct value or the cleared out ones.

Protect the lookup and access to the irq descriptor in
show_interrupts() with the sparse_irq_lock.

Provide kstat_irqs_usr() which is protecting the lookup and access
with sparse_irq_lock and switch /proc/stat to use it.

Document the existing kstat_irqs interfaces so it's clear that the
caller needs to take care of protection. The users of these
interfaces are either not affected due to SPARSE_IRQ=n or already
protected against removal.

Fixes: 1f5a5b87f78f "genirq: Implement a sane sparse_irq allocator"
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 5 changed files with 83 additions and 2 deletions Side-by-side Diff

... ... @@ -159,7 +159,7 @@
159 159  
160 160 /* sum again ? it could be updated? */
161 161 for_each_irq_nr(j)
162   - seq_put_decimal_ull(p, ' ', kstat_irqs(j));
  162 + seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
163 163  
164 164 seq_printf(p,
165 165 "\nctxt %llu\n"
include/linux/kernel_stat.h
... ... @@ -74,6 +74,7 @@
74 74 * Number of interrupts per specific IRQ source, since bootup
75 75 */
76 76 extern unsigned int kstat_irqs(unsigned int irq);
  77 +extern unsigned int kstat_irqs_usr(unsigned int irq);
77 78  
78 79 /*
79 80 * Number of interrupts per cpu, since bootup
kernel/irq/internals.h
... ... @@ -74,6 +74,14 @@
74 74 extern void mask_irq(struct irq_desc *desc);
75 75 extern void unmask_irq(struct irq_desc *desc);
76 76  
  77 +#ifdef CONFIG_SPARSE_IRQ
  78 +extern void irq_lock_sparse(void);
  79 +extern void irq_unlock_sparse(void);
  80 +#else
  81 +static inline void irq_lock_sparse(void) { }
  82 +static inline void irq_unlock_sparse(void) { }
  83 +#endif
  84 +
77 85 extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
78 86  
79 87 irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action);
kernel/irq/irqdesc.c
... ... @@ -131,6 +131,16 @@
131 131 static inline void free_masks(struct irq_desc *desc) { }
132 132 #endif
133 133  
  134 +void irq_lock_sparse(void)
  135 +{
  136 + mutex_lock(&sparse_irq_lock);
  137 +}
  138 +
  139 +void irq_unlock_sparse(void)
  140 +{
  141 + mutex_unlock(&sparse_irq_lock);
  142 +}
  143 +
134 144 static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
135 145 {
136 146 struct irq_desc *desc;
... ... @@ -167,6 +177,12 @@
167 177  
168 178 unregister_irq_proc(irq, desc);
169 179  
  180 + /*
  181 + * sparse_irq_lock protects also show_interrupts() and
  182 + * kstat_irqs_usr(). Once we deleted the descriptor from the
  183 + * sparse tree we can free it. Access in proc will fail to
  184 + * lookup the descriptor.
  185 + */
170 186 mutex_lock(&sparse_irq_lock);
171 187 delete_irq_desc(irq);
172 188 mutex_unlock(&sparse_irq_lock);
... ... @@ -489,6 +505,15 @@
489 505 raw_spin_unlock_irqrestore(&desc->lock, flags);
490 506 }
491 507  
  508 +/**
  509 + * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu
  510 + * @irq: The interrupt number
  511 + * @cpu: The cpu number
  512 + *
  513 + * Returns the sum of interrupt counts on @cpu since boot for
  514 + * @irq. The caller must ensure that the interrupt is not removed
  515 + * concurrently.
  516 + */
492 517 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
493 518 {
494 519 struct irq_desc *desc = irq_to_desc(irq);
... ... @@ -497,6 +522,14 @@
497 522 *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
498 523 }
499 524  
  525 +/**
  526 + * kstat_irqs - Get the statistics for an interrupt
  527 + * @irq: The interrupt number
  528 + *
  529 + * Returns the sum of interrupt counts on all cpus since boot for
  530 + * @irq. The caller must ensure that the interrupt is not removed
  531 + * concurrently.
  532 + */
500 533 unsigned int kstat_irqs(unsigned int irq)
501 534 {
502 535 struct irq_desc *desc = irq_to_desc(irq);
... ... @@ -507,6 +540,25 @@
507 540 return 0;
508 541 for_each_possible_cpu(cpu)
509 542 sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
  543 + return sum;
  544 +}
  545 +
  546 +/**
  547 + * kstat_irqs_usr - Get the statistics for an interrupt
  548 + * @irq: The interrupt number
  549 + *
  550 + * Returns the sum of interrupt counts on all cpus since boot for
  551 + * @irq. Contrary to kstat_irqs() this can be called from any
  552 + * preemptible context. It's protected against concurrent removal of
  553 + * an interrupt descriptor when sparse irqs are enabled.
  554 + */
  555 +unsigned int kstat_irqs_usr(unsigned int irq)
  556 +{
  557 + int sum;
  558 +
  559 + irq_lock_sparse();
  560 + sum = kstat_irqs(irq);
  561 + irq_unlock_sparse();
510 562 return sum;
511 563 }
... ... @@ -15,6 +15,23 @@
15 15  
16 16 #include "internals.h"
17 17  
  18 +/*
  19 + * Access rules:
  20 + *
  21 + * procfs protects read/write of /proc/irq/N/ files against a
  22 + * concurrent free of the interrupt descriptor. remove_proc_entry()
  23 + * immediately prevents new read/writes to happen and waits for
  24 + * already running read/write functions to complete.
  25 + *
  26 + * We remove the proc entries first and then delete the interrupt
  27 + * descriptor from the radix tree and free it. So it is guaranteed
  28 + * that irq_to_desc(N) is valid as long as the read/writes are
  29 + * permitted by procfs.
  30 + *
  31 + * The read from /proc/interrupts is a different problem because there
  32 + * is no protection. So the lookup and the access to irqdesc
  33 + * information must be protected by sparse_irq_lock.
  34 + */
18 35 static struct proc_dir_entry *root_irq_dir;
19 36  
20 37 #ifdef CONFIG_SMP
21 38  
... ... @@ -437,9 +454,10 @@
437 454 seq_putc(p, '\n');
438 455 }
439 456  
  457 + irq_lock_sparse();
440 458 desc = irq_to_desc(i);
441 459 if (!desc)
442   - return 0;
  460 + goto outsparse;
443 461  
444 462 raw_spin_lock_irqsave(&desc->lock, flags);
445 463 for_each_online_cpu(j)
... ... @@ -479,6 +497,8 @@
479 497 seq_putc(p, '\n');
480 498 out:
481 499 raw_spin_unlock_irqrestore(&desc->lock, flags);
  500 +outsparse:
  501 + irq_unlock_sparse();
482 502 return 0;
483 503 }
484 504 #endif