Commit f2c66cd8eeddedb440f33bc0f5cec1ed7ae376cb
Committed by
Linus Torvalds
1 parent
19cd56c48d
Exists in
master
and in
7 other branches
/proc/stat: scalability of irq num per cpu
/proc/stat shows the total number of all interrupts to each cpu. But when the number of IRQs is very large, it takes a very long time and 'cat /proc/stat' takes more than 10 secs. This is because the sum of all irq events is counted when /proc/stat is read. This patch adds a percpu "sum of all irq" counter and reduces read costs. The cost of reading /proc/stat is important because it's used by major applications such as 'top', 'ps', 'w', etc.... A test on a machine (4096 cpus, 256 nodes, 4592 irqs) shows %time cat /proc/stat > /dev/null Before Patch: 12.627 sec After Patch: 2.459 sec Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Tested-by: Jack Steiner <steiner@sgi.com> Acked-by: Jack Steiner <steiner@sgi.com> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 2 changed files with 13 additions and 5 deletions Side-by-side Diff
fs/proc/stat.c
... | ... | @@ -52,9 +52,7 @@ |
52 | 52 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); |
53 | 53 | guest_nice = cputime64_add(guest_nice, |
54 | 54 | kstat_cpu(i).cpustat.guest_nice); |
55 | - for_each_irq_nr(j) { | |
56 | - sum += kstat_irqs_cpu(j, i); | |
57 | - } | |
55 | + sum += kstat_cpu_irqs_sum(i); | |
58 | 56 | sum += arch_irq_stat_cpu(i); |
59 | 57 | |
60 | 58 | for (j = 0; j < NR_SOFTIRQS; j++) { |
include/linux/kernel_stat.h
... | ... | @@ -33,6 +33,7 @@ |
33 | 33 | #ifndef CONFIG_GENERIC_HARDIRQS |
34 | 34 | unsigned int irqs[NR_IRQS]; |
35 | 35 | #endif |
36 | + unsigned long irqs_sum; | |
36 | 37 | unsigned int softirqs[NR_SOFTIRQS]; |
37 | 38 | }; |
38 | 39 | |
... | ... | @@ -54,6 +55,7 @@ |
54 | 55 | struct irq_desc *desc) |
55 | 56 | { |
56 | 57 | kstat_this_cpu.irqs[irq]++; |
58 | + kstat_this_cpu.irqs_sum++; | |
57 | 59 | } |
58 | 60 | |
59 | 61 | static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) |
... | ... | @@ -65,8 +67,9 @@ |
65 | 67 | extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); |
66 | 68 | #define kstat_irqs_this_cpu(DESC) \ |
67 | 69 | ((DESC)->kstat_irqs[smp_processor_id()]) |
68 | -#define kstat_incr_irqs_this_cpu(irqno, DESC) \ | |
69 | - ((DESC)->kstat_irqs[smp_processor_id()]++) | |
70 | +#define kstat_incr_irqs_this_cpu(irqno, DESC) do {\ | |
71 | + ((DESC)->kstat_irqs[smp_processor_id()]++);\ | |
72 | + kstat_this_cpu.irqs_sum++; } while (0) | |
70 | 73 | |
71 | 74 | #endif |
72 | 75 | |
... | ... | @@ -94,6 +97,13 @@ |
94 | 97 | return sum; |
95 | 98 | } |
96 | 99 | |
100 | +/* | |
101 | + * Number of interrupts per cpu, since bootup | |
102 | + */ | |
103 | +static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) | |
104 | +{ | |
105 | + return kstat_cpu(cpu).irqs_sum; | |
106 | +} | |
97 | 107 | |
98 | 108 | /* |
99 | 109 | * Lock/unlock the current runqueue - to extract task statistics: |