Commit 4305df947ca1fd52867c8d56837a4e6b1e33167c

Authored by Thomas Gleixner
Parent: 020dd984d7

x86: i8259: Convert to new irq_chip functions

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@elte.hu>

Showing 5 changed files with 47 additions and 44 deletions
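The conversion this commit performs follows the then-new genirq pattern: irq_chip callbacks that used to take a bare IRQ number now take a struct irq_data pointer, from which the IRQ number and per-IRQ chip data are derived. As a rough sketch of that pattern (the names example_irq_mask, example_irq_unmask, and example_chip are illustrative, not the literal hunks of this commit):

#include <linux/irq.h>

static void example_irq_mask(struct irq_data *data)
{
	/* mask data->irq in the device's mask register */
}

static void example_irq_unmask(struct irq_data *data)
{
	/* clear the mask bit for data->irq again */
}

static struct irq_chip example_chip = {
	.name		= "example",
	.irq_mask	= example_irq_mask,	/* old equivalent: .mask, taking unsigned int irq */
	.irq_unmask	= example_irq_unmask,	/* old equivalent: .unmask */
};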

arch/x86/include/asm/i8259.h

#ifndef _ASM_X86_I8259_H
#define _ASM_X86_I8259_H

#include <linux/delay.h>

extern unsigned int cached_irq_mask;

#define __byte(x, y)		(((unsigned char *)&(y))[x])
#define cached_master_mask	(__byte(0, cached_irq_mask))
#define cached_slave_mask	(__byte(1, cached_irq_mask))

/* i8259A PIC registers */
#define PIC_MASTER_CMD		0x20
#define PIC_MASTER_IMR		0x21
#define PIC_MASTER_ISR		PIC_MASTER_CMD
#define PIC_MASTER_POLL		PIC_MASTER_ISR
#define PIC_MASTER_OCW3		PIC_MASTER_ISR
#define PIC_SLAVE_CMD		0xa0
#define PIC_SLAVE_IMR		0xa1

/* i8259A PIC related value */
#define PIC_CASCADE_IR		2
#define MASTER_ICW4_DEFAULT	0x01
#define SLAVE_ICW4_DEFAULT	0x01
#define PIC_ICW4_AEOI		2

extern raw_spinlock_t i8259A_lock;

/* the PIC may need a careful delay on some platforms, hence specific calls */
static inline unsigned char inb_pic(unsigned int port)
{
	unsigned char value = inb(port);

	/*
	 * delay for some accesses to PIC on motherboard or in chipset
	 * must be at least one microsecond, so be safe here:
	 */
	udelay(2);

	return value;
}

static inline void outb_pic(unsigned char value, unsigned int port)
{
	outb(value, port);
	/*
	 * delay for some accesses to PIC on motherboard or in chipset
	 * must be at least one microsecond, so be safe here:
	 */
	udelay(2);
}
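
/*
 * Example (illustrative): flushing the cached master mask out to the
 * hardware, using the helper and the constants defined above, would
 * look like
 *
 *	outb_pic(cached_master_mask, PIC_MASTER_IMR);
 */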

extern struct irq_chip i8259A_chip;

struct legacy_pic {
	int nr_legacy_irqs;
	struct irq_chip *chip;
+	void (*mask)(unsigned int irq);
+	void (*unmask)(unsigned int irq);
	void (*mask_all)(void);
	void (*restore_mask)(void);
	void (*init)(int auto_eoi);
	int (*irq_pending)(unsigned int irq);
	void (*make_irq)(unsigned int irq);
};

extern struct legacy_pic *legacy_pic;
extern struct legacy_pic null_legacy_pic;

#endif /* _ASM_X86_I8259_H */
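The two hooks added above (marked +) give callers a PIC-agnostic way to mask and unmask a legacy interrupt line. A hedged sketch of how an implementation might populate them; the names example_legacy_mask, example_legacy_unmask, and example_pic are assumptions for illustration, not taken from this diff:

#include <asm/i8259.h>

static void example_legacy_mask(unsigned int irq)
{
	/* set the line's bit in the 8259A interrupt mask register */
}

static void example_legacy_unmask(unsigned int irq)
{
	/* clear the line's bit again */
}

static struct legacy_pic example_pic = {
	.nr_legacy_irqs	= 16,			/* IRQ0..IRQ15 on a classic PC */
	.mask		= example_legacy_mask,
	.unmask		= example_legacy_unmask,
};

Code that needs to mask a legacy line can then call legacy_pic->mask(irq) without knowing which PIC implementation is active.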
arch/x86/kernel/apic/io_apic.c

/*
 *	Intel IO-APIC support for multi-Pentium hosts.
 *
 *	Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
 *
 *	Many thanks to Stig Venaas for trying out countless experimental
 *	patches and reporting/debugging problems patiently!
 *
 *	(c) 1999, Multiple IO-APIC support, developed by
 *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
 *	Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
 *	further tested and cleaned up by Zach Brown <zab@redhat.com>
 *	and Ingo Molnar <mingo@redhat.com>
 *
 *	Fixes
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *					thanks to Eric Gilmore
 *					and Rolf G. Tews
 *					for testing these extensively
 *	Paul Diefenbaugh	:	Added full ACPI support
 */

#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/pci.h>
#include <linux/mc146818rtc.h>
#include <linux/compiler.h>
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/msi.h>
#include <linux/htirq.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>	/* time_after() */
#include <linux/slab.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
#include <linux/bootmem.h>
#include <linux/dmar.h>
#include <linux/hpet.h>

#include <asm/idle.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/dma.h>
#include <asm/timer.h>
#include <asm/i8259.h>
#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
#include <asm/setup.h>
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>

#include <asm/apic.h>

#define __apicdebuginit(type)	static type __init
#define for_each_irq_pin(entry, head) \
	for (entry = head; entry; entry = entry->next)

/*
 *      Is the SiS APIC rmw bug present ?
 *      -1 = don't know, 0 = no, 1 = yes
 */
int sis_apic_bug = -1;

static DEFINE_RAW_SPINLOCK(ioapic_lock);
static DEFINE_RAW_SPINLOCK(vector_lock);

/*
 * # of IRQ routing registers
 */
int nr_ioapic_registers[MAX_IO_APICS];

/* I/O APIC entries */
struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;

/* IO APIC gsi routing info */
struct mp_ioapic_gsi  mp_gsi_routing[MAX_IO_APICS];

/* The one past the highest gsi number used */
u32 gsi_top;

/* MP IRQ source entries */
struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];

/* # of MP IRQ source entries */
int mp_irq_entries;

/* GSI interrupts */
static int nr_irqs_gsi = NR_IRQS_LEGACY;

#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif

DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);

int skip_ioapic_setup;

void arch_disable_smp_support(void)
{
#ifdef CONFIG_PCI
	noioapicquirk = 1;
	noioapicreroute = -1;
#endif
	skip_ioapic_setup = 1;
}

static int __init parse_noapic(char *str)
{
	/* disable IO-APIC */
	arch_disable_smp_support();
	return 0;
}
early_param("noapic", parse_noapic);

struct irq_pin_list {
	int apic, pin;
	struct irq_pin_list *next;
};

static struct irq_pin_list *get_one_free_irq_2_pin(int node)
{
	struct irq_pin_list *pin;

	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);

	return pin;
}

/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
#else
static struct irq_cfg irq_cfgx[NR_IRQS];
#endif

int __init arch_early_irq_init(void)
{
	struct irq_cfg *cfg;
	struct irq_desc *desc;
	int count;
	int node;
	int i;

	if (!legacy_pic->nr_legacy_irqs) {
		nr_irqs_gsi = 0;
		io_apic_irqs = ~0UL;
	}

	cfg = irq_cfgx;
	count = ARRAY_SIZE(irq_cfgx);
	node = cpu_to_node(0);

	for (i = 0; i < count; i++) {
		desc = irq_to_desc(i);
		desc->chip_data = &cfg[i];
		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
		/*
		 * For legacy IRQ's, start with assigning irq0 to irq15 to
		 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
		 */
		if (i < legacy_pic->nr_legacy_irqs) {
			cfg[i].vector = IRQ0_VECTOR + i;
			cpumask_set_cpu(0, cfg[i].domain);
		}
	}

	return 0;
}

#ifdef CONFIG_SPARSE_IRQ
struct irq_cfg *irq_cfg(unsigned int irq)
{
	struct irq_cfg *cfg = NULL;
	struct irq_desc *desc;

	desc = irq_to_desc(irq);
	if (desc)
		cfg = desc->chip_data;

	return cfg;
}

static struct irq_cfg *get_one_free_irq_cfg(int node)
{
	struct irq_cfg *cfg;

	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
	if (cfg) {
		if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
			kfree(cfg);
			cfg = NULL;
		} else if (!zalloc_cpumask_var_node(&cfg->old_domain,
						    GFP_ATOMIC, node)) {
			free_cpumask_var(cfg->domain);
			kfree(cfg);
			cfg = NULL;
		}
	}

	return cfg;
}

int arch_init_chip_data(struct irq_desc *desc, int node)
{
	struct irq_cfg *cfg;

	cfg = desc->chip_data;
	if (!cfg) {
		desc->chip_data = get_one_free_irq_cfg(node);
		if (!desc->chip_data) {
			printk(KERN_ERR "can not alloc irq_cfg\n");
			BUG_ON(1);
		}
	}

	return 0;
}

/* for move_irq_desc */
static void
init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
{
	struct irq_pin_list *old_entry, *head, *tail, *entry;

	cfg->irq_2_pin = NULL;
	old_entry = old_cfg->irq_2_pin;
	if (!old_entry)
		return;

	entry = get_one_free_irq_2_pin(node);
	if (!entry)
		return;

	entry->apic = old_entry->apic;
	entry->pin = old_entry->pin;
	head = entry;
	tail = entry;
	old_entry = old_entry->next;
	while (old_entry) {
		entry = get_one_free_irq_2_pin(node);
		if (!entry) {
			entry = head;
			while (entry) {
				head = entry->next;
				kfree(entry);
				entry = head;
			}
			/* still use the old one */
			return;
		}
		entry->apic = old_entry->apic;
		entry->pin = old_entry->pin;
		tail->next = entry;
		tail = entry;
		old_entry = old_entry->next;
	}

	tail->next = NULL;
	cfg->irq_2_pin = head;
}

static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
{
	struct irq_pin_list *entry, *next;

	if (old_cfg->irq_2_pin == cfg->irq_2_pin)
		return;

	entry = old_cfg->irq_2_pin;

	while (entry) {
		next = entry->next;
		kfree(entry);
		entry = next;
	}
	old_cfg->irq_2_pin = NULL;
}

void arch_init_copy_chip_data(struct irq_desc *old_desc,
			      struct irq_desc *desc, int node)
{
	struct irq_cfg *cfg;
	struct irq_cfg *old_cfg;

	cfg = get_one_free_irq_cfg(node);

	if (!cfg)
		return;

	desc->chip_data = cfg;

	old_cfg = old_desc->chip_data;

	cfg->vector = old_cfg->vector;
	cfg->move_in_progress = old_cfg->move_in_progress;
	cpumask_copy(cfg->domain, old_cfg->domain);
	cpumask_copy(cfg->old_domain, old_cfg->old_domain);

	init_copy_irq_2_pin(old_cfg, cfg, node);
}

static void free_irq_cfg(struct irq_cfg *cfg)
{
	free_cpumask_var(cfg->domain);
	free_cpumask_var(cfg->old_domain);
	kfree(cfg);
}

void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
{
	struct irq_cfg *old_cfg, *cfg;

	old_cfg = old_desc->chip_data;
	cfg = desc->chip_data;

	if (old_cfg == cfg)
		return;

	if (old_cfg) {
		free_irq_2_pin(old_cfg, cfg);
		free_irq_cfg(old_cfg);
		old_desc->chip_data = NULL;
	}
}
/* end for move_irq_desc */

#else
struct irq_cfg *irq_cfg(unsigned int irq)
{
	return irq < nr_irqs ? irq_cfgx + irq : NULL;
}

#endif

struct io_apic {
	unsigned int index;
	unsigned int unused[3];
	unsigned int data;
	unsigned int unused2[11];
	unsigned int eoi;
};

static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
{
	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
		+ (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
}

static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(vector, &io_apic->eoi);
}

static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(reg, &io_apic->index);
	return readl(&io_apic->data);
}

static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(reg, &io_apic->index);
	writel(value, &io_apic->data);
}

/*
 * Re-write a value: to be used for read-modify-write
 * cycles where the read already set up the index register.
 *
 * Older SiS APIC requires we rewrite the index register
 */
static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);

	if (sis_apic_bug)
		writel(reg, &io_apic->index);
	writel(value, &io_apic->data);
}

static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
{
	struct irq_pin_list *entry;
	unsigned long flags;

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	for_each_irq_pin(entry, cfg->irq_2_pin) {
		unsigned int reg;
		int pin;

		pin = entry->pin;
		reg = io_apic_read(entry->apic, 0x10 + pin*2);
		/* Is the remote IRR bit set? */
		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
			raw_spin_unlock_irqrestore(&ioapic_lock, flags);
			return true;
		}
	}
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);

	return false;
}

union entry_union {
	struct { u32 w1, w2; };
	struct IO_APIC_route_entry entry;
};

static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
	union entry_union eu;
	unsigned long flags;
	raw_spin_lock_irqsave(&ioapic_lock, flags);
	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
	return eu.entry;
}

/*
 * When we write a new IO APIC routing entry, we need to write the high
 * word first! If the mask bit in the low word is clear, we will enable
 * the interrupt, and we need to make sure the entry is fully populated
 * before that happens.
 */
static void
__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	union entry_union eu = {{0, 0}};

	eu.entry = e;
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
}

void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	unsigned long flags;
	raw_spin_lock_irqsave(&ioapic_lock, flags);
	__ioapic_write_entry(apic, pin, e);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

/*
 * When we mask an IO APIC routing entry, we need to write the low
 * word first, in order to set the mask bit before we change the
 * high bits!
 */
static void ioapic_mask_entry(int apic, int pin)
{
	unsigned long flags;
	union entry_union eu = { .entry.mask = 1 };

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

/*
 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
 * shared ISA-space IRQs, so we have to support them. We are super
 * fast in the common case, and fast for shared ISA-space IRQs.
 */
static int
add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
{
	struct irq_pin_list **last, *entry;

	/* don't allow duplicates */
	last = &cfg->irq_2_pin;
	for_each_irq_pin(entry, cfg->irq_2_pin) {
		if (entry->apic == apic && entry->pin == pin)
			return 0;
		last = &entry->next;
	}

	entry = get_one_free_irq_2_pin(node);
	if (!entry) {
		printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
		       node, apic, pin);
		return -ENOMEM;
	}
	entry->apic = apic;
	entry->pin = pin;

	*last = entry;
	return 0;
}

static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
	if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
		panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
}

/*
 * Reroute an IRQ to a different pin.
 */
static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
					   int oldapic, int oldpin,
					   int newapic, int newpin)
{
	struct irq_pin_list *entry;

	for_each_irq_pin(entry, cfg->irq_2_pin) {
		if (entry->apic == oldapic && entry->pin == oldpin) {
			entry->apic = newapic;
			entry->pin = newpin;
			/* every one is different, right? */
			return;
		}
	}

	/* old apic/pin didn't exist, so just add new ones */
	add_pin_to_irq_node(cfg, node, newapic, newpin);
}

static void __io_apic_modify_irq(struct irq_pin_list *entry,
				 int mask_and, int mask_or,
				 void (*final)(struct irq_pin_list *entry))
{
	unsigned int reg, pin;

	pin = entry->pin;
	reg = io_apic_read(entry->apic, 0x10 + pin * 2);
	reg &= mask_and;
	reg |= mask_or;
	io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
	if (final)
		final(entry);
}

static void io_apic_modify_irq(struct irq_cfg *cfg,
			       int mask_and, int mask_or,
			       void (*final)(struct irq_pin_list *entry))
{
	struct irq_pin_list *entry;

	for_each_irq_pin(entry, cfg->irq_2_pin)
		__io_apic_modify_irq(entry, mask_and, mask_or, final);
}

static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
{
	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
			     IO_APIC_REDIR_MASKED, NULL);
}

static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
{
	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
			     IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}

static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
{
	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}

static void io_apic_sync(struct irq_pin_list *entry)
{
	/*
	 * Synchronize the IO-APIC and the CPU by doing
	 * a dummy read from the IO-APIC
	 */
	struct io_apic __iomem *io_apic;
	io_apic = io_apic_base(entry->apic);
	readl(&io_apic->data);
}

static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}

static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
	struct irq_cfg *cfg = desc->chip_data;
	unsigned long flags;

	BUG_ON(!cfg);

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	__mask_IO_APIC_irq(cfg);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
{
	struct irq_cfg *cfg = desc->chip_data;
	unsigned long flags;

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	__unmask_IO_APIC_irq(cfg);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void mask_IO_APIC_irq(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);

	mask_IO_APIC_irq_desc(desc);
}
static void unmask_IO_APIC_irq(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);

	unmask_IO_APIC_irq_desc(desc);
}

static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
	struct IO_APIC_route_entry entry;

	/* Check delivery_mode to be sure we're not clearing an SMI pin */
	entry = ioapic_read_entry(apic, pin);
	if (entry.delivery_mode == dest_SMI)
		return;
	/*
	 * Disable it in the IO-APIC irq-routing table:
	 */
	ioapic_mask_entry(apic, pin);
}

static void clear_IO_APIC (void)
{
	int apic, pin;

	for (apic = 0; apic < nr_ioapics; apic++)
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
			clear_IO_APIC_pin(apic, pin);
}

#ifdef CONFIG_X86_32
/*
 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
 * specific CPU-side IRQs.
 */

#define MAX_PIRQS 8
static int pirq_entries[MAX_PIRQS] = {
	[0 ... MAX_PIRQS - 1] = -1
};

static int __init ioapic_pirq_setup(char *str)
{
	int i, max;
	int ints[MAX_PIRQS+1];

	get_options(str, ARRAY_SIZE(ints), ints);

	apic_printk(APIC_VERBOSE, KERN_INFO
		    "PIRQ redirection, working around broken MP-BIOS.\n");
	max = MAX_PIRQS;
	if (ints[0] < MAX_PIRQS)
		max = ints[0];

	for (i = 0; i < max; i++) {
		apic_printk(APIC_VERBOSE, KERN_DEBUG
			    "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
		/*
		 * PIRQs are mapped upside down, usually.
		 */
		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
	}
	return 1;
}

__setup("pirq=", ioapic_pirq_setup);
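
/*
 * Worked example (illustrative): booting with "pirq=5,11" makes
 * get_options() return ints[] = { 2, 5, 11 }, so the loop above sets
 * pirq_entries[7] = 5 and pirq_entries[6] = 11, that is PIRQ7 -> IRQ5
 * and PIRQ6 -> IRQ11, the "upside down" mapping noted in the comment.
 */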
689 #endif /* CONFIG_X86_32 */ 689 #endif /* CONFIG_X86_32 */
690 690
691 struct IO_APIC_route_entry **alloc_ioapic_entries(void) 691 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
692 { 692 {
693 int apic; 693 int apic;
694 struct IO_APIC_route_entry **ioapic_entries; 694 struct IO_APIC_route_entry **ioapic_entries;
695 695
696 ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, 696 ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
697 GFP_ATOMIC); 697 GFP_ATOMIC);
698 if (!ioapic_entries) 698 if (!ioapic_entries)
699 return 0; 699 return 0;
700 700
701 for (apic = 0; apic < nr_ioapics; apic++) { 701 for (apic = 0; apic < nr_ioapics; apic++) {
702 ioapic_entries[apic] = 702 ioapic_entries[apic] =
703 kzalloc(sizeof(struct IO_APIC_route_entry) * 703 kzalloc(sizeof(struct IO_APIC_route_entry) *
704 nr_ioapic_registers[apic], GFP_ATOMIC); 704 nr_ioapic_registers[apic], GFP_ATOMIC);
705 if (!ioapic_entries[apic]) 705 if (!ioapic_entries[apic])
706 goto nomem; 706 goto nomem;
707 } 707 }
708 708
709 return ioapic_entries; 709 return ioapic_entries;
710 710
711 nomem: 711 nomem:
712 while (--apic >= 0) 712 while (--apic >= 0)
713 kfree(ioapic_entries[apic]); 713 kfree(ioapic_entries[apic]);
714 kfree(ioapic_entries); 714 kfree(ioapic_entries);
715 715
716 return 0; 716 return 0;
717 } 717 }
718 718
719 /* 719 /*
720 * Saves all the IO-APIC RTE's 720 * Saves all the IO-APIC RTE's
721 */ 721 */
722 int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) 722 int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
723 { 723 {
724 int apic, pin; 724 int apic, pin;
725 725
726 if (!ioapic_entries) 726 if (!ioapic_entries)
727 return -ENOMEM; 727 return -ENOMEM;
728 728
729 for (apic = 0; apic < nr_ioapics; apic++) { 729 for (apic = 0; apic < nr_ioapics; apic++) {
730 if (!ioapic_entries[apic]) 730 if (!ioapic_entries[apic])
731 return -ENOMEM; 731 return -ENOMEM;
732 732
733 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 733 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
734 ioapic_entries[apic][pin] = 734 ioapic_entries[apic][pin] =
735 ioapic_read_entry(apic, pin); 735 ioapic_read_entry(apic, pin);
736 } 736 }
737 737
738 return 0; 738 return 0;
739 } 739 }
740 740
741 /* 741 /*
742 * Mask all IO APIC entries. 742 * Mask all IO APIC entries.
743 */ 743 */
744 void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) 744 void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
745 { 745 {
746 int apic, pin; 746 int apic, pin;
747 747
748 if (!ioapic_entries) 748 if (!ioapic_entries)
749 return; 749 return;
750 750
751 for (apic = 0; apic < nr_ioapics; apic++) { 751 for (apic = 0; apic < nr_ioapics; apic++) {
752 if (!ioapic_entries[apic]) 752 if (!ioapic_entries[apic])
753 break; 753 break;
754 754
755 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 755 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
756 struct IO_APIC_route_entry entry; 756 struct IO_APIC_route_entry entry;
757 757
758 entry = ioapic_entries[apic][pin]; 758 entry = ioapic_entries[apic][pin];
759 if (!entry.mask) { 759 if (!entry.mask) {
760 entry.mask = 1; 760 entry.mask = 1;
761 ioapic_write_entry(apic, pin, entry); 761 ioapic_write_entry(apic, pin, entry);
762 } 762 }
763 } 763 }
764 } 764 }
765 } 765 }
766 766
767 /* 767 /*
768 * Restore IO APIC entries which was saved in ioapic_entries. 768 * Restore IO APIC entries which was saved in ioapic_entries.
769 */ 769 */
770 int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) 770 int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
771 { 771 {
772 int apic, pin; 772 int apic, pin;
773 773
774 if (!ioapic_entries) 774 if (!ioapic_entries)
775 return -ENOMEM; 775 return -ENOMEM;
776 776
777 for (apic = 0; apic < nr_ioapics; apic++) { 777 for (apic = 0; apic < nr_ioapics; apic++) {
778 if (!ioapic_entries[apic]) 778 if (!ioapic_entries[apic])
779 return -ENOMEM; 779 return -ENOMEM;
780 780
781 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 781 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
782 ioapic_write_entry(apic, pin, 782 ioapic_write_entry(apic, pin,
783 ioapic_entries[apic][pin]); 783 ioapic_entries[apic][pin]);
784 } 784 }
785 return 0; 785 return 0;
786 } 786 }
787 787
788 void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) 788 void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
789 { 789 {
790 int apic; 790 int apic;
791 791
792 for (apic = 0; apic < nr_ioapics; apic++) 792 for (apic = 0; apic < nr_ioapics; apic++)
793 kfree(ioapic_entries[apic]); 793 kfree(ioapic_entries[apic]);
794 794
795 kfree(ioapic_entries); 795 kfree(ioapic_entries);
796 } 796 }
797 797
798 /* 798 /*
799 * Find the IRQ entry number of a certain pin. 799 * Find the IRQ entry number of a certain pin.
800 */ 800 */
801 static int find_irq_entry(int apic, int pin, int type) 801 static int find_irq_entry(int apic, int pin, int type)
802 { 802 {
803 int i; 803 int i;
804 804
805 for (i = 0; i < mp_irq_entries; i++) 805 for (i = 0; i < mp_irq_entries; i++)
806 if (mp_irqs[i].irqtype == type && 806 if (mp_irqs[i].irqtype == type &&
807 (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || 807 (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
808 mp_irqs[i].dstapic == MP_APIC_ALL) && 808 mp_irqs[i].dstapic == MP_APIC_ALL) &&
809 mp_irqs[i].dstirq == pin) 809 mp_irqs[i].dstirq == pin)
810 return i; 810 return i;
811 811
812 return -1; 812 return -1;
813 } 813 }
814 814
815 /* 815 /*
816 * Find the pin to which IRQ[irq] (ISA) is connected 816 * Find the pin to which IRQ[irq] (ISA) is connected
817 */ 817 */
818 static int __init find_isa_irq_pin(int irq, int type) 818 static int __init find_isa_irq_pin(int irq, int type)
819 { 819 {
820 int i; 820 int i;
821 821
822 for (i = 0; i < mp_irq_entries; i++) { 822 for (i = 0; i < mp_irq_entries; i++) {
823 int lbus = mp_irqs[i].srcbus; 823 int lbus = mp_irqs[i].srcbus;
824 824
825 if (test_bit(lbus, mp_bus_not_pci) && 825 if (test_bit(lbus, mp_bus_not_pci) &&
826 (mp_irqs[i].irqtype == type) && 826 (mp_irqs[i].irqtype == type) &&
827 (mp_irqs[i].srcbusirq == irq)) 827 (mp_irqs[i].srcbusirq == irq))
828 828
829 return mp_irqs[i].dstirq; 829 return mp_irqs[i].dstirq;
830 } 830 }
831 return -1; 831 return -1;
832 } 832 }
833 833
834 static int __init find_isa_irq_apic(int irq, int type) 834 static int __init find_isa_irq_apic(int irq, int type)
835 { 835 {
836 int i; 836 int i;
837 837
838 for (i = 0; i < mp_irq_entries; i++) { 838 for (i = 0; i < mp_irq_entries; i++) {
839 int lbus = mp_irqs[i].srcbus; 839 int lbus = mp_irqs[i].srcbus;
840 840
841 if (test_bit(lbus, mp_bus_not_pci) && 841 if (test_bit(lbus, mp_bus_not_pci) &&
842 (mp_irqs[i].irqtype == type) && 842 (mp_irqs[i].irqtype == type) &&
843 (mp_irqs[i].srcbusirq == irq)) 843 (mp_irqs[i].srcbusirq == irq))
844 break; 844 break;
845 } 845 }
846 if (i < mp_irq_entries) { 846 if (i < mp_irq_entries) {
847 int apic; 847 int apic;
848 for(apic = 0; apic < nr_ioapics; apic++) { 848 for(apic = 0; apic < nr_ioapics; apic++) {
849 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) 849 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
850 return apic; 850 return apic;
851 } 851 }
852 } 852 }
853 853
854 return -1; 854 return -1;
855 } 855 }
856 856
857 #if defined(CONFIG_EISA) || defined(CONFIG_MCA) 857 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
858 /* 858 /*
859 * EISA Edge/Level control register, ELCR 859 * EISA Edge/Level control register, ELCR
860 */ 860 */
861 static int EISA_ELCR(unsigned int irq) 861 static int EISA_ELCR(unsigned int irq)
862 { 862 {
863 if (irq < legacy_pic->nr_legacy_irqs) { 863 if (irq < legacy_pic->nr_legacy_irqs) {
864 unsigned int port = 0x4d0 + (irq >> 3); 864 unsigned int port = 0x4d0 + (irq >> 3);
865 return (inb(port) >> (irq & 7)) & 1; 865 return (inb(port) >> (irq & 7)) & 1;
866 } 866 }
867 apic_printk(APIC_VERBOSE, KERN_INFO 867 apic_printk(APIC_VERBOSE, KERN_INFO
868 "Broken MPtable reports ISA irq %d\n", irq); 868 "Broken MPtable reports ISA irq %d\n", irq);
869 return 0; 869 return 0;
870 } 870 }
871 871
872 #endif 872 #endif
873 873
874 /* ISA interrupts are always polarity zero edge triggered, 874 /* ISA interrupts are always polarity zero edge triggered,
875 * when listed as conforming in the MP table. */ 875 * when listed as conforming in the MP table. */
876 876
877 #define default_ISA_trigger(idx) (0) 877 #define default_ISA_trigger(idx) (0)
878 #define default_ISA_polarity(idx) (0) 878 #define default_ISA_polarity(idx) (0)
879 879
880 /* EISA interrupts are always polarity zero and can be edge or level 880 /* EISA interrupts are always polarity zero and can be edge or level
881 * trigger depending on the ELCR value. If an interrupt is listed as 881 * trigger depending on the ELCR value. If an interrupt is listed as
882 * EISA conforming in the MP table, that means its trigger type must 882 * EISA conforming in the MP table, that means its trigger type must
883 * be read in from the ELCR */ 883 * be read in from the ELCR */
884 884
885 #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) 885 #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
886 #define default_EISA_polarity(idx) default_ISA_polarity(idx) 886 #define default_EISA_polarity(idx) default_ISA_polarity(idx)
887 887
888 /* PCI interrupts are always polarity one level triggered, 888 /* PCI interrupts are always polarity one level triggered,
889 * when listed as conforming in the MP table. */ 889 * when listed as conforming in the MP table. */
890 890
891 #define default_PCI_trigger(idx) (1) 891 #define default_PCI_trigger(idx) (1)
892 #define default_PCI_polarity(idx) (1) 892 #define default_PCI_polarity(idx) (1)
893 893
894 /* MCA interrupts are always polarity zero level triggered, 894 /* MCA interrupts are always polarity zero level triggered,
895 * when listed as conforming in the MP table. */ 895 * when listed as conforming in the MP table. */
896 896
897 #define default_MCA_trigger(idx) (1) 897 #define default_MCA_trigger(idx) (1)
898 #define default_MCA_polarity(idx) default_ISA_polarity(idx) 898 #define default_MCA_polarity(idx) default_ISA_polarity(idx)
899 899
900 static int MPBIOS_polarity(int idx) 900 static int MPBIOS_polarity(int idx)
901 { 901 {
902 int bus = mp_irqs[idx].srcbus; 902 int bus = mp_irqs[idx].srcbus;
903 int polarity; 903 int polarity;
904 904
905 /* 905 /*
906 * Determine IRQ line polarity (high active or low active): 906 * Determine IRQ line polarity (high active or low active):
907 */ 907 */
908 switch (mp_irqs[idx].irqflag & 3) 908 switch (mp_irqs[idx].irqflag & 3)
909 { 909 {
910 case 0: /* conforms, ie. bus-type dependent polarity */ 910 case 0: /* conforms, ie. bus-type dependent polarity */
911 if (test_bit(bus, mp_bus_not_pci)) 911 if (test_bit(bus, mp_bus_not_pci))
912 polarity = default_ISA_polarity(idx); 912 polarity = default_ISA_polarity(idx);
913 else 913 else
914 polarity = default_PCI_polarity(idx); 914 polarity = default_PCI_polarity(idx);
915 break; 915 break;
916 case 1: /* high active */ 916 case 1: /* high active */
917 { 917 {
918 polarity = 0; 918 polarity = 0;
919 break; 919 break;
920 } 920 }
921 case 2: /* reserved */ 921 case 2: /* reserved */
922 { 922 {
923 printk(KERN_WARNING "broken BIOS!!\n"); 923 printk(KERN_WARNING "broken BIOS!!\n");
924 polarity = 1; 924 polarity = 1;
925 break; 925 break;
926 } 926 }
927 case 3: /* low active */ 927 case 3: /* low active */
928 { 928 {
929 polarity = 1; 929 polarity = 1;
930 break; 930 break;
931 } 931 }
932 default: /* invalid */ 932 default: /* invalid */
933 { 933 {
934 printk(KERN_WARNING "broken BIOS!!\n"); 934 printk(KERN_WARNING "broken BIOS!!\n");
935 polarity = 1; 935 polarity = 1;
936 break; 936 break;
937 } 937 }
938 } 938 }
939 return polarity; 939 return polarity;
940 } 940 }
941 941
942 static int MPBIOS_trigger(int idx) 942 static int MPBIOS_trigger(int idx)
943 { 943 {
944 int bus = mp_irqs[idx].srcbus; 944 int bus = mp_irqs[idx].srcbus;
945 int trigger; 945 int trigger;
946 946
947 /* 947 /*
948 * Determine IRQ trigger mode (edge or level sensitive): 948 * Determine IRQ trigger mode (edge or level sensitive):
949 */ 949 */
950 switch ((mp_irqs[idx].irqflag>>2) & 3) 950 switch ((mp_irqs[idx].irqflag>>2) & 3)
951 { 951 {
952 case 0: /* conforms, ie. bus-type dependent */ 952 case 0: /* conforms, ie. bus-type dependent */
953 if (test_bit(bus, mp_bus_not_pci)) 953 if (test_bit(bus, mp_bus_not_pci))
954 trigger = default_ISA_trigger(idx); 954 trigger = default_ISA_trigger(idx);
955 else 955 else
956 trigger = default_PCI_trigger(idx); 956 trigger = default_PCI_trigger(idx);
957 #if defined(CONFIG_EISA) || defined(CONFIG_MCA) 957 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
958 switch (mp_bus_id_to_type[bus]) { 958 switch (mp_bus_id_to_type[bus]) {
959 case MP_BUS_ISA: /* ISA pin */ 959 case MP_BUS_ISA: /* ISA pin */
960 { 960 {
961 /* set before the switch */ 961 /* set before the switch */
962 break; 962 break;
963 } 963 }
964 case MP_BUS_EISA: /* EISA pin */ 964 case MP_BUS_EISA: /* EISA pin */
965 { 965 {
966 trigger = default_EISA_trigger(idx); 966 trigger = default_EISA_trigger(idx);
967 break; 967 break;
968 } 968 }
969 case MP_BUS_PCI: /* PCI pin */ 969 case MP_BUS_PCI: /* PCI pin */
970 { 970 {
971 /* set before the switch */ 971 /* set before the switch */
972 break; 972 break;
973 } 973 }
974 case MP_BUS_MCA: /* MCA pin */ 974 case MP_BUS_MCA: /* MCA pin */
975 { 975 {
976 trigger = default_MCA_trigger(idx); 976 trigger = default_MCA_trigger(idx);
977 break; 977 break;
978 } 978 }
979 default: 979 default:
980 { 980 {
981 printk(KERN_WARNING "broken BIOS!!\n"); 981 printk(KERN_WARNING "broken BIOS!!\n");
982 trigger = 1; 982 trigger = 1;
983 break; 983 break;
984 } 984 }
985 } 985 }
986 #endif 986 #endif
987 break; 987 break;
988 case 1: /* edge */ 988 case 1: /* edge */
989 { 989 {
990 trigger = 0; 990 trigger = 0;
991 break; 991 break;
992 } 992 }
993 case 2: /* reserved */ 993 case 2: /* reserved */
994 { 994 {
995 printk(KERN_WARNING "broken BIOS!!\n"); 995 printk(KERN_WARNING "broken BIOS!!\n");
996 trigger = 1; 996 trigger = 1;
997 break; 997 break;
998 } 998 }
999 case 3: /* level */ 999 case 3: /* level */
1000 { 1000 {
1001 trigger = 1; 1001 trigger = 1;
1002 break; 1002 break;
1003 } 1003 }
1004 default: /* invalid */ 1004 default: /* invalid */
1005 { 1005 {
1006 printk(KERN_WARNING "broken BIOS!!\n"); 1006 printk(KERN_WARNING "broken BIOS!!\n");
1007 trigger = 0; 1007 trigger = 0;
1008 break; 1008 break;
1009 } 1009 }
1010 } 1010 }
1011 return trigger; 1011 return trigger;
1012 } 1012 }
1013 1013
1014 static inline int irq_polarity(int idx) 1014 static inline int irq_polarity(int idx)
1015 { 1015 {
1016 return MPBIOS_polarity(idx); 1016 return MPBIOS_polarity(idx);
1017 } 1017 }
1018 1018
1019 static inline int irq_trigger(int idx) 1019 static inline int irq_trigger(int idx)
1020 { 1020 {
1021 return MPBIOS_trigger(idx); 1021 return MPBIOS_trigger(idx);
1022 } 1022 }
1023 1023
1024 static int pin_2_irq(int idx, int apic, int pin) 1024 static int pin_2_irq(int idx, int apic, int pin)
1025 { 1025 {
1026 int irq; 1026 int irq;
1027 int bus = mp_irqs[idx].srcbus; 1027 int bus = mp_irqs[idx].srcbus;
1028 1028
1029 /* 1029 /*
1030 * Debugging check, we are in big trouble if this message pops up! 1030 * Debugging check, we are in big trouble if this message pops up!
1031 */ 1031 */
1032 if (mp_irqs[idx].dstirq != pin) 1032 if (mp_irqs[idx].dstirq != pin)
1033 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 1033 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1034 1034
1035 if (test_bit(bus, mp_bus_not_pci)) { 1035 if (test_bit(bus, mp_bus_not_pci)) {
1036 irq = mp_irqs[idx].srcbusirq; 1036 irq = mp_irqs[idx].srcbusirq;
1037 } else { 1037 } else {
1038 u32 gsi = mp_gsi_routing[apic].gsi_base + pin; 1038 u32 gsi = mp_gsi_routing[apic].gsi_base + pin;
1039 1039
1040 if (gsi >= NR_IRQS_LEGACY) 1040 if (gsi >= NR_IRQS_LEGACY)
1041 irq = gsi; 1041 irq = gsi;
1042 else 1042 else
1043 irq = gsi_top + gsi; 1043 irq = gsi_top + gsi;
1044 } 1044 }
1045 1045
1046 #ifdef CONFIG_X86_32 1046 #ifdef CONFIG_X86_32
1047 /* 1047 /*
1048 * PCI IRQ command line redirection. Yes, limits are hardcoded. 1048 * PCI IRQ command line redirection. Yes, limits are hardcoded.
1049 */ 1049 */
1050 if ((pin >= 16) && (pin <= 23)) { 1050 if ((pin >= 16) && (pin <= 23)) {
1051 if (pirq_entries[pin-16] != -1) { 1051 if (pirq_entries[pin-16] != -1) {
1052 if (!pirq_entries[pin-16]) { 1052 if (!pirq_entries[pin-16]) {
1053 apic_printk(APIC_VERBOSE, KERN_DEBUG 1053 apic_printk(APIC_VERBOSE, KERN_DEBUG
1054 "disabling PIRQ%d\n", pin-16); 1054 "disabling PIRQ%d\n", pin-16);
1055 } else { 1055 } else {
1056 irq = pirq_entries[pin-16]; 1056 irq = pirq_entries[pin-16];
1057 apic_printk(APIC_VERBOSE, KERN_DEBUG 1057 apic_printk(APIC_VERBOSE, KERN_DEBUG
1058 "using PIRQ%d -> IRQ %d\n", 1058 "using PIRQ%d -> IRQ %d\n",
1059 pin-16, irq); 1059 pin-16, irq);
1060 } 1060 }
1061 } 1061 }
1062 } 1062 }
1063 #endif 1063 #endif
1064 1064
1065 return irq; 1065 return irq;
1066 } 1066 }
1067 1067
1068 /* 1068 /*
1069 * Find a specific PCI IRQ entry. 1069 * Find a specific PCI IRQ entry.
1070 * Not an __init, possibly needed by modules 1070 * Not an __init, possibly needed by modules
1071 */ 1071 */
1072 int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, 1072 int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
1073 struct io_apic_irq_attr *irq_attr) 1073 struct io_apic_irq_attr *irq_attr)
1074 { 1074 {
1075 int apic, i, best_guess = -1; 1075 int apic, i, best_guess = -1;
1076 1076
1077 apic_printk(APIC_DEBUG, 1077 apic_printk(APIC_DEBUG,
1078 "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", 1078 "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
1079 bus, slot, pin); 1079 bus, slot, pin);
1080 if (test_bit(bus, mp_bus_not_pci)) { 1080 if (test_bit(bus, mp_bus_not_pci)) {
1081 apic_printk(APIC_VERBOSE, 1081 apic_printk(APIC_VERBOSE,
1082 "PCI BIOS passed nonexistent PCI bus %d!\n", bus); 1082 "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
1083 return -1; 1083 return -1;
1084 } 1084 }
1085 for (i = 0; i < mp_irq_entries; i++) { 1085 for (i = 0; i < mp_irq_entries; i++) {
1086 int lbus = mp_irqs[i].srcbus; 1086 int lbus = mp_irqs[i].srcbus;
1087 1087
1088 for (apic = 0; apic < nr_ioapics; apic++) 1088 for (apic = 0; apic < nr_ioapics; apic++)
1089 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || 1089 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
1090 mp_irqs[i].dstapic == MP_APIC_ALL) 1090 mp_irqs[i].dstapic == MP_APIC_ALL)
1091 break; 1091 break;
1092 1092
1093 if (!test_bit(lbus, mp_bus_not_pci) && 1093 if (!test_bit(lbus, mp_bus_not_pci) &&
1094 !mp_irqs[i].irqtype && 1094 !mp_irqs[i].irqtype &&
1095 (bus == lbus) && 1095 (bus == lbus) &&
1096 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { 1096 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
1097 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); 1097 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
1098 1098
1099 if (!(apic || IO_APIC_IRQ(irq))) 1099 if (!(apic || IO_APIC_IRQ(irq)))
1100 continue; 1100 continue;
1101 1101
1102 if (pin == (mp_irqs[i].srcbusirq & 3)) { 1102 if (pin == (mp_irqs[i].srcbusirq & 3)) {
1103 set_io_apic_irq_attr(irq_attr, apic, 1103 set_io_apic_irq_attr(irq_attr, apic,
1104 mp_irqs[i].dstirq, 1104 mp_irqs[i].dstirq,
1105 irq_trigger(i), 1105 irq_trigger(i),
1106 irq_polarity(i)); 1106 irq_polarity(i));
1107 return irq; 1107 return irq;
1108 } 1108 }
1109 /* 1109 /*
1110 * Use the first all-but-pin matching entry as a 1110 * Use the first all-but-pin matching entry as a
1111 * best-guess fuzzy result for broken mptables. 1111 * best-guess fuzzy result for broken mptables.
1112 */ 1112 */
1113 if (best_guess < 0) { 1113 if (best_guess < 0) {
1114 set_io_apic_irq_attr(irq_attr, apic, 1114 set_io_apic_irq_attr(irq_attr, apic,
1115 mp_irqs[i].dstirq, 1115 mp_irqs[i].dstirq,
1116 irq_trigger(i), 1116 irq_trigger(i),
1117 irq_polarity(i)); 1117 irq_polarity(i));
1118 best_guess = irq; 1118 best_guess = irq;
1119 } 1119 }
1120 } 1120 }
1121 } 1121 }
1122 return best_guess; 1122 return best_guess;
1123 } 1123 }
1124 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); 1124 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
1125 1125
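Editor's note: the slot and pin tests in IO_APIC_get_PCI_irq_vector() rely on how the MP table packs a PCI interrupt source into mp_irqs[].srcbusirq: the device number lives in bits 6..2 and the INTx pin (0 = INTA) in bits 1..0. A tiny sketch of the decoding, with a made-up encoded value:

#include <stdio.h>

int main(void)
{
	unsigned char srcbusirq = (5 << 2) | 2;	/* assumed: device 5, INTC */

	printf("slot %d, pin INT%c\n",
	       (srcbusirq >> 2) & 0x1f,	/* device number, bits 6..2 */
	       'A' + (srcbusirq & 3));	/* INTx pin, bits 1..0 */
	return 0;
}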
1126 void lock_vector_lock(void) 1126 void lock_vector_lock(void)
1127 { 1127 {
1128 /* Used to ensure that the online set of cpus does not 1128 /* Used to ensure that the online set of cpus does not
1129 * change during assign_irq_vector. 1129 * change during assign_irq_vector.
1130 */ 1130 */
1131 raw_spin_lock(&vector_lock); 1131 raw_spin_lock(&vector_lock);
1132 } 1132 }
1133 1133
1134 void unlock_vector_lock(void) 1134 void unlock_vector_lock(void)
1135 { 1135 {
1136 raw_spin_unlock(&vector_lock); 1136 raw_spin_unlock(&vector_lock);
1137 } 1137 }
1138 1138
1139 static int 1139 static int
1140 __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) 1140 __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1141 { 1141 {
1142 /* 1142 /*
1143 * NOTE! The local APIC isn't very good at handling 1143 * NOTE! The local APIC isn't very good at handling
1144 * multiple interrupts at the same interrupt level. 1144 * multiple interrupts at the same interrupt level.
1145 * As the interrupt level is determined by taking the 1145 * As the interrupt level is determined by taking the
1146 * vector number and shifting that right by 4, we 1146 * vector number and shifting that right by 4, we
1147 * want to spread these out a bit so that they don't 1147 * want to spread these out a bit so that they don't
1148 * all fall in the same interrupt level. 1148 * all fall in the same interrupt level.
1149 * 1149 *
1150 * Also, we've got to be careful not to trash gate 1150 * Also, we've got to be careful not to trash gate
1151 * 0x80, because int 0x80 is hm, kind of importantish. ;) 1151 * 0x80, because int 0x80 is hm, kind of importantish. ;)
1152 */ 1152 */
1153 static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; 1153 static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
1154 static int current_offset = VECTOR_OFFSET_START % 8; 1154 static int current_offset = VECTOR_OFFSET_START % 8;
1155 unsigned int old_vector; 1155 unsigned int old_vector;
1156 int cpu, err; 1156 int cpu, err;
1157 cpumask_var_t tmp_mask; 1157 cpumask_var_t tmp_mask;
1158 1158
1159 if (cfg->move_in_progress) 1159 if (cfg->move_in_progress)
1160 return -EBUSY; 1160 return -EBUSY;
1161 1161
1162 if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) 1162 if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
1163 return -ENOMEM; 1163 return -ENOMEM;
1164 1164
1165 old_vector = cfg->vector; 1165 old_vector = cfg->vector;
1166 if (old_vector) { 1166 if (old_vector) {
1167 cpumask_and(tmp_mask, mask, cpu_online_mask); 1167 cpumask_and(tmp_mask, mask, cpu_online_mask);
1168 cpumask_and(tmp_mask, cfg->domain, tmp_mask); 1168 cpumask_and(tmp_mask, cfg->domain, tmp_mask);
1169 if (!cpumask_empty(tmp_mask)) { 1169 if (!cpumask_empty(tmp_mask)) {
1170 free_cpumask_var(tmp_mask); 1170 free_cpumask_var(tmp_mask);
1171 return 0; 1171 return 0;
1172 } 1172 }
1173 } 1173 }
1174 1174
1175 /* Only try and allocate irqs on cpus that are present */ 1175 /* Only try and allocate irqs on cpus that are present */
1176 err = -ENOSPC; 1176 err = -ENOSPC;
1177 for_each_cpu_and(cpu, mask, cpu_online_mask) { 1177 for_each_cpu_and(cpu, mask, cpu_online_mask) {
1178 int new_cpu; 1178 int new_cpu;
1179 int vector, offset; 1179 int vector, offset;
1180 1180
1181 apic->vector_allocation_domain(cpu, tmp_mask); 1181 apic->vector_allocation_domain(cpu, tmp_mask);
1182 1182
1183 vector = current_vector; 1183 vector = current_vector;
1184 offset = current_offset; 1184 offset = current_offset;
1185 next: 1185 next:
1186 vector += 8; 1186 vector += 8;
1187 if (vector >= first_system_vector) { 1187 if (vector >= first_system_vector) {
1188 /* If out of vectors on large boxen, must share them. */ 1188 /* If out of vectors on large boxen, must share them. */
1189 offset = (offset + 1) % 8; 1189 offset = (offset + 1) % 8;
1190 vector = FIRST_EXTERNAL_VECTOR + offset; 1190 vector = FIRST_EXTERNAL_VECTOR + offset;
1191 } 1191 }
1192 if (unlikely(current_vector == vector)) 1192 if (unlikely(current_vector == vector))
1193 continue; 1193 continue;
1194 1194
1195 if (test_bit(vector, used_vectors)) 1195 if (test_bit(vector, used_vectors))
1196 goto next; 1196 goto next;
1197 1197
1198 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) 1198 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1199 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1199 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1200 goto next; 1200 goto next;
1201 /* Found one! */ 1201 /* Found one! */
1202 current_vector = vector; 1202 current_vector = vector;
1203 current_offset = offset; 1203 current_offset = offset;
1204 if (old_vector) { 1204 if (old_vector) {
1205 cfg->move_in_progress = 1; 1205 cfg->move_in_progress = 1;
1206 cpumask_copy(cfg->old_domain, cfg->domain); 1206 cpumask_copy(cfg->old_domain, cfg->domain);
1207 } 1207 }
1208 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) 1208 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1209 per_cpu(vector_irq, new_cpu)[vector] = irq; 1209 per_cpu(vector_irq, new_cpu)[vector] = irq;
1210 cfg->vector = vector; 1210 cfg->vector = vector;
1211 cpumask_copy(cfg->domain, tmp_mask); 1211 cpumask_copy(cfg->domain, tmp_mask);
1212 err = 0; 1212 err = 0;
1213 break; 1213 break;
1214 } 1214 }
1215 free_cpumask_var(tmp_mask); 1215 free_cpumask_var(tmp_mask);
1216 return err; 1216 return err;
1217 } 1217 }
1218 1218
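Editor's note: as the comment at the top of __assign_irq_vector() explains, a vector's priority level is its number shifted right by 4, so stepping the search by 8 puts at most two consecutive allocations into each 16-vector level, and the offset rotates once the range is exhausted. A toy model of that walk — the first-system-vector bound and starting offset are assumed demo values, and the used_vectors/per-cpu availability checks are omitted:

#include <stdio.h>

#define FIRST_EXTERNAL_VECTOR	0x20	/* x86 value */
#define FIRST_SYSTEM_VECTOR	0xef	/* assumed for the demo */

int main(void)
{
	int vector = FIRST_EXTERNAL_VECTOR + 1;	/* assumed start offset 1 */
	int offset = 1;
	int i;

	for (i = 0; i < 8; i++) {
		vector += 8;
		if (vector >= FIRST_SYSTEM_VECTOR) {
			/* out of vectors: rotate the offset and wrap */
			offset = (offset + 1) % 8;
			vector = FIRST_EXTERNAL_VECTOR + offset;
		}
		printf("vector 0x%02x -> priority level %d\n",
		       vector, vector >> 4);
	}
	return 0;
}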
1219 int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) 1219 int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1220 { 1220 {
1221 int err; 1221 int err;
1222 unsigned long flags; 1222 unsigned long flags;
1223 1223
1224 raw_spin_lock_irqsave(&vector_lock, flags); 1224 raw_spin_lock_irqsave(&vector_lock, flags);
1225 err = __assign_irq_vector(irq, cfg, mask); 1225 err = __assign_irq_vector(irq, cfg, mask);
1226 raw_spin_unlock_irqrestore(&vector_lock, flags); 1226 raw_spin_unlock_irqrestore(&vector_lock, flags);
1227 return err; 1227 return err;
1228 } 1228 }
1229 1229
1230 static void __clear_irq_vector(int irq, struct irq_cfg *cfg) 1230 static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1231 { 1231 {
1232 int cpu, vector; 1232 int cpu, vector;
1233 1233
1234 BUG_ON(!cfg->vector); 1234 BUG_ON(!cfg->vector);
1235 1235
1236 vector = cfg->vector; 1236 vector = cfg->vector;
1237 for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) 1237 for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
1238 per_cpu(vector_irq, cpu)[vector] = -1; 1238 per_cpu(vector_irq, cpu)[vector] = -1;
1239 1239
1240 cfg->vector = 0; 1240 cfg->vector = 0;
1241 cpumask_clear(cfg->domain); 1241 cpumask_clear(cfg->domain);
1242 1242
1243 if (likely(!cfg->move_in_progress)) 1243 if (likely(!cfg->move_in_progress))
1244 return; 1244 return;
1245 for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { 1245 for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
1246 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; 1246 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1247 vector++) { 1247 vector++) {
1248 if (per_cpu(vector_irq, cpu)[vector] != irq) 1248 if (per_cpu(vector_irq, cpu)[vector] != irq)
1249 continue; 1249 continue;
1250 per_cpu(vector_irq, cpu)[vector] = -1; 1250 per_cpu(vector_irq, cpu)[vector] = -1;
1251 break; 1251 break;
1252 } 1252 }
1253 } 1253 }
1254 cfg->move_in_progress = 0; 1254 cfg->move_in_progress = 0;
1255 } 1255 }
1256 1256
1257 void __setup_vector_irq(int cpu) 1257 void __setup_vector_irq(int cpu)
1258 { 1258 {
1259 /* Initialize vector_irq on a new cpu */ 1259 /* Initialize vector_irq on a new cpu */
1260 int irq, vector; 1260 int irq, vector;
1261 struct irq_cfg *cfg; 1261 struct irq_cfg *cfg;
1262 struct irq_desc *desc; 1262 struct irq_desc *desc;
1263 1263
1264 /* 1264 /*
1265 * vector_lock will make sure that we don't run into irq vector 1265 * vector_lock will make sure that we don't run into irq vector
1266 * assignments that might be happening on another cpu in parallel, 1266 * assignments that might be happening on another cpu in parallel,
1267 * while we set up our initial vector to irq mappings. 1267 * while we set up our initial vector to irq mappings.
1268 */ 1268 */
1269 raw_spin_lock(&vector_lock); 1269 raw_spin_lock(&vector_lock);
1270 /* Mark the inuse vectors */ 1270 /* Mark the inuse vectors */
1271 for_each_irq_desc(irq, desc) { 1271 for_each_irq_desc(irq, desc) {
1272 cfg = desc->chip_data; 1272 cfg = desc->chip_data;
1273 1273
1274 /* 1274 /*
1275 * If it is a legacy IRQ handled by the legacy PIC, this cpu 1275 * If it is a legacy IRQ handled by the legacy PIC, this cpu
1276 * will be part of the irq_cfg's domain. 1276 * will be part of the irq_cfg's domain.
1277 */ 1277 */
1278 if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq)) 1278 if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
1279 cpumask_set_cpu(cpu, cfg->domain); 1279 cpumask_set_cpu(cpu, cfg->domain);
1280 1280
1281 if (!cpumask_test_cpu(cpu, cfg->domain)) 1281 if (!cpumask_test_cpu(cpu, cfg->domain))
1282 continue; 1282 continue;
1283 vector = cfg->vector; 1283 vector = cfg->vector;
1284 per_cpu(vector_irq, cpu)[vector] = irq; 1284 per_cpu(vector_irq, cpu)[vector] = irq;
1285 } 1285 }
1286 /* Mark the free vectors */ 1286 /* Mark the free vectors */
1287 for (vector = 0; vector < NR_VECTORS; ++vector) { 1287 for (vector = 0; vector < NR_VECTORS; ++vector) {
1288 irq = per_cpu(vector_irq, cpu)[vector]; 1288 irq = per_cpu(vector_irq, cpu)[vector];
1289 if (irq < 0) 1289 if (irq < 0)
1290 continue; 1290 continue;
1291 1291
1292 cfg = irq_cfg(irq); 1292 cfg = irq_cfg(irq);
1293 if (!cpumask_test_cpu(cpu, cfg->domain)) 1293 if (!cpumask_test_cpu(cpu, cfg->domain))
1294 per_cpu(vector_irq, cpu)[vector] = -1; 1294 per_cpu(vector_irq, cpu)[vector] = -1;
1295 } 1295 }
1296 raw_spin_unlock(&vector_lock); 1296 raw_spin_unlock(&vector_lock);
1297 } 1297 }
1298 1298
1299 static struct irq_chip ioapic_chip; 1299 static struct irq_chip ioapic_chip;
1300 static struct irq_chip ir_ioapic_chip; 1300 static struct irq_chip ir_ioapic_chip;
1301 1301
1302 #define IOAPIC_AUTO -1 1302 #define IOAPIC_AUTO -1
1303 #define IOAPIC_EDGE 0 1303 #define IOAPIC_EDGE 0
1304 #define IOAPIC_LEVEL 1 1304 #define IOAPIC_LEVEL 1
1305 1305
1306 #ifdef CONFIG_X86_32 1306 #ifdef CONFIG_X86_32
1307 static inline int IO_APIC_irq_trigger(int irq) 1307 static inline int IO_APIC_irq_trigger(int irq)
1308 { 1308 {
1309 int apic, idx, pin; 1309 int apic, idx, pin;
1310 1310
1311 for (apic = 0; apic < nr_ioapics; apic++) { 1311 for (apic = 0; apic < nr_ioapics; apic++) {
1312 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1312 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1313 idx = find_irq_entry(apic, pin, mp_INT); 1313 idx = find_irq_entry(apic, pin, mp_INT);
1314 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) 1314 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1315 return irq_trigger(idx); 1315 return irq_trigger(idx);
1316 } 1316 }
1317 } 1317 }
1318 /* 1318 /*
1319 * nonexistent IRQs default to edge 1319 * nonexistent IRQs default to edge
1320 */ 1320 */
1321 return 0; 1321 return 0;
1322 } 1322 }
1323 #else 1323 #else
1324 static inline int IO_APIC_irq_trigger(int irq) 1324 static inline int IO_APIC_irq_trigger(int irq)
1325 { 1325 {
1326 return 1; 1326 return 1;
1327 } 1327 }
1328 #endif 1328 #endif
1329 1329
1330 static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) 1330 static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
1331 { 1331 {
1332 1332
1333 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1333 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1334 trigger == IOAPIC_LEVEL) 1334 trigger == IOAPIC_LEVEL)
1335 desc->status |= IRQ_LEVEL; 1335 desc->status |= IRQ_LEVEL;
1336 else 1336 else
1337 desc->status &= ~IRQ_LEVEL; 1337 desc->status &= ~IRQ_LEVEL;
1338 1338
1339 if (irq_remapped(irq)) { 1339 if (irq_remapped(irq)) {
1340 desc->status |= IRQ_MOVE_PCNTXT; 1340 desc->status |= IRQ_MOVE_PCNTXT;
1341 if (trigger) 1341 if (trigger)
1342 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, 1342 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1343 handle_fasteoi_irq, 1343 handle_fasteoi_irq,
1344 "fasteoi"); 1344 "fasteoi");
1345 else 1345 else
1346 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, 1346 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1347 handle_edge_irq, "edge"); 1347 handle_edge_irq, "edge");
1348 return; 1348 return;
1349 } 1349 }
1350 1350
1351 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1351 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1352 trigger == IOAPIC_LEVEL) 1352 trigger == IOAPIC_LEVEL)
1353 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1353 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1354 handle_fasteoi_irq, 1354 handle_fasteoi_irq,
1355 "fasteoi"); 1355 "fasteoi");
1356 else 1356 else
1357 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1357 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1358 handle_edge_irq, "edge"); 1358 handle_edge_irq, "edge");
1359 } 1359 }
1360 1360
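Editor's note: both halves of ioapic_register_intr() apply the same predicate — an IRQ that is explicitly level-triggered, or an IOAPIC_AUTO one that the MP table reports as level, gets the fasteoi flow handler; everything else is handled as edge. A standalone restatement with the MP-table lookup stubbed out (the stub's ISA-edge/PCI-level rule is an assumption for the demo, not the real lookup):

#include <stdio.h>

#define IOAPIC_AUTO	-1
#define IOAPIC_EDGE	 0
#define IOAPIC_LEVEL	 1

/* stand-in for the kernel's MP-table lookup */
static int IO_APIC_irq_trigger(int irq)
{
	return irq > 15;	/* assume PCI irqs are level, ISA edge */
}

static const char *handler_for(int irq, long trigger)
{
	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
	    trigger == IOAPIC_LEVEL)
		return "fasteoi";
	return "edge";
}

int main(void)
{
	printf("irq  0, AUTO  -> %s\n", handler_for(0, IOAPIC_AUTO));
	printf("irq 20, AUTO  -> %s\n", handler_for(20, IOAPIC_AUTO));
	printf("irq  0, LEVEL -> %s\n", handler_for(0, IOAPIC_LEVEL));
	return 0;
}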
1361 int setup_ioapic_entry(int apic_id, int irq, 1361 int setup_ioapic_entry(int apic_id, int irq,
1362 struct IO_APIC_route_entry *entry, 1362 struct IO_APIC_route_entry *entry,
1363 unsigned int destination, int trigger, 1363 unsigned int destination, int trigger,
1364 int polarity, int vector, int pin) 1364 int polarity, int vector, int pin)
1365 { 1365 {
1366 /* 1366 /*
1367 * add it to the IO-APIC irq-routing table: 1367 * add it to the IO-APIC irq-routing table:
1368 */ 1368 */
1369 memset(entry,0,sizeof(*entry)); 1369 memset(entry,0,sizeof(*entry));
1370 1370
1371 if (intr_remapping_enabled) { 1371 if (intr_remapping_enabled) {
1372 struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); 1372 struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
1373 struct irte irte; 1373 struct irte irte;
1374 struct IR_IO_APIC_route_entry *ir_entry = 1374 struct IR_IO_APIC_route_entry *ir_entry =
1375 (struct IR_IO_APIC_route_entry *) entry; 1375 (struct IR_IO_APIC_route_entry *) entry;
1376 int index; 1376 int index;
1377 1377
1378 if (!iommu) 1378 if (!iommu)
1379 panic("No mapping iommu for ioapic %d\n", apic_id); 1379 panic("No mapping iommu for ioapic %d\n", apic_id);
1380 1380
1381 index = alloc_irte(iommu, irq, 1); 1381 index = alloc_irte(iommu, irq, 1);
1382 if (index < 0) 1382 if (index < 0)
1383 panic("Failed to allocate IRTE for ioapic %d\n", apic_id); 1383 panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
1384 1384
1385 prepare_irte(&irte, vector, destination); 1385 prepare_irte(&irte, vector, destination);
1386 1386
1387 /* Set source-id of interrupt request */ 1387 /* Set source-id of interrupt request */
1388 set_ioapic_sid(&irte, apic_id); 1388 set_ioapic_sid(&irte, apic_id);
1389 1389
1390 modify_irte(irq, &irte); 1390 modify_irte(irq, &irte);
1391 1391
1392 ir_entry->index2 = (index >> 15) & 0x1; 1392 ir_entry->index2 = (index >> 15) & 0x1;
1393 ir_entry->zero = 0; 1393 ir_entry->zero = 0;
1394 ir_entry->format = 1; 1394 ir_entry->format = 1;
1395 ir_entry->index = (index & 0x7fff); 1395 ir_entry->index = (index & 0x7fff);
1396 /* 1396 /*
1397 * IO-APIC RTE will be configured with virtual vector. 1397 * IO-APIC RTE will be configured with virtual vector.
1398 * irq handler will do the explicit EOI to the io-apic. 1398 * irq handler will do the explicit EOI to the io-apic.
1399 */ 1399 */
1400 ir_entry->vector = pin; 1400 ir_entry->vector = pin;
1401 } else { 1401 } else {
1402 entry->delivery_mode = apic->irq_delivery_mode; 1402 entry->delivery_mode = apic->irq_delivery_mode;
1403 entry->dest_mode = apic->irq_dest_mode; 1403 entry->dest_mode = apic->irq_dest_mode;
1404 entry->dest = destination; 1404 entry->dest = destination;
1405 entry->vector = vector; 1405 entry->vector = vector;
1406 } 1406 }
1407 1407
1408 entry->mask = 0; /* enable IRQ */ 1408 entry->mask = 0; /* enable IRQ */
1409 entry->trigger = trigger; 1409 entry->trigger = trigger;
1410 entry->polarity = polarity; 1410 entry->polarity = polarity;
1411 1411
1412 /* Mask level triggered irqs. 1412 /* Mask level triggered irqs.
1413 * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 1413 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1414 */ 1414 */
1415 if (trigger) 1415 if (trigger)
1416 entry->mask = 1; 1416 entry->mask = 1;
1417 return 0; 1417 return 0;
1418 } 1418 }
1419 1419
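Editor's note: in the interrupt-remapping branch above, the IR-format RTE only has a 15-bit index field, so bit 15 of the handle returned by alloc_irte() travels in the separate index2 bit. A quick sketch of the split, using an assumed handle value:

#include <stdio.h>

int main(void)
{
	int index = 0x8123;	/* assumed IRTE handle from alloc_irte() */

	printf("index2 = %d, index = 0x%04x\n",
	       (index >> 15) & 0x1,	/* high bit */
	       index & 0x7fff);		/* low 15 bits */
	return 0;
}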
1420 static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, 1420 static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
1421 int trigger, int polarity) 1421 int trigger, int polarity)
1422 { 1422 {
1423 struct irq_cfg *cfg; 1423 struct irq_cfg *cfg;
1424 struct IO_APIC_route_entry entry; 1424 struct IO_APIC_route_entry entry;
1425 unsigned int dest; 1425 unsigned int dest;
1426 1426
1427 if (!IO_APIC_IRQ(irq)) 1427 if (!IO_APIC_IRQ(irq))
1428 return; 1428 return;
1429 1429
1430 cfg = desc->chip_data; 1430 cfg = desc->chip_data;
1431 1431
1432 /* 1432 /*
1433 * For legacy irqs, cfg->domain starts with cpu 0 for legacy 1433 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
1434 * controllers like 8259. Now that IO-APIC can handle this irq, update 1434 * controllers like 8259. Now that IO-APIC can handle this irq, update
1435 * the cfg->domain. 1435 * the cfg->domain.
1436 */ 1436 */
1437 if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) 1437 if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
1438 apic->vector_allocation_domain(0, cfg->domain); 1438 apic->vector_allocation_domain(0, cfg->domain);
1439 1439
1440 if (assign_irq_vector(irq, cfg, apic->target_cpus())) 1440 if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1441 return; 1441 return;
1442 1442
1443 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 1443 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
1444 1444
1445 apic_printk(APIC_VERBOSE,KERN_DEBUG 1445 apic_printk(APIC_VERBOSE,KERN_DEBUG
1446 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1446 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1447 "IRQ %d Mode:%i Active:%i)\n", 1447 "IRQ %d Mode:%i Active:%i)\n",
1448 apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, 1448 apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector,
1449 irq, trigger, polarity); 1449 irq, trigger, polarity);
1450 1450
1451 1451
1452 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, 1452 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
1453 dest, trigger, polarity, cfg->vector, pin)) { 1453 dest, trigger, polarity, cfg->vector, pin)) {
1454 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1454 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1455 mp_ioapics[apic_id].apicid, pin); 1455 mp_ioapics[apic_id].apicid, pin);
1456 __clear_irq_vector(irq, cfg); 1456 __clear_irq_vector(irq, cfg);
1457 return; 1457 return;
1458 } 1458 }
1459 1459
1460 ioapic_register_intr(irq, desc, trigger); 1460 ioapic_register_intr(irq, desc, trigger);
1461 if (irq < legacy_pic->nr_legacy_irqs) 1461 if (irq < legacy_pic->nr_legacy_irqs)
1462 legacy_pic->chip->mask(irq); 1462 legacy_pic->mask(irq);
1463 1463
1464 ioapic_write_entry(apic_id, pin, entry); 1464 ioapic_write_entry(apic_id, pin, entry);
1465 } 1465 }
1466 1466
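Editor's note: the one functional change in this hunk is the call site near the end of setup_IO_APIC_irq(): legacy_pic->chip->mask(irq) becomes legacy_pic->mask(irq). The new-style irq_chip callbacks take a struct irq_data rather than a bare irq number, so callers can no longer poke chip->mask directly, and struct legacy_pic gains its own unsigned-int mask()/unmask() hooks in this commit. The userspace model below sketches that indirection; all names and the lookup-table plumbing are illustrative assumptions, not the commit's actual wiring:

#include <stdio.h>

/* userspace model of the old-vs-new callback signatures */
struct irq_data { unsigned int irq; };

struct irq_chip {
	void (*irq_mask)(struct irq_data *data);	/* new-style callback */
};

static void i8259_irq_mask(struct irq_data *data)
{
	printf("mask irq %u on the 8259\n", data->irq);
}

static struct irq_chip i8259A_chip = { .irq_mask = i8259_irq_mask };

static struct irq_data irq_data_store[16];	/* stand-in for irq_to_desc() */

/* the plain unsigned-int hook that struct legacy_pic now carries */
static void legacy_pic_mask(unsigned int irq)
{
	i8259A_chip.irq_mask(&irq_data_store[irq]);
}

int main(void)
{
	irq_data_store[3].irq = 3;
	legacy_pic_mask(3);	/* what legacy_pic->mask(irq) boils down to */
	return 0;
}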
1467 static struct { 1467 static struct {
1468 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); 1468 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
1469 } mp_ioapic_routing[MAX_IO_APICS]; 1469 } mp_ioapic_routing[MAX_IO_APICS];
1470 1470
1471 static void __init setup_IO_APIC_irqs(void) 1471 static void __init setup_IO_APIC_irqs(void)
1472 { 1472 {
1473 int apic_id, pin, idx, irq; 1473 int apic_id, pin, idx, irq;
1474 int notcon = 0; 1474 int notcon = 0;
1475 struct irq_desc *desc; 1475 struct irq_desc *desc;
1476 struct irq_cfg *cfg; 1476 struct irq_cfg *cfg;
1477 int node = cpu_to_node(0); 1477 int node = cpu_to_node(0);
1478 1478
1479 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1479 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1480 1480
1481 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) 1481 for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
1482 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { 1482 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
1483 idx = find_irq_entry(apic_id, pin, mp_INT); 1483 idx = find_irq_entry(apic_id, pin, mp_INT);
1484 if (idx == -1) { 1484 if (idx == -1) {
1485 if (!notcon) { 1485 if (!notcon) {
1486 notcon = 1; 1486 notcon = 1;
1487 apic_printk(APIC_VERBOSE, 1487 apic_printk(APIC_VERBOSE,
1488 KERN_DEBUG " %d-%d", 1488 KERN_DEBUG " %d-%d",
1489 mp_ioapics[apic_id].apicid, pin); 1489 mp_ioapics[apic_id].apicid, pin);
1490 } else 1490 } else
1491 apic_printk(APIC_VERBOSE, " %d-%d", 1491 apic_printk(APIC_VERBOSE, " %d-%d",
1492 mp_ioapics[apic_id].apicid, pin); 1492 mp_ioapics[apic_id].apicid, pin);
1493 continue; 1493 continue;
1494 } 1494 }
1495 if (notcon) { 1495 if (notcon) {
1496 apic_printk(APIC_VERBOSE, 1496 apic_printk(APIC_VERBOSE,
1497 " (apicid-pin) not connected\n"); 1497 " (apicid-pin) not connected\n");
1498 notcon = 0; 1498 notcon = 0;
1499 } 1499 }
1500 1500
1501 irq = pin_2_irq(idx, apic_id, pin); 1501 irq = pin_2_irq(idx, apic_id, pin);
1502 1502
1503 if ((apic_id > 0) && (irq > 16)) 1503 if ((apic_id > 0) && (irq > 16))
1504 continue; 1504 continue;
1505 1505
1506 /* 1506 /*
1507 * Skip the timer IRQ if there's a quirk handler 1507 * Skip the timer IRQ if there's a quirk handler
1508 * installed and if it returns 1: 1508 * installed and if it returns 1:
1509 */ 1509 */
1510 if (apic->multi_timer_check && 1510 if (apic->multi_timer_check &&
1511 apic->multi_timer_check(apic_id, irq)) 1511 apic->multi_timer_check(apic_id, irq))
1512 continue; 1512 continue;
1513 1513
1514 desc = irq_to_desc_alloc_node(irq, node); 1514 desc = irq_to_desc_alloc_node(irq, node);
1515 if (!desc) { 1515 if (!desc) {
1516 printk(KERN_INFO "can not get irq_desc for %d\n", irq); 1516 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1517 continue; 1517 continue;
1518 } 1518 }
1519 cfg = desc->chip_data; 1519 cfg = desc->chip_data;
1520 add_pin_to_irq_node(cfg, node, apic_id, pin); 1520 add_pin_to_irq_node(cfg, node, apic_id, pin);
1521 /* 1521 /*
1522 * don't mark it in pin_programmed, so that acpi can 1522 * don't mark it in pin_programmed, so that acpi can
1523 * later set it correctly when irq < 16 1523 * later set it correctly when irq < 16
1524 */ 1524 */
1525 setup_IO_APIC_irq(apic_id, pin, irq, desc, 1525 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1526 irq_trigger(idx), irq_polarity(idx)); 1526 irq_trigger(idx), irq_polarity(idx));
1527 } 1527 }
1528 1528
1529 if (notcon) 1529 if (notcon)
1530 apic_printk(APIC_VERBOSE, 1530 apic_printk(APIC_VERBOSE,
1531 " (apicid-pin) not connected\n"); 1531 " (apicid-pin) not connected\n");
1532 } 1532 }
1533 1533
1534 /* 1534 /*
1535 * for the gsi that is not in the first ioapic 1535 * for the gsi that is not in the first ioapic
1536 * but could not use acpi_register_gsi(), 1536 * but could not use acpi_register_gsi(),
1537 * like the special sci on the IBM x3330 1537 * like the special sci on the IBM x3330
1538 */ 1538 */
1539 void setup_IO_APIC_irq_extra(u32 gsi) 1539 void setup_IO_APIC_irq_extra(u32 gsi)
1540 { 1540 {
1541 int apic_id = 0, pin, idx, irq; 1541 int apic_id = 0, pin, idx, irq;
1542 int node = cpu_to_node(0); 1542 int node = cpu_to_node(0);
1543 struct irq_desc *desc; 1543 struct irq_desc *desc;
1544 struct irq_cfg *cfg; 1544 struct irq_cfg *cfg;
1545 1545
1546 /* 1546 /*
1547 * Convert 'gsi' to 'ioapic.pin'. 1547 * Convert 'gsi' to 'ioapic.pin'.
1548 */ 1548 */
1549 apic_id = mp_find_ioapic(gsi); 1549 apic_id = mp_find_ioapic(gsi);
1550 if (apic_id < 0) 1550 if (apic_id < 0)
1551 return; 1551 return;
1552 1552
1553 pin = mp_find_ioapic_pin(apic_id, gsi); 1553 pin = mp_find_ioapic_pin(apic_id, gsi);
1554 idx = find_irq_entry(apic_id, pin, mp_INT); 1554 idx = find_irq_entry(apic_id, pin, mp_INT);
1555 if (idx == -1) 1555 if (idx == -1)
1556 return; 1556 return;
1557 1557
1558 irq = pin_2_irq(idx, apic_id, pin); 1558 irq = pin_2_irq(idx, apic_id, pin);
1559 #ifdef CONFIG_SPARSE_IRQ 1559 #ifdef CONFIG_SPARSE_IRQ
1560 desc = irq_to_desc(irq); 1560 desc = irq_to_desc(irq);
1561 if (desc) 1561 if (desc)
1562 return; 1562 return;
1563 #endif 1563 #endif
1564 desc = irq_to_desc_alloc_node(irq, node); 1564 desc = irq_to_desc_alloc_node(irq, node);
1565 if (!desc) { 1565 if (!desc) {
1566 printk(KERN_INFO "can not get irq_desc for %d\n", irq); 1566 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1567 return; 1567 return;
1568 } 1568 }
1569 1569
1570 cfg = desc->chip_data; 1570 cfg = desc->chip_data;
1571 add_pin_to_irq_node(cfg, node, apic_id, pin); 1571 add_pin_to_irq_node(cfg, node, apic_id, pin);
1572 1572
1573 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { 1573 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
1574 pr_debug("Pin %d-%d already programmed\n", 1574 pr_debug("Pin %d-%d already programmed\n",
1575 mp_ioapics[apic_id].apicid, pin); 1575 mp_ioapics[apic_id].apicid, pin);
1576 return; 1576 return;
1577 } 1577 }
1578 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); 1578 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
1579 1579
1580 setup_IO_APIC_irq(apic_id, pin, irq, desc, 1580 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1581 irq_trigger(idx), irq_polarity(idx)); 1581 irq_trigger(idx), irq_polarity(idx));
1582 } 1582 }
1583 1583
1584 /* 1584 /*
1585 * Set up the timer pin, possibly with the 8259A master behind it. 1585 * Set up the timer pin, possibly with the 8259A master behind it.
1586 */ 1586 */
1587 static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, 1587 static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
1588 int vector) 1588 int vector)
1589 { 1589 {
1590 struct IO_APIC_route_entry entry; 1590 struct IO_APIC_route_entry entry;
1591 1591
1592 if (intr_remapping_enabled) 1592 if (intr_remapping_enabled)
1593 return; 1593 return;
1594 1594
1595 memset(&entry, 0, sizeof(entry)); 1595 memset(&entry, 0, sizeof(entry));
1596 1596
1597 /* 1597 /*
1598 * We use logical delivery to get the timer IRQ 1598 * We use logical delivery to get the timer IRQ
1599 * to the first CPU. 1599 * to the first CPU.
1600 */ 1600 */
1601 entry.dest_mode = apic->irq_dest_mode; 1601 entry.dest_mode = apic->irq_dest_mode;
1602 entry.mask = 0; /* don't mask IRQ for edge */ 1602 entry.mask = 0; /* don't mask IRQ for edge */
1603 entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); 1603 entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
1604 entry.delivery_mode = apic->irq_delivery_mode; 1604 entry.delivery_mode = apic->irq_delivery_mode;
1605 entry.polarity = 0; 1605 entry.polarity = 0;
1606 entry.trigger = 0; 1606 entry.trigger = 0;
1607 entry.vector = vector; 1607 entry.vector = vector;
1608 1608
1609 /* 1609 /*
1610 * The timer IRQ doesn't have to know that behind the 1610 * The timer IRQ doesn't have to know that behind the
1611 * scenes we may have an 8259A master in AEOI mode ... 1611 * scenes we may have an 8259A master in AEOI mode ...
1612 */ 1612 */
1613 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 1613 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
1614 1614
1615 /* 1615 /*
1616 * Add it to the IO-APIC irq-routing table: 1616 * Add it to the IO-APIC irq-routing table:
1617 */ 1617 */
1618 ioapic_write_entry(apic_id, pin, entry); 1618 ioapic_write_entry(apic_id, pin, entry);
1619 } 1619 }
1620 1620
1621 1621
1622 __apicdebuginit(void) print_IO_APIC(void) 1622 __apicdebuginit(void) print_IO_APIC(void)
1623 { 1623 {
1624 int apic, i; 1624 int apic, i;
1625 union IO_APIC_reg_00 reg_00; 1625 union IO_APIC_reg_00 reg_00;
1626 union IO_APIC_reg_01 reg_01; 1626 union IO_APIC_reg_01 reg_01;
1627 union IO_APIC_reg_02 reg_02; 1627 union IO_APIC_reg_02 reg_02;
1628 union IO_APIC_reg_03 reg_03; 1628 union IO_APIC_reg_03 reg_03;
1629 unsigned long flags; 1629 unsigned long flags;
1630 struct irq_cfg *cfg; 1630 struct irq_cfg *cfg;
1631 struct irq_desc *desc; 1631 struct irq_desc *desc;
1632 unsigned int irq; 1632 unsigned int irq;
1633 1633
1634 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1634 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1635 for (i = 0; i < nr_ioapics; i++) 1635 for (i = 0; i < nr_ioapics; i++)
1636 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1636 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1637 mp_ioapics[i].apicid, nr_ioapic_registers[i]); 1637 mp_ioapics[i].apicid, nr_ioapic_registers[i]);
1638 1638
1639 /* 1639 /*
1640 * We are a bit conservative about what we expect. We have to 1640 * We are a bit conservative about what we expect. We have to
1641 * know about every hardware change ASAP. 1641 * know about every hardware change ASAP.
1642 */ 1642 */
1643 printk(KERN_INFO "testing the IO APIC.......................\n"); 1643 printk(KERN_INFO "testing the IO APIC.......................\n");
1644 1644
1645 for (apic = 0; apic < nr_ioapics; apic++) { 1645 for (apic = 0; apic < nr_ioapics; apic++) {
1646 1646
1647 raw_spin_lock_irqsave(&ioapic_lock, flags); 1647 raw_spin_lock_irqsave(&ioapic_lock, flags);
1648 reg_00.raw = io_apic_read(apic, 0); 1648 reg_00.raw = io_apic_read(apic, 0);
1649 reg_01.raw = io_apic_read(apic, 1); 1649 reg_01.raw = io_apic_read(apic, 1);
1650 if (reg_01.bits.version >= 0x10) 1650 if (reg_01.bits.version >= 0x10)
1651 reg_02.raw = io_apic_read(apic, 2); 1651 reg_02.raw = io_apic_read(apic, 2);
1652 if (reg_01.bits.version >= 0x20) 1652 if (reg_01.bits.version >= 0x20)
1653 reg_03.raw = io_apic_read(apic, 3); 1653 reg_03.raw = io_apic_read(apic, 3);
1654 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 1654 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1655 1655
1656 printk("\n"); 1656 printk("\n");
1657 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); 1657 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
1658 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1658 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1659 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1659 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1660 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1660 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1661 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); 1661 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1662 1662
1663 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); 1663 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
1664 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 1664 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
1665 1665
1666 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 1666 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1667 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); 1667 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1668 1668
1669 /* 1669 /*
1670 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, 1670 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1671 * but the value of reg_02 is read as the previous read register 1671 * but the value of reg_02 is read as the previous read register
1672 * value, so ignore it if reg_02 == reg_01. 1672 * value, so ignore it if reg_02 == reg_01.
1673 */ 1673 */
1674 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { 1674 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1675 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 1675 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1676 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); 1676 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1677 } 1677 }
1678 1678
1679 /* 1679 /*
1680 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 1680 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1681 * or reg_03, but the value of reg_0[23] is read as the previous read 1681 * or reg_03, but the value of reg_0[23] is read as the previous read
1682 * register value, so ignore it if reg_03 == reg_0[12]. 1682 * register value, so ignore it if reg_03 == reg_0[12].
1683 */ 1683 */
1684 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && 1684 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1685 reg_03.raw != reg_01.raw) { 1685 reg_03.raw != reg_01.raw) {
1686 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); 1686 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1687 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); 1687 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1688 } 1688 }
1689 1689
1690 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1690 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1691 1691
1692 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" 1692 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1693 " Stat Dmod Deli Vect:\n"); 1693 " Stat Dmod Deli Vect:\n");
1694 1694
1695 for (i = 0; i <= reg_01.bits.entries; i++) { 1695 for (i = 0; i <= reg_01.bits.entries; i++) {
1696 struct IO_APIC_route_entry entry; 1696 struct IO_APIC_route_entry entry;
1697 1697
1698 entry = ioapic_read_entry(apic, i); 1698 entry = ioapic_read_entry(apic, i);
1699 1699
1700 printk(KERN_DEBUG " %02x %03X ", 1700 printk(KERN_DEBUG " %02x %03X ",
1701 i, 1701 i,
1702 entry.dest 1702 entry.dest
1703 ); 1703 );
1704 1704
1705 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", 1705 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1706 entry.mask, 1706 entry.mask,
1707 entry.trigger, 1707 entry.trigger,
1708 entry.irr, 1708 entry.irr,
1709 entry.polarity, 1709 entry.polarity,
1710 entry.delivery_status, 1710 entry.delivery_status,
1711 entry.dest_mode, 1711 entry.dest_mode,
1712 entry.delivery_mode, 1712 entry.delivery_mode,
1713 entry.vector 1713 entry.vector
1714 ); 1714 );
1715 } 1715 }
1716 } 1716 }
1717 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1717 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1718 for_each_irq_desc(irq, desc) { 1718 for_each_irq_desc(irq, desc) {
1719 struct irq_pin_list *entry; 1719 struct irq_pin_list *entry;
1720 1720
1721 cfg = desc->chip_data; 1721 cfg = desc->chip_data;
1722 if (!cfg) 1722 if (!cfg)
1723 continue; 1723 continue;
1724 entry = cfg->irq_2_pin; 1724 entry = cfg->irq_2_pin;
1725 if (!entry) 1725 if (!entry)
1726 continue; 1726 continue;
1727 printk(KERN_DEBUG "IRQ%d ", irq); 1727 printk(KERN_DEBUG "IRQ%d ", irq);
1728 for_each_irq_pin(entry, cfg->irq_2_pin) 1728 for_each_irq_pin(entry, cfg->irq_2_pin)
1729 printk("-> %d:%d", entry->apic, entry->pin); 1729 printk("-> %d:%d", entry->apic, entry->pin);
1730 printk("\n"); 1730 printk("\n");
1731 } 1731 }
1732 1732
1733 printk(KERN_INFO ".................................... done.\n"); 1733 printk(KERN_INFO ".................................... done.\n");
1734 1734
1735 return; 1735 return;
1736 } 1736 }
1737 1737
1738 __apicdebuginit(void) print_APIC_field(int base) 1738 __apicdebuginit(void) print_APIC_field(int base)
1739 { 1739 {
1740 int i; 1740 int i;
1741 1741
1742 printk(KERN_DEBUG); 1742 printk(KERN_DEBUG);
1743 1743
1744 for (i = 0; i < 8; i++) 1744 for (i = 0; i < 8; i++)
1745 printk(KERN_CONT "%08x", apic_read(base + i*0x10)); 1745 printk(KERN_CONT "%08x", apic_read(base + i*0x10));
1746 1746
1747 printk(KERN_CONT "\n"); 1747 printk(KERN_CONT "\n");
1748 } 1748 }
1749 1749
1750 __apicdebuginit(void) print_local_APIC(void *dummy) 1750 __apicdebuginit(void) print_local_APIC(void *dummy)
1751 { 1751 {
1752 unsigned int i, v, ver, maxlvt; 1752 unsigned int i, v, ver, maxlvt;
1753 u64 icr; 1753 u64 icr;
1754 1754
1755 printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1755 printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1756 smp_processor_id(), hard_smp_processor_id()); 1756 smp_processor_id(), hard_smp_processor_id());
1757 v = apic_read(APIC_ID); 1757 v = apic_read(APIC_ID);
1758 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); 1758 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1759 v = apic_read(APIC_LVR); 1759 v = apic_read(APIC_LVR);
1760 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1760 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1761 ver = GET_APIC_VERSION(v); 1761 ver = GET_APIC_VERSION(v);
1762 maxlvt = lapic_get_maxlvt(); 1762 maxlvt = lapic_get_maxlvt();
1763 1763
1764 v = apic_read(APIC_TASKPRI); 1764 v = apic_read(APIC_TASKPRI);
1765 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); 1765 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1766 1766
1767 if (APIC_INTEGRATED(ver)) { /* !82489DX */ 1767 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1768 if (!APIC_XAPIC(ver)) { 1768 if (!APIC_XAPIC(ver)) {
1769 v = apic_read(APIC_ARBPRI); 1769 v = apic_read(APIC_ARBPRI);
1770 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, 1770 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1771 v & APIC_ARBPRI_MASK); 1771 v & APIC_ARBPRI_MASK);
1772 } 1772 }
1773 v = apic_read(APIC_PROCPRI); 1773 v = apic_read(APIC_PROCPRI);
1774 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); 1774 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1775 } 1775 }
1776 1776
1777 /* 1777 /*
1778 * Remote read supported only in the 82489DX and local APIC for 1778 * Remote read supported only in the 82489DX and local APIC for
1779 * Pentium processors. 1779 * Pentium processors.
1780 */ 1780 */
1781 if (!APIC_INTEGRATED(ver) || maxlvt == 3) { 1781 if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
1782 v = apic_read(APIC_RRR); 1782 v = apic_read(APIC_RRR);
1783 printk(KERN_DEBUG "... APIC RRR: %08x\n", v); 1783 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1784 } 1784 }
1785 1785
1786 v = apic_read(APIC_LDR); 1786 v = apic_read(APIC_LDR);
1787 printk(KERN_DEBUG "... APIC LDR: %08x\n", v); 1787 printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1788 if (!x2apic_enabled()) { 1788 if (!x2apic_enabled()) {
1789 v = apic_read(APIC_DFR); 1789 v = apic_read(APIC_DFR);
1790 printk(KERN_DEBUG "... APIC DFR: %08x\n", v); 1790 printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1791 } 1791 }
1792 v = apic_read(APIC_SPIV); 1792 v = apic_read(APIC_SPIV);
1793 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); 1793 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1794 1794
1795 printk(KERN_DEBUG "... APIC ISR field:\n"); 1795 printk(KERN_DEBUG "... APIC ISR field:\n");
1796 print_APIC_field(APIC_ISR); 1796 print_APIC_field(APIC_ISR);
1797 printk(KERN_DEBUG "... APIC TMR field:\n"); 1797 printk(KERN_DEBUG "... APIC TMR field:\n");
1798 print_APIC_field(APIC_TMR); 1798 print_APIC_field(APIC_TMR);
1799 printk(KERN_DEBUG "... APIC IRR field:\n"); 1799 printk(KERN_DEBUG "... APIC IRR field:\n");
1800 print_APIC_field(APIC_IRR); 1800 print_APIC_field(APIC_IRR);
1801 1801
1802 if (APIC_INTEGRATED(ver)) { /* !82489DX */ 1802 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1803 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 1803 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1804 apic_write(APIC_ESR, 0); 1804 apic_write(APIC_ESR, 0);
1805 1805
1806 v = apic_read(APIC_ESR); 1806 v = apic_read(APIC_ESR);
1807 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1807 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1808 } 1808 }
1809 1809
1810 icr = apic_icr_read(); 1810 icr = apic_icr_read();
1811 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); 1811 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
1812 printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); 1812 printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
1813 1813
1814 v = apic_read(APIC_LVTT); 1814 v = apic_read(APIC_LVTT);
1815 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1815 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
1816 1816
1817 if (maxlvt > 3) { /* PC is LVT#4. */ 1817 if (maxlvt > 3) { /* PC is LVT#4. */
1818 v = apic_read(APIC_LVTPC); 1818 v = apic_read(APIC_LVTPC);
1819 printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); 1819 printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
1820 } 1820 }
1821 v = apic_read(APIC_LVT0); 1821 v = apic_read(APIC_LVT0);
1822 printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); 1822 printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
1823 v = apic_read(APIC_LVT1); 1823 v = apic_read(APIC_LVT1);
1824 printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); 1824 printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
1825 1825
1826 if (maxlvt > 2) { /* ERR is LVT#3. */ 1826 if (maxlvt > 2) { /* ERR is LVT#3. */
1827 v = apic_read(APIC_LVTERR); 1827 v = apic_read(APIC_LVTERR);
1828 printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); 1828 printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
1829 } 1829 }
1830 1830
1831 v = apic_read(APIC_TMICT); 1831 v = apic_read(APIC_TMICT);
1832 printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); 1832 printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
1833 v = apic_read(APIC_TMCCT); 1833 v = apic_read(APIC_TMCCT);
1834 printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); 1834 printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
1835 v = apic_read(APIC_TDCR); 1835 v = apic_read(APIC_TDCR);
1836 printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); 1836 printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
1837 1837
1838 if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { 1838 if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
1839 v = apic_read(APIC_EFEAT); 1839 v = apic_read(APIC_EFEAT);
1840 maxlvt = (v >> 16) & 0xff; 1840 maxlvt = (v >> 16) & 0xff;
1841 printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v); 1841 printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
1842 v = apic_read(APIC_ECTRL); 1842 v = apic_read(APIC_ECTRL);
1843 printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v); 1843 printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
1844 for (i = 0; i < maxlvt; i++) { 1844 for (i = 0; i < maxlvt; i++) {
1845 v = apic_read(APIC_EILVTn(i)); 1845 v = apic_read(APIC_EILVTn(i));
1846 printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); 1846 printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
1847 } 1847 }
1848 } 1848 }
1849 printk("\n"); 1849 printk("\n");
1850 } 1850 }
1851 1851
1852 __apicdebuginit(void) print_local_APICs(int maxcpu) 1852 __apicdebuginit(void) print_local_APICs(int maxcpu)
1853 { 1853 {
1854 int cpu; 1854 int cpu;
1855 1855
1856 if (!maxcpu) 1856 if (!maxcpu)
1857 return; 1857 return;
1858 1858
1859 preempt_disable(); 1859 preempt_disable();
1860 for_each_online_cpu(cpu) { 1860 for_each_online_cpu(cpu) {
1861 if (cpu >= maxcpu) 1861 if (cpu >= maxcpu)
1862 break; 1862 break;
1863 smp_call_function_single(cpu, print_local_APIC, NULL, 1); 1863 smp_call_function_single(cpu, print_local_APIC, NULL, 1);
1864 } 1864 }
1865 preempt_enable(); 1865 preempt_enable();
1866 } 1866 }
1867 1867
1868 __apicdebuginit(void) print_PIC(void) 1868 __apicdebuginit(void) print_PIC(void)
1869 { 1869 {
1870 unsigned int v; 1870 unsigned int v;
1871 unsigned long flags; 1871 unsigned long flags;
1872 1872
1873 if (!legacy_pic->nr_legacy_irqs) 1873 if (!legacy_pic->nr_legacy_irqs)
1874 return; 1874 return;
1875 1875
1876 printk(KERN_DEBUG "\nprinting PIC contents\n"); 1876 printk(KERN_DEBUG "\nprinting PIC contents\n");
1877 1877
1878 raw_spin_lock_irqsave(&i8259A_lock, flags); 1878 raw_spin_lock_irqsave(&i8259A_lock, flags);
1879 1879
1880 v = inb(0xa1) << 8 | inb(0x21); 1880 v = inb(0xa1) << 8 | inb(0x21);
1881 printk(KERN_DEBUG "... PIC IMR: %04x\n", v); 1881 printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
1882 1882
1883 v = inb(0xa0) << 8 | inb(0x20); 1883 v = inb(0xa0) << 8 | inb(0x20);
1884 printk(KERN_DEBUG "... PIC IRR: %04x\n", v); 1884 printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
1885 1885
1886 outb(0x0b,0xa0); 1886 outb(0x0b,0xa0);
1887 outb(0x0b,0x20); 1887 outb(0x0b,0x20);
1888 v = inb(0xa0) << 8 | inb(0x20); 1888 v = inb(0xa0) << 8 | inb(0x20);
1889 outb(0x0a,0xa0); 1889 outb(0x0a,0xa0);
1890 outb(0x0a,0x20); 1890 outb(0x0a,0x20);
1891 1891
1892 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 1892 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
1893 1893
1894 printk(KERN_DEBUG "... PIC ISR: %04x\n", v); 1894 printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
1895 1895
1896 v = inb(0x4d1) << 8 | inb(0x4d0); 1896 v = inb(0x4d1) << 8 | inb(0x4d0);
1897 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 1897 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1898 } 1898 }
1899 1899
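Editor's note: the magic numbers in print_PIC() are i8259 OCW3 commands — writing 0x0b to a command port makes the next read of that port return the in-service register, and 0x0a switches it back to the power-on default of returning the interrupt request register; ports 0x4d0/0x4d1 are the ELCR edge/level control registers. A self-contained restatement of the ISR read sequence with the port I/O stubbed out (real code additionally needs i8259A_lock and actual inb/outb):

#include <stdio.h>

#define PIC_MASTER_CMD	0x20
#define PIC_SLAVE_CMD	0xa0
#define OCW3_READ_IRR	0x0a	/* next CMD-port read returns the IRR */
#define OCW3_READ_ISR	0x0b	/* next CMD-port read returns the ISR */

static unsigned char fake_isr[2] = { 0x04, 0x00 };	/* assumed contents */
static int isr_selected;

/* stubs standing in for real port I/O */
static void outb_stub(unsigned char val, unsigned short port)
{
	(void)port;
	isr_selected = (val == OCW3_READ_ISR);
}

static unsigned char inb_stub(unsigned short port)
{
	return isr_selected ? fake_isr[port == PIC_SLAVE_CMD] : 0;
}

int main(void)
{
	unsigned int v;

	outb_stub(OCW3_READ_ISR, PIC_SLAVE_CMD);	/* select ISR */
	outb_stub(OCW3_READ_ISR, PIC_MASTER_CMD);
	v = inb_stub(PIC_SLAVE_CMD) << 8 | inb_stub(PIC_MASTER_CMD);
	outb_stub(OCW3_READ_IRR, PIC_SLAVE_CMD);	/* back to default */
	outb_stub(OCW3_READ_IRR, PIC_MASTER_CMD);

	printf("... PIC ISR: %04x\n", v);
	return 0;
}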
1900 static int __initdata show_lapic = 1; 1900 static int __initdata show_lapic = 1;
1901 static __init int setup_show_lapic(char *arg) 1901 static __init int setup_show_lapic(char *arg)
1902 { 1902 {
1903 int num = -1; 1903 int num = -1;
1904 1904
1905 if (strcmp(arg, "all") == 0) { 1905 if (strcmp(arg, "all") == 0) {
1906 show_lapic = CONFIG_NR_CPUS; 1906 show_lapic = CONFIG_NR_CPUS;
1907 } else { 1907 } else {
1908 get_option(&arg, &num); 1908 get_option(&arg, &num);
1909 if (num >= 0) 1909 if (num >= 0)
1910 show_lapic = num; 1910 show_lapic = num;
1911 } 1911 }
1912 1912
1913 return 1; 1913 return 1;
1914 } 1914 }
1915 __setup("show_lapic=", setup_show_lapic); 1915 __setup("show_lapic=", setup_show_lapic);
1916 1916
1917 __apicdebuginit(int) print_ICs(void) 1917 __apicdebuginit(int) print_ICs(void)
1918 { 1918 {
1919 if (apic_verbosity == APIC_QUIET) 1919 if (apic_verbosity == APIC_QUIET)
1920 return 0; 1920 return 0;
1921 1921
1922 print_PIC(); 1922 print_PIC();
1923 1923
1924 /* don't print out if apic is not there */ 1924 /* don't print out if apic is not there */
1925 if (!cpu_has_apic && !apic_from_smp_config()) 1925 if (!cpu_has_apic && !apic_from_smp_config())
1926 return 0; 1926 return 0;
1927 1927
1928 print_local_APICs(show_lapic); 1928 print_local_APICs(show_lapic);
1929 print_IO_APIC(); 1929 print_IO_APIC();
1930 1930
1931 return 0; 1931 return 0;
1932 } 1932 }
1933 1933
1934 fs_initcall(print_ICs); 1934 fs_initcall(print_ICs);
1935 1935
1936 1936
1937 /* Where, if anywhere, is the i8259 connected in ExtINT mode */ 1937 /* Where, if anywhere, is the i8259 connected in ExtINT mode */
1938 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 1938 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
1939 1939
1940 void __init enable_IO_APIC(void) 1940 void __init enable_IO_APIC(void)
1941 { 1941 {
1942 int i8259_apic, i8259_pin; 1942 int i8259_apic, i8259_pin;
1943 int apic; 1943 int apic;
1944 1944
1945 if (!legacy_pic->nr_legacy_irqs) 1945 if (!legacy_pic->nr_legacy_irqs)
1946 return; 1946 return;
1947 1947
1948 for(apic = 0; apic < nr_ioapics; apic++) { 1948 for(apic = 0; apic < nr_ioapics; apic++) {
1949 int pin; 1949 int pin;
1950 /* See if any of the pins is in ExtINT mode */ 1950 /* See if any of the pins is in ExtINT mode */
1951 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1951 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1952 struct IO_APIC_route_entry entry; 1952 struct IO_APIC_route_entry entry;
1953 entry = ioapic_read_entry(apic, pin); 1953 entry = ioapic_read_entry(apic, pin);
1954 1954
1955 /* If the interrupt line is enabled and in ExtINT mode, 1955 /* If the interrupt line is enabled and in ExtINT mode,
1956 * we have found the pin where the i8259 is connected. 1956 * we have found the pin where the i8259 is connected.
1957 */ 1957 */
1958 if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { 1958 if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1959 ioapic_i8259.apic = apic; 1959 ioapic_i8259.apic = apic;
1960 ioapic_i8259.pin = pin; 1960 ioapic_i8259.pin = pin;
1961 goto found_i8259; 1961 goto found_i8259;
1962 } 1962 }
1963 } 1963 }
1964 } 1964 }
1965 found_i8259: 1965 found_i8259:
1966 /* Look to see if the MP table has reported the ExtINT */ 1966 /* Look to see if the MP table has reported the ExtINT */
1967 /* If we could not find the appropriate pin by looking at the ioapic, 1967 /* If we could not find the appropriate pin by looking at the ioapic,
1968 * the i8259 is probably not connected to the ioapic, but give the 1968 * the i8259 is probably not connected to the ioapic, but give the
1969 * mptable a chance anyway. 1969 * mptable a chance anyway.
1970 */ 1970 */
1971 i8259_pin = find_isa_irq_pin(0, mp_ExtINT); 1971 i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
1972 i8259_apic = find_isa_irq_apic(0, mp_ExtINT); 1972 i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1973 /* Trust the MP table if nothing is setup in the hardware */ 1973 /* Trust the MP table if nothing is setup in the hardware */
1974 if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { 1974 if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1975 printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); 1975 printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1976 ioapic_i8259.pin = i8259_pin; 1976 ioapic_i8259.pin = i8259_pin;
1977 ioapic_i8259.apic = i8259_apic; 1977 ioapic_i8259.apic = i8259_apic;
1978 } 1978 }
1979 /* Complain if the MP table and the hardware disagree */ 1979 /* Complain if the MP table and the hardware disagree */
1980 if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && 1980 if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1981 (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) 1981 (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1982 { 1982 {
1983 printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); 1983 printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1984 } 1984 }
1985 1985
1986 /* 1986 /*
1987 * Do not trust the IO-APIC being empty at bootup 1987 * Do not trust the IO-APIC being empty at bootup
1988 */ 1988 */
1989 clear_IO_APIC(); 1989 clear_IO_APIC();
1990 } 1990 }
1991 1991
1992 /* 1992 /*
1993 * Not an __init, needed by the reboot code 1993 * Not an __init, needed by the reboot code
1994 */ 1994 */
1995 void disable_IO_APIC(void) 1995 void disable_IO_APIC(void)
1996 { 1996 {
1997 /* 1997 /*
1998 * Clear the IO-APIC before rebooting: 1998 * Clear the IO-APIC before rebooting:
1999 */ 1999 */
2000 clear_IO_APIC(); 2000 clear_IO_APIC();
2001 2001
2002 if (!legacy_pic->nr_legacy_irqs) 2002 if (!legacy_pic->nr_legacy_irqs)
2003 return; 2003 return;
2004 2004
2005 /* 2005 /*
2006 * If the i8259 is routed through an IOAPIC 2006 * If the i8259 is routed through an IOAPIC
2007 * Put that IOAPIC in virtual wire mode 2007 * Put that IOAPIC in virtual wire mode
2008 * so legacy interrupts can be delivered. 2008 * so legacy interrupts can be delivered.
2009 * 2009 *
2010 * With interrupt-remapping, for now we will use virtual wire A mode, 2010 * With interrupt-remapping, for now we will use virtual wire A mode,
2011 * as virtual wire B is a little more complex (we need to configure 2011 * as virtual wire B is a little more complex (we need to configure
2012 * both the IOAPIC RTE as well as the interrupt-remapping table entry). 2012 * both the IOAPIC RTE as well as the interrupt-remapping table entry).
2013 * As this gets called during crash dump, keep this simple for now. 2013 * As this gets called during crash dump, keep this simple for now.
2014 */ 2014 */
2015 if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { 2015 if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
2016 struct IO_APIC_route_entry entry; 2016 struct IO_APIC_route_entry entry;
2017 2017
2018 memset(&entry, 0, sizeof(entry)); 2018 memset(&entry, 0, sizeof(entry));
2019 entry.mask = 0; /* Enabled */ 2019 entry.mask = 0; /* Enabled */
2020 entry.trigger = 0; /* Edge */ 2020 entry.trigger = 0; /* Edge */
2021 entry.irr = 0; 2021 entry.irr = 0;
2022 entry.polarity = 0; /* High */ 2022 entry.polarity = 0; /* High */
2023 entry.delivery_status = 0; 2023 entry.delivery_status = 0;
2024 entry.dest_mode = 0; /* Physical */ 2024 entry.dest_mode = 0; /* Physical */
2025 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 2025 entry.delivery_mode = dest_ExtINT; /* ExtInt */
2026 entry.vector = 0; 2026 entry.vector = 0;
2027 entry.dest = read_apic_id(); 2027 entry.dest = read_apic_id();
2028 2028
2029 /* 2029 /*
2030 * Add it to the IO-APIC irq-routing table: 2030 * Add it to the IO-APIC irq-routing table:
2031 */ 2031 */
2032 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); 2032 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
2033 } 2033 }
2034 2034
2035 /* 2035 /*
2036 * Use virtual wire A mode when interrupt remapping is enabled. 2036 * Use virtual wire A mode when interrupt remapping is enabled.
2037 */ 2037 */
2038 if (cpu_has_apic || apic_from_smp_config()) 2038 if (cpu_has_apic || apic_from_smp_config())
2039 disconnect_bsp_APIC(!intr_remapping_enabled && 2039 disconnect_bsp_APIC(!intr_remapping_enabled &&
2040 ioapic_i8259.pin != -1); 2040 ioapic_i8259.pin != -1);
2041 } 2041 }
2042 2042
2043 #ifdef CONFIG_X86_32 2043 #ifdef CONFIG_X86_32
2044 /* 2044 /*
2045 * function to set the IO-APIC physical IDs based on the 2045 * function to set the IO-APIC physical IDs based on the
2046 * values stored in the MPC table. 2046 * values stored in the MPC table.
2047 * 2047 *
2048 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 2048 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
2049 */ 2049 */
2050 2050
2051 void __init setup_ioapic_ids_from_mpc(void) 2051 void __init setup_ioapic_ids_from_mpc(void)
2052 { 2052 {
2053 union IO_APIC_reg_00 reg_00; 2053 union IO_APIC_reg_00 reg_00;
2054 physid_mask_t phys_id_present_map; 2054 physid_mask_t phys_id_present_map;
2055 int apic_id; 2055 int apic_id;
2056 int i; 2056 int i;
2057 unsigned char old_id; 2057 unsigned char old_id;
2058 unsigned long flags; 2058 unsigned long flags;
2059 2059
2060 if (acpi_ioapic) 2060 if (acpi_ioapic)
2061 return; 2061 return;
2062 /* 2062 /*
2063 * Don't check I/O APIC IDs for xAPIC systems. They have 2063 * Don't check I/O APIC IDs for xAPIC systems. They have
2064 * no meaning without the serial APIC bus. 2064 * no meaning without the serial APIC bus.
2065 */ 2065 */
2066 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 2066 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2067 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) 2067 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
2068 return; 2068 return;
2069 /* 2069 /*
2070 * This is broken; anything with a real cpu count has to 2070 * This is broken; anything with a real cpu count has to
2071 * circumvent this idiocy regardless. 2071 * circumvent this idiocy regardless.
2072 */ 2072 */
2073 apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); 2073 apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
2074 2074
2075 /* 2075 /*
2076 * Set the IOAPIC ID to the value stored in the MPC table. 2076 * Set the IOAPIC ID to the value stored in the MPC table.
2077 */ 2077 */
2078 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { 2078 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
2079 2079
2080 /* Read the register 0 value */ 2080 /* Read the register 0 value */
2081 raw_spin_lock_irqsave(&ioapic_lock, flags); 2081 raw_spin_lock_irqsave(&ioapic_lock, flags);
2082 reg_00.raw = io_apic_read(apic_id, 0); 2082 reg_00.raw = io_apic_read(apic_id, 0);
2083 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2083 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2084 2084
2085 old_id = mp_ioapics[apic_id].apicid; 2085 old_id = mp_ioapics[apic_id].apicid;
2086 2086
2087 if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { 2087 if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
2088 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 2088 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2089 apic_id, mp_ioapics[apic_id].apicid); 2089 apic_id, mp_ioapics[apic_id].apicid);
2090 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2090 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2091 reg_00.bits.ID); 2091 reg_00.bits.ID);
2092 mp_ioapics[apic_id].apicid = reg_00.bits.ID; 2092 mp_ioapics[apic_id].apicid = reg_00.bits.ID;
2093 } 2093 }
2094 2094
2095 /* 2095 /*
2096 * Sanity check, is the ID really free? Every APIC in a 2096 * Sanity check, is the ID really free? Every APIC in a
2097 * system must have a unique ID or we get lots of nice 2097 * system must have a unique ID or we get lots of nice
2098 * 'stuck on smp_invalidate_needed IPI wait' messages. 2098 * 'stuck on smp_invalidate_needed IPI wait' messages.
2099 */ 2099 */
2100 if (apic->check_apicid_used(&phys_id_present_map, 2100 if (apic->check_apicid_used(&phys_id_present_map,
2101 mp_ioapics[apic_id].apicid)) { 2101 mp_ioapics[apic_id].apicid)) {
2102 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 2102 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2103 apic_id, mp_ioapics[apic_id].apicid); 2103 apic_id, mp_ioapics[apic_id].apicid);
2104 for (i = 0; i < get_physical_broadcast(); i++) 2104 for (i = 0; i < get_physical_broadcast(); i++)
2105 if (!physid_isset(i, phys_id_present_map)) 2105 if (!physid_isset(i, phys_id_present_map))
2106 break; 2106 break;
2107 if (i >= get_physical_broadcast()) 2107 if (i >= get_physical_broadcast())
2108 panic("Max APIC ID exceeded!\n"); 2108 panic("Max APIC ID exceeded!\n");
2109 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2109 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2110 i); 2110 i);
2111 physid_set(i, phys_id_present_map); 2111 physid_set(i, phys_id_present_map);
2112 mp_ioapics[apic_id].apicid = i; 2112 mp_ioapics[apic_id].apicid = i;
2113 } else { 2113 } else {
2114 physid_mask_t tmp; 2114 physid_mask_t tmp;
2115 apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); 2115 apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
2116 apic_printk(APIC_VERBOSE, "Setting %d in the " 2116 apic_printk(APIC_VERBOSE, "Setting %d in the "
2117 "phys_id_present_map\n", 2117 "phys_id_present_map\n",
2118 mp_ioapics[apic_id].apicid); 2118 mp_ioapics[apic_id].apicid);
2119 physids_or(phys_id_present_map, phys_id_present_map, tmp); 2119 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2120 } 2120 }
2121 2121
2122 2122
2123 /* 2123 /*
2124 * We need to adjust the IRQ routing table 2124 * We need to adjust the IRQ routing table
2125 * if the ID changed. 2125 * if the ID changed.
2126 */ 2126 */
2127 if (old_id != mp_ioapics[apic_id].apicid) 2127 if (old_id != mp_ioapics[apic_id].apicid)
2128 for (i = 0; i < mp_irq_entries; i++) 2128 for (i = 0; i < mp_irq_entries; i++)
2129 if (mp_irqs[i].dstapic == old_id) 2129 if (mp_irqs[i].dstapic == old_id)
2130 mp_irqs[i].dstapic 2130 mp_irqs[i].dstapic
2131 = mp_ioapics[apic_id].apicid; 2131 = mp_ioapics[apic_id].apicid;
2132 2132
2133 /* 2133 /*
2134 * Read the right value from the MPC table and 2134 * Read the right value from the MPC table and
2135 * write it into the ID register. 2135 * write it into the ID register.
2136 */ 2136 */
2137 apic_printk(APIC_VERBOSE, KERN_INFO 2137 apic_printk(APIC_VERBOSE, KERN_INFO
2138 "...changing IO-APIC physical APIC ID to %d ...", 2138 "...changing IO-APIC physical APIC ID to %d ...",
2139 mp_ioapics[apic_id].apicid); 2139 mp_ioapics[apic_id].apicid);
2140 2140
2141 reg_00.bits.ID = mp_ioapics[apic_id].apicid; 2141 reg_00.bits.ID = mp_ioapics[apic_id].apicid;
2142 raw_spin_lock_irqsave(&ioapic_lock, flags); 2142 raw_spin_lock_irqsave(&ioapic_lock, flags);
2143 io_apic_write(apic_id, 0, reg_00.raw); 2143 io_apic_write(apic_id, 0, reg_00.raw);
2144 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2144 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2145 2145
2146 /* 2146 /*
2147 * Sanity check 2147 * Sanity check
2148 */ 2148 */
2149 raw_spin_lock_irqsave(&ioapic_lock, flags); 2149 raw_spin_lock_irqsave(&ioapic_lock, flags);
2150 reg_00.raw = io_apic_read(apic_id, 0); 2150 reg_00.raw = io_apic_read(apic_id, 0);
2151 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2151 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2152 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) 2152 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
2153 printk("could not set ID!\n"); 2153 printk("could not set ID!\n");
2154 else 2154 else
2155 apic_printk(APIC_VERBOSE, " ok.\n"); 2155 apic_printk(APIC_VERBOSE, " ok.\n");
2156 } 2156 }
2157 } 2157 }
2158 #endif 2158 #endif
2159 2159
2160 int no_timer_check __initdata; 2160 int no_timer_check __initdata;
2161 2161
2162 static int __init notimercheck(char *s) 2162 static int __init notimercheck(char *s)
2163 { 2163 {
2164 no_timer_check = 1; 2164 no_timer_check = 1;
2165 return 1; 2165 return 1;
2166 } 2166 }
2167 __setup("no_timer_check", notimercheck); 2167 __setup("no_timer_check", notimercheck);
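The switch above is a standard __setup() boot parameter: appending the token to the kernel command line sets no_timer_check and makes timer_irq_works() below report success unconditionally. For example (illustrative command line, not from the patch):

    linux ... no_timer_check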
2168 2168
2169 /* 2169 /*
2170 * There is a nasty bug in some older SMP boards: their mptable lies 2170 * There is a nasty bug in some older SMP boards: their mptable lies
2171 * about the timer IRQ. We do the following to work around the situation: 2171 * about the timer IRQ. We do the following to work around the situation:
2172 * 2172 *
2173 * - timer IRQ defaults to IO-APIC IRQ 2173 * - timer IRQ defaults to IO-APIC IRQ
2174 * - if this function detects that timer IRQs are defunct, then we fall 2174 * - if this function detects that timer IRQs are defunct, then we fall
2175 * back to ISA timer IRQs 2175 * back to ISA timer IRQs
2176 */ 2176 */
2177 static int __init timer_irq_works(void) 2177 static int __init timer_irq_works(void)
2178 { 2178 {
2179 unsigned long t1 = jiffies; 2179 unsigned long t1 = jiffies;
2180 unsigned long flags; 2180 unsigned long flags;
2181 2181
2182 if (no_timer_check) 2182 if (no_timer_check)
2183 return 1; 2183 return 1;
2184 2184
2185 local_save_flags(flags); 2185 local_save_flags(flags);
2186 local_irq_enable(); 2186 local_irq_enable();
2187 /* Let ten ticks pass... */ 2187 /* Let ten ticks pass... */
2188 mdelay((10 * 1000) / HZ); 2188 mdelay((10 * 1000) / HZ);
2189 local_irq_restore(flags); 2189 local_irq_restore(flags);
2190 2190
2191 /* 2191 /*
2192 * Expect a few ticks at least, to be sure some possible 2192 * Expect a few ticks at least, to be sure some possible
2193 * glue logic does not lock up after the first one or two 2193 * glue logic does not lock up after the first one or two
2194 * ticks in a non-ExtINT mode. Also the local APIC 2194 * ticks in a non-ExtINT mode. Also the local APIC
2195 * might have cached one ExtINT interrupt. Finally, at 2195 * might have cached one ExtINT interrupt. Finally, at
2196 * least one tick may be lost due to delays. 2196 * least one tick may be lost due to delays.
2197 */ 2197 */
2198 2198
2199 /* jiffies wrap? */ 2199 /* jiffies wrap? */
2200 if (time_after(jiffies, t1 + 4)) 2200 if (time_after(jiffies, t1 + 4))
2201 return 1; 2201 return 1;
2202 return 0; 2202 return 0;
2203 } 2203 }
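For readers checking the arithmetic in timer_irq_works(): a worked example, with HZ = 250 assumed purely for illustration.

/*
 * Worked example (HZ = 250 is an assumption, not from the patch):
 *
 *   mdelay((10 * 1000) / HZ)  ->  mdelay(40)  : ~40 ms, i.e. ~10 ticks
 *   time_after(jiffies, t1 + 4)               : true once jiffies >= t1 + 5
 *
 * So the check passes when at least 5 of the ~10 expected ticks were
 * actually delivered, leaving slack for a cached ExtINT interrupt and
 * for ticks lost to delays, as the comment above explains.
 */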
2204 2204
2205 /* 2205 /*
2206 * In the SMP+IOAPIC case it might happen that there are an unspecified 2206 * In the SMP+IOAPIC case it might happen that there are an unspecified
2207 * number of pending IRQ events unhandled. These cases are very rare, 2207 * number of pending IRQ events unhandled. These cases are very rare,
2208 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much 2208 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
2209 * better to do it this way since we then do not have to be aware of 2209 * better to do it this way since we then do not have to be aware of
2210 * 'pending' interrupts in the IRQ path, except at this point. 2210 * 'pending' interrupts in the IRQ path, except at this point.
2211 */ 2211 */
2212 /* 2212 /*
2213 * Edge-triggered interrupts need to resend any interrupt 2213 * Edge-triggered interrupts need to resend any interrupt
2214 * that was delayed, but this is now handled in the 2214 * that was delayed, but this is now handled in the
2215 * device-independent code. 2215 * device-independent code.
2216 */ 2216 */
2217 2217
2218 /* 2218 /*
2219 * Starting up an edge-triggered IO-APIC interrupt is 2219 * Starting up an edge-triggered IO-APIC interrupt is
2220 * nasty - we need to make sure that we get the edge. 2220 * nasty - we need to make sure that we get the edge.
2221 * If it is already asserted for some reason, we need to 2221 * If it is already asserted for some reason, we need to
2222 * return 1 to indicate that it was pending. 2222 * return 1 to indicate that it was pending.
2223 * 2223 *
2224 * This is not complete - we should be able to fake 2224 * This is not complete - we should be able to fake
2225 * an edge even if it isn't on the 8259A... 2225 * an edge even if it isn't on the 8259A...
2226 */ 2226 */
2227 2227
2228 static unsigned int startup_ioapic_irq(unsigned int irq) 2228 static unsigned int startup_ioapic_irq(unsigned int irq)
2229 { 2229 {
2230 int was_pending = 0; 2230 int was_pending = 0;
2231 unsigned long flags; 2231 unsigned long flags;
2232 struct irq_cfg *cfg; 2232 struct irq_cfg *cfg;
2233 2233
2234 raw_spin_lock_irqsave(&ioapic_lock, flags); 2234 raw_spin_lock_irqsave(&ioapic_lock, flags);
2235 if (irq < legacy_pic->nr_legacy_irqs) { 2235 if (irq < legacy_pic->nr_legacy_irqs) {
2236 legacy_pic->chip->mask(irq); 2236 legacy_pic->mask(irq);
2237 if (legacy_pic->irq_pending(irq)) 2237 if (legacy_pic->irq_pending(irq))
2238 was_pending = 1; 2238 was_pending = 1;
2239 } 2239 }
2240 cfg = irq_cfg(irq); 2240 cfg = irq_cfg(irq);
2241 __unmask_IO_APIC_irq(cfg); 2241 __unmask_IO_APIC_irq(cfg);
2242 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2242 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2243 2243
2244 return was_pending; 2244 return was_pending;
2245 } 2245 }
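Note the conversion in the hunk above: startup_ioapic_irq() now calls legacy_pic->mask(irq) directly instead of going through legacy_pic->chip->mask(irq). A minimal sketch of how such per-irq callbacks could be wired up for the default 8259A PIC; the wrapper names and bodies here are illustrative assumptions, not the patch's code.

/*
 * Hypothetical wrappers, assuming enable/disable_8259A_irq() helpers
 * that take a plain irq number (an assumption for this sketch).
 */
static void sketch_8259A_mask(unsigned int irq)
{
	disable_8259A_irq(irq);		/* set the line's bit in the 8259A IMR */
}

static void sketch_8259A_unmask(unsigned int irq)
{
	enable_8259A_irq(irq);		/* clear the IMR bit again */
}

/* The callbacks would then slot into the legacy_pic instance: */
static struct legacy_pic sketch_legacy_pic = {
	.nr_legacy_irqs	= 16,
	.chip		= &i8259A_chip,
	.mask		= sketch_8259A_mask,
	.unmask		= sketch_8259A_unmask,
	/* ... mask_all, restore_mask, init, and the rest ... */
};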
2246 2246
2247 static int ioapic_retrigger_irq(unsigned int irq) 2247 static int ioapic_retrigger_irq(unsigned int irq)
2248 { 2248 {
2249 2249
2250 struct irq_cfg *cfg = irq_cfg(irq); 2250 struct irq_cfg *cfg = irq_cfg(irq);
2251 unsigned long flags; 2251 unsigned long flags;
2252 2252
2253 raw_spin_lock_irqsave(&vector_lock, flags); 2253 raw_spin_lock_irqsave(&vector_lock, flags);
2254 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); 2254 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2255 raw_spin_unlock_irqrestore(&vector_lock, flags); 2255 raw_spin_unlock_irqrestore(&vector_lock, flags);
2256 2256
2257 return 1; 2257 return 1;
2258 } 2258 }
2259 2259
2260 /* 2260 /*
2261 * Level- and edge-triggered IO-APIC interrupts need different handling, 2261 * Level- and edge-triggered IO-APIC interrupts need different handling,
2262 * so we use two separate IRQ descriptors. Edge-triggered IRQs can be 2262 * so we use two separate IRQ descriptors. Edge-triggered IRQs can be
2263 * handled with the level-triggered descriptor, but that one has slightly 2263 * handled with the level-triggered descriptor, but that one has slightly
2264 * more overhead. Level-triggered interrupts cannot be handled with the 2264 * more overhead. Level-triggered interrupts cannot be handled with the
2265 * edge-triggered handler without risking IRQ storms and other ugly 2265 * edge-triggered handler without risking IRQ storms and other ugly
2266 * races. 2266 * races.
2267 */ 2267 */
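A sketch of how the two flows are typically attached to a pin, assuming the generic handle_edge_irq/handle_fasteoi_irq flow handlers and following the same set_irq_chip_and_handler_name() pattern used for lapic_chip later in this file.

static void sketch_register_ioapic_irq(int irq, int level_trigger)
{
	if (level_trigger)
		/* level: eoi-based flow, acked late via ->eoi */
		set_irq_chip_and_handler_name(irq, &ioapic_chip,
					      handle_fasteoi_irq, "fasteoi");
	else
		/* edge: acked early via ->ack, no unmask dance needed */
		set_irq_chip_and_handler_name(irq, &ioapic_chip,
					      handle_edge_irq, "edge");
}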
2268 2268
2269 #ifdef CONFIG_SMP 2269 #ifdef CONFIG_SMP
2270 void send_cleanup_vector(struct irq_cfg *cfg) 2270 void send_cleanup_vector(struct irq_cfg *cfg)
2271 { 2271 {
2272 cpumask_var_t cleanup_mask; 2272 cpumask_var_t cleanup_mask;
2273 2273
2274 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { 2274 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
2275 unsigned int i; 2275 unsigned int i;
2276 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) 2276 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
2277 apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); 2277 apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
2278 } else { 2278 } else {
2279 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); 2279 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
2280 apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); 2280 apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2281 free_cpumask_var(cleanup_mask); 2281 free_cpumask_var(cleanup_mask);
2282 } 2282 }
2283 cfg->move_in_progress = 0; 2283 cfg->move_in_progress = 0;
2284 } 2284 }
2285 2285
2286 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) 2286 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
2287 { 2287 {
2288 int apic, pin; 2288 int apic, pin;
2289 struct irq_pin_list *entry; 2289 struct irq_pin_list *entry;
2290 u8 vector = cfg->vector; 2290 u8 vector = cfg->vector;
2291 2291
2292 for_each_irq_pin(entry, cfg->irq_2_pin) { 2292 for_each_irq_pin(entry, cfg->irq_2_pin) {
2293 unsigned int reg; 2293 unsigned int reg;
2294 2294
2295 apic = entry->apic; 2295 apic = entry->apic;
2296 pin = entry->pin; 2296 pin = entry->pin;
2297 /* 2297 /*
2298 * With interrupt-remapping, destination information comes 2298 * With interrupt-remapping, destination information comes
2299 * from the interrupt-remapping table entry. 2299 * from the interrupt-remapping table entry.
2300 */ 2300 */
2301 if (!irq_remapped(irq)) 2301 if (!irq_remapped(irq))
2302 io_apic_write(apic, 0x11 + pin*2, dest); 2302 io_apic_write(apic, 0x11 + pin*2, dest);
2303 reg = io_apic_read(apic, 0x10 + pin*2); 2303 reg = io_apic_read(apic, 0x10 + pin*2);
2304 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2304 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
2305 reg |= vector; 2305 reg |= vector;
2306 io_apic_modify(apic, 0x10 + pin*2, reg); 2306 io_apic_modify(apic, 0x10 + pin*2, reg);
2307 } 2307 }
2308 } 2308 }
2309 2309
2310 /* 2310 /*
2311 * Either sets desc->affinity to a valid value, stores the matching 2311 * Either sets desc->affinity to a valid value, stores the matching
2312 * ->cpu_mask_to_apicid in *dest_id and returns 0, or returns -1 and 2312 * ->cpu_mask_to_apicid in *dest_id and returns 0, or returns -1 and
2313 * leaves desc->affinity untouched. 2313 * leaves desc->affinity untouched.
2314 */ 2314 */
2315 unsigned int 2315 unsigned int
2316 set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, 2316 set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
2317 unsigned int *dest_id) 2317 unsigned int *dest_id)
2318 { 2318 {
2319 struct irq_cfg *cfg; 2319 struct irq_cfg *cfg;
2320 unsigned int irq; 2320 unsigned int irq;
2321 2321
2322 if (!cpumask_intersects(mask, cpu_online_mask)) 2322 if (!cpumask_intersects(mask, cpu_online_mask))
2323 return -1; 2323 return -1;
2324 2324
2325 irq = desc->irq; 2325 irq = desc->irq;
2326 cfg = desc->chip_data; 2326 cfg = desc->chip_data;
2327 if (assign_irq_vector(irq, cfg, mask)) 2327 if (assign_irq_vector(irq, cfg, mask))
2328 return -1; 2328 return -1;
2329 2329
2330 cpumask_copy(desc->affinity, mask); 2330 cpumask_copy(desc->affinity, mask);
2331 2331
2332 *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); 2332 *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
2333 return 0; 2333 return 0;
2334 } 2334 }
2335 2335
2336 static int 2336 static int
2337 set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2337 set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2338 { 2338 {
2339 struct irq_cfg *cfg; 2339 struct irq_cfg *cfg;
2340 unsigned long flags; 2340 unsigned long flags;
2341 unsigned int dest; 2341 unsigned int dest;
2342 unsigned int irq; 2342 unsigned int irq;
2343 int ret = -1; 2343 int ret = -1;
2344 2344
2345 irq = desc->irq; 2345 irq = desc->irq;
2346 cfg = desc->chip_data; 2346 cfg = desc->chip_data;
2347 2347
2348 raw_spin_lock_irqsave(&ioapic_lock, flags); 2348 raw_spin_lock_irqsave(&ioapic_lock, flags);
2349 ret = set_desc_affinity(desc, mask, &dest); 2349 ret = set_desc_affinity(desc, mask, &dest);
2350 if (!ret) { 2350 if (!ret) {
2351 /* Only the high 8 bits are valid. */ 2351 /* Only the high 8 bits are valid. */
2352 dest = SET_APIC_LOGICAL_ID(dest); 2352 dest = SET_APIC_LOGICAL_ID(dest);
2353 __target_IO_APIC_irq(irq, dest, cfg); 2353 __target_IO_APIC_irq(irq, dest, cfg);
2354 } 2354 }
2355 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2355 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2356 2356
2357 return ret; 2357 return ret;
2358 } 2358 }
2359 2359
2360 static int 2360 static int
2361 set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) 2361 set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
2362 { 2362 {
2363 struct irq_desc *desc; 2363 struct irq_desc *desc;
2364 2364
2365 desc = irq_to_desc(irq); 2365 desc = irq_to_desc(irq);
2366 2366
2367 return set_ioapic_affinity_irq_desc(desc, mask); 2367 return set_ioapic_affinity_irq_desc(desc, mask);
2368 } 2368 }
2369 2369
2370 #ifdef CONFIG_INTR_REMAP 2370 #ifdef CONFIG_INTR_REMAP
2371 2371
2372 /* 2372 /*
2373 * Migrate the IO-APIC irq in the presence of intr-remapping. 2373 * Migrate the IO-APIC irq in the presence of intr-remapping.
2374 * 2374 *
2375 * For both level and edge triggered, irq migration is a simple atomic 2375 * For both level and edge triggered, irq migration is a simple atomic
2376 * update (of vector and cpu destination) of the IRTE and a flush of the hardware cache. 2376 * update (of vector and cpu destination) of the IRTE and a flush of the hardware cache.
2377 * 2377 *
2378 * For level triggered, we eliminate the io-apic RTE modification (with the 2378 * For level triggered, we eliminate the io-apic RTE modification (with the
2379 * updated vector information) by using a virtual vector (the io-apic pin number). 2379 * updated vector information) by using a virtual vector (the io-apic pin number).
2380 * The real vector that is used for interrupting the cpu will come from 2380 * The real vector that is used for interrupting the cpu will come from
2381 * the interrupt-remapping table entry. 2381 * the interrupt-remapping table entry.
2382 */ 2382 */
2383 static int 2383 static int
2384 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2384 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2385 { 2385 {
2386 struct irq_cfg *cfg; 2386 struct irq_cfg *cfg;
2387 struct irte irte; 2387 struct irte irte;
2388 unsigned int dest; 2388 unsigned int dest;
2389 unsigned int irq; 2389 unsigned int irq;
2390 int ret = -1; 2390 int ret = -1;
2391 2391
2392 if (!cpumask_intersects(mask, cpu_online_mask)) 2392 if (!cpumask_intersects(mask, cpu_online_mask))
2393 return ret; 2393 return ret;
2394 2394
2395 irq = desc->irq; 2395 irq = desc->irq;
2396 if (get_irte(irq, &irte)) 2396 if (get_irte(irq, &irte))
2397 return ret; 2397 return ret;
2398 2398
2399 cfg = desc->chip_data; 2399 cfg = desc->chip_data;
2400 if (assign_irq_vector(irq, cfg, mask)) 2400 if (assign_irq_vector(irq, cfg, mask))
2401 return ret; 2401 return ret;
2402 2402
2403 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); 2403 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2404 2404
2405 irte.vector = cfg->vector; 2405 irte.vector = cfg->vector;
2406 irte.dest_id = IRTE_DEST(dest); 2406 irte.dest_id = IRTE_DEST(dest);
2407 2407
2408 /* 2408 /*
2409 * Modify the IRTE and flush the interrupt entry cache. 2409 * Modify the IRTE and flush the interrupt entry cache.
2410 */ 2410 */
2411 modify_irte(irq, &irte); 2411 modify_irte(irq, &irte);
2412 2412
2413 if (cfg->move_in_progress) 2413 if (cfg->move_in_progress)
2414 send_cleanup_vector(cfg); 2414 send_cleanup_vector(cfg);
2415 2415
2416 cpumask_copy(desc->affinity, mask); 2416 cpumask_copy(desc->affinity, mask);
2417 2417
2418 return 0; 2418 return 0;
2419 } 2419 }
2420 2420
2421 /* 2421 /*
2422 * Migrates the IRQ destination in process context. 2422 * Migrates the IRQ destination in process context.
2423 */ 2423 */
2424 static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, 2424 static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2425 const struct cpumask *mask) 2425 const struct cpumask *mask)
2426 { 2426 {
2427 return migrate_ioapic_irq_desc(desc, mask); 2427 return migrate_ioapic_irq_desc(desc, mask);
2428 } 2428 }
2429 static int set_ir_ioapic_affinity_irq(unsigned int irq, 2429 static int set_ir_ioapic_affinity_irq(unsigned int irq,
2430 const struct cpumask *mask) 2430 const struct cpumask *mask)
2431 { 2431 {
2432 struct irq_desc *desc = irq_to_desc(irq); 2432 struct irq_desc *desc = irq_to_desc(irq);
2433 2433
2434 return set_ir_ioapic_affinity_irq_desc(desc, mask); 2434 return set_ir_ioapic_affinity_irq_desc(desc, mask);
2435 } 2435 }
2436 #else 2436 #else
2437 static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, 2437 static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2438 const struct cpumask *mask) 2438 const struct cpumask *mask)
2439 { 2439 {
2440 return 0; 2440 return 0;
2441 } 2441 }
2442 #endif 2442 #endif
2443 2443
2444 asmlinkage void smp_irq_move_cleanup_interrupt(void) 2444 asmlinkage void smp_irq_move_cleanup_interrupt(void)
2445 { 2445 {
2446 unsigned vector, me; 2446 unsigned vector, me;
2447 2447
2448 ack_APIC_irq(); 2448 ack_APIC_irq();
2449 exit_idle(); 2449 exit_idle();
2450 irq_enter(); 2450 irq_enter();
2451 2451
2452 me = smp_processor_id(); 2452 me = smp_processor_id();
2453 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 2453 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
2454 unsigned int irq; 2454 unsigned int irq;
2455 unsigned int irr; 2455 unsigned int irr;
2456 struct irq_desc *desc; 2456 struct irq_desc *desc;
2457 struct irq_cfg *cfg; 2457 struct irq_cfg *cfg;
2458 irq = __get_cpu_var(vector_irq)[vector]; 2458 irq = __get_cpu_var(vector_irq)[vector];
2459 2459
2460 if (irq == -1) 2460 if (irq == -1)
2461 continue; 2461 continue;
2462 2462
2463 desc = irq_to_desc(irq); 2463 desc = irq_to_desc(irq);
2464 if (!desc) 2464 if (!desc)
2465 continue; 2465 continue;
2466 2466
2467 cfg = irq_cfg(irq); 2467 cfg = irq_cfg(irq);
2468 raw_spin_lock(&desc->lock); 2468 raw_spin_lock(&desc->lock);
2469 2469
2470 /* 2470 /*
2471 * Check if the irq migration is in progress. If so, we 2471 * Check if the irq migration is in progress. If so, we
2472 * haven't received the cleanup request yet for this irq. 2472 * haven't received the cleanup request yet for this irq.
2473 */ 2473 */
2474 if (cfg->move_in_progress) 2474 if (cfg->move_in_progress)
2475 goto unlock; 2475 goto unlock;
2476 2476
2477 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) 2477 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2478 goto unlock; 2478 goto unlock;
2479 2479
2480 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); 2480 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
2481 /* 2481 /*
2482 * Check if the vector that needs to be cleaned up is 2482 * Check if the vector that needs to be cleaned up is
2483 * registered in the cpu's IRR. If so, then this is not 2483 * registered in the cpu's IRR. If so, then this is not
2484 * the best time to clean it up. Let's clean it up in the 2484 * the best time to clean it up. Let's clean it up in the
2485 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR 2485 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
2486 * to myself. 2486 * to myself.
2487 */ 2487 */
2488 if (irr & (1 << (vector % 32))) { 2488 if (irr & (1 << (vector % 32))) {
2489 apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); 2489 apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
2490 goto unlock; 2490 goto unlock;
2491 } 2491 }
2492 __get_cpu_var(vector_irq)[vector] = -1; 2492 __get_cpu_var(vector_irq)[vector] = -1;
2493 unlock: 2493 unlock:
2494 raw_spin_unlock(&desc->lock); 2494 raw_spin_unlock(&desc->lock);
2495 } 2495 }
2496 2496
2497 irq_exit(); 2497 irq_exit();
2498 } 2498 }
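The APIC_IRR read above indexes a bank of eight 32-bit registers spaced 0x10 apart in the register page; here is a worked example of the vector-to-bit arithmetic, with vector 0x61 chosen arbitrarily.

/*
 * Worked example for the IRR lookup above (vector 0x61 is arbitrary):
 *
 *   reg = APIC_IRR + (0x61 / 32) * 0x10   ->  word 3 of the IRR bank
 *   bit = 0x61 % 32                       ->  bit 1 of that word
 *
 * so "irr & (1 << (vector % 32))" tests exactly the pending bit of the
 * vector being cleaned up. The TMR read in ack_apic_level() below uses
 * the same layout via ((i & ~0x1f) >> 1): for i = 0x61 that also
 * yields offset 0x30.
 */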
2499 2499
2500 static void __irq_complete_move(struct irq_desc **descp, unsigned vector) 2500 static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
2501 { 2501 {
2502 struct irq_desc *desc = *descp; 2502 struct irq_desc *desc = *descp;
2503 struct irq_cfg *cfg = desc->chip_data; 2503 struct irq_cfg *cfg = desc->chip_data;
2504 unsigned me; 2504 unsigned me;
2505 2505
2506 if (likely(!cfg->move_in_progress)) 2506 if (likely(!cfg->move_in_progress))
2507 return; 2507 return;
2508 2508
2509 me = smp_processor_id(); 2509 me = smp_processor_id();
2510 2510
2511 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) 2511 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2512 send_cleanup_vector(cfg); 2512 send_cleanup_vector(cfg);
2513 } 2513 }
2514 2514
2515 static void irq_complete_move(struct irq_desc **descp) 2515 static void irq_complete_move(struct irq_desc **descp)
2516 { 2516 {
2517 __irq_complete_move(descp, ~get_irq_regs()->orig_ax); 2517 __irq_complete_move(descp, ~get_irq_regs()->orig_ax);
2518 } 2518 }
2519 2519
2520 void irq_force_complete_move(int irq) 2520 void irq_force_complete_move(int irq)
2521 { 2521 {
2522 struct irq_desc *desc = irq_to_desc(irq); 2522 struct irq_desc *desc = irq_to_desc(irq);
2523 struct irq_cfg *cfg = desc->chip_data; 2523 struct irq_cfg *cfg = desc->chip_data;
2524 2524
2525 if (!cfg) 2525 if (!cfg)
2526 return; 2526 return;
2527 2527
2528 __irq_complete_move(&desc, cfg->vector); 2528 __irq_complete_move(&desc, cfg->vector);
2529 } 2529 }
2530 #else 2530 #else
2531 static inline void irq_complete_move(struct irq_desc **descp) {} 2531 static inline void irq_complete_move(struct irq_desc **descp) {}
2532 #endif 2532 #endif
2533 2533
2534 static void ack_apic_edge(unsigned int irq) 2534 static void ack_apic_edge(unsigned int irq)
2535 { 2535 {
2536 struct irq_desc *desc = irq_to_desc(irq); 2536 struct irq_desc *desc = irq_to_desc(irq);
2537 2537
2538 irq_complete_move(&desc); 2538 irq_complete_move(&desc);
2539 move_native_irq(irq); 2539 move_native_irq(irq);
2540 ack_APIC_irq(); 2540 ack_APIC_irq();
2541 } 2541 }
2542 2542
2543 atomic_t irq_mis_count; 2543 atomic_t irq_mis_count;
2544 2544
2545 /* 2545 /*
2546 * IO-APIC versions below 0x20 don't support the EOI register. 2546 * IO-APIC versions below 0x20 don't support the EOI register.
2547 * For the record, here is the information about various versions: 2547 * For the record, here is the information about various versions:
2548 * 0Xh 82489DX 2548 * 0Xh 82489DX
2549 * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant 2549 * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
2550 * 2Xh I/O(x)APIC which is PCI 2.2 Compliant 2550 * 2Xh I/O(x)APIC which is PCI 2.2 Compliant
2551 * 30h-FFh Reserved 2551 * 30h-FFh Reserved
2552 * 2552 *
2553 * Some of the Intel ICH specs (ICH2 to ICH5) document the io-apic 2553 * Some of the Intel ICH specs (ICH2 to ICH5) document the io-apic
2554 * version as 0x2. This is a documentation error; these ICH chips 2554 * version as 0x2. This is a documentation error; these ICH chips
2555 * use io-apics of version 0x20. 2555 * use io-apics of version 0x20.
2556 * 2556 *
2557 * For IO-APICs with an EOI register, we use it to do an explicit EOI. 2557 * For IO-APICs with an EOI register, we use it to do an explicit EOI.
2558 * Otherwise, we simulate the EOI message manually by changing the trigger 2558 * Otherwise, we simulate the EOI message manually by changing the trigger
2559 * mode to edge and then back to level, with the RTE masked during this. 2559 * mode to edge and then back to level, with the RTE masked during this.
2560 */ 2560 */
2561 static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 2561 static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2562 { 2562 {
2563 struct irq_pin_list *entry; 2563 struct irq_pin_list *entry;
2564 2564
2565 for_each_irq_pin(entry, cfg->irq_2_pin) { 2565 for_each_irq_pin(entry, cfg->irq_2_pin) {
2566 if (mp_ioapics[entry->apic].apicver >= 0x20) { 2566 if (mp_ioapics[entry->apic].apicver >= 0x20) {
2567 /* 2567 /*
2568 * Intr-remapping uses the pin number as the virtual vector 2568 * Intr-remapping uses the pin number as the virtual vector
2569 * in the RTE. The actual vector is programmed in the 2569 * in the RTE. The actual vector is programmed in the
2570 * intr-remapping table entry. Hence for the io-apic 2570 * intr-remapping table entry. Hence for the io-apic
2571 * EOI we use the pin number. 2571 * EOI we use the pin number.
2572 */ 2572 */
2573 if (irq_remapped(irq)) 2573 if (irq_remapped(irq))
2574 io_apic_eoi(entry->apic, entry->pin); 2574 io_apic_eoi(entry->apic, entry->pin);
2575 else 2575 else
2576 io_apic_eoi(entry->apic, cfg->vector); 2576 io_apic_eoi(entry->apic, cfg->vector);
2577 } else { 2577 } else {
2578 __mask_and_edge_IO_APIC_irq(entry); 2578 __mask_and_edge_IO_APIC_irq(entry);
2579 __unmask_and_level_IO_APIC_irq(entry); 2579 __unmask_and_level_IO_APIC_irq(entry);
2580 } 2580 }
2581 } 2581 }
2582 } 2582 }
2583 2583
2584 static void eoi_ioapic_irq(struct irq_desc *desc) 2584 static void eoi_ioapic_irq(struct irq_desc *desc)
2585 { 2585 {
2586 struct irq_cfg *cfg; 2586 struct irq_cfg *cfg;
2587 unsigned long flags; 2587 unsigned long flags;
2588 unsigned int irq; 2588 unsigned int irq;
2589 2589
2590 irq = desc->irq; 2590 irq = desc->irq;
2591 cfg = desc->chip_data; 2591 cfg = desc->chip_data;
2592 2592
2593 raw_spin_lock_irqsave(&ioapic_lock, flags); 2593 raw_spin_lock_irqsave(&ioapic_lock, flags);
2594 __eoi_ioapic_irq(irq, cfg); 2594 __eoi_ioapic_irq(irq, cfg);
2595 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2595 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2596 } 2596 }
2597 2597
2598 static void ack_apic_level(unsigned int irq) 2598 static void ack_apic_level(unsigned int irq)
2599 { 2599 {
2600 struct irq_desc *desc = irq_to_desc(irq); 2600 struct irq_desc *desc = irq_to_desc(irq);
2601 unsigned long v; 2601 unsigned long v;
2602 int i; 2602 int i;
2603 struct irq_cfg *cfg; 2603 struct irq_cfg *cfg;
2604 int do_unmask_irq = 0; 2604 int do_unmask_irq = 0;
2605 2605
2606 irq_complete_move(&desc); 2606 irq_complete_move(&desc);
2607 #ifdef CONFIG_GENERIC_PENDING_IRQ 2607 #ifdef CONFIG_GENERIC_PENDING_IRQ
2608 /* If we are moving the irq we need to mask it */ 2608 /* If we are moving the irq we need to mask it */
2609 if (unlikely(desc->status & IRQ_MOVE_PENDING)) { 2609 if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
2610 do_unmask_irq = 1; 2610 do_unmask_irq = 1;
2611 mask_IO_APIC_irq_desc(desc); 2611 mask_IO_APIC_irq_desc(desc);
2612 } 2612 }
2613 #endif 2613 #endif
2614 2614
2615 /* 2615 /*
2616 * It appears there is an erratum which affects at least version 0x11 2616 * It appears there is an erratum which affects at least version 0x11
2617 * of I/O APIC (that's the 82093AA and cores integrated into various 2617 * of I/O APIC (that's the 82093AA and cores integrated into various
2618 * chipsets). Under certain conditions a level-triggered interrupt is 2618 * chipsets). Under certain conditions a level-triggered interrupt is
2619 * erroneously delivered as an edge-triggered one but the respective IRR 2619 * erroneously delivered as an edge-triggered one but the respective IRR
2620 * bit gets set nevertheless. As a result the I/O unit expects an EOI 2620 * bit gets set nevertheless. As a result the I/O unit expects an EOI
2621 * message but it will never arrive and further interrupts are blocked 2621 * message but it will never arrive and further interrupts are blocked
2622 * from the source. The exact reason is so far unknown, but the 2622 * from the source. The exact reason is so far unknown, but the
2623 * phenomenon was observed when two consecutive interrupt requests 2623 * phenomenon was observed when two consecutive interrupt requests
2624 * from a given source get delivered to the same CPU and the source is 2624 * from a given source get delivered to the same CPU and the source is
2625 * temporarily disabled in between. 2625 * temporarily disabled in between.
2626 * 2626 *
2627 * A workaround is to simulate an EOI message manually. We achieve it 2627 * A workaround is to simulate an EOI message manually. We achieve it
2628 * by setting the trigger mode to edge and then to level when the edge 2628 * by setting the trigger mode to edge and then to level when the edge
2629 * trigger mode gets detected in the TMR of a local APIC for a 2629 * trigger mode gets detected in the TMR of a local APIC for a
2630 * level-triggered interrupt. We mask the source for the time of the 2630 * level-triggered interrupt. We mask the source for the time of the
2631 * operation to prevent an edge-triggered interrupt escaping meanwhile. 2631 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2632 * The idea is from Manfred Spraul. --macro 2632 * The idea is from Manfred Spraul. --macro
2633 * 2633 *
2634 * Also, when a cpu goes offline, fixup_irqs() will forward 2634 * Also, when a cpu goes offline, fixup_irqs() will forward
2635 * any unhandled interrupt on the offlined cpu to the new cpu 2635 * any unhandled interrupt on the offlined cpu to the new cpu
2636 * destination that is handling the corresponding interrupt. This 2636 * destination that is handling the corresponding interrupt. This
2637 * interrupt forwarding is done via IPIs. Hence, in this case a 2637 * interrupt forwarding is done via IPIs. Hence, in this case a
2638 * level-triggered io-apic interrupt will also be seen as an edge 2638 * level-triggered io-apic interrupt will also be seen as an edge
2639 * interrupt in the IRR. And we can't rely on the cpu's EOI 2639 * interrupt in the IRR. And we can't rely on the cpu's EOI
2640 * being broadcast to the IO-APICs to clear the remote IRR 2640 * being broadcast to the IO-APICs to clear the remote IRR
2641 * corresponding to the level-triggered interrupt. Hence on IO-APICs 2641 * corresponding to the level-triggered interrupt. Hence on IO-APICs
2642 * supporting the EOI register, we do an explicit EOI to clear the 2642 * supporting the EOI register, we do an explicit EOI to clear the
2643 * remote IRR, and on IO-APICs which don't have an EOI register, 2643 * remote IRR, and on IO-APICs which don't have an EOI register,
2644 * we use the above logic (mask+edge followed by unmask+level) from 2644 * we use the above logic (mask+edge followed by unmask+level) from
2645 * Manfred Spraul to clear the remote IRR. 2645 * Manfred Spraul to clear the remote IRR.
2646 */ 2646 */
2647 cfg = desc->chip_data; 2647 cfg = desc->chip_data;
2648 i = cfg->vector; 2648 i = cfg->vector;
2649 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2649 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2650 2650
2651 /* 2651 /*
2652 * We must acknowledge the irq before we move it or the acknowledge will 2652 * We must acknowledge the irq before we move it or the acknowledge will
2653 * not propagate properly. 2653 * not propagate properly.
2654 */ 2654 */
2655 ack_APIC_irq(); 2655 ack_APIC_irq();
2656 2656
2657 /* 2657 /*
2658 * Tail end of clearing the remote IRR bit (either by delivering the EOI 2658 * Tail end of clearing the remote IRR bit (either by delivering the EOI
2659 * message via an io-apic EOI register write or by simulating it using the 2659 * message via an io-apic EOI register write or by simulating it using the
2660 * mask+edge followed by unmask+level logic) manually when the 2660 * mask+edge followed by unmask+level logic) manually when the
2661 * level-triggered interrupt is seen as an edge-triggered interrupt 2661 * level-triggered interrupt is seen as an edge-triggered interrupt
2662 * at the cpu. 2662 * at the cpu.
2663 */ 2663 */
2664 if (!(v & (1 << (i & 0x1f)))) { 2664 if (!(v & (1 << (i & 0x1f)))) {
2665 atomic_inc(&irq_mis_count); 2665 atomic_inc(&irq_mis_count);
2666 2666
2667 eoi_ioapic_irq(desc); 2667 eoi_ioapic_irq(desc);
2668 } 2668 }
2669 2669
2670 /* Now we can move and re-enable the irq */ 2670 /* Now we can move and re-enable the irq */
2671 if (unlikely(do_unmask_irq)) { 2671 if (unlikely(do_unmask_irq)) {
2672 /* Only migrate the irq if the ack has been received. 2672 /* Only migrate the irq if the ack has been received.
2673 * 2673 *
2674 * On rare occasions the broadcast level-triggered ack gets 2674 * On rare occasions the broadcast level-triggered ack gets
2675 * delayed going to ioapics, and if we reprogram the 2675 * delayed going to ioapics, and if we reprogram the
2676 * vector while Remote IRR is still set the irq will never 2676 * vector while Remote IRR is still set the irq will never
2677 * fire again. 2677 * fire again.
2678 * 2678 *
2679 * To prevent this scenario we read the Remote IRR bit 2679 * To prevent this scenario we read the Remote IRR bit
2680 * of the ioapic. This has two effects. 2680 * of the ioapic. This has two effects.
2681 * - On any sane system the read of the ioapic will 2681 * - On any sane system the read of the ioapic will
2682 * flush writes (and acks) going to the ioapic from 2682 * flush writes (and acks) going to the ioapic from
2683 * this cpu. 2683 * this cpu.
2684 * - We get to see if the ACK has actually been delivered. 2684 * - We get to see if the ACK has actually been delivered.
2685 * 2685 *
2686 * Based on failed experiments of reprogramming the 2686 * Based on failed experiments of reprogramming the
2687 * ioapic entry from outside of irq context (starting 2687 * ioapic entry from outside of irq context (starting
2688 * with masking the ioapic entry and then polling until 2688 * with masking the ioapic entry and then polling until
2689 * Remote IRR was clear before reprogramming the 2689 * Remote IRR was clear before reprogramming the
2690 * ioapic), I don't trust the Remote IRR bit to be 2690 * ioapic), I don't trust the Remote IRR bit to be
2691 * completely accurate. 2691 * completely accurate.
2692 * 2692 *
2693 * However there appears to be no other way to plug 2693 * However there appears to be no other way to plug
2694 * this race, so if the Remote IRR bit is not 2694 * this race, so if the Remote IRR bit is not
2695 * accurate and is causing problems then it is a hardware bug 2695 * accurate and is causing problems then it is a hardware bug
2696 * and you can go talk to the chipset vendor about it. 2696 * and you can go talk to the chipset vendor about it.
2697 */ 2697 */
2698 cfg = desc->chip_data; 2698 cfg = desc->chip_data;
2699 if (!io_apic_level_ack_pending(cfg)) 2699 if (!io_apic_level_ack_pending(cfg))
2700 move_masked_irq(irq); 2700 move_masked_irq(irq);
2701 unmask_IO_APIC_irq_desc(desc); 2701 unmask_IO_APIC_irq_desc(desc);
2702 } 2702 }
2703 } 2703 }
2704 2704
2705 #ifdef CONFIG_INTR_REMAP 2705 #ifdef CONFIG_INTR_REMAP
2706 static void ir_ack_apic_edge(unsigned int irq) 2706 static void ir_ack_apic_edge(unsigned int irq)
2707 { 2707 {
2708 ack_APIC_irq(); 2708 ack_APIC_irq();
2709 } 2709 }
2710 2710
2711 static void ir_ack_apic_level(unsigned int irq) 2711 static void ir_ack_apic_level(unsigned int irq)
2712 { 2712 {
2713 struct irq_desc *desc = irq_to_desc(irq); 2713 struct irq_desc *desc = irq_to_desc(irq);
2714 2714
2715 ack_APIC_irq(); 2715 ack_APIC_irq();
2716 eoi_ioapic_irq(desc); 2716 eoi_ioapic_irq(desc);
2717 } 2717 }
2718 #endif /* CONFIG_INTR_REMAP */ 2718 #endif /* CONFIG_INTR_REMAP */
2719 2719
2720 static struct irq_chip ioapic_chip __read_mostly = { 2720 static struct irq_chip ioapic_chip __read_mostly = {
2721 .name = "IO-APIC", 2721 .name = "IO-APIC",
2722 .startup = startup_ioapic_irq, 2722 .startup = startup_ioapic_irq,
2723 .mask = mask_IO_APIC_irq, 2723 .mask = mask_IO_APIC_irq,
2724 .unmask = unmask_IO_APIC_irq, 2724 .unmask = unmask_IO_APIC_irq,
2725 .ack = ack_apic_edge, 2725 .ack = ack_apic_edge,
2726 .eoi = ack_apic_level, 2726 .eoi = ack_apic_level,
2727 #ifdef CONFIG_SMP 2727 #ifdef CONFIG_SMP
2728 .set_affinity = set_ioapic_affinity_irq, 2728 .set_affinity = set_ioapic_affinity_irq,
2729 #endif 2729 #endif
2730 .retrigger = ioapic_retrigger_irq, 2730 .retrigger = ioapic_retrigger_irq,
2731 }; 2731 };
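Why the chip carries both .ack and .eoi: with the flow handlers assumed above, handle_edge_irq() invokes the chip's ->ack early, while handle_fasteoi_irq() invokes ->eoi once the action handlers have run. A grossly simplified sketch of the two call paths follows; this is an assumption for illustration, not the actual genirq code.

static void sketch_handle_edge_irq(unsigned int irq, struct irq_desc *desc)
{
	desc->chip->ack(irq);	/* ack_apic_edge(): complete move, APIC EOI */
	/* ... run the irqaction handlers ... */
}

static void sketch_handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
{
	/* ... run the irqaction handlers ... */
	desc->chip->eoi(irq);	/* ack_apic_level(): erratum dance + EOI */
}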
2732 2732
2733 static struct irq_chip ir_ioapic_chip __read_mostly = { 2733 static struct irq_chip ir_ioapic_chip __read_mostly = {
2734 .name = "IR-IO-APIC", 2734 .name = "IR-IO-APIC",
2735 .startup = startup_ioapic_irq, 2735 .startup = startup_ioapic_irq,
2736 .mask = mask_IO_APIC_irq, 2736 .mask = mask_IO_APIC_irq,
2737 .unmask = unmask_IO_APIC_irq, 2737 .unmask = unmask_IO_APIC_irq,
2738 #ifdef CONFIG_INTR_REMAP 2738 #ifdef CONFIG_INTR_REMAP
2739 .ack = ir_ack_apic_edge, 2739 .ack = ir_ack_apic_edge,
2740 .eoi = ir_ack_apic_level, 2740 .eoi = ir_ack_apic_level,
2741 #ifdef CONFIG_SMP 2741 #ifdef CONFIG_SMP
2742 .set_affinity = set_ir_ioapic_affinity_irq, 2742 .set_affinity = set_ir_ioapic_affinity_irq,
2743 #endif 2743 #endif
2744 #endif 2744 #endif
2745 .retrigger = ioapic_retrigger_irq, 2745 .retrigger = ioapic_retrigger_irq,
2746 }; 2746 };
2747 2747
2748 static inline void init_IO_APIC_traps(void) 2748 static inline void init_IO_APIC_traps(void)
2749 { 2749 {
2750 int irq; 2750 int irq;
2751 struct irq_desc *desc; 2751 struct irq_desc *desc;
2752 struct irq_cfg *cfg; 2752 struct irq_cfg *cfg;
2753 2753
2754 /* 2754 /*
2755 * NOTE! The local APIC isn't very good at handling 2755 * NOTE! The local APIC isn't very good at handling
2756 * multiple interrupts at the same interrupt level. 2756 * multiple interrupts at the same interrupt level.
2757 * As the interrupt level is determined by taking the 2757 * As the interrupt level is determined by taking the
2758 * vector number and shifting that right by 4, we 2758 * vector number and shifting that right by 4, we
2759 * want to spread these out a bit so that they don't 2759 * want to spread these out a bit so that they don't
2760 * all fall in the same interrupt level. 2760 * all fall in the same interrupt level.
2761 * 2761 *
2762 * Also, we've got to be careful not to trash gate 2762 * Also, we've got to be careful not to trash gate
2763 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2763 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2764 */ 2764 */
2765 for_each_irq_desc(irq, desc) { 2765 for_each_irq_desc(irq, desc) {
2766 cfg = desc->chip_data; 2766 cfg = desc->chip_data;
2767 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { 2767 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2768 /* 2768 /*
2769 * Hmm.. We don't have an entry for this, 2769 * Hmm.. We don't have an entry for this,
2770 * so default to an old-fashioned 8259 2770 * so default to an old-fashioned 8259
2771 * interrupt if we can.. 2771 * interrupt if we can..
2772 */ 2772 */
2773 if (irq < legacy_pic->nr_legacy_irqs) 2773 if (irq < legacy_pic->nr_legacy_irqs)
2774 legacy_pic->make_irq(irq); 2774 legacy_pic->make_irq(irq);
2775 else 2775 else
2776 /* Strange. Oh, well.. */ 2776 /* Strange. Oh, well.. */
2777 desc->chip = &no_irq_chip; 2777 desc->chip = &no_irq_chip;
2778 } 2778 }
2779 } 2779 }
2780 } 2780 }
2781 2781
2782 /* 2782 /*
2783 * The local APIC irq-chip implementation: 2783 * The local APIC irq-chip implementation:
2784 */ 2784 */
2785 2785
2786 static void mask_lapic_irq(unsigned int irq) 2786 static void mask_lapic_irq(unsigned int irq)
2787 { 2787 {
2788 unsigned long v; 2788 unsigned long v;
2789 2789
2790 v = apic_read(APIC_LVT0); 2790 v = apic_read(APIC_LVT0);
2791 apic_write(APIC_LVT0, v | APIC_LVT_MASKED); 2791 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2792 } 2792 }
2793 2793
2794 static void unmask_lapic_irq(unsigned int irq) 2794 static void unmask_lapic_irq(unsigned int irq)
2795 { 2795 {
2796 unsigned long v; 2796 unsigned long v;
2797 2797
2798 v = apic_read(APIC_LVT0); 2798 v = apic_read(APIC_LVT0);
2799 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2799 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2800 } 2800 }
2801 2801
2802 static void ack_lapic_irq(unsigned int irq) 2802 static void ack_lapic_irq(unsigned int irq)
2803 { 2803 {
2804 ack_APIC_irq(); 2804 ack_APIC_irq();
2805 } 2805 }
2806 2806
2807 static struct irq_chip lapic_chip __read_mostly = { 2807 static struct irq_chip lapic_chip __read_mostly = {
2808 .name = "local-APIC", 2808 .name = "local-APIC",
2809 .mask = mask_lapic_irq, 2809 .mask = mask_lapic_irq,
2810 .unmask = unmask_lapic_irq, 2810 .unmask = unmask_lapic_irq,
2811 .ack = ack_lapic_irq, 2811 .ack = ack_lapic_irq,
2812 }; 2812 };
2813 2813
2814 static void lapic_register_intr(int irq, struct irq_desc *desc) 2814 static void lapic_register_intr(int irq, struct irq_desc *desc)
2815 { 2815 {
2816 desc->status &= ~IRQ_LEVEL; 2816 desc->status &= ~IRQ_LEVEL;
2817 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2817 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2818 "edge"); 2818 "edge");
2819 } 2819 }
2820 2820
2821 static void __init setup_nmi(void) 2821 static void __init setup_nmi(void)
2822 { 2822 {
2823 /* 2823 /*
2824 * Dirty trick to enable the NMI watchdog ... 2824 * Dirty trick to enable the NMI watchdog ...
2825 * We put the 8259A master into AEOI mode and 2825 * We put the 8259A master into AEOI mode and
2826 * unmask on all local APICs LVT0 as NMI. 2826 * unmask on all local APICs LVT0 as NMI.
2827 * 2827 *
2828 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') 2828 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2829 * is from Maciej W. Rozycki - so we do not have to EOI from 2829 * is from Maciej W. Rozycki - so we do not have to EOI from
2830 * the NMI handler or the timer interrupt. 2830 * the NMI handler or the timer interrupt.
2831 */ 2831 */
2832 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); 2832 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2833 2833
2834 enable_NMI_through_LVT0(); 2834 enable_NMI_through_LVT0();
2835 2835
2836 apic_printk(APIC_VERBOSE, " done.\n"); 2836 apic_printk(APIC_VERBOSE, " done.\n");
2837 } 2837 }
2838 2838
2839 /* 2839 /*
2840 * This looks a bit hackish but it's about the only way of sending 2840 * This looks a bit hackish but it's about the only way of sending
2841 * a few INTA cycles to 8259As and any associated glue logic. ICR does 2841 * a few INTA cycles to 8259As and any associated glue logic. ICR does
2842 * not support the ExtINT mode, unfortunately. We need to send these 2842 * not support the ExtINT mode, unfortunately. We need to send these
2843 * cycles as some i82489DX-based boards have glue logic that keeps the 2843 * cycles as some i82489DX-based boards have glue logic that keeps the
2844 * 8259A interrupt line asserted until INTA. --macro 2844 * 8259A interrupt line asserted until INTA. --macro
2845 */ 2845 */
2846 static inline void __init unlock_ExtINT_logic(void) 2846 static inline void __init unlock_ExtINT_logic(void)
2847 { 2847 {
2848 int apic, pin, i; 2848 int apic, pin, i;
2849 struct IO_APIC_route_entry entry0, entry1; 2849 struct IO_APIC_route_entry entry0, entry1;
2850 unsigned char save_control, save_freq_select; 2850 unsigned char save_control, save_freq_select;
2851 2851
2852 pin = find_isa_irq_pin(8, mp_INT); 2852 pin = find_isa_irq_pin(8, mp_INT);
2853 if (pin == -1) { 2853 if (pin == -1) {
2854 WARN_ON_ONCE(1); 2854 WARN_ON_ONCE(1);
2855 return; 2855 return;
2856 } 2856 }
2857 apic = find_isa_irq_apic(8, mp_INT); 2857 apic = find_isa_irq_apic(8, mp_INT);
2858 if (apic == -1) { 2858 if (apic == -1) {
2859 WARN_ON_ONCE(1); 2859 WARN_ON_ONCE(1);
2860 return; 2860 return;
2861 } 2861 }
2862 2862
2863 entry0 = ioapic_read_entry(apic, pin); 2863 entry0 = ioapic_read_entry(apic, pin);
2864 clear_IO_APIC_pin(apic, pin); 2864 clear_IO_APIC_pin(apic, pin);
2865 2865
2866 memset(&entry1, 0, sizeof(entry1)); 2866 memset(&entry1, 0, sizeof(entry1));
2867 2867
2868 entry1.dest_mode = 0; /* physical delivery */ 2868 entry1.dest_mode = 0; /* physical delivery */
2869 entry1.mask = 0; /* unmask IRQ now */ 2869 entry1.mask = 0; /* unmask IRQ now */
2870 entry1.dest = hard_smp_processor_id(); 2870 entry1.dest = hard_smp_processor_id();
2871 entry1.delivery_mode = dest_ExtINT; 2871 entry1.delivery_mode = dest_ExtINT;
2872 entry1.polarity = entry0.polarity; 2872 entry1.polarity = entry0.polarity;
2873 entry1.trigger = 0; 2873 entry1.trigger = 0;
2874 entry1.vector = 0; 2874 entry1.vector = 0;
2875 2875
2876 ioapic_write_entry(apic, pin, entry1); 2876 ioapic_write_entry(apic, pin, entry1);
2877 2877
2878 save_control = CMOS_READ(RTC_CONTROL); 2878 save_control = CMOS_READ(RTC_CONTROL);
2879 save_freq_select = CMOS_READ(RTC_FREQ_SELECT); 2879 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2880 CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, 2880 CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2881 RTC_FREQ_SELECT); 2881 RTC_FREQ_SELECT);
2882 CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); 2882 CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2883 2883
2884 i = 100; 2884 i = 100;
2885 while (i-- > 0) { 2885 while (i-- > 0) {
2886 mdelay(10); 2886 mdelay(10);
2887 if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) 2887 if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2888 i -= 10; 2888 i -= 10;
2889 } 2889 }
2890 2890
2891 CMOS_WRITE(save_control, RTC_CONTROL); 2891 CMOS_WRITE(save_control, RTC_CONTROL);
2892 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); 2892 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2893 clear_IO_APIC_pin(apic, pin); 2893 clear_IO_APIC_pin(apic, pin);
2894 2894
2895 ioapic_write_entry(apic, pin, entry0); 2895 ioapic_write_entry(apic, pin, entry0);
2896 } 2896 }
2897 2897
2898 static int disable_timer_pin_1 __initdata; 2898 static int disable_timer_pin_1 __initdata;
2899 /* Actually the next is obsolete, but keep it for paranoid reasons -AK */ 2899 /* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2900 static int __init disable_timer_pin_setup(char *arg) 2900 static int __init disable_timer_pin_setup(char *arg)
2901 { 2901 {
2902 disable_timer_pin_1 = 1; 2902 disable_timer_pin_1 = 1;
2903 return 0; 2903 return 0;
2904 } 2904 }
2905 early_param("disable_timer_pin_1", disable_timer_pin_setup); 2905 early_param("disable_timer_pin_1", disable_timer_pin_setup);
2906 2906
2907 int timer_through_8259 __initdata; 2907 int timer_through_8259 __initdata;
2908 2908
2909 /* 2909 /*
2910 * This code may look a bit paranoid, but it's supposed to cooperate with 2910 * This code may look a bit paranoid, but it's supposed to cooperate with
2911 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 2911 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
2912 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast 2912 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
2913 * fanatically on his truly buggy board. 2913 * fanatically on his truly buggy board.
2914 * 2914 *
2915 * FIXME: really need to revamp this for all platforms. 2915 * FIXME: really need to revamp this for all platforms.
2916 */ 2916 */
2917 static inline void __init check_timer(void) 2917 static inline void __init check_timer(void)
2918 { 2918 {
2919 struct irq_desc *desc = irq_to_desc(0); 2919 struct irq_desc *desc = irq_to_desc(0);
2920 struct irq_cfg *cfg = desc->chip_data; 2920 struct irq_cfg *cfg = desc->chip_data;
2921 int node = cpu_to_node(0); 2921 int node = cpu_to_node(0);
2922 int apic1, pin1, apic2, pin2; 2922 int apic1, pin1, apic2, pin2;
2923 unsigned long flags; 2923 unsigned long flags;
2924 int no_pin1 = 0; 2924 int no_pin1 = 0;
2925 2925
2926 local_irq_save(flags); 2926 local_irq_save(flags);
2927 2927
2928 /* 2928 /*
2929 * get/set the timer IRQ vector: 2929 * get/set the timer IRQ vector:
2930 */ 2930 */
2931 legacy_pic->chip->mask(0); 2931 legacy_pic->mask(0);
2932 assign_irq_vector(0, cfg, apic->target_cpus()); 2932 assign_irq_vector(0, cfg, apic->target_cpus());
2933 2933
2934 /* 2934 /*
2935 * As IRQ0 is to be enabled in the 8259A, the virtual 2935 * As IRQ0 is to be enabled in the 8259A, the virtual
2936 * wire has to be disabled in the local APIC. Also 2936 * wire has to be disabled in the local APIC. Also
2937 * timer interrupts need to be acknowledged manually in 2937 * timer interrupts need to be acknowledged manually in
2938 * the 8259A for the i82489DX when using the NMI 2938 * the 8259A for the i82489DX when using the NMI
2939 * watchdog as that APIC treats NMIs as level-triggered. 2939 * watchdog as that APIC treats NMIs as level-triggered.
2940 * The AEOI mode will finish them in the 8259A 2940 * The AEOI mode will finish them in the 8259A
2941 * automatically. 2941 * automatically.
2942 */ 2942 */
2943 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2943 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2944 legacy_pic->init(1); 2944 legacy_pic->init(1);
2945 #ifdef CONFIG_X86_32 2945 #ifdef CONFIG_X86_32
2946 { 2946 {
2947 unsigned int ver; 2947 unsigned int ver;
2948 2948
2949 ver = apic_read(APIC_LVR); 2949 ver = apic_read(APIC_LVR);
2950 ver = GET_APIC_VERSION(ver); 2950 ver = GET_APIC_VERSION(ver);
2951 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); 2951 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2952 } 2952 }
2953 #endif 2953 #endif
2954 2954
2955 pin1 = find_isa_irq_pin(0, mp_INT); 2955 pin1 = find_isa_irq_pin(0, mp_INT);
2956 apic1 = find_isa_irq_apic(0, mp_INT); 2956 apic1 = find_isa_irq_apic(0, mp_INT);
2957 pin2 = ioapic_i8259.pin; 2957 pin2 = ioapic_i8259.pin;
2958 apic2 = ioapic_i8259.apic; 2958 apic2 = ioapic_i8259.apic;
2959 2959
2960 apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " 2960 apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
2961 "apic1=%d pin1=%d apic2=%d pin2=%d\n", 2961 "apic1=%d pin1=%d apic2=%d pin2=%d\n",
2962 cfg->vector, apic1, pin1, apic2, pin2); 2962 cfg->vector, apic1, pin1, apic2, pin2);
2963 2963
2964 /* 2964 /*
2965 * Some BIOS writers are clueless and report the ExtINTA 2965 * Some BIOS writers are clueless and report the ExtINTA
2966 * I/O APIC input from the cascaded 8259A as the timer 2966 * I/O APIC input from the cascaded 8259A as the timer
2967 * interrupt input. So just in case, if only one pin 2967 * interrupt input. So just in case, if only one pin
2968 * was found above, try it both directly and through the 2968 * was found above, try it both directly and through the
2969 * 8259A. 2969 * 8259A.
2970 */ 2970 */
2971 if (pin1 == -1) { 2971 if (pin1 == -1) {
2972 if (intr_remapping_enabled) 2972 if (intr_remapping_enabled)
2973 panic("BIOS bug: timer not connected to IO-APIC"); 2973 panic("BIOS bug: timer not connected to IO-APIC");
2974 pin1 = pin2; 2974 pin1 = pin2;
2975 apic1 = apic2; 2975 apic1 = apic2;
2976 no_pin1 = 1; 2976 no_pin1 = 1;
2977 } else if (pin2 == -1) { 2977 } else if (pin2 == -1) {
2978 pin2 = pin1; 2978 pin2 = pin1;
2979 apic2 = apic1; 2979 apic2 = apic1;
2980 } 2980 }
2981 2981
2982 if (pin1 != -1) { 2982 if (pin1 != -1) {
2983 /* 2983 /*
2984 * Ok, does IRQ0 through the IOAPIC work? 2984 * Ok, does IRQ0 through the IOAPIC work?
2985 */ 2985 */
2986 if (no_pin1) { 2986 if (no_pin1) {
2987 add_pin_to_irq_node(cfg, node, apic1, pin1); 2987 add_pin_to_irq_node(cfg, node, apic1, pin1);
2988 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2988 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2989 } else { 2989 } else {
2990 /* for edge trigger, setup_IO_APIC_irq already 2990 /* for edge trigger, setup_IO_APIC_irq already
2991 * leaves it unmasked, 2991 * leaves it unmasked,
2992 * so we only need to unmask if it is level-triggered; 2992 * so we only need to unmask if it is level-triggered;
2993 * but do we really have a level-triggered timer? 2993 * but do we really have a level-triggered timer?
2994 */ 2994 */
2995 int idx; 2995 int idx;
2996 idx = find_irq_entry(apic1, pin1, mp_INT); 2996 idx = find_irq_entry(apic1, pin1, mp_INT);
2997 if (idx != -1 && irq_trigger(idx)) 2997 if (idx != -1 && irq_trigger(idx))
2998 unmask_IO_APIC_irq_desc(desc); 2998 unmask_IO_APIC_irq_desc(desc);
2999 } 2999 }
3000 if (timer_irq_works()) { 3000 if (timer_irq_works()) {
3001 if (nmi_watchdog == NMI_IO_APIC) { 3001 if (nmi_watchdog == NMI_IO_APIC) {
3002 setup_nmi(); 3002 setup_nmi();
3003 legacy_pic->chip->unmask(0); 3003 legacy_pic->unmask(0);
3004 } 3004 }
3005 if (disable_timer_pin_1 > 0) 3005 if (disable_timer_pin_1 > 0)
3006 clear_IO_APIC_pin(0, pin1); 3006 clear_IO_APIC_pin(0, pin1);
3007 goto out; 3007 goto out;
3008 } 3008 }
3009 if (intr_remapping_enabled) 3009 if (intr_remapping_enabled)
3010 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 3010 panic("timer doesn't work through Interrupt-remapped IO-APIC");
3011 local_irq_disable(); 3011 local_irq_disable();
3012 clear_IO_APIC_pin(apic1, pin1); 3012 clear_IO_APIC_pin(apic1, pin1);
3013 if (!no_pin1) 3013 if (!no_pin1)
3014 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 3014 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
3015 "8254 timer not connected to IO-APIC\n"); 3015 "8254 timer not connected to IO-APIC\n");
3016 3016
3017 apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " 3017 apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
3018 "(IRQ0) through the 8259A ...\n"); 3018 "(IRQ0) through the 8259A ...\n");
3019 apic_printk(APIC_QUIET, KERN_INFO 3019 apic_printk(APIC_QUIET, KERN_INFO
3020 "..... (found apic %d pin %d) ...\n", apic2, pin2); 3020 "..... (found apic %d pin %d) ...\n", apic2, pin2);
3021 /* 3021 /*
3022 * legacy devices should be connected to IO APIC #0 3022 * legacy devices should be connected to IO APIC #0
3023 */ 3023 */
3024 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); 3024 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
3025 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 3025 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
3026 legacy_pic->chip->unmask(0); 3026 legacy_pic->unmask(0);
3027 if (timer_irq_works()) { 3027 if (timer_irq_works()) {
3028 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 3028 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
3029 timer_through_8259 = 1; 3029 timer_through_8259 = 1;
3030 if (nmi_watchdog == NMI_IO_APIC) { 3030 if (nmi_watchdog == NMI_IO_APIC) {
3031 legacy_pic->chip->mask(0); 3031 legacy_pic->mask(0);
3032 setup_nmi(); 3032 setup_nmi();
3033 legacy_pic->chip->unmask(0); 3033 legacy_pic->unmask(0);
3034 } 3034 }
3035 goto out; 3035 goto out;
3036 } 3036 }
3037 /* 3037 /*
3038 * Cleanup, just in case ... 3038 * Cleanup, just in case ...
3039 */ 3039 */
3040 local_irq_disable(); 3040 local_irq_disable();
3041 legacy_pic->chip->mask(0); 3041 legacy_pic->mask(0);
3042 clear_IO_APIC_pin(apic2, pin2); 3042 clear_IO_APIC_pin(apic2, pin2);
3043 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 3043 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
3044 } 3044 }
3045 3045
3046 if (nmi_watchdog == NMI_IO_APIC) { 3046 if (nmi_watchdog == NMI_IO_APIC) {
3047 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " 3047 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
3048 "through the IO-APIC - disabling NMI Watchdog!\n"); 3048 "through the IO-APIC - disabling NMI Watchdog!\n");
3049 nmi_watchdog = NMI_NONE; 3049 nmi_watchdog = NMI_NONE;
3050 } 3050 }
3051 #ifdef CONFIG_X86_32 3051 #ifdef CONFIG_X86_32
3052 timer_ack = 0; 3052 timer_ack = 0;
3053 #endif 3053 #endif
3054 3054
3055 apic_printk(APIC_QUIET, KERN_INFO 3055 apic_printk(APIC_QUIET, KERN_INFO
3056 "...trying to set up timer as Virtual Wire IRQ...\n"); 3056 "...trying to set up timer as Virtual Wire IRQ...\n");
3057 3057
3058 lapic_register_intr(0, desc); 3058 lapic_register_intr(0, desc);
3059 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 3059 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
3060 legacy_pic->chip->unmask(0); 3060 legacy_pic->unmask(0);
3061 3061
3062 if (timer_irq_works()) { 3062 if (timer_irq_works()) {
3063 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3063 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3064 goto out; 3064 goto out;
3065 } 3065 }
3066 local_irq_disable(); 3066 local_irq_disable();
3067 legacy_pic->chip->mask(0); 3067 legacy_pic->mask(0);
3068 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 3068 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3069 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 3069 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
3070 3070
3071 apic_printk(APIC_QUIET, KERN_INFO 3071 apic_printk(APIC_QUIET, KERN_INFO
3072 "...trying to set up timer as ExtINT IRQ...\n"); 3072 "...trying to set up timer as ExtINT IRQ...\n");
3073 3073
3074 legacy_pic->init(0); 3074 legacy_pic->init(0);
3075 legacy_pic->make_irq(0); 3075 legacy_pic->make_irq(0);
3076 apic_write(APIC_LVT0, APIC_DM_EXTINT); 3076 apic_write(APIC_LVT0, APIC_DM_EXTINT);
3077 3077
3078 unlock_ExtINT_logic(); 3078 unlock_ExtINT_logic();
3079 3079
3080 if (timer_irq_works()) { 3080 if (timer_irq_works()) {
3081 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3081 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3082 goto out; 3082 goto out;
3083 } 3083 }
3084 local_irq_disable(); 3084 local_irq_disable();
3085 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); 3085 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
3086 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " 3086 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
3087 "report. Then try booting with the 'noapic' option.\n"); 3087 "report. Then try booting with the 'noapic' option.\n");
3088 out: 3088 out:
3089 local_irq_restore(flags); 3089 local_irq_restore(flags);
3090 } 3090 }
3091 3091
3092 /* 3092 /*
3093 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available 3093 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
3094 * to devices. However there may be an I/O APIC pin available for 3094 * to devices. However there may be an I/O APIC pin available for
3095 * this interrupt regardless. The pin may be left unconnected, but 3095 * this interrupt regardless. The pin may be left unconnected, but
3096 * typically it will be reused as an ExtINT cascade interrupt for 3096 * typically it will be reused as an ExtINT cascade interrupt for
3097 * the master 8259A. In the MPS case such a pin will normally be 3097 * the master 8259A. In the MPS case such a pin will normally be
3098 * reported as an ExtINT interrupt in the MP table. With ACPI 3098 * reported as an ExtINT interrupt in the MP table. With ACPI
3099 * there is no provision for ExtINT interrupts, and in the absence 3099 * there is no provision for ExtINT interrupts, and in the absence
3100 * of an override it would be treated as an ordinary ISA I/O APIC 3100 * of an override it would be treated as an ordinary ISA I/O APIC
3101 * interrupt, that is edge-triggered and unmasked by default. We 3101 * interrupt, that is edge-triggered and unmasked by default. We
3102 * used to do this, but it caused problems on some systems because 3102 * used to do this, but it caused problems on some systems because
3103 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using 3103 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
3104 * the same ExtINT cascade interrupt to drive the local APIC of the 3104 * the same ExtINT cascade interrupt to drive the local APIC of the
3105 * bootstrap processor. Therefore we refrain from routing IRQ2 to 3105 * bootstrap processor. Therefore we refrain from routing IRQ2 to
3106 * the I/O APIC in all cases now. No actual device should request 3106 * the I/O APIC in all cases now. No actual device should request
3107 * it anyway. --macro 3107 * it anyway. --macro
3108 */ 3108 */
3109 #define PIC_IRQS (1UL << PIC_CASCADE_IR) 3109 #define PIC_IRQS (1UL << PIC_CASCADE_IR)
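
As a quick illustration of the mask above (editorial note, not part of this patch; the value follows from PIC_CASCADE_IR == 2):

/* Editorial illustration:
 *
 *   PIC_IRQS  == 1UL << 2 == 0x4
 *   ~PIC_IRQS == ...fffffffb      (all bits set except bit 2)
 *
 * so with a legacy PIC present, io_apic_irqs below routes every ISA
 * IRQ through the I/O APIC except IRQ2, the cascade; without one,
 * ~0UL routes everything.
 */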
3110 3110
3111 void __init setup_IO_APIC(void) 3111 void __init setup_IO_APIC(void)
3112 { 3112 {
3113 3113
3114 /* 3114 /*
3115 * the call to enable_IO_APIC() has been moved to setup_local_APIC() for the BP 3115 * the call to enable_IO_APIC() has been moved to setup_local_APIC() for the BP
3116 */ 3116 */
3117 io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; 3117 io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
3118 3118
3119 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 3119 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
3120 /* 3120 /*
3121 * Set up IO-APIC IRQ routing. 3121 * Set up IO-APIC IRQ routing.
3122 */ 3122 */
3123 x86_init.mpparse.setup_ioapic_ids(); 3123 x86_init.mpparse.setup_ioapic_ids();
3124 3124
3125 sync_Arb_IDs(); 3125 sync_Arb_IDs();
3126 setup_IO_APIC_irqs(); 3126 setup_IO_APIC_irqs();
3127 init_IO_APIC_traps(); 3127 init_IO_APIC_traps();
3128 if (legacy_pic->nr_legacy_irqs) 3128 if (legacy_pic->nr_legacy_irqs)
3129 check_timer(); 3129 check_timer();
3130 } 3130 }
3131 3131
3132 /* 3132 /*
3133 * Called after all the initialization is done. If we didn't find any 3133 * Called after all the initialization is done. If we didn't find any
3134 * APIC bugs, then we can allow the modify fast path. 3134 * APIC bugs, then we can allow the modify fast path.
3135 */ 3135 */
3136 3136
3137 static int __init io_apic_bug_finalize(void) 3137 static int __init io_apic_bug_finalize(void)
3138 { 3138 {
3139 if (sis_apic_bug == -1) 3139 if (sis_apic_bug == -1)
3140 sis_apic_bug = 0; 3140 sis_apic_bug = 0;
3141 return 0; 3141 return 0;
3142 } 3142 }
3143 3143
3144 late_initcall(io_apic_bug_finalize); 3144 late_initcall(io_apic_bug_finalize);
3145 3145
3146 struct sysfs_ioapic_data { 3146 struct sysfs_ioapic_data {
3147 struct sys_device dev; 3147 struct sys_device dev;
3148 struct IO_APIC_route_entry entry[0]; 3148 struct IO_APIC_route_entry entry[0];
3149 }; 3149 };
3150 static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; 3150 static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
3151 3151
3152 static int ioapic_suspend(struct sys_device *dev, pm_message_t state) 3152 static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
3153 { 3153 {
3154 struct IO_APIC_route_entry *entry; 3154 struct IO_APIC_route_entry *entry;
3155 struct sysfs_ioapic_data *data; 3155 struct sysfs_ioapic_data *data;
3156 int i; 3156 int i;
3157 3157
3158 data = container_of(dev, struct sysfs_ioapic_data, dev); 3158 data = container_of(dev, struct sysfs_ioapic_data, dev);
3159 entry = data->entry; 3159 entry = data->entry;
3160 for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++) 3160 for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++)
3161 *entry = ioapic_read_entry(dev->id, i); 3161 *entry = ioapic_read_entry(dev->id, i);
3162 3162
3163 return 0; 3163 return 0;
3164 } 3164 }
3165 3165
3166 static int ioapic_resume(struct sys_device *dev) 3166 static int ioapic_resume(struct sys_device *dev)
3167 { 3167 {
3168 struct IO_APIC_route_entry *entry; 3168 struct IO_APIC_route_entry *entry;
3169 struct sysfs_ioapic_data *data; 3169 struct sysfs_ioapic_data *data;
3170 unsigned long flags; 3170 unsigned long flags;
3171 union IO_APIC_reg_00 reg_00; 3171 union IO_APIC_reg_00 reg_00;
3172 int i; 3172 int i;
3173 3173
3174 data = container_of(dev, struct sysfs_ioapic_data, dev); 3174 data = container_of(dev, struct sysfs_ioapic_data, dev);
3175 entry = data->entry; 3175 entry = data->entry;
3176 3176
3177 raw_spin_lock_irqsave(&ioapic_lock, flags); 3177 raw_spin_lock_irqsave(&ioapic_lock, flags);
3178 reg_00.raw = io_apic_read(dev->id, 0); 3178 reg_00.raw = io_apic_read(dev->id, 0);
3179 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { 3179 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3180 reg_00.bits.ID = mp_ioapics[dev->id].apicid; 3180 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3181 io_apic_write(dev->id, 0, reg_00.raw); 3181 io_apic_write(dev->id, 0, reg_00.raw);
3182 } 3182 }
3183 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 3183 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3184 for (i = 0; i < nr_ioapic_registers[dev->id]; i++) 3184 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
3185 ioapic_write_entry(dev->id, i, entry[i]); 3185 ioapic_write_entry(dev->id, i, entry[i]);
3186 3186
3187 return 0; 3187 return 0;
3188 } 3188 }
3189 3189
3190 static struct sysdev_class ioapic_sysdev_class = { 3190 static struct sysdev_class ioapic_sysdev_class = {
3191 .name = "ioapic", 3191 .name = "ioapic",
3192 .suspend = ioapic_suspend, 3192 .suspend = ioapic_suspend,
3193 .resume = ioapic_resume, 3193 .resume = ioapic_resume,
3194 }; 3194 };
3195 3195
3196 static int __init ioapic_init_sysfs(void) 3196 static int __init ioapic_init_sysfs(void)
3197 { 3197 {
3198 struct sys_device *dev; 3198 struct sys_device *dev;
3199 int i, size, error; 3199 int i, size, error;
3200 3200
3201 error = sysdev_class_register(&ioapic_sysdev_class); 3201 error = sysdev_class_register(&ioapic_sysdev_class);
3202 if (error) 3202 if (error)
3203 return error; 3203 return error;
3204 3204
3205 for (i = 0; i < nr_ioapics; i++) { 3205 for (i = 0; i < nr_ioapics; i++) {
3206 size = sizeof(struct sys_device) + nr_ioapic_registers[i] 3206 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
3207 * sizeof(struct IO_APIC_route_entry); 3207 * sizeof(struct IO_APIC_route_entry);
3208 mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); 3208 mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
3209 if (!mp_ioapic_data[i]) { 3209 if (!mp_ioapic_data[i]) {
3210 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); 3210 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
3211 continue; 3211 continue;
3212 } 3212 }
3213 dev = &mp_ioapic_data[i]->dev; 3213 dev = &mp_ioapic_data[i]->dev;
3214 dev->id = i; 3214 dev->id = i;
3215 dev->cls = &ioapic_sysdev_class; 3215 dev->cls = &ioapic_sysdev_class;
3216 error = sysdev_register(dev); 3216 error = sysdev_register(dev);
3217 if (error) { 3217 if (error) {
3218 kfree(mp_ioapic_data[i]); 3218 kfree(mp_ioapic_data[i]);
3219 mp_ioapic_data[i] = NULL; 3219 mp_ioapic_data[i] = NULL;
3220 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); 3220 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
3221 continue; 3221 continue;
3222 } 3222 }
3223 } 3223 }
3224 3224
3225 return 0; 3225 return 0;
3226 } 3226 }
3227 3227
3228 device_initcall(ioapic_init_sysfs); 3228 device_initcall(ioapic_init_sysfs);
3229 3229
3230 /* 3230 /*
3231 * Dynamic irq allocation and deallocation 3231 * Dynamic irq allocation and deallocation
3232 */ 3232 */
3233 unsigned int create_irq_nr(unsigned int irq_want, int node) 3233 unsigned int create_irq_nr(unsigned int irq_want, int node)
3234 { 3234 {
3235 /* Allocate an unused irq */ 3235 /* Allocate an unused irq */
3236 unsigned int irq; 3236 unsigned int irq;
3237 unsigned int new; 3237 unsigned int new;
3238 unsigned long flags; 3238 unsigned long flags;
3239 struct irq_cfg *cfg_new = NULL; 3239 struct irq_cfg *cfg_new = NULL;
3240 struct irq_desc *desc_new = NULL; 3240 struct irq_desc *desc_new = NULL;
3241 3241
3242 irq = 0; 3242 irq = 0;
3243 if (irq_want < nr_irqs_gsi) 3243 if (irq_want < nr_irqs_gsi)
3244 irq_want = nr_irqs_gsi; 3244 irq_want = nr_irqs_gsi;
3245 3245
3246 raw_spin_lock_irqsave(&vector_lock, flags); 3246 raw_spin_lock_irqsave(&vector_lock, flags);
3247 for (new = irq_want; new < nr_irqs; new++) { 3247 for (new = irq_want; new < nr_irqs; new++) {
3248 desc_new = irq_to_desc_alloc_node(new, node); 3248 desc_new = irq_to_desc_alloc_node(new, node);
3249 if (!desc_new) { 3249 if (!desc_new) {
3250 printk(KERN_INFO "cannot get irq_desc for %d\n", new); 3250 printk(KERN_INFO "cannot get irq_desc for %d\n", new);
3251 continue; 3251 continue;
3252 } 3252 }
3253 cfg_new = desc_new->chip_data; 3253 cfg_new = desc_new->chip_data;
3254 3254
3255 if (cfg_new->vector != 0) 3255 if (cfg_new->vector != 0)
3256 continue; 3256 continue;
3257 3257
3258 desc_new = move_irq_desc(desc_new, node); 3258 desc_new = move_irq_desc(desc_new, node);
3259 cfg_new = desc_new->chip_data; 3259 cfg_new = desc_new->chip_data;
3260 3260
3261 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) 3261 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
3262 irq = new; 3262 irq = new;
3263 break; 3263 break;
3264 } 3264 }
3265 raw_spin_unlock_irqrestore(&vector_lock, flags); 3265 raw_spin_unlock_irqrestore(&vector_lock, flags);
3266 3266
3267 if (irq > 0) 3267 if (irq > 0)
3268 dynamic_irq_init_keep_chip_data(irq); 3268 dynamic_irq_init_keep_chip_data(irq);
3269 3269
3270 return irq; 3270 return irq;
3271 } 3271 }
3272 3272
3273 int create_irq(void) 3273 int create_irq(void)
3274 { 3274 {
3275 int node = cpu_to_node(0); 3275 int node = cpu_to_node(0);
3276 unsigned int irq_want; 3276 unsigned int irq_want;
3277 int irq; 3277 int irq;
3278 3278
3279 irq_want = nr_irqs_gsi; 3279 irq_want = nr_irqs_gsi;
3280 irq = create_irq_nr(irq_want, node); 3280 irq = create_irq_nr(irq_want, node);
3281 3281
3282 if (irq == 0) 3282 if (irq == 0)
3283 irq = -1; 3283 irq = -1;
3284 3284
3285 return irq; 3285 return irq;
3286 } 3286 }
3287 3287
3288 void destroy_irq(unsigned int irq) 3288 void destroy_irq(unsigned int irq)
3289 { 3289 {
3290 unsigned long flags; 3290 unsigned long flags;
3291 3291
3292 dynamic_irq_cleanup_keep_chip_data(irq); 3292 dynamic_irq_cleanup_keep_chip_data(irq);
3293 3293
3294 free_irte(irq); 3294 free_irte(irq);
3295 raw_spin_lock_irqsave(&vector_lock, flags); 3295 raw_spin_lock_irqsave(&vector_lock, flags);
3296 __clear_irq_vector(irq, get_irq_chip_data(irq)); 3296 __clear_irq_vector(irq, get_irq_chip_data(irq));
3297 raw_spin_unlock_irqrestore(&vector_lock, flags); 3297 raw_spin_unlock_irqrestore(&vector_lock, flags);
3298 } 3298 }
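
A minimal caller sketch for the create_irq()/destroy_irq() pair above (hypothetical driver code; the function name and error handling are illustrative only):

/* Hypothetical driver snippet: grab a dynamic irq above the GSI
 * range, use it, and tear it down again. */
static int example_alloc_vector(void)
{
	int irq = create_irq();		/* returns -1 on failure */

	if (irq < 0)
		return -ENOSPC;
	/* ... program the device to raise cfg->vector for this irq ... */
	destroy_irq(irq);		/* frees the IRTE and clears the vector */
	return 0;
}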
3299 3299
3300 /* 3300 /*
3301 * MSI message composition 3301 * MSI message composition
3302 */ 3302 */
3303 #ifdef CONFIG_PCI_MSI 3303 #ifdef CONFIG_PCI_MSI
3304 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, 3304 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3305 struct msi_msg *msg, u8 hpet_id) 3305 struct msi_msg *msg, u8 hpet_id)
3306 { 3306 {
3307 struct irq_cfg *cfg; 3307 struct irq_cfg *cfg;
3308 int err; 3308 int err;
3309 unsigned dest; 3309 unsigned dest;
3310 3310
3311 if (disable_apic) 3311 if (disable_apic)
3312 return -ENXIO; 3312 return -ENXIO;
3313 3313
3314 cfg = irq_cfg(irq); 3314 cfg = irq_cfg(irq);
3315 err = assign_irq_vector(irq, cfg, apic->target_cpus()); 3315 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3316 if (err) 3316 if (err)
3317 return err; 3317 return err;
3318 3318
3319 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3319 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3320 3320
3321 if (irq_remapped(irq)) { 3321 if (irq_remapped(irq)) {
3322 struct irte irte; 3322 struct irte irte;
3323 int ir_index; 3323 int ir_index;
3324 u16 sub_handle; 3324 u16 sub_handle;
3325 3325
3326 ir_index = map_irq_to_irte_handle(irq, &sub_handle); 3326 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
3327 BUG_ON(ir_index == -1); 3327 BUG_ON(ir_index == -1);
3328 3328
3329 prepare_irte(&irte, cfg->vector, dest); 3329 prepare_irte(&irte, cfg->vector, dest);
3330 3330
3331 /* Set source-id of interrupt request */ 3331 /* Set source-id of interrupt request */
3332 if (pdev) 3332 if (pdev)
3333 set_msi_sid(&irte, pdev); 3333 set_msi_sid(&irte, pdev);
3334 else 3334 else
3335 set_hpet_sid(&irte, hpet_id); 3335 set_hpet_sid(&irte, hpet_id);
3336 3336
3337 modify_irte(irq, &irte); 3337 modify_irte(irq, &irte);
3338 3338
3339 msg->address_hi = MSI_ADDR_BASE_HI; 3339 msg->address_hi = MSI_ADDR_BASE_HI;
3340 msg->data = sub_handle; 3340 msg->data = sub_handle;
3341 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | 3341 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
3342 MSI_ADDR_IR_SHV | 3342 MSI_ADDR_IR_SHV |
3343 MSI_ADDR_IR_INDEX1(ir_index) | 3343 MSI_ADDR_IR_INDEX1(ir_index) |
3344 MSI_ADDR_IR_INDEX2(ir_index); 3344 MSI_ADDR_IR_INDEX2(ir_index);
3345 } else { 3345 } else {
3346 if (x2apic_enabled()) 3346 if (x2apic_enabled())
3347 msg->address_hi = MSI_ADDR_BASE_HI | 3347 msg->address_hi = MSI_ADDR_BASE_HI |
3348 MSI_ADDR_EXT_DEST_ID(dest); 3348 MSI_ADDR_EXT_DEST_ID(dest);
3349 else 3349 else
3350 msg->address_hi = MSI_ADDR_BASE_HI; 3350 msg->address_hi = MSI_ADDR_BASE_HI;
3351 3351
3352 msg->address_lo = 3352 msg->address_lo =
3353 MSI_ADDR_BASE_LO | 3353 MSI_ADDR_BASE_LO |
3354 ((apic->irq_dest_mode == 0) ? 3354 ((apic->irq_dest_mode == 0) ?
3355 MSI_ADDR_DEST_MODE_PHYSICAL: 3355 MSI_ADDR_DEST_MODE_PHYSICAL:
3356 MSI_ADDR_DEST_MODE_LOGICAL) | 3356 MSI_ADDR_DEST_MODE_LOGICAL) |
3357 ((apic->irq_delivery_mode != dest_LowestPrio) ? 3357 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3358 MSI_ADDR_REDIRECTION_CPU: 3358 MSI_ADDR_REDIRECTION_CPU:
3359 MSI_ADDR_REDIRECTION_LOWPRI) | 3359 MSI_ADDR_REDIRECTION_LOWPRI) |
3360 MSI_ADDR_DEST_ID(dest); 3360 MSI_ADDR_DEST_ID(dest);
3361 3361
3362 msg->data = 3362 msg->data =
3363 MSI_DATA_TRIGGER_EDGE | 3363 MSI_DATA_TRIGGER_EDGE |
3364 MSI_DATA_LEVEL_ASSERT | 3364 MSI_DATA_LEVEL_ASSERT |
3365 ((apic->irq_delivery_mode != dest_LowestPrio) ? 3365 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3366 MSI_DATA_DELIVERY_FIXED: 3366 MSI_DATA_DELIVERY_FIXED:
3367 MSI_DATA_DELIVERY_LOWPRI) | 3367 MSI_DATA_DELIVERY_LOWPRI) |
3368 MSI_DATA_VECTOR(cfg->vector); 3368 MSI_DATA_VECTOR(cfg->vector);
3369 } 3369 }
3370 return err; 3370 return err;
3371 } 3371 }
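
To make the non-remapped branch above concrete, here is the resulting encoding with sample values (vector and destination are assumed for illustration):

/* Editorial illustration, assumed values: cfg->vector = 0x41, dest = 3,
 * physical destination mode, fixed delivery:
 *
 *   address_hi = MSI_ADDR_BASE_HI                     (0x00000000)
 *   address_lo = MSI_ADDR_BASE_LO (0xfee00000)
 *              | MSI_ADDR_DEST_MODE_PHYSICAL
 *              | MSI_ADDR_REDIRECTION_CPU
 *              | MSI_ADDR_DEST_ID(3)     -- APIC ID in bits 19:12
 *   data       = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT
 *              | MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(0x41)
 */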
3372 3372
3373 #ifdef CONFIG_SMP 3373 #ifdef CONFIG_SMP
3374 static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3374 static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3375 { 3375 {
3376 struct irq_desc *desc = irq_to_desc(irq); 3376 struct irq_desc *desc = irq_to_desc(irq);
3377 struct irq_cfg *cfg; 3377 struct irq_cfg *cfg;
3378 struct msi_msg msg; 3378 struct msi_msg msg;
3379 unsigned int dest; 3379 unsigned int dest;
3380 3380
3381 if (set_desc_affinity(desc, mask, &dest)) 3381 if (set_desc_affinity(desc, mask, &dest))
3382 return -1; 3382 return -1;
3383 3383
3384 cfg = desc->chip_data; 3384 cfg = desc->chip_data;
3385 3385
3386 __get_cached_msi_msg(desc->irq_data.msi_desc, &msg); 3386 __get_cached_msi_msg(desc->irq_data.msi_desc, &msg);
3387 3387
3388 msg.data &= ~MSI_DATA_VECTOR_MASK; 3388 msg.data &= ~MSI_DATA_VECTOR_MASK;
3389 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3389 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3390 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3390 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3391 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3391 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3392 3392
3393 __write_msi_msg(desc->irq_data.msi_desc, &msg); 3393 __write_msi_msg(desc->irq_data.msi_desc, &msg);
3394 3394
3395 return 0; 3395 return 0;
3396 } 3396 }
3397 #ifdef CONFIG_INTR_REMAP 3397 #ifdef CONFIG_INTR_REMAP
3398 /* 3398 /*
3399 * Migrate the MSI irq to another cpumask. This migration is 3399 * Migrate the MSI irq to another cpumask. This migration is
3400 * done in the process context using interrupt-remapping hardware. 3400 * done in the process context using interrupt-remapping hardware.
3401 */ 3401 */
3402 static int 3402 static int
3403 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3403 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3404 { 3404 {
3405 struct irq_desc *desc = irq_to_desc(irq); 3405 struct irq_desc *desc = irq_to_desc(irq);
3406 struct irq_cfg *cfg = desc->chip_data; 3406 struct irq_cfg *cfg = desc->chip_data;
3407 unsigned int dest; 3407 unsigned int dest;
3408 struct irte irte; 3408 struct irte irte;
3409 3409
3410 if (get_irte(irq, &irte)) 3410 if (get_irte(irq, &irte))
3411 return -1; 3411 return -1;
3412 3412
3413 if (set_desc_affinity(desc, mask, &dest)) 3413 if (set_desc_affinity(desc, mask, &dest))
3414 return -1; 3414 return -1;
3415 3415
3416 irte.vector = cfg->vector; 3416 irte.vector = cfg->vector;
3417 irte.dest_id = IRTE_DEST(dest); 3417 irte.dest_id = IRTE_DEST(dest);
3418 3418
3419 /* 3419 /*
3420 * atomically update the IRTE with the new destination and vector. 3420 * atomically update the IRTE with the new destination and vector.
3421 */ 3421 */
3422 modify_irte(irq, &irte); 3422 modify_irte(irq, &irte);
3423 3423
3424 /* 3424 /*
3425 * After this point, all the interrupts will start arriving 3425 * After this point, all the interrupts will start arriving
3426 * at the new destination. So, time to cleanup the previous 3426 * at the new destination. So, time to cleanup the previous
3427 * vector allocation. 3427 * vector allocation.
3428 */ 3428 */
3429 if (cfg->move_in_progress) 3429 if (cfg->move_in_progress)
3430 send_cleanup_vector(cfg); 3430 send_cleanup_vector(cfg);
3431 3431
3432 return 0; 3432 return 0;
3433 } 3433 }
3434 3434
3435 #endif 3435 #endif
3436 #endif /* CONFIG_SMP */ 3436 #endif /* CONFIG_SMP */
3437 3437
3438 /* 3438 /*
3439 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, 3439 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
3440 * which implement the MSI or MSI-X Capability Structure. 3440 * which implement the MSI or MSI-X Capability Structure.
3441 */ 3441 */
3442 static struct irq_chip msi_chip = { 3442 static struct irq_chip msi_chip = {
3443 .name = "PCI-MSI", 3443 .name = "PCI-MSI",
3444 .irq_unmask = unmask_msi_irq, 3444 .irq_unmask = unmask_msi_irq,
3445 .irq_mask = mask_msi_irq, 3445 .irq_mask = mask_msi_irq,
3446 .ack = ack_apic_edge, 3446 .ack = ack_apic_edge,
3447 #ifdef CONFIG_SMP 3447 #ifdef CONFIG_SMP
3448 .set_affinity = set_msi_irq_affinity, 3448 .set_affinity = set_msi_irq_affinity,
3449 #endif 3449 #endif
3450 .retrigger = ioapic_retrigger_irq, 3450 .retrigger = ioapic_retrigger_irq,
3451 }; 3451 };
3452 3452
3453 static struct irq_chip msi_ir_chip = { 3453 static struct irq_chip msi_ir_chip = {
3454 .name = "IR-PCI-MSI", 3454 .name = "IR-PCI-MSI",
3455 .irq_unmask = unmask_msi_irq, 3455 .irq_unmask = unmask_msi_irq,
3456 .irq_mask = mask_msi_irq, 3456 .irq_mask = mask_msi_irq,
3457 #ifdef CONFIG_INTR_REMAP 3457 #ifdef CONFIG_INTR_REMAP
3458 .ack = ir_ack_apic_edge, 3458 .ack = ir_ack_apic_edge,
3459 #ifdef CONFIG_SMP 3459 #ifdef CONFIG_SMP
3460 .set_affinity = ir_set_msi_irq_affinity, 3460 .set_affinity = ir_set_msi_irq_affinity,
3461 #endif 3461 #endif
3462 #endif 3462 #endif
3463 .retrigger = ioapic_retrigger_irq, 3463 .retrigger = ioapic_retrigger_irq,
3464 }; 3464 };
3465 3465
3466 /* 3466 /*
3467 * Map the PCI dev to the corresponding remapping hardware unit 3467 * Map the PCI dev to the corresponding remapping hardware unit
3468 * and allocate 'nvec' consecutive interrupt-remapping table entries 3468 * and allocate 'nvec' consecutive interrupt-remapping table entries
3469 * in it. 3469 * in it.
3470 */ 3470 */
3471 static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) 3471 static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3472 { 3472 {
3473 struct intel_iommu *iommu; 3473 struct intel_iommu *iommu;
3474 int index; 3474 int index;
3475 3475
3476 iommu = map_dev_to_ir(dev); 3476 iommu = map_dev_to_ir(dev);
3477 if (!iommu) { 3477 if (!iommu) {
3478 printk(KERN_ERR 3478 printk(KERN_ERR
3479 "Unable to map PCI %s to iommu\n", pci_name(dev)); 3479 "Unable to map PCI %s to iommu\n", pci_name(dev));
3480 return -ENOENT; 3480 return -ENOENT;
3481 } 3481 }
3482 3482
3483 index = alloc_irte(iommu, irq, nvec); 3483 index = alloc_irte(iommu, irq, nvec);
3484 if (index < 0) { 3484 if (index < 0) {
3485 printk(KERN_ERR 3485 printk(KERN_ERR
3486 "Unable to allocate %d IRTE for PCI %s\n", nvec, 3486 "Unable to allocate %d IRTE for PCI %s\n", nvec,
3487 pci_name(dev)); 3487 pci_name(dev));
3488 return -ENOSPC; 3488 return -ENOSPC;
3489 } 3489 }
3490 return index; 3490 return index;
3491 } 3491 }
3492 3492
3493 static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3493 static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3494 { 3494 {
3495 int ret; 3495 int ret;
3496 struct msi_msg msg; 3496 struct msi_msg msg;
3497 3497
3498 ret = msi_compose_msg(dev, irq, &msg, -1); 3498 ret = msi_compose_msg(dev, irq, &msg, -1);
3499 if (ret < 0) 3499 if (ret < 0)
3500 return ret; 3500 return ret;
3501 3501
3502 set_irq_msi(irq, msidesc); 3502 set_irq_msi(irq, msidesc);
3503 write_msi_msg(irq, &msg); 3503 write_msi_msg(irq, &msg);
3504 3504
3505 if (irq_remapped(irq)) { 3505 if (irq_remapped(irq)) {
3506 struct irq_desc *desc = irq_to_desc(irq); 3506 struct irq_desc *desc = irq_to_desc(irq);
3507 /* 3507 /*
3508 * irq migration in process context 3508 * irq migration in process context
3509 */ 3509 */
3510 desc->status |= IRQ_MOVE_PCNTXT; 3510 desc->status |= IRQ_MOVE_PCNTXT;
3511 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); 3511 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
3512 } else 3512 } else
3513 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3513 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
3514 3514
3515 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); 3515 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
3516 3516
3517 return 0; 3517 return 0;
3518 } 3518 }
3519 3519
3520 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3520 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3521 { 3521 {
3522 unsigned int irq; 3522 unsigned int irq;
3523 int ret, sub_handle; 3523 int ret, sub_handle;
3524 struct msi_desc *msidesc; 3524 struct msi_desc *msidesc;
3525 unsigned int irq_want; 3525 unsigned int irq_want;
3526 struct intel_iommu *iommu = NULL; 3526 struct intel_iommu *iommu = NULL;
3527 int index = 0; 3527 int index = 0;
3528 int node; 3528 int node;
3529 3529
3530 /* x86 doesn't support multiple MSI yet */ 3530 /* x86 doesn't support multiple MSI yet */
3531 if (type == PCI_CAP_ID_MSI && nvec > 1) 3531 if (type == PCI_CAP_ID_MSI && nvec > 1)
3532 return 1; 3532 return 1;
3533 3533
3534 node = dev_to_node(&dev->dev); 3534 node = dev_to_node(&dev->dev);
3535 irq_want = nr_irqs_gsi; 3535 irq_want = nr_irqs_gsi;
3536 sub_handle = 0; 3536 sub_handle = 0;
3537 list_for_each_entry(msidesc, &dev->msi_list, list) { 3537 list_for_each_entry(msidesc, &dev->msi_list, list) {
3538 irq = create_irq_nr(irq_want, node); 3538 irq = create_irq_nr(irq_want, node);
3539 if (irq == 0) 3539 if (irq == 0)
3540 return -1; 3540 return -1;
3541 irq_want = irq + 1; 3541 irq_want = irq + 1;
3542 if (!intr_remapping_enabled) 3542 if (!intr_remapping_enabled)
3543 goto no_ir; 3543 goto no_ir;
3544 3544
3545 if (!sub_handle) { 3545 if (!sub_handle) {
3546 /* 3546 /*
3547 * allocate a consecutive block of IRTEs 3547 * allocate a consecutive block of IRTEs
3548 * for 'nvec' 3548 * for 'nvec'
3549 */ 3549 */
3550 index = msi_alloc_irte(dev, irq, nvec); 3550 index = msi_alloc_irte(dev, irq, nvec);
3551 if (index < 0) { 3551 if (index < 0) {
3552 ret = index; 3552 ret = index;
3553 goto error; 3553 goto error;
3554 } 3554 }
3555 } else { 3555 } else {
3556 iommu = map_dev_to_ir(dev); 3556 iommu = map_dev_to_ir(dev);
3557 if (!iommu) { 3557 if (!iommu) {
3558 ret = -ENOENT; 3558 ret = -ENOENT;
3559 goto error; 3559 goto error;
3560 } 3560 }
3561 /* 3561 /*
3562 * setup the mapping between the irq and the IRTE 3562 * setup the mapping between the irq and the IRTE
3563 * base index, the sub_handle pointing to the 3563 * base index, the sub_handle pointing to the
3564 * appropriate interrupt remap table entry. 3564 * appropriate interrupt remap table entry.
3565 */ 3565 */
3566 set_irte_irq(irq, iommu, index, sub_handle); 3566 set_irte_irq(irq, iommu, index, sub_handle);
3567 } 3567 }
3568 no_ir: 3568 no_ir:
3569 ret = setup_msi_irq(dev, msidesc, irq); 3569 ret = setup_msi_irq(dev, msidesc, irq);
3570 if (ret < 0) 3570 if (ret < 0)
3571 goto error; 3571 goto error;
3572 sub_handle++; 3572 sub_handle++;
3573 } 3573 }
3574 return 0; 3574 return 0;
3575 3575
3576 error: 3576 error:
3577 destroy_irq(irq); 3577 destroy_irq(irq);
3578 return ret; 3578 return ret;
3579 } 3579 }
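
A flow sketch for the allocation loop above with interrupt remapping enabled (irq numbers and the IRTE base index are assumed):

/* Editorial walk-through: MSI-X device, nvec == 4, remapping on,
 * assumed IRTE base index 40:
 *
 *   msidesc #0: create_irq_nr() -> irq A; msi_alloc_irte() reserves
 *               IRTEs 40..43; sub_handle 0
 *   msidesc #1: irq >= A+1; set_irte_irq(irq, iommu, 40, 1)
 *   msidesc #2: set_irte_irq(irq, iommu, 40, 2)
 *   msidesc #3: set_irte_irq(irq, iommu, 40, 3)
 *
 * Each message then carries the shared index plus its own sub_handle
 * (see the remapped branch of msi_compose_msg() above).
 */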
3580 3580
3581 void arch_teardown_msi_irq(unsigned int irq) 3581 void arch_teardown_msi_irq(unsigned int irq)
3582 { 3582 {
3583 destroy_irq(irq); 3583 destroy_irq(irq);
3584 } 3584 }
3585 3585
3586 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) 3586 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
3587 #ifdef CONFIG_SMP 3587 #ifdef CONFIG_SMP
3588 static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3588 static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3589 { 3589 {
3590 struct irq_desc *desc = irq_to_desc(irq); 3590 struct irq_desc *desc = irq_to_desc(irq);
3591 struct irq_cfg *cfg; 3591 struct irq_cfg *cfg;
3592 struct msi_msg msg; 3592 struct msi_msg msg;
3593 unsigned int dest; 3593 unsigned int dest;
3594 3594
3595 if (set_desc_affinity(desc, mask, &dest)) 3595 if (set_desc_affinity(desc, mask, &dest))
3596 return -1; 3596 return -1;
3597 3597
3598 cfg = desc->chip_data; 3598 cfg = desc->chip_data;
3599 3599
3600 dmar_msi_read(irq, &msg); 3600 dmar_msi_read(irq, &msg);
3601 3601
3602 msg.data &= ~MSI_DATA_VECTOR_MASK; 3602 msg.data &= ~MSI_DATA_VECTOR_MASK;
3603 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3603 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3604 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3604 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3605 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3605 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3606 3606
3607 dmar_msi_write(irq, &msg); 3607 dmar_msi_write(irq, &msg);
3608 3608
3609 return 0; 3609 return 0;
3610 } 3610 }
3611 3611
3612 #endif /* CONFIG_SMP */ 3612 #endif /* CONFIG_SMP */
3613 3613
3614 static struct irq_chip dmar_msi_type = { 3614 static struct irq_chip dmar_msi_type = {
3615 .name = "DMAR_MSI", 3615 .name = "DMAR_MSI",
3616 .unmask = dmar_msi_unmask, 3616 .unmask = dmar_msi_unmask,
3617 .mask = dmar_msi_mask, 3617 .mask = dmar_msi_mask,
3618 .ack = ack_apic_edge, 3618 .ack = ack_apic_edge,
3619 #ifdef CONFIG_SMP 3619 #ifdef CONFIG_SMP
3620 .set_affinity = dmar_msi_set_affinity, 3620 .set_affinity = dmar_msi_set_affinity,
3621 #endif 3621 #endif
3622 .retrigger = ioapic_retrigger_irq, 3622 .retrigger = ioapic_retrigger_irq,
3623 }; 3623 };
3624 3624
3625 int arch_setup_dmar_msi(unsigned int irq) 3625 int arch_setup_dmar_msi(unsigned int irq)
3626 { 3626 {
3627 int ret; 3627 int ret;
3628 struct msi_msg msg; 3628 struct msi_msg msg;
3629 3629
3630 ret = msi_compose_msg(NULL, irq, &msg, -1); 3630 ret = msi_compose_msg(NULL, irq, &msg, -1);
3631 if (ret < 0) 3631 if (ret < 0)
3632 return ret; 3632 return ret;
3633 dmar_msi_write(irq, &msg); 3633 dmar_msi_write(irq, &msg);
3634 set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, 3634 set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
3635 "edge"); 3635 "edge");
3636 return 0; 3636 return 0;
3637 } 3637 }
3638 #endif 3638 #endif
3639 3639
3640 #ifdef CONFIG_HPET_TIMER 3640 #ifdef CONFIG_HPET_TIMER
3641 3641
3642 #ifdef CONFIG_SMP 3642 #ifdef CONFIG_SMP
3643 static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3643 static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3644 { 3644 {
3645 struct irq_desc *desc = irq_to_desc(irq); 3645 struct irq_desc *desc = irq_to_desc(irq);
3646 struct irq_cfg *cfg; 3646 struct irq_cfg *cfg;
3647 struct msi_msg msg; 3647 struct msi_msg msg;
3648 unsigned int dest; 3648 unsigned int dest;
3649 3649
3650 if (set_desc_affinity(desc, mask, &dest)) 3650 if (set_desc_affinity(desc, mask, &dest))
3651 return -1; 3651 return -1;
3652 3652
3653 cfg = desc->chip_data; 3653 cfg = desc->chip_data;
3654 3654
3655 hpet_msi_read(irq, &msg); 3655 hpet_msi_read(irq, &msg);
3656 3656
3657 msg.data &= ~MSI_DATA_VECTOR_MASK; 3657 msg.data &= ~MSI_DATA_VECTOR_MASK;
3658 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3658 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3659 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3659 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3660 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3660 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3661 3661
3662 hpet_msi_write(irq, &msg); 3662 hpet_msi_write(irq, &msg);
3663 3663
3664 return 0; 3664 return 0;
3665 } 3665 }
3666 3666
3667 #endif /* CONFIG_SMP */ 3667 #endif /* CONFIG_SMP */
3668 3668
3669 static struct irq_chip ir_hpet_msi_type = { 3669 static struct irq_chip ir_hpet_msi_type = {
3670 .name = "IR-HPET_MSI", 3670 .name = "IR-HPET_MSI",
3671 .unmask = hpet_msi_unmask, 3671 .unmask = hpet_msi_unmask,
3672 .mask = hpet_msi_mask, 3672 .mask = hpet_msi_mask,
3673 #ifdef CONFIG_INTR_REMAP 3673 #ifdef CONFIG_INTR_REMAP
3674 .ack = ir_ack_apic_edge, 3674 .ack = ir_ack_apic_edge,
3675 #ifdef CONFIG_SMP 3675 #ifdef CONFIG_SMP
3676 .set_affinity = ir_set_msi_irq_affinity, 3676 .set_affinity = ir_set_msi_irq_affinity,
3677 #endif 3677 #endif
3678 #endif 3678 #endif
3679 .retrigger = ioapic_retrigger_irq, 3679 .retrigger = ioapic_retrigger_irq,
3680 }; 3680 };
3681 3681
3682 static struct irq_chip hpet_msi_type = { 3682 static struct irq_chip hpet_msi_type = {
3683 .name = "HPET_MSI", 3683 .name = "HPET_MSI",
3684 .unmask = hpet_msi_unmask, 3684 .unmask = hpet_msi_unmask,
3685 .mask = hpet_msi_mask, 3685 .mask = hpet_msi_mask,
3686 .ack = ack_apic_edge, 3686 .ack = ack_apic_edge,
3687 #ifdef CONFIG_SMP 3687 #ifdef CONFIG_SMP
3688 .set_affinity = hpet_msi_set_affinity, 3688 .set_affinity = hpet_msi_set_affinity,
3689 #endif 3689 #endif
3690 .retrigger = ioapic_retrigger_irq, 3690 .retrigger = ioapic_retrigger_irq,
3691 }; 3691 };
3692 3692
3693 int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3693 int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3694 { 3694 {
3695 int ret; 3695 int ret;
3696 struct msi_msg msg; 3696 struct msi_msg msg;
3697 struct irq_desc *desc = irq_to_desc(irq); 3697 struct irq_desc *desc = irq_to_desc(irq);
3698 3698
3699 if (intr_remapping_enabled) { 3699 if (intr_remapping_enabled) {
3700 struct intel_iommu *iommu = map_hpet_to_ir(id); 3700 struct intel_iommu *iommu = map_hpet_to_ir(id);
3701 int index; 3701 int index;
3702 3702
3703 if (!iommu) 3703 if (!iommu)
3704 return -1; 3704 return -1;
3705 3705
3706 index = alloc_irte(iommu, irq, 1); 3706 index = alloc_irte(iommu, irq, 1);
3707 if (index < 0) 3707 if (index < 0)
3708 return -1; 3708 return -1;
3709 } 3709 }
3710 3710
3711 ret = msi_compose_msg(NULL, irq, &msg, id); 3711 ret = msi_compose_msg(NULL, irq, &msg, id);
3712 if (ret < 0) 3712 if (ret < 0)
3713 return ret; 3713 return ret;
3714 3714
3715 hpet_msi_write(irq, &msg); 3715 hpet_msi_write(irq, &msg);
3716 desc->status |= IRQ_MOVE_PCNTXT; 3716 desc->status |= IRQ_MOVE_PCNTXT;
3717 if (irq_remapped(irq)) 3717 if (irq_remapped(irq))
3718 set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, 3718 set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
3719 handle_edge_irq, "edge"); 3719 handle_edge_irq, "edge");
3720 else 3720 else
3721 set_irq_chip_and_handler_name(irq, &hpet_msi_type, 3721 set_irq_chip_and_handler_name(irq, &hpet_msi_type,
3722 handle_edge_irq, "edge"); 3722 handle_edge_irq, "edge");
3723 3723
3724 return 0; 3724 return 0;
3725 } 3725 }
3726 #endif 3726 #endif
3727 3727
3728 #endif /* CONFIG_PCI_MSI */ 3728 #endif /* CONFIG_PCI_MSI */
3729 /* 3729 /*
3730 * Hypertransport interrupt support 3730 * Hypertransport interrupt support
3731 */ 3731 */
3732 #ifdef CONFIG_HT_IRQ 3732 #ifdef CONFIG_HT_IRQ
3733 3733
3734 #ifdef CONFIG_SMP 3734 #ifdef CONFIG_SMP
3735 3735
3736 static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) 3736 static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3737 { 3737 {
3738 struct ht_irq_msg msg; 3738 struct ht_irq_msg msg;
3739 fetch_ht_irq_msg(irq, &msg); 3739 fetch_ht_irq_msg(irq, &msg);
3740 3740
3741 msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); 3741 msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
3742 msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); 3742 msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
3743 3743
3744 msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); 3744 msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
3745 msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); 3745 msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
3746 3746
3747 write_ht_irq_msg(irq, &msg); 3747 write_ht_irq_msg(irq, &msg);
3748 } 3748 }
3749 3749
3750 static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) 3750 static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3751 { 3751 {
3752 struct irq_desc *desc = irq_to_desc(irq); 3752 struct irq_desc *desc = irq_to_desc(irq);
3753 struct irq_cfg *cfg; 3753 struct irq_cfg *cfg;
3754 unsigned int dest; 3754 unsigned int dest;
3755 3755
3756 if (set_desc_affinity(desc, mask, &dest)) 3756 if (set_desc_affinity(desc, mask, &dest))
3757 return -1; 3757 return -1;
3758 3758
3759 cfg = desc->chip_data; 3759 cfg = desc->chip_data;
3760 3760
3761 target_ht_irq(irq, dest, cfg->vector); 3761 target_ht_irq(irq, dest, cfg->vector);
3762 3762
3763 return 0; 3763 return 0;
3764 } 3764 }
3765 3765
3766 #endif 3766 #endif
3767 3767
3768 static struct irq_chip ht_irq_chip = { 3768 static struct irq_chip ht_irq_chip = {
3769 .name = "PCI-HT", 3769 .name = "PCI-HT",
3770 .mask = mask_ht_irq, 3770 .mask = mask_ht_irq,
3771 .unmask = unmask_ht_irq, 3771 .unmask = unmask_ht_irq,
3772 .ack = ack_apic_edge, 3772 .ack = ack_apic_edge,
3773 #ifdef CONFIG_SMP 3773 #ifdef CONFIG_SMP
3774 .set_affinity = set_ht_irq_affinity, 3774 .set_affinity = set_ht_irq_affinity,
3775 #endif 3775 #endif
3776 .retrigger = ioapic_retrigger_irq, 3776 .retrigger = ioapic_retrigger_irq,
3777 }; 3777 };
3778 3778
3779 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) 3779 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3780 { 3780 {
3781 struct irq_cfg *cfg; 3781 struct irq_cfg *cfg;
3782 int err; 3782 int err;
3783 3783
3784 if (disable_apic) 3784 if (disable_apic)
3785 return -ENXIO; 3785 return -ENXIO;
3786 3786
3787 cfg = irq_cfg(irq); 3787 cfg = irq_cfg(irq);
3788 err = assign_irq_vector(irq, cfg, apic->target_cpus()); 3788 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3789 if (!err) { 3789 if (!err) {
3790 struct ht_irq_msg msg; 3790 struct ht_irq_msg msg;
3791 unsigned dest; 3791 unsigned dest;
3792 3792
3793 dest = apic->cpu_mask_to_apicid_and(cfg->domain, 3793 dest = apic->cpu_mask_to_apicid_and(cfg->domain,
3794 apic->target_cpus()); 3794 apic->target_cpus());
3795 3795
3796 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3796 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3797 3797
3798 msg.address_lo = 3798 msg.address_lo =
3799 HT_IRQ_LOW_BASE | 3799 HT_IRQ_LOW_BASE |
3800 HT_IRQ_LOW_DEST_ID(dest) | 3800 HT_IRQ_LOW_DEST_ID(dest) |
3801 HT_IRQ_LOW_VECTOR(cfg->vector) | 3801 HT_IRQ_LOW_VECTOR(cfg->vector) |
3802 ((apic->irq_dest_mode == 0) ? 3802 ((apic->irq_dest_mode == 0) ?
3803 HT_IRQ_LOW_DM_PHYSICAL : 3803 HT_IRQ_LOW_DM_PHYSICAL :
3804 HT_IRQ_LOW_DM_LOGICAL) | 3804 HT_IRQ_LOW_DM_LOGICAL) |
3805 HT_IRQ_LOW_RQEOI_EDGE | 3805 HT_IRQ_LOW_RQEOI_EDGE |
3806 ((apic->irq_delivery_mode != dest_LowestPrio) ? 3806 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3807 HT_IRQ_LOW_MT_FIXED : 3807 HT_IRQ_LOW_MT_FIXED :
3808 HT_IRQ_LOW_MT_ARBITRATED) | 3808 HT_IRQ_LOW_MT_ARBITRATED) |
3809 HT_IRQ_LOW_IRQ_MASKED; 3809 HT_IRQ_LOW_IRQ_MASKED;
3810 3810
3811 write_ht_irq_msg(irq, &msg); 3811 write_ht_irq_msg(irq, &msg);
3812 3812
3813 set_irq_chip_and_handler_name(irq, &ht_irq_chip, 3813 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
3814 handle_edge_irq, "edge"); 3814 handle_edge_irq, "edge");
3815 3815
3816 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); 3816 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
3817 } 3817 }
3818 return err; 3818 return err;
3819 } 3819 }
3820 #endif /* CONFIG_HT_IRQ */ 3820 #endif /* CONFIG_HT_IRQ */
3821 3821
3822 int __init io_apic_get_redir_entries(int ioapic) 3822 int __init io_apic_get_redir_entries(int ioapic)
3823 { 3823 {
3824 union IO_APIC_reg_01 reg_01; 3824 union IO_APIC_reg_01 reg_01;
3825 unsigned long flags; 3825 unsigned long flags;
3826 3826
3827 raw_spin_lock_irqsave(&ioapic_lock, flags); 3827 raw_spin_lock_irqsave(&ioapic_lock, flags);
3828 reg_01.raw = io_apic_read(ioapic, 1); 3828 reg_01.raw = io_apic_read(ioapic, 1);
3829 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 3829 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3830 3830
3831 /* The register returns the maximum redirection entry index 3831 /* The register returns the maximum redirection entry index
3832 * supported, which is one less than the total number of redir 3832 * supported, which is one less than the total number of redir
3833 * entries. 3833 * entries.
3834 */ 3834 */
3835 return reg_01.bits.entries + 1; 3835 return reg_01.bits.entries + 1;
3836 } 3836 }
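
A concrete reading of the register semantics above (typical hardware, value assumed):

/* Editorial example: a common 24-entry I/O APIC reports
 * reg_01.bits.entries == 23, so this function returns 24. */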
3837 3837
3838 void __init probe_nr_irqs_gsi(void) 3838 void __init probe_nr_irqs_gsi(void)
3839 { 3839 {
3840 int nr; 3840 int nr;
3841 3841
3842 nr = gsi_top + NR_IRQS_LEGACY; 3842 nr = gsi_top + NR_IRQS_LEGACY;
3843 if (nr > nr_irqs_gsi) 3843 if (nr > nr_irqs_gsi)
3844 nr_irqs_gsi = nr; 3844 nr_irqs_gsi = nr;
3845 3845
3846 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); 3846 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
3847 } 3847 }
3848 3848
3849 #ifdef CONFIG_SPARSE_IRQ 3849 #ifdef CONFIG_SPARSE_IRQ
3850 int __init arch_probe_nr_irqs(void) 3850 int __init arch_probe_nr_irqs(void)
3851 { 3851 {
3852 int nr; 3852 int nr;
3853 3853
3854 if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) 3854 if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
3855 nr_irqs = NR_VECTORS * nr_cpu_ids; 3855 nr_irqs = NR_VECTORS * nr_cpu_ids;
3856 3856
3857 nr = nr_irqs_gsi + 8 * nr_cpu_ids; 3857 nr = nr_irqs_gsi + 8 * nr_cpu_ids;
3858 #if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) 3858 #if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
3859 /* 3859 /*
3860 * for MSI and HT dynamic irqs 3860 * for MSI and HT dynamic irqs
3861 */ 3861 */
3862 nr += nr_irqs_gsi * 16; 3862 nr += nr_irqs_gsi * 16;
3863 #endif 3863 #endif
3864 if (nr < nr_irqs) 3864 if (nr < nr_irqs)
3865 nr_irqs = nr; 3865 nr_irqs = nr;
3866 3866
3867 return NR_IRQS_LEGACY; 3867 return NR_IRQS_LEGACY;
3868 } 3868 }
3869 #endif 3869 #endif
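
A worked sizing example for the heuristic above (all inputs assumed):

/* Editorial arithmetic, assumed inputs: nr_irqs_gsi = 56, nr_cpu_ids = 8,
 * CONFIG_PCI_MSI enabled:
 *
 *   nr  = 56 + 8 * 8 =  120
 *   nr += 56 * 16    = 1016    (MSI/HT dynamic irq headroom)
 *
 * nr_irqs is lowered to 1016 if it was larger, and the function still
 * returns NR_IRQS_LEGACY (16) as the count of preallocated descriptors.
 */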
3870 3870
3871 static int __io_apic_set_pci_routing(struct device *dev, int irq, 3871 static int __io_apic_set_pci_routing(struct device *dev, int irq,
3872 struct io_apic_irq_attr *irq_attr) 3872 struct io_apic_irq_attr *irq_attr)
3873 { 3873 {
3874 struct irq_desc *desc; 3874 struct irq_desc *desc;
3875 struct irq_cfg *cfg; 3875 struct irq_cfg *cfg;
3876 int node; 3876 int node;
3877 int ioapic, pin; 3877 int ioapic, pin;
3878 int trigger, polarity; 3878 int trigger, polarity;
3879 3879
3880 ioapic = irq_attr->ioapic; 3880 ioapic = irq_attr->ioapic;
3881 if (!IO_APIC_IRQ(irq)) { 3881 if (!IO_APIC_IRQ(irq)) {
3882 apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 3882 apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3883 ioapic); 3883 ioapic);
3884 return -EINVAL; 3884 return -EINVAL;
3885 } 3885 }
3886 3886
3887 if (dev) 3887 if (dev)
3888 node = dev_to_node(dev); 3888 node = dev_to_node(dev);
3889 else 3889 else
3890 node = cpu_to_node(0); 3890 node = cpu_to_node(0);
3891 3891
3892 desc = irq_to_desc_alloc_node(irq, node); 3892 desc = irq_to_desc_alloc_node(irq, node);
3893 if (!desc) { 3893 if (!desc) {
3894 printk(KERN_INFO "cannot get irq_desc %d\n", irq); 3894 printk(KERN_INFO "cannot get irq_desc %d\n", irq);
3895 return 0; 3895 return 0;
3896 } 3896 }
3897 3897
3898 pin = irq_attr->ioapic_pin; 3898 pin = irq_attr->ioapic_pin;
3899 trigger = irq_attr->trigger; 3899 trigger = irq_attr->trigger;
3900 polarity = irq_attr->polarity; 3900 polarity = irq_attr->polarity;
3901 3901
3902 /* 3902 /*
3903 * IRQs < 16 are already in the irq_2_pin[] map 3903 * IRQs < 16 are already in the irq_2_pin[] map
3904 */ 3904 */
3905 if (irq >= legacy_pic->nr_legacy_irqs) { 3905 if (irq >= legacy_pic->nr_legacy_irqs) {
3906 cfg = desc->chip_data; 3906 cfg = desc->chip_data;
3907 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { 3907 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3908 printk(KERN_INFO "cannot add pin %d for irq %d\n", 3908 printk(KERN_INFO "cannot add pin %d for irq %d\n",
3909 pin, irq); 3909 pin, irq);
3910 return 0; 3910 return 0;
3911 } 3911 }
3912 } 3912 }
3913 3913
3914 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); 3914 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
3915 3915
3916 return 0; 3916 return 0;
3917 } 3917 }
3918 3918
3919 int io_apic_set_pci_routing(struct device *dev, int irq, 3919 int io_apic_set_pci_routing(struct device *dev, int irq,
3920 struct io_apic_irq_attr *irq_attr) 3920 struct io_apic_irq_attr *irq_attr)
3921 { 3921 {
3922 int ioapic, pin; 3922 int ioapic, pin;
3923 /* 3923 /*
3924 * Avoid pin reprogramming. PRTs typically include entries 3924 * Avoid pin reprogramming. PRTs typically include entries
3925 * with redundant pin->gsi mappings (but unique PCI devices); 3925 * with redundant pin->gsi mappings (but unique PCI devices);
3926 * we only program the IOAPIC on the first. 3926 * we only program the IOAPIC on the first.
3927 */ 3927 */
3928 ioapic = irq_attr->ioapic; 3928 ioapic = irq_attr->ioapic;
3929 pin = irq_attr->ioapic_pin; 3929 pin = irq_attr->ioapic_pin;
3930 if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) { 3930 if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
3931 pr_debug("Pin %d-%d already programmed\n", 3931 pr_debug("Pin %d-%d already programmed\n",
3932 mp_ioapics[ioapic].apicid, pin); 3932 mp_ioapics[ioapic].apicid, pin);
3933 return 0; 3933 return 0;
3934 } 3934 }
3935 set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed); 3935 set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
3936 3936
3937 return __io_apic_set_pci_routing(dev, irq, irq_attr); 3937 return __io_apic_set_pci_routing(dev, irq, irq_attr);
3938 } 3938 }
3939 3939
3940 u8 __init io_apic_unique_id(u8 id) 3940 u8 __init io_apic_unique_id(u8 id)
3941 { 3941 {
3942 #ifdef CONFIG_X86_32 3942 #ifdef CONFIG_X86_32
3943 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && 3943 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
3944 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) 3944 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3945 return io_apic_get_unique_id(nr_ioapics, id); 3945 return io_apic_get_unique_id(nr_ioapics, id);
3946 else 3946 else
3947 return id; 3947 return id;
3948 #else 3948 #else
3949 int i; 3949 int i;
3950 DECLARE_BITMAP(used, 256); 3950 DECLARE_BITMAP(used, 256);
3951 3951
3952 bitmap_zero(used, 256); 3952 bitmap_zero(used, 256);
3953 for (i = 0; i < nr_ioapics; i++) { 3953 for (i = 0; i < nr_ioapics; i++) {
3954 struct mpc_ioapic *ia = &mp_ioapics[i]; 3954 struct mpc_ioapic *ia = &mp_ioapics[i];
3955 __set_bit(ia->apicid, used); 3955 __set_bit(ia->apicid, used);
3956 } 3956 }
3957 if (!test_bit(id, used)) 3957 if (!test_bit(id, used))
3958 return id; 3958 return id;
3959 return find_first_zero_bit(used, 256); 3959 return find_first_zero_bit(used, 256);
3960 #endif 3960 #endif
3961 } 3961 }
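
A behavior sketch for the 64-bit path above (registered IDs assumed):

/* Editorial example: if the MP table already registered I/O APIC IDs
 * 0 and 1, io_apic_unique_id(1) finds bit 1 set in 'used' and returns
 * find_first_zero_bit(used, 256) == 2. */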
3962 3962
3963 #ifdef CONFIG_X86_32 3963 #ifdef CONFIG_X86_32
3964 int __init io_apic_get_unique_id(int ioapic, int apic_id) 3964 int __init io_apic_get_unique_id(int ioapic, int apic_id)
3965 { 3965 {
3966 union IO_APIC_reg_00 reg_00; 3966 union IO_APIC_reg_00 reg_00;
3967 static physid_mask_t apic_id_map = PHYSID_MASK_NONE; 3967 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
3968 physid_mask_t tmp; 3968 physid_mask_t tmp;
3969 unsigned long flags; 3969 unsigned long flags;
3970 int i = 0; 3970 int i = 0;
3971 3971
3972 /* 3972 /*
3973 * The P4 platform supports up to 256 APIC IDs on two separate APIC 3973 * The P4 platform supports up to 256 APIC IDs on two separate APIC
3974 * buses (one for LAPICs, one for IOAPICs), where predecessors only 3974 * buses (one for LAPICs, one for IOAPICs), where predecessors only
3975 * support up to 16 on one shared APIC bus. 3975 * support up to 16 on one shared APIC bus.
3976 * 3976 *
3977 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full 3977 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
3978 * advantage of new APIC bus architecture. 3978 * advantage of new APIC bus architecture.
3979 */ 3979 */
3980 3980
3981 if (physids_empty(apic_id_map)) 3981 if (physids_empty(apic_id_map))
3982 apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); 3982 apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
3983 3983
3984 raw_spin_lock_irqsave(&ioapic_lock, flags); 3984 raw_spin_lock_irqsave(&ioapic_lock, flags);
3985 reg_00.raw = io_apic_read(ioapic, 0); 3985 reg_00.raw = io_apic_read(ioapic, 0);
3986 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 3986 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3987 3987
3988 if (apic_id >= get_physical_broadcast()) { 3988 if (apic_id >= get_physical_broadcast()) {
3989 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " 3989 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
3990 "%d\n", ioapic, apic_id, reg_00.bits.ID); 3990 "%d\n", ioapic, apic_id, reg_00.bits.ID);
3991 apic_id = reg_00.bits.ID; 3991 apic_id = reg_00.bits.ID;
3992 } 3992 }
3993 3993
3994 /* 3994 /*
3995 * Every APIC in a system must have a unique ID or we get lots of nice 3995 * Every APIC in a system must have a unique ID or we get lots of nice
3996 * 'stuck on smp_invalidate_needed IPI wait' messages. 3996 * 'stuck on smp_invalidate_needed IPI wait' messages.
3997 */ 3997 */
3998 if (apic->check_apicid_used(&apic_id_map, apic_id)) { 3998 if (apic->check_apicid_used(&apic_id_map, apic_id)) {
3999 3999
4000 for (i = 0; i < get_physical_broadcast(); i++) { 4000 for (i = 0; i < get_physical_broadcast(); i++) {
4001 if (!apic->check_apicid_used(&apic_id_map, i)) 4001 if (!apic->check_apicid_used(&apic_id_map, i))
4002 break; 4002 break;
4003 } 4003 }
4004 4004
4005 if (i == get_physical_broadcast()) 4005 if (i == get_physical_broadcast())
4006 panic("Max apic_id exceeded!\n"); 4006 panic("Max apic_id exceeded!\n");
4007 4007
4008 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " 4008 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
4009 "trying %d\n", ioapic, apic_id, i); 4009 "trying %d\n", ioapic, apic_id, i);
4010 4010
4011 apic_id = i; 4011 apic_id = i;
4012 } 4012 }
4013 4013
4014 apic->apicid_to_cpu_present(apic_id, &tmp); 4014 apic->apicid_to_cpu_present(apic_id, &tmp);
4015 physids_or(apic_id_map, apic_id_map, tmp); 4015 physids_or(apic_id_map, apic_id_map, tmp);
4016 4016
4017 if (reg_00.bits.ID != apic_id) { 4017 if (reg_00.bits.ID != apic_id) {
4018 reg_00.bits.ID = apic_id; 4018 reg_00.bits.ID = apic_id;
4019 4019
4020 raw_spin_lock_irqsave(&ioapic_lock, flags); 4020 raw_spin_lock_irqsave(&ioapic_lock, flags);
4021 io_apic_write(ioapic, 0, reg_00.raw); 4021 io_apic_write(ioapic, 0, reg_00.raw);
4022 reg_00.raw = io_apic_read(ioapic, 0); 4022 reg_00.raw = io_apic_read(ioapic, 0);
4023 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 4023 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
4024 4024
4025 /* Sanity check */ 4025 /* Sanity check */
4026 if (reg_00.bits.ID != apic_id) { 4026 if (reg_00.bits.ID != apic_id) {
4027 printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); 4027 printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
4028 return -1; 4028 return -1;
4029 } 4029 }
4030 } 4030 }
4031 4031
4032 apic_printk(APIC_VERBOSE, KERN_INFO 4032 apic_printk(APIC_VERBOSE, KERN_INFO
4033 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); 4033 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
4034 4034
4035 return apic_id; 4035 return apic_id;
4036 } 4036 }
4037 #endif 4037 #endif
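
A worked example of the collision handling above: if the requested apic_id is
already set in apic_id_map, the loop scans IDs 0 .. get_physical_broadcast()-1
and takes the first free one, so with IDs 0-2 in use the IO-APIC is reassigned
ID 3; only when every ID below the physical broadcast value is taken does the
kernel panic.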
4038 4038
4039 int __init io_apic_get_version(int ioapic) 4039 int __init io_apic_get_version(int ioapic)
4040 { 4040 {
4041 union IO_APIC_reg_01 reg_01; 4041 union IO_APIC_reg_01 reg_01;
4042 unsigned long flags; 4042 unsigned long flags;
4043 4043
4044 raw_spin_lock_irqsave(&ioapic_lock, flags); 4044 raw_spin_lock_irqsave(&ioapic_lock, flags);
4045 reg_01.raw = io_apic_read(ioapic, 1); 4045 reg_01.raw = io_apic_read(ioapic, 1);
4046 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 4046 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
4047 4047
4048 return reg_01.bits.version; 4048 return reg_01.bits.version;
4049 } 4049 }
4050 4050
4051 int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) 4051 int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
4052 { 4052 {
4053 int ioapic, pin, idx; 4053 int ioapic, pin, idx;
4054 4054
4055 if (skip_ioapic_setup) 4055 if (skip_ioapic_setup)
4056 return -1; 4056 return -1;
4057 4057
4058 ioapic = mp_find_ioapic(gsi); 4058 ioapic = mp_find_ioapic(gsi);
4059 if (ioapic < 0) 4059 if (ioapic < 0)
4060 return -1; 4060 return -1;
4061 4061
4062 pin = mp_find_ioapic_pin(ioapic, gsi); 4062 pin = mp_find_ioapic_pin(ioapic, gsi);
4063 if (pin < 0) 4063 if (pin < 0)
4064 return -1; 4064 return -1;
4065 4065
4066 idx = find_irq_entry(ioapic, pin, mp_INT); 4066 idx = find_irq_entry(ioapic, pin, mp_INT);
4067 if (idx < 0) 4067 if (idx < 0)
4068 return -1; 4068 return -1;
4069 4069
4070 *trigger = irq_trigger(idx); 4070 *trigger = irq_trigger(idx);
4071 *polarity = irq_polarity(idx); 4071 *polarity = irq_polarity(idx);
4072 return 0; 4072 return 0;
4073 } 4073 }
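
A minimal caller sketch of the lookup above (illustrative only, not part of
this commit; example_show_override() is a hypothetical name, and the
0 = edge / 1 = level, 0 = active-high / 1 = active-low encoding of
irq_trigger() and irq_polarity() is an assumption):

	static void example_show_override(u32 gsi)
	{
		int trigger, polarity;

		/* Returns -1 when no mp_INT entry covers this GSI. */
		if (acpi_get_override_irq(gsi, &trigger, &polarity) < 0) {
			printk(KERN_INFO "GSI %u: no override entry\n", gsi);
			return;
		}
		printk(KERN_INFO "GSI %u: %s triggered, active %s\n", gsi,
		       trigger ? "level" : "edge",
		       polarity ? "low" : "high");
	}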
4074 4074
4075 /* 4075 /*
4076 * This function is currently only a helper for the i386 SMP boot process, where 4076 * This function is currently only a helper for the i386 SMP boot process, where
4077 * we need to reprogram the ioredtbls to cater for the CPUs which have come online, 4077 * we need to reprogram the ioredtbls to cater for the CPUs which have come online,
4078 * so the mask in all cases should simply be apic->target_cpus() 4078 * so the mask in all cases should simply be apic->target_cpus()
4079 */ 4079 */
4080 #ifdef CONFIG_SMP 4080 #ifdef CONFIG_SMP
4081 void __init setup_ioapic_dest(void) 4081 void __init setup_ioapic_dest(void)
4082 { 4082 {
4083 int pin, ioapic, irq, irq_entry; 4083 int pin, ioapic, irq, irq_entry;
4084 struct irq_desc *desc; 4084 struct irq_desc *desc;
4085 const struct cpumask *mask; 4085 const struct cpumask *mask;
4086 4086
4087 if (skip_ioapic_setup == 1) 4087 if (skip_ioapic_setup == 1)
4088 return; 4088 return;
4089 4089
4090 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) 4090 for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
4091 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { 4091 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
4092 irq_entry = find_irq_entry(ioapic, pin, mp_INT); 4092 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
4093 if (irq_entry == -1) 4093 if (irq_entry == -1)
4094 continue; 4094 continue;
4095 irq = pin_2_irq(irq_entry, ioapic, pin); 4095 irq = pin_2_irq(irq_entry, ioapic, pin);
4096 4096
4097 if ((ioapic > 0) && (irq > 16)) 4097 if ((ioapic > 0) && (irq > 16))
4098 continue; 4098 continue;
4099 4099
4100 desc = irq_to_desc(irq); 4100 desc = irq_to_desc(irq);
4101 4101
4102 /* 4102 /*
4103 * Honour affinities which have been set in early boot 4103 * Honour affinities which have been set in early boot
4104 */ 4104 */
4105 if (desc->status & 4105 if (desc->status &
4106 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4106 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4107 mask = desc->affinity; 4107 mask = desc->affinity;
4108 else 4108 else
4109 mask = apic->target_cpus(); 4109 mask = apic->target_cpus();
4110 4110
4111 if (intr_remapping_enabled) 4111 if (intr_remapping_enabled)
4112 set_ir_ioapic_affinity_irq_desc(desc, mask); 4112 set_ir_ioapic_affinity_irq_desc(desc, mask);
4113 else 4113 else
4114 set_ioapic_affinity_irq_desc(desc, mask); 4114 set_ioapic_affinity_irq_desc(desc, mask);
4115 } 4115 }
4116 4116
4117 } 4117 }
4118 #endif 4118 #endif
4119 4119
4120 #define IOAPIC_RESOURCE_NAME_SIZE 11 4120 #define IOAPIC_RESOURCE_NAME_SIZE 11
4121 4121
4122 static struct resource *ioapic_resources; 4122 static struct resource *ioapic_resources;
4123 4123
4124 static struct resource * __init ioapic_setup_resources(int nr_ioapics) 4124 static struct resource * __init ioapic_setup_resources(int nr_ioapics)
4125 { 4125 {
4126 unsigned long n; 4126 unsigned long n;
4127 struct resource *res; 4127 struct resource *res;
4128 char *mem; 4128 char *mem;
4129 int i; 4129 int i;
4130 4130
4131 if (nr_ioapics <= 0) 4131 if (nr_ioapics <= 0)
4132 return NULL; 4132 return NULL;
4133 4133
4134 n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); 4134 n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
4135 n *= nr_ioapics; 4135 n *= nr_ioapics;
4136 4136
4137 mem = alloc_bootmem(n); 4137 mem = alloc_bootmem(n);
4138 res = (void *)mem; 4138 res = (void *)mem;
4139 4139
4140 mem += sizeof(struct resource) * nr_ioapics; 4140 mem += sizeof(struct resource) * nr_ioapics;
4141 4141
4142 for (i = 0; i < nr_ioapics; i++) { 4142 for (i = 0; i < nr_ioapics; i++) {
4143 res[i].name = mem; 4143 res[i].name = mem;
4144 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; 4144 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
4145 snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); 4145 snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
4146 mem += IOAPIC_RESOURCE_NAME_SIZE; 4146 mem += IOAPIC_RESOURCE_NAME_SIZE;
4147 } 4147 }
4148 4148
4149 ioapic_resources = res; 4149 ioapic_resources = res;
4150 4150
4151 return res; 4151 return res;
4152 } 4152 }
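
A sketch of the single bootmem allocation made above (illustrative comment,
not part of this commit): the resource array and the fixed-size name buffers
live back to back in one block.

	/*
	 * mem: | res[0] ... res[nr-1] | name[0] | name[1] | ... | name[nr-1] |
	 *
	 * Each name slot is IOAPIC_RESOURCE_NAME_SIZE bytes, so:
	 *
	 *   res[i].name == (char *)res + nr_ioapics * sizeof(struct resource)
	 *                              + i * IOAPIC_RESOURCE_NAME_SIZE
	 */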
4153 4153
4154 void __init ioapic_init_mappings(void) 4154 void __init ioapic_init_mappings(void)
4155 { 4155 {
4156 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 4156 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
4157 struct resource *ioapic_res; 4157 struct resource *ioapic_res;
4158 int i; 4158 int i;
4159 4159
4160 ioapic_res = ioapic_setup_resources(nr_ioapics); 4160 ioapic_res = ioapic_setup_resources(nr_ioapics);
4161 for (i = 0; i < nr_ioapics; i++) { 4161 for (i = 0; i < nr_ioapics; i++) {
4162 if (smp_found_config) { 4162 if (smp_found_config) {
4163 ioapic_phys = mp_ioapics[i].apicaddr; 4163 ioapic_phys = mp_ioapics[i].apicaddr;
4164 #ifdef CONFIG_X86_32 4164 #ifdef CONFIG_X86_32
4165 if (!ioapic_phys) { 4165 if (!ioapic_phys) {
4166 printk(KERN_ERR 4166 printk(KERN_ERR
4167 "WARNING: bogus zero IO-APIC " 4167 "WARNING: bogus zero IO-APIC "
4168 "address found in MPTABLE, " 4168 "address found in MPTABLE, "
4169 "disabling IO/APIC support!\n"); 4169 "disabling IO/APIC support!\n");
4170 smp_found_config = 0; 4170 smp_found_config = 0;
4171 skip_ioapic_setup = 1; 4171 skip_ioapic_setup = 1;
4172 goto fake_ioapic_page; 4172 goto fake_ioapic_page;
4173 } 4173 }
4174 #endif 4174 #endif
4175 } else { 4175 } else {
4176 #ifdef CONFIG_X86_32 4176 #ifdef CONFIG_X86_32
4177 fake_ioapic_page: 4177 fake_ioapic_page:
4178 #endif 4178 #endif
4179 ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); 4179 ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
4180 ioapic_phys = __pa(ioapic_phys); 4180 ioapic_phys = __pa(ioapic_phys);
4181 } 4181 }
4182 set_fixmap_nocache(idx, ioapic_phys); 4182 set_fixmap_nocache(idx, ioapic_phys);
4183 apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", 4183 apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
4184 __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), 4184 __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
4185 ioapic_phys); 4185 ioapic_phys);
4186 idx++; 4186 idx++;
4187 4187
4188 ioapic_res->start = ioapic_phys; 4188 ioapic_res->start = ioapic_phys;
4189 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; 4189 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
4190 ioapic_res++; 4190 ioapic_res++;
4191 } 4191 }
4192 } 4192 }
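
A note on the fake_ioapic_page path above: when there is no usable physical
IO-APIC address (no MP configuration, or a bogus zero address in the MPTABLE
on 32-bit), a freshly allocated zeroed page is mapped into the fixmap slot
instead, so early code touching the IO-APIC window reads harmless memory
rather than faulting on an unmapped address.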
4193 4193
4194 void __init ioapic_insert_resources(void) 4194 void __init ioapic_insert_resources(void)
4195 { 4195 {
4196 int i; 4196 int i;
4197 struct resource *r = ioapic_resources; 4197 struct resource *r = ioapic_resources;
4198 4198
4199 if (!r) { 4199 if (!r) {
4200 if (nr_ioapics > 0) 4200 if (nr_ioapics > 0)
4201 printk(KERN_ERR 4201 printk(KERN_ERR
4202 "IO APIC resources couldn't be allocated.\n"); 4202 "IO APIC resources couldn't be allocated.\n");
4203 return; 4203 return;
4204 } 4204 }
4205 4205
4206 for (i = 0; i < nr_ioapics; i++) { 4206 for (i = 0; i < nr_ioapics; i++) {
4207 insert_resource(&iomem_resource, r); 4207 insert_resource(&iomem_resource, r);
4208 r++; 4208 r++;
4209 } 4209 }
4210 } 4210 }
4211 4211
4212 int mp_find_ioapic(u32 gsi) 4212 int mp_find_ioapic(u32 gsi)
4213 { 4213 {
4214 int i = 0; 4214 int i = 0;
4215 4215
4216 /* Find the IOAPIC that manages this GSI. */ 4216 /* Find the IOAPIC that manages this GSI. */
4217 for (i = 0; i < nr_ioapics; i++) { 4217 for (i = 0; i < nr_ioapics; i++) {
4218 if ((gsi >= mp_gsi_routing[i].gsi_base) 4218 if ((gsi >= mp_gsi_routing[i].gsi_base)
4219 && (gsi <= mp_gsi_routing[i].gsi_end)) 4219 && (gsi <= mp_gsi_routing[i].gsi_end))
4220 return i; 4220 return i;
4221 } 4221 }
4222 4222
4223 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %u\n", gsi); 4223 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %u\n", gsi);
4224 return -1; 4224 return -1;
4225 } 4225 }
4226 4226
4227 int mp_find_ioapic_pin(int ioapic, u32 gsi) 4227 int mp_find_ioapic_pin(int ioapic, u32 gsi)
4228 { 4228 {
4229 if (WARN_ON(ioapic == -1)) 4229 if (WARN_ON(ioapic == -1))
4230 return -1; 4230 return -1;
4231 if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end)) 4231 if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end))
4232 return -1; 4232 return -1;
4233 4233
4234 return gsi - mp_gsi_routing[ioapic].gsi_base; 4234 return gsi - mp_gsi_routing[ioapic].gsi_base;
4235 } 4235 }
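
Taken together, the two lookups above resolve a GSI to an (apic, pin) pair; a
minimal helper sketch (example_gsi_to_apic_pin() is a hypothetical name, not a
kernel symbol):

	static int example_gsi_to_apic_pin(u32 gsi, int *apic, int *pin)
	{
		*apic = mp_find_ioapic(gsi);	/* which IO-APIC owns this GSI */
		if (*apic < 0)
			return -1;
		*pin = mp_find_ioapic_pin(*apic, gsi);	/* offset in its range */
		return *pin < 0 ? -1 : 0;
	}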
4236 4236
4237 static int bad_ioapic(unsigned long address) 4237 static int bad_ioapic(unsigned long address)
4238 { 4238 {
4239 if (nr_ioapics >= MAX_IO_APICS) { 4239 if (nr_ioapics >= MAX_IO_APICS) {
4240 printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " 4240 printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
4241 "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); 4241 "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
4242 return 1; 4242 return 1;
4243 } 4243 }
4244 if (!address) { 4244 if (!address) {
4245 printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" 4245 printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
4246 " found in table, skipping!\n"); 4246 " found in table, skipping!\n");
4247 return 1; 4247 return 1;
4248 } 4248 }
4249 return 0; 4249 return 0;
4250 } 4250 }
4251 4251
4252 void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) 4252 void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4253 { 4253 {
4254 int idx = 0; 4254 int idx = 0;
4255 int entries; 4255 int entries;
4256 4256
4257 if (bad_ioapic(address)) 4257 if (bad_ioapic(address))
4258 return; 4258 return;
4259 4259
4260 idx = nr_ioapics; 4260 idx = nr_ioapics;
4261 4261
4262 mp_ioapics[idx].type = MP_IOAPIC; 4262 mp_ioapics[idx].type = MP_IOAPIC;
4263 mp_ioapics[idx].flags = MPC_APIC_USABLE; 4263 mp_ioapics[idx].flags = MPC_APIC_USABLE;
4264 mp_ioapics[idx].apicaddr = address; 4264 mp_ioapics[idx].apicaddr = address;
4265 4265
4266 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 4266 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
4267 mp_ioapics[idx].apicid = io_apic_unique_id(id); 4267 mp_ioapics[idx].apicid = io_apic_unique_id(id);
4268 mp_ioapics[idx].apicver = io_apic_get_version(idx); 4268 mp_ioapics[idx].apicver = io_apic_get_version(idx);
4269 4269
4270 /* 4270 /*
4271 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 4271 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
4272 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 4272 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
4273 */ 4273 */
4274 entries = io_apic_get_redir_entries(idx); 4274 entries = io_apic_get_redir_entries(idx);
4275 mp_gsi_routing[idx].gsi_base = gsi_base; 4275 mp_gsi_routing[idx].gsi_base = gsi_base;
4276 mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1; 4276 mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1;
4277 4277
4278 /* 4278 /*
4279 * The number of IO-APIC IRQ registers (== #pins): 4279 * The number of IO-APIC IRQ registers (== #pins):
4280 */ 4280 */
4281 nr_ioapic_registers[idx] = entries; 4281 nr_ioapic_registers[idx] = entries;
4282 4282
4283 if (mp_gsi_routing[idx].gsi_end >= gsi_top) 4283 if (mp_gsi_routing[idx].gsi_end >= gsi_top)
4284 gsi_top = mp_gsi_routing[idx].gsi_end + 1; 4284 gsi_top = mp_gsi_routing[idx].gsi_end + 1;
4285 4285
4286 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " 4286 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
4287 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, 4287 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
4288 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, 4288 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
4289 mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end); 4289 mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end);
4290 4290
4291 nr_ioapics++; 4291 nr_ioapics++;
4292 } 4292 }
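
A worked example of the GSI bookkeeping above: registering a first IO-APIC
with 24 redirection entries at gsi_base 0 gives gsi_end 23 and pushes gsi_top
to 24; a second 24-entry IO-APIC registered at gsi_base 24 then covers
GSI 24-47 and raises gsi_top to 48.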
4293 4293
4294 /* Enable IOAPIC early just for system timer */ 4294 /* Enable IOAPIC early just for system timer */
4295 void __init pre_init_apic_IRQ0(void) 4295 void __init pre_init_apic_IRQ0(void)
4296 { 4296 {
4297 struct irq_cfg *cfg; 4297 struct irq_cfg *cfg;
4298 struct irq_desc *desc; 4298 struct irq_desc *desc;
4299 4299
4300 printk(KERN_INFO "Early APIC setup for system timer0\n"); 4300 printk(KERN_INFO "Early APIC setup for system timer0\n");
4301 #ifndef CONFIG_SMP 4301 #ifndef CONFIG_SMP
4302 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 4302 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
4303 #endif 4303 #endif
4304 desc = irq_to_desc_alloc_node(0, 0); 4304 desc = irq_to_desc_alloc_node(0, 0);
4305 4305
4306 setup_local_APIC(); 4306 setup_local_APIC();
4307 4307
4308 cfg = irq_cfg(0); 4308 cfg = irq_cfg(0);
4309 add_pin_to_irq_node(cfg, 0, 0, 0); 4309 add_pin_to_irq_node(cfg, 0, 0, 0);
4310 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 4310 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
4311 4311
4312 setup_IO_APIC_irq(0, 0, 0, desc, 0, 0); 4312 setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
4313 } 4313 }
4314 4314
arch/x86/kernel/apic/nmi.c
1 /* 1 /*
2 * NMI watchdog support on APIC systems 2 * NMI watchdog support on APIC systems
3 * 3 *
4 * Started by Ingo Molnar <mingo@redhat.com> 4 * Started by Ingo Molnar <mingo@redhat.com>
5 * 5 *
6 * Fixes: 6 * Fixes:
7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. 7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
8 * Mikael Pettersson : Power Management for local APIC NMI watchdog. 8 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
9 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. 9 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
10 * Pavel Machek and 10 * Pavel Machek and
11 * Mikael Pettersson : PM converted to driver model. Disable/enable API. 11 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
12 */ 12 */
13 13
14 #include <asm/apic.h> 14 #include <asm/apic.h>
15 15
16 #include <linux/nmi.h> 16 #include <linux/nmi.h>
17 #include <linux/mm.h> 17 #include <linux/mm.h>
18 #include <linux/delay.h> 18 #include <linux/delay.h>
19 #include <linux/interrupt.h> 19 #include <linux/interrupt.h>
20 #include <linux/module.h> 20 #include <linux/module.h>
21 #include <linux/slab.h> 21 #include <linux/slab.h>
22 #include <linux/sysdev.h> 22 #include <linux/sysdev.h>
23 #include <linux/sysctl.h> 23 #include <linux/sysctl.h>
24 #include <linux/percpu.h> 24 #include <linux/percpu.h>
25 #include <linux/kprobes.h> 25 #include <linux/kprobes.h>
26 #include <linux/cpumask.h> 26 #include <linux/cpumask.h>
27 #include <linux/kernel_stat.h> 27 #include <linux/kernel_stat.h>
28 #include <linux/kdebug.h> 28 #include <linux/kdebug.h>
29 #include <linux/smp.h> 29 #include <linux/smp.h>
30 30
31 #include <asm/i8259.h> 31 #include <asm/i8259.h>
32 #include <asm/io_apic.h> 32 #include <asm/io_apic.h>
33 #include <asm/proto.h> 33 #include <asm/proto.h>
34 #include <asm/timer.h> 34 #include <asm/timer.h>
35 35
36 #include <asm/mce.h> 36 #include <asm/mce.h>
37 37
38 #include <asm/mach_traps.h> 38 #include <asm/mach_traps.h>
39 39
40 int unknown_nmi_panic; 40 int unknown_nmi_panic;
41 int nmi_watchdog_enabled; 41 int nmi_watchdog_enabled;
42 42
43 /* For reliability, we're prepared to waste bits here. */ 43 /* For reliability, we're prepared to waste bits here. */
44 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 44 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
45 45
46 /* nmi_active: 46 /* nmi_active:
47 * >0: the lapic NMI watchdog is active, but can be disabled 47 * >0: the lapic NMI watchdog is active, but can be disabled
48 * <0: the lapic NMI watchdog has not been set up, and cannot 48 * <0: the lapic NMI watchdog has not been set up, and cannot
49 * be enabled 49 * be enabled
50 * 0: the lapic NMI watchdog is disabled, but can be enabled 50 * 0: the lapic NMI watchdog is disabled, but can be enabled
51 */ 51 */
52 atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ 52 atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
53 EXPORT_SYMBOL(nmi_active); 53 EXPORT_SYMBOL(nmi_active);
54 54
55 unsigned int nmi_watchdog = NMI_NONE; 55 unsigned int nmi_watchdog = NMI_NONE;
56 EXPORT_SYMBOL(nmi_watchdog); 56 EXPORT_SYMBOL(nmi_watchdog);
57 57
58 static int panic_on_timeout; 58 static int panic_on_timeout;
59 59
60 static unsigned int nmi_hz = HZ; 60 static unsigned int nmi_hz = HZ;
61 static DEFINE_PER_CPU(short, wd_enabled); 61 static DEFINE_PER_CPU(short, wd_enabled);
62 static int endflag __initdata; 62 static int endflag __initdata;
63 63
64 static inline unsigned int get_nmi_count(int cpu) 64 static inline unsigned int get_nmi_count(int cpu)
65 { 65 {
66 return per_cpu(irq_stat, cpu).__nmi_count; 66 return per_cpu(irq_stat, cpu).__nmi_count;
67 } 67 }
68 68
69 static inline int mce_in_progress(void) 69 static inline int mce_in_progress(void)
70 { 70 {
71 #if defined(CONFIG_X86_MCE) 71 #if defined(CONFIG_X86_MCE)
72 return atomic_read(&mce_entry) > 0; 72 return atomic_read(&mce_entry) > 0;
73 #endif 73 #endif
74 return 0; 74 return 0;
75 } 75 }
76 76
77 /* 77 /*
78 * Take the local apic timer and PIT/HPET into account. We don't 78 * Take the local apic timer and PIT/HPET into account. We don't
79 * know which one is active, when we have highres/dyntick on 79 * know which one is active, when we have highres/dyntick on
80 */ 80 */
81 static inline unsigned int get_timer_irqs(int cpu) 81 static inline unsigned int get_timer_irqs(int cpu)
82 { 82 {
83 return per_cpu(irq_stat, cpu).apic_timer_irqs + 83 return per_cpu(irq_stat, cpu).apic_timer_irqs +
84 per_cpu(irq_stat, cpu).irq0_irqs; 84 per_cpu(irq_stat, cpu).irq0_irqs;
85 } 85 }
86 86
87 #ifdef CONFIG_SMP 87 #ifdef CONFIG_SMP
88 /* 88 /*
89 * The performance counters used by NMI_LOCAL_APIC don't trigger when 89 * The performance counters used by NMI_LOCAL_APIC don't trigger when
90 * the CPU is idle. To make sure the NMI watchdog really ticks on all 90 * the CPU is idle. To make sure the NMI watchdog really ticks on all
91 * CPUs during the test make them busy. 91 * CPUs during the test make them busy.
92 */ 92 */
93 static __init void nmi_cpu_busy(void *data) 93 static __init void nmi_cpu_busy(void *data)
94 { 94 {
95 local_irq_enable_in_hardirq(); 95 local_irq_enable_in_hardirq();
96 /* 96 /*
97 * Intentionally don't use cpu_relax here. This is 97 * Intentionally don't use cpu_relax here. This is
98 * to make sure that the performance counter really ticks, 98 * to make sure that the performance counter really ticks,
99 * even if there is a simulator or similar that catches the 99 * even if there is a simulator or similar that catches the
100 * pause instruction. On a real HT machine this is fine because 100 * pause instruction. On a real HT machine this is fine because
101 * all other CPUs are busy with "useless" delay loops and don't 101 * all other CPUs are busy with "useless" delay loops and don't
102 * care if they get somewhat less cycles. 102 * care if they get somewhat less cycles.
103 */ 103 */
104 while (endflag == 0) 104 while (endflag == 0)
105 mb(); 105 mb();
106 } 106 }
107 #endif 107 #endif
108 108
109 static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) 109 static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
110 { 110 {
111 printk(KERN_CONT "\n"); 111 printk(KERN_CONT "\n");
112 112
113 printk(KERN_WARNING 113 printk(KERN_WARNING
114 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", 114 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
115 cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); 115 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
116 116
117 printk(KERN_WARNING 117 printk(KERN_WARNING
118 "Please report this to bugzilla.kernel.org,\n"); 118 "Please report this to bugzilla.kernel.org,\n");
119 printk(KERN_WARNING 119 printk(KERN_WARNING
120 "and attach the output of the 'dmesg' command.\n"); 120 "and attach the output of the 'dmesg' command.\n");
121 121
122 per_cpu(wd_enabled, cpu) = 0; 122 per_cpu(wd_enabled, cpu) = 0;
123 atomic_dec(&nmi_active); 123 atomic_dec(&nmi_active);
124 } 124 }
125 125
126 static void __acpi_nmi_disable(void *__unused) 126 static void __acpi_nmi_disable(void *__unused)
127 { 127 {
128 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); 128 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
129 } 129 }
130 130
131 int __init check_nmi_watchdog(void) 131 int __init check_nmi_watchdog(void)
132 { 132 {
133 unsigned int *prev_nmi_count; 133 unsigned int *prev_nmi_count;
134 int cpu; 134 int cpu;
135 135
136 if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) 136 if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
137 return 0; 137 return 0;
138 138
139 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); 139 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
140 if (!prev_nmi_count) 140 if (!prev_nmi_count)
141 goto error; 141 goto error;
142 142
143 printk(KERN_INFO "Testing NMI watchdog ... "); 143 printk(KERN_INFO "Testing NMI watchdog ... ");
144 144
145 #ifdef CONFIG_SMP 145 #ifdef CONFIG_SMP
146 if (nmi_watchdog == NMI_LOCAL_APIC) 146 if (nmi_watchdog == NMI_LOCAL_APIC)
147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); 147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
148 #endif 148 #endif
149 149
150 for_each_possible_cpu(cpu) 150 for_each_possible_cpu(cpu)
151 prev_nmi_count[cpu] = get_nmi_count(cpu); 151 prev_nmi_count[cpu] = get_nmi_count(cpu);
152 local_irq_enable(); 152 local_irq_enable();
153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ 153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
154 154
155 for_each_online_cpu(cpu) { 155 for_each_online_cpu(cpu) {
156 if (!per_cpu(wd_enabled, cpu)) 156 if (!per_cpu(wd_enabled, cpu))
157 continue; 157 continue;
158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) 158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
159 report_broken_nmi(cpu, prev_nmi_count); 159 report_broken_nmi(cpu, prev_nmi_count);
160 } 160 }
161 endflag = 1; 161 endflag = 1;
162 if (!atomic_read(&nmi_active)) { 162 if (!atomic_read(&nmi_active)) {
163 kfree(prev_nmi_count); 163 kfree(prev_nmi_count);
164 atomic_set(&nmi_active, -1); 164 atomic_set(&nmi_active, -1);
165 goto error; 165 goto error;
166 } 166 }
167 printk("OK.\n"); 167 printk("OK.\n");
168 168
169 /* 169 /*
170 * now that we know it works we can reduce NMI frequency to 170 * now that we know it works we can reduce NMI frequency to
171 * something more reasonable; makes a difference in some configs 171 * something more reasonable; makes a difference in some configs
172 */ 172 */
173 if (nmi_watchdog == NMI_LOCAL_APIC) 173 if (nmi_watchdog == NMI_LOCAL_APIC)
174 nmi_hz = lapic_adjust_nmi_hz(1); 174 nmi_hz = lapic_adjust_nmi_hz(1);
175 175
176 kfree(prev_nmi_count); 176 kfree(prev_nmi_count);
177 return 0; 177 return 0;
178 error: 178 error:
179 if (nmi_watchdog == NMI_IO_APIC) { 179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259) 180 if (!timer_through_8259)
181 legacy_pic->chip->mask(0); 181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1); 182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 } 183 }
184 184
185 #ifdef CONFIG_X86_32 185 #ifdef CONFIG_X86_32
186 timer_ack = 0; 186 timer_ack = 0;
187 #endif 187 #endif
188 return -1; 188 return -1;
189 } 189 }
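
The error path above shows this commit's calling-convention split in action:
chip callbacks now take a struct irq_data, so code that only has a bare irq
number, like the timer watchdog teardown here, goes through the new
legacy_pic->mask() hook instead of calling legacy_pic->chip->mask() directly.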
190 190
191 static int __init setup_nmi_watchdog(char *str) 191 static int __init setup_nmi_watchdog(char *str)
192 { 192 {
193 unsigned int nmi; 193 unsigned int nmi;
194 194
195 if (!strncmp(str, "panic", 5)) { 195 if (!strncmp(str, "panic", 5)) {
196 panic_on_timeout = 1; 196 panic_on_timeout = 1;
197 str = strchr(str, ','); 197 str = strchr(str, ',');
198 if (!str) 198 if (!str)
199 return 1; 199 return 1;
200 ++str; 200 ++str;
201 } 201 }
202 202
203 if (!strncmp(str, "lapic", 5)) 203 if (!strncmp(str, "lapic", 5))
204 nmi_watchdog = NMI_LOCAL_APIC; 204 nmi_watchdog = NMI_LOCAL_APIC;
205 else if (!strncmp(str, "ioapic", 6)) 205 else if (!strncmp(str, "ioapic", 6))
206 nmi_watchdog = NMI_IO_APIC; 206 nmi_watchdog = NMI_IO_APIC;
207 else { 207 else {
208 get_option(&str, &nmi); 208 get_option(&str, &nmi);
209 if (nmi >= NMI_INVALID) 209 if (nmi >= NMI_INVALID)
210 return 0; 210 return 0;
211 nmi_watchdog = nmi; 211 nmi_watchdog = nmi;
212 } 212 }
213 213
214 return 1; 214 return 1;
215 } 215 }
216 __setup("nmi_watchdog=", setup_nmi_watchdog); 216 __setup("nmi_watchdog=", setup_nmi_watchdog);
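
Per the parser above, the boot parameter takes an optional leading "panic"
flag followed by a mode; for example:

	nmi_watchdog=panic,lapic	(panic on lockup, local APIC watchdog)
	nmi_watchdog=ioapic		(IO-APIC timer based watchdog)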
217 217
218 /* 218 /*
219 * Suspend/resume support 219 * Suspend/resume support
220 */ 220 */
221 #ifdef CONFIG_PM 221 #ifdef CONFIG_PM
222 222
223 static int nmi_pm_active; /* nmi_active before suspend */ 223 static int nmi_pm_active; /* nmi_active before suspend */
224 224
225 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) 225 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
226 { 226 {
227 /* only CPU0 goes here, other CPUs should be offline */ 227 /* only CPU0 goes here, other CPUs should be offline */
228 nmi_pm_active = atomic_read(&nmi_active); 228 nmi_pm_active = atomic_read(&nmi_active);
229 stop_apic_nmi_watchdog(NULL); 229 stop_apic_nmi_watchdog(NULL);
230 BUG_ON(atomic_read(&nmi_active) != 0); 230 BUG_ON(atomic_read(&nmi_active) != 0);
231 return 0; 231 return 0;
232 } 232 }
233 233
234 static int lapic_nmi_resume(struct sys_device *dev) 234 static int lapic_nmi_resume(struct sys_device *dev)
235 { 235 {
236 /* only CPU0 goes here, other CPUs should be offline */ 236 /* only CPU0 goes here, other CPUs should be offline */
237 if (nmi_pm_active > 0) { 237 if (nmi_pm_active > 0) {
238 setup_apic_nmi_watchdog(NULL); 238 setup_apic_nmi_watchdog(NULL);
239 touch_nmi_watchdog(); 239 touch_nmi_watchdog();
240 } 240 }
241 return 0; 241 return 0;
242 } 242 }
243 243
244 static struct sysdev_class nmi_sysclass = { 244 static struct sysdev_class nmi_sysclass = {
245 .name = "lapic_nmi", 245 .name = "lapic_nmi",
246 .resume = lapic_nmi_resume, 246 .resume = lapic_nmi_resume,
247 .suspend = lapic_nmi_suspend, 247 .suspend = lapic_nmi_suspend,
248 }; 248 };
249 249
250 static struct sys_device device_lapic_nmi = { 250 static struct sys_device device_lapic_nmi = {
251 .id = 0, 251 .id = 0,
252 .cls = &nmi_sysclass, 252 .cls = &nmi_sysclass,
253 }; 253 };
254 254
255 static int __init init_lapic_nmi_sysfs(void) 255 static int __init init_lapic_nmi_sysfs(void)
256 { 256 {
257 int error; 257 int error;
258 258
259 /* 259 /*
260 * should really be a BUG_ON but because this is an 260 * should really be a BUG_ON but because this is an
261 * init call, it just doesn't work. -dcz 261 * init call, it just doesn't work. -dcz
262 */ 262 */
263 if (nmi_watchdog != NMI_LOCAL_APIC) 263 if (nmi_watchdog != NMI_LOCAL_APIC)
264 return 0; 264 return 0;
265 265
266 if (atomic_read(&nmi_active) < 0) 266 if (atomic_read(&nmi_active) < 0)
267 return 0; 267 return 0;
268 268
269 error = sysdev_class_register(&nmi_sysclass); 269 error = sysdev_class_register(&nmi_sysclass);
270 if (!error) 270 if (!error)
271 error = sysdev_register(&device_lapic_nmi); 271 error = sysdev_register(&device_lapic_nmi);
272 return error; 272 return error;
273 } 273 }
274 274
275 /* must come after the local APIC's device_initcall() */ 275 /* must come after the local APIC's device_initcall() */
276 late_initcall(init_lapic_nmi_sysfs); 276 late_initcall(init_lapic_nmi_sysfs);
277 277
278 #endif /* CONFIG_PM */ 278 #endif /* CONFIG_PM */
279 279
280 static void __acpi_nmi_enable(void *__unused) 280 static void __acpi_nmi_enable(void *__unused)
281 { 281 {
282 apic_write(APIC_LVT0, APIC_DM_NMI); 282 apic_write(APIC_LVT0, APIC_DM_NMI);
283 } 283 }
284 284
285 /* 285 /*
286 * Enable timer based NMIs on all CPUs: 286 * Enable timer based NMIs on all CPUs:
287 */ 287 */
288 void acpi_nmi_enable(void) 288 void acpi_nmi_enable(void)
289 { 289 {
290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) 290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
291 on_each_cpu(__acpi_nmi_enable, NULL, 1); 291 on_each_cpu(__acpi_nmi_enable, NULL, 1);
292 } 292 }
293 293
294 /* 294 /*
295 * Disable timer based NMIs on all CPUs: 295 * Disable timer based NMIs on all CPUs:
296 */ 296 */
297 void acpi_nmi_disable(void) 297 void acpi_nmi_disable(void)
298 { 298 {
299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) 299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
300 on_each_cpu(__acpi_nmi_disable, NULL, 1); 300 on_each_cpu(__acpi_nmi_disable, NULL, 1);
301 } 301 }
302 302
303 /* 303 /*
304 * This function is called as soon as the LAPIC NMI watchdog driver has everything 304 * This function is called as soon as the LAPIC NMI watchdog driver has everything
305 * in place and it's ready to check if the NMIs belong to the NMI watchdog 305 * in place and it's ready to check if the NMIs belong to the NMI watchdog
306 */ 306 */
307 void cpu_nmi_set_wd_enabled(void) 307 void cpu_nmi_set_wd_enabled(void)
308 { 308 {
309 __get_cpu_var(wd_enabled) = 1; 309 __get_cpu_var(wd_enabled) = 1;
310 } 310 }
311 311
312 void setup_apic_nmi_watchdog(void *unused) 312 void setup_apic_nmi_watchdog(void *unused)
313 { 313 {
314 if (__get_cpu_var(wd_enabled)) 314 if (__get_cpu_var(wd_enabled))
315 return; 315 return;
316 316
317 /* cheap hack to support suspend/resume */ 317 /* cheap hack to support suspend/resume */
318 /* if cpu0 is not active, neither should the other cpus be */ 318 /* if cpu0 is not active, neither should the other cpus be */
319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) 319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
320 return; 320 return;
321 321
322 switch (nmi_watchdog) { 322 switch (nmi_watchdog) {
323 case NMI_LOCAL_APIC: 323 case NMI_LOCAL_APIC:
324 if (lapic_watchdog_init(nmi_hz) < 0) { 324 if (lapic_watchdog_init(nmi_hz) < 0) {
325 __get_cpu_var(wd_enabled) = 0; 325 __get_cpu_var(wd_enabled) = 0;
326 return; 326 return;
327 } 327 }
328 /* FALL THROUGH */ 328 /* FALL THROUGH */
329 case NMI_IO_APIC: 329 case NMI_IO_APIC:
330 __get_cpu_var(wd_enabled) = 1; 330 __get_cpu_var(wd_enabled) = 1;
331 atomic_inc(&nmi_active); 331 atomic_inc(&nmi_active);
332 } 332 }
333 } 333 }
334 334
335 void stop_apic_nmi_watchdog(void *unused) 335 void stop_apic_nmi_watchdog(void *unused)
336 { 336 {
337 /* only support LOCAL and IO APICs for now */ 337 /* only support LOCAL and IO APICs for now */
338 if (!nmi_watchdog_active()) 338 if (!nmi_watchdog_active())
339 return; 339 return;
340 if (__get_cpu_var(wd_enabled) == 0) 340 if (__get_cpu_var(wd_enabled) == 0)
341 return; 341 return;
342 if (nmi_watchdog == NMI_LOCAL_APIC) 342 if (nmi_watchdog == NMI_LOCAL_APIC)
343 lapic_watchdog_stop(); 343 lapic_watchdog_stop();
344 else 344 else
345 __acpi_nmi_disable(NULL); 345 __acpi_nmi_disable(NULL);
346 __get_cpu_var(wd_enabled) = 0; 346 __get_cpu_var(wd_enabled) = 0;
347 atomic_dec(&nmi_active); 347 atomic_dec(&nmi_active);
348 } 348 }
349 349
350 /* 350 /*
351 * the best way to detect whether a CPU has a 'hard lockup' problem 351 * the best way to detect whether a CPU has a 'hard lockup' problem
352 * is to check its local APIC timer IRQ counts. If they are not 352 * is to check its local APIC timer IRQ counts. If they are not
353 * changing then that CPU has some problem. 353 * changing then that CPU has some problem.
354 * 354 *
355 * as these watchdog NMI IRQs are generated on every CPU, we only 355 * as these watchdog NMI IRQs are generated on every CPU, we only
356 * have to check the current processor. 356 * have to check the current processor.
357 * 357 *
358 * since NMIs don't listen to _any_ locks, we have to be extremely 358 * since NMIs don't listen to _any_ locks, we have to be extremely
359 * careful not to rely on unsafe variables. The printk might lock 359 * careful not to rely on unsafe variables. The printk might lock
360 * up though, so we have to break up any console locks first ... 360 * up though, so we have to break up any console locks first ...
361 * [when there will be more tty-related locks, break them up here too!] 361 * [when there will be more tty-related locks, break them up here too!]
362 */ 362 */
363 363
364 static DEFINE_PER_CPU(unsigned, last_irq_sum); 364 static DEFINE_PER_CPU(unsigned, last_irq_sum);
365 static DEFINE_PER_CPU(long, alert_counter); 365 static DEFINE_PER_CPU(long, alert_counter);
366 static DEFINE_PER_CPU(int, nmi_touch); 366 static DEFINE_PER_CPU(int, nmi_touch);
367 367
368 void touch_nmi_watchdog(void) 368 void touch_nmi_watchdog(void)
369 { 369 {
370 if (nmi_watchdog_active()) { 370 if (nmi_watchdog_active()) {
371 unsigned cpu; 371 unsigned cpu;
372 372
373 /* 373 /*
374 * Tell other CPUs to reset their alert counters. We cannot 374 * Tell other CPUs to reset their alert counters. We cannot
375 * do it ourselves because the alert count increase is not 375 * do it ourselves because the alert count increase is not
376 * atomic. 376 * atomic.
377 */ 377 */
378 for_each_present_cpu(cpu) { 378 for_each_present_cpu(cpu) {
379 if (per_cpu(nmi_touch, cpu) != 1) 379 if (per_cpu(nmi_touch, cpu) != 1)
380 per_cpu(nmi_touch, cpu) = 1; 380 per_cpu(nmi_touch, cpu) = 1;
381 } 381 }
382 } 382 }
383 383
384 /* 384 /*
385 * Tickle the softlockup detector too: 385 * Tickle the softlockup detector too:
386 */ 386 */
387 touch_softlockup_watchdog(); 387 touch_softlockup_watchdog();
388 } 388 }
389 EXPORT_SYMBOL(touch_nmi_watchdog); 389 EXPORT_SYMBOL(touch_nmi_watchdog);
390 390
391 notrace __kprobes int 391 notrace __kprobes int
392 nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) 392 nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
393 { 393 {
394 /* 394 /*
395 * Since current_thread_info()-> is always on the stack, and we 395 * Since current_thread_info()-> is always on the stack, and we
396 * always switch the stack NMI-atomically, it's safe to use 396 * always switch the stack NMI-atomically, it's safe to use
397 * smp_processor_id(). 397 * smp_processor_id().
398 */ 398 */
399 unsigned int sum; 399 unsigned int sum;
400 int touched = 0; 400 int touched = 0;
401 int cpu = smp_processor_id(); 401 int cpu = smp_processor_id();
402 int rc = 0; 402 int rc = 0;
403 403
404 sum = get_timer_irqs(cpu); 404 sum = get_timer_irqs(cpu);
405 405
406 if (__get_cpu_var(nmi_touch)) { 406 if (__get_cpu_var(nmi_touch)) {
407 __get_cpu_var(nmi_touch) = 0; 407 __get_cpu_var(nmi_touch) = 0;
408 touched = 1; 408 touched = 1;
409 } 409 }
410 410
411 /* We can be called before check_nmi_watchdog, hence NULL check. */ 411 /* We can be called before check_nmi_watchdog, hence NULL check. */
412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ 413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
414 414
415 raw_spin_lock(&lock); 415 raw_spin_lock(&lock);
416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); 416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
417 show_regs(regs); 417 show_regs(regs);
418 dump_stack(); 418 dump_stack();
419 raw_spin_unlock(&lock); 419 raw_spin_unlock(&lock);
420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
421 421
422 rc = 1; 422 rc = 1;
423 } 423 }
424 424
425 /* Could check oops_in_progress here too, but it's safer not to */ 425 /* Could check oops_in_progress here too, but it's safer not to */
426 if (mce_in_progress()) 426 if (mce_in_progress())
427 touched = 1; 427 touched = 1;
428 428
429 /* if none of the timers is firing, this cpu isn't doing much */ 429 /* if none of the timers is firing, this cpu isn't doing much */
430 if (!touched && __get_cpu_var(last_irq_sum) == sum) { 430 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
431 /* 431 /*
432 * Ayiee, looks like this CPU is stuck ... 432 * Ayiee, looks like this CPU is stuck ...
433 * wait a few IRQs (5 seconds) before doing the oops ... 433 * wait a few IRQs (5 seconds) before doing the oops ...
434 */ 434 */
435 __this_cpu_inc(alert_counter); 435 __this_cpu_inc(alert_counter);
436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz) 436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
437 /* 437 /*
438 * die_nmi will return ONLY if NOTIFY_STOP happens.. 438 * die_nmi will return ONLY if NOTIFY_STOP happens..
439 */ 439 */
440 die_nmi("BUG: NMI Watchdog detected LOCKUP", 440 die_nmi("BUG: NMI Watchdog detected LOCKUP",
441 regs, panic_on_timeout); 441 regs, panic_on_timeout);
442 } else { 442 } else {
443 __get_cpu_var(last_irq_sum) = sum; 443 __get_cpu_var(last_irq_sum) = sum;
444 __this_cpu_write(alert_counter, 0); 444 __this_cpu_write(alert_counter, 0);
445 } 445 }
446 446
447 /* see if the nmi watchdog went off */ 447 /* see if the nmi watchdog went off */
448 if (!__get_cpu_var(wd_enabled)) 448 if (!__get_cpu_var(wd_enabled))
449 return rc; 449 return rc;
450 switch (nmi_watchdog) { 450 switch (nmi_watchdog) {
451 case NMI_LOCAL_APIC: 451 case NMI_LOCAL_APIC:
452 rc |= lapic_wd_event(nmi_hz); 452 rc |= lapic_wd_event(nmi_hz);
453 break; 453 break;
454 case NMI_IO_APIC: 454 case NMI_IO_APIC:
455 /* 455 /*
456 * don't know how to accurately check for this. 456 * don't know how to accurately check for this.
457 * just assume it was a watchdog timer interrupt. 457 * just assume it was a watchdog timer interrupt.
458 * This matches the old behaviour. 458 * This matches the old behaviour.
459 */ 459 */
460 rc = 1; 460 rc = 1;
461 break; 461 break;
462 } 462 }
463 return rc; 463 return rc;
464 } 464 }
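
The 5 * nmi_hz threshold above is what makes the "5 seconds" in the comment
literal: the watchdog NMI fires roughly nmi_hz times per second on each CPU,
so alert_counter reaching 5 * nmi_hz means the timer IRQ sum has not moved
for about five seconds on this CPU.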
465 465
466 #ifdef CONFIG_SYSCTL 466 #ifdef CONFIG_SYSCTL
467 467
468 static void enable_ioapic_nmi_watchdog_single(void *unused) 468 static void enable_ioapic_nmi_watchdog_single(void *unused)
469 { 469 {
470 __get_cpu_var(wd_enabled) = 1; 470 __get_cpu_var(wd_enabled) = 1;
471 atomic_inc(&nmi_active); 471 atomic_inc(&nmi_active);
472 __acpi_nmi_enable(NULL); 472 __acpi_nmi_enable(NULL);
473 } 473 }
474 474
475 static void enable_ioapic_nmi_watchdog(void) 475 static void enable_ioapic_nmi_watchdog(void)
476 { 476 {
477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); 477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
478 touch_nmi_watchdog(); 478 touch_nmi_watchdog();
479 } 479 }
480 480
481 static void disable_ioapic_nmi_watchdog(void) 481 static void disable_ioapic_nmi_watchdog(void)
482 { 482 {
483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); 483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
484 } 484 }
485 485
486 static int __init setup_unknown_nmi_panic(char *str) 486 static int __init setup_unknown_nmi_panic(char *str)
487 { 487 {
488 unknown_nmi_panic = 1; 488 unknown_nmi_panic = 1;
489 return 1; 489 return 1;
490 } 490 }
491 __setup("unknown_nmi_panic", setup_unknown_nmi_panic); 491 __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
492 492
493 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) 493 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
494 { 494 {
495 unsigned char reason = get_nmi_reason(); 495 unsigned char reason = get_nmi_reason();
496 char buf[64]; 496 char buf[64];
497 497
498 sprintf(buf, "NMI received for unknown reason %02x\n", reason); 498 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
499 die_nmi(buf, regs, 1); /* Always panic here */ 499 die_nmi(buf, regs, 1); /* Always panic here */
500 return 0; 500 return 0;
501 } 501 }
502 502
503 /* 503 /*
504 * proc handler for /proc/sys/kernel/nmi 504 * proc handler for /proc/sys/kernel/nmi
505 */ 505 */
506 int proc_nmi_enabled(struct ctl_table *table, int write, 506 int proc_nmi_enabled(struct ctl_table *table, int write,
507 void __user *buffer, size_t *length, loff_t *ppos) 507 void __user *buffer, size_t *length, loff_t *ppos)
508 { 508 {
509 int old_state; 509 int old_state;
510 510
511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; 511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
512 old_state = nmi_watchdog_enabled; 512 old_state = nmi_watchdog_enabled;
513 proc_dointvec(table, write, buffer, length, ppos); 513 proc_dointvec(table, write, buffer, length, ppos);
514 if (!!old_state == !!nmi_watchdog_enabled) 514 if (!!old_state == !!nmi_watchdog_enabled)
515 return 0; 515 return 0;
516 516
517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { 517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
518 printk(KERN_WARNING 518 printk(KERN_WARNING
519 "NMI watchdog is permanently disabled\n"); 519 "NMI watchdog is permanently disabled\n");
520 return -EIO; 520 return -EIO;
521 } 521 }
522 522
523 if (nmi_watchdog == NMI_LOCAL_APIC) { 523 if (nmi_watchdog == NMI_LOCAL_APIC) {
524 if (nmi_watchdog_enabled) 524 if (nmi_watchdog_enabled)
525 enable_lapic_nmi_watchdog(); 525 enable_lapic_nmi_watchdog();
526 else 526 else
527 disable_lapic_nmi_watchdog(); 527 disable_lapic_nmi_watchdog();
528 } else if (nmi_watchdog == NMI_IO_APIC) { 528 } else if (nmi_watchdog == NMI_IO_APIC) {
529 if (nmi_watchdog_enabled) 529 if (nmi_watchdog_enabled)
530 enable_ioapic_nmi_watchdog(); 530 enable_ioapic_nmi_watchdog();
531 else 531 else
532 disable_ioapic_nmi_watchdog(); 532 disable_ioapic_nmi_watchdog();
533 } else { 533 } else {
534 printk(KERN_WARNING 534 printk(KERN_WARNING
535 "NMI watchdog doesn't know what hardware to touch\n"); 535 "NMI watchdog doesn't know what hardware to touch\n");
536 return -EIO; 536 return -EIO;
537 } 537 }
538 return 0; 538 return 0;
539 } 539 }
540 540
541 #endif /* CONFIG_SYSCTL */ 541 #endif /* CONFIG_SYSCTL */
542 542
543 int do_nmi_callback(struct pt_regs *regs, int cpu) 543 int do_nmi_callback(struct pt_regs *regs, int cpu)
544 { 544 {
545 #ifdef CONFIG_SYSCTL 545 #ifdef CONFIG_SYSCTL
546 if (unknown_nmi_panic) 546 if (unknown_nmi_panic)
547 return unknown_nmi_panic_callback(regs, cpu); 547 return unknown_nmi_panic_callback(regs, cpu);
548 #endif 548 #endif
549 return 0; 549 return 0;
550 } 550 }
551 551
552 void arch_trigger_all_cpu_backtrace(void) 552 void arch_trigger_all_cpu_backtrace(void)
553 { 553 {
554 int i; 554 int i;
555 555
556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); 556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
557 557
558 printk(KERN_INFO "sending NMI to all CPUs:\n"); 558 printk(KERN_INFO "sending NMI to all CPUs:\n");
559 apic->send_IPI_all(NMI_VECTOR); 559 apic->send_IPI_all(NMI_VECTOR);
560 560
561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */ 561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
562 for (i = 0; i < 10 * 1000; i++) { 562 for (i = 0; i < 10 * 1000; i++) {
563 if (cpumask_empty(to_cpumask(backtrace_mask))) 563 if (cpumask_empty(to_cpumask(backtrace_mask)))
564 break; 564 break;
565 mdelay(1); 565 mdelay(1);
566 } 566 }
567 } 567 }
568 568
arch/x86/kernel/i8259.c
1 #include <linux/linkage.h> 1 #include <linux/linkage.h>
2 #include <linux/errno.h> 2 #include <linux/errno.h>
3 #include <linux/signal.h> 3 #include <linux/signal.h>
4 #include <linux/sched.h> 4 #include <linux/sched.h>
5 #include <linux/ioport.h> 5 #include <linux/ioport.h>
6 #include <linux/interrupt.h> 6 #include <linux/interrupt.h>
7 #include <linux/timex.h> 7 #include <linux/timex.h>
8 #include <linux/random.h> 8 #include <linux/random.h>
9 #include <linux/init.h> 9 #include <linux/init.h>
10 #include <linux/kernel_stat.h> 10 #include <linux/kernel_stat.h>
11 #include <linux/sysdev.h> 11 #include <linux/sysdev.h>
12 #include <linux/bitops.h> 12 #include <linux/bitops.h>
13 #include <linux/acpi.h> 13 #include <linux/acpi.h>
14 #include <linux/io.h> 14 #include <linux/io.h>
15 #include <linux/delay.h> 15 #include <linux/delay.h>
16 16
17 #include <asm/atomic.h> 17 #include <asm/atomic.h>
18 #include <asm/system.h> 18 #include <asm/system.h>
19 #include <asm/timer.h> 19 #include <asm/timer.h>
20 #include <asm/hw_irq.h> 20 #include <asm/hw_irq.h>
21 #include <asm/pgtable.h> 21 #include <asm/pgtable.h>
22 #include <asm/desc.h> 22 #include <asm/desc.h>
23 #include <asm/apic.h> 23 #include <asm/apic.h>
24 #include <asm/i8259.h> 24 #include <asm/i8259.h>
25 25
26 /* 26 /*
27 * This is the 'legacy' 8259A Programmable Interrupt Controller, 27 * This is the 'legacy' 8259A Programmable Interrupt Controller,
28 * present in the majority of PC/AT boxes. 28 * present in the majority of PC/AT boxes.
29 * plus some generic x86-specific things, if generic specifics make 29 * plus some generic x86-specific things, if generic specifics make
30 * any sense at all. 30 * any sense at all.
31 */ 31 */
32 static void init_8259A(int auto_eoi);
32 33
33 static int i8259A_auto_eoi; 34 static int i8259A_auto_eoi;
34 DEFINE_RAW_SPINLOCK(i8259A_lock); 35 DEFINE_RAW_SPINLOCK(i8259A_lock);
35 static void mask_and_ack_8259A(unsigned int);
36 static void mask_8259A(void);
37 static void unmask_8259A(void);
38 static void disable_8259A_irq(unsigned int irq);
39 static void enable_8259A_irq(unsigned int irq);
40 static void init_8259A(int auto_eoi);
41 static int i8259A_irq_pending(unsigned int irq);
42 36
43 struct irq_chip i8259A_chip = {
44 .name = "XT-PIC",
45 .mask = disable_8259A_irq,
46 .disable = disable_8259A_irq,
47 .unmask = enable_8259A_irq,
48 .mask_ack = mask_and_ack_8259A,
49 };
50
51 /* 37 /*
52 * 8259A PIC functions to handle ISA devices: 38 * 8259A PIC functions to handle ISA devices:
53 */ 39 */
54 40
55 /* 41 /*
56 * This contains the irq mask for both 8259A irq controllers, 42 * This contains the irq mask for both 8259A irq controllers,
57 */ 43 */
58 unsigned int cached_irq_mask = 0xffff; 44 unsigned int cached_irq_mask = 0xffff;
59 45
60 /* 46 /*
61 * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) 47 * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
62 * boards the timer interrupt is not really connected to any IO-APIC pin, 48 * boards the timer interrupt is not really connected to any IO-APIC pin,
63 * it's fed to the master 8259A's IR0 line only. 49 * it's fed to the master 8259A's IR0 line only.
64 * 50 *
65 * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. 51 * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
66 * This 'mixed mode' IRQ handling costs nothing because it's only used 52 * This 'mixed mode' IRQ handling costs nothing because it's only used
67 * at IRQ setup time. 53 * at IRQ setup time.
68 */ 54 */
69 unsigned long io_apic_irqs; 55 unsigned long io_apic_irqs;
70 56
71 static void disable_8259A_irq(unsigned int irq) 57 static void mask_8259A_irq(unsigned int irq)
72 { 58 {
73 unsigned int mask = 1 << irq; 59 unsigned int mask = 1 << irq;
74 unsigned long flags; 60 unsigned long flags;
75 61
76 raw_spin_lock_irqsave(&i8259A_lock, flags); 62 raw_spin_lock_irqsave(&i8259A_lock, flags);
77 cached_irq_mask |= mask; 63 cached_irq_mask |= mask;
78 if (irq & 8) 64 if (irq & 8)
79 outb(cached_slave_mask, PIC_SLAVE_IMR); 65 outb(cached_slave_mask, PIC_SLAVE_IMR);
80 else 66 else
81 outb(cached_master_mask, PIC_MASTER_IMR); 67 outb(cached_master_mask, PIC_MASTER_IMR);
82 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 68 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
83 } 69 }
84 70
85 static void enable_8259A_irq(unsigned int irq) 71 static void disable_8259A_irq(struct irq_data *data)
86 { 72 {
73 mask_8259A_irq(data->irq);
74 }
75
76 static void unmask_8259A_irq(unsigned int irq)
77 {
87 unsigned int mask = ~(1 << irq); 78 unsigned int mask = ~(1 << irq);
88 unsigned long flags; 79 unsigned long flags;
89 80
90 raw_spin_lock_irqsave(&i8259A_lock, flags); 81 raw_spin_lock_irqsave(&i8259A_lock, flags);
91 cached_irq_mask &= mask; 82 cached_irq_mask &= mask;
92 if (irq & 8) 83 if (irq & 8)
93 outb(cached_slave_mask, PIC_SLAVE_IMR); 84 outb(cached_slave_mask, PIC_SLAVE_IMR);
94 else 85 else
95 outb(cached_master_mask, PIC_MASTER_IMR); 86 outb(cached_master_mask, PIC_MASTER_IMR);
96 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 87 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
97 } 88 }
98 89
90 static void enable_8259A_irq(struct irq_data *data)
91 {
92 unmask_8259A_irq(data->irq);
93 }
94
99 static int i8259A_irq_pending(unsigned int irq) 95 static int i8259A_irq_pending(unsigned int irq)
100 { 96 {
101 unsigned int mask = 1<<irq; 97 unsigned int mask = 1<<irq;
102 unsigned long flags; 98 unsigned long flags;
103 int ret; 99 int ret;
104 100
105 raw_spin_lock_irqsave(&i8259A_lock, flags); 101 raw_spin_lock_irqsave(&i8259A_lock, flags);
106 if (irq < 8) 102 if (irq < 8)
107 ret = inb(PIC_MASTER_CMD) & mask; 103 ret = inb(PIC_MASTER_CMD) & mask;
108 else 104 else
109 ret = inb(PIC_SLAVE_CMD) & (mask >> 8); 105 ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
110 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 106 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
111 107
112 return ret; 108 return ret;
113 } 109 }
114 110
115 static void make_8259A_irq(unsigned int irq) 111 static void make_8259A_irq(unsigned int irq)
116 { 112 {
117 disable_irq_nosync(irq); 113 disable_irq_nosync(irq);
118 io_apic_irqs &= ~(1<<irq); 114 io_apic_irqs &= ~(1<<irq);
119 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, 115 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
120 "XT"); 116 i8259A_chip.name);
121 enable_irq(irq); 117 enable_irq(irq);
122 } 118 }
123 119
124 /* 120 /*
125 * This function is assumed to be called rarely. Switching between 121 * This function is assumed to be called rarely. Switching between
126 * 8259A registers is slow. 122 * 8259A registers is slow.
127 * This has to be protected by the irq controller spinlock 123 * This has to be protected by the irq controller spinlock
128 * before being called. 124 * before being called.
129 */ 125 */
130 static inline int i8259A_irq_real(unsigned int irq) 126 static inline int i8259A_irq_real(unsigned int irq)
131 { 127 {
132 int value; 128 int value;
133 int irqmask = 1<<irq; 129 int irqmask = 1<<irq;
134 130
135 if (irq < 8) { 131 if (irq < 8) {
136 outb(0x0B, PIC_MASTER_CMD); /* ISR register */ 132 outb(0x0B, PIC_MASTER_CMD); /* ISR register */
137 value = inb(PIC_MASTER_CMD) & irqmask; 133 value = inb(PIC_MASTER_CMD) & irqmask;
138 outb(0x0A, PIC_MASTER_CMD); /* back to the IRR register */ 134 outb(0x0A, PIC_MASTER_CMD); /* back to the IRR register */
139 return value; 135 return value;
140 } 136 }
141 outb(0x0B, PIC_SLAVE_CMD); /* ISR register */ 137 outb(0x0B, PIC_SLAVE_CMD); /* ISR register */
142 value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); 138 value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
143 outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */ 139 outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */
144 return value; 140 return value;
145 } 141 }
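
For reference, the 0x0B/0x0A writes above are OCW3 commands that select which
register the next read of the command port returns; with named constants
(illustrative here, summarizing 8259A datasheet behaviour) they would read:

	#define PIC_READ_IRR	0x0a	/* OCW3: next read returns the IRR */
	#define PIC_READ_ISR	0x0b	/* OCW3: next read returns the ISR */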
146 142
147 /* 143 /*
148 * Careful! The 8259A is a fragile beast, it pretty 144 * Careful! The 8259A is a fragile beast, it pretty
149 * much _has_ to be done exactly like this (mask it 145 * much _has_ to be done exactly like this (mask it
150 * first, _then_ send the EOI, and the order of EOI 146 * first, _then_ send the EOI, and the order of EOI
151 * to the two 8259s is important!) 147 * to the two 8259s is important!)
152 */ 148 */
153 static void mask_and_ack_8259A(unsigned int irq) 149 static void mask_and_ack_8259A(struct irq_data *data)
154 { 150 {
151 unsigned int irq = data->irq;
155 unsigned int irqmask = 1 << irq; 152 unsigned int irqmask = 1 << irq;
156 unsigned long flags; 153 unsigned long flags;
157 154
158 raw_spin_lock_irqsave(&i8259A_lock, flags); 155 raw_spin_lock_irqsave(&i8259A_lock, flags);
159 /* 156 /*
160 * Lightweight spurious IRQ detection. We do not want 157 * Lightweight spurious IRQ detection. We do not want
161 * to overdo spurious IRQ handling - it's usually a sign 158 * to overdo spurious IRQ handling - it's usually a sign
162 * of hardware problems, so we only do the checks we can 159 * of hardware problems, so we only do the checks we can
163 * do without slowing down good hardware unnecessarily. 160 * do without slowing down good hardware unnecessarily.
164 * 161 *
165 * Note that IRQ7 and IRQ15 (the two spurious IRQs 162 * Note that IRQ7 and IRQ15 (the two spurious IRQs
166 * usually resulting from the 8259A-1|2 PICs) occur 163 * usually resulting from the 8259A-1|2 PICs) occur
167 * even if the IRQ is masked in the 8259A. Thus we 164 * even if the IRQ is masked in the 8259A. Thus we
168 * can check spurious 8259A IRQs without doing the 165 * can check spurious 8259A IRQs without doing the
169 * quite slow i8259A_irq_real() call for every IRQ. 166 * quite slow i8259A_irq_real() call for every IRQ.
170 * This does not cover 100% of spurious interrupts, 167 * This does not cover 100% of spurious interrupts,
171 * but should be enough to warn the user that there 168 * but should be enough to warn the user that there
172 * is something bad going on ... 169 * is something bad going on ...
173 */ 170 */
174 if (cached_irq_mask & irqmask) 171 if (cached_irq_mask & irqmask)
175 goto spurious_8259A_irq; 172 goto spurious_8259A_irq;
176 cached_irq_mask |= irqmask; 173 cached_irq_mask |= irqmask;
177 174
178 handle_real_irq: 175 handle_real_irq:
179 if (irq & 8) { 176 if (irq & 8) {
180 inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ 177 inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */
181 outb(cached_slave_mask, PIC_SLAVE_IMR); 178 outb(cached_slave_mask, PIC_SLAVE_IMR);
182 /* 'Specific EOI' to slave */ 179 /* 'Specific EOI' to slave */
183 outb(0x60+(irq&7), PIC_SLAVE_CMD); 180 outb(0x60+(irq&7), PIC_SLAVE_CMD);
184 /* 'Specific EOI' to master-IRQ2 */ 181 /* 'Specific EOI' to master-IRQ2 */
185 outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD); 182 outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD);
186 } else { 183 } else {
187 inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ 184 inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */
188 outb(cached_master_mask, PIC_MASTER_IMR); 185 outb(cached_master_mask, PIC_MASTER_IMR);
189 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI' to master */ 186 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI' to master */
190 } 187 }
191 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 188 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
192 return; 189 return;
193 190
194 spurious_8259A_irq: 191 spurious_8259A_irq:
195 /* 192 /*
196 * this is the slow path - should happen rarely. 193 * this is the slow path - should happen rarely.
197 */ 194 */
198 if (i8259A_irq_real(irq)) 195 if (i8259A_irq_real(irq))
199 /* 196 /*
200 * oops, the IRQ _is_ in service according to the 197 * oops, the IRQ _is_ in service according to the
201 * 8259A - not spurious, go handle it. 198 * 8259A - not spurious, go handle it.
202 */ 199 */
203 goto handle_real_irq; 200 goto handle_real_irq;
204 201
205 { 202 {
206 static int spurious_irq_mask; 203 static int spurious_irq_mask;
207 /* 204 /*
208 * At this point we can be sure the IRQ is spurious, 205 * At this point we can be sure the IRQ is spurious,
209 * let's ACK and report it. [once per IRQ] 206 * let's ACK and report it. [once per IRQ]
210 */ 207 */
211 if (!(spurious_irq_mask & irqmask)) { 208 if (!(spurious_irq_mask & irqmask)) {
212 printk(KERN_DEBUG 209 printk(KERN_DEBUG
213 "spurious 8259A interrupt: IRQ%d.\n", irq); 210 "spurious 8259A interrupt: IRQ%d.\n", irq);
214 spurious_irq_mask |= irqmask; 211 spurious_irq_mask |= irqmask;
215 } 212 }
216 atomic_inc(&irq_err_count); 213 atomic_inc(&irq_err_count);
217 /* 214 /*
218 * Theoretically we do not have to handle this IRQ, 215 * Theoretically we do not have to handle this IRQ,
219 * but in Linux this does not cause problems and is 216 * but in Linux this does not cause problems and is
220 * simpler for us. 217 * simpler for us.
221 */ 218 */
222 goto handle_real_irq; 219 goto handle_real_irq;
223 } 220 }
224 } 221 }
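For illustration (not part of the patch): the 0x60+n writes above are the 8259A's OCW2 "specific EOI" command, where n is the in-service level, so a cascaded interrupt needs one EOI per PIC. For IRQ11, which is line 3 on the slave, that would be:

	outb(0x60 + 3, PIC_SLAVE_CMD);               /* OCW2: specific EOI, level 3 */
	outb(0x60 + PIC_CASCADE_IR, PIC_MASTER_CMD); /* EOI the cascade line (IRQ2) */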
225 222
223 struct irq_chip i8259A_chip = {
224 .name = "XT-PIC",
225 .irq_mask = disable_8259A_irq,
226 .irq_disable = disable_8259A_irq,
227 .irq_unmask = enable_8259A_irq,
228 .irq_mask_ack = mask_and_ack_8259A,
229 };
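The struct above is the crux of the conversion: the callbacks are now the irq_* variants, which take a struct irq_data * instead of a raw IRQ number. A minimal sketch of the expected handler shape, assuming the mask_8259A_irq() helper that legacy_pic references further down:

static void disable_8259A_irq(struct irq_data *data)
{
	mask_8259A_irq(data->irq);	/* forward to the unsigned-int helper */
}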
230
226 static char irq_trigger[2]; 231 static char irq_trigger[2];
227 /** 232 /**
228 * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ 233 * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
229 */ 234 */
230 static void restore_ELCR(char *trigger) 235 static void restore_ELCR(char *trigger)
231 { 236 {
232 outb(trigger[0], 0x4d0); 237 outb(trigger[0], 0x4d0);
233 outb(trigger[1], 0x4d1); 238 outb(trigger[1], 0x4d1);
234 } 239 }
235 240
236 static void save_ELCR(char *trigger) 241 static void save_ELCR(char *trigger)
237 { 242 {
238 /* IRQ 0,1,2,8,13 are marked as reserved */ 243 /* IRQ 0,1,2,8,13 are marked as reserved */
239 trigger[0] = inb(0x4d0) & 0xF8; 244 trigger[0] = inb(0x4d0) & 0xF8;
240 trigger[1] = inb(0x4d1) & 0xDE; 245 trigger[1] = inb(0x4d1) & 0xDE;
241 } 246 }
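The ELCR holds one bit per legacy IRQ: bits 0-7 of port 0x4d0 cover IRQ0-7 and bits 0-7 of port 0x4d1 cover IRQ8-15, with a set bit meaning level-triggered. A hypothetical helper (not in this file) to query it:

static int elcr_irq_is_level(unsigned int irq)
{
	unsigned int port = 0x4d0 + (irq >> 3);	/* 0x4d0 or 0x4d1 */

	return (inb(port) >> (irq & 7)) & 1;	/* 1 == level-triggered */
}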
242 247
243 static int i8259A_resume(struct sys_device *dev) 248 static int i8259A_resume(struct sys_device *dev)
244 { 249 {
245 init_8259A(i8259A_auto_eoi); 250 init_8259A(i8259A_auto_eoi);
246 restore_ELCR(irq_trigger); 251 restore_ELCR(irq_trigger);
247 return 0; 252 return 0;
248 } 253 }
249 254
250 static int i8259A_suspend(struct sys_device *dev, pm_message_t state) 255 static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
251 { 256 {
252 save_ELCR(irq_trigger); 257 save_ELCR(irq_trigger);
253 return 0; 258 return 0;
254 } 259 }
255 260
256 static int i8259A_shutdown(struct sys_device *dev) 261 static int i8259A_shutdown(struct sys_device *dev)
257 { 262 {
258 /* Put the i8259A into a quiescent state that 263 /* Put the i8259A into a quiescent state that
259 * the kernel initialization code can get it 264 * the kernel initialization code can get it
260 * out of. 265 * out of.
261 */ 266 */
262 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 267 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
263 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 268 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
264 return 0; 269 return 0;
265 } 270 }
266 271
267 static struct sysdev_class i8259_sysdev_class = { 272 static struct sysdev_class i8259_sysdev_class = {
268 .name = "i8259", 273 .name = "i8259",
269 .suspend = i8259A_suspend, 274 .suspend = i8259A_suspend,
270 .resume = i8259A_resume, 275 .resume = i8259A_resume,
271 .shutdown = i8259A_shutdown, 276 .shutdown = i8259A_shutdown,
272 }; 277 };
273 278
274 static struct sys_device device_i8259A = { 279 static struct sys_device device_i8259A = {
275 .id = 0, 280 .id = 0,
276 .cls = &i8259_sysdev_class, 281 .cls = &i8259_sysdev_class,
277 }; 282 };
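The sysdev class and device only take effect once registered; the initcall doing so is outside this hunk, but presumably follows the usual pattern:

static int __init i8259A_init_sysfs(void)
{
	int error = sysdev_class_register(&i8259_sysdev_class);

	if (!error)
		error = sysdev_register(&device_i8259A);
	return error;
}
device_initcall(i8259A_init_sysfs);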
278 283
279 static void mask_8259A(void) 284 static void mask_8259A(void)
280 { 285 {
281 unsigned long flags; 286 unsigned long flags;
282 287
283 raw_spin_lock_irqsave(&i8259A_lock, flags); 288 raw_spin_lock_irqsave(&i8259A_lock, flags);
284 289
285 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 290 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
286 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 291 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
287 292
288 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 293 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
289 } 294 }
290 295
291 static void unmask_8259A(void) 296 static void unmask_8259A(void)
292 { 297 {
293 unsigned long flags; 298 unsigned long flags;
294 299
295 raw_spin_lock_irqsave(&i8259A_lock, flags); 300 raw_spin_lock_irqsave(&i8259A_lock, flags);
296 301
297 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ 302 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
298 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ 303 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
299 304
300 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 305 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
301 } 306 }
302 307
303 static void init_8259A(int auto_eoi) 308 static void init_8259A(int auto_eoi)
304 { 309 {
305 unsigned long flags; 310 unsigned long flags;
306 311
307 i8259A_auto_eoi = auto_eoi; 312 i8259A_auto_eoi = auto_eoi;
308 313
309 raw_spin_lock_irqsave(&i8259A_lock, flags); 314 raw_spin_lock_irqsave(&i8259A_lock, flags);
310 315
311 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 316 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
312 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 317 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
313 318
314 /* 319 /*
315 * outb_pic - this has to work on a wide range of PC hardware. 320 * outb_pic - this has to work on a wide range of PC hardware.
316 */ 321 */
317 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ 322 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
318 323
319 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, 324 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64,
320 to 0x20-0x27 on i386 */ 325 to 0x20-0x27 on i386 */
321 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); 326 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
322 327
323 /* 8259A-1 (the master) has a slave on IR2 */ 328 /* 8259A-1 (the master) has a slave on IR2 */
324 outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); 329 outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);
325 330
326 if (auto_eoi) /* master does Auto EOI */ 331 if (auto_eoi) /* master does Auto EOI */
327 outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); 332 outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
328 else /* master expects normal EOI */ 333 else /* master expects normal EOI */
329 outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); 334 outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
330 335
331 outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ 336 outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
332 337
333 /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ 338 /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */
334 outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); 339 outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
335 /* 8259A-2 is a slave on master's IR2 */ 340 /* 8259A-2 is a slave on master's IR2 */
336 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); 341 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
337 /* (slave's support for AEOI in flat mode is to be investigated) */ 342 /* (slave's support for AEOI in flat mode is to be investigated) */
338 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); 343 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
339 344
340 if (auto_eoi) 345 if (auto_eoi)
341 /* 346 /*
342 * In AEOI mode we just have to mask the interrupt 347 * In AEOI mode we just have to mask the interrupt
343 * when acking. 348 * when acking.
344 */ 349 */
345 i8259A_chip.mask_ack = disable_8259A_irq; 350 i8259A_chip.irq_mask_ack = disable_8259A_irq;
346 else 351 else
347 i8259A_chip.mask_ack = mask_and_ack_8259A; 352 i8259A_chip.irq_mask_ack = mask_and_ack_8259A;
348 353
349 udelay(100); /* wait for 8259A to initialize */ 354 udelay(100); /* wait for 8259A to initialize */
350 355
351 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ 356 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
352 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ 357 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
353 358
354 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 359 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
355 } 360 }
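For reference, the writes in init_8259A() follow the 8259A datasheet's four-step initialization handshake; a recap of what each ICW means here:

/*
 * ICW1 -> command port: 0x11 = edge-triggered, cascaded, ICW4 follows
 * ICW2 -> data port:    CPU vector base for IR0-IR7
 * ICW3 -> data port:    master: bitmask of cascaded lines (1 << 2);
 *                       slave:  its cascade identity (2)
 * ICW4 -> data port:    0x01 = 8086 mode, OR'ed with PIC_ICW4_AEOI
 *                       when automatic EOI is wanted
 */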
356 361
357 /* 362 /*
358 * Make the i8259 a driver so that PIC functions can be selected at run time. 363 * Make the i8259 a driver so that PIC functions can be selected at run time.
359 * The goal is to keep one x86 binary compatible across PC-compatible and 364 * The goal is to keep one x86 binary compatible across PC-compatible and
360 * non-PC-compatible platforms, such as x86 MID. 365 * non-PC-compatible platforms, such as x86 MID.
361 */ 366 */
362 367
363 static void legacy_pic_noop(void) { } 368 static void legacy_pic_noop(void) { }
364 static void legacy_pic_uint_noop(unsigned int unused) { } 369 static void legacy_pic_uint_noop(unsigned int unused) { }
365 static void legacy_pic_int_noop(int unused) { } 370 static void legacy_pic_int_noop(int unused) { }
366
367 static struct irq_chip dummy_pic_chip = {
368 .name = "dummy pic",
369 .mask = legacy_pic_uint_noop,
370 .unmask = legacy_pic_uint_noop,
371 .disable = legacy_pic_uint_noop,
372 .mask_ack = legacy_pic_uint_noop,
373 };
374 static int legacy_pic_irq_pending_noop(unsigned int irq) 371 static int legacy_pic_irq_pending_noop(unsigned int irq)
375 { 372 {
376 return 0; 373 return 0;
377 } 374 }
378 375
379 struct legacy_pic null_legacy_pic = { 376 struct legacy_pic null_legacy_pic = {
380 .nr_legacy_irqs = 0, 377 .nr_legacy_irqs = 0,
381 .chip = &dummy_pic_chip, 378 .chip = &dummy_irq_chip,
379 .mask = legacy_pic_uint_noop,
380 .unmask = legacy_pic_uint_noop,
382 .mask_all = legacy_pic_noop, 381 .mask_all = legacy_pic_noop,
383 .restore_mask = legacy_pic_noop, 382 .restore_mask = legacy_pic_noop,
384 .init = legacy_pic_int_noop, 383 .init = legacy_pic_int_noop,
385 .irq_pending = legacy_pic_irq_pending_noop, 384 .irq_pending = legacy_pic_irq_pending_noop,
386 .make_irq = legacy_pic_uint_noop, 385 .make_irq = legacy_pic_uint_noop,
387 }; 386 };
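A PIC-less platform is expected to swap in this stub before any legacy IRQ setup runs; hypothetically, in a board file:

static void __init picless_board_setup(void)
{
	legacy_pic = &null_legacy_pic;	/* all PIC operations become no-ops */
}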
388 387
389 struct legacy_pic default_legacy_pic = { 388 struct legacy_pic default_legacy_pic = {
390 .nr_legacy_irqs = NR_IRQS_LEGACY, 389 .nr_legacy_irqs = NR_IRQS_LEGACY,
391 .chip = &i8259A_chip, 390 .chip = &i8259A_chip,
392 .mask_all = mask_8259A, 391 .mask = mask_8259A_irq,
392 .unmask = unmask_8259A_irq,
393 .mask_all = mask_8259A,
arch/x86/kernel/smpboot.c
1 /* 1 /*
2 * x86 SMP booting functions 2 * x86 SMP booting functions
3 * 3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
5 * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> 5 * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6 * Copyright 2001 Andi Kleen, SuSE Labs. 6 * Copyright 2001 Andi Kleen, SuSE Labs.
7 * 7 *
8 * Much of the core SMP work is based on previous work by Thomas Radke, to 8 * Much of the core SMP work is based on previous work by Thomas Radke, to
9 * whom a great many thanks are extended. 9 * whom a great many thanks are extended.
10 * 10 *
11 * Thanks to Intel for making available several different Pentium, 11 * Thanks to Intel for making available several different Pentium,
12 * Pentium Pro and Pentium-II/Xeon MP machines. 12 * Pentium Pro and Pentium-II/Xeon MP machines.
13 * Original development of Linux SMP code supported by Caldera. 13 * Original development of Linux SMP code supported by Caldera.
14 * 14 *
15 * This code is released under the GNU General Public License version 2 or 15 * This code is released under the GNU General Public License version 2 or
16 * later. 16 * later.
17 * 17 *
18 * Fixes 18 * Fixes
19 * Felix Koop : NR_CPUS used properly 19 * Felix Koop : NR_CPUS used properly
20 * Jose Renau : Handle single CPU case. 20 * Jose Renau : Handle single CPU case.
21 * Alan Cox : By repeated request 8) - Total BogoMIPS report. 21 * Alan Cox : By repeated request 8) - Total BogoMIPS report.
22 * Greg Wright : Fix for kernel stacks panic. 22 * Greg Wright : Fix for kernel stacks panic.
23 * Erich Boleyn : MP v1.4 and additional changes. 23 * Erich Boleyn : MP v1.4 and additional changes.
24 * Matthias Sattler : Changes for 2.1 kernel map. 24 * Matthias Sattler : Changes for 2.1 kernel map.
25 * Michel Lespinasse : Changes for 2.1 kernel map. 25 * Michel Lespinasse : Changes for 2.1 kernel map.
26 * Michael Chastain : Change trampoline.S to gnu as. 26 * Michael Chastain : Change trampoline.S to gnu as.
27 * Alan Cox : Dumb bug: 'B' step PPro's are fine 27 * Alan Cox : Dumb bug: 'B' step PPro's are fine
28 * Ingo Molnar : Added APIC timers, based on code 28 * Ingo Molnar : Added APIC timers, based on code
29 * from Jose Renau 29 * from Jose Renau
30 * Ingo Molnar : various cleanups and rewrites 30 * Ingo Molnar : various cleanups and rewrites
31 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. 31 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
32 * Maciej W. Rozycki : Bits for genuine 82489DX APICs 32 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
33 * Andi Kleen : Changed for SMP boot into long mode. 33 * Andi Kleen : Changed for SMP boot into long mode.
34 * Martin J. Bligh : Added support for multi-quad systems 34 * Martin J. Bligh : Added support for multi-quad systems
35 * Dave Jones : Report invalid combinations of Athlon CPUs. 35 * Dave Jones : Report invalid combinations of Athlon CPUs.
36 * Rusty Russell : Hacked into shape for new "hotplug" boot process. 36 * Rusty Russell : Hacked into shape for new "hotplug" boot process.
37 * Andi Kleen : Converted to new state machine. 37 * Andi Kleen : Converted to new state machine.
38 * Ashok Raj : CPU hotplug support 38 * Ashok Raj : CPU hotplug support
39 * Glauber Costa : i386 and x86_64 integration 39 * Glauber Costa : i386 and x86_64 integration
40 */ 40 */
41 41
42 #include <linux/init.h> 42 #include <linux/init.h>
43 #include <linux/smp.h> 43 #include <linux/smp.h>
44 #include <linux/module.h> 44 #include <linux/module.h>
45 #include <linux/sched.h> 45 #include <linux/sched.h>
46 #include <linux/percpu.h> 46 #include <linux/percpu.h>
47 #include <linux/bootmem.h> 47 #include <linux/bootmem.h>
48 #include <linux/err.h> 48 #include <linux/err.h>
49 #include <linux/nmi.h> 49 #include <linux/nmi.h>
50 #include <linux/tboot.h> 50 #include <linux/tboot.h>
51 #include <linux/stackprotector.h> 51 #include <linux/stackprotector.h>
52 #include <linux/gfp.h> 52 #include <linux/gfp.h>
53 53
54 #include <asm/acpi.h> 54 #include <asm/acpi.h>
55 #include <asm/desc.h> 55 #include <asm/desc.h>
56 #include <asm/nmi.h> 56 #include <asm/nmi.h>
57 #include <asm/irq.h> 57 #include <asm/irq.h>
58 #include <asm/idle.h> 58 #include <asm/idle.h>
59 #include <asm/trampoline.h> 59 #include <asm/trampoline.h>
60 #include <asm/cpu.h> 60 #include <asm/cpu.h>
61 #include <asm/numa.h> 61 #include <asm/numa.h>
62 #include <asm/pgtable.h> 62 #include <asm/pgtable.h>
63 #include <asm/tlbflush.h> 63 #include <asm/tlbflush.h>
64 #include <asm/mtrr.h> 64 #include <asm/mtrr.h>
65 #include <asm/vmi.h> 65 #include <asm/vmi.h>
66 #include <asm/apic.h> 66 #include <asm/apic.h>
67 #include <asm/setup.h> 67 #include <asm/setup.h>
68 #include <asm/uv/uv.h> 68 #include <asm/uv/uv.h>
69 #include <linux/mc146818rtc.h> 69 #include <linux/mc146818rtc.h>
70 70
71 #include <asm/smpboot_hooks.h> 71 #include <asm/smpboot_hooks.h>
72 #include <asm/i8259.h> 72 #include <asm/i8259.h>
73 73
74 #ifdef CONFIG_X86_32 74 #ifdef CONFIG_X86_32
75 u8 apicid_2_node[MAX_APICID]; 75 u8 apicid_2_node[MAX_APICID];
76 #endif 76 #endif
77 77
78 /* State of each CPU */ 78 /* State of each CPU */
79 DEFINE_PER_CPU(int, cpu_state) = { 0 }; 79 DEFINE_PER_CPU(int, cpu_state) = { 0 };
80 80
81 /* Store all idle threads; they can be reused instead of creating 81 /* Store all idle threads; they can be reused instead of creating
82 * a new thread. Also avoids complicated thread destroy functionality 82 * a new thread. Also avoids complicated thread destroy functionality
83 * for idle threads. 83 * for idle threads.
84 */ 84 */
85 #ifdef CONFIG_HOTPLUG_CPU 85 #ifdef CONFIG_HOTPLUG_CPU
86 /* 86 /*
87 * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is 87 * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
88 * removed after init for !CONFIG_HOTPLUG_CPU. 88 * removed after init for !CONFIG_HOTPLUG_CPU.
89 */ 89 */
90 static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); 90 static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
91 #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) 91 #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
92 #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) 92 #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
93 93
94 /* 94 /*
95 * We need this to protect trampoline_base from concurrent accesses when 95 * We need this to protect trampoline_base from concurrent accesses when
96 * off- and onlining cores wildly. 96 * off- and onlining cores wildly.
97 */ 97 */
98 static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); 98 static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
99 99
100 void cpu_hotplug_driver_lock(void) 100 void cpu_hotplug_driver_lock(void)
101 { 101 {
102 mutex_lock(&x86_cpu_hotplug_driver_mutex); 102 mutex_lock(&x86_cpu_hotplug_driver_mutex);
103 } 103 }
104 104
105 void cpu_hotplug_driver_unlock(void) 105 void cpu_hotplug_driver_unlock(void)
106 { 106 {
107 mutex_unlock(&x86_cpu_hotplug_driver_mutex); 107 mutex_unlock(&x86_cpu_hotplug_driver_mutex);
108 } 108 }
109 109
110 ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; } 110 ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
111 ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; } 111 ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
112 #else 112 #else
113 static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata; 113 static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata;
114 #define get_idle_for_cpu(x) (idle_thread_array[(x)]) 114 #define get_idle_for_cpu(x) (idle_thread_array[(x)])
115 #define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) 115 #define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
116 #endif 116 #endif
117 117
118 /* Number of siblings per CPU package */ 118 /* Number of siblings per CPU package */
119 int smp_num_siblings = 1; 119 int smp_num_siblings = 1;
120 EXPORT_SYMBOL(smp_num_siblings); 120 EXPORT_SYMBOL(smp_num_siblings);
121 121
122 /* Last level cache ID of each logical CPU */ 122 /* Last level cache ID of each logical CPU */
123 DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; 123 DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
124 124
125 /* representing HT siblings of each logical CPU */ 125 /* representing HT siblings of each logical CPU */
126 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); 126 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
127 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); 127 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
128 128
129 /* representing HT and core siblings of each logical CPU */ 129 /* representing HT and core siblings of each logical CPU */
130 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); 130 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
131 EXPORT_PER_CPU_SYMBOL(cpu_core_map); 131 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
132 132
133 /* Per CPU bogomips and other parameters */ 133 /* Per CPU bogomips and other parameters */
134 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); 134 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
135 EXPORT_PER_CPU_SYMBOL(cpu_info); 135 EXPORT_PER_CPU_SYMBOL(cpu_info);
136 136
137 atomic_t init_deasserted; 137 atomic_t init_deasserted;
138 138
139 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) 139 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
140 /* which node each logical CPU is on */ 140 /* which node each logical CPU is on */
141 int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; 141 int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
142 EXPORT_SYMBOL(cpu_to_node_map); 142 EXPORT_SYMBOL(cpu_to_node_map);
143 143
144 /* set up a mapping between cpu and node. */ 144 /* set up a mapping between cpu and node. */
145 static void map_cpu_to_node(int cpu, int node) 145 static void map_cpu_to_node(int cpu, int node)
146 { 146 {
147 printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); 147 printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
148 cpumask_set_cpu(cpu, node_to_cpumask_map[node]); 148 cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
149 cpu_to_node_map[cpu] = node; 149 cpu_to_node_map[cpu] = node;
150 } 150 }
151 151
152 /* undo a mapping between cpu and node. */ 152 /* undo a mapping between cpu and node. */
153 static void unmap_cpu_to_node(int cpu) 153 static void unmap_cpu_to_node(int cpu)
154 { 154 {
155 int node; 155 int node;
156 156
157 printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); 157 printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
158 for (node = 0; node < MAX_NUMNODES; node++) 158 for (node = 0; node < MAX_NUMNODES; node++)
159 cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); 159 cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
160 cpu_to_node_map[cpu] = 0; 160 cpu_to_node_map[cpu] = 0;
161 } 161 }
162 #else /* !(CONFIG_NUMA && CONFIG_X86_32) */ 162 #else /* !(CONFIG_NUMA && CONFIG_X86_32) */
163 #define map_cpu_to_node(cpu, node) ({}) 163 #define map_cpu_to_node(cpu, node) ({})
164 #define unmap_cpu_to_node(cpu) ({}) 164 #define unmap_cpu_to_node(cpu) ({})
165 #endif 165 #endif
166 166
167 #ifdef CONFIG_X86_32 167 #ifdef CONFIG_X86_32
168 static int boot_cpu_logical_apicid; 168 static int boot_cpu_logical_apicid;
169 169
170 u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 170 u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
171 { [0 ... NR_CPUS-1] = BAD_APICID }; 171 { [0 ... NR_CPUS-1] = BAD_APICID };
172 172
173 static void map_cpu_to_logical_apicid(void) 173 static void map_cpu_to_logical_apicid(void)
174 { 174 {
175 int cpu = smp_processor_id(); 175 int cpu = smp_processor_id();
176 int apicid = logical_smp_processor_id(); 176 int apicid = logical_smp_processor_id();
177 int node = apic->apicid_to_node(apicid); 177 int node = apic->apicid_to_node(apicid);
178 178
179 if (!node_online(node)) 179 if (!node_online(node))
180 node = first_online_node; 180 node = first_online_node;
181 181
182 cpu_2_logical_apicid[cpu] = apicid; 182 cpu_2_logical_apicid[cpu] = apicid;
183 map_cpu_to_node(cpu, node); 183 map_cpu_to_node(cpu, node);
184 } 184 }
185 185
186 void numa_remove_cpu(int cpu) 186 void numa_remove_cpu(int cpu)
187 { 187 {
188 cpu_2_logical_apicid[cpu] = BAD_APICID; 188 cpu_2_logical_apicid[cpu] = BAD_APICID;
189 unmap_cpu_to_node(cpu); 189 unmap_cpu_to_node(cpu);
190 } 190 }
191 #else 191 #else
192 #define map_cpu_to_logical_apicid() do {} while (0) 192 #define map_cpu_to_logical_apicid() do {} while (0)
193 #endif 193 #endif
194 194
195 /* 195 /*
196 * Report back to the Boot Processor. 196 * Report back to the Boot Processor.
197 * Running on AP. 197 * Running on AP.
198 */ 198 */
199 static void __cpuinit smp_callin(void) 199 static void __cpuinit smp_callin(void)
200 { 200 {
201 int cpuid, phys_id; 201 int cpuid, phys_id;
202 unsigned long timeout; 202 unsigned long timeout;
203 203
204 /* 204 /*
205 * If woken up by an INIT in an 82489DX configuration 205 * If woken up by an INIT in an 82489DX configuration
206 * we may get here before an INIT-deassert IPI reaches 206 * we may get here before an INIT-deassert IPI reaches
207 * our local APIC. We have to wait for the IPI or we'll 207 * our local APIC. We have to wait for the IPI or we'll
208 * lock up on an APIC access. 208 * lock up on an APIC access.
209 */ 209 */
210 if (apic->wait_for_init_deassert) 210 if (apic->wait_for_init_deassert)
211 apic->wait_for_init_deassert(&init_deasserted); 211 apic->wait_for_init_deassert(&init_deasserted);
212 212
213 /* 213 /*
214 * (This works even if the APIC is not enabled.) 214 * (This works even if the APIC is not enabled.)
215 */ 215 */
216 phys_id = read_apic_id(); 216 phys_id = read_apic_id();
217 cpuid = smp_processor_id(); 217 cpuid = smp_processor_id();
218 if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { 218 if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
219 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 219 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
220 phys_id, cpuid); 220 phys_id, cpuid);
221 } 221 }
222 pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); 222 pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
223 223
224 /* 224 /*
225 * STARTUP IPIs are fragile beasts as they might sometimes 225 * STARTUP IPIs are fragile beasts as they might sometimes
226 * trigger some glue motherboard logic. Complete APIC bus 226 * trigger some glue motherboard logic. Complete APIC bus
227 * silence for 1 second; this overestimates the time the 227 * silence for 1 second; this overestimates the time the
228 * boot CPU is spending to send the up to 2 STARTUP IPIs 228 * boot CPU is spending to send the up to 2 STARTUP IPIs
229 * by a factor of two. This should be enough. 229 * by a factor of two. This should be enough.
230 */ 230 */
231 231
232 /* 232 /*
233 * Waiting 2s total for startup (udelay is not yet working) 233 * Waiting 2s total for startup (udelay is not yet working)
234 */ 234 */
235 timeout = jiffies + 2*HZ; 235 timeout = jiffies + 2*HZ;
236 while (time_before(jiffies, timeout)) { 236 while (time_before(jiffies, timeout)) {
237 /* 237 /*
238 * Has the boot CPU finished its STARTUP sequence? 238 * Has the boot CPU finished its STARTUP sequence?
239 */ 239 */
240 if (cpumask_test_cpu(cpuid, cpu_callout_mask)) 240 if (cpumask_test_cpu(cpuid, cpu_callout_mask))
241 break; 241 break;
242 cpu_relax(); 242 cpu_relax();
243 } 243 }
244 244
245 if (!time_before(jiffies, timeout)) { 245 if (!time_before(jiffies, timeout)) {
246 panic("%s: CPU%d started up but did not get a callout!\n", 246 panic("%s: CPU%d started up but did not get a callout!\n",
247 __func__, cpuid); 247 __func__, cpuid);
248 } 248 }
249 249
250 /* 250 /*
251 * the boot CPU has finished the init stage and is spinning 251 * the boot CPU has finished the init stage and is spinning
252 * on callin_map until we finish. We are free to set up this 252 * on callin_map until we finish. We are free to set up this
253 * CPU, first the APIC. (this is probably redundant on most 253 * CPU, first the APIC. (this is probably redundant on most
254 * boards) 254 * boards)
255 */ 255 */
256 256
257 pr_debug("CALLIN, before setup_local_APIC().\n"); 257 pr_debug("CALLIN, before setup_local_APIC().\n");
258 if (apic->smp_callin_clear_local_apic) 258 if (apic->smp_callin_clear_local_apic)
259 apic->smp_callin_clear_local_apic(); 259 apic->smp_callin_clear_local_apic();
260 setup_local_APIC(); 260 setup_local_APIC();
261 end_local_APIC_setup(); 261 end_local_APIC_setup();
262 map_cpu_to_logical_apicid(); 262 map_cpu_to_logical_apicid();
263 263
264 /* 264 /*
265 * Need to setup vector mappings before we enable interrupts. 265 * Need to setup vector mappings before we enable interrupts.
266 */ 266 */
267 setup_vector_irq(smp_processor_id()); 267 setup_vector_irq(smp_processor_id());
268 /* 268 /*
269 * Get our bogomips. 269 * Get our bogomips.
270 * 270 *
271 * Need to enable IRQs because it can take longer and then 271 * Need to enable IRQs because it can take longer and then
272 * the NMI watchdog might kill us. 272 * the NMI watchdog might kill us.
273 */ 273 */
274 local_irq_enable(); 274 local_irq_enable();
275 calibrate_delay(); 275 calibrate_delay();
276 local_irq_disable(); 276 local_irq_disable();
277 pr_debug("Stack at about %p\n", &cpuid); 277 pr_debug("Stack at about %p\n", &cpuid);
278 278
279 /* 279 /*
280 * Save our processor parameters 280 * Save our processor parameters
281 */ 281 */
282 smp_store_cpu_info(cpuid); 282 smp_store_cpu_info(cpuid);
283 283
284 notify_cpu_starting(cpuid); 284 notify_cpu_starting(cpuid);
285 285
286 /* 286 /*
287 * Allow the master to continue. 287 * Allow the master to continue.
288 */ 288 */
289 cpumask_set_cpu(cpuid, cpu_callin_mask); 289 cpumask_set_cpu(cpuid, cpu_callin_mask);
290 } 290 }
291 291
292 /* 292 /*
293 * Activate a secondary processor. 293 * Activate a secondary processor.
294 */ 294 */
295 notrace static void __cpuinit start_secondary(void *unused) 295 notrace static void __cpuinit start_secondary(void *unused)
296 { 296 {
297 /* 297 /*
298 * Don't put *anything* before cpu_init(); SMP booting is so 298 * Don't put *anything* before cpu_init(); SMP booting is so
299 * fragile that we want to limit the things done here to the 299 * fragile that we want to limit the things done here to the
300 * most necessary things. 300 * most necessary things.
301 */ 301 */
302 302
303 #ifdef CONFIG_X86_32 303 #ifdef CONFIG_X86_32
304 /* 304 /*
305 * Switch away from the trampoline page-table 305 * Switch away from the trampoline page-table
306 * 306 *
307 * Do this before cpu_init() because it needs to access per-cpu 307 * Do this before cpu_init() because it needs to access per-cpu
308 * data which may not be mapped in the trampoline page-table. 308 * data which may not be mapped in the trampoline page-table.
309 */ 309 */
310 load_cr3(swapper_pg_dir); 310 load_cr3(swapper_pg_dir);
311 __flush_tlb_all(); 311 __flush_tlb_all();
312 #endif 312 #endif
313 313
314 vmi_bringup(); 314 vmi_bringup();
315 cpu_init(); 315 cpu_init();
316 preempt_disable(); 316 preempt_disable();
317 smp_callin(); 317 smp_callin();
318 318
319 /* otherwise gcc will move up smp_processor_id before the cpu_init */ 319 /* otherwise gcc will move up smp_processor_id before the cpu_init */
320 barrier(); 320 barrier();
321 /* 321 /*
322 * Check TSC synchronization with the BP: 322 * Check TSC synchronization with the BP:
323 */ 323 */
324 check_tsc_sync_target(); 324 check_tsc_sync_target();
325 325
326 if (nmi_watchdog == NMI_IO_APIC) { 326 if (nmi_watchdog == NMI_IO_APIC) {
327 legacy_pic->chip->mask(0); 327 legacy_pic->mask(0);
328 enable_NMI_through_LVT0(); 328 enable_NMI_through_LVT0();
329 legacy_pic->chip->unmask(0); 329 legacy_pic->unmask(0);
330 } 330 }
331 331
332 /* This must be done before setting cpu_online_mask */ 332 /* This must be done before setting cpu_online_mask */
333 set_cpu_sibling_map(raw_smp_processor_id()); 333 set_cpu_sibling_map(raw_smp_processor_id());
334 wmb(); 334 wmb();
335 335
336 /* 336 /*
337 * We need to hold call_lock, so there is no inconsistency 337 * We need to hold call_lock, so there is no inconsistency
338 * between the time smp_call_function() determines the number of 338 * between the time smp_call_function() determines the number of
339 * IPI recipients, and the time when the determination is made 339 * IPI recipients, and the time when the determination is made
340 * for which cpus receive the IPI. Holding this 340 * for which cpus receive the IPI. Holding this
341 * lock helps us to not include this cpu in a currently in progress 341 * lock helps us to not include this cpu in a currently in progress
342 * smp_call_function(). 342 * smp_call_function().
343 * 343 *
344 * We need to hold vector_lock so that the set of online cpus 344 * We need to hold vector_lock so that the set of online cpus
345 * does not change while we are assigning vectors to cpus. Holding 345 * does not change while we are assigning vectors to cpus. Holding
346 * this lock ensures we don't half assign or remove an irq from a cpu. 346 * this lock ensures we don't half assign or remove an irq from a cpu.
347 */ 347 */
348 ipi_call_lock(); 348 ipi_call_lock();
349 lock_vector_lock(); 349 lock_vector_lock();
350 set_cpu_online(smp_processor_id(), true); 350 set_cpu_online(smp_processor_id(), true);
351 unlock_vector_lock(); 351 unlock_vector_lock();
352 ipi_call_unlock(); 352 ipi_call_unlock();
353 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 353 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
354 x86_platform.nmi_init(); 354 x86_platform.nmi_init();
355 355
356 /* enable local interrupts */ 356 /* enable local interrupts */
357 local_irq_enable(); 357 local_irq_enable();
358 358
359 /* to prevent fake stack check failure in clock setup */ 359 /* to prevent fake stack check failure in clock setup */
360 boot_init_stack_canary(); 360 boot_init_stack_canary();
361 361
362 x86_cpuinit.setup_percpu_clockev(); 362 x86_cpuinit.setup_percpu_clockev();
363 363
364 wmb(); 364 wmb();
365 cpu_idle(); 365 cpu_idle();
366 } 366 }
367 367
368 #ifdef CONFIG_CPUMASK_OFFSTACK 368 #ifdef CONFIG_CPUMASK_OFFSTACK
369 /* In this case, llc_shared_map is a pointer to a cpumask. */ 369 /* In this case, llc_shared_map is a pointer to a cpumask. */
370 static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, 370 static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
371 const struct cpuinfo_x86 *src) 371 const struct cpuinfo_x86 *src)
372 { 372 {
373 struct cpumask *llc = dst->llc_shared_map; 373 struct cpumask *llc = dst->llc_shared_map;
374 *dst = *src; 374 *dst = *src;
375 dst->llc_shared_map = llc; 375 dst->llc_shared_map = llc;
376 } 376 }
377 #else 377 #else
378 static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, 378 static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
379 const struct cpuinfo_x86 *src) 379 const struct cpuinfo_x86 *src)
380 { 380 {
381 *dst = *src; 381 *dst = *src;
382 } 382 }
383 #endif /* CONFIG_CPUMASK_OFFSTACK */ 383 #endif /* CONFIG_CPUMASK_OFFSTACK */
384 384
385 /* 385 /*
386 * The bootstrap kernel entry code has set these up. Save them for 386 * The bootstrap kernel entry code has set these up. Save them for
387 * a given CPU 387 * a given CPU
388 */ 388 */
389 389
390 void __cpuinit smp_store_cpu_info(int id) 390 void __cpuinit smp_store_cpu_info(int id)
391 { 391 {
392 struct cpuinfo_x86 *c = &cpu_data(id); 392 struct cpuinfo_x86 *c = &cpu_data(id);
393 393
394 copy_cpuinfo_x86(c, &boot_cpu_data); 394 copy_cpuinfo_x86(c, &boot_cpu_data);
395 c->cpu_index = id; 395 c->cpu_index = id;
396 if (id != 0) 396 if (id != 0)
397 identify_secondary_cpu(c); 397 identify_secondary_cpu(c);
398 } 398 }
399 399
400 400
401 void __cpuinit set_cpu_sibling_map(int cpu) 401 void __cpuinit set_cpu_sibling_map(int cpu)
402 { 402 {
403 int i; 403 int i;
404 struct cpuinfo_x86 *c = &cpu_data(cpu); 404 struct cpuinfo_x86 *c = &cpu_data(cpu);
405 405
406 cpumask_set_cpu(cpu, cpu_sibling_setup_mask); 406 cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
407 407
408 if (smp_num_siblings > 1) { 408 if (smp_num_siblings > 1) {
409 for_each_cpu(i, cpu_sibling_setup_mask) { 409 for_each_cpu(i, cpu_sibling_setup_mask) {
410 struct cpuinfo_x86 *o = &cpu_data(i); 410 struct cpuinfo_x86 *o = &cpu_data(i);
411 411
412 if (c->phys_proc_id == o->phys_proc_id && 412 if (c->phys_proc_id == o->phys_proc_id &&
413 c->cpu_core_id == o->cpu_core_id) { 413 c->cpu_core_id == o->cpu_core_id) {
414 cpumask_set_cpu(i, cpu_sibling_mask(cpu)); 414 cpumask_set_cpu(i, cpu_sibling_mask(cpu));
415 cpumask_set_cpu(cpu, cpu_sibling_mask(i)); 415 cpumask_set_cpu(cpu, cpu_sibling_mask(i));
416 cpumask_set_cpu(i, cpu_core_mask(cpu)); 416 cpumask_set_cpu(i, cpu_core_mask(cpu));
417 cpumask_set_cpu(cpu, cpu_core_mask(i)); 417 cpumask_set_cpu(cpu, cpu_core_mask(i));
418 cpumask_set_cpu(i, c->llc_shared_map); 418 cpumask_set_cpu(i, c->llc_shared_map);
419 cpumask_set_cpu(cpu, o->llc_shared_map); 419 cpumask_set_cpu(cpu, o->llc_shared_map);
420 } 420 }
421 } 421 }
422 } else { 422 } else {
423 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); 423 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
424 } 424 }
425 425
426 cpumask_set_cpu(cpu, c->llc_shared_map); 426 cpumask_set_cpu(cpu, c->llc_shared_map);
427 427
428 if (current_cpu_data.x86_max_cores == 1) { 428 if (current_cpu_data.x86_max_cores == 1) {
429 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); 429 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
430 c->booted_cores = 1; 430 c->booted_cores = 1;
431 return; 431 return;
432 } 432 }
433 433
434 for_each_cpu(i, cpu_sibling_setup_mask) { 434 for_each_cpu(i, cpu_sibling_setup_mask) {
435 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && 435 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
436 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { 436 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
437 cpumask_set_cpu(i, c->llc_shared_map); 437 cpumask_set_cpu(i, c->llc_shared_map);
438 cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map); 438 cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
439 } 439 }
440 if (c->phys_proc_id == cpu_data(i).phys_proc_id) { 440 if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
441 cpumask_set_cpu(i, cpu_core_mask(cpu)); 441 cpumask_set_cpu(i, cpu_core_mask(cpu));
442 cpumask_set_cpu(cpu, cpu_core_mask(i)); 442 cpumask_set_cpu(cpu, cpu_core_mask(i));
443 /* 443 /*
444 * Does this new cpu bring up a new core? 444 * Does this new cpu bring up a new core?
445 */ 445 */
446 if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) { 446 if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) {
447 /* 447 /*
448 * for each core in package, increment 448 * for each core in package, increment
449 * the booted_cores for this new cpu 449 * the booted_cores for this new cpu
450 */ 450 */
451 if (cpumask_first(cpu_sibling_mask(i)) == i) 451 if (cpumask_first(cpu_sibling_mask(i)) == i)
452 c->booted_cores++; 452 c->booted_cores++;
453 /* 453 /*
454 * increment the core count for all 454 * increment the core count for all
455 * the other cpus in this package 455 * the other cpus in this package
456 */ 456 */
457 if (i != cpu) 457 if (i != cpu)
458 cpu_data(i).booted_cores++; 458 cpu_data(i).booted_cores++;
459 } else if (i != cpu && !c->booted_cores) 459 } else if (i != cpu && !c->booted_cores)
460 c->booted_cores = cpu_data(i).booted_cores; 460 c->booted_cores = cpu_data(i).booted_cores;
461 } 461 }
462 } 462 }
463 } 463 }
464 464
465 /* maps the cpu to the sched domain representing multi-core */ 465 /* maps the cpu to the sched domain representing multi-core */
466 const struct cpumask *cpu_coregroup_mask(int cpu) 466 const struct cpumask *cpu_coregroup_mask(int cpu)
467 { 467 {
468 struct cpuinfo_x86 *c = &cpu_data(cpu); 468 struct cpuinfo_x86 *c = &cpu_data(cpu);
469 /* 469 /*
470 * For perf, we return last level cache shared map. 470 * For perf, we return last level cache shared map.
471 * And for power savings, we return cpu_core_map 471 * And for power savings, we return cpu_core_map
472 */ 472 */
473 if ((sched_mc_power_savings || sched_smt_power_savings) && 473 if ((sched_mc_power_savings || sched_smt_power_savings) &&
474 !(cpu_has(c, X86_FEATURE_AMD_DCM))) 474 !(cpu_has(c, X86_FEATURE_AMD_DCM)))
475 return cpu_core_mask(cpu); 475 return cpu_core_mask(cpu);
476 else 476 else
477 return c->llc_shared_map; 477 return c->llc_shared_map;
478 } 478 }
479 479
480 static void impress_friends(void) 480 static void impress_friends(void)
481 { 481 {
482 int cpu; 482 int cpu;
483 unsigned long bogosum = 0; 483 unsigned long bogosum = 0;
484 /* 484 /*
485 * Allow the user to impress friends. 485 * Allow the user to impress friends.
486 */ 486 */
487 pr_debug("Before bogomips.\n"); 487 pr_debug("Before bogomips.\n");
488 for_each_possible_cpu(cpu) 488 for_each_possible_cpu(cpu)
489 if (cpumask_test_cpu(cpu, cpu_callout_mask)) 489 if (cpumask_test_cpu(cpu, cpu_callout_mask))
490 bogosum += cpu_data(cpu).loops_per_jiffy; 490 bogosum += cpu_data(cpu).loops_per_jiffy;
491 printk(KERN_INFO 491 printk(KERN_INFO
492 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", 492 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
493 num_online_cpus(), 493 num_online_cpus(),
494 bogosum/(500000/HZ), 494 bogosum/(500000/HZ),
495 (bogosum/(5000/HZ))%100); 495 (bogosum/(5000/HZ))%100);
496 496
497 pr_debug("Before bogocount - setting activated=1.\n"); 497 pr_debug("Before bogocount - setting activated=1.\n");
498 } 498 }
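The arithmetic above converts loops_per_jiffy to BogoMIPS (each delay-loop iteration counts as two bogo-instructions, so BogoMIPS = lpj * HZ / 500000). A worked example with made-up numbers:

/*
 * With HZ = 250 and four CPUs at loops_per_jiffy = 2500000,
 * bogosum = 10000000, so:
 *   bogosum / (500000/HZ)     = 10000000 / 2000 = 5000
 *   bogosum / (5000/HZ) % 100 = 10000000 / 20 % 100 = 0
 * printing "Total of 4 processors activated (5000.00 BogoMIPS)."
 */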
499 499
500 void __inquire_remote_apic(int apicid) 500 void __inquire_remote_apic(int apicid)
501 { 501 {
502 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 502 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
503 char *names[] = { "ID", "VERSION", "SPIV" }; 503 char *names[] = { "ID", "VERSION", "SPIV" };
504 int timeout; 504 int timeout;
505 u32 status; 505 u32 status;
506 506
507 printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); 507 printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
508 508
509 for (i = 0; i < ARRAY_SIZE(regs); i++) { 509 for (i = 0; i < ARRAY_SIZE(regs); i++) {
510 printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); 510 printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
511 511
512 /* 512 /*
513 * Wait for idle. 513 * Wait for idle.
514 */ 514 */
515 status = safe_apic_wait_icr_idle(); 515 status = safe_apic_wait_icr_idle();
516 if (status) 516 if (status)
517 printk(KERN_CONT 517 printk(KERN_CONT
518 "a previous APIC delivery may have failed\n"); 518 "a previous APIC delivery may have failed\n");
519 519
520 apic_icr_write(APIC_DM_REMRD | regs[i], apicid); 520 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
521 521
522 timeout = 0; 522 timeout = 0;
523 do { 523 do {
524 udelay(100); 524 udelay(100);
525 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; 525 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
526 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); 526 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
527 527
528 switch (status) { 528 switch (status) {
529 case APIC_ICR_RR_VALID: 529 case APIC_ICR_RR_VALID:
530 status = apic_read(APIC_RRR); 530 status = apic_read(APIC_RRR);
531 printk(KERN_CONT "%08x\n", status); 531 printk(KERN_CONT "%08x\n", status);
532 break; 532 break;
533 default: 533 default:
534 printk(KERN_CONT "failed\n"); 534 printk(KERN_CONT "failed\n");
535 } 535 }
536 } 536 }
537 } 537 }
538 538
539 /* 539 /*
540 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal 540 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
541 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this 541 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
542 * won't ... remember to clear down the APIC, etc. later. 542 * won't ... remember to clear down the APIC, etc. later.
543 */ 543 */
544 int __cpuinit 544 int __cpuinit
545 wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) 545 wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
546 { 546 {
547 unsigned long send_status, accept_status = 0; 547 unsigned long send_status, accept_status = 0;
548 int maxlvt; 548 int maxlvt;
549 549
550 /* Target chip */ 550 /* Target chip */
551 /* Boot on the stack */ 551 /* Boot on the stack */
552 /* Kick the second */ 552 /* Kick the second */
553 apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); 553 apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
554 554
555 pr_debug("Waiting for send to finish...\n"); 555 pr_debug("Waiting for send to finish...\n");
556 send_status = safe_apic_wait_icr_idle(); 556 send_status = safe_apic_wait_icr_idle();
557 557
558 /* 558 /*
559 * Give the other CPU some time to accept the IPI. 559 * Give the other CPU some time to accept the IPI.
560 */ 560 */
561 udelay(200); 561 udelay(200);
562 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 562 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
563 maxlvt = lapic_get_maxlvt(); 563 maxlvt = lapic_get_maxlvt();
564 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 564 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
565 apic_write(APIC_ESR, 0); 565 apic_write(APIC_ESR, 0);
566 accept_status = (apic_read(APIC_ESR) & 0xEF); 566 accept_status = (apic_read(APIC_ESR) & 0xEF);
567 } 567 }
568 pr_debug("NMI sent.\n"); 568 pr_debug("NMI sent.\n");
569 569
570 if (send_status) 570 if (send_status)
571 printk(KERN_ERR "APIC never delivered???\n"); 571 printk(KERN_ERR "APIC never delivered???\n");
572 if (accept_status) 572 if (accept_status)
573 printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); 573 printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
574 574
575 return (send_status | accept_status); 575 return (send_status | accept_status);
576 } 576 }
577 577
578 static int __cpuinit 578 static int __cpuinit
579 wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) 579 wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
580 { 580 {
581 unsigned long send_status, accept_status = 0; 581 unsigned long send_status, accept_status = 0;
582 int maxlvt, num_starts, j; 582 int maxlvt, num_starts, j;
583 583
584 maxlvt = lapic_get_maxlvt(); 584 maxlvt = lapic_get_maxlvt();
585 585
586 /* 586 /*
587 * Be paranoid about clearing APIC errors. 587 * Be paranoid about clearing APIC errors.
588 */ 588 */
589 if (APIC_INTEGRATED(apic_version[phys_apicid])) { 589 if (APIC_INTEGRATED(apic_version[phys_apicid])) {
590 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 590 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
591 apic_write(APIC_ESR, 0); 591 apic_write(APIC_ESR, 0);
592 apic_read(APIC_ESR); 592 apic_read(APIC_ESR);
593 } 593 }
594 594
595 pr_debug("Asserting INIT.\n"); 595 pr_debug("Asserting INIT.\n");
596 596
597 /* 597 /*
598 * Turn INIT on target chip 598 * Turn INIT on target chip
599 */ 599 */
600 /* 600 /*
601 * Send IPI 601 * Send IPI
602 */ 602 */
603 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, 603 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
604 phys_apicid); 604 phys_apicid);
605 605
606 pr_debug("Waiting for send to finish...\n"); 606 pr_debug("Waiting for send to finish...\n");
607 send_status = safe_apic_wait_icr_idle(); 607 send_status = safe_apic_wait_icr_idle();
608 608
609 mdelay(10); 609 mdelay(10);
610 610
611 pr_debug("Deasserting INIT.\n"); 611 pr_debug("Deasserting INIT.\n");
612 612
613 /* Target chip */ 613 /* Target chip */
614 /* Send IPI */ 614 /* Send IPI */
615 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); 615 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
616 616
617 pr_debug("Waiting for send to finish...\n"); 617 pr_debug("Waiting for send to finish...\n");
618 send_status = safe_apic_wait_icr_idle(); 618 send_status = safe_apic_wait_icr_idle();
619 619
620 mb(); 620 mb();
621 atomic_set(&init_deasserted, 1); 621 atomic_set(&init_deasserted, 1);
622 622
623 /* 623 /*
624 * Should we send STARTUP IPIs? 624 * Should we send STARTUP IPIs?
625 * 625 *
626 * Determine this based on the APIC version. 626 * Determine this based on the APIC version.
627 * If we don't have an integrated APIC, don't send the STARTUP IPIs. 627 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
628 */ 628 */
629 if (APIC_INTEGRATED(apic_version[phys_apicid])) 629 if (APIC_INTEGRATED(apic_version[phys_apicid]))
630 num_starts = 2; 630 num_starts = 2;
631 else 631 else
632 num_starts = 0; 632 num_starts = 0;
633 633
634 /* 634 /*
635 * Paravirt / VMI wants a startup IPI hook here to set up the 635 * Paravirt / VMI wants a startup IPI hook here to set up the
636 * target processor state. 636 * target processor state.
637 */ 637 */
638 startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, 638 startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
639 (unsigned long)stack_start.sp); 639 (unsigned long)stack_start.sp);
640 640
641 /* 641 /*
642 * Run STARTUP IPI loop. 642 * Run STARTUP IPI loop.
643 */ 643 */
644 pr_debug("#startup loops: %d.\n", num_starts); 644 pr_debug("#startup loops: %d.\n", num_starts);
645 645
646 for (j = 1; j <= num_starts; j++) { 646 for (j = 1; j <= num_starts; j++) {
647 pr_debug("Sending STARTUP #%d.\n", j); 647 pr_debug("Sending STARTUP #%d.\n", j);
648 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 648 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
649 apic_write(APIC_ESR, 0); 649 apic_write(APIC_ESR, 0);
650 apic_read(APIC_ESR); 650 apic_read(APIC_ESR);
651 pr_debug("After apic_write.\n"); 651 pr_debug("After apic_write.\n");
652 652
653 /* 653 /*
654 * STARTUP IPI 654 * STARTUP IPI
655 */ 655 */
656 656
657 /* Target chip */ 657 /* Target chip */
658 /* Boot on the stack */ 658 /* Boot on the stack */
659 /* Kick the second */ 659 /* Kick the second */
660 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), 660 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
661 phys_apicid); 661 phys_apicid);
662 662
663 /* 663 /*
664 * Give the other CPU some time to accept the IPI. 664 * Give the other CPU some time to accept the IPI.
665 */ 665 */
666 udelay(300); 666 udelay(300);
667 667
668 pr_debug("Startup point 1.\n"); 668 pr_debug("Startup point 1.\n");
669 669
670 pr_debug("Waiting for send to finish...\n"); 670 pr_debug("Waiting for send to finish...\n");
671 send_status = safe_apic_wait_icr_idle(); 671 send_status = safe_apic_wait_icr_idle();
672 672
673 /* 673 /*
674 * Give the other CPU some time to accept the IPI. 674 * Give the other CPU some time to accept the IPI.
675 */ 675 */
676 udelay(200); 676 udelay(200);
677 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 677 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
678 apic_write(APIC_ESR, 0); 678 apic_write(APIC_ESR, 0);
679 accept_status = (apic_read(APIC_ESR) & 0xEF); 679 accept_status = (apic_read(APIC_ESR) & 0xEF);
680 if (send_status || accept_status) 680 if (send_status || accept_status)
681 break; 681 break;
682 } 682 }
683 pr_debug("After Startup.\n"); 683 pr_debug("After Startup.\n");
684 684
685 if (send_status) 685 if (send_status)
686 printk(KERN_ERR "APIC never delivered???\n"); 686 printk(KERN_ERR "APIC never delivered???\n");
687 if (accept_status) 687 if (accept_status)
688 printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); 688 printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
689 689
690 return (send_status | accept_status); 690 return (send_status | accept_status);
691 } 691 }
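The start_eip >> 12 in the STARTUP ICR write is the trampoline's physical page number: the 8-bit startup vector VV makes the AP begin real-mode execution at physical VV000h, which is why the trampoline must be page-aligned below 1 MB. For example:

/*
 * A trampoline copied to physical 0x9f000 is sent as vector 0x9f
 * (0x9f000 >> 12); the woken AP starts fetching in real mode at
 * CS:IP = 0x9f00:0x0000, i.e. physical 0x9f000 again.
 */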
692 692
693 struct create_idle { 693 struct create_idle {
694 struct work_struct work; 694 struct work_struct work;
695 struct task_struct *idle; 695 struct task_struct *idle;
696 struct completion done; 696 struct completion done;
697 int cpu; 697 int cpu;
698 }; 698 };
699 699
700 static void __cpuinit do_fork_idle(struct work_struct *work) 700 static void __cpuinit do_fork_idle(struct work_struct *work)
701 { 701 {
702 struct create_idle *c_idle = 702 struct create_idle *c_idle =
703 container_of(work, struct create_idle, work); 703 container_of(work, struct create_idle, work);
704 704
705 c_idle->idle = fork_idle(c_idle->cpu); 705 c_idle->idle = fork_idle(c_idle->cpu);
706 complete(&c_idle->done); 706 complete(&c_idle->done);
707 } 707 }
708 708
709 /* reduce the number of lines printed when booting a large cpu count system */ 709 /* reduce the number of lines printed when booting a large cpu count system */
710 static void __cpuinit announce_cpu(int cpu, int apicid) 710 static void __cpuinit announce_cpu(int cpu, int apicid)
711 { 711 {
712 static int current_node = -1; 712 static int current_node = -1;
713 int node = early_cpu_to_node(cpu); 713 int node = early_cpu_to_node(cpu);
714 714
715 if (system_state == SYSTEM_BOOTING) { 715 if (system_state == SYSTEM_BOOTING) {
716 if (node != current_node) { 716 if (node != current_node) {
717 if (current_node > (-1)) 717 if (current_node > (-1))
718 pr_cont(" Ok.\n"); 718 pr_cont(" Ok.\n");
719 current_node = node; 719 current_node = node;
720 pr_info("Booting Node %3d, Processors ", node); 720 pr_info("Booting Node %3d, Processors ", node);
721 } 721 }
722 pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); 722 pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : "");
723 return; 723 return;
724 } else 724 } else
725 pr_info("Booting Node %d Processor %d APIC 0x%x\n", 725 pr_info("Booting Node %d Processor %d APIC 0x%x\n",
726 node, cpu, apicid); 726 node, cpu, apicid);
727 } 727 }
728 728
729 /* 729 /*
730 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 730 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
731 * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 731 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
732 * Returns zero if CPU booted OK, else error code from 732 * Returns zero if CPU booted OK, else error code from
733 * ->wakeup_secondary_cpu. 733 * ->wakeup_secondary_cpu.
734 */ 734 */
735 static int __cpuinit do_boot_cpu(int apicid, int cpu) 735 static int __cpuinit do_boot_cpu(int apicid, int cpu)
736 { 736 {
737 unsigned long boot_error = 0; 737 unsigned long boot_error = 0;
738 unsigned long start_ip; 738 unsigned long start_ip;
739 int timeout; 739 int timeout;
740 struct create_idle c_idle = { 740 struct create_idle c_idle = {
741 .cpu = cpu, 741 .cpu = cpu,
742 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 742 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
743 }; 743 };
744 744
745 INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); 745 INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
746 746
747 alternatives_smp_switch(1); 747 alternatives_smp_switch(1);
748 748
749 c_idle.idle = get_idle_for_cpu(cpu); 749 c_idle.idle = get_idle_for_cpu(cpu);
750 750
751 /* 751 /*
752 * We can't use kernel_thread since we must avoid 752 * We can't use kernel_thread since we must avoid
753 * rescheduling the child. 753 * rescheduling the child.
754 */ 754 */
755 if (c_idle.idle) { 755 if (c_idle.idle) {
756 c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) 756 c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
757 (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); 757 (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
758 init_idle(c_idle.idle, cpu); 758 init_idle(c_idle.idle, cpu);
759 goto do_rest; 759 goto do_rest;
760 } 760 }
761 761
762 schedule_work(&c_idle.work); 762 schedule_work(&c_idle.work);
763 wait_for_completion(&c_idle.done); 763 wait_for_completion(&c_idle.done);
764 764
765 if (IS_ERR(c_idle.idle)) { 765 if (IS_ERR(c_idle.idle)) {
766 printk("failed fork for CPU %d\n", cpu); 766 printk("failed fork for CPU %d\n", cpu);
767 destroy_work_on_stack(&c_idle.work); 767 destroy_work_on_stack(&c_idle.work);
768 return PTR_ERR(c_idle.idle); 768 return PTR_ERR(c_idle.idle);
769 } 769 }
770 770
771 set_idle_for_cpu(cpu, c_idle.idle); 771 set_idle_for_cpu(cpu, c_idle.idle);
772 do_rest: 772 do_rest:
773 per_cpu(current_task, cpu) = c_idle.idle; 773 per_cpu(current_task, cpu) = c_idle.idle;
774 #ifdef CONFIG_X86_32 774 #ifdef CONFIG_X86_32
775 /* Stack for startup_32 can be just as for start_secondary onwards */ 775 /* Stack for startup_32 can be just as for start_secondary onwards */
776 irq_ctx_init(cpu); 776 irq_ctx_init(cpu);
777 initial_page_table = __pa(&trampoline_pg_dir); 777 initial_page_table = __pa(&trampoline_pg_dir);
778 #else 778 #else
779 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 779 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
780 initial_gs = per_cpu_offset(cpu); 780 initial_gs = per_cpu_offset(cpu);
781 per_cpu(kernel_stack, cpu) = 781 per_cpu(kernel_stack, cpu) =
782 (unsigned long)task_stack_page(c_idle.idle) - 782 (unsigned long)task_stack_page(c_idle.idle) -
783 KERNEL_STACK_OFFSET + THREAD_SIZE; 783 KERNEL_STACK_OFFSET + THREAD_SIZE;
784 #endif 784 #endif
785 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 785 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
786 initial_code = (unsigned long)start_secondary; 786 initial_code = (unsigned long)start_secondary;
787 stack_start.sp = (void *) c_idle.idle->thread.sp; 787 stack_start.sp = (void *) c_idle.idle->thread.sp;
788 788
789 /* start_ip had better be page-aligned! */ 789 /* start_ip had better be page-aligned! */
790 start_ip = setup_trampoline(); 790 start_ip = setup_trampoline();
791 791
792 /* So we see what's up */ 792 /* So we see what's up */
793 announce_cpu(cpu, apicid); 793 announce_cpu(cpu, apicid);
794 794
795 /* 795 /*
796 * This grunge runs the startup process for 796 * This grunge runs the startup process for
797 * the targeted processor. 797 * the targeted processor.
798 */ 798 */
799 799
800 atomic_set(&init_deasserted, 0); 800 atomic_set(&init_deasserted, 0);
801 801
802 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { 802 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
803 803
804 pr_debug("Setting warm reset code and vector.\n"); 804 pr_debug("Setting warm reset code and vector.\n");
805 805
806 smpboot_setup_warm_reset_vector(start_ip); 806 smpboot_setup_warm_reset_vector(start_ip);
807 /* 807 /*
808 * Be paranoid about clearing APIC errors. 808 * Be paranoid about clearing APIC errors.
809 */ 809 */
810 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 810 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
811 apic_write(APIC_ESR, 0); 811 apic_write(APIC_ESR, 0);
812 apic_read(APIC_ESR); 812 apic_read(APIC_ESR);
813 } 813 }
814 } 814 }
815 815
816 /* 816 /*
817 * Kick the secondary CPU. Use the method in the APIC driver 817 * Kick the secondary CPU. Use the method in the APIC driver
818 * if it's defined - or use an INIT boot APIC message otherwise: 818 * if it's defined - or use an INIT boot APIC message otherwise:
819 */ 819 */
820 if (apic->wakeup_secondary_cpu) 820 if (apic->wakeup_secondary_cpu)
821 boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); 821 boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
822 else 822 else
823 boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); 823 boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
824 824
825 if (!boot_error) { 825 if (!boot_error) {
826 /* 826 /*
827 * allow APs to start initializing. 827 * allow APs to start initializing.
828 */ 828 */
829 pr_debug("Before Callout %d.\n", cpu); 829 pr_debug("Before Callout %d.\n", cpu);
830 cpumask_set_cpu(cpu, cpu_callout_mask); 830 cpumask_set_cpu(cpu, cpu_callout_mask);
831 pr_debug("After Callout %d.\n", cpu); 831 pr_debug("After Callout %d.\n", cpu);
832 832
833 /* 833 /*
834 * Wait 5s total for a response 834 * Wait 5s total for a response
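	 * (50000 polls of udelay(100) == 5,000,000 us)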
835 */ 835 */
836 for (timeout = 0; timeout < 50000; timeout++) { 836 for (timeout = 0; timeout < 50000; timeout++) {
837 if (cpumask_test_cpu(cpu, cpu_callin_mask)) 837 if (cpumask_test_cpu(cpu, cpu_callin_mask))
838 break; /* It has booted */ 838 break; /* It has booted */
839 udelay(100); 839 udelay(100);
840 /* 840 /*
841 * Allow other tasks to run while we wait for the 841 * Allow other tasks to run while we wait for the
842 * AP to come online. This also gives a chance 842 * AP to come online. This also gives a chance
843 * for the MTRR work (triggered by the AP coming online) 843 * for the MTRR work (triggered by the AP coming online)
844 * to be completed in the stop machine context. 844 * to be completed in the stop machine context.
845 */ 845 */
846 schedule(); 846 schedule();
847 } 847 }
848 848
849 if (cpumask_test_cpu(cpu, cpu_callin_mask)) 849 if (cpumask_test_cpu(cpu, cpu_callin_mask))
850 pr_debug("CPU%d: has booted.\n", cpu); 850 pr_debug("CPU%d: has booted.\n", cpu);
851 else { 851 else {
852 boot_error = 1; 852 boot_error = 1;
853 if (*((volatile unsigned char *)trampoline_base) 853 if (*((volatile unsigned char *)trampoline_base)
854 == 0xA5) 854 == 0xA5)
855 /* trampoline started but...? */ 855 /* trampoline started but...? */
856 pr_err("CPU%d: Stuck ??\n", cpu); 856 pr_err("CPU%d: Stuck ??\n", cpu);
857 else 857 else
858 /* trampoline code not run */ 858 /* trampoline code not run */
859 pr_err("CPU%d: Not responding.\n", cpu); 859 pr_err("CPU%d: Not responding.\n", cpu);
860 if (apic->inquire_remote_apic) 860 if (apic->inquire_remote_apic)
861 apic->inquire_remote_apic(apicid); 861 apic->inquire_remote_apic(apicid);
862 } 862 }
863 } 863 }
864 864
865 if (boot_error) { 865 if (boot_error) {
866 /* Try to put things back the way they were before ... */ 866 /* Try to put things back the way they were before ... */
867 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 867 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
868 868
869 /* was set by do_boot_cpu() */ 869 /* was set by do_boot_cpu() */
870 cpumask_clear_cpu(cpu, cpu_callout_mask); 870 cpumask_clear_cpu(cpu, cpu_callout_mask);
871 871
872 /* was set by cpu_init() */ 872 /* was set by cpu_init() */
873 cpumask_clear_cpu(cpu, cpu_initialized_mask); 873 cpumask_clear_cpu(cpu, cpu_initialized_mask);
874 874
875 set_cpu_present(cpu, false); 875 set_cpu_present(cpu, false);
876 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; 876 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
877 } 877 }
878 878
879 /* mark "stuck" area as not stuck */ 879 /* mark "stuck" area as not stuck */
880 *((volatile unsigned long *)trampoline_base) = 0; 880 *((volatile unsigned long *)trampoline_base) = 0;
881 881
882 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { 882 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
883 /* 883 /*
884 * Clean up possible dangling ends... 884 * Clean up possible dangling ends...
885 */ 885 */
886 smpboot_restore_warm_reset_vector(); 886 smpboot_restore_warm_reset_vector();
887 } 887 }
888 888
889 destroy_work_on_stack(&c_idle.work); 889 destroy_work_on_stack(&c_idle.work);
890 return boot_error; 890 return boot_error;
891 } 891 }
892 892
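/*
 * Hotplug entry point: the generic cpu_up() path reaches this through
 * smp_ops.cpu_up, which the default native smp_ops points at native_cpu_up.
 */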
893 int __cpuinit native_cpu_up(unsigned int cpu) 893 int __cpuinit native_cpu_up(unsigned int cpu)
894 { 894 {
895 int apicid = apic->cpu_present_to_apicid(cpu); 895 int apicid = apic->cpu_present_to_apicid(cpu);
896 unsigned long flags; 896 unsigned long flags;
897 int err; 897 int err;
898 898
899 WARN_ON(irqs_disabled()); 899 WARN_ON(irqs_disabled());
900 900
901 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); 901 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
902 902
903 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || 903 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
904 !physid_isset(apicid, phys_cpu_present_map)) { 904 !physid_isset(apicid, phys_cpu_present_map)) {
905 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); 905 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
906 return -EINVAL; 906 return -EINVAL;
907 } 907 }
908 908
909 /* 909 /*
910 * Already booted CPU? 910 * Already booted CPU?
911 */ 911 */
912 if (cpumask_test_cpu(cpu, cpu_callin_mask)) { 912 if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
913 pr_debug("do_boot_cpu %d Already started\n", cpu); 913 pr_debug("do_boot_cpu %d Already started\n", cpu);
914 return -ENOSYS; 914 return -ENOSYS;
915 } 915 }
916 916
917 /* 917 /*
918 * Save current MTRR state in case it was changed since early boot 918 * Save current MTRR state in case it was changed since early boot
919 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: 919 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
920 */ 920 */
921 mtrr_save_state(); 921 mtrr_save_state();
922 922
923 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 923 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
924 924
925 err = do_boot_cpu(apicid, cpu); 925 err = do_boot_cpu(apicid, cpu);
926 926
927 if (err) { 927 if (err) {
928 pr_debug("do_boot_cpu failed %d\n", err); 928 pr_debug("do_boot_cpu failed %d\n", err);
929 return -EIO; 929 return -EIO;
930 } 930 }
931 931
932 /* 932 /*
933 * Check TSC synchronization with the AP (keep irqs disabled 933 * Check TSC synchronization with the AP (keep irqs disabled
934 * while doing so): 934 * while doing so):
935 */ 935 */
936 local_irq_save(flags); 936 local_irq_save(flags);
937 check_tsc_sync_source(cpu); 937 check_tsc_sync_source(cpu);
938 local_irq_restore(flags); 938 local_irq_restore(flags);
939 939
940 while (!cpu_online(cpu)) { 940 while (!cpu_online(cpu)) {
941 cpu_relax(); 941 cpu_relax();
942 touch_nmi_watchdog(); 942 touch_nmi_watchdog();
943 } 943 }
944 944
945 return 0; 945 return 0;
946 } 946 }
947 947
948 /* 948 /*
949 * Fall back to non-SMP mode after errors. 949 * Fall back to non-SMP mode after errors.
950 * 950 *
951 * RED-PEN audit/test this more. I bet there is more state messed up here. 951 * RED-PEN audit/test this more. I bet there is more state messed up here.
952 */ 952 */
953 static __init void disable_smp(void) 953 static __init void disable_smp(void)
954 { 954 {
955 init_cpu_present(cpumask_of(0)); 955 init_cpu_present(cpumask_of(0));
956 init_cpu_possible(cpumask_of(0)); 956 init_cpu_possible(cpumask_of(0));
957 smpboot_clear_io_apic_irqs(); 957 smpboot_clear_io_apic_irqs();
958 958
959 if (smp_found_config) 959 if (smp_found_config)
960 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 960 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
961 else 961 else
962 physid_set_mask_of_physid(0, &phys_cpu_present_map); 962 physid_set_mask_of_physid(0, &phys_cpu_present_map);
963 map_cpu_to_logical_apicid(); 963 map_cpu_to_logical_apicid();
964 cpumask_set_cpu(0, cpu_sibling_mask(0)); 964 cpumask_set_cpu(0, cpu_sibling_mask(0));
965 cpumask_set_cpu(0, cpu_core_mask(0)); 965 cpumask_set_cpu(0, cpu_core_mask(0));
966 } 966 }
967 967
968 /* 968 /*
969 * Various sanity checks. 969 * Various sanity checks.
970 */ 970 */
971 static int __init smp_sanity_check(unsigned max_cpus) 971 static int __init smp_sanity_check(unsigned max_cpus)
972 { 972 {
973 preempt_disable(); 973 preempt_disable();
974 974
975 #if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) 975 #if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
976 if (def_to_bigsmp && nr_cpu_ids > 8) { 976 if (def_to_bigsmp && nr_cpu_ids > 8) {
977 unsigned int cpu; 977 unsigned int cpu;
978 unsigned nr; 978 unsigned nr;
979 979
980 printk(KERN_WARNING 980 printk(KERN_WARNING
981 "More than 8 CPUs detected - skipping them.\n" 981 "More than 8 CPUs detected - skipping them.\n"
982 "Use CONFIG_X86_BIGSMP.\n"); 982 "Use CONFIG_X86_BIGSMP.\n");
983 983
984 nr = 0; 984 nr = 0;
985 for_each_present_cpu(cpu) { 985 for_each_present_cpu(cpu) {
986 if (nr >= 8) 986 if (nr >= 8)
987 set_cpu_present(cpu, false); 987 set_cpu_present(cpu, false);
988 nr++; 988 nr++;
989 } 989 }
990 990
991 nr = 0; 991 nr = 0;
992 for_each_possible_cpu(cpu) { 992 for_each_possible_cpu(cpu) {
993 if (nr >= 8) 993 if (nr >= 8)
994 set_cpu_possible(cpu, false); 994 set_cpu_possible(cpu, false);
995 nr++; 995 nr++;
996 } 996 }
997 997
998 nr_cpu_ids = 8; 998 nr_cpu_ids = 8;
999 } 999 }
1000 #endif 1000 #endif
1001 1001
1002 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { 1002 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
1003 printk(KERN_WARNING 1003 printk(KERN_WARNING
1004 "weird, boot CPU (#%d) not listed by the BIOS.\n", 1004 "weird, boot CPU (#%d) not listed by the BIOS.\n",
1005 hard_smp_processor_id()); 1005 hard_smp_processor_id());
1006 1006
1007 physid_set(hard_smp_processor_id(), phys_cpu_present_map); 1007 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1008 } 1008 }
1009 1009
1010 /* 1010 /*
1011 * If we couldn't find an SMP configuration at boot time, 1011 * If we couldn't find an SMP configuration at boot time,
1012 * get out of here now! 1012 * get out of here now!
1013 */ 1013 */
1014 if (!smp_found_config && !acpi_lapic) { 1014 if (!smp_found_config && !acpi_lapic) {
1015 preempt_enable(); 1015 preempt_enable();
1016 printk(KERN_NOTICE "SMP motherboard not detected.\n"); 1016 printk(KERN_NOTICE "SMP motherboard not detected.\n");
1017 disable_smp(); 1017 disable_smp();
1018 if (APIC_init_uniprocessor()) 1018 if (APIC_init_uniprocessor())
1019 printk(KERN_NOTICE "Local APIC not detected." 1019 printk(KERN_NOTICE "Local APIC not detected."
1020 " Using dummy APIC emulation.\n"); 1020 " Using dummy APIC emulation.\n");
1021 return -1; 1021 return -1;
1022 } 1022 }
1023 1023
1024 /* 1024 /*
1025 * Should not be necessary because the MP table should list the boot 1025 * Should not be necessary because the MP table should list the boot
1026 * CPU too, but we do it for the sake of robustness anyway. 1026 * CPU too, but we do it for the sake of robustness anyway.
1027 */ 1027 */
1028 if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { 1028 if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
1029 printk(KERN_NOTICE 1029 printk(KERN_NOTICE
1030 "weird, boot CPU (#%d) not listed by the BIOS.\n", 1030 "weird, boot CPU (#%d) not listed by the BIOS.\n",
1031 boot_cpu_physical_apicid); 1031 boot_cpu_physical_apicid);
1032 physid_set(hard_smp_processor_id(), phys_cpu_present_map); 1032 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1033 } 1033 }
1034 preempt_enable(); 1034 preempt_enable();
1035 1035
1036 /* 1036 /*
1037 * If we couldn't find a local APIC, then get out of here now! 1037 * If we couldn't find a local APIC, then get out of here now!
1038 */ 1038 */
1039 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && 1039 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
1040 !cpu_has_apic) { 1040 !cpu_has_apic) {
1041 if (!disable_apic) { 1041 if (!disable_apic) {
1042 pr_err("BIOS bug, local APIC #%d not detected!...\n", 1042 pr_err("BIOS bug, local APIC #%d not detected!...\n",
1043 boot_cpu_physical_apicid); 1043 boot_cpu_physical_apicid);
1044 pr_err("... forcing use of dummy APIC emulation." 1044 pr_err("... forcing use of dummy APIC emulation."
1045 "(tell your hw vendor)\n"); 1045 "(tell your hw vendor)\n");
1046 } 1046 }
1047 smpboot_clear_io_apic(); 1047 smpboot_clear_io_apic();
1048 arch_disable_smp_support(); 1048 arch_disable_smp_support();
1049 return -1; 1049 return -1;
1050 } 1050 }
1051 1051
1052 verify_local_APIC(); 1052 verify_local_APIC();
1053 1053
1054 /* 1054 /*
1055 * If SMP should be disabled, then really disable it! 1055 * If SMP should be disabled, then really disable it!
1056 */ 1056 */
1057 if (!max_cpus) { 1057 if (!max_cpus) {
1058 printk(KERN_INFO "SMP mode deactivated.\n"); 1058 printk(KERN_INFO "SMP mode deactivated.\n");
1059 smpboot_clear_io_apic(); 1059 smpboot_clear_io_apic();
1060 1060
1061 localise_nmi_watchdog(); 1061 localise_nmi_watchdog();
1062 1062
1063 connect_bsp_APIC(); 1063 connect_bsp_APIC();
1064 setup_local_APIC(); 1064 setup_local_APIC();
1065 end_local_APIC_setup(); 1065 end_local_APIC_setup();
1066 return -1; 1066 return -1;
1067 } 1067 }
1068 1068
1069 return 0; 1069 return 0;
1070 } 1070 }
1071 1071
1072 static void __init smp_cpu_index_default(void) 1072 static void __init smp_cpu_index_default(void)
1073 { 1073 {
1074 int i; 1074 int i;
1075 struct cpuinfo_x86 *c; 1075 struct cpuinfo_x86 *c;
1076 1076
1077 for_each_possible_cpu(i) { 1077 for_each_possible_cpu(i) {
1078 c = &cpu_data(i); 1078 c = &cpu_data(i);
1079 /* mark all to hotplug */ 1079 /* mark all to hotplug */
1080 c->cpu_index = nr_cpu_ids; 1080 c->cpu_index = nr_cpu_ids;
1081 } 1081 }
1082 } 1082 }
1083 1083
1084 /* 1084 /*
1085 * Prepare for SMP bootup. The MP table or ACPI has been read 1085 * Prepare for SMP bootup. The MP table or ACPI has been read
1086 * earlier. Just do some sanity checking here and enable APIC mode. 1086 * earlier. Just do some sanity checking here and enable APIC mode.
1087 */ 1087 */
1088 void __init native_smp_prepare_cpus(unsigned int max_cpus) 1088 void __init native_smp_prepare_cpus(unsigned int max_cpus)
1089 { 1089 {
1090 unsigned int i; 1090 unsigned int i;
1091 1091
1092 preempt_disable(); 1092 preempt_disable();
1093 smp_cpu_index_default(); 1093 smp_cpu_index_default();
1094 current_cpu_data = boot_cpu_data; 1094 current_cpu_data = boot_cpu_data;
1095 cpumask_copy(cpu_callin_mask, cpumask_of(0)); 1095 cpumask_copy(cpu_callin_mask, cpumask_of(0));
1096 mb(); 1096 mb();
1097 /* 1097 /*
1098 * Setup boot CPU information 1098 * Setup boot CPU information
1099 */ 1099 */
1100 smp_store_cpu_info(0); /* Final full version of the data */ 1100 smp_store_cpu_info(0); /* Final full version of the data */
1101 #ifdef CONFIG_X86_32 1101 #ifdef CONFIG_X86_32
1102 boot_cpu_logical_apicid = logical_smp_processor_id(); 1102 boot_cpu_logical_apicid = logical_smp_processor_id();
1103 #endif 1103 #endif
1104 current_thread_info()->cpu = 0; /* needed? */ 1104 current_thread_info()->cpu = 0; /* needed? */
1105 for_each_possible_cpu(i) { 1105 for_each_possible_cpu(i) {
1106 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); 1106 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
1107 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); 1107 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
1108 zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); 1108 zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
1109 } 1109 }
1110 set_cpu_sibling_map(0); 1110 set_cpu_sibling_map(0);
1111 1111
1112 1112
1113 if (smp_sanity_check(max_cpus) < 0) { 1113 if (smp_sanity_check(max_cpus) < 0) {
1114 printk(KERN_INFO "SMP disabled\n"); 1114 printk(KERN_INFO "SMP disabled\n");
1115 disable_smp(); 1115 disable_smp();
1116 goto out; 1116 goto out;
1117 } 1117 }
1118 1118
1119 default_setup_apic_routing(); 1119 default_setup_apic_routing();
1120 1120
1121 preempt_disable(); 1121 preempt_disable();
1122 if (read_apic_id() != boot_cpu_physical_apicid) { 1122 if (read_apic_id() != boot_cpu_physical_apicid) {
1123 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1123 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1124 read_apic_id(), boot_cpu_physical_apicid); 1124 read_apic_id(), boot_cpu_physical_apicid);
1125 /* Or can we switch back to PIC here? */ 1125 /* Or can we switch back to PIC here? */
1126 } 1126 }
1127 preempt_enable(); 1127 preempt_enable();
1128 1128
1129 connect_bsp_APIC(); 1129 connect_bsp_APIC();
1130 1130
1131 /* 1131 /*
1132 * Switch from PIC to APIC mode. 1132 * Switch from PIC to APIC mode.
1133 */ 1133 */
1134 setup_local_APIC(); 1134 setup_local_APIC();
1135 1135
1136 /* 1136 /*
1137 * Enable IO APIC before setting up error vector 1137 * Enable IO APIC before setting up error vector
1138 */ 1138 */
1139 if (!skip_ioapic_setup && nr_ioapics) 1139 if (!skip_ioapic_setup && nr_ioapics)
1140 enable_IO_APIC(); 1140 enable_IO_APIC();
1141 1141
1142 end_local_APIC_setup(); 1142 end_local_APIC_setup();
1143 1143
1144 map_cpu_to_logical_apicid(); 1144 map_cpu_to_logical_apicid();
1145 1145
1146 if (apic->setup_portio_remap) 1146 if (apic->setup_portio_remap)
1147 apic->setup_portio_remap(); 1147 apic->setup_portio_remap();
1148 1148
1149 smpboot_setup_io_apic(); 1149 smpboot_setup_io_apic();
1150 /* 1150 /*
1151 * Set up local APIC timer on boot CPU. 1151 * Set up local APIC timer on boot CPU.
1152 */ 1152 */
1153 1153
1154 printk(KERN_INFO "CPU%d: ", 0); 1154 printk(KERN_INFO "CPU%d: ", 0);
1155 print_cpu_info(&cpu_data(0)); 1155 print_cpu_info(&cpu_data(0));
1156 x86_init.timers.setup_percpu_clockev(); 1156 x86_init.timers.setup_percpu_clockev();
1157 1157
1158 if (is_uv_system()) 1158 if (is_uv_system())
1159 uv_system_init(); 1159 uv_system_init();
1160 1160
1161 set_mtrr_aps_delayed_init(); 1161 set_mtrr_aps_delayed_init();
1162 out: 1162 out:
1163 preempt_enable(); 1163 preempt_enable();
1164 } 1164 }
1165 1165
1166 void arch_enable_nonboot_cpus_begin(void) 1166 void arch_enable_nonboot_cpus_begin(void)
1167 { 1167 {
1168 set_mtrr_aps_delayed_init(); 1168 set_mtrr_aps_delayed_init();
1169 } 1169 }
1170 1170
1171 void arch_enable_nonboot_cpus_end(void) 1171 void arch_enable_nonboot_cpus_end(void)
1172 { 1172 {
1173 mtrr_aps_init(); 1173 mtrr_aps_init();
1174 } 1174 }
1175 1175
1176 /* 1176 /*
1177 * Early setup to make printk work. 1177 * Early setup to make printk work.
1178 */ 1178 */
1179 void __init native_smp_prepare_boot_cpu(void) 1179 void __init native_smp_prepare_boot_cpu(void)
1180 { 1180 {
1181 int me = smp_processor_id(); 1181 int me = smp_processor_id();
1182 switch_to_new_gdt(me); 1182 switch_to_new_gdt(me);
1183 /* already set me in cpu_online_mask in boot_cpu_init() */ 1183 /* already set me in cpu_online_mask in boot_cpu_init() */
1184 cpumask_set_cpu(me, cpu_callout_mask); 1184 cpumask_set_cpu(me, cpu_callout_mask);
1185 per_cpu(cpu_state, me) = CPU_ONLINE; 1185 per_cpu(cpu_state, me) = CPU_ONLINE;
1186 } 1186 }
1187 1187
1188 void __init native_smp_cpus_done(unsigned int max_cpus) 1188 void __init native_smp_cpus_done(unsigned int max_cpus)
1189 { 1189 {
1190 pr_debug("Boot done.\n"); 1190 pr_debug("Boot done.\n");
1191 1191
1192 impress_friends(); 1192 impress_friends();
1193 #ifdef CONFIG_X86_IO_APIC 1193 #ifdef CONFIG_X86_IO_APIC
1194 setup_ioapic_dest(); 1194 setup_ioapic_dest();
1195 #endif 1195 #endif
1196 check_nmi_watchdog(); 1196 check_nmi_watchdog();
1197 mtrr_aps_init(); 1197 mtrr_aps_init();
1198 } 1198 }
1199 1199
1200 static int __initdata setup_possible_cpus = -1; 1200 static int __initdata setup_possible_cpus = -1;
1201 static int __init _setup_possible_cpus(char *str) 1201 static int __init _setup_possible_cpus(char *str)
1202 { 1202 {
1203 get_option(&str, &setup_possible_cpus); 1203 get_option(&str, &setup_possible_cpus);
1204 return 0; 1204 return 0;
1205 } 1205 }
1206 early_param("possible_cpus", _setup_possible_cpus); 1206 early_param("possible_cpus", _setup_possible_cpus);
1207 1207
1208 1208
1209 /* 1209 /*
1210 * cpu_possible_mask should be static; it cannot change as CPUs 1210 * cpu_possible_mask should be static; it cannot change as CPUs
1211 * are onlined or offlined. The reason is that per-cpu data structures 1211 * are onlined or offlined. The reason is that per-cpu data structures
1212 * are allocated by some modules at init time, and they don't expect to 1212 * are allocated by some modules at init time, and they don't expect to
1213 * do this dynamically on cpu arrival/departure. 1213 * do this dynamically on cpu arrival/departure.
1214 * cpu_present_mask on the other hand can change dynamically. 1214 * cpu_present_mask on the other hand can change dynamically.
1215 * If CPU hotplug is not compiled in, we fall back to the current 1215 * If CPU hotplug is not compiled in, we fall back to the current
1216 * behaviour, which is cpu_possible == cpu_present. 1216 * behaviour, which is cpu_possible == cpu_present.
1217 * - Ashok Raj 1217 * - Ashok Raj
1218 * 1218 *
1219 * Three ways to find out the number of additional hotplug CPUs: 1219 * Three ways to find out the number of additional hotplug CPUs:
1220 * - If the BIOS specified disabled CPUs in ACPI/mptables use that. 1220 * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
1221 * - The user can override it with possible_cpus=NUM 1221 * - The user can override it with possible_cpus=NUM
1222 * - Otherwise don't reserve additional CPUs. 1222 * - Otherwise don't reserve additional CPUs.
1223 * We do this because additional CPUs waste a lot of memory. 1223 * We do this because additional CPUs waste a lot of memory.
1224 * -AK 1224 * -AK
1225 */ 1225 */
1226 __init void prefill_possible_map(void) 1226 __init void prefill_possible_map(void)
1227 { 1227 {
1228 int i, possible; 1228 int i, possible;
1229 1229
1230 /* no processor from mptable or madt */ 1230 /* no processor from mptable or madt */
1231 if (!num_processors) 1231 if (!num_processors)
1232 num_processors = 1; 1232 num_processors = 1;
1233 1233
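	/* GNU ?: extension: use setup_max_cpus unless it is 0, then use 1 */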
1234 i = setup_max_cpus ?: 1; 1234 i = setup_max_cpus ?: 1;
1235 if (setup_possible_cpus == -1) { 1235 if (setup_possible_cpus == -1) {
1236 possible = num_processors; 1236 possible = num_processors;
1237 #ifdef CONFIG_HOTPLUG_CPU 1237 #ifdef CONFIG_HOTPLUG_CPU
1238 if (setup_max_cpus) 1238 if (setup_max_cpus)
1239 possible += disabled_cpus; 1239 possible += disabled_cpus;
1240 #else 1240 #else
1241 if (possible > i) 1241 if (possible > i)
1242 possible = i; 1242 possible = i;
1243 #endif 1243 #endif
1244 } else 1244 } else
1245 possible = setup_possible_cpus; 1245 possible = setup_possible_cpus;
1246 1246
1247 total_cpus = max_t(int, possible, num_processors + disabled_cpus); 1247 total_cpus = max_t(int, possible, num_processors + disabled_cpus);
1248 1248
1249 /* nr_cpu_ids could be reduced via nr_cpus= */ 1249 /* nr_cpu_ids could be reduced via nr_cpus= */
1250 if (possible > nr_cpu_ids) { 1250 if (possible > nr_cpu_ids) {
1251 printk(KERN_WARNING 1251 printk(KERN_WARNING
1252 "%d Processors exceeds NR_CPUS limit of %d\n", 1252 "%d Processors exceeds NR_CPUS limit of %d\n",
1253 possible, nr_cpu_ids); 1253 possible, nr_cpu_ids);
1254 possible = nr_cpu_ids; 1254 possible = nr_cpu_ids;
1255 } 1255 }
1256 1256
1257 #ifdef CONFIG_HOTPLUG_CPU 1257 #ifdef CONFIG_HOTPLUG_CPU
1258 if (!setup_max_cpus) 1258 if (!setup_max_cpus)
1259 #endif 1259 #endif
1260 if (possible > i) { 1260 if (possible > i) {
1261 printk(KERN_WARNING 1261 printk(KERN_WARNING
1262 "%d Processors exceeds max_cpus limit of %u\n", 1262 "%d Processors exceeds max_cpus limit of %u\n",
1263 possible, setup_max_cpus); 1263 possible, setup_max_cpus);
1264 possible = i; 1264 possible = i;
1265 } 1265 }
1266 1266
1267 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 1267 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
1268 possible, max_t(int, possible - num_processors, 0)); 1268 possible, max_t(int, possible - num_processors, 0));
1269 1269
1270 for (i = 0; i < possible; i++) 1270 for (i = 0; i < possible; i++)
1271 set_cpu_possible(i, true); 1271 set_cpu_possible(i, true);
1272 for (; i < NR_CPUS; i++) 1272 for (; i < NR_CPUS; i++)
1273 set_cpu_possible(i, false); 1273 set_cpu_possible(i, false);
1274 1274
1275 nr_cpu_ids = possible; 1275 nr_cpu_ids = possible;
1276 } 1276 }
1277 1277
1278 #ifdef CONFIG_HOTPLUG_CPU 1278 #ifdef CONFIG_HOTPLUG_CPU
1279 1279
1280 static void remove_siblinginfo(int cpu) 1280 static void remove_siblinginfo(int cpu)
1281 { 1281 {
1282 int sibling; 1282 int sibling;
1283 struct cpuinfo_x86 *c = &cpu_data(cpu); 1283 struct cpuinfo_x86 *c = &cpu_data(cpu);
1284 1284
1285 for_each_cpu(sibling, cpu_core_mask(cpu)) { 1285 for_each_cpu(sibling, cpu_core_mask(cpu)) {
1286 cpumask_clear_cpu(cpu, cpu_core_mask(sibling)); 1286 cpumask_clear_cpu(cpu, cpu_core_mask(sibling));
1287 /* 1287 /*
1288 * the last thread sibling in this CPU core is going down 1288 * the last thread sibling in this CPU core is going down
1289 */ 1289 */
1290 if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) 1290 if (cpumask_weight(cpu_sibling_mask(cpu)) == 1)
1291 cpu_data(sibling).booted_cores--; 1291 cpu_data(sibling).booted_cores--;
1292 } 1292 }
1293 1293
1294 for_each_cpu(sibling, cpu_sibling_mask(cpu)) 1294 for_each_cpu(sibling, cpu_sibling_mask(cpu))
1295 cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); 1295 cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
1296 cpumask_clear(cpu_sibling_mask(cpu)); 1296 cpumask_clear(cpu_sibling_mask(cpu));
1297 cpumask_clear(cpu_core_mask(cpu)); 1297 cpumask_clear(cpu_core_mask(cpu));
1298 c->phys_proc_id = 0; 1298 c->phys_proc_id = 0;
1299 c->cpu_core_id = 0; 1299 c->cpu_core_id = 0;
1300 cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); 1300 cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
1301 } 1301 }
1302 1302
1303 static void __ref remove_cpu_from_maps(int cpu) 1303 static void __ref remove_cpu_from_maps(int cpu)
1304 { 1304 {
1305 set_cpu_online(cpu, false); 1305 set_cpu_online(cpu, false);
1306 cpumask_clear_cpu(cpu, cpu_callout_mask); 1306 cpumask_clear_cpu(cpu, cpu_callout_mask);
1307 cpumask_clear_cpu(cpu, cpu_callin_mask); 1307 cpumask_clear_cpu(cpu, cpu_callin_mask);
1308 /* was set by cpu_init() */ 1308 /* was set by cpu_init() */
1309 cpumask_clear_cpu(cpu, cpu_initialized_mask); 1309 cpumask_clear_cpu(cpu, cpu_initialized_mask);
1310 numa_remove_cpu(cpu); 1310 numa_remove_cpu(cpu);
1311 } 1311 }
1312 1312
1313 void cpu_disable_common(void) 1313 void cpu_disable_common(void)
1314 { 1314 {
1315 int cpu = smp_processor_id(); 1315 int cpu = smp_processor_id();
1316 1316
1317 remove_siblinginfo(cpu); 1317 remove_siblinginfo(cpu);
1318 1318
1319 /* It's now safe to remove this processor from the online map */ 1319 /* It's now safe to remove this processor from the online map */
1320 lock_vector_lock(); 1320 lock_vector_lock();
1321 remove_cpu_from_maps(cpu); 1321 remove_cpu_from_maps(cpu);
1322 unlock_vector_lock(); 1322 unlock_vector_lock();
1323 fixup_irqs(); 1323 fixup_irqs();
1324 } 1324 }
1325 1325
1326 int native_cpu_disable(void) 1326 int native_cpu_disable(void)
1327 { 1327 {
1328 int cpu = smp_processor_id(); 1328 int cpu = smp_processor_id();
1329 1329
1330 /* 1330 /*
1331 * Perhaps use cpufreq to drop frequency, but that could go 1331 * Perhaps use cpufreq to drop frequency, but that could go
1332 * into generic code. 1332 * into generic code.
1333 * 1333 *
1334 * We won't take down the boot processor on i386 due to some 1334 * We won't take down the boot processor on i386 due to some
1335 * interrupts only being able to be serviced by the BSP. 1335 * interrupts only being able to be serviced by the BSP.
1336 * Especially so if we're not using an IOAPIC -zwane 1336 * Especially so if we're not using an IOAPIC -zwane
1337 */ 1337 */
1338 if (cpu == 0) 1338 if (cpu == 0)
1339 return -EBUSY; 1339 return -EBUSY;
1340 1340
1341 if (nmi_watchdog == NMI_LOCAL_APIC) 1341 if (nmi_watchdog == NMI_LOCAL_APIC)
1342 stop_apic_nmi_watchdog(NULL); 1342 stop_apic_nmi_watchdog(NULL);
1343 clear_local_APIC(); 1343 clear_local_APIC();
1344 1344
1345 cpu_disable_common(); 1345 cpu_disable_common();
1346 return 0; 1346 return 0;
1347 } 1347 }
1348 1348
1349 void native_cpu_die(unsigned int cpu) 1349 void native_cpu_die(unsigned int cpu)
1350 { 1350 {
1351 /* We don't do anything here: idle task is faking death itself. */ 1351 /* We don't do anything here: idle task is faking death itself. */
1352 unsigned int i; 1352 unsigned int i;
1353 1353
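	/* Poll for up to one second (10 x 100ms) for the CPU to mark itself CPU_DEAD */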
1354 for (i = 0; i < 10; i++) { 1354 for (i = 0; i < 10; i++) {
1355 /* They ack this in play_dead by setting CPU_DEAD */ 1355 /* They ack this in play_dead by setting CPU_DEAD */
1356 if (per_cpu(cpu_state, cpu) == CPU_DEAD) { 1356 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1357 if (system_state == SYSTEM_RUNNING) 1357 if (system_state == SYSTEM_RUNNING)
1358 pr_info("CPU %u is now offline\n", cpu); 1358 pr_info("CPU %u is now offline\n", cpu);
1359 1359
1360 if (num_online_cpus() == 1) 1360 if (num_online_cpus() == 1)
1361 alternatives_smp_switch(0); 1361 alternatives_smp_switch(0);
1362 return; 1362 return;
1363 } 1363 }
1364 msleep(100); 1364 msleep(100);
1365 } 1365 }
1366 pr_err("CPU %u didn't die...\n", cpu); 1366 pr_err("CPU %u didn't die...\n", cpu);
1367 } 1367 }
1368 1368
1369 void play_dead_common(void) 1369 void play_dead_common(void)
1370 { 1370 {
1371 idle_task_exit(); 1371 idle_task_exit();
1372 reset_lazy_tlbstate(); 1372 reset_lazy_tlbstate();
1373 irq_ctx_exit(raw_smp_processor_id()); 1373 irq_ctx_exit(raw_smp_processor_id());
1374 c1e_remove_cpu(raw_smp_processor_id()); 1374 c1e_remove_cpu(raw_smp_processor_id());
1375 1375
1376 mb(); 1376 mb();
1377 /* Ack it */ 1377 /* Ack it */
1378 __get_cpu_var(cpu_state) = CPU_DEAD; 1378 __get_cpu_var(cpu_state) = CPU_DEAD;
1379 1379
1380 /* 1380 /*
1381 * With physical CPU hotplug, we should halt the cpu 1381 * With physical CPU hotplug, we should halt the cpu
1382 */ 1382 */
1383 local_irq_disable(); 1383 local_irq_disable();
1384 } 1384 }
1385 1385
1386 void native_play_dead(void) 1386 void native_play_dead(void)
1387 { 1387 {
1388 play_dead_common(); 1388 play_dead_common();
1389 tboot_shutdown(TB_SHUTDOWN_WFS); 1389 tboot_shutdown(TB_SHUTDOWN_WFS);
1390 wbinvd_halt(); 1390 wbinvd_halt();
1391 } 1391 }
1392 1392
1393 #else /* ... !CONFIG_HOTPLUG_CPU */ 1393 #else /* ... !CONFIG_HOTPLUG_CPU */
1394 int native_cpu_disable(void) 1394 int native_cpu_disable(void)
1395 { 1395 {
1396 return -ENOSYS; 1396 return -ENOSYS;
1397 } 1397 }
1398 1398
1399 void native_cpu_die(unsigned int cpu) 1399 void native_cpu_die(unsigned int cpu)
1400 { 1400 {
1401 /* We said "no" in __cpu_disable */ 1401 /* We said "no" in __cpu_disable */
1402 BUG(); 1402 BUG();
1403 } 1403 }
1404 1404
1405 void native_play_dead(void) 1405 void native_play_dead(void)
1406 { 1406 {
1407 BUG(); 1407 BUG();
1408 } 1408 }
1409 1409
1410 #endif 1410 #endif
1411 1411