Commit df79efde82952edc653fa6eb1338a82b87aa0585
Committed by Linus Torvalds
1 parent 05b3cbd8bb
Exists in master and in 7 other branches
[PATCH] x86_64: Node local pda take 2 -- cpu_pda preparation
Helper patch to change cpu_pda users to use macros to access cpu_pda instead of the cpu_pda[] array.

Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 9 changed files with 21 additions and 20 deletions
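Before the diff, some orientation: the cpu_pda(i) accessor that callers are converted to is defined in include/asm-x86_64/pda.h, one of the 9 changed files but not shown in this excerpt. From the setup64.c hunk below, which renames the backing array to _cpu_pda and dereferences cpu_pda(i)->data_offset, the macro presumably just takes the address of an array slot. A minimal sketch, assuming that shape:

    /* Sketch of the accessor this patch migrates callers to; the real
     * definition is in include/asm-x86_64/pda.h (not shown here).
     * Renaming the array to _cpu_pda makes direct cpu_pda[i] indexing
     * fail to compile, so no caller can bypass the macro. */
    extern struct x8664_pda _cpu_pda[];   /* was: cpu_pda[NR_CPUS] */
    #define cpu_pda(i) (&_cpu_pda[i])     /* cpu_pda[j].field becomes cpu_pda(j)->field */

With every user behind the macro, the follow-up "take 2" patch can swap the flat array for node-local allocations without touching these call sites again.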
arch/x86_64/kernel/irq.c
1 | /* | 1 | /* |
2 | * linux/arch/x86_64/kernel/irq.c | 2 | * linux/arch/x86_64/kernel/irq.c |
3 | * | 3 | * |
4 | * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar | 4 | * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar |
5 | * | 5 | * |
6 | * This file contains the lowest level x86_64-specific interrupt | 6 | * This file contains the lowest level x86_64-specific interrupt |
7 | * entry and irq statistics code. All the remaining irq logic is | 7 | * entry and irq statistics code. All the remaining irq logic is |
8 | * done by the generic kernel/irq/ code and in the | 8 | * done by the generic kernel/irq/ code and in the |
9 | * x86_64-specific irq controller code. (e.g. i8259.c and | 9 | * x86_64-specific irq controller code. (e.g. i8259.c and |
10 | * io_apic.c.) | 10 | * io_apic.c.) |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/kernel_stat.h> | 13 | #include <linux/kernel_stat.h> |
14 | #include <linux/interrupt.h> | 14 | #include <linux/interrupt.h> |
15 | #include <linux/seq_file.h> | 15 | #include <linux/seq_file.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
18 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
19 | #include <asm/io_apic.h> | 19 | #include <asm/io_apic.h> |
20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
21 | 21 | ||
22 | atomic_t irq_err_count; | 22 | atomic_t irq_err_count; |
23 | #ifdef CONFIG_X86_IO_APIC | 23 | #ifdef CONFIG_X86_IO_APIC |
24 | #ifdef APIC_MISMATCH_DEBUG | 24 | #ifdef APIC_MISMATCH_DEBUG |
25 | atomic_t irq_mis_count; | 25 | atomic_t irq_mis_count; |
26 | #endif | 26 | #endif |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * Generic, controller-independent functions: | 30 | * Generic, controller-independent functions: |
31 | */ | 31 | */ |
32 | 32 | ||
33 | int show_interrupts(struct seq_file *p, void *v) | 33 | int show_interrupts(struct seq_file *p, void *v) |
34 | { | 34 | { |
35 | int i = *(loff_t *) v, j; | 35 | int i = *(loff_t *) v, j; |
36 | struct irqaction * action; | 36 | struct irqaction * action; |
37 | unsigned long flags; | 37 | unsigned long flags; |
38 | 38 | ||
39 | if (i == 0) { | 39 | if (i == 0) { |
40 | seq_printf(p, " "); | 40 | seq_printf(p, " "); |
41 | for (j=0; j<NR_CPUS; j++) | 41 | for (j=0; j<NR_CPUS; j++) |
42 | if (cpu_online(j)) | 42 | if (cpu_online(j)) |
43 | seq_printf(p, "CPU%d ",j); | 43 | seq_printf(p, "CPU%d ",j); |
44 | seq_putc(p, '\n'); | 44 | seq_putc(p, '\n'); |
45 | } | 45 | } |
46 | 46 | ||
47 | if (i < NR_IRQS) { | 47 | if (i < NR_IRQS) { |
48 | spin_lock_irqsave(&irq_desc[i].lock, flags); | 48 | spin_lock_irqsave(&irq_desc[i].lock, flags); |
49 | action = irq_desc[i].action; | 49 | action = irq_desc[i].action; |
50 | if (!action) | 50 | if (!action) |
51 | goto skip; | 51 | goto skip; |
52 | seq_printf(p, "%3d: ",i); | 52 | seq_printf(p, "%3d: ",i); |
53 | #ifndef CONFIG_SMP | 53 | #ifndef CONFIG_SMP |
54 | seq_printf(p, "%10u ", kstat_irqs(i)); | 54 | seq_printf(p, "%10u ", kstat_irqs(i)); |
55 | #else | 55 | #else |
56 | for (j=0; j<NR_CPUS; j++) | 56 | for (j=0; j<NR_CPUS; j++) |
57 | if (cpu_online(j)) | 57 | if (cpu_online(j)) |
58 | seq_printf(p, "%10u ", | 58 | seq_printf(p, "%10u ", |
59 | kstat_cpu(j).irqs[i]); | 59 | kstat_cpu(j).irqs[i]); |
60 | #endif | 60 | #endif |
61 | seq_printf(p, " %14s", irq_desc[i].handler->typename); | 61 | seq_printf(p, " %14s", irq_desc[i].handler->typename); |
62 | 62 | ||
63 | seq_printf(p, " %s", action->name); | 63 | seq_printf(p, " %s", action->name); |
64 | for (action=action->next; action; action = action->next) | 64 | for (action=action->next; action; action = action->next) |
65 | seq_printf(p, ", %s", action->name); | 65 | seq_printf(p, ", %s", action->name); |
66 | seq_putc(p, '\n'); | 66 | seq_putc(p, '\n'); |
67 | skip: | 67 | skip: |
68 | spin_unlock_irqrestore(&irq_desc[i].lock, flags); | 68 | spin_unlock_irqrestore(&irq_desc[i].lock, flags); |
69 | } else if (i == NR_IRQS) { | 69 | } else if (i == NR_IRQS) { |
70 | seq_printf(p, "NMI: "); | 70 | seq_printf(p, "NMI: "); |
71 | for (j = 0; j < NR_CPUS; j++) | 71 | for (j = 0; j < NR_CPUS; j++) |
72 | if (cpu_online(j)) | 72 | if (cpu_online(j)) |
73 | seq_printf(p, "%10u ", cpu_pda[j].__nmi_count); | 73 | seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); |
74 | seq_putc(p, '\n'); | 74 | seq_putc(p, '\n'); |
75 | #ifdef CONFIG_X86_LOCAL_APIC | 75 | #ifdef CONFIG_X86_LOCAL_APIC |
76 | seq_printf(p, "LOC: "); | 76 | seq_printf(p, "LOC: "); |
77 | for (j = 0; j < NR_CPUS; j++) | 77 | for (j = 0; j < NR_CPUS; j++) |
78 | if (cpu_online(j)) | 78 | if (cpu_online(j)) |
79 | seq_printf(p, "%10u ", cpu_pda[j].apic_timer_irqs); | 79 | seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); |
80 | seq_putc(p, '\n'); | 80 | seq_putc(p, '\n'); |
81 | #endif | 81 | #endif |
82 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); | 82 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); |
83 | #ifdef CONFIG_X86_IO_APIC | 83 | #ifdef CONFIG_X86_IO_APIC |
84 | #ifdef APIC_MISMATCH_DEBUG | 84 | #ifdef APIC_MISMATCH_DEBUG |
85 | seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); | 85 | seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); |
86 | #endif | 86 | #endif |
87 | #endif | 87 | #endif |
88 | } | 88 | } |
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
91 | 91 | ||
92 | /* | 92 | /* |
93 | * do_IRQ handles all normal device IRQ's (the special | 93 | * do_IRQ handles all normal device IRQ's (the special |
94 | * SMP cross-CPU interrupts have their own specific | 94 | * SMP cross-CPU interrupts have their own specific |
95 | * handlers). | 95 | * handlers). |
96 | */ | 96 | */ |
97 | asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | 97 | asmlinkage unsigned int do_IRQ(struct pt_regs *regs) |
98 | { | 98 | { |
99 | /* high bits used in ret_from_ code */ | 99 | /* high bits used in ret_from_ code */ |
100 | unsigned irq = regs->orig_rax & 0xff; | 100 | unsigned irq = regs->orig_rax & 0xff; |
101 | 101 | ||
102 | exit_idle(); | 102 | exit_idle(); |
103 | irq_enter(); | 103 | irq_enter(); |
104 | 104 | ||
105 | __do_IRQ(irq, regs); | 105 | __do_IRQ(irq, regs); |
106 | irq_exit(); | 106 | irq_exit(); |
107 | 107 | ||
108 | return 1; | 108 | return 1; |
109 | } | 109 | } |
110 | 110 | ||
111 | #ifdef CONFIG_HOTPLUG_CPU | 111 | #ifdef CONFIG_HOTPLUG_CPU |
112 | void fixup_irqs(cpumask_t map) | 112 | void fixup_irqs(cpumask_t map) |
113 | { | 113 | { |
114 | unsigned int irq; | 114 | unsigned int irq; |
115 | static int warned; | 115 | static int warned; |
116 | 116 | ||
117 | for (irq = 0; irq < NR_IRQS; irq++) { | 117 | for (irq = 0; irq < NR_IRQS; irq++) { |
118 | cpumask_t mask; | 118 | cpumask_t mask; |
119 | if (irq == 2) | 119 | if (irq == 2) |
120 | continue; | 120 | continue; |
121 | 121 | ||
122 | cpus_and(mask, irq_affinity[irq], map); | 122 | cpus_and(mask, irq_affinity[irq], map); |
123 | if (any_online_cpu(mask) == NR_CPUS) { | 123 | if (any_online_cpu(mask) == NR_CPUS) { |
124 | printk("Breaking affinity for irq %i\n", irq); | 124 | printk("Breaking affinity for irq %i\n", irq); |
125 | mask = map; | 125 | mask = map; |
126 | } | 126 | } |
127 | if (irq_desc[irq].handler->set_affinity) | 127 | if (irq_desc[irq].handler->set_affinity) |
128 | irq_desc[irq].handler->set_affinity(irq, mask); | 128 | irq_desc[irq].handler->set_affinity(irq, mask); |
129 | else if (irq_desc[irq].action && !(warned++)) | 129 | else if (irq_desc[irq].action && !(warned++)) |
130 | printk("Cannot set affinity for irq %i\n", irq); | 130 | printk("Cannot set affinity for irq %i\n", irq); |
131 | } | 131 | } |
132 | 132 | ||
133 | /* That doesn't seem sufficient. Give it 1ms. */ | 133 | /* That doesn't seem sufficient. Give it 1ms. */ |
134 | local_irq_enable(); | 134 | local_irq_enable(); |
135 | mdelay(1); | 135 | mdelay(1); |
136 | local_irq_disable(); | 136 | local_irq_disable(); |
137 | } | 137 | } |
138 | #endif | 138 | #endif |
139 | 139 | ||
140 | extern void call_softirq(void); | 140 | extern void call_softirq(void); |
141 | 141 | ||
142 | asmlinkage void do_softirq(void) | 142 | asmlinkage void do_softirq(void) |
143 | { | 143 | { |
144 | __u32 pending; | 144 | __u32 pending; |
145 | unsigned long flags; | 145 | unsigned long flags; |
146 | 146 | ||
147 | if (in_interrupt()) | 147 | if (in_interrupt()) |
148 | return; | 148 | return; |
149 | 149 | ||
150 | local_irq_save(flags); | 150 | local_irq_save(flags); |
151 | pending = local_softirq_pending(); | 151 | pending = local_softirq_pending(); |
152 | /* Switch to interrupt stack */ | 152 | /* Switch to interrupt stack */ |
153 | if (pending) | 153 | if (pending) |
154 | call_softirq(); | 154 | call_softirq(); |
155 | local_irq_restore(flags); | 155 | local_irq_restore(flags); |
156 | } | 156 | } |
157 | EXPORT_SYMBOL(do_softirq); | 157 | EXPORT_SYMBOL(do_softirq); |
158 | 158 |
arch/x86_64/kernel/nmi.c
1 | /* | 1 | /* |
2 | * linux/arch/x86_64/nmi.c | 2 | * linux/arch/x86_64/nmi.c |
3 | * | 3 | * |
4 | * NMI watchdog support on APIC systems | 4 | * NMI watchdog support on APIC systems |
5 | * | 5 | * |
6 | * Started by Ingo Molnar <mingo@redhat.com> | 6 | * Started by Ingo Molnar <mingo@redhat.com> |
7 | * | 7 | * |
8 | * Fixes: | 8 | * Fixes: |
9 | * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. | 9 | * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. |
10 | * Mikael Pettersson : Power Management for local APIC NMI watchdog. | 10 | * Mikael Pettersson : Power Management for local APIC NMI watchdog. |
11 | * Pavel Machek and | 11 | * Pavel Machek and |
12 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. | 12 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/config.h> | 15 | #include <linux/config.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
18 | #include <linux/bootmem.h> | 18 | #include <linux/bootmem.h> |
19 | #include <linux/smp_lock.h> | 19 | #include <linux/smp_lock.h> |
20 | #include <linux/interrupt.h> | 20 | #include <linux/interrupt.h> |
21 | #include <linux/mc146818rtc.h> | 21 | #include <linux/mc146818rtc.h> |
22 | #include <linux/kernel_stat.h> | 22 | #include <linux/kernel_stat.h> |
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/sysdev.h> | 24 | #include <linux/sysdev.h> |
25 | #include <linux/nmi.h> | 25 | #include <linux/nmi.h> |
26 | #include <linux/sysctl.h> | 26 | #include <linux/sysctl.h> |
27 | 27 | ||
28 | #include <asm/smp.h> | 28 | #include <asm/smp.h> |
29 | #include <asm/mtrr.h> | 29 | #include <asm/mtrr.h> |
30 | #include <asm/mpspec.h> | 30 | #include <asm/mpspec.h> |
31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/msr.h> | 32 | #include <asm/msr.h> |
33 | #include <asm/proto.h> | 33 | #include <asm/proto.h> |
34 | #include <asm/kdebug.h> | 34 | #include <asm/kdebug.h> |
35 | #include <asm/local.h> | 35 | #include <asm/local.h> |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 38 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: |
39 | * - it may be reserved by some other driver, or not | 39 | * - it may be reserved by some other driver, or not |
40 | * - when not reserved by some other driver, it may be used for | 40 | * - when not reserved by some other driver, it may be used for |
41 | * the NMI watchdog, or not | 41 | * the NMI watchdog, or not |
42 | * | 42 | * |
43 | * This is maintained separately from nmi_active because the NMI | 43 | * This is maintained separately from nmi_active because the NMI |
44 | * watchdog may also be driven from the I/O APIC timer. | 44 | * watchdog may also be driven from the I/O APIC timer. |
45 | */ | 45 | */ |
46 | static DEFINE_SPINLOCK(lapic_nmi_owner_lock); | 46 | static DEFINE_SPINLOCK(lapic_nmi_owner_lock); |
47 | static unsigned int lapic_nmi_owner; | 47 | static unsigned int lapic_nmi_owner; |
48 | #define LAPIC_NMI_WATCHDOG (1<<0) | 48 | #define LAPIC_NMI_WATCHDOG (1<<0) |
49 | #define LAPIC_NMI_RESERVED (1<<1) | 49 | #define LAPIC_NMI_RESERVED (1<<1) |
50 | 50 | ||
51 | /* nmi_active: | 51 | /* nmi_active: |
52 | * +1: the lapic NMI watchdog is active, but can be disabled | 52 | * +1: the lapic NMI watchdog is active, but can be disabled |
53 | * 0: the lapic NMI watchdog has not been set up, and cannot | 53 | * 0: the lapic NMI watchdog has not been set up, and cannot |
54 | * be enabled | 54 | * be enabled |
55 | * -1: the lapic NMI watchdog is disabled, but can be enabled | 55 | * -1: the lapic NMI watchdog is disabled, but can be enabled |
56 | */ | 56 | */ |
57 | int nmi_active; /* oprofile uses this */ | 57 | int nmi_active; /* oprofile uses this */ |
58 | int panic_on_timeout; | 58 | int panic_on_timeout; |
59 | 59 | ||
60 | unsigned int nmi_watchdog = NMI_DEFAULT; | 60 | unsigned int nmi_watchdog = NMI_DEFAULT; |
61 | static unsigned int nmi_hz = HZ; | 61 | static unsigned int nmi_hz = HZ; |
62 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | 62 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ |
63 | static unsigned int nmi_p4_cccr_val; | 63 | static unsigned int nmi_p4_cccr_val; |
64 | 64 | ||
65 | /* Note that these events don't tick when the CPU idles. This means | 65 | /* Note that these events don't tick when the CPU idles. This means |
66 | the frequency varies with CPU load. */ | 66 | the frequency varies with CPU load. */ |
67 | 67 | ||
68 | #define K7_EVNTSEL_ENABLE (1 << 22) | 68 | #define K7_EVNTSEL_ENABLE (1 << 22) |
69 | #define K7_EVNTSEL_INT (1 << 20) | 69 | #define K7_EVNTSEL_INT (1 << 20) |
70 | #define K7_EVNTSEL_OS (1 << 17) | 70 | #define K7_EVNTSEL_OS (1 << 17) |
71 | #define K7_EVNTSEL_USR (1 << 16) | 71 | #define K7_EVNTSEL_USR (1 << 16) |
72 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | 72 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 |
73 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | 73 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING |
74 | 74 | ||
75 | #define MSR_P4_MISC_ENABLE 0x1A0 | 75 | #define MSR_P4_MISC_ENABLE 0x1A0 |
76 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | 76 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) |
77 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) | 77 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) |
78 | #define MSR_P4_PERFCTR0 0x300 | 78 | #define MSR_P4_PERFCTR0 0x300 |
79 | #define MSR_P4_CCCR0 0x360 | 79 | #define MSR_P4_CCCR0 0x360 |
80 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | 80 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) |
81 | #define P4_ESCR_OS (1<<3) | 81 | #define P4_ESCR_OS (1<<3) |
82 | #define P4_ESCR_USR (1<<2) | 82 | #define P4_ESCR_USR (1<<2) |
83 | #define P4_CCCR_OVF_PMI0 (1<<26) | 83 | #define P4_CCCR_OVF_PMI0 (1<<26) |
84 | #define P4_CCCR_OVF_PMI1 (1<<27) | 84 | #define P4_CCCR_OVF_PMI1 (1<<27) |
85 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | 85 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) |
86 | #define P4_CCCR_COMPLEMENT (1<<19) | 86 | #define P4_CCCR_COMPLEMENT (1<<19) |
87 | #define P4_CCCR_COMPARE (1<<18) | 87 | #define P4_CCCR_COMPARE (1<<18) |
88 | #define P4_CCCR_REQUIRED (3<<16) | 88 | #define P4_CCCR_REQUIRED (3<<16) |
89 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | 89 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) |
90 | #define P4_CCCR_ENABLE (1<<12) | 90 | #define P4_CCCR_ENABLE (1<<12) |
91 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | 91 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter |
92 | CRU_ESCR0 (with any non-null event selector) through a complemented | 92 | CRU_ESCR0 (with any non-null event selector) through a complemented |
93 | max threshold. [IA32-Vol3, Section 14.9.9] */ | 93 | max threshold. [IA32-Vol3, Section 14.9.9] */ |
94 | #define MSR_P4_IQ_COUNTER0 0x30C | 94 | #define MSR_P4_IQ_COUNTER0 0x30C |
95 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | 95 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) |
96 | #define P4_NMI_IQ_CCCR0 \ | 96 | #define P4_NMI_IQ_CCCR0 \ |
97 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | 97 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ |
98 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | 98 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) |
99 | 99 | ||
100 | static __cpuinit inline int nmi_known_cpu(void) | 100 | static __cpuinit inline int nmi_known_cpu(void) |
101 | { | 101 | { |
102 | switch (boot_cpu_data.x86_vendor) { | 102 | switch (boot_cpu_data.x86_vendor) { |
103 | case X86_VENDOR_AMD: | 103 | case X86_VENDOR_AMD: |
104 | return boot_cpu_data.x86 == 15; | 104 | return boot_cpu_data.x86 == 15; |
105 | case X86_VENDOR_INTEL: | 105 | case X86_VENDOR_INTEL: |
106 | return boot_cpu_data.x86 == 15; | 106 | return boot_cpu_data.x86 == 15; |
107 | } | 107 | } |
108 | return 0; | 108 | return 0; |
109 | } | 109 | } |
110 | 110 | ||
111 | /* Run after command line and cpu_init init, but before all other checks */ | 111 | /* Run after command line and cpu_init init, but before all other checks */ |
112 | void __cpuinit nmi_watchdog_default(void) | 112 | void __cpuinit nmi_watchdog_default(void) |
113 | { | 113 | { |
114 | if (nmi_watchdog != NMI_DEFAULT) | 114 | if (nmi_watchdog != NMI_DEFAULT) |
115 | return; | 115 | return; |
116 | if (nmi_known_cpu()) | 116 | if (nmi_known_cpu()) |
117 | nmi_watchdog = NMI_LOCAL_APIC; | 117 | nmi_watchdog = NMI_LOCAL_APIC; |
118 | else | 118 | else |
119 | nmi_watchdog = NMI_IO_APIC; | 119 | nmi_watchdog = NMI_IO_APIC; |
120 | } | 120 | } |
121 | 121 | ||
122 | #ifdef CONFIG_SMP | 122 | #ifdef CONFIG_SMP |
123 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when | 123 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
124 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | 124 | * the CPU is idle. To make sure the NMI watchdog really ticks on all |
125 | * CPUs during the test make them busy. | 125 | * CPUs during the test make them busy. |
126 | */ | 126 | */ |
127 | static __init void nmi_cpu_busy(void *data) | 127 | static __init void nmi_cpu_busy(void *data) |
128 | { | 128 | { |
129 | volatile int *endflag = data; | 129 | volatile int *endflag = data; |
130 | local_irq_enable(); | 130 | local_irq_enable(); |
131 | /* Intentionally don't use cpu_relax here. This is | 131 | /* Intentionally don't use cpu_relax here. This is |
132 | to make sure that the performance counter really ticks, | 132 | to make sure that the performance counter really ticks, |
133 | even if there is a simulator or similar that catches the | 133 | even if there is a simulator or similar that catches the |
134 | pause instruction. On a real HT machine this is fine because | 134 | pause instruction. On a real HT machine this is fine because |
135 | all other CPUs are busy with "useless" delay loops and don't | 135 | all other CPUs are busy with "useless" delay loops and don't |
136 | care if they get somewhat less cycles. */ | 136 | care if they get somewhat less cycles. */ |
137 | while (*endflag == 0) | 137 | while (*endflag == 0) |
138 | barrier(); | 138 | barrier(); |
139 | } | 139 | } |
140 | #endif | 140 | #endif |
141 | 141 | ||
142 | int __init check_nmi_watchdog (void) | 142 | int __init check_nmi_watchdog (void) |
143 | { | 143 | { |
144 | volatile int endflag = 0; | 144 | volatile int endflag = 0; |
145 | int *counts; | 145 | int *counts; |
146 | int cpu; | 146 | int cpu; |
147 | 147 | ||
148 | counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 148 | counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
149 | if (!counts) | 149 | if (!counts) |
150 | return -1; | 150 | return -1; |
151 | 151 | ||
152 | printk(KERN_INFO "testing NMI watchdog ... "); | 152 | printk(KERN_INFO "testing NMI watchdog ... "); |
153 | 153 | ||
154 | if (nmi_watchdog == NMI_LOCAL_APIC) | 154 | if (nmi_watchdog == NMI_LOCAL_APIC) |
155 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); | 155 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); |
156 | 156 | ||
157 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 157 | for (cpu = 0; cpu < NR_CPUS; cpu++) |
158 | counts[cpu] = cpu_pda[cpu].__nmi_count; | 158 | counts[cpu] = cpu_pda(cpu)->__nmi_count; |
159 | local_irq_enable(); | 159 | local_irq_enable(); |
160 | mdelay((10*1000)/nmi_hz); // wait 10 ticks | 160 | mdelay((10*1000)/nmi_hz); // wait 10 ticks |
161 | 161 | ||
162 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 162 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
163 | if (!cpu_online(cpu)) | 163 | if (!cpu_online(cpu)) |
164 | continue; | 164 | continue; |
165 | if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { | 165 | if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { |
166 | endflag = 1; | 166 | endflag = 1; |
167 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 167 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
168 | cpu, | 168 | cpu, |
169 | counts[cpu], | 169 | counts[cpu], |
170 | cpu_pda[cpu].__nmi_count); | 170 | cpu_pda(cpu)->__nmi_count); |
171 | nmi_active = 0; | 171 | nmi_active = 0; |
172 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 172 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; |
173 | nmi_perfctr_msr = 0; | 173 | nmi_perfctr_msr = 0; |
174 | kfree(counts); | 174 | kfree(counts); |
175 | return -1; | 175 | return -1; |
176 | } | 176 | } |
177 | } | 177 | } |
178 | endflag = 1; | 178 | endflag = 1; |
179 | printk("OK.\n"); | 179 | printk("OK.\n"); |
180 | 180 | ||
181 | /* now that we know it works we can reduce NMI frequency to | 181 | /* now that we know it works we can reduce NMI frequency to |
182 | something more reasonable; makes a difference in some configs */ | 182 | something more reasonable; makes a difference in some configs */ |
183 | if (nmi_watchdog == NMI_LOCAL_APIC) | 183 | if (nmi_watchdog == NMI_LOCAL_APIC) |
184 | nmi_hz = 1; | 184 | nmi_hz = 1; |
185 | 185 | ||
186 | kfree(counts); | 186 | kfree(counts); |
187 | return 0; | 187 | return 0; |
188 | } | 188 | } |
189 | 189 | ||
190 | int __init setup_nmi_watchdog(char *str) | 190 | int __init setup_nmi_watchdog(char *str) |
191 | { | 191 | { |
192 | int nmi; | 192 | int nmi; |
193 | 193 | ||
194 | if (!strncmp(str,"panic",5)) { | 194 | if (!strncmp(str,"panic",5)) { |
195 | panic_on_timeout = 1; | 195 | panic_on_timeout = 1; |
196 | str = strchr(str, ','); | 196 | str = strchr(str, ','); |
197 | if (!str) | 197 | if (!str) |
198 | return 1; | 198 | return 1; |
199 | ++str; | 199 | ++str; |
200 | } | 200 | } |
201 | 201 | ||
202 | get_option(&str, &nmi); | 202 | get_option(&str, &nmi); |
203 | 203 | ||
204 | if (nmi >= NMI_INVALID) | 204 | if (nmi >= NMI_INVALID) |
205 | return 0; | 205 | return 0; |
206 | nmi_watchdog = nmi; | 206 | nmi_watchdog = nmi; |
207 | return 1; | 207 | return 1; |
208 | } | 208 | } |
209 | 209 | ||
210 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 210 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
211 | 211 | ||
212 | static void disable_lapic_nmi_watchdog(void) | 212 | static void disable_lapic_nmi_watchdog(void) |
213 | { | 213 | { |
214 | if (nmi_active <= 0) | 214 | if (nmi_active <= 0) |
215 | return; | 215 | return; |
216 | switch (boot_cpu_data.x86_vendor) { | 216 | switch (boot_cpu_data.x86_vendor) { |
217 | case X86_VENDOR_AMD: | 217 | case X86_VENDOR_AMD: |
218 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | 218 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); |
219 | break; | 219 | break; |
220 | case X86_VENDOR_INTEL: | 220 | case X86_VENDOR_INTEL: |
221 | if (boot_cpu_data.x86 == 15) { | 221 | if (boot_cpu_data.x86 == 15) { |
222 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | 222 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); |
223 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | 223 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); |
224 | } | 224 | } |
225 | break; | 225 | break; |
226 | } | 226 | } |
227 | nmi_active = -1; | 227 | nmi_active = -1; |
228 | /* tell do_nmi() and others that we're not active any more */ | 228 | /* tell do_nmi() and others that we're not active any more */ |
229 | nmi_watchdog = 0; | 229 | nmi_watchdog = 0; |
230 | } | 230 | } |
231 | 231 | ||
232 | static void enable_lapic_nmi_watchdog(void) | 232 | static void enable_lapic_nmi_watchdog(void) |
233 | { | 233 | { |
234 | if (nmi_active < 0) { | 234 | if (nmi_active < 0) { |
235 | nmi_watchdog = NMI_LOCAL_APIC; | 235 | nmi_watchdog = NMI_LOCAL_APIC; |
236 | setup_apic_nmi_watchdog(); | 236 | setup_apic_nmi_watchdog(); |
237 | } | 237 | } |
238 | } | 238 | } |
239 | 239 | ||
240 | int reserve_lapic_nmi(void) | 240 | int reserve_lapic_nmi(void) |
241 | { | 241 | { |
242 | unsigned int old_owner; | 242 | unsigned int old_owner; |
243 | 243 | ||
244 | spin_lock(&lapic_nmi_owner_lock); | 244 | spin_lock(&lapic_nmi_owner_lock); |
245 | old_owner = lapic_nmi_owner; | 245 | old_owner = lapic_nmi_owner; |
246 | lapic_nmi_owner |= LAPIC_NMI_RESERVED; | 246 | lapic_nmi_owner |= LAPIC_NMI_RESERVED; |
247 | spin_unlock(&lapic_nmi_owner_lock); | 247 | spin_unlock(&lapic_nmi_owner_lock); |
248 | if (old_owner & LAPIC_NMI_RESERVED) | 248 | if (old_owner & LAPIC_NMI_RESERVED) |
249 | return -EBUSY; | 249 | return -EBUSY; |
250 | if (old_owner & LAPIC_NMI_WATCHDOG) | 250 | if (old_owner & LAPIC_NMI_WATCHDOG) |
251 | disable_lapic_nmi_watchdog(); | 251 | disable_lapic_nmi_watchdog(); |
252 | return 0; | 252 | return 0; |
253 | } | 253 | } |
254 | 254 | ||
255 | void release_lapic_nmi(void) | 255 | void release_lapic_nmi(void) |
256 | { | 256 | { |
257 | unsigned int new_owner; | 257 | unsigned int new_owner; |
258 | 258 | ||
259 | spin_lock(&lapic_nmi_owner_lock); | 259 | spin_lock(&lapic_nmi_owner_lock); |
260 | new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; | 260 | new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; |
261 | lapic_nmi_owner = new_owner; | 261 | lapic_nmi_owner = new_owner; |
262 | spin_unlock(&lapic_nmi_owner_lock); | 262 | spin_unlock(&lapic_nmi_owner_lock); |
263 | if (new_owner & LAPIC_NMI_WATCHDOG) | 263 | if (new_owner & LAPIC_NMI_WATCHDOG) |
264 | enable_lapic_nmi_watchdog(); | 264 | enable_lapic_nmi_watchdog(); |
265 | } | 265 | } |
266 | 266 | ||
267 | void disable_timer_nmi_watchdog(void) | 267 | void disable_timer_nmi_watchdog(void) |
268 | { | 268 | { |
269 | if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) | 269 | if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) |
270 | return; | 270 | return; |
271 | 271 | ||
272 | disable_irq(0); | 272 | disable_irq(0); |
273 | unset_nmi_callback(); | 273 | unset_nmi_callback(); |
274 | nmi_active = -1; | 274 | nmi_active = -1; |
275 | nmi_watchdog = NMI_NONE; | 275 | nmi_watchdog = NMI_NONE; |
276 | } | 276 | } |
277 | 277 | ||
278 | void enable_timer_nmi_watchdog(void) | 278 | void enable_timer_nmi_watchdog(void) |
279 | { | 279 | { |
280 | if (nmi_active < 0) { | 280 | if (nmi_active < 0) { |
281 | nmi_watchdog = NMI_IO_APIC; | 281 | nmi_watchdog = NMI_IO_APIC; |
282 | touch_nmi_watchdog(); | 282 | touch_nmi_watchdog(); |
283 | nmi_active = 1; | 283 | nmi_active = 1; |
284 | enable_irq(0); | 284 | enable_irq(0); |
285 | } | 285 | } |
286 | } | 286 | } |
287 | 287 | ||
288 | #ifdef CONFIG_PM | 288 | #ifdef CONFIG_PM |
289 | 289 | ||
290 | static int nmi_pm_active; /* nmi_active before suspend */ | 290 | static int nmi_pm_active; /* nmi_active before suspend */ |
291 | 291 | ||
292 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | 292 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) |
293 | { | 293 | { |
294 | nmi_pm_active = nmi_active; | 294 | nmi_pm_active = nmi_active; |
295 | disable_lapic_nmi_watchdog(); | 295 | disable_lapic_nmi_watchdog(); |
296 | return 0; | 296 | return 0; |
297 | } | 297 | } |
298 | 298 | ||
299 | static int lapic_nmi_resume(struct sys_device *dev) | 299 | static int lapic_nmi_resume(struct sys_device *dev) |
300 | { | 300 | { |
301 | if (nmi_pm_active > 0) | 301 | if (nmi_pm_active > 0) |
302 | enable_lapic_nmi_watchdog(); | 302 | enable_lapic_nmi_watchdog(); |
303 | return 0; | 303 | return 0; |
304 | } | 304 | } |
305 | 305 | ||
306 | static struct sysdev_class nmi_sysclass = { | 306 | static struct sysdev_class nmi_sysclass = { |
307 | set_kset_name("lapic_nmi"), | 307 | set_kset_name("lapic_nmi"), |
308 | .resume = lapic_nmi_resume, | 308 | .resume = lapic_nmi_resume, |
309 | .suspend = lapic_nmi_suspend, | 309 | .suspend = lapic_nmi_suspend, |
310 | }; | 310 | }; |
311 | 311 | ||
312 | static struct sys_device device_lapic_nmi = { | 312 | static struct sys_device device_lapic_nmi = { |
313 | .id = 0, | 313 | .id = 0, |
314 | .cls = &nmi_sysclass, | 314 | .cls = &nmi_sysclass, |
315 | }; | 315 | }; |
316 | 316 | ||
317 | static int __init init_lapic_nmi_sysfs(void) | 317 | static int __init init_lapic_nmi_sysfs(void) |
318 | { | 318 | { |
319 | int error; | 319 | int error; |
320 | 320 | ||
321 | if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) | 321 | if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) |
322 | return 0; | 322 | return 0; |
323 | 323 | ||
324 | error = sysdev_class_register(&nmi_sysclass); | 324 | error = sysdev_class_register(&nmi_sysclass); |
325 | if (!error) | 325 | if (!error) |
326 | error = sysdev_register(&device_lapic_nmi); | 326 | error = sysdev_register(&device_lapic_nmi); |
327 | return error; | 327 | return error; |
328 | } | 328 | } |
329 | /* must come after the local APIC's device_initcall() */ | 329 | /* must come after the local APIC's device_initcall() */ |
330 | late_initcall(init_lapic_nmi_sysfs); | 330 | late_initcall(init_lapic_nmi_sysfs); |
331 | 331 | ||
332 | #endif /* CONFIG_PM */ | 332 | #endif /* CONFIG_PM */ |
333 | 333 | ||
334 | /* | 334 | /* |
335 | * Activate the NMI watchdog via the local APIC. | 335 | * Activate the NMI watchdog via the local APIC. |
336 | * Original code written by Keith Owens. | 336 | * Original code written by Keith Owens. |
337 | */ | 337 | */ |
338 | 338 | ||
339 | static void clear_msr_range(unsigned int base, unsigned int n) | 339 | static void clear_msr_range(unsigned int base, unsigned int n) |
340 | { | 340 | { |
341 | unsigned int i; | 341 | unsigned int i; |
342 | 342 | ||
343 | for(i = 0; i < n; ++i) | 343 | for(i = 0; i < n; ++i) |
344 | wrmsr(base+i, 0, 0); | 344 | wrmsr(base+i, 0, 0); |
345 | } | 345 | } |
346 | 346 | ||
347 | static void setup_k7_watchdog(void) | 347 | static void setup_k7_watchdog(void) |
348 | { | 348 | { |
349 | int i; | 349 | int i; |
350 | unsigned int evntsel; | 350 | unsigned int evntsel; |
351 | 351 | ||
352 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | 352 | nmi_perfctr_msr = MSR_K7_PERFCTR0; |
353 | 353 | ||
354 | for(i = 0; i < 4; ++i) { | 354 | for(i = 0; i < 4; ++i) { |
355 | /* Simulator may not support it */ | 355 | /* Simulator may not support it */ |
356 | if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) { | 356 | if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) { |
357 | nmi_perfctr_msr = 0; | 357 | nmi_perfctr_msr = 0; |
358 | return; | 358 | return; |
359 | } | 359 | } |
360 | wrmsrl(MSR_K7_PERFCTR0+i, 0UL); | 360 | wrmsrl(MSR_K7_PERFCTR0+i, 0UL); |
361 | } | 361 | } |
362 | 362 | ||
363 | evntsel = K7_EVNTSEL_INT | 363 | evntsel = K7_EVNTSEL_INT |
364 | | K7_EVNTSEL_OS | 364 | | K7_EVNTSEL_OS |
365 | | K7_EVNTSEL_USR | 365 | | K7_EVNTSEL_USR |
366 | | K7_NMI_EVENT; | 366 | | K7_NMI_EVENT; |
367 | 367 | ||
368 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 368 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); |
369 | wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); | 369 | wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); |
370 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 370 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
371 | evntsel |= K7_EVNTSEL_ENABLE; | 371 | evntsel |= K7_EVNTSEL_ENABLE; |
372 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 372 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); |
373 | } | 373 | } |
374 | 374 | ||
375 | 375 | ||
376 | static int setup_p4_watchdog(void) | 376 | static int setup_p4_watchdog(void) |
377 | { | 377 | { |
378 | unsigned int misc_enable, dummy; | 378 | unsigned int misc_enable, dummy; |
379 | 379 | ||
380 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | 380 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); |
381 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | 381 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) |
382 | return 0; | 382 | return 0; |
383 | 383 | ||
384 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | 384 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; |
385 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | 385 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; |
386 | #ifdef CONFIG_SMP | 386 | #ifdef CONFIG_SMP |
387 | if (smp_num_siblings == 2) | 387 | if (smp_num_siblings == 2) |
388 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | 388 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; |
389 | #endif | 389 | #endif |
390 | 390 | ||
391 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) | 391 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) |
392 | clear_msr_range(0x3F1, 2); | 392 | clear_msr_range(0x3F1, 2); |
393 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current | 393 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current |
394 | docs doesn't fully define it, so leave it alone for now. */ | 394 | docs doesn't fully define it, so leave it alone for now. */ |
395 | if (boot_cpu_data.x86_model >= 0x3) { | 395 | if (boot_cpu_data.x86_model >= 0x3) { |
396 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ | 396 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ |
397 | clear_msr_range(0x3A0, 26); | 397 | clear_msr_range(0x3A0, 26); |
398 | clear_msr_range(0x3BC, 3); | 398 | clear_msr_range(0x3BC, 3); |
399 | } else { | 399 | } else { |
400 | clear_msr_range(0x3A0, 31); | 400 | clear_msr_range(0x3A0, 31); |
401 | } | 401 | } |
402 | clear_msr_range(0x3C0, 6); | 402 | clear_msr_range(0x3C0, 6); |
403 | clear_msr_range(0x3C8, 6); | 403 | clear_msr_range(0x3C8, 6); |
404 | clear_msr_range(0x3E0, 2); | 404 | clear_msr_range(0x3E0, 2); |
405 | clear_msr_range(MSR_P4_CCCR0, 18); | 405 | clear_msr_range(MSR_P4_CCCR0, 18); |
406 | clear_msr_range(MSR_P4_PERFCTR0, 18); | 406 | clear_msr_range(MSR_P4_PERFCTR0, 18); |
407 | 407 | ||
408 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | 408 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); |
409 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | 409 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); |
410 | Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz * 1000UL / nmi_hz)); | 410 | Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz * 1000UL / nmi_hz)); |
411 | wrmsrl(MSR_P4_IQ_COUNTER0, -((u64)cpu_khz * 1000 / nmi_hz)); | 411 | wrmsrl(MSR_P4_IQ_COUNTER0, -((u64)cpu_khz * 1000 / nmi_hz)); |
412 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 412 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
413 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 413 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); |
414 | return 1; | 414 | return 1; |
415 | } | 415 | } |
416 | 416 | ||
417 | void setup_apic_nmi_watchdog(void) | 417 | void setup_apic_nmi_watchdog(void) |
418 | { | 418 | { |
419 | switch (boot_cpu_data.x86_vendor) { | 419 | switch (boot_cpu_data.x86_vendor) { |
420 | case X86_VENDOR_AMD: | 420 | case X86_VENDOR_AMD: |
421 | if (boot_cpu_data.x86 != 15) | 421 | if (boot_cpu_data.x86 != 15) |
422 | return; | 422 | return; |
423 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) | 423 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) |
424 | return; | 424 | return; |
425 | setup_k7_watchdog(); | 425 | setup_k7_watchdog(); |
426 | break; | 426 | break; |
427 | case X86_VENDOR_INTEL: | 427 | case X86_VENDOR_INTEL: |
428 | if (boot_cpu_data.x86 != 15) | 428 | if (boot_cpu_data.x86 != 15) |
429 | return; | 429 | return; |
430 | if (!setup_p4_watchdog()) | 430 | if (!setup_p4_watchdog()) |
431 | return; | 431 | return; |
432 | break; | 432 | break; |
433 | 433 | ||
434 | default: | 434 | default: |
435 | return; | 435 | return; |
436 | } | 436 | } |
437 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; | 437 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; |
438 | nmi_active = 1; | 438 | nmi_active = 1; |
439 | } | 439 | } |
440 | 440 | ||
441 | /* | 441 | /* |
442 | * the best way to detect whether a CPU has a 'hard lockup' problem | 442 | * the best way to detect whether a CPU has a 'hard lockup' problem |
443 | * is to check it's local APIC timer IRQ counts. If they are not | 443 | * is to check it's local APIC timer IRQ counts. If they are not |
444 | * changing then that CPU has some problem. | 444 | * changing then that CPU has some problem. |
445 | * | 445 | * |
446 | * as these watchdog NMI IRQs are generated on every CPU, we only | 446 | * as these watchdog NMI IRQs are generated on every CPU, we only |
447 | * have to check the current processor. | 447 | * have to check the current processor. |
448 | */ | 448 | */ |
449 | 449 | ||
450 | static DEFINE_PER_CPU(unsigned, last_irq_sum); | 450 | static DEFINE_PER_CPU(unsigned, last_irq_sum); |
451 | static DEFINE_PER_CPU(local_t, alert_counter); | 451 | static DEFINE_PER_CPU(local_t, alert_counter); |
452 | static DEFINE_PER_CPU(int, nmi_touch); | 452 | static DEFINE_PER_CPU(int, nmi_touch); |
453 | 453 | ||
454 | void touch_nmi_watchdog (void) | 454 | void touch_nmi_watchdog (void) |
455 | { | 455 | { |
456 | int i; | 456 | int i; |
457 | 457 | ||
458 | /* | 458 | /* |
459 | * Tell other CPUs to reset their alert counters. We cannot | 459 | * Tell other CPUs to reset their alert counters. We cannot |
460 | * do it ourselves because the alert count increase is not | 460 | * do it ourselves because the alert count increase is not |
461 | * atomic. | 461 | * atomic. |
462 | */ | 462 | */ |
463 | for (i = 0; i < NR_CPUS; i++) | 463 | for (i = 0; i < NR_CPUS; i++) |
464 | per_cpu(nmi_touch, i) = 1; | 464 | per_cpu(nmi_touch, i) = 1; |
465 | 465 | ||
466 | touch_softlockup_watchdog(); | 466 | touch_softlockup_watchdog(); |
467 | } | 467 | } |
468 | 468 | ||
469 | void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) | 469 | void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) |
470 | { | 470 | { |
471 | int sum; | 471 | int sum; |
472 | int touched = 0; | 472 | int touched = 0; |
473 | 473 | ||
474 | sum = read_pda(apic_timer_irqs); | 474 | sum = read_pda(apic_timer_irqs); |
475 | if (__get_cpu_var(nmi_touch)) { | 475 | if (__get_cpu_var(nmi_touch)) { |
476 | __get_cpu_var(nmi_touch) = 0; | 476 | __get_cpu_var(nmi_touch) = 0; |
477 | touched = 1; | 477 | touched = 1; |
478 | } | 478 | } |
479 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { | 479 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { |
480 | /* | 480 | /* |
481 | * Ayiee, looks like this CPU is stuck ... | 481 | * Ayiee, looks like this CPU is stuck ... |
482 | * wait a few IRQs (5 seconds) before doing the oops ... | 482 | * wait a few IRQs (5 seconds) before doing the oops ... |
483 | */ | 483 | */ |
484 | local_inc(&__get_cpu_var(alert_counter)); | 484 | local_inc(&__get_cpu_var(alert_counter)); |
485 | if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { | 485 | if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { |
486 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 486 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) |
487 | == NOTIFY_STOP) { | 487 | == NOTIFY_STOP) { |
488 | local_set(&__get_cpu_var(alert_counter), 0); | 488 | local_set(&__get_cpu_var(alert_counter), 0); |
489 | return; | 489 | return; |
490 | } | 490 | } |
491 | die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs); | 491 | die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs); |
492 | } | 492 | } |
493 | } else { | 493 | } else { |
494 | __get_cpu_var(last_irq_sum) = sum; | 494 | __get_cpu_var(last_irq_sum) = sum; |
495 | local_set(&__get_cpu_var(alert_counter), 0); | 495 | local_set(&__get_cpu_var(alert_counter), 0); |
496 | } | 496 | } |
497 | if (nmi_perfctr_msr) { | 497 | if (nmi_perfctr_msr) { |
498 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | 498 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { |
499 | /* | 499 | /* |
500 | * P4 quirks: | 500 | * P4 quirks: |
501 | * - An overflown perfctr will assert its interrupt | 501 | * - An overflown perfctr will assert its interrupt |
502 | * until the OVF flag in its CCCR is cleared. | 502 | * until the OVF flag in its CCCR is cleared. |
503 | * - LVTPC is masked on interrupt and must be | 503 | * - LVTPC is masked on interrupt and must be |
504 | * unmasked by the LVTPC handler. | 504 | * unmasked by the LVTPC handler. |
505 | */ | 505 | */ |
506 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 506 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); |
507 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 507 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
508 | } | 508 | } |
509 | wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | 509 | wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); |
510 | } | 510 | } |
511 | } | 511 | } |
512 | 512 | ||
513 | static int dummy_nmi_callback(struct pt_regs * regs, int cpu) | 513 | static int dummy_nmi_callback(struct pt_regs * regs, int cpu) |
514 | { | 514 | { |
515 | return 0; | 515 | return 0; |
516 | } | 516 | } |
517 | 517 | ||
518 | static nmi_callback_t nmi_callback = dummy_nmi_callback; | 518 | static nmi_callback_t nmi_callback = dummy_nmi_callback; |
519 | 519 | ||
520 | asmlinkage void do_nmi(struct pt_regs * regs, long error_code) | 520 | asmlinkage void do_nmi(struct pt_regs * regs, long error_code) |
521 | { | 521 | { |
522 | int cpu = safe_smp_processor_id(); | 522 | int cpu = safe_smp_processor_id(); |
523 | 523 | ||
524 | nmi_enter(); | 524 | nmi_enter(); |
525 | add_pda(__nmi_count,1); | 525 | add_pda(__nmi_count,1); |
526 | if (!rcu_dereference(nmi_callback)(regs, cpu)) | 526 | if (!rcu_dereference(nmi_callback)(regs, cpu)) |
527 | default_do_nmi(regs); | 527 | default_do_nmi(regs); |
528 | nmi_exit(); | 528 | nmi_exit(); |
529 | } | 529 | } |
530 | 530 | ||
531 | void set_nmi_callback(nmi_callback_t callback) | 531 | void set_nmi_callback(nmi_callback_t callback) |
532 | { | 532 | { |
533 | rcu_assign_pointer(nmi_callback, callback); | 533 | rcu_assign_pointer(nmi_callback, callback); |
534 | } | 534 | } |
535 | 535 | ||
536 | void unset_nmi_callback(void) | 536 | void unset_nmi_callback(void) |
537 | { | 537 | { |
538 | nmi_callback = dummy_nmi_callback; | 538 | nmi_callback = dummy_nmi_callback; |
539 | } | 539 | } |
540 | 540 | ||
541 | #ifdef CONFIG_SYSCTL | 541 | #ifdef CONFIG_SYSCTL |
542 | 542 | ||
543 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | 543 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) |
544 | { | 544 | { |
545 | unsigned char reason = get_nmi_reason(); | 545 | unsigned char reason = get_nmi_reason(); |
546 | char buf[64]; | 546 | char buf[64]; |
547 | 547 | ||
548 | if (!(reason & 0xc0)) { | 548 | if (!(reason & 0xc0)) { |
549 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | 549 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); |
550 | die_nmi(buf,regs); | 550 | die_nmi(buf,regs); |
551 | } | 551 | } |
552 | return 0; | 552 | return 0; |
553 | } | 553 | } |
554 | 554 | ||
555 | /* | 555 | /* |
556 | * proc handler for /proc/sys/kernel/unknown_nmi_panic | 556 | * proc handler for /proc/sys/kernel/unknown_nmi_panic |
557 | */ | 557 | */ |
558 | int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file, | 558 | int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file, |
559 | void __user *buffer, size_t *length, loff_t *ppos) | 559 | void __user *buffer, size_t *length, loff_t *ppos) |
560 | { | 560 | { |
561 | int old_state; | 561 | int old_state; |
562 | 562 | ||
563 | old_state = unknown_nmi_panic; | 563 | old_state = unknown_nmi_panic; |
564 | proc_dointvec(table, write, file, buffer, length, ppos); | 564 | proc_dointvec(table, write, file, buffer, length, ppos); |
565 | if (!!old_state == !!unknown_nmi_panic) | 565 | if (!!old_state == !!unknown_nmi_panic) |
566 | return 0; | 566 | return 0; |
567 | 567 | ||
568 | if (unknown_nmi_panic) { | 568 | if (unknown_nmi_panic) { |
569 | if (reserve_lapic_nmi() < 0) { | 569 | if (reserve_lapic_nmi() < 0) { |
570 | unknown_nmi_panic = 0; | 570 | unknown_nmi_panic = 0; |
571 | return -EBUSY; | 571 | return -EBUSY; |
572 | } else { | 572 | } else { |
573 | set_nmi_callback(unknown_nmi_panic_callback); | 573 | set_nmi_callback(unknown_nmi_panic_callback); |
574 | } | 574 | } |
575 | } else { | 575 | } else { |
576 | release_lapic_nmi(); | 576 | release_lapic_nmi(); |
577 | unset_nmi_callback(); | 577 | unset_nmi_callback(); |
578 | } | 578 | } |
579 | return 0; | 579 | return 0; |
580 | } | 580 | } |
581 | 581 | ||
582 | #endif | 582 | #endif |
583 | 583 | ||
584 | EXPORT_SYMBOL(nmi_active); | 584 | EXPORT_SYMBOL(nmi_active); |
585 | EXPORT_SYMBOL(nmi_watchdog); | 585 | EXPORT_SYMBOL(nmi_watchdog); |
586 | EXPORT_SYMBOL(reserve_lapic_nmi); | 586 | EXPORT_SYMBOL(reserve_lapic_nmi); |
587 | EXPORT_SYMBOL(release_lapic_nmi); | 587 | EXPORT_SYMBOL(release_lapic_nmi); |
588 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | 588 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); |
589 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | 589 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); |
590 | EXPORT_SYMBOL(touch_nmi_watchdog); | 590 | EXPORT_SYMBOL(touch_nmi_watchdog); |
591 | 591 |
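Worth noting which nmi.c accesses the patch leaves alone: read_pda(apic_timer_irqs) in nmi_watchdog_tick() and add_pda(__nmi_count, 1) in do_nmi() already reach the current CPU's PDA through the %gs base that pda_init() programs with wrmsrl(MSR_GS_BASE, pda) in setup64.c below, so only the by-CPU-number accesses need converting. A simplified contrast of the two paths (hypothetical; not the kernel's exact pda.h macros, which also dispatch on field size):

    /* Simplified illustration of remote vs. local PDA access. */
    #define cpu_pda(i)  (&_cpu_pda[i])    /* any CPU's PDA, indexed by number */
    #define read_pda(field)                                             \
    ({                                                                  \
            typeof(_cpu_pda[0].field) val__;                            \
            asm volatile("mov %%gs:%c1,%0" : "=r" (val__)               \
                         : "i" (offsetof(struct x8664_pda, field)));    \
            val__;                                                      \
    })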
arch/x86_64/kernel/setup64.c
1 | /* | 1 | /* |
2 | * X86-64 specific CPU setup. | 2 | * X86-64 specific CPU setup. |
3 | * Copyright (C) 1995 Linus Torvalds | 3 | * Copyright (C) 1995 Linus Torvalds |
4 | * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. | 4 | * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. |
5 | * See setup.c for older changelog. | 5 | * See setup.c for older changelog. |
6 | * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $ | 6 | * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $ |
7 | */ | 7 | */ |
8 | #include <linux/config.h> | 8 | #include <linux/config.h> |
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
11 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
12 | #include <linux/string.h> | 12 | #include <linux/string.h> |
13 | #include <linux/bootmem.h> | 13 | #include <linux/bootmem.h> |
14 | #include <linux/bitops.h> | 14 | #include <linux/bitops.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <asm/bootsetup.h> | 16 | #include <asm/bootsetup.h> |
17 | #include <asm/pda.h> | 17 | #include <asm/pda.h> |
18 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
19 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
20 | #include <asm/desc.h> | 20 | #include <asm/desc.h> |
21 | #include <asm/atomic.h> | 21 | #include <asm/atomic.h> |
22 | #include <asm/mmu_context.h> | 22 | #include <asm/mmu_context.h> |
23 | #include <asm/smp.h> | 23 | #include <asm/smp.h> |
24 | #include <asm/i387.h> | 24 | #include <asm/i387.h> |
25 | #include <asm/percpu.h> | 25 | #include <asm/percpu.h> |
26 | #include <asm/proto.h> | 26 | #include <asm/proto.h> |
27 | #include <asm/sections.h> | 27 | #include <asm/sections.h> |
28 | 28 | ||
29 | char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; | 29 | char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; |
30 | 30 | ||
31 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | 31 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
32 | 32 | ||
33 | struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned; | 33 | struct x8664_pda _cpu_pda[NR_CPUS] __cacheline_aligned; |
34 | 34 | ||
35 | struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; | 35 | struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; |
36 | 36 | ||
37 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); | 37 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); |
38 | 38 | ||
39 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | 39 | unsigned long __supported_pte_mask __read_mostly = ~0UL; |
40 | static int do_not_nx __initdata = 0; | 40 | static int do_not_nx __initdata = 0; |
41 | 41 | ||
42 | /* noexec=on|off | 42 | /* noexec=on|off |
43 | Control non executable mappings for 64bit processes. | 43 | Control non executable mappings for 64bit processes. |
44 | 44 | ||
45 | on Enable(default) | 45 | on Enable(default) |
46 | off Disable | 46 | off Disable |
47 | */ | 47 | */ |
48 | int __init nonx_setup(char *str) | 48 | int __init nonx_setup(char *str) |
49 | { | 49 | { |
50 | if (!strncmp(str, "on", 2)) { | 50 | if (!strncmp(str, "on", 2)) { |
51 | __supported_pte_mask |= _PAGE_NX; | 51 | __supported_pte_mask |= _PAGE_NX; |
52 | do_not_nx = 0; | 52 | do_not_nx = 0; |
53 | } else if (!strncmp(str, "off", 3)) { | 53 | } else if (!strncmp(str, "off", 3)) { |
54 | do_not_nx = 1; | 54 | do_not_nx = 1; |
55 | __supported_pte_mask &= ~_PAGE_NX; | 55 | __supported_pte_mask &= ~_PAGE_NX; |
56 | } | 56 | } |
57 | return 0; | 57 | return 0; |
58 | } | 58 | } |
59 | __setup("noexec=", nonx_setup); /* parsed early actually */ | 59 | __setup("noexec=", nonx_setup); /* parsed early actually */ |
60 | 60 | ||
61 | int force_personality32 = READ_IMPLIES_EXEC; | 61 | int force_personality32 = READ_IMPLIES_EXEC; |
62 | 62 | ||
63 | /* noexec32=on|off | 63 | /* noexec32=on|off |
64 | Control non executable heap for 32bit processes. | 64 | Control non executable heap for 32bit processes. |
65 | To control the stack too use noexec=off | 65 | To control the stack too use noexec=off |
66 | 66 | ||
67 | on PROT_READ does not imply PROT_EXEC for 32bit processes | 67 | on PROT_READ does not imply PROT_EXEC for 32bit processes |
68 | off PROT_READ implies PROT_EXEC (default) | 68 | off PROT_READ implies PROT_EXEC (default) |
69 | */ | 69 | */ |
70 | static int __init nonx32_setup(char *str) | 70 | static int __init nonx32_setup(char *str) |
71 | { | 71 | { |
72 | if (!strcmp(str, "on")) | 72 | if (!strcmp(str, "on")) |
73 | force_personality32 &= ~READ_IMPLIES_EXEC; | 73 | force_personality32 &= ~READ_IMPLIES_EXEC; |
74 | else if (!strcmp(str, "off")) | 74 | else if (!strcmp(str, "off")) |
75 | force_personality32 |= READ_IMPLIES_EXEC; | 75 | force_personality32 |= READ_IMPLIES_EXEC; |
76 | return 0; | 76 | return 0; |
77 | } | 77 | } |
78 | __setup("noexec32=", nonx32_setup); | 78 | __setup("noexec32=", nonx32_setup); |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Great future plan: | 81 | * Great future plan: |
82 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | 82 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. |
83 | * Always point %gs to its beginning | 83 | * Always point %gs to its beginning |
84 | */ | 84 | */ |
85 | void __init setup_per_cpu_areas(void) | 85 | void __init setup_per_cpu_areas(void) |
86 | { | 86 | { |
87 | int i; | 87 | int i; |
88 | unsigned long size; | 88 | unsigned long size; |
89 | 89 | ||
90 | #ifdef CONFIG_HOTPLUG_CPU | 90 | #ifdef CONFIG_HOTPLUG_CPU |
91 | prefill_possible_map(); | 91 | prefill_possible_map(); |
92 | #endif | 92 | #endif |
93 | 93 | ||
94 | /* Copy section for each CPU (we discard the original) */ | 94 | /* Copy section for each CPU (we discard the original) */ |
95 | size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); | 95 | size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); |
96 | #ifdef CONFIG_MODULES | 96 | #ifdef CONFIG_MODULES |
97 | if (size < PERCPU_ENOUGH_ROOM) | 97 | if (size < PERCPU_ENOUGH_ROOM) |
98 | size = PERCPU_ENOUGH_ROOM; | 98 | size = PERCPU_ENOUGH_ROOM; |
99 | #endif | 99 | #endif |
100 | 100 | ||
101 | for_each_cpu_mask (i, cpu_possible_map) { | 101 | for_each_cpu_mask (i, cpu_possible_map) { |
102 | char *ptr; | 102 | char *ptr; |
103 | 103 | ||
104 | if (!NODE_DATA(cpu_to_node(i))) { | 104 | if (!NODE_DATA(cpu_to_node(i))) { |
105 | printk("cpu with no node %d, num_online_nodes %d\n", | 105 | printk("cpu with no node %d, num_online_nodes %d\n", |
106 | i, num_online_nodes()); | 106 | i, num_online_nodes()); |
107 | ptr = alloc_bootmem(size); | 107 | ptr = alloc_bootmem(size); |
108 | } else { | 108 | } else { |
109 | ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); | 109 | ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); |
110 | } | 110 | } |
111 | if (!ptr) | 111 | if (!ptr) |
112 | panic("Cannot allocate cpu data for CPU %d\n", i); | 112 | panic("Cannot allocate cpu data for CPU %d\n", i); |
113 | cpu_pda[i].data_offset = ptr - __per_cpu_start; | 113 | cpu_pda(i)->data_offset = ptr - __per_cpu_start; |
114 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 114 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); |
115 | } | 115 | } |
116 | } | 116 | } |
117 | 117 | ||
118 | void pda_init(int cpu) | 118 | void pda_init(int cpu) |
119 | { | 119 | { |
120 | struct x8664_pda *pda = &cpu_pda[cpu]; | 120 | struct x8664_pda *pda = cpu_pda(cpu); |
121 | 121 | ||
122 | /* Setup up data that may be needed in __get_free_pages early */ | 122 | /* Setup up data that may be needed in __get_free_pages early */ |
123 | asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); | 123 | asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); |
124 | wrmsrl(MSR_GS_BASE, cpu_pda + cpu); | 124 | wrmsrl(MSR_GS_BASE, pda); |
125 | 125 | ||
126 | pda->cpunumber = cpu; | 126 | pda->cpunumber = cpu; |
127 | pda->irqcount = -1; | 127 | pda->irqcount = -1; |
128 | pda->kernelstack = | 128 | pda->kernelstack = |
129 | (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; | 129 | (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; |
130 | pda->active_mm = &init_mm; | 130 | pda->active_mm = &init_mm; |
131 | pda->mmu_state = 0; | 131 | pda->mmu_state = 0; |
132 | 132 | ||
133 | if (cpu == 0) { | 133 | if (cpu == 0) { |
134 | /* others are initialized in smpboot.c */ | 134 | /* others are initialized in smpboot.c */ |
135 | pda->pcurrent = &init_task; | 135 | pda->pcurrent = &init_task; |
136 | pda->irqstackptr = boot_cpu_stack; | 136 | pda->irqstackptr = boot_cpu_stack; |
137 | } else { | 137 | } else { |
138 | pda->irqstackptr = (char *) | 138 | pda->irqstackptr = (char *) |
139 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | 139 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); |
140 | if (!pda->irqstackptr) | 140 | if (!pda->irqstackptr) |
141 | panic("cannot allocate irqstack for cpu %d", cpu); | 141 | panic("cannot allocate irqstack for cpu %d", cpu); |
142 | } | 142 | } |
143 | 143 | ||
144 | 144 | ||
145 | pda->irqstackptr += IRQSTACKSIZE-64; | 145 | pda->irqstackptr += IRQSTACKSIZE-64; |
146 | } | 146 | } |
147 | 147 | ||
148 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 2) * EXCEPTION_STKSZ + DEBUG_STKSZ] | 148 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 2) * EXCEPTION_STKSZ + DEBUG_STKSZ] |
149 | __attribute__((section(".bss.page_aligned"))); | 149 | __attribute__((section(".bss.page_aligned"))); |
150 | 150 | ||
151 | /* May not be marked __init: used by software suspend */ | 151 | /* May not be marked __init: used by software suspend */ |
152 | void syscall_init(void) | 152 | void syscall_init(void) |
153 | { | 153 | { |
154 | /* | 154 | /* |
155 | * LSTAR and STAR live in a bit strange symbiosis. | 155 | * LSTAR and STAR live in a bit strange symbiosis. |
156 | * They both write to the same internal register. STAR allows to set CS/DS | 156 | * They both write to the same internal register. STAR allows to set CS/DS |
157 | * but only a 32bit target. LSTAR sets the 64bit rip. | 157 | * but only a 32bit target. LSTAR sets the 64bit rip. |
158 | */ | 158 | */ |
159 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | 159 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); |
160 | wrmsrl(MSR_LSTAR, system_call); | 160 | wrmsrl(MSR_LSTAR, system_call); |
161 | 161 | ||
162 | #ifdef CONFIG_IA32_EMULATION | 162 | #ifdef CONFIG_IA32_EMULATION |
163 | syscall32_cpu_init (); | 163 | syscall32_cpu_init (); |
164 | #endif | 164 | #endif |
165 | 165 | ||
166 | /* Flags to clear on syscall */ | 166 | /* Flags to clear on syscall */ |
167 | wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); | 167 | wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); |
168 | } | 168 | } |
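The MSR_STAR write packs two selectors into one 64-bit MSR: bits 63:48 hold the base selector from which the 32-bit SYSRET user CS/SS are derived, and bits 47:32 hold the kernel CS used on SYSCALL entry. A standalone illustration of the packing, with made-up selector values standing in for __USER32_CS and __KERNEL_CS:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Hypothetical selector values; the packing, not the numbers,
         * is the point here. */
        uint64_t user32_cs = 0x23;
        uint64_t kernel_cs = 0x10;

        uint64_t star = (user32_cs << 48) | (kernel_cs << 32);

        printf("STAR = %#018llx\n", (unsigned long long)star);
        /* -> 0x0023001000000000: user base in [63:48],
         *    kernel CS in [47:32], low 32 bits unused here. */
        return 0;
    }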
169 | 169 | ||
170 | void __cpuinit check_efer(void) | 170 | void __cpuinit check_efer(void) |
171 | { | 171 | { |
172 | unsigned long efer; | 172 | unsigned long efer; |
173 | 173 | ||
174 | rdmsrl(MSR_EFER, efer); | 174 | rdmsrl(MSR_EFER, efer); |
175 | if (!(efer & EFER_NX) || do_not_nx) { | 175 | if (!(efer & EFER_NX) || do_not_nx) { |
176 | __supported_pte_mask &= ~_PAGE_NX; | 176 | __supported_pte_mask &= ~_PAGE_NX; |
177 | } | 177 | } |
178 | } | 178 | } |
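check_efer() keeps the generic page-table code honest: when the CPU lacks EFER.NX (or do_not_nx was set on the command line), the NX bit is cleared from __supported_pte_mask so it can never reach a PTE. A hedged sketch of how such a mask gates PTE flag construction (supported_pte_mask and mk_pte_flags here are illustrative, not the kernel's actual helpers):

    #include <stdint.h>
    #include <stdio.h>

    #define _PAGE_NX   (1ULL << 63)   /* x86-64: bit 63 of a PTE */
    #define _PAGE_RW   (1ULL << 1)

    static uint64_t supported_pte_mask = ~0ULL;

    /* Every PTE's flags are filtered through the mask, so clearing
     * _PAGE_NX once silently drops it from all later mappings. */
    static uint64_t mk_pte_flags(uint64_t want)
    {
        return want & supported_pte_mask;
    }

    int main(void)
    {
        supported_pte_mask &= ~_PAGE_NX;   /* what check_efer() does */
        uint64_t f = mk_pte_flags(_PAGE_RW | _PAGE_NX);
        printf("NX %s\n", (f & _PAGE_NX) ? "kept" : "dropped");
        return 0;
    }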
179 | 179 | ||
180 | /* | 180 | /* |
181 | * cpu_init() initializes state that is per-CPU. Some data is already | 181 | * cpu_init() initializes state that is per-CPU. Some data is already |
182 | * initialized (naturally) in the bootstrap process, such as the GDT | 182 | * initialized (naturally) in the bootstrap process, such as the GDT |
183 | * and IDT. We reload them nevertheless, this function acts as a | 183 | * and IDT. We reload them nevertheless, this function acts as a |
184 | * 'CPU state barrier', nothing should get across. | 184 | * 'CPU state barrier', nothing should get across. |
185 | * A lot of state is already set up in PDA init. | 185 | * A lot of state is already set up in PDA init. |
186 | */ | 186 | */ |
187 | void __cpuinit cpu_init (void) | 187 | void __cpuinit cpu_init (void) |
188 | { | 188 | { |
189 | int cpu = stack_smp_processor_id(); | 189 | int cpu = stack_smp_processor_id(); |
190 | struct tss_struct *t = &per_cpu(init_tss, cpu); | 190 | struct tss_struct *t = &per_cpu(init_tss, cpu); |
191 | unsigned long v; | 191 | unsigned long v; |
192 | char *estacks = NULL; | 192 | char *estacks = NULL; |
193 | struct task_struct *me; | 193 | struct task_struct *me; |
194 | int i; | 194 | int i; |
195 | 195 | ||
196 | /* CPU 0 is initialised in head64.c */ | 196 | /* CPU 0 is initialised in head64.c */ |
197 | if (cpu != 0) { | 197 | if (cpu != 0) { |
198 | pda_init(cpu); | 198 | pda_init(cpu); |
199 | zap_low_mappings(cpu); | 199 | zap_low_mappings(cpu); |
200 | } else | 200 | } else |
201 | estacks = boot_exception_stacks; | 201 | estacks = boot_exception_stacks; |
202 | 202 | ||
203 | me = current; | 203 | me = current; |
204 | 204 | ||
205 | if (cpu_test_and_set(cpu, cpu_initialized)) | 205 | if (cpu_test_and_set(cpu, cpu_initialized)) |
206 | panic("CPU#%d already initialized!\n", cpu); | 206 | panic("CPU#%d already initialized!\n", cpu); |
207 | 207 | ||
208 | printk("Initializing CPU#%d\n", cpu); | 208 | printk("Initializing CPU#%d\n", cpu); |
209 | 209 | ||
210 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 210 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * Initialize the per-CPU GDT with the boot GDT, | 213 | * Initialize the per-CPU GDT with the boot GDT, |
214 | * and set up the GDT descriptor: | 214 | * and set up the GDT descriptor: |
215 | */ | 215 | */ |
216 | if (cpu) | 216 | if (cpu) |
217 | memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); | 217 | memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); |
218 | 218 | ||
219 | cpu_gdt_descr[cpu].size = GDT_SIZE; | 219 | cpu_gdt_descr[cpu].size = GDT_SIZE; |
220 | asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); | 220 | asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); |
221 | asm volatile("lidt %0" :: "m" (idt_descr)); | 221 | asm volatile("lidt %0" :: "m" (idt_descr)); |
222 | 222 | ||
223 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | 223 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); |
224 | syscall_init(); | 224 | syscall_init(); |
225 | 225 | ||
226 | wrmsrl(MSR_FS_BASE, 0); | 226 | wrmsrl(MSR_FS_BASE, 0); |
227 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | 227 | wrmsrl(MSR_KERNEL_GS_BASE, 0); |
228 | barrier(); | 228 | barrier(); |
229 | 229 | ||
230 | check_efer(); | 230 | check_efer(); |
231 | 231 | ||
232 | /* | 232 | /* |
233 | * set up and load the per-CPU TSS | 233 | * set up and load the per-CPU TSS |
234 | */ | 234 | */ |
235 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 235 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
236 | if (cpu) { | 236 | if (cpu) { |
237 | static const unsigned int order[N_EXCEPTION_STACKS] = { | 237 | static const unsigned int order[N_EXCEPTION_STACKS] = { |
238 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | 238 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, |
239 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | 239 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER |
240 | }; | 240 | }; |
241 | 241 | ||
242 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | 242 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); |
243 | if (!estacks) | 243 | if (!estacks) |
244 | panic("Cannot allocate exception stack %ld %d\n", | 244 | panic("Cannot allocate exception stack %ld %d\n", |
245 | v, cpu); | 245 | v, cpu); |
246 | } | 246 | } |
247 | switch (v + 1) { | 247 | switch (v + 1) { |
248 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 248 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
249 | case DEBUG_STACK: | 249 | case DEBUG_STACK: |
250 | cpu_pda[cpu].debugstack = (unsigned long)estacks; | 250 | cpu_pda(cpu)->debugstack = (unsigned long)estacks; |
251 | estacks += DEBUG_STKSZ; | 251 | estacks += DEBUG_STKSZ; |
252 | break; | 252 | break; |
253 | #endif | 253 | #endif |
254 | default: | 254 | default: |
255 | estacks += EXCEPTION_STKSZ; | 255 | estacks += EXCEPTION_STKSZ; |
256 | break; | 256 | break; |
257 | } | 257 | } |
258 | t->ist[v] = (unsigned long)estacks; | 258 | t->ist[v] = (unsigned long)estacks; |
259 | } | 259 | } |
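A subtlety in the loop above: estacks is advanced past each stack before being stored, because t->ist[v] must hold the top of the stack (x86 stacks grow downward), with the DEBUG_STACK slot simply taking a larger carve-out of the allocation. A toy walk of the pointer arithmetic (the DEMO_* sizes are illustrative, not the kernel's constants):

    #include <stdio.h>

    #define DEMO_EXCEPTION_STKSZ 4096
    #define DEMO_DEBUG_STKSZ     8192

    static char pool[3 * DEMO_EXCEPTION_STKSZ + DEMO_DEBUG_STKSZ];

    int main(void)
    {
        char *estacks = pool;
        unsigned long ist[4];
        int v;

        for (v = 0; v < 4; v++) {
            /* pretend slot 2 is the (larger) debug stack */
            estacks += (v == 2) ? DEMO_DEBUG_STKSZ : DEMO_EXCEPTION_STKSZ;
            ist[v] = (unsigned long)estacks;   /* top of this stack */
            printf("ist[%d] = pool + %lu\n",
                   v, ist[v] - (unsigned long)pool);
        }
        return 0;
    }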
260 | 260 | ||
261 | t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | 261 | t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap); |
262 | /* | 262 | /* |
263 | * <= is required because the CPU will access up to | 263 | * <= is required because the CPU will access up to |
264 | * 8 bits beyond the end of the IO permission bitmap. | 264 | * 8 bits beyond the end of the IO permission bitmap. |
265 | */ | 265 | */ |
266 | for (i = 0; i <= IO_BITMAP_LONGS; i++) | 266 | for (i = 0; i <= IO_BITMAP_LONGS; i++) |
267 | t->io_bitmap[i] = ~0UL; | 267 | t->io_bitmap[i] = ~0UL; |
268 | 268 | ||
269 | atomic_inc(&init_mm.mm_count); | 269 | atomic_inc(&init_mm.mm_count); |
270 | me->active_mm = &init_mm; | 270 | me->active_mm = &init_mm; |
271 | if (me->mm) | 271 | if (me->mm) |
272 | BUG(); | 272 | BUG(); |
273 | enter_lazy_tlb(&init_mm, me); | 273 | enter_lazy_tlb(&init_mm, me); |
274 | 274 | ||
275 | set_tss_desc(cpu, t); | 275 | set_tss_desc(cpu, t); |
276 | load_TR_desc(); | 276 | load_TR_desc(); |
277 | load_LDT(&init_mm.context); | 277 | load_LDT(&init_mm.context); |
278 | 278 | ||
279 | /* | 279 | /* |
280 | * Clear all 6 debug registers: | 280 | * Clear all 6 debug registers: |
281 | */ | 281 | */ |
282 | 282 | ||
283 | set_debug(0UL, 0); | 283 | set_debug(0UL, 0); |
284 | set_debug(0UL, 1); | 284 | set_debug(0UL, 1); |
285 | set_debug(0UL, 2); | 285 | set_debug(0UL, 2); |
286 | set_debug(0UL, 3); | 286 | set_debug(0UL, 3); |
287 | set_debug(0UL, 6); | 287 | set_debug(0UL, 6); |
288 | set_debug(0UL, 7); | 288 | set_debug(0UL, 7); |
289 | 289 | ||
290 | fpu_init(); | 290 | fpu_init(); |
291 | } | 291 | } |
292 | 292 |
arch/x86_64/kernel/smpboot.c
1 | /* | 1 | /* |
2 | * x86 SMP booting functions | 2 | * x86 SMP booting functions |
3 | * | 3 | * |
4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | 4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> |
5 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> | 5 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> |
6 | * Copyright 2001 Andi Kleen, SuSE Labs. | 6 | * Copyright 2001 Andi Kleen, SuSE Labs. |
7 | * | 7 | * |
8 | * Much of the core SMP work is based on previous work by Thomas Radke, to | 8 | * Much of the core SMP work is based on previous work by Thomas Radke, to |
9 | * whom a great many thanks are extended. | 9 | * whom a great many thanks are extended. |
10 | * | 10 | * |
11 | * Thanks to Intel for making available several different Pentium, | 11 | * Thanks to Intel for making available several different Pentium, |
12 | * Pentium Pro and Pentium-II/Xeon MP machines. | 12 | * Pentium Pro and Pentium-II/Xeon MP machines. |
13 | * Original development of Linux SMP code supported by Caldera. | 13 | * Original development of Linux SMP code supported by Caldera. |
14 | * | 14 | * |
15 | * This code is released under the GNU General Public License version 2 | 15 | * This code is released under the GNU General Public License version 2 |
16 | * | 16 | * |
17 | * Fixes | 17 | * Fixes |
18 | * Felix Koop : NR_CPUS used properly | 18 | * Felix Koop : NR_CPUS used properly |
19 | * Jose Renau : Handle single CPU case. | 19 | * Jose Renau : Handle single CPU case. |
20 | * Alan Cox : By repeated request 8) - Total BogoMIP report. | 20 | * Alan Cox : By repeated request 8) - Total BogoMIP report. |
21 | * Greg Wright : Fix for kernel stacks panic. | 21 | * Greg Wright : Fix for kernel stacks panic. |
22 | * Erich Boleyn : MP v1.4 and additional changes. | 22 | * Erich Boleyn : MP v1.4 and additional changes. |
23 | * Matthias Sattler : Changes for 2.1 kernel map. | 23 | * Matthias Sattler : Changes for 2.1 kernel map. |
24 | * Michel Lespinasse : Changes for 2.1 kernel map. | 24 | * Michel Lespinasse : Changes for 2.1 kernel map. |
25 | * Michael Chastain : Change trampoline.S to gnu as. | 25 | * Michael Chastain : Change trampoline.S to gnu as. |
26 | * Alan Cox : Dumb bug: 'B' step PPro's are fine | 26 | * Alan Cox : Dumb bug: 'B' step PPro's are fine |
27 | * Ingo Molnar : Added APIC timers, based on code | 27 | * Ingo Molnar : Added APIC timers, based on code |
28 | * from Jose Renau | 28 | * from Jose Renau |
29 | * Ingo Molnar : various cleanups and rewrites | 29 | * Ingo Molnar : various cleanups and rewrites |
30 | * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. | 30 | * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. |
31 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs | 31 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs |
32 | * Andi Kleen : Changed for SMP boot into long mode. | 32 | * Andi Kleen : Changed for SMP boot into long mode. |
33 | * Rusty Russell : Hacked into shape for new "hotplug" boot process. | 33 | * Rusty Russell : Hacked into shape for new "hotplug" boot process. |
34 | * Andi Kleen : Converted to new state machine. | 34 | * Andi Kleen : Converted to new state machine. |
35 | * Various cleanups. | 35 | * Various cleanups. |
36 | * Probably mostly hotplug CPU ready now. | 36 | * Probably mostly hotplug CPU ready now. |
37 | * Ashok Raj : CPU hotplug support | 37 | * Ashok Raj : CPU hotplug support |
38 | */ | 38 | */ |
39 | 39 | ||
40 | 40 | ||
41 | #include <linux/config.h> | 41 | #include <linux/config.h> |
42 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | 43 | ||
44 | #include <linux/mm.h> | 44 | #include <linux/mm.h> |
45 | #include <linux/kernel_stat.h> | 45 | #include <linux/kernel_stat.h> |
46 | #include <linux/smp_lock.h> | 46 | #include <linux/smp_lock.h> |
47 | #include <linux/bootmem.h> | 47 | #include <linux/bootmem.h> |
48 | #include <linux/thread_info.h> | 48 | #include <linux/thread_info.h> |
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | 50 | ||
51 | #include <linux/delay.h> | 51 | #include <linux/delay.h> |
52 | #include <linux/mc146818rtc.h> | 52 | #include <linux/mc146818rtc.h> |
53 | #include <asm/mtrr.h> | 53 | #include <asm/mtrr.h> |
54 | #include <asm/pgalloc.h> | 54 | #include <asm/pgalloc.h> |
55 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
56 | #include <asm/kdebug.h> | 56 | #include <asm/kdebug.h> |
57 | #include <asm/tlbflush.h> | 57 | #include <asm/tlbflush.h> |
58 | #include <asm/proto.h> | 58 | #include <asm/proto.h> |
59 | #include <asm/nmi.h> | 59 | #include <asm/nmi.h> |
60 | #include <asm/irq.h> | 60 | #include <asm/irq.h> |
61 | #include <asm/hw_irq.h> | 61 | #include <asm/hw_irq.h> |
62 | 62 | ||
63 | /* Number of siblings per CPU package */ | 63 | /* Number of siblings per CPU package */ |
64 | int smp_num_siblings = 1; | 64 | int smp_num_siblings = 1; |
65 | /* Package ID of each logical CPU */ | 65 | /* Package ID of each logical CPU */ |
66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
67 | /* core ID of each logical CPU */ | 67 | /* core ID of each logical CPU */ |
68 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 68 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
69 | 69 | ||
70 | /* Bitmask of currently online CPUs */ | 70 | /* Bitmask of currently online CPUs */ |
71 | cpumask_t cpu_online_map __read_mostly; | 71 | cpumask_t cpu_online_map __read_mostly; |
72 | 72 | ||
73 | EXPORT_SYMBOL(cpu_online_map); | 73 | EXPORT_SYMBOL(cpu_online_map); |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * Private maps to synchronize booting between AP and BP. | 76 | * Private maps to synchronize booting between AP and BP. |
77 | * Probably not needed anymore, but it makes for easier debugging. -AK | 77 | * Probably not needed anymore, but it makes for easier debugging. -AK |
78 | */ | 78 | */ |
79 | cpumask_t cpu_callin_map; | 79 | cpumask_t cpu_callin_map; |
80 | cpumask_t cpu_callout_map; | 80 | cpumask_t cpu_callout_map; |
81 | 81 | ||
82 | cpumask_t cpu_possible_map; | 82 | cpumask_t cpu_possible_map; |
83 | EXPORT_SYMBOL(cpu_possible_map); | 83 | EXPORT_SYMBOL(cpu_possible_map); |
84 | 84 | ||
85 | /* Per CPU bogomips and other parameters */ | 85 | /* Per CPU bogomips and other parameters */ |
86 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | 86 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; |
87 | 87 | ||
88 | /* Set when the idlers are all forked */ | 88 | /* Set when the idlers are all forked */ |
89 | int smp_threads_ready; | 89 | int smp_threads_ready; |
90 | 90 | ||
91 | /* representing HT siblings of each logical CPU */ | 91 | /* representing HT siblings of each logical CPU */ |
92 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | 92 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; |
93 | 93 | ||
94 | /* representing HT and core siblings of each logical CPU */ | 94 | /* representing HT and core siblings of each logical CPU */ |
95 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; | 95 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; |
96 | EXPORT_SYMBOL(cpu_core_map); | 96 | EXPORT_SYMBOL(cpu_core_map); |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * Trampoline 80x86 program as an array. | 99 | * Trampoline 80x86 program as an array. |
100 | */ | 100 | */ |
101 | 101 | ||
102 | extern unsigned char trampoline_data[]; | 102 | extern unsigned char trampoline_data[]; |
103 | extern unsigned char trampoline_end[]; | 103 | extern unsigned char trampoline_end[]; |
104 | 104 | ||
105 | /* State of each CPU */ | 105 | /* State of each CPU */ |
106 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; | 106 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; |
107 | 107 | ||
108 | /* | 108 | /* |
109 | * Store all idle threads; these can be reused instead of creating | 109 | * Store all idle threads; these can be reused instead of creating |
110 | * a new thread. This also avoids complicated thread-destroy functionality | 110 | * a new thread. This also avoids complicated thread-destroy functionality |
111 | * for idle threads. | 111 | * for idle threads. |
112 | */ | 112 | */ |
113 | struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; | 113 | struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; |
114 | 114 | ||
115 | #define get_idle_for_cpu(x) (idle_thread_array[(x)]) | 115 | #define get_idle_for_cpu(x) (idle_thread_array[(x)]) |
116 | #define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) | 116 | #define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * Currently trivial. Write the real->protected mode | 119 | * Currently trivial. Write the real->protected mode |
120 | * bootstrap into the page concerned. The caller | 120 | * bootstrap into the page concerned. The caller |
121 | * has made sure it's suitably aligned. | 121 | * has made sure it's suitably aligned. |
122 | */ | 122 | */ |
123 | 123 | ||
124 | static unsigned long __cpuinit setup_trampoline(void) | 124 | static unsigned long __cpuinit setup_trampoline(void) |
125 | { | 125 | { |
126 | void *tramp = __va(SMP_TRAMPOLINE_BASE); | 126 | void *tramp = __va(SMP_TRAMPOLINE_BASE); |
127 | memcpy(tramp, trampoline_data, trampoline_end - trampoline_data); | 127 | memcpy(tramp, trampoline_data, trampoline_end - trampoline_data); |
128 | return virt_to_phys(tramp); | 128 | return virt_to_phys(tramp); |
129 | } | 129 | } |
130 | 130 | ||
131 | /* | 131 | /* |
132 | * The bootstrap kernel entry code has set these up. Save them for | 132 | * The bootstrap kernel entry code has set these up. Save them for |
133 | * a given CPU | 133 | * a given CPU |
134 | */ | 134 | */ |
135 | 135 | ||
136 | static void __cpuinit smp_store_cpu_info(int id) | 136 | static void __cpuinit smp_store_cpu_info(int id) |
137 | { | 137 | { |
138 | struct cpuinfo_x86 *c = cpu_data + id; | 138 | struct cpuinfo_x86 *c = cpu_data + id; |
139 | 139 | ||
140 | *c = boot_cpu_data; | 140 | *c = boot_cpu_data; |
141 | identify_cpu(c); | 141 | identify_cpu(c); |
142 | print_cpu_info(c); | 142 | print_cpu_info(c); |
143 | } | 143 | } |
144 | 144 | ||
145 | /* | 145 | /* |
146 | * New Funky TSC sync algorithm borrowed from IA64. | 146 | * New Funky TSC sync algorithm borrowed from IA64. |
147 | * Its main advantage is that it doesn't reset the TSCs fully; | 147 | * Its main advantage is that it doesn't reset the TSCs fully; |
148 | * in general it looks more robust and works better than my earlier | 148 | * in general it looks more robust and works better than my earlier |
149 | * attempts. I believe it was written by David Mosberger. Some minor | 149 | * attempts. I believe it was written by David Mosberger. Some minor |
150 | * adjustments for x86-64 by me -AK | 150 | * adjustments for x86-64 by me -AK |
151 | * | 151 | * |
152 | * Original comment reproduced below. | 152 | * Original comment reproduced below. |
153 | * | 153 | * |
154 | * Synchronize TSC of the current (slave) CPU with the TSC of the | 154 | * Synchronize TSC of the current (slave) CPU with the TSC of the |
155 | * MASTER CPU (normally the time-keeper CPU). We use a closed loop to | 155 | * MASTER CPU (normally the time-keeper CPU). We use a closed loop to |
156 | * eliminate the possibility of unaccounted-for errors (such as | 156 | * eliminate the possibility of unaccounted-for errors (such as |
157 | * getting a machine check in the middle of a calibration step). The | 157 | * getting a machine check in the middle of a calibration step). The |
158 | * basic idea is for the slave to ask the master what itc value it has | 158 | * basic idea is for the slave to ask the master what itc value it has |
159 | * and to read its own itc before and after the master responds. Each | 159 | * and to read its own itc before and after the master responds. Each |
160 | * iteration gives us three timestamps: | 160 | * iteration gives us three timestamps: |
161 | * | 161 | * |
162 | * slave master | 162 | * slave master |
163 | * | 163 | * |
164 | * t0 ---\ | 164 | * t0 ---\ |
165 | * ---\ | 165 | * ---\ |
166 | * ---> | 166 | * ---> |
167 | * tm | 167 | * tm |
168 | * /--- | 168 | * /--- |
169 | * /--- | 169 | * /--- |
170 | * t1 <--- | 170 | * t1 <--- |
171 | * | 171 | * |
172 | * | 172 | * |
173 | * The goal is to adjust the slave's TSC such that tm falls exactly | 173 | * The goal is to adjust the slave's TSC such that tm falls exactly |
174 | * half-way between t0 and t1. If we achieve this, the clocks are | 174 | * half-way between t0 and t1. If we achieve this, the clocks are |
175 | * synchronized provided the interconnect between the slave and the | 175 | * synchronized provided the interconnect between the slave and the |
176 | * master is symmetric. Even if the interconnect were asymmetric, we | 176 | * master is symmetric. Even if the interconnect were asymmetric, we |
177 | * would still know that the synchronization error is smaller than the | 177 | * would still know that the synchronization error is smaller than the |
178 | * roundtrip latency (t1 - t0). | 178 | * roundtrip latency (t1 - t0). |
179 | * | 179 | * |
180 | * When the interconnect is quiet and symmetric, this lets us | 180 | * When the interconnect is quiet and symmetric, this lets us |
181 | * synchronize the TSC to within one or two cycles. However, we can | 181 | * synchronize the TSC to within one or two cycles. However, we can |
182 | * only *guarantee* that the synchronization is accurate to within a | 182 | * only *guarantee* that the synchronization is accurate to within a |
183 | * round-trip time, which is typically in the range of several hundred | 183 | * round-trip time, which is typically in the range of several hundred |
184 | * cycles (e.g., ~500 cycles). In practice, this means that the TSCs | 184 | * cycles (e.g., ~500 cycles). In practice, this means that the TSCs |
185 | * are usually almost perfectly synchronized, but we shouldn't assume | 185 | * are usually almost perfectly synchronized, but we shouldn't assume |
186 | * that the accuracy is much better than half a microsecond or so. | 186 | * that the accuracy is much better than half a microsecond or so. |
187 | * | 187 | * |
188 | * [there are other errors like the latency of RDTSC and of the | 188 | * [there are other errors like the latency of RDTSC and of the |
189 | * WRMSR. These can also account for hundreds of cycles. So it's | 189 | * WRMSR. These can also account for hundreds of cycles. So it's |
190 | * probably worse. It claims 153 cycles error on a dual Opteron, | 190 | * probably worse. It claims 153 cycles error on a dual Opteron, |
191 | * but I suspect the numbers are actually somewhat worse -AK] | 191 | * but I suspect the numbers are actually somewhat worse -AK] |
192 | */ | 192 | */ |
193 | 193 | ||
194 | #define MASTER 0 | 194 | #define MASTER 0 |
195 | #define SLAVE (SMP_CACHE_BYTES/8) | 195 | #define SLAVE (SMP_CACHE_BYTES/8) |
196 | 196 | ||
197 | /* Intentionally don't use cpu_relax() while TSC synchronization | 197 | /* Intentionally don't use cpu_relax() while TSC synchronization |
198 | because we don't want to go into funky power save modi or cause | 198 | because we don't want to go into funky power save modi or cause |
199 | hypervisors to schedule us away. Going to sleep would likely affect | 199 | hypervisors to schedule us away. Going to sleep would likely affect |
200 | latency and low latency is the primary objective here. -AK */ | 200 | latency and low latency is the primary objective here. -AK */ |
201 | #define no_cpu_relax() barrier() | 201 | #define no_cpu_relax() barrier() |
202 | 202 | ||
203 | static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); | 203 | static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); |
204 | static volatile __cpuinitdata unsigned long go[SLAVE + 1]; | 204 | static volatile __cpuinitdata unsigned long go[SLAVE + 1]; |
205 | static int notscsync __cpuinitdata; | 205 | static int notscsync __cpuinitdata; |
206 | 206 | ||
207 | #undef DEBUG_TSC_SYNC | 207 | #undef DEBUG_TSC_SYNC |
208 | 208 | ||
209 | #define NUM_ROUNDS 64 /* magic value */ | 209 | #define NUM_ROUNDS 64 /* magic value */ |
210 | #define NUM_ITERS 5 /* likewise */ | 210 | #define NUM_ITERS 5 /* likewise */ |
211 | 211 | ||
212 | /* Callback on boot CPU */ | 212 | /* Callback on boot CPU */ |
213 | static __cpuinit void sync_master(void *arg) | 213 | static __cpuinit void sync_master(void *arg) |
214 | { | 214 | { |
215 | unsigned long flags, i; | 215 | unsigned long flags, i; |
216 | 216 | ||
217 | go[MASTER] = 0; | 217 | go[MASTER] = 0; |
218 | 218 | ||
219 | local_irq_save(flags); | 219 | local_irq_save(flags); |
220 | { | 220 | { |
221 | for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { | 221 | for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { |
222 | while (!go[MASTER]) | 222 | while (!go[MASTER]) |
223 | no_cpu_relax(); | 223 | no_cpu_relax(); |
224 | go[MASTER] = 0; | 224 | go[MASTER] = 0; |
225 | rdtscll(go[SLAVE]); | 225 | rdtscll(go[SLAVE]); |
226 | } | 226 | } |
227 | } | 227 | } |
228 | local_irq_restore(flags); | 228 | local_irq_restore(flags); |
229 | } | 229 | } |
230 | 230 | ||
231 | /* | 231 | /* |
232 | * Return the number of cycles by which our tsc differs from the tsc | 232 | * Return the number of cycles by which our tsc differs from the tsc |
233 | * on the master (time-keeper) CPU. A positive number indicates our | 233 | * on the master (time-keeper) CPU. A positive number indicates our |
234 | * tsc is ahead of the master, negative that it is behind. | 234 | * tsc is ahead of the master, negative that it is behind. |
235 | */ | 235 | */ |
236 | static inline long | 236 | static inline long |
237 | get_delta(long *rt, long *master) | 237 | get_delta(long *rt, long *master) |
238 | { | 238 | { |
239 | unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; | 239 | unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; |
240 | unsigned long tcenter, t0, t1, tm; | 240 | unsigned long tcenter, t0, t1, tm; |
241 | int i; | 241 | int i; |
242 | 242 | ||
243 | for (i = 0; i < NUM_ITERS; ++i) { | 243 | for (i = 0; i < NUM_ITERS; ++i) { |
244 | rdtscll(t0); | 244 | rdtscll(t0); |
245 | go[MASTER] = 1; | 245 | go[MASTER] = 1; |
246 | while (!(tm = go[SLAVE])) | 246 | while (!(tm = go[SLAVE])) |
247 | no_cpu_relax(); | 247 | no_cpu_relax(); |
248 | go[SLAVE] = 0; | 248 | go[SLAVE] = 0; |
249 | rdtscll(t1); | 249 | rdtscll(t1); |
250 | 250 | ||
251 | if (t1 - t0 < best_t1 - best_t0) | 251 | if (t1 - t0 < best_t1 - best_t0) |
252 | best_t0 = t0, best_t1 = t1, best_tm = tm; | 252 | best_t0 = t0, best_t1 = t1, best_tm = tm; |
253 | } | 253 | } |
254 | 254 | ||
255 | *rt = best_t1 - best_t0; | 255 | *rt = best_t1 - best_t0; |
256 | *master = best_tm - best_t0; | 256 | *master = best_tm - best_t0; |
257 | 257 | ||
258 | /* average best_t0 and best_t1 without overflow: */ | 258 | /* average best_t0 and best_t1 without overflow: */ |
259 | tcenter = (best_t0/2 + best_t1/2); | 259 | tcenter = (best_t0/2 + best_t1/2); |
260 | if (best_t0 % 2 + best_t1 % 2 == 2) | 260 | if (best_t0 % 2 + best_t1 % 2 == 2) |
261 | ++tcenter; | 261 | ++tcenter; |
262 | return tcenter - best_tm; | 262 | return tcenter - best_tm; |
263 | } | 263 | } |
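The tail of get_delta() computes the midpoint of best_t0 and best_t1 without the wraparound that a naive (t0 + t1)/2 could hit on 64-bit cycle counters: average the halves, then add back the 1 that gets dropped when both values are odd. A standalone check of the identity:

    #include <stdio.h>
    #include <limits.h>

    static unsigned long avg(unsigned long a, unsigned long b)
    {
        unsigned long c = a/2 + b/2;
        if (a % 2 + b % 2 == 2)     /* both odd: the two dropped   */
            ++c;                    /* halves sum to exactly 1     */
        return c;
    }

    int main(void)
    {
        /* Near-overflow inputs where (a + b) would wrap. */
        unsigned long a = ULONG_MAX - 2, b = ULONG_MAX - 4;
        printf("avg = %lu (naive wraps to %lu)\n",
               avg(a, b), (a + b) / 2);
        return 0;
    }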
264 | 264 | ||
265 | static __cpuinit void sync_tsc(unsigned int master) | 265 | static __cpuinit void sync_tsc(unsigned int master) |
266 | { | 266 | { |
267 | int i, done = 0; | 267 | int i, done = 0; |
268 | long delta, adj, adjust_latency = 0; | 268 | long delta, adj, adjust_latency = 0; |
269 | unsigned long flags, rt, master_time_stamp, bound; | 269 | unsigned long flags, rt, master_time_stamp, bound; |
270 | #ifdef DEBUG_TSC_SYNC | 270 | #ifdef DEBUG_TSC_SYNC |
271 | static struct syncdebug { | 271 | static struct syncdebug { |
272 | long rt; /* roundtrip time */ | 272 | long rt; /* roundtrip time */ |
273 | long master; /* master's timestamp */ | 273 | long master; /* master's timestamp */ |
274 | long diff; /* difference between midpoint and master's timestamp */ | 274 | long diff; /* difference between midpoint and master's timestamp */ |
275 | long lat; /* estimate of tsc adjustment latency */ | 275 | long lat; /* estimate of tsc adjustment latency */ |
276 | } t[NUM_ROUNDS] __cpuinitdata; | 276 | } t[NUM_ROUNDS] __cpuinitdata; |
277 | #endif | 277 | #endif |
278 | 278 | ||
279 | printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", | 279 | printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", |
280 | smp_processor_id(), master); | 280 | smp_processor_id(), master); |
281 | 281 | ||
282 | go[MASTER] = 1; | 282 | go[MASTER] = 1; |
283 | 283 | ||
284 | /* It is dangerous to broadcast an IPI while cpus are coming up, | 284 | /* It is dangerous to broadcast an IPI while cpus are coming up, |
285 | * as they may not be ready to accept it. Since we only | 285 | * as they may not be ready to accept it. Since we only |
286 | * need to send the IPI to the boot cpu, direct the message | 286 | * need to send the IPI to the boot cpu, direct the message |
287 | * there and avoid the race. | 287 | * there and avoid the race. |
288 | */ | 288 | */ |
289 | smp_call_function_single(master, sync_master, NULL, 1, 0); | 289 | smp_call_function_single(master, sync_master, NULL, 1, 0); |
290 | 290 | ||
291 | while (go[MASTER]) /* wait for master to be ready */ | 291 | while (go[MASTER]) /* wait for master to be ready */ |
292 | no_cpu_relax(); | 292 | no_cpu_relax(); |
293 | 293 | ||
294 | spin_lock_irqsave(&tsc_sync_lock, flags); | 294 | spin_lock_irqsave(&tsc_sync_lock, flags); |
295 | { | 295 | { |
296 | for (i = 0; i < NUM_ROUNDS; ++i) { | 296 | for (i = 0; i < NUM_ROUNDS; ++i) { |
297 | delta = get_delta(&rt, &master_time_stamp); | 297 | delta = get_delta(&rt, &master_time_stamp); |
298 | if (delta == 0) { | 298 | if (delta == 0) { |
299 | done = 1; /* let's lock on to this... */ | 299 | done = 1; /* let's lock on to this... */ |
300 | bound = rt; | 300 | bound = rt; |
301 | } | 301 | } |
302 | 302 | ||
303 | if (!done) { | 303 | if (!done) { |
304 | unsigned long t; | 304 | unsigned long t; |
305 | if (i > 0) { | 305 | if (i > 0) { |
306 | adjust_latency += -delta; | 306 | adjust_latency += -delta; |
307 | adj = -delta + adjust_latency/4; | 307 | adj = -delta + adjust_latency/4; |
308 | } else | 308 | } else |
309 | adj = -delta; | 309 | adj = -delta; |
310 | 310 | ||
311 | rdtscll(t); | 311 | rdtscll(t); |
312 | wrmsrl(MSR_IA32_TSC, t + adj); | 312 | wrmsrl(MSR_IA32_TSC, t + adj); |
313 | } | 313 | } |
314 | #ifdef DEBUG_TSC_SYNC | 314 | #ifdef DEBUG_TSC_SYNC |
315 | t[i].rt = rt; | 315 | t[i].rt = rt; |
316 | t[i].master = master_time_stamp; | 316 | t[i].master = master_time_stamp; |
317 | t[i].diff = delta; | 317 | t[i].diff = delta; |
318 | t[i].lat = adjust_latency/4; | 318 | t[i].lat = adjust_latency/4; |
319 | #endif | 319 | #endif |
320 | } | 320 | } |
321 | } | 321 | } |
322 | spin_unlock_irqrestore(&tsc_sync_lock, flags); | 322 | spin_unlock_irqrestore(&tsc_sync_lock, flags); |
323 | 323 | ||
324 | #ifdef DEBUG_TSC_SYNC | 324 | #ifdef DEBUG_TSC_SYNC |
325 | for (i = 0; i < NUM_ROUNDS; ++i) | 325 | for (i = 0; i < NUM_ROUNDS; ++i) |
326 | printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", | 326 | printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", |
327 | t[i].rt, t[i].master, t[i].diff, t[i].lat); | 327 | t[i].rt, t[i].master, t[i].diff, t[i].lat); |
328 | #endif | 328 | #endif |
329 | 329 | ||
330 | printk(KERN_INFO | 330 | printk(KERN_INFO |
331 | "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, " | 331 | "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, " |
332 | "maxerr %lu cycles)\n", | 332 | "maxerr %lu cycles)\n", |
333 | smp_processor_id(), master, delta, rt); | 333 | smp_processor_id(), master, delta, rt); |
334 | } | 334 | } |
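The adjustment step inside the loop is a small closed-loop controller: adj starts as the raw -delta, and the running adjust_latency/4 term integrates past error to compensate for the systematic delay between reading the TSC and the WRMSR landing. The toy model below (my own construction, not the kernel's actual timing) shows the integrator pulling a constant write latency out of the residual offset:

    #include <stdio.h>

    /* Toy closed loop: a slave TSC with a constant offset plus a fixed
     * write latency the integrator has to learn.  Mirrors the
     * "adj = -delta + adjust_latency/4" update in sync_tsc(). */
    int main(void)
    {
        long offset = 900;       /* slave ahead of master, in cycles */
        long write_lat = 40;     /* every adjustment lands this late */
        long adjust_latency = 0, delta, adj;
        int i;

        for (i = 0; i < 8; i++) {
            delta = offset;                   /* what get_delta() sees */
            if (i > 0) {
                adjust_latency += -delta;
                adj = -delta + adjust_latency / 4;
            } else
                adj = -delta;
            offset += adj + write_lat;        /* apply, incl. latency  */
            printf("round %d: delta=%4ld adj=%5ld\n", i, delta, adj);
        }
        return 0;
    }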
335 | 335 | ||
336 | static void __cpuinit tsc_sync_wait(void) | 336 | static void __cpuinit tsc_sync_wait(void) |
337 | { | 337 | { |
338 | /* | 338 | /* |
339 | * When the CPU has synchronized TSCs, assume the BIOS | 339 | * When the CPU has synchronized TSCs, assume the BIOS |
340 | * or the hardware already synced them. Otherwise we could | 340 | * or the hardware already synced them. Otherwise we could |
341 | * mess up a possibly perfect synchronization with a | 341 | * mess up a possibly perfect synchronization with a |
342 | * not-quite-perfect algorithm. | 342 | * not-quite-perfect algorithm. |
343 | */ | 343 | */ |
344 | if (notscsync || !cpu_has_tsc || !unsynchronized_tsc()) | 344 | if (notscsync || !cpu_has_tsc || !unsynchronized_tsc()) |
345 | return; | 345 | return; |
346 | sync_tsc(0); | 346 | sync_tsc(0); |
347 | } | 347 | } |
348 | 348 | ||
349 | static __init int notscsync_setup(char *s) | 349 | static __init int notscsync_setup(char *s) |
350 | { | 350 | { |
351 | notscsync = 1; | 351 | notscsync = 1; |
352 | return 0; | 352 | return 0; |
353 | } | 353 | } |
354 | __setup("notscsync", notscsync_setup); | 354 | __setup("notscsync", notscsync_setup); |
355 | 355 | ||
356 | static atomic_t init_deasserted __cpuinitdata; | 356 | static atomic_t init_deasserted __cpuinitdata; |
357 | 357 | ||
358 | /* | 358 | /* |
359 | * Report back to the Boot Processor. | 359 | * Report back to the Boot Processor. |
360 | * Running on AP. | 360 | * Running on AP. |
361 | */ | 361 | */ |
362 | void __cpuinit smp_callin(void) | 362 | void __cpuinit smp_callin(void) |
363 | { | 363 | { |
364 | int cpuid, phys_id; | 364 | int cpuid, phys_id; |
365 | unsigned long timeout; | 365 | unsigned long timeout; |
366 | 366 | ||
367 | /* | 367 | /* |
368 | * If woken up by an INIT in an 82489DX configuration | 368 | * If woken up by an INIT in an 82489DX configuration |
369 | * we may get here before an INIT-deassert IPI reaches | 369 | * we may get here before an INIT-deassert IPI reaches |
370 | * our local APIC. We have to wait for the IPI or we'll | 370 | * our local APIC. We have to wait for the IPI or we'll |
371 | * lock up on an APIC access. | 371 | * lock up on an APIC access. |
372 | */ | 372 | */ |
373 | while (!atomic_read(&init_deasserted)) | 373 | while (!atomic_read(&init_deasserted)) |
374 | cpu_relax(); | 374 | cpu_relax(); |
375 | 375 | ||
376 | /* | 376 | /* |
377 | * (This works even if the APIC is not enabled.) | 377 | * (This works even if the APIC is not enabled.) |
378 | */ | 378 | */ |
379 | phys_id = GET_APIC_ID(apic_read(APIC_ID)); | 379 | phys_id = GET_APIC_ID(apic_read(APIC_ID)); |
380 | cpuid = smp_processor_id(); | 380 | cpuid = smp_processor_id(); |
381 | if (cpu_isset(cpuid, cpu_callin_map)) { | 381 | if (cpu_isset(cpuid, cpu_callin_map)) { |
382 | panic("smp_callin: phys CPU#%d, CPU#%d already present??\n", | 382 | panic("smp_callin: phys CPU#%d, CPU#%d already present??\n", |
383 | phys_id, cpuid); | 383 | phys_id, cpuid); |
384 | } | 384 | } |
385 | Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); | 385 | Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); |
386 | 386 | ||
387 | /* | 387 | /* |
388 | * STARTUP IPIs are fragile beasts as they might sometimes | 388 | * STARTUP IPIs are fragile beasts as they might sometimes |
389 | * trigger some glue motherboard logic. Keep the APIC bus | 389 | * trigger some glue motherboard logic. Keep the APIC bus |
390 | * silent for 1 second; this overestimates the time the | 390 | * silent for 1 second; this overestimates the time the |
391 | * boot CPU spends sending the up to 2 STARTUP IPIs | 391 | * boot CPU spends sending the up to 2 STARTUP IPIs |
392 | * by a factor of two. This should be enough. | 392 | * by a factor of two. This should be enough. |
393 | */ | 393 | */ |
394 | 394 | ||
395 | /* | 395 | /* |
396 | * Waiting 2s total for startup (udelay is not yet working) | 396 | * Waiting 2s total for startup (udelay is not yet working) |
397 | */ | 397 | */ |
398 | timeout = jiffies + 2*HZ; | 398 | timeout = jiffies + 2*HZ; |
399 | while (time_before(jiffies, timeout)) { | 399 | while (time_before(jiffies, timeout)) { |
400 | /* | 400 | /* |
401 | * Has the boot CPU finished its STARTUP sequence? | 401 | * Has the boot CPU finished its STARTUP sequence? |
402 | */ | 402 | */ |
403 | if (cpu_isset(cpuid, cpu_callout_map)) | 403 | if (cpu_isset(cpuid, cpu_callout_map)) |
404 | break; | 404 | break; |
405 | cpu_relax(); | 405 | cpu_relax(); |
406 | } | 406 | } |
407 | 407 | ||
408 | if (!time_before(jiffies, timeout)) { | 408 | if (!time_before(jiffies, timeout)) { |
409 | panic("smp_callin: CPU%d started up but did not get a callout!\n", | 409 | panic("smp_callin: CPU%d started up but did not get a callout!\n", |
410 | cpuid); | 410 | cpuid); |
411 | } | 411 | } |
412 | 412 | ||
413 | /* | 413 | /* |
414 | * the boot CPU has finished the init stage and is spinning | 414 | * the boot CPU has finished the init stage and is spinning |
415 | * on callin_map until we finish. We are free to set up this | 415 | * on callin_map until we finish. We are free to set up this |
416 | * CPU, first the APIC. (this is probably redundant on most | 416 | * CPU, first the APIC. (this is probably redundant on most |
417 | * boards) | 417 | * boards) |
418 | */ | 418 | */ |
419 | 419 | ||
420 | Dprintk("CALLIN, before setup_local_APIC().\n"); | 420 | Dprintk("CALLIN, before setup_local_APIC().\n"); |
421 | setup_local_APIC(); | 421 | setup_local_APIC(); |
422 | 422 | ||
423 | /* | 423 | /* |
424 | * Get our bogomips. | 424 | * Get our bogomips. |
425 | * | 425 | * |
426 | * Need to enable IRQs because calibration can take a while, and | 426 | * Need to enable IRQs because calibration can take a while, and |
427 | * otherwise the NMI watchdog might kill us. | 427 | * otherwise the NMI watchdog might kill us. |
428 | */ | 428 | */ |
429 | local_irq_enable(); | 429 | local_irq_enable(); |
430 | calibrate_delay(); | 430 | calibrate_delay(); |
431 | local_irq_disable(); | 431 | local_irq_disable(); |
432 | Dprintk("Stack at about %p\n",&cpuid); | 432 | Dprintk("Stack at about %p\n",&cpuid); |
433 | 433 | ||
434 | disable_APIC_timer(); | 434 | disable_APIC_timer(); |
435 | 435 | ||
436 | /* | 436 | /* |
437 | * Save our processor parameters | 437 | * Save our processor parameters |
438 | */ | 438 | */ |
439 | smp_store_cpu_info(cpuid); | 439 | smp_store_cpu_info(cpuid); |
440 | 440 | ||
441 | /* | 441 | /* |
442 | * Allow the master to continue. | 442 | * Allow the master to continue. |
443 | */ | 443 | */ |
444 | cpu_set(cpuid, cpu_callin_map); | 444 | cpu_set(cpuid, cpu_callin_map); |
445 | } | 445 | } |
446 | 446 | ||
447 | /* representing cpus for which sibling maps can be computed */ | 447 | /* representing cpus for which sibling maps can be computed */ |
448 | static cpumask_t cpu_sibling_setup_map; | 448 | static cpumask_t cpu_sibling_setup_map; |
449 | 449 | ||
450 | static inline void set_cpu_sibling_map(int cpu) | 450 | static inline void set_cpu_sibling_map(int cpu) |
451 | { | 451 | { |
452 | int i; | 452 | int i; |
453 | struct cpuinfo_x86 *c = cpu_data; | 453 | struct cpuinfo_x86 *c = cpu_data; |
454 | 454 | ||
455 | cpu_set(cpu, cpu_sibling_setup_map); | 455 | cpu_set(cpu, cpu_sibling_setup_map); |
456 | 456 | ||
457 | if (smp_num_siblings > 1) { | 457 | if (smp_num_siblings > 1) { |
458 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 458 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
459 | if (phys_proc_id[cpu] == phys_proc_id[i] && | 459 | if (phys_proc_id[cpu] == phys_proc_id[i] && |
460 | cpu_core_id[cpu] == cpu_core_id[i]) { | 460 | cpu_core_id[cpu] == cpu_core_id[i]) { |
461 | cpu_set(i, cpu_sibling_map[cpu]); | 461 | cpu_set(i, cpu_sibling_map[cpu]); |
462 | cpu_set(cpu, cpu_sibling_map[i]); | 462 | cpu_set(cpu, cpu_sibling_map[i]); |
463 | cpu_set(i, cpu_core_map[cpu]); | 463 | cpu_set(i, cpu_core_map[cpu]); |
464 | cpu_set(cpu, cpu_core_map[i]); | 464 | cpu_set(cpu, cpu_core_map[i]); |
465 | } | 465 | } |
466 | } | 466 | } |
467 | } else { | 467 | } else { |
468 | cpu_set(cpu, cpu_sibling_map[cpu]); | 468 | cpu_set(cpu, cpu_sibling_map[cpu]); |
469 | } | 469 | } |
470 | 470 | ||
471 | if (current_cpu_data.x86_max_cores == 1) { | 471 | if (current_cpu_data.x86_max_cores == 1) { |
472 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 472 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
473 | c[cpu].booted_cores = 1; | 473 | c[cpu].booted_cores = 1; |
474 | return; | 474 | return; |
475 | } | 475 | } |
476 | 476 | ||
477 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 477 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
478 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | 478 | if (phys_proc_id[cpu] == phys_proc_id[i]) { |
479 | cpu_set(i, cpu_core_map[cpu]); | 479 | cpu_set(i, cpu_core_map[cpu]); |
480 | cpu_set(cpu, cpu_core_map[i]); | 480 | cpu_set(cpu, cpu_core_map[i]); |
481 | /* | 481 | /* |
482 | * Does this new cpu bring up a new core? | 482 | * Does this new cpu bring up a new core? |
483 | */ | 483 | */ |
484 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) { | 484 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) { |
485 | /* | 485 | /* |
486 | * for each core in package, increment | 486 | * for each core in package, increment |
487 | * the booted_cores for this new cpu | 487 | * the booted_cores for this new cpu |
488 | */ | 488 | */ |
489 | if (first_cpu(cpu_sibling_map[i]) == i) | 489 | if (first_cpu(cpu_sibling_map[i]) == i) |
490 | c[cpu].booted_cores++; | 490 | c[cpu].booted_cores++; |
491 | /* | 491 | /* |
492 | * increment the core count for all | 492 | * increment the core count for all |
493 | * the other cpus in this package | 493 | * the other cpus in this package |
494 | */ | 494 | */ |
495 | if (i != cpu) | 495 | if (i != cpu) |
496 | c[i].booted_cores++; | 496 | c[i].booted_cores++; |
497 | } else if (i != cpu && !c[cpu].booted_cores) | 497 | } else if (i != cpu && !c[cpu].booted_cores) |
498 | c[cpu].booted_cores = c[i].booted_cores; | 498 | c[cpu].booted_cores = c[i].booted_cores; |
499 | } | 499 | } |
500 | } | 500 | } |
501 | } | 501 | } |
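To make the maps concrete, take a hypothetical box with two packages of two cores, each core with two HT siblings (CPUs 0-7): cpu_sibling_map[] pairs the HT twins and cpu_core_map[] unions every CPU in a package. A toy recomputation over plain bitmasks (the pkg[]/core[] topology is invented for illustration):

    #include <stdio.h>

    /* Hypothetical topology: cpu -> (package, core). */
    static const int pkg[8]  = {0,0,0,0, 1,1,1,1};
    static const int core[8] = {0,0,1,1, 2,2,3,3};

    int main(void)
    {
        unsigned sibling[8] = {0}, coremap[8] = {0};
        int c, i;

        for (c = 0; c < 8; c++)
            for (i = 0; i < 8; i++) {
                if (pkg[c] == pkg[i] && core[c] == core[i])
                    sibling[c] |= 1u << i;    /* HT twins      */
                if (pkg[c] == pkg[i])
                    coremap[c] |= 1u << i;    /* whole package */
            }

        for (c = 0; c < 8; c++)
            printf("cpu%d: siblings=%#04x cores=%#04x\n",
                   c, sibling[c], coremap[c]);
        return 0;
    }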
502 | 502 | ||
503 | /* | 503 | /* |
504 | * Setup code on the secondary processor (after coming out of the trampoline) | 504 | * Setup code on the secondary processor (after coming out of the trampoline) |
505 | */ | 505 | */ |
506 | void __cpuinit start_secondary(void) | 506 | void __cpuinit start_secondary(void) |
507 | { | 507 | { |
508 | /* | 508 | /* |
509 | * Don't put anything before smp_callin(); SMP | 509 | * Don't put anything before smp_callin(); SMP |
510 | * booting is so fragile that we want to limit the | 510 | * booting is so fragile that we want to limit the |
511 | * things done here to the bare minimum. | 511 | * things done here to the bare minimum. |
512 | */ | 512 | */ |
513 | cpu_init(); | 513 | cpu_init(); |
514 | preempt_disable(); | 514 | preempt_disable(); |
515 | smp_callin(); | 515 | smp_callin(); |
516 | 516 | ||
517 | /* otherwise gcc will move up smp_processor_id() before cpu_init() */ | 517 | /* otherwise gcc will move up smp_processor_id() before cpu_init() */ |
518 | barrier(); | 518 | barrier(); |
519 | 519 | ||
520 | Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); | 520 | Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); |
521 | setup_secondary_APIC_clock(); | 521 | setup_secondary_APIC_clock(); |
522 | 522 | ||
523 | Dprintk("cpu %d: enabling apic timer\n", smp_processor_id()); | 523 | Dprintk("cpu %d: enabling apic timer\n", smp_processor_id()); |
524 | 524 | ||
525 | if (nmi_watchdog == NMI_IO_APIC) { | 525 | if (nmi_watchdog == NMI_IO_APIC) { |
526 | disable_8259A_irq(0); | 526 | disable_8259A_irq(0); |
527 | enable_NMI_through_LVT0(NULL); | 527 | enable_NMI_through_LVT0(NULL); |
528 | enable_8259A_irq(0); | 528 | enable_8259A_irq(0); |
529 | } | 529 | } |
530 | 530 | ||
531 | enable_APIC_timer(); | 531 | enable_APIC_timer(); |
532 | 532 | ||
533 | /* | 533 | /* |
534 | * The sibling maps must be set before turning the online map on for | 534 | * The sibling maps must be set before turning the online map on for |
535 | * this cpu | 535 | * this cpu |
536 | */ | 536 | */ |
537 | set_cpu_sibling_map(smp_processor_id()); | 537 | set_cpu_sibling_map(smp_processor_id()); |
538 | 538 | ||
539 | /* | 539 | /* |
540 | * Wait for TSC sync so that nothing is scheduled before it completes. | 540 | * Wait for TSC sync so that nothing is scheduled before it completes. |
541 | * We still process interrupts, which could see an inconsistent | 541 | * We still process interrupts, which could see an inconsistent |
542 | * time in that window, unfortunately. | 542 | * time in that window, unfortunately. |
543 | * Do this here because TSC sync has global unprotected state. | 543 | * Do this here because TSC sync has global unprotected state. |
544 | */ | 544 | */ |
545 | tsc_sync_wait(); | 545 | tsc_sync_wait(); |
546 | 546 | ||
547 | /* | 547 | /* |
548 | * We need to hold call_lock, so there is no inconsistency | 548 | * We need to hold call_lock, so there is no inconsistency |
549 | * between the time smp_call_function() determines number of | 549 | * between the time smp_call_function() determines number of |
550 | * IPI recipients, and the time when the determination is made | 550 | * IPI recipients, and the time when the determination is made |
551 | * for which cpus receive the IPI in genapic_flat.c. Holding this | 551 | * for which cpus receive the IPI in genapic_flat.c. Holding this |
552 | * lock helps us to not include this cpu in a currently in progress | 552 | * lock helps us to not include this cpu in a currently in progress |
553 | * smp_call_function(). | 553 | * smp_call_function(). |
554 | */ | 554 | */ |
555 | lock_ipi_call_lock(); | 555 | lock_ipi_call_lock(); |
556 | 556 | ||
557 | /* | 557 | /* |
558 | * Allow the master to continue. | 558 | * Allow the master to continue. |
559 | */ | 559 | */ |
560 | cpu_set(smp_processor_id(), cpu_online_map); | 560 | cpu_set(smp_processor_id(), cpu_online_map); |
561 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 561 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
562 | unlock_ipi_call_lock(); | 562 | unlock_ipi_call_lock(); |
563 | 563 | ||
564 | cpu_idle(); | 564 | cpu_idle(); |
565 | } | 565 | } |
566 | 566 | ||
567 | extern volatile unsigned long init_rsp; | 567 | extern volatile unsigned long init_rsp; |
568 | extern void (*initial_code)(void); | 568 | extern void (*initial_code)(void); |
569 | 569 | ||
570 | #ifdef APIC_DEBUG | 570 | #ifdef APIC_DEBUG |
571 | static void inquire_remote_apic(int apicid) | 571 | static void inquire_remote_apic(int apicid) |
572 | { | 572 | { |
573 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 573 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
574 | char *names[] = { "ID", "VERSION", "SPIV" }; | 574 | char *names[] = { "ID", "VERSION", "SPIV" }; |
575 | int timeout, status; | 575 | int timeout, status; |
576 | 576 | ||
577 | printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); | 577 | printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); |
578 | 578 | ||
579 | for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { | 579 | for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { |
580 | printk("... APIC #%d %s: ", apicid, names[i]); | 580 | printk("... APIC #%d %s: ", apicid, names[i]); |
581 | 581 | ||
582 | /* | 582 | /* |
583 | * Wait for idle. | 583 | * Wait for idle. |
584 | */ | 584 | */ |
585 | apic_wait_icr_idle(); | 585 | apic_wait_icr_idle(); |
586 | 586 | ||
587 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); | 587 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); |
588 | apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); | 588 | apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); |
589 | 589 | ||
590 | timeout = 0; | 590 | timeout = 0; |
591 | do { | 591 | do { |
592 | udelay(100); | 592 | udelay(100); |
593 | status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; | 593 | status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; |
594 | } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); | 594 | } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); |
595 | 595 | ||
596 | switch (status) { | 596 | switch (status) { |
597 | case APIC_ICR_RR_VALID: | 597 | case APIC_ICR_RR_VALID: |
598 | status = apic_read(APIC_RRR); | 598 | status = apic_read(APIC_RRR); |
599 | printk("%08x\n", status); | 599 | printk("%08x\n", status); |
600 | break; | 600 | break; |
601 | default: | 601 | default: |
602 | printk("failed\n"); | 602 | printk("failed\n"); |
603 | } | 603 | } |
604 | } | 604 | } |
605 | } | 605 | } |
606 | #endif | 606 | #endif |
607 | 607 | ||
608 | /* | 608 | /* |
609 | * Kick the secondary to wake up. | 609 | * Kick the secondary to wake up. |
610 | */ | 610 | */ |
611 | static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) | 611 | static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) |
612 | { | 612 | { |
613 | unsigned long send_status = 0, accept_status = 0; | 613 | unsigned long send_status = 0, accept_status = 0; |
614 | int maxlvt, timeout, num_starts, j; | 614 | int maxlvt, timeout, num_starts, j; |
615 | 615 | ||
616 | Dprintk("Asserting INIT.\n"); | 616 | Dprintk("Asserting INIT.\n"); |
617 | 617 | ||
618 | /* | 618 | /* |
619 | * Turn INIT on target chip | 619 | * Turn INIT on target chip |
620 | */ | 620 | */ |
621 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | 621 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); |
622 | 622 | ||
623 | /* | 623 | /* |
624 | * Send IPI | 624 | * Send IPI |
625 | */ | 625 | */ |
626 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | 626 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT |
627 | | APIC_DM_INIT); | 627 | | APIC_DM_INIT); |
628 | 628 | ||
629 | Dprintk("Waiting for send to finish...\n"); | 629 | Dprintk("Waiting for send to finish...\n"); |
630 | timeout = 0; | 630 | timeout = 0; |
631 | do { | 631 | do { |
632 | Dprintk("+"); | 632 | Dprintk("+"); |
633 | udelay(100); | 633 | udelay(100); |
634 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | 634 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; |
635 | } while (send_status && (timeout++ < 1000)); | 635 | } while (send_status && (timeout++ < 1000)); |
636 | 636 | ||
637 | mdelay(10); | 637 | mdelay(10); |
638 | 638 | ||
639 | Dprintk("Deasserting INIT.\n"); | 639 | Dprintk("Deasserting INIT.\n"); |
640 | 640 | ||
641 | /* Target chip */ | 641 | /* Target chip */ |
642 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | 642 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); |
643 | 643 | ||
644 | /* Send IPI */ | 644 | /* Send IPI */ |
645 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); | 645 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); |
646 | 646 | ||
647 | Dprintk("Waiting for send to finish...\n"); | 647 | Dprintk("Waiting for send to finish...\n"); |
648 | timeout = 0; | 648 | timeout = 0; |
649 | do { | 649 | do { |
650 | Dprintk("+"); | 650 | Dprintk("+"); |
651 | udelay(100); | 651 | udelay(100); |
652 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | 652 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; |
653 | } while (send_status && (timeout++ < 1000)); | 653 | } while (send_status && (timeout++ < 1000)); |
654 | 654 | ||
655 | mb(); | 655 | mb(); |
656 | atomic_set(&init_deasserted, 1); | 656 | atomic_set(&init_deasserted, 1); |
657 | 657 | ||
658 | num_starts = 2; | 658 | num_starts = 2; |
659 | 659 | ||
660 | /* | 660 | /* |
661 | * Run STARTUP IPI loop. | 661 | * Run STARTUP IPI loop. |
662 | */ | 662 | */ |
663 | Dprintk("#startup loops: %d.\n", num_starts); | 663 | Dprintk("#startup loops: %d.\n", num_starts); |
664 | 664 | ||
665 | maxlvt = get_maxlvt(); | 665 | maxlvt = get_maxlvt(); |
666 | 666 | ||
667 | for (j = 1; j <= num_starts; j++) { | 667 | for (j = 1; j <= num_starts; j++) { |
668 | Dprintk("Sending STARTUP #%d.\n",j); | 668 | Dprintk("Sending STARTUP #%d.\n",j); |
669 | apic_read_around(APIC_SPIV); | 669 | apic_read_around(APIC_SPIV); |
670 | apic_write(APIC_ESR, 0); | 670 | apic_write(APIC_ESR, 0); |
671 | apic_read(APIC_ESR); | 671 | apic_read(APIC_ESR); |
672 | Dprintk("After apic_write.\n"); | 672 | Dprintk("After apic_write.\n"); |
673 | 673 | ||
674 | /* | 674 | /* |
675 | * STARTUP IPI | 675 | * STARTUP IPI |
676 | */ | 676 | */ |
677 | 677 | ||
678 | /* Target chip */ | 678 | /* Target chip */ |
679 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | 679 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); |
680 | 680 | ||
681 | /* Boot on the stack */ | 681 | /* Boot on the stack */ |
682 | /* Kick the secondary */ | 682 | /* Kick the secondary */ |
683 | apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12)); | 683 | apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12)); |
684 | 684 | ||
685 | /* | 685 | /* |
686 | * Give the other CPU some time to accept the IPI. | 686 | * Give the other CPU some time to accept the IPI. |
687 | */ | 687 | */ |
688 | udelay(300); | 688 | udelay(300); |
689 | 689 | ||
690 | Dprintk("Startup point 1.\n"); | 690 | Dprintk("Startup point 1.\n"); |
691 | 691 | ||
692 | Dprintk("Waiting for send to finish...\n"); | 692 | Dprintk("Waiting for send to finish...\n"); |
693 | timeout = 0; | 693 | timeout = 0; |
694 | do { | 694 | do { |
695 | Dprintk("+"); | 695 | Dprintk("+"); |
696 | udelay(100); | 696 | udelay(100); |
697 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | 697 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; |
698 | } while (send_status && (timeout++ < 1000)); | 698 | } while (send_status && (timeout++ < 1000)); |
699 | 699 | ||
700 | /* | 700 | /* |
701 | * Give the other CPU some time to accept the IPI. | 701 | * Give the other CPU some time to accept the IPI. |
702 | */ | 702 | */ |
703 | udelay(200); | 703 | udelay(200); |
704 | /* | 704 | /* |
705 | * Due to the Pentium erratum 3AP. | 705 | * Due to the Pentium erratum 3AP. |
706 | */ | 706 | */ |
707 | if (maxlvt > 3) { | 707 | if (maxlvt > 3) { |
708 | apic_read_around(APIC_SPIV); | 708 | apic_read_around(APIC_SPIV); |
709 | apic_write(APIC_ESR, 0); | 709 | apic_write(APIC_ESR, 0); |
710 | } | 710 | } |
711 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 711 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
712 | if (send_status || accept_status) | 712 | if (send_status || accept_status) |
713 | break; | 713 | break; |
714 | } | 714 | } |
715 | Dprintk("After Startup.\n"); | 715 | Dprintk("After Startup.\n"); |
716 | 716 | ||
717 | if (send_status) | 717 | if (send_status) |
718 | printk(KERN_ERR "APIC never delivered???\n"); | 718 | printk(KERN_ERR "APIC never delivered???\n"); |
719 | if (accept_status) | 719 | if (accept_status) |
720 | printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); | 720 | printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); |
721 | 721 | ||
722 | return (send_status | accept_status); | 722 | return (send_status | accept_status); |
723 | } | 723 | } |
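The start_rip >> 12 in the STARTUP write is the whole trick of SIPI addressing: the 8-bit vector field carries a 4KiB page number, and the AP begins real-mode execution at vector * 0x1000, which is why setup_trampoline() copies the bootstrap to a fixed low page. A worked example (the 0x6000 base is assumed for illustration; the real value is SMP_TRAMPOLINE_BASE):

    #include <stdio.h>

    int main(void)
    {
        unsigned long start_rip = 0x6000;      /* assumed trampoline base */
        unsigned vector = start_rip >> 12;     /* SIPI vector field       */
        unsigned long ap_start = (unsigned long)vector << 12;

        printf("SIPI vector %#x -> AP starts at %#lx\n", vector, ap_start);
        return 0;
    }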
724 | 724 | ||
725 | struct create_idle { | 725 | struct create_idle { |
726 | struct task_struct *idle; | 726 | struct task_struct *idle; |
727 | struct completion done; | 727 | struct completion done; |
728 | int cpu; | 728 | int cpu; |
729 | }; | 729 | }; |
730 | 730 | ||
731 | void do_fork_idle(void *_c_idle) | 731 | void do_fork_idle(void *_c_idle) |
732 | { | 732 | { |
733 | struct create_idle *c_idle = _c_idle; | 733 | struct create_idle *c_idle = _c_idle; |
734 | 734 | ||
735 | c_idle->idle = fork_idle(c_idle->cpu); | 735 | c_idle->idle = fork_idle(c_idle->cpu); |
736 | complete(&c_idle->done); | 736 | complete(&c_idle->done); |
737 | } | 737 | } |
738 | 738 | ||
739 | /* | 739 | /* |
740 | * Boot one CPU. | 740 | * Boot one CPU. |
741 | */ | 741 | */ |
742 | static int __cpuinit do_boot_cpu(int cpu, int apicid) | 742 | static int __cpuinit do_boot_cpu(int cpu, int apicid) |
743 | { | 743 | { |
744 | unsigned long boot_error; | 744 | unsigned long boot_error; |
745 | int timeout; | 745 | int timeout; |
746 | unsigned long start_rip; | 746 | unsigned long start_rip; |
747 | struct create_idle c_idle = { | 747 | struct create_idle c_idle = { |
748 | .cpu = cpu, | 748 | .cpu = cpu, |
749 | .done = COMPLETION_INITIALIZER(c_idle.done), | 749 | .done = COMPLETION_INITIALIZER(c_idle.done), |
750 | }; | 750 | }; |
751 | DECLARE_WORK(work, do_fork_idle, &c_idle); | 751 | DECLARE_WORK(work, do_fork_idle, &c_idle); |
752 | 752 | ||
753 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ | 753 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ |
754 | if (!cpu_gdt_descr[cpu].address && | 754 | if (!cpu_gdt_descr[cpu].address && |
755 | !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { | 755 | !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { |
756 | printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); | 756 | printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); |
757 | return -1; | 757 | return -1; |
758 | } | 758 | } |
759 | 759 | ||
760 | c_idle.idle = get_idle_for_cpu(cpu); | 760 | c_idle.idle = get_idle_for_cpu(cpu); |
761 | 761 | ||
762 | if (c_idle.idle) { | 762 | if (c_idle.idle) { |
763 | c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *) | 763 | c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *) |
764 | (THREAD_SIZE + (unsigned long) c_idle.idle->thread_info)) - 1); | 764 | (THREAD_SIZE + (unsigned long) c_idle.idle->thread_info)) - 1); |
765 | init_idle(c_idle.idle, cpu); | 765 | init_idle(c_idle.idle, cpu); |
766 | goto do_rest; | 766 | goto do_rest; |
767 | } | 767 | } |
768 | 768 | ||
769 | /* | 769 | /* |
770 | * During the cold boot process, the keventd thread is not up yet. | 770 | * During the cold boot process, the keventd thread is not up yet. |
771 | * When we do CPU hot-add, we create idle threads on the fly; they should | 771 | * When we do CPU hot-add, we create idle threads on the fly; they should |
772 | * not inherit any attributes from the calling context. Hence the clean | 772 | * not inherit any attributes from the calling context. Hence the clean |
773 | * way to create kernel threads is to do it from keventd(). | 773 | * way to create kernel threads is to do it from keventd(). |
774 | * We check current_is_keventd() because the ACPI notifier also | 774 | * We check current_is_keventd() because the ACPI notifier also |
775 | * queues to keventd(), and if the caller is already running in | 775 | * queues to keventd(), and if the caller is already running in |
776 | * keventd() context we would end up deadlocking the keventd | 776 | * keventd() context we would end up deadlocking the keventd |
777 | * thread. | 777 | * thread. |
778 | */ | 778 | */ |
779 | if (!keventd_up() || current_is_keventd()) | 779 | if (!keventd_up() || current_is_keventd()) |
780 | work.func(work.data); | 780 | work.func(work.data); |
781 | else { | 781 | else { |
782 | schedule_work(&work); | 782 | schedule_work(&work); |
783 | wait_for_completion(&c_idle.done); | 783 | wait_for_completion(&c_idle.done); |
784 | } | 784 | } |
785 | 785 | ||
786 | if (IS_ERR(c_idle.idle)) { | 786 | if (IS_ERR(c_idle.idle)) { |
787 | printk("failed fork for CPU %d\n", cpu); | 787 | printk("failed fork for CPU %d\n", cpu); |
788 | return PTR_ERR(c_idle.idle); | 788 | return PTR_ERR(c_idle.idle); |
789 | } | 789 | } |
790 | 790 | ||
791 | set_idle_for_cpu(cpu, c_idle.idle); | 791 | set_idle_for_cpu(cpu, c_idle.idle); |
792 | 792 | ||
793 | do_rest: | 793 | do_rest: |
794 | 794 | ||
795 | cpu_pda[cpu].pcurrent = c_idle.idle; | 795 | cpu_pda(cpu)->pcurrent = c_idle.idle; |
796 | 796 | ||
797 | start_rip = setup_trampoline(); | 797 | start_rip = setup_trampoline(); |
798 | 798 | ||
799 | init_rsp = c_idle.idle->thread.rsp; | 799 | init_rsp = c_idle.idle->thread.rsp; |
800 | per_cpu(init_tss,cpu).rsp0 = init_rsp; | 800 | per_cpu(init_tss,cpu).rsp0 = init_rsp; |
801 | initial_code = start_secondary; | 801 | initial_code = start_secondary; |
802 | clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK); | 802 | clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK); |
803 | 803 | ||
804 | printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu, | 804 | printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu, |
805 | cpus_weight(cpu_present_map), | 805 | cpus_weight(cpu_present_map), |
806 | apicid); | 806 | apicid); |
807 | 807 | ||
808 | /* | 808 | /* |
809 | * This grunge runs the startup process for | 809 | * This grunge runs the startup process for |
810 | * the targeted processor. | 810 | * the targeted processor. |
811 | */ | 811 | */ |
812 | 812 | ||
813 | atomic_set(&init_deasserted, 0); | 813 | atomic_set(&init_deasserted, 0); |
814 | 814 | ||
815 | Dprintk("Setting warm reset code and vector.\n"); | 815 | Dprintk("Setting warm reset code and vector.\n"); |
816 | 816 | ||
817 | CMOS_WRITE(0xa, 0xf); | 817 | CMOS_WRITE(0xa, 0xf); |
818 | local_flush_tlb(); | 818 | local_flush_tlb(); |
819 | Dprintk("1.\n"); | 819 | Dprintk("1.\n"); |
820 | *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4; | 820 | *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4; |
821 | Dprintk("2.\n"); | 821 | Dprintk("2.\n"); |
822 | *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf; | 822 | *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf; |
823 | Dprintk("3.\n"); | 823 | Dprintk("3.\n"); |
824 | 824 | ||
825 | /* | 825 | /* |
826 | * Be paranoid about clearing APIC errors. | 826 | * Be paranoid about clearing APIC errors. |
827 | */ | 827 | */ |
828 | if (APIC_INTEGRATED(apic_version[apicid])) { | 828 | if (APIC_INTEGRATED(apic_version[apicid])) { |
829 | apic_read_around(APIC_SPIV); | 829 | apic_read_around(APIC_SPIV); |
830 | apic_write(APIC_ESR, 0); | 830 | apic_write(APIC_ESR, 0); |
831 | apic_read(APIC_ESR); | 831 | apic_read(APIC_ESR); |
832 | } | 832 | } |
833 | 833 | ||
834 | /* | 834 | /* |
835 | * Status is now clean | 835 | * Status is now clean |
836 | */ | 836 | */ |
837 | boot_error = 0; | 837 | boot_error = 0; |
838 | 838 | ||
839 | /* | 839 | /* |
840 | * Starting actual IPI sequence... | 840 | * Starting actual IPI sequence... |
841 | */ | 841 | */ |
842 | boot_error = wakeup_secondary_via_INIT(apicid, start_rip); | 842 | boot_error = wakeup_secondary_via_INIT(apicid, start_rip); |
843 | 843 | ||
844 | if (!boot_error) { | 844 | if (!boot_error) { |
845 | /* | 845 | /* |
846 | * allow APs to start initializing. | 846 | * allow APs to start initializing. |
847 | */ | 847 | */ |
848 | Dprintk("Before Callout %d.\n", cpu); | 848 | Dprintk("Before Callout %d.\n", cpu); |
849 | cpu_set(cpu, cpu_callout_map); | 849 | cpu_set(cpu, cpu_callout_map); |
850 | Dprintk("After Callout %d.\n", cpu); | 850 | Dprintk("After Callout %d.\n", cpu); |
851 | 851 | ||
852 | /* | 852 | /* |
853 | * Wait 5s total for a response | 853 | * Wait 5s total for a response |
854 | */ | 854 | */ |
855 | for (timeout = 0; timeout < 50000; timeout++) { | 855 | for (timeout = 0; timeout < 50000; timeout++) { |
856 | if (cpu_isset(cpu, cpu_callin_map)) | 856 | if (cpu_isset(cpu, cpu_callin_map)) |
857 | break; /* It has booted */ | 857 | break; /* It has booted */ |
858 | udelay(100); | 858 | udelay(100); |
859 | } | 859 | } |
860 | 860 | ||
861 | if (cpu_isset(cpu, cpu_callin_map)) { | 861 | if (cpu_isset(cpu, cpu_callin_map)) { |
862 | /* number CPUs logically, starting from 1 (BSP is 0) */ | 862 | /* number CPUs logically, starting from 1 (BSP is 0) */ |
863 | Dprintk("CPU has booted.\n"); | 863 | Dprintk("CPU has booted.\n"); |
864 | } else { | 864 | } else { |
865 | boot_error = 1; | 865 | boot_error = 1; |
866 | if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE)) | 866 | if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE)) |
867 | == 0xA5) | 867 | == 0xA5) |
868 | /* trampoline started but...? */ | 868 | /* trampoline started but...? */ |
869 | printk("Stuck ??\n"); | 869 | printk("Stuck ??\n"); |
870 | else | 870 | else |
871 | /* trampoline code not run */ | 871 | /* trampoline code not run */ |
872 | printk("Not responding.\n"); | 872 | printk("Not responding.\n"); |
873 | #ifdef APIC_DEBUG | 873 | #ifdef APIC_DEBUG |
874 | inquire_remote_apic(apicid); | 874 | inquire_remote_apic(apicid); |
875 | #endif | 875 | #endif |
876 | } | 876 | } |
877 | } | 877 | } |
878 | if (boot_error) { | 878 | if (boot_error) { |
879 | cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ | 879 | cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ |
880 | clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ | 880 | clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ |
881 | cpu_clear(cpu, cpu_present_map); | 881 | cpu_clear(cpu, cpu_present_map); |
882 | cpu_clear(cpu, cpu_possible_map); | 882 | cpu_clear(cpu, cpu_possible_map); |
883 | x86_cpu_to_apicid[cpu] = BAD_APICID; | 883 | x86_cpu_to_apicid[cpu] = BAD_APICID; |
884 | x86_cpu_to_log_apicid[cpu] = BAD_APICID; | 884 | x86_cpu_to_log_apicid[cpu] = BAD_APICID; |
885 | return -EIO; | 885 | return -EIO; |
886 | } | 886 | } |
887 | 887 | ||
888 | return 0; | 888 | return 0; |
889 | } | 889 | } |
890 | 890 | ||
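The cpu_pda(cpu)->pcurrent store above (previously cpu_pda[cpu].pcurrent) shows the conversion this diff applies at every call site: pda accesses go through an accessor macro instead of indexing the array directly, so the storage behind it can change later without touching callers again. A minimal sketch of such an accessor, assuming the backing store is still a flat array (the _cpu_pda name here is illustrative, not taken from this diff):

        /* Illustrative sketch only -- not part of this diff. */
        extern struct x8664_pda _cpu_pda[NR_CPUS];
        #define cpu_pda(i)      (&_cpu_pda[i])

        /* A call site then reads the same whether the macro wraps a
         * static array, as sketched here, or a node-local allocation
         * later on: */
        cpu_pda(cpu)->pcurrent = c_idle.idle;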
891 | cycles_t cacheflush_time; | 891 | cycles_t cacheflush_time; |
892 | unsigned long cache_decay_ticks; | 892 | unsigned long cache_decay_ticks; |
893 | 893 | ||
894 | /* | 894 | /* |
895 | * Clean up possible dangling ends... | 895 | * Clean up possible dangling ends... |
896 | */ | 896 | */ |
897 | static __cpuinit void smp_cleanup_boot(void) | 897 | static __cpuinit void smp_cleanup_boot(void) |
898 | { | 898 | { |
899 | /* | 899 | /* |
900 | * Paranoid: Set warm reset code and vector here back | 900 | * Paranoid: Set warm reset code and vector here back |
901 | * to default values. | 901 | * to default values. |
902 | */ | 902 | */ |
903 | CMOS_WRITE(0, 0xf); | 903 | CMOS_WRITE(0, 0xf); |
904 | 904 | ||
905 | /* | 905 | /* |
906 | * Reset trampoline flag | 906 | * Reset trampoline flag |
907 | */ | 907 | */ |
908 | *((volatile int *) phys_to_virt(0x467)) = 0; | 908 | *((volatile int *) phys_to_virt(0x467)) = 0; |
909 | } | 909 | } |
910 | 910 | ||
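For context on the warm-reset setup in do_boot_cpu() and its teardown here: CMOS_WRITE(0xa, 0xf) puts 0x0A, the "jump via the 40:67 vector" code, into CMOS shutdown-status byte 0xF, and the two 16-bit stores place a real-mode far pointer at physical 0x467 (offset) and 0x469 (segment), so a warm-reset AP resumes at (segment << 4) + offset, i.e. at start_rip. A sketch that reads the vector back (illustrative helper, not in this diff):

        static unsigned long warm_reset_target(void)
        {
                unsigned short off = *(volatile unsigned short *)phys_to_virt(0x467);
                unsigned short seg = *(volatile unsigned short *)phys_to_virt(0x469);

                /* do_boot_cpu() stored start_rip & 0xf and start_rip >> 4,
                 * so this reassembles exactly start_rip. */
                return ((unsigned long)seg << 4) + off;
        }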
911 | /* | 911 | /* |
912 | * Fall back to non-SMP mode after errors. | 912 | * Fall back to non-SMP mode after errors. |
913 | * | 913 | * |
914 | * RED-PEN audit/test this more. I bet there is more state messed up here. | 914 | * RED-PEN audit/test this more. I bet there is more state messed up here. |
915 | */ | 915 | */ |
916 | static __init void disable_smp(void) | 916 | static __init void disable_smp(void) |
917 | { | 917 | { |
918 | cpu_present_map = cpumask_of_cpu(0); | 918 | cpu_present_map = cpumask_of_cpu(0); |
919 | cpu_possible_map = cpumask_of_cpu(0); | 919 | cpu_possible_map = cpumask_of_cpu(0); |
920 | if (smp_found_config) | 920 | if (smp_found_config) |
921 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); | 921 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); |
922 | else | 922 | else |
923 | phys_cpu_present_map = physid_mask_of_physid(0); | 923 | phys_cpu_present_map = physid_mask_of_physid(0); |
924 | cpu_set(0, cpu_sibling_map[0]); | 924 | cpu_set(0, cpu_sibling_map[0]); |
925 | cpu_set(0, cpu_core_map[0]); | 925 | cpu_set(0, cpu_core_map[0]); |
926 | } | 926 | } |
927 | 927 | ||
928 | #ifdef CONFIG_HOTPLUG_CPU | 928 | #ifdef CONFIG_HOTPLUG_CPU |
929 | 929 | ||
930 | int additional_cpus __initdata = -1; | 930 | int additional_cpus __initdata = -1; |
931 | 931 | ||
932 | /* | 932 | /* |
933 | * cpu_possible_map should be static: it cannot change as CPUs | 933 | * cpu_possible_map should be static: it cannot change as CPUs |
934 | * are onlined or offlined. The reason is that per-cpu data structures | 934 | * are onlined or offlined. The reason is that per-cpu data structures |
935 | * are allocated by some modules at init time, and they don't expect to | 935 | * are allocated by some modules at init time, and they don't expect to |
936 | * do this dynamically on CPU arrival/departure. | 936 | * do this dynamically on CPU arrival/departure. |
937 | * cpu_present_map, on the other hand, can change dynamically. | 937 | * cpu_present_map, on the other hand, can change dynamically. |
938 | * When cpu_hotplug is not compiled in, we keep the current | 938 | * When cpu_hotplug is not compiled in, we keep the current |
939 | * behaviour, which is cpu_possible == cpu_present. | 939 | * behaviour, which is cpu_possible == cpu_present. |
940 | * - Ashok Raj | 940 | * - Ashok Raj |
941 | * | 941 | * |
942 | * Three ways to find out the number of additional hotplug CPUs: | 942 | * Three ways to find out the number of additional hotplug CPUs: |
943 | * - If the BIOS specified disabled CPUs in ACPI/mptables, use that. | 943 | * - If the BIOS specified disabled CPUs in ACPI/mptables, use that. |
944 | * - The user can override it with additional_cpus=NUM. | 944 | * - The user can override it with additional_cpus=NUM. |
945 | * - Otherwise don't reserve additional CPUs. | 945 | * - Otherwise don't reserve additional CPUs. |
946 | * We do this because additional CPUs waste a lot of memory. | 946 | * We do this because additional CPUs waste a lot of memory. |
947 | * -AK | 947 | * -AK |
948 | */ | 948 | */ |
949 | __init void prefill_possible_map(void) | 949 | __init void prefill_possible_map(void) |
950 | { | 950 | { |
951 | int i; | 951 | int i; |
952 | int possible; | 952 | int possible; |
953 | 953 | ||
954 | if (additional_cpus == -1) { | 954 | if (additional_cpus == -1) { |
955 | if (disabled_cpus > 0) | 955 | if (disabled_cpus > 0) |
956 | additional_cpus = disabled_cpus; | 956 | additional_cpus = disabled_cpus; |
957 | else | 957 | else |
958 | additional_cpus = 0; | 958 | additional_cpus = 0; |
959 | } | 959 | } |
960 | possible = num_processors + additional_cpus; | 960 | possible = num_processors + additional_cpus; |
961 | if (possible > NR_CPUS) | 961 | if (possible > NR_CPUS) |
962 | possible = NR_CPUS; | 962 | possible = NR_CPUS; |
963 | 963 | ||
964 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 964 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
965 | possible, | 965 | possible, |
966 | max_t(int, possible - num_processors, 0)); | 966 | max_t(int, possible - num_processors, 0)); |
967 | 967 | ||
968 | for (i = 0; i < possible; i++) | 968 | for (i = 0; i < possible; i++) |
969 | cpu_set(i, cpu_possible_map); | 969 | cpu_set(i, cpu_possible_map); |
970 | } | 970 | } |
971 | #endif | 971 | #endif |
972 | 972 | ||
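A worked example of the sizing above (numbers are illustrative): if the BIOS reports num_processors == 2 with no disabled CPUs and the box is booted with additional_cpus=2 on the command line (parsed by the __setup() handler further down this file), possible becomes 4, four bits get set in cpu_possible_map, and the boot log reads "SMP: Allowing 4 CPUs, 2 hotplug CPUs". With no parameter and no BIOS-disabled CPUs, additional_cpus falls back to 0 and nothing extra is reserved.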
973 | /* | 973 | /* |
974 | * Various sanity checks. | 974 | * Various sanity checks. |
975 | */ | 975 | */ |
976 | static int __init smp_sanity_check(unsigned max_cpus) | 976 | static int __init smp_sanity_check(unsigned max_cpus) |
977 | { | 977 | { |
978 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 978 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
979 | printk("weird, boot CPU (#%d) not listed by the BIOS.\n", | 979 | printk("weird, boot CPU (#%d) not listed by the BIOS.\n", |
980 | hard_smp_processor_id()); | 980 | hard_smp_processor_id()); |
981 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 981 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
982 | } | 982 | } |
983 | 983 | ||
984 | /* | 984 | /* |
985 | * If we couldn't find an SMP configuration at boot time, | 985 | * If we couldn't find an SMP configuration at boot time, |
986 | * get out of here now! | 986 | * get out of here now! |
987 | */ | 987 | */ |
988 | if (!smp_found_config) { | 988 | if (!smp_found_config) { |
989 | printk(KERN_NOTICE "SMP motherboard not detected.\n"); | 989 | printk(KERN_NOTICE "SMP motherboard not detected.\n"); |
990 | disable_smp(); | 990 | disable_smp(); |
991 | if (APIC_init_uniprocessor()) | 991 | if (APIC_init_uniprocessor()) |
992 | printk(KERN_NOTICE "Local APIC not detected." | 992 | printk(KERN_NOTICE "Local APIC not detected." |
993 | " Using dummy APIC emulation.\n"); | 993 | " Using dummy APIC emulation.\n"); |
994 | return -1; | 994 | return -1; |
995 | } | 995 | } |
996 | 996 | ||
997 | /* | 997 | /* |
998 | * Should not be necessary because the MP table should list the boot | 998 | * Should not be necessary because the MP table should list the boot |
999 | * CPU too, but we do it for the sake of robustness anyway. | 999 | * CPU too, but we do it for the sake of robustness anyway. |
1000 | */ | 1000 | */ |
1001 | if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) { | 1001 | if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) { |
1002 | printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n", | 1002 | printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n", |
1003 | boot_cpu_id); | 1003 | boot_cpu_id); |
1004 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 1004 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
1005 | } | 1005 | } |
1006 | 1006 | ||
1007 | /* | 1007 | /* |
1008 | * If we couldn't find a local APIC, then get out of here now! | 1008 | * If we couldn't find a local APIC, then get out of here now! |
1009 | */ | 1009 | */ |
1010 | if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) { | 1010 | if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) { |
1011 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | 1011 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", |
1012 | boot_cpu_id); | 1012 | boot_cpu_id); |
1013 | printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); | 1013 | printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); |
1014 | nr_ioapics = 0; | 1014 | nr_ioapics = 0; |
1015 | return -1; | 1015 | return -1; |
1016 | } | 1016 | } |
1017 | 1017 | ||
1018 | /* | 1018 | /* |
1019 | * If SMP should be disabled, then really disable it! | 1019 | * If SMP should be disabled, then really disable it! |
1020 | */ | 1020 | */ |
1021 | if (!max_cpus) { | 1021 | if (!max_cpus) { |
1022 | printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); | 1022 | printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); |
1023 | nr_ioapics = 0; | 1023 | nr_ioapics = 0; |
1024 | return -1; | 1024 | return -1; |
1025 | } | 1025 | } |
1026 | 1026 | ||
1027 | return 0; | 1027 | return 0; |
1028 | } | 1028 | } |
1029 | 1029 | ||
1030 | /* | 1030 | /* |
1031 | * Prepare for SMP bootup. The MP table or ACPI has been read | 1031 | * Prepare for SMP bootup. The MP table or ACPI has been read |
1032 | * earlier. Just do some sanity checking here and enable APIC mode. | 1032 | * earlier. Just do some sanity checking here and enable APIC mode. |
1033 | */ | 1033 | */ |
1034 | void __init smp_prepare_cpus(unsigned int max_cpus) | 1034 | void __init smp_prepare_cpus(unsigned int max_cpus) |
1035 | { | 1035 | { |
1036 | nmi_watchdog_default(); | 1036 | nmi_watchdog_default(); |
1037 | current_cpu_data = boot_cpu_data; | 1037 | current_cpu_data = boot_cpu_data; |
1038 | current_thread_info()->cpu = 0; /* needed? */ | 1038 | current_thread_info()->cpu = 0; /* needed? */ |
1039 | set_cpu_sibling_map(0); | 1039 | set_cpu_sibling_map(0); |
1040 | 1040 | ||
1041 | if (smp_sanity_check(max_cpus) < 0) { | 1041 | if (smp_sanity_check(max_cpus) < 0) { |
1042 | printk(KERN_INFO "SMP disabled\n"); | 1042 | printk(KERN_INFO "SMP disabled\n"); |
1043 | disable_smp(); | 1043 | disable_smp(); |
1044 | return; | 1044 | return; |
1045 | } | 1045 | } |
1046 | 1046 | ||
1047 | 1047 | ||
1048 | /* | 1048 | /* |
1049 | * Switch from PIC to APIC mode. | 1049 | * Switch from PIC to APIC mode. |
1050 | */ | 1050 | */ |
1051 | connect_bsp_APIC(); | 1051 | connect_bsp_APIC(); |
1052 | setup_local_APIC(); | 1052 | setup_local_APIC(); |
1053 | 1053 | ||
1054 | if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { | 1054 | if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { |
1055 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", | 1055 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", |
1056 | GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); | 1056 | GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); |
1057 | /* Or can we switch back to PIC here? */ | 1057 | /* Or can we switch back to PIC here? */ |
1058 | } | 1058 | } |
1059 | 1059 | ||
1060 | /* | 1060 | /* |
1061 | * Now start the IO-APICs | 1061 | * Now start the IO-APICs |
1062 | */ | 1062 | */ |
1063 | if (!skip_ioapic_setup && nr_ioapics) | 1063 | if (!skip_ioapic_setup && nr_ioapics) |
1064 | setup_IO_APIC(); | 1064 | setup_IO_APIC(); |
1065 | else | 1065 | else |
1066 | nr_ioapics = 0; | 1066 | nr_ioapics = 0; |
1067 | 1067 | ||
1068 | /* | 1068 | /* |
1069 | * Set up local APIC timer on boot CPU. | 1069 | * Set up local APIC timer on boot CPU. |
1070 | */ | 1070 | */ |
1071 | 1071 | ||
1072 | setup_boot_APIC_clock(); | 1072 | setup_boot_APIC_clock(); |
1073 | } | 1073 | } |
1074 | 1074 | ||
1075 | /* | 1075 | /* |
1076 | * Early setup to make printk work. | 1076 | * Early setup to make printk work. |
1077 | */ | 1077 | */ |
1078 | void __init smp_prepare_boot_cpu(void) | 1078 | void __init smp_prepare_boot_cpu(void) |
1079 | { | 1079 | { |
1080 | int me = smp_processor_id(); | 1080 | int me = smp_processor_id(); |
1081 | cpu_set(me, cpu_online_map); | 1081 | cpu_set(me, cpu_online_map); |
1082 | cpu_set(me, cpu_callout_map); | 1082 | cpu_set(me, cpu_callout_map); |
1083 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1083 | per_cpu(cpu_state, me) = CPU_ONLINE; |
1084 | } | 1084 | } |
1085 | 1085 | ||
1086 | /* | 1086 | /* |
1087 | * Entry point to boot a CPU. | 1087 | * Entry point to boot a CPU. |
1088 | */ | 1088 | */ |
1089 | int __cpuinit __cpu_up(unsigned int cpu) | 1089 | int __cpuinit __cpu_up(unsigned int cpu) |
1090 | { | 1090 | { |
1091 | int err; | 1091 | int err; |
1092 | int apicid = cpu_present_to_apicid(cpu); | 1092 | int apicid = cpu_present_to_apicid(cpu); |
1093 | 1093 | ||
1094 | WARN_ON(irqs_disabled()); | 1094 | WARN_ON(irqs_disabled()); |
1095 | 1095 | ||
1096 | Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); | 1096 | Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); |
1097 | 1097 | ||
1098 | if (apicid == BAD_APICID || apicid == boot_cpu_id || | 1098 | if (apicid == BAD_APICID || apicid == boot_cpu_id || |
1099 | !physid_isset(apicid, phys_cpu_present_map)) { | 1099 | !physid_isset(apicid, phys_cpu_present_map)) { |
1100 | printk("__cpu_up: bad cpu %d\n", cpu); | 1100 | printk("__cpu_up: bad cpu %d\n", cpu); |
1101 | return -EINVAL; | 1101 | return -EINVAL; |
1102 | } | 1102 | } |
1103 | 1103 | ||
1104 | /* | 1104 | /* |
1105 | * Already booted CPU? | 1105 | * Already booted CPU? |
1106 | */ | 1106 | */ |
1107 | if (cpu_isset(cpu, cpu_callin_map)) { | 1107 | if (cpu_isset(cpu, cpu_callin_map)) { |
1108 | Dprintk("do_boot_cpu %d Already started\n", cpu); | 1108 | Dprintk("do_boot_cpu %d Already started\n", cpu); |
1109 | return -ENOSYS; | 1109 | return -ENOSYS; |
1110 | } | 1110 | } |
1111 | 1111 | ||
1112 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 1112 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
1113 | /* Boot it! */ | 1113 | /* Boot it! */ |
1114 | err = do_boot_cpu(cpu, apicid); | 1114 | err = do_boot_cpu(cpu, apicid); |
1115 | if (err < 0) { | 1115 | if (err < 0) { |
1116 | Dprintk("do_boot_cpu failed %d\n", err); | 1116 | Dprintk("do_boot_cpu failed %d\n", err); |
1117 | return err; | 1117 | return err; |
1118 | } | 1118 | } |
1119 | 1119 | ||
1120 | /* Unleash the CPU! */ | 1120 | /* Unleash the CPU! */ |
1121 | Dprintk("waiting for cpu %d\n", cpu); | 1121 | Dprintk("waiting for cpu %d\n", cpu); |
1122 | 1122 | ||
1123 | while (!cpu_isset(cpu, cpu_online_map)) | 1123 | while (!cpu_isset(cpu, cpu_online_map)) |
1124 | cpu_relax(); | 1124 | cpu_relax(); |
1125 | err = 0; | 1125 | err = 0; |
1126 | 1126 | ||
1127 | return err; | 1127 | return err; |
1128 | } | 1128 | } |
1129 | 1129 | ||
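For orientation (background, not part of this diff): __cpu_up() is the architecture hook under the generic cpu_up() path, reached for example when "1" is written to /sys/devices/system/cpu/cpu1/online on a CONFIG_HOTPLUG_CPU kernel. The closing busy-wait holds the caller until the freshly booted CPU marks itself in cpu_online_map from start_secondary().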
1130 | /* | 1130 | /* |
1131 | * Finish the SMP boot. | 1131 | * Finish the SMP boot. |
1132 | */ | 1132 | */ |
1133 | void __init smp_cpus_done(unsigned int max_cpus) | 1133 | void __init smp_cpus_done(unsigned int max_cpus) |
1134 | { | 1134 | { |
1135 | smp_cleanup_boot(); | 1135 | smp_cleanup_boot(); |
1136 | 1136 | ||
1137 | #ifdef CONFIG_X86_IO_APIC | 1137 | #ifdef CONFIG_X86_IO_APIC |
1138 | setup_ioapic_dest(); | 1138 | setup_ioapic_dest(); |
1139 | #endif | 1139 | #endif |
1140 | 1140 | ||
1141 | time_init_gtod(); | 1141 | time_init_gtod(); |
1142 | 1142 | ||
1143 | check_nmi_watchdog(); | 1143 | check_nmi_watchdog(); |
1144 | } | 1144 | } |
1145 | 1145 | ||
1146 | #ifdef CONFIG_HOTPLUG_CPU | 1146 | #ifdef CONFIG_HOTPLUG_CPU |
1147 | 1147 | ||
1148 | static void remove_siblinginfo(int cpu) | 1148 | static void remove_siblinginfo(int cpu) |
1149 | { | 1149 | { |
1150 | int sibling; | 1150 | int sibling; |
1151 | struct cpuinfo_x86 *c = cpu_data; | 1151 | struct cpuinfo_x86 *c = cpu_data; |
1152 | 1152 | ||
1153 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) { | 1153 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) { |
1154 | cpu_clear(cpu, cpu_core_map[sibling]); | 1154 | cpu_clear(cpu, cpu_core_map[sibling]); |
1155 | /* | 1155 | /* |
1156 | * last thread sibling in this cpu core going down | 1156 | * last thread sibling in this cpu core going down |
1157 | */ | 1157 | */ |
1158 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) | 1158 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) |
1159 | c[sibling].booted_cores--; | 1159 | c[sibling].booted_cores--; |
1160 | } | 1160 | } |
1161 | 1161 | ||
1162 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) | 1162 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) |
1163 | cpu_clear(cpu, cpu_sibling_map[sibling]); | 1163 | cpu_clear(cpu, cpu_sibling_map[sibling]); |
1164 | cpus_clear(cpu_sibling_map[cpu]); | 1164 | cpus_clear(cpu_sibling_map[cpu]); |
1165 | cpus_clear(cpu_core_map[cpu]); | 1165 | cpus_clear(cpu_core_map[cpu]); |
1166 | phys_proc_id[cpu] = BAD_APICID; | 1166 | phys_proc_id[cpu] = BAD_APICID; |
1167 | cpu_core_id[cpu] = BAD_APICID; | 1167 | cpu_core_id[cpu] = BAD_APICID; |
1168 | cpu_clear(cpu, cpu_sibling_setup_map); | 1168 | cpu_clear(cpu, cpu_sibling_setup_map); |
1169 | } | 1169 | } |
1170 | 1170 | ||
1171 | void remove_cpu_from_maps(void) | 1171 | void remove_cpu_from_maps(void) |
1172 | { | 1172 | { |
1173 | int cpu = smp_processor_id(); | 1173 | int cpu = smp_processor_id(); |
1174 | 1174 | ||
1175 | cpu_clear(cpu, cpu_callout_map); | 1175 | cpu_clear(cpu, cpu_callout_map); |
1176 | cpu_clear(cpu, cpu_callin_map); | 1176 | cpu_clear(cpu, cpu_callin_map); |
1177 | clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ | 1177 | clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ |
1178 | } | 1178 | } |
1179 | 1179 | ||
1180 | int __cpu_disable(void) | 1180 | int __cpu_disable(void) |
1181 | { | 1181 | { |
1182 | int cpu = smp_processor_id(); | 1182 | int cpu = smp_processor_id(); |
1183 | 1183 | ||
1184 | /* | 1184 | /* |
1185 | * Perhaps use cpufreq to drop frequency, but that could go | 1185 | * Perhaps use cpufreq to drop frequency, but that could go |
1186 | * into generic code. | 1186 | * into generic code. |
1187 | * | 1187 | * |
1188 | * We won't take down the boot processor on i386 due to some | 1188 | * We won't take down the boot processor on i386 due to some |
1189 | * interrupts only being able to be serviced by the BSP. | 1189 | * interrupts only being able to be serviced by the BSP. |
1190 | * Especially so if we're not using an IOAPIC -zwane | 1190 | * Especially so if we're not using an IOAPIC -zwane |
1191 | */ | 1191 | */ |
1192 | if (cpu == 0) | 1192 | if (cpu == 0) |
1193 | return -EBUSY; | 1193 | return -EBUSY; |
1194 | 1194 | ||
1195 | clear_local_APIC(); | 1195 | clear_local_APIC(); |
1196 | 1196 | ||
1197 | /* | 1197 | /* |
1198 | * HACK: | 1198 | * HACK: |
1199 | * Allow any queued timer interrupts to get serviced. | 1199 | * Allow any queued timer interrupts to get serviced. |
1200 | * This is only a temporary solution until we clean up | 1200 | * This is only a temporary solution until we clean up |
1201 | * fixup_irqs() as we do for IA64. | 1201 | * fixup_irqs() as we do for IA64. |
1202 | */ | 1202 | */ |
1203 | local_irq_enable(); | 1203 | local_irq_enable(); |
1204 | mdelay(1); | 1204 | mdelay(1); |
1205 | 1205 | ||
1206 | local_irq_disable(); | 1206 | local_irq_disable(); |
1207 | remove_siblinginfo(cpu); | 1207 | remove_siblinginfo(cpu); |
1208 | 1208 | ||
1209 | /* It's now safe to remove this processor from the online map */ | 1209 | /* It's now safe to remove this processor from the online map */ |
1210 | cpu_clear(cpu, cpu_online_map); | 1210 | cpu_clear(cpu, cpu_online_map); |
1211 | remove_cpu_from_maps(); | 1211 | remove_cpu_from_maps(); |
1212 | fixup_irqs(cpu_online_map); | 1212 | fixup_irqs(cpu_online_map); |
1213 | return 0; | 1213 | return 0; |
1214 | } | 1214 | } |
1215 | 1215 | ||
1216 | void __cpu_die(unsigned int cpu) | 1216 | void __cpu_die(unsigned int cpu) |
1217 | { | 1217 | { |
1218 | /* We don't do anything here: idle task is faking death itself. */ | 1218 | /* We don't do anything here: idle task is faking death itself. */ |
1219 | unsigned int i; | 1219 | unsigned int i; |
1220 | 1220 | ||
1221 | for (i = 0; i < 10; i++) { | 1221 | for (i = 0; i < 10; i++) { |
1222 | /* They ack this in play_dead by setting CPU_DEAD */ | 1222 | /* They ack this in play_dead by setting CPU_DEAD */ |
1223 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | 1223 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
1224 | printk ("CPU %d is now offline\n", cpu); | 1224 | printk ("CPU %d is now offline\n", cpu); |
1225 | return; | 1225 | return; |
1226 | } | 1226 | } |
1227 | msleep(100); | 1227 | msleep(100); |
1228 | } | 1228 | } |
1229 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1229 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
1230 | } | 1230 | } |
1231 | 1231 | ||
1232 | static __init int setup_additional_cpus(char *s) | 1232 | static __init int setup_additional_cpus(char *s) |
1233 | { | 1233 | { |
1234 | return get_option(&s, &additional_cpus); | 1234 | return get_option(&s, &additional_cpus); |
1235 | } | 1235 | } |
1236 | __setup("additional_cpus=", setup_additional_cpus); | 1236 | __setup("additional_cpus=", setup_additional_cpus); |
1237 | 1237 | ||
1238 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1238 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
1239 | 1239 | ||
1240 | int __cpu_disable(void) | 1240 | int __cpu_disable(void) |
1241 | { | 1241 | { |
1242 | return -ENOSYS; | 1242 | return -ENOSYS; |
1243 | } | 1243 | } |
1244 | 1244 | ||
1245 | void __cpu_die(unsigned int cpu) | 1245 | void __cpu_die(unsigned int cpu) |
1246 | { | 1246 | { |
1247 | /* We said "no" in __cpu_disable */ | 1247 | /* We said "no" in __cpu_disable */ |
1248 | BUG(); | 1248 | BUG(); |
1249 | } | 1249 | } |
1250 | #endif /* CONFIG_HOTPLUG_CPU */ | 1250 | #endif /* CONFIG_HOTPLUG_CPU */ |
1251 | 1251 |
arch/x86_64/kernel/traps.c
1 | /* | 1 | /* |
2 | * linux/arch/x86-64/traps.c | 2 | * linux/arch/x86-64/traps.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | 5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs |
6 | * | 6 | * |
7 | * Pentium III FXSR, SSE support | 7 | * Pentium III FXSR, SSE support |
8 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 8 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
9 | * | 9 | * |
10 | * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $ | 10 | * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $ |
11 | */ | 11 | */ |
12 | 12 | ||
13 | /* | 13 | /* |
14 | * 'Traps.c' handles hardware traps and faults after we have saved some | 14 | * 'Traps.c' handles hardware traps and faults after we have saved some |
15 | * state in 'entry.S'. | 15 | * state in 'entry.S'. |
16 | */ | 16 | */ |
17 | #include <linux/config.h> | 17 | #include <linux/config.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/string.h> | 20 | #include <linux/string.h> |
21 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
22 | #include <linux/ptrace.h> | 22 | #include <linux/ptrace.h> |
23 | #include <linux/timer.h> | 23 | #include <linux/timer.h> |
24 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
25 | #include <linux/init.h> | 25 | #include <linux/init.h> |
26 | #include <linux/delay.h> | 26 | #include <linux/delay.h> |
27 | #include <linux/spinlock.h> | 27 | #include <linux/spinlock.h> |
28 | #include <linux/interrupt.h> | 28 | #include <linux/interrupt.h> |
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/moduleparam.h> | 30 | #include <linux/moduleparam.h> |
31 | #include <linux/nmi.h> | 31 | #include <linux/nmi.h> |
32 | #include <linux/kprobes.h> | 32 | #include <linux/kprobes.h> |
33 | 33 | ||
34 | #include <asm/system.h> | 34 | #include <asm/system.h> |
35 | #include <asm/uaccess.h> | 35 | #include <asm/uaccess.h> |
36 | #include <asm/io.h> | 36 | #include <asm/io.h> |
37 | #include <asm/atomic.h> | 37 | #include <asm/atomic.h> |
38 | #include <asm/debugreg.h> | 38 | #include <asm/debugreg.h> |
39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
40 | #include <asm/i387.h> | 40 | #include <asm/i387.h> |
41 | #include <asm/kdebug.h> | 41 | #include <asm/kdebug.h> |
42 | #include <asm/processor.h> | 42 | #include <asm/processor.h> |
43 | 43 | ||
44 | #include <asm/smp.h> | 44 | #include <asm/smp.h> |
45 | #include <asm/pgalloc.h> | 45 | #include <asm/pgalloc.h> |
46 | #include <asm/pda.h> | 46 | #include <asm/pda.h> |
47 | #include <asm/proto.h> | 47 | #include <asm/proto.h> |
48 | #include <asm/nmi.h> | 48 | #include <asm/nmi.h> |
49 | 49 | ||
50 | extern struct gate_struct idt_table[256]; | 50 | extern struct gate_struct idt_table[256]; |
51 | 51 | ||
52 | asmlinkage void divide_error(void); | 52 | asmlinkage void divide_error(void); |
53 | asmlinkage void debug(void); | 53 | asmlinkage void debug(void); |
54 | asmlinkage void nmi(void); | 54 | asmlinkage void nmi(void); |
55 | asmlinkage void int3(void); | 55 | asmlinkage void int3(void); |
56 | asmlinkage void overflow(void); | 56 | asmlinkage void overflow(void); |
57 | asmlinkage void bounds(void); | 57 | asmlinkage void bounds(void); |
58 | asmlinkage void invalid_op(void); | 58 | asmlinkage void invalid_op(void); |
59 | asmlinkage void device_not_available(void); | 59 | asmlinkage void device_not_available(void); |
60 | asmlinkage void double_fault(void); | 60 | asmlinkage void double_fault(void); |
61 | asmlinkage void coprocessor_segment_overrun(void); | 61 | asmlinkage void coprocessor_segment_overrun(void); |
62 | asmlinkage void invalid_TSS(void); | 62 | asmlinkage void invalid_TSS(void); |
63 | asmlinkage void segment_not_present(void); | 63 | asmlinkage void segment_not_present(void); |
64 | asmlinkage void stack_segment(void); | 64 | asmlinkage void stack_segment(void); |
65 | asmlinkage void general_protection(void); | 65 | asmlinkage void general_protection(void); |
66 | asmlinkage void page_fault(void); | 66 | asmlinkage void page_fault(void); |
67 | asmlinkage void coprocessor_error(void); | 67 | asmlinkage void coprocessor_error(void); |
68 | asmlinkage void simd_coprocessor_error(void); | 68 | asmlinkage void simd_coprocessor_error(void); |
69 | asmlinkage void reserved(void); | 69 | asmlinkage void reserved(void); |
70 | asmlinkage void alignment_check(void); | 70 | asmlinkage void alignment_check(void); |
71 | asmlinkage void machine_check(void); | 71 | asmlinkage void machine_check(void); |
72 | asmlinkage void spurious_interrupt_bug(void); | 72 | asmlinkage void spurious_interrupt_bug(void); |
73 | asmlinkage void call_debug(void); | ||
74 | 73 | ||
75 | struct notifier_block *die_chain; | 74 | struct notifier_block *die_chain; |
76 | static DEFINE_SPINLOCK(die_notifier_lock); | 75 | static DEFINE_SPINLOCK(die_notifier_lock); |
77 | 76 | ||
78 | int register_die_notifier(struct notifier_block *nb) | 77 | int register_die_notifier(struct notifier_block *nb) |
79 | { | 78 | { |
80 | int err = 0; | 79 | int err = 0; |
81 | unsigned long flags; | 80 | unsigned long flags; |
82 | spin_lock_irqsave(&die_notifier_lock, flags); | 81 | spin_lock_irqsave(&die_notifier_lock, flags); |
83 | err = notifier_chain_register(&die_chain, nb); | 82 | err = notifier_chain_register(&die_chain, nb); |
84 | spin_unlock_irqrestore(&die_notifier_lock, flags); | 83 | spin_unlock_irqrestore(&die_notifier_lock, flags); |
85 | return err; | 84 | return err; |
86 | } | 85 | } |
87 | 86 | ||
88 | static inline void conditional_sti(struct pt_regs *regs) | 87 | static inline void conditional_sti(struct pt_regs *regs) |
89 | { | 88 | { |
90 | if (regs->eflags & X86_EFLAGS_IF) | 89 | if (regs->eflags & X86_EFLAGS_IF) |
91 | local_irq_enable(); | 90 | local_irq_enable(); |
92 | } | 91 | } |
93 | 92 | ||
94 | static int kstack_depth_to_print = 10; | 93 | static int kstack_depth_to_print = 10; |
95 | 94 | ||
96 | #ifdef CONFIG_KALLSYMS | 95 | #ifdef CONFIG_KALLSYMS |
97 | #include <linux/kallsyms.h> | 96 | #include <linux/kallsyms.h> |
98 | int printk_address(unsigned long address) | 97 | int printk_address(unsigned long address) |
99 | { | 98 | { |
100 | unsigned long offset = 0, symsize; | 99 | unsigned long offset = 0, symsize; |
101 | const char *symname; | 100 | const char *symname; |
102 | char *modname; | 101 | char *modname; |
103 | char *delim = ":"; | 102 | char *delim = ":"; |
104 | char namebuf[128]; | 103 | char namebuf[128]; |
105 | 104 | ||
106 | symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); | 105 | symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); |
107 | if (!symname) | 106 | if (!symname) |
108 | return printk("[<%016lx>]", address); | 107 | return printk("[<%016lx>]", address); |
109 | if (!modname) | 108 | if (!modname) |
110 | modname = delim = ""; | 109 | modname = delim = ""; |
111 | return printk("<%016lx>{%s%s%s%s%+ld}", | 110 | return printk("<%016lx>{%s%s%s%s%+ld}", |
112 | address,delim,modname,delim,symname,offset); | 111 | address,delim,modname,delim,symname,offset); |
113 | } | 112 | } |
114 | #else | 113 | #else |
115 | int printk_address(unsigned long address) | 114 | int printk_address(unsigned long address) |
116 | { | 115 | { |
117 | return printk("[<%016lx>]", address); | 116 | return printk("[<%016lx>]", address); |
118 | } | 117 | } |
119 | #endif | 118 | #endif |
120 | 119 | ||
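What the two printk_address() variants above emit, given their format strings (the addresses, module and symbol names below are made-up examples):

        /* CONFIG_KALLSYMS, core symbol:    <ffffffff80123456>{do_boot_cpu+160}
         * CONFIG_KALLSYMS, module symbol:  <ffffffff88012345>{:e1000:e1000_intr+36}
         * without CONFIG_KALLSYMS:         [<ffffffff80123456>]
         */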
121 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 120 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
122 | unsigned *usedp, const char **idp) | 121 | unsigned *usedp, const char **idp) |
123 | { | 122 | { |
124 | static char ids[][8] = { | 123 | static char ids[][8] = { |
125 | [DEBUG_STACK - 1] = "#DB", | 124 | [DEBUG_STACK - 1] = "#DB", |
126 | [NMI_STACK - 1] = "NMI", | 125 | [NMI_STACK - 1] = "NMI", |
127 | [DOUBLEFAULT_STACK - 1] = "#DF", | 126 | [DOUBLEFAULT_STACK - 1] = "#DF", |
128 | [STACKFAULT_STACK - 1] = "#SS", | 127 | [STACKFAULT_STACK - 1] = "#SS", |
129 | [MCE_STACK - 1] = "#MC", | 128 | [MCE_STACK - 1] = "#MC", |
130 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 129 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
131 | [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | 130 | [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" |
132 | #endif | 131 | #endif |
133 | }; | 132 | }; |
134 | unsigned k; | 133 | unsigned k; |
135 | 134 | ||
136 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | 135 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { |
137 | unsigned long end; | 136 | unsigned long end; |
138 | 137 | ||
139 | switch (k + 1) { | 138 | switch (k + 1) { |
140 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 139 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
141 | case DEBUG_STACK: | 140 | case DEBUG_STACK: |
142 | end = cpu_pda[cpu].debugstack + DEBUG_STKSZ; | 141 | end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; |
143 | break; | 142 | break; |
144 | #endif | 143 | #endif |
145 | default: | 144 | default: |
146 | end = per_cpu(init_tss, cpu).ist[k]; | 145 | end = per_cpu(init_tss, cpu).ist[k]; |
147 | break; | 146 | break; |
148 | } | 147 | } |
149 | if (stack >= end) | 148 | if (stack >= end) |
150 | continue; | 149 | continue; |
151 | if (stack >= end - EXCEPTION_STKSZ) { | 150 | if (stack >= end - EXCEPTION_STKSZ) { |
152 | if (*usedp & (1U << k)) | 151 | if (*usedp & (1U << k)) |
153 | break; | 152 | break; |
154 | *usedp |= 1U << k; | 153 | *usedp |= 1U << k; |
155 | *idp = ids[k]; | 154 | *idp = ids[k]; |
156 | return (unsigned long *)end; | 155 | return (unsigned long *)end; |
157 | } | 156 | } |
158 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 157 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
159 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | 158 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { |
160 | unsigned j = N_EXCEPTION_STACKS - 1; | 159 | unsigned j = N_EXCEPTION_STACKS - 1; |
161 | 160 | ||
162 | do { | 161 | do { |
163 | ++j; | 162 | ++j; |
164 | end -= EXCEPTION_STKSZ; | 163 | end -= EXCEPTION_STKSZ; |
165 | ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); | 164 | ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); |
166 | } while (stack < end - EXCEPTION_STKSZ); | 165 | } while (stack < end - EXCEPTION_STKSZ); |
167 | if (*usedp & (1U << j)) | 166 | if (*usedp & (1U << j)) |
168 | break; | 167 | break; |
169 | *usedp |= 1U << j; | 168 | *usedp |= 1U << j; |
170 | *idp = ids[j]; | 169 | *idp = ids[j]; |
171 | return (unsigned long *)end; | 170 | return (unsigned long *)end; |
172 | } | 171 | } |
173 | #endif | 172 | #endif |
174 | } | 173 | } |
175 | return NULL; | 174 | return NULL; |
176 | } | 175 | } |
177 | 176 | ||
178 | /* | 177 | /* |
179 | * x86-64 can have up to three kernel stacks: | 178 | * x86-64 can have up to three kernel stacks: |
180 | * process stack | 179 | * process stack |
181 | * interrupt stack | 180 | * interrupt stack |
182 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 181 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
183 | */ | 182 | */ |
184 | 183 | ||
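in_exception_stack() above classifies a stack pointer by comparing it against the recorded top ('end') of each IST stack. Its core range test, restated as a standalone sketch (the helper name is illustrative):

        /* An address lies on the exception stack whose top is 'end' iff
         * it falls within [end - EXCEPTION_STKSZ, end). */
        static inline int on_exception_stack(unsigned long stack, unsigned long end)
        {
                return stack >= end - EXCEPTION_STKSZ && stack < end;
        }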
185 | void show_trace(unsigned long *stack) | 184 | void show_trace(unsigned long *stack) |
186 | { | 185 | { |
187 | unsigned long addr; | 186 | unsigned long addr; |
188 | const unsigned cpu = safe_smp_processor_id(); | 187 | const unsigned cpu = safe_smp_processor_id(); |
189 | unsigned long *irqstack_end = (unsigned long *)cpu_pda[cpu].irqstackptr; | 188 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
190 | int i; | 189 | int i; |
191 | unsigned used = 0; | 190 | unsigned used = 0; |
192 | 191 | ||
193 | printk("\nCall Trace:"); | 192 | printk("\nCall Trace:"); |
194 | 193 | ||
195 | #define HANDLE_STACK(cond) \ | 194 | #define HANDLE_STACK(cond) \ |
196 | do while (cond) { \ | 195 | do while (cond) { \ |
197 | addr = *stack++; \ | 196 | addr = *stack++; \ |
198 | if (kernel_text_address(addr)) { \ | 197 | if (kernel_text_address(addr)) { \ |
199 | /* \ | 198 | /* \ |
200 | * If the address is either in the text segment of the \ | 199 | * If the address is either in the text segment of the \ |
201 | * kernel, or in the region which contains vmalloc'ed \ | 200 | * kernel, or in the region which contains vmalloc'ed \ |
202 | * memory, it *may* be the address of a calling \ | 201 | * memory, it *may* be the address of a calling \ |
203 | * routine; if so, print it so that someone tracing \ | 202 | * routine; if so, print it so that someone tracing \ |
204 | * down the cause of the crash will be able to figure \ | 203 | * down the cause of the crash will be able to figure \ |
205 | * out the call path that was taken. \ | 204 | * out the call path that was taken. \ |
206 | */ \ | 205 | */ \ |
207 | i += printk_address(addr); \ | 206 | i += printk_address(addr); \ |
208 | if (i > 50) { \ | 207 | if (i > 50) { \ |
209 | printk("\n "); \ | 208 | printk("\n "); \ |
210 | i = 0; \ | 209 | i = 0; \ |
211 | } \ | 210 | } \ |
212 | else \ | 211 | else \ |
213 | i += printk(" "); \ | 212 | i += printk(" "); \ |
214 | } \ | 213 | } \ |
215 | } while (0) | 214 | } while (0) |
216 | 215 | ||
217 | for(i = 0; ; ) { | 216 | for(i = 0; ; ) { |
218 | const char *id; | 217 | const char *id; |
219 | unsigned long *estack_end; | 218 | unsigned long *estack_end; |
220 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | 219 | estack_end = in_exception_stack(cpu, (unsigned long)stack, |
221 | &used, &id); | 220 | &used, &id); |
222 | 221 | ||
223 | if (estack_end) { | 222 | if (estack_end) { |
224 | i += printk(" <%s> ", id); | 223 | i += printk(" <%s> ", id); |
225 | HANDLE_STACK (stack < estack_end); | 224 | HANDLE_STACK (stack < estack_end); |
226 | i += printk(" <EOE> "); | 225 | i += printk(" <EOE> "); |
227 | stack = (unsigned long *) estack_end[-2]; | 226 | stack = (unsigned long *) estack_end[-2]; |
228 | continue; | 227 | continue; |
229 | } | 228 | } |
230 | if (irqstack_end) { | 229 | if (irqstack_end) { |
231 | unsigned long *irqstack; | 230 | unsigned long *irqstack; |
232 | irqstack = irqstack_end - | 231 | irqstack = irqstack_end - |
233 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | 232 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); |
234 | 233 | ||
235 | if (stack >= irqstack && stack < irqstack_end) { | 234 | if (stack >= irqstack && stack < irqstack_end) { |
236 | i += printk(" <IRQ> "); | 235 | i += printk(" <IRQ> "); |
237 | HANDLE_STACK (stack < irqstack_end); | 236 | HANDLE_STACK (stack < irqstack_end); |
238 | stack = (unsigned long *) (irqstack_end[-1]); | 237 | stack = (unsigned long *) (irqstack_end[-1]); |
239 | irqstack_end = NULL; | 238 | irqstack_end = NULL; |
240 | i += printk(" <EOI> "); | 239 | i += printk(" <EOI> "); |
241 | continue; | 240 | continue; |
242 | } | 241 | } |
243 | } | 242 | } |
244 | break; | 243 | break; |
245 | } | 244 | } |
246 | 245 | ||
247 | HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); | 246 | HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); |
248 | #undef HANDLE_STACK | 247 | #undef HANDLE_STACK |
249 | printk("\n"); | 248 | printk("\n"); |
250 | } | 249 | } |
251 | 250 | ||
252 | void show_stack(struct task_struct *tsk, unsigned long * rsp) | 251 | void show_stack(struct task_struct *tsk, unsigned long * rsp) |
253 | { | 252 | { |
254 | unsigned long *stack; | 253 | unsigned long *stack; |
255 | int i; | 254 | int i; |
256 | const int cpu = safe_smp_processor_id(); | 255 | const int cpu = safe_smp_processor_id(); |
257 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr); | 256 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); |
258 | unsigned long *irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE); | 257 | unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); |
259 | 258 | ||
260 | // debugging aid: "show_stack(NULL, NULL);" prints the | 259 | // debugging aid: "show_stack(NULL, NULL);" prints the |
261 | // back trace for this cpu. | 260 | // back trace for this cpu. |
262 | 261 | ||
263 | if (rsp == NULL) { | 262 | if (rsp == NULL) { |
264 | if (tsk) | 263 | if (tsk) |
265 | rsp = (unsigned long *)tsk->thread.rsp; | 264 | rsp = (unsigned long *)tsk->thread.rsp; |
266 | else | 265 | else |
267 | rsp = (unsigned long *)&rsp; | 266 | rsp = (unsigned long *)&rsp; |
268 | } | 267 | } |
269 | 268 | ||
270 | stack = rsp; | 269 | stack = rsp; |
271 | for(i=0; i < kstack_depth_to_print; i++) { | 270 | for(i=0; i < kstack_depth_to_print; i++) { |
272 | if (stack >= irqstack && stack <= irqstack_end) { | 271 | if (stack >= irqstack && stack <= irqstack_end) { |
273 | if (stack == irqstack_end) { | 272 | if (stack == irqstack_end) { |
274 | stack = (unsigned long *) (irqstack_end[-1]); | 273 | stack = (unsigned long *) (irqstack_end[-1]); |
275 | printk(" <EOI> "); | 274 | printk(" <EOI> "); |
276 | } | 275 | } |
277 | } else { | 276 | } else { |
278 | if (((long) stack & (THREAD_SIZE-1)) == 0) | 277 | if (((long) stack & (THREAD_SIZE-1)) == 0) |
279 | break; | 278 | break; |
280 | } | 279 | } |
281 | if (i && ((i % 4) == 0)) | 280 | if (i && ((i % 4) == 0)) |
282 | printk("\n "); | 281 | printk("\n "); |
283 | printk("%016lx ", *stack++); | 282 | printk("%016lx ", *stack++); |
284 | touch_nmi_watchdog(); | 283 | touch_nmi_watchdog(); |
285 | } | 284 | } |
286 | show_trace((unsigned long *)rsp); | 285 | show_trace((unsigned long *)rsp); |
287 | } | 286 | } |
288 | 287 | ||
289 | /* | 288 | /* |
290 | * The architecture-independent dump_stack generator | 289 | * The architecture-independent dump_stack generator |
291 | */ | 290 | */ |
292 | void dump_stack(void) | 291 | void dump_stack(void) |
293 | { | 292 | { |
294 | unsigned long dummy; | 293 | unsigned long dummy; |
295 | show_trace(&dummy); | 294 | show_trace(&dummy); |
296 | } | 295 | } |
297 | 296 | ||
298 | EXPORT_SYMBOL(dump_stack); | 297 | EXPORT_SYMBOL(dump_stack); |
299 | 298 | ||
300 | void show_registers(struct pt_regs *regs) | 299 | void show_registers(struct pt_regs *regs) |
301 | { | 300 | { |
302 | int i; | 301 | int i; |
303 | int in_kernel = !user_mode(regs); | 302 | int in_kernel = !user_mode(regs); |
304 | unsigned long rsp; | 303 | unsigned long rsp; |
305 | const int cpu = safe_smp_processor_id(); | 304 | const int cpu = safe_smp_processor_id(); |
306 | struct task_struct *cur = cpu_pda[cpu].pcurrent; | 305 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; |
307 | 306 | ||
308 | rsp = regs->rsp; | 307 | rsp = regs->rsp; |
309 | 308 | ||
310 | printk("CPU %d ", cpu); | 309 | printk("CPU %d ", cpu); |
311 | __show_regs(regs); | 310 | __show_regs(regs); |
312 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | 311 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", |
313 | cur->comm, cur->pid, cur->thread_info, cur); | 312 | cur->comm, cur->pid, cur->thread_info, cur); |
314 | 313 | ||
315 | /* | 314 | /* |
316 | * When in-kernel, we also print out the stack and code at the | 315 | * When in-kernel, we also print out the stack and code at the |
317 | * time of the fault. | 316 | * time of the fault. |
318 | */ | 317 | */ |
319 | if (in_kernel) { | 318 | if (in_kernel) { |
320 | 319 | ||
321 | printk("Stack: "); | 320 | printk("Stack: "); |
322 | show_stack(NULL, (unsigned long*)rsp); | 321 | show_stack(NULL, (unsigned long*)rsp); |
323 | 322 | ||
324 | printk("\nCode: "); | 323 | printk("\nCode: "); |
325 | if(regs->rip < PAGE_OFFSET) | 324 | if(regs->rip < PAGE_OFFSET) |
326 | goto bad; | 325 | goto bad; |
327 | 326 | ||
328 | for(i=0;i<20;i++) | 327 | for(i=0;i<20;i++) |
329 | { | 328 | { |
330 | unsigned char c; | 329 | unsigned char c; |
331 | if(__get_user(c, &((unsigned char*)regs->rip)[i])) { | 330 | if(__get_user(c, &((unsigned char*)regs->rip)[i])) { |
332 | bad: | 331 | bad: |
333 | printk(" Bad RIP value."); | 332 | printk(" Bad RIP value."); |
334 | break; | 333 | break; |
335 | } | 334 | } |
336 | printk("%02x ", c); | 335 | printk("%02x ", c); |
337 | } | 336 | } |
338 | } | 337 | } |
339 | printk("\n"); | 338 | printk("\n"); |
340 | } | 339 | } |
341 | 340 | ||
342 | void handle_BUG(struct pt_regs *regs) | 341 | void handle_BUG(struct pt_regs *regs) |
343 | { | 342 | { |
344 | struct bug_frame f; | 343 | struct bug_frame f; |
345 | char tmp; | 344 | char tmp; |
346 | 345 | ||
347 | if (user_mode(regs)) | 346 | if (user_mode(regs)) |
348 | return; | 347 | return; |
349 | if (__copy_from_user(&f, (struct bug_frame *) regs->rip, | 348 | if (__copy_from_user(&f, (struct bug_frame *) regs->rip, |
350 | sizeof(struct bug_frame))) | 349 | sizeof(struct bug_frame))) |
351 | return; | 350 | return; |
352 | if (f.filename >= 0 || | 351 | if (f.filename >= 0 || |
353 | f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) | 352 | f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) |
354 | return; | 353 | return; |
355 | if (__get_user(tmp, (char *)(long)f.filename)) | 354 | if (__get_user(tmp, (char *)(long)f.filename)) |
356 | f.filename = (int)(long)"unmapped filename"; | 355 | f.filename = (int)(long)"unmapped filename"; |
357 | printk("----------- [cut here ] --------- [please bite here ] ---------\n"); | 356 | printk("----------- [cut here ] --------- [please bite here ] ---------\n"); |
358 | printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", (char *)(long)f.filename, f.line); | 357 | printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", (char *)(long)f.filename, f.line); |
359 | } | 358 | } |
360 | 359 | ||
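Two of the checks in handle_BUG() rest on background facts worth stating (not from this diff): 0x0f 0x0b is the two-byte opcode of the ud2 instruction that BUG() plants, which is why both bytes are verified; and kernel pointers live in the upper half of the address space, so a genuine filename pointer truncated to a signed 32-bit value is negative -- f.filename >= 0 therefore marks the frame as bogus.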
361 | #ifdef CONFIG_BUG | 360 | #ifdef CONFIG_BUG |
362 | void out_of_line_bug(void) | 361 | void out_of_line_bug(void) |
363 | { | 362 | { |
364 | BUG(); | 363 | BUG(); |
365 | } | 364 | } |
366 | #endif | 365 | #endif |
367 | 366 | ||
368 | static DEFINE_SPINLOCK(die_lock); | 367 | static DEFINE_SPINLOCK(die_lock); |
369 | static int die_owner = -1; | 368 | static int die_owner = -1; |
370 | 369 | ||
371 | unsigned long oops_begin(void) | 370 | unsigned long oops_begin(void) |
372 | { | 371 | { |
373 | int cpu = safe_smp_processor_id(); | 372 | int cpu = safe_smp_processor_id(); |
374 | unsigned long flags; | 373 | unsigned long flags; |
375 | 374 | ||
376 | /* racy, but better than risking deadlock. */ | 375 | /* racy, but better than risking deadlock. */ |
377 | local_irq_save(flags); | 376 | local_irq_save(flags); |
378 | if (!spin_trylock(&die_lock)) { | 377 | if (!spin_trylock(&die_lock)) { |
379 | if (cpu == die_owner) | 378 | if (cpu == die_owner) |
380 | /* nested oops. should stop eventually */; | 379 | /* nested oops. should stop eventually */; |
381 | else | 380 | else |
382 | spin_lock(&die_lock); | 381 | spin_lock(&die_lock); |
383 | } | 382 | } |
384 | die_owner = cpu; | 383 | die_owner = cpu; |
385 | console_verbose(); | 384 | console_verbose(); |
386 | bust_spinlocks(1); | 385 | bust_spinlocks(1); |
387 | return flags; | 386 | return flags; |
388 | } | 387 | } |
389 | 388 | ||
390 | void oops_end(unsigned long flags) | 389 | void oops_end(unsigned long flags) |
391 | { | 390 | { |
392 | die_owner = -1; | 391 | die_owner = -1; |
393 | bust_spinlocks(0); | 392 | bust_spinlocks(0); |
394 | spin_unlock_irqrestore(&die_lock, flags); | 393 | spin_unlock_irqrestore(&die_lock, flags); |
395 | if (panic_on_oops) | 394 | if (panic_on_oops) |
396 | panic("Oops"); | 395 | panic("Oops"); |
397 | } | 396 | } |
398 | 397 | ||
399 | void __die(const char * str, struct pt_regs * regs, long err) | 398 | void __die(const char * str, struct pt_regs * regs, long err) |
400 | { | 399 | { |
401 | static int die_counter; | 400 | static int die_counter; |
402 | printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter); | 401 | printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter); |
403 | #ifdef CONFIG_PREEMPT | 402 | #ifdef CONFIG_PREEMPT |
404 | printk("PREEMPT "); | 403 | printk("PREEMPT "); |
405 | #endif | 404 | #endif |
406 | #ifdef CONFIG_SMP | 405 | #ifdef CONFIG_SMP |
407 | printk("SMP "); | 406 | printk("SMP "); |
408 | #endif | 407 | #endif |
409 | #ifdef CONFIG_DEBUG_PAGEALLOC | 408 | #ifdef CONFIG_DEBUG_PAGEALLOC |
410 | printk("DEBUG_PAGEALLOC"); | 409 | printk("DEBUG_PAGEALLOC"); |
411 | #endif | 410 | #endif |
412 | printk("\n"); | 411 | printk("\n"); |
413 | notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); | 412 | notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); |
414 | show_registers(regs); | 413 | show_registers(regs); |
415 | /* Executive summary in case the oops scrolled away */ | 414 | /* Executive summary in case the oops scrolled away */ |
416 | printk(KERN_ALERT "RIP "); | 415 | printk(KERN_ALERT "RIP "); |
417 | printk_address(regs->rip); | 416 | printk_address(regs->rip); |
418 | printk(" RSP <%016lx>\n", regs->rsp); | 417 | printk(" RSP <%016lx>\n", regs->rsp); |
419 | } | 418 | } |
420 | 419 | ||
421 | void die(const char * str, struct pt_regs * regs, long err) | 420 | void die(const char * str, struct pt_regs * regs, long err) |
422 | { | 421 | { |
423 | unsigned long flags = oops_begin(); | 422 | unsigned long flags = oops_begin(); |
424 | 423 | ||
425 | handle_BUG(regs); | 424 | handle_BUG(regs); |
426 | __die(str, regs, err); | 425 | __die(str, regs, err); |
427 | oops_end(flags); | 426 | oops_end(flags); |
428 | do_exit(SIGSEGV); | 427 | do_exit(SIGSEGV); |
429 | } | 428 | } |
430 | 429 | ||
431 | void die_nmi(char *str, struct pt_regs *regs) | 430 | void die_nmi(char *str, struct pt_regs *regs) |
432 | { | 431 | { |
433 | unsigned long flags = oops_begin(); | 432 | unsigned long flags = oops_begin(); |
434 | 433 | ||
435 | /* | 434 | /* |
435 | * We are in trouble anyway, let's at least try | 434 | * We are in trouble anyway, let's at least try |
437 | * to get a message out. | 436 | * to get a message out. |
438 | */ | 437 | */ |
439 | printk(str, safe_smp_processor_id()); | 438 | printk(str, safe_smp_processor_id()); |
440 | show_registers(regs); | 439 | show_registers(regs); |
441 | if (panic_on_timeout || panic_on_oops) | 440 | if (panic_on_timeout || panic_on_oops) |
442 | panic("nmi watchdog"); | 441 | panic("nmi watchdog"); |
443 | printk("console shuts up ...\n"); | 442 | printk("console shuts up ...\n"); |
444 | oops_end(flags); | 443 | oops_end(flags); |
445 | do_exit(SIGSEGV); | 444 | do_exit(SIGSEGV); |
446 | } | 445 | } |
447 | 446 | ||
448 | static void __kprobes do_trap(int trapnr, int signr, char *str, | 447 | static void __kprobes do_trap(int trapnr, int signr, char *str, |
449 | struct pt_regs * regs, long error_code, | 448 | struct pt_regs * regs, long error_code, |
450 | siginfo_t *info) | 449 | siginfo_t *info) |
451 | { | 450 | { |
452 | struct task_struct *tsk = current; | 451 | struct task_struct *tsk = current; |
453 | 452 | ||
454 | conditional_sti(regs); | 453 | conditional_sti(regs); |
455 | 454 | ||
456 | tsk->thread.error_code = error_code; | 455 | tsk->thread.error_code = error_code; |
457 | tsk->thread.trap_no = trapnr; | 456 | tsk->thread.trap_no = trapnr; |
458 | 457 | ||
459 | if (user_mode(regs)) { | 458 | if (user_mode(regs)) { |
460 | if (exception_trace && unhandled_signal(tsk, signr)) | 459 | if (exception_trace && unhandled_signal(tsk, signr)) |
461 | printk(KERN_INFO | 460 | printk(KERN_INFO |
462 | "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", | 461 | "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", |
463 | tsk->comm, tsk->pid, str, | 462 | tsk->comm, tsk->pid, str, |
464 | regs->rip,regs->rsp,error_code); | 463 | regs->rip,regs->rsp,error_code); |
465 | 464 | ||
466 | if (info) | 465 | if (info) |
467 | force_sig_info(signr, info, tsk); | 466 | force_sig_info(signr, info, tsk); |
468 | else | 467 | else |
469 | force_sig(signr, tsk); | 468 | force_sig(signr, tsk); |
470 | return; | 469 | return; |
471 | } | 470 | } |
472 | 471 | ||
473 | 472 | ||
474 | /* kernel trap */ | 473 | /* kernel trap */ |
475 | { | 474 | { |
476 | const struct exception_table_entry *fixup; | 475 | const struct exception_table_entry *fixup; |
477 | fixup = search_exception_tables(regs->rip); | 476 | fixup = search_exception_tables(regs->rip); |
478 | if (fixup) { | 477 | if (fixup) { |
479 | regs->rip = fixup->fixup; | 478 | regs->rip = fixup->fixup; |
480 | } else | 479 | } else |
481 | die(str, regs, error_code); | 480 | die(str, regs, error_code); |
482 | return; | 481 | return; |
483 | } | 482 | } |
484 | } | 483 | } |
485 | 484 | ||
486 | #define DO_ERROR(trapnr, signr, str, name) \ | 485 | #define DO_ERROR(trapnr, signr, str, name) \ |
487 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | 486 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ |
488 | { \ | 487 | { \ |
489 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 488 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
490 | == NOTIFY_STOP) \ | 489 | == NOTIFY_STOP) \ |
491 | return; \ | 490 | return; \ |
492 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ | 491 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ |
493 | } | 492 | } |
494 | 493 | ||
495 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | 494 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ |
496 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | 495 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ |
497 | { \ | 496 | { \ |
498 | siginfo_t info; \ | 497 | siginfo_t info; \ |
499 | info.si_signo = signr; \ | 498 | info.si_signo = signr; \ |
500 | info.si_errno = 0; \ | 499 | info.si_errno = 0; \ |
501 | info.si_code = sicode; \ | 500 | info.si_code = sicode; \ |
502 | info.si_addr = (void __user *)siaddr; \ | 501 | info.si_addr = (void __user *)siaddr; \ |
503 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 502 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
504 | == NOTIFY_STOP) \ | 503 | == NOTIFY_STOP) \ |
505 | return; \ | 504 | return; \ |
506 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | 505 | do_trap(trapnr, signr, str, regs, error_code, &info); \ |
507 | } | 506 | } |
508 | 507 | ||
509 | DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip) | 508 | DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip) |
510 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) | 509 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) |
511 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) | 510 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) |
512 | DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip) | 511 | DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip) |
513 | DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) | 512 | DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) |
514 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | 513 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) |
515 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | 514 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) |
516 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | 515 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) |
517 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | 516 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) |
518 | DO_ERROR(18, SIGSEGV, "reserved", reserved) | 517 | DO_ERROR(18, SIGSEGV, "reserved", reserved) |
519 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | 518 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) |
520 | 519 | ||
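Substituting arguments into the DO_ERROR macro above makes the generated handlers concrete. For instance, DO_ERROR( 4, SIGSEGV, "overflow", overflow) expands to the following (modulo whitespace):

	asmlinkage void do_overflow(struct pt_regs * regs, long error_code)
	{
		/* give die-chain users (kprobes, debuggers) first refusal */
		if (notify_die(DIE_TRAP, "overflow", regs, error_code, 4, SIGSEGV)
								== NOTIFY_STOP)
			return;
		/* NULL siginfo: do_trap() falls back to plain force_sig() */
		do_trap(4, SIGSEGV, "overflow", regs, error_code, NULL);
	}

The DO_ERROR_INFO variant differs only in filling a siginfo_t on the stack first, which is how handlers like do_divide_error report a fault address (regs->rip) and code (FPE_INTDIV) to userspace.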
521 | asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) | 520 | asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) |
522 | { | 521 | { |
523 | static const char str[] = "double fault"; | 522 | static const char str[] = "double fault"; |
524 | struct task_struct *tsk = current; | 523 | struct task_struct *tsk = current; |
525 | 524 | ||
526 | /* Return not checked because a double fault cannot be ignored */ | 525 | /* Return not checked because a double fault cannot be ignored */ |
527 | notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); | 526 | notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); |
528 | 527 | ||
529 | tsk->thread.error_code = error_code; | 528 | tsk->thread.error_code = error_code; |
530 | tsk->thread.trap_no = 8; | 529 | tsk->thread.trap_no = 8; |
531 | 530 | ||
532 | /* This is always a kernel trap and never fixable (and thus must | 531 | /* This is always a kernel trap and never fixable (and thus must |
533 | never return). */ | 532 | never return). */ |
534 | for (;;) | 533 | for (;;) |
535 | die(str, regs, error_code); | 534 | die(str, regs, error_code); |
536 | } | 535 | } |
537 | 536 | ||
538 | asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | 537 | asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, |
539 | long error_code) | 538 | long error_code) |
540 | { | 539 | { |
541 | struct task_struct *tsk = current; | 540 | struct task_struct *tsk = current; |
542 | 541 | ||
543 | conditional_sti(regs); | 542 | conditional_sti(regs); |
544 | 543 | ||
545 | tsk->thread.error_code = error_code; | 544 | tsk->thread.error_code = error_code; |
546 | tsk->thread.trap_no = 13; | 545 | tsk->thread.trap_no = 13; |
547 | 546 | ||
548 | if (user_mode(regs)) { | 547 | if (user_mode(regs)) { |
549 | if (exception_trace && unhandled_signal(tsk, SIGSEGV)) | 548 | if (exception_trace && unhandled_signal(tsk, SIGSEGV)) |
550 | printk(KERN_INFO | 549 | printk(KERN_INFO |
551 | "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", | 550 | "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", |
552 | tsk->comm, tsk->pid, | 551 | tsk->comm, tsk->pid, |
553 | regs->rip,regs->rsp,error_code); | 552 | regs->rip,regs->rsp,error_code); |
554 | 553 | ||
555 | force_sig(SIGSEGV, tsk); | 554 | force_sig(SIGSEGV, tsk); |
556 | return; | 555 | return; |
557 | } | 556 | } |
558 | 557 | ||
559 | /* kernel gp */ | 558 | /* kernel gp */ |
560 | { | 559 | { |
561 | const struct exception_table_entry *fixup; | 560 | const struct exception_table_entry *fixup; |
562 | fixup = search_exception_tables(regs->rip); | 561 | fixup = search_exception_tables(regs->rip); |
563 | if (fixup) { | 562 | if (fixup) { |
564 | regs->rip = fixup->fixup; | 563 | regs->rip = fixup->fixup; |
565 | return; | 564 | return; |
566 | } | 565 | } |
567 | if (notify_die(DIE_GPF, "general protection fault", regs, | 566 | if (notify_die(DIE_GPF, "general protection fault", regs, |
568 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | 567 | error_code, 13, SIGSEGV) == NOTIFY_STOP) |
569 | return; | 568 | return; |
570 | die("general protection fault", regs, error_code); | 569 | die("general protection fault", regs, error_code); |
571 | } | 570 | } |
572 | } | 571 | } |
573 | 572 | ||
574 | static void mem_parity_error(unsigned char reason, struct pt_regs * regs) | 573 | static void mem_parity_error(unsigned char reason, struct pt_regs * regs) |
575 | { | 574 | { |
576 | printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); | 575 | printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); |
577 | printk("You probably have a hardware problem with your RAM chips\n"); | 576 | printk("You probably have a hardware problem with your RAM chips\n"); |
578 | 577 | ||
579 | /* Clear and disable the memory parity error line. */ | 578 | /* Clear and disable the memory parity error line. */ |
580 | reason = (reason & 0xf) | 4; | 579 | reason = (reason & 0xf) | 4; |
581 | outb(reason, 0x61); | 580 | outb(reason, 0x61); |
582 | } | 581 | } |
583 | 582 | ||
584 | static void io_check_error(unsigned char reason, struct pt_regs * regs) | 583 | static void io_check_error(unsigned char reason, struct pt_regs * regs) |
585 | { | 584 | { |
586 | printk("NMI: IOCK error (debug interrupt?)\n"); | 585 | printk("NMI: IOCK error (debug interrupt?)\n"); |
587 | show_registers(regs); | 586 | show_registers(regs); |
588 | 587 | ||
589 | /* Re-enable the IOCK line, wait for a few seconds */ | 588 | /* Re-enable the IOCK line, wait for a few seconds */ |
590 | reason = (reason & 0xf) | 8; | 589 | reason = (reason & 0xf) | 8; |
591 | outb(reason, 0x61); | 590 | outb(reason, 0x61); |
592 | mdelay(2000); | 591 | mdelay(2000); |
593 | reason &= ~8; | 592 | reason &= ~8; |
594 | outb(reason, 0x61); | 593 | outb(reason, 0x61); |
595 | } | 594 | } |
596 | 595 | ||
597 | static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | 596 | static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) |
598 | { printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); | 597 | { printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); |
599 | printk("Dazed and confused, but trying to continue\n"); | 598 | printk("Dazed and confused, but trying to continue\n"); |
600 | printk("Do you have a strange power saving mode enabled?\n"); | 599 | printk("Do you have a strange power saving mode enabled?\n"); |
601 | } | 600 | } |
602 | 601 | ||
603 | /* Runs on IST stack. This code must keep interrupts off all the time. | 602 | /* Runs on IST stack. This code must keep interrupts off all the time. |
604 | Nested NMIs are prevented by the CPU. */ | 603 | Nested NMIs are prevented by the CPU. */ |
605 | asmlinkage void default_do_nmi(struct pt_regs *regs) | 604 | asmlinkage void default_do_nmi(struct pt_regs *regs) |
606 | { | 605 | { |
607 | unsigned char reason = 0; | 606 | unsigned char reason = 0; |
608 | int cpu; | 607 | int cpu; |
609 | 608 | ||
610 | cpu = smp_processor_id(); | 609 | cpu = smp_processor_id(); |
611 | 610 | ||
612 | /* Only the BSP gets external NMIs from the system. */ | 611 | /* Only the BSP gets external NMIs from the system. */ |
613 | if (!cpu) | 612 | if (!cpu) |
614 | reason = get_nmi_reason(); | 613 | reason = get_nmi_reason(); |
615 | 614 | ||
616 | if (!(reason & 0xc0)) { | 615 | if (!(reason & 0xc0)) { |
617 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | 616 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) |
618 | == NOTIFY_STOP) | 617 | == NOTIFY_STOP) |
619 | return; | 618 | return; |
620 | #ifdef CONFIG_X86_LOCAL_APIC | 619 | #ifdef CONFIG_X86_LOCAL_APIC |
621 | /* | 620 | /* |
622 | * Ok, so this is none of the documented NMI sources, | 621 | * Ok, so this is none of the documented NMI sources, |
623 | * so it must be the NMI watchdog. | 622 | * so it must be the NMI watchdog. |
624 | */ | 623 | */ |
625 | if (nmi_watchdog > 0) { | 624 | if (nmi_watchdog > 0) { |
626 | nmi_watchdog_tick(regs,reason); | 625 | nmi_watchdog_tick(regs,reason); |
627 | return; | 626 | return; |
628 | } | 627 | } |
629 | #endif | 628 | #endif |
630 | unknown_nmi_error(reason, regs); | 629 | unknown_nmi_error(reason, regs); |
631 | return; | 630 | return; |
632 | } | 631 | } |
633 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 632 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) |
634 | return; | 633 | return; |
635 | 634 | ||
636 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | 635 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ |
637 | 636 | ||
638 | if (reason & 0x80) | 637 | if (reason & 0x80) |
639 | mem_parity_error(reason, regs); | 638 | mem_parity_error(reason, regs); |
640 | if (reason & 0x40) | 639 | if (reason & 0x40) |
641 | io_check_error(reason, regs); | 640 | io_check_error(reason, regs); |
642 | } | 641 | } |
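The 0xc0 mask above and the 0x80/0x40 tests at the end of the handler decode the same byte: on PC-compatible systems the NMI status is read from system control port B (port 0x61), where bit 7 reports a memory parity/ECC error and bit 6 an I/O channel check. A minimal sketch, assuming get_nmi_reason() is essentially a port read (the helper name here is hypothetical):

	#include <asm/io.h>

	static unsigned char external_nmi_reason(void)
	{
		unsigned char reason = inb(0x61);	/* system control port B */
		return reason & 0xc0;			/* bit 7: parity, bit 6: IOCHK */
	}

Any NMI that leaves both bits clear is assumed to be an IPI or watchdog tick, which is why the !(reason & 0xc0) branch is taken first.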
643 | 642 | ||
644 | /* runs on IST stack. */ | 643 | /* runs on IST stack. */ |
645 | asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) | 644 | asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) |
646 | { | 645 | { |
647 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { | 646 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { |
648 | return; | 647 | return; |
649 | } | 648 | } |
650 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | 649 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); |
651 | return; | 650 | return; |
652 | } | 651 | } |
653 | 652 | ||
654 | /* Help handler running on IST stack to switch back to user stack | 653 | /* Help handler running on IST stack to switch back to user stack |
655 | for scheduling or signal handling. The actual stack switch is done in | 654 | for scheduling or signal handling. The actual stack switch is done in |
656 | entry.S */ | 655 | entry.S */ |
657 | asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) | 656 | asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) |
658 | { | 657 | { |
659 | struct pt_regs *regs = eregs; | 658 | struct pt_regs *regs = eregs; |
660 | /* Already synced */ | 659 | /* Already synced */ |
661 | if (eregs == (struct pt_regs *)eregs->rsp) | 660 | if (eregs == (struct pt_regs *)eregs->rsp) |
662 | ; | 661 | ; |
663 | /* Exception from user space */ | 662 | /* Exception from user space */ |
664 | else if (user_mode(eregs)) | 663 | else if (user_mode(eregs)) |
665 | regs = ((struct pt_regs *)current->thread.rsp0) - 1; | 664 | regs = ((struct pt_regs *)current->thread.rsp0) - 1; |
666 | /* Exception from kernel and interrupts are enabled. Move to | 665 | /* Exception from kernel and interrupts are enabled. Move to |
667 | kernel process stack. */ | 666 | kernel process stack. */ |
668 | else if (eregs->eflags & X86_EFLAGS_IF) | 667 | else if (eregs->eflags & X86_EFLAGS_IF) |
669 | regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs)); | 668 | regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs)); |
670 | if (eregs != regs) | 669 | if (eregs != regs) |
671 | *regs = *eregs; | 670 | *regs = *eregs; |
672 | return regs; | 671 | return regs; |
673 | } | 672 | } |
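The empty first branch is deliberate: if eregs already equals the pt_regs sitting at the interrupted rsp, the trap frame was pushed on the stack that was already in use — there is no IST stack to escape from — so regs stays equal to eregs and the final copy is skipped by the eregs != regs guard.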
674 | 673 | ||
675 | /* runs on IST stack. */ | 674 | /* runs on IST stack. */ |
676 | asmlinkage void __kprobes do_debug(struct pt_regs * regs, | 675 | asmlinkage void __kprobes do_debug(struct pt_regs * regs, |
677 | unsigned long error_code) | 676 | unsigned long error_code) |
678 | { | 677 | { |
679 | unsigned long condition; | 678 | unsigned long condition; |
680 | struct task_struct *tsk = current; | 679 | struct task_struct *tsk = current; |
681 | siginfo_t info; | 680 | siginfo_t info; |
682 | 681 | ||
683 | get_debugreg(condition, 6); | 682 | get_debugreg(condition, 6); |
684 | 683 | ||
685 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 684 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
686 | SIGTRAP) == NOTIFY_STOP) | 685 | SIGTRAP) == NOTIFY_STOP) |
687 | return; | 686 | return; |
688 | 687 | ||
689 | conditional_sti(regs); | 688 | conditional_sti(regs); |
690 | 689 | ||
691 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 690 | /* Mask out spurious debug traps due to lazy DR7 setting */ |
692 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 691 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { |
693 | if (!tsk->thread.debugreg7) { | 692 | if (!tsk->thread.debugreg7) { |
694 | goto clear_dr7; | 693 | goto clear_dr7; |
695 | } | 694 | } |
696 | } | 695 | } |
697 | 696 | ||
698 | tsk->thread.debugreg6 = condition; | 697 | tsk->thread.debugreg6 = condition; |
699 | 698 | ||
700 | /* Mask out spurious TF errors due to lazy TF clearing */ | 699 | /* Mask out spurious TF errors due to lazy TF clearing */ |
701 | if (condition & DR_STEP) { | 700 | if (condition & DR_STEP) { |
702 | /* | 701 | /* |
703 | * The TF error should be masked out only if the current | 702 | * The TF error should be masked out only if the current |
704 | * process is not traced and if the TRAP flag has been set | 703 | * process is not traced and if the TRAP flag has been set |
705 | * previously by a tracing process (condition detected by | 704 | * previously by a tracing process (condition detected by |
706 | * the PT_DTRACE flag); remember that the i386 TRAP flag | 705 | * the PT_DTRACE flag); remember that the i386 TRAP flag |
707 | * can be modified by the process itself in user mode, | 706 | * can be modified by the process itself in user mode, |
708 | * allowing programs to debug themselves without the ptrace() | 707 | * allowing programs to debug themselves without the ptrace() |
709 | * interface. | 708 | * interface. |
710 | */ | 709 | */ |
711 | if (!user_mode(regs)) | 710 | if (!user_mode(regs)) |
712 | goto clear_TF_reenable; | 711 | goto clear_TF_reenable; |
713 | /* | 712 | /* |
714 | * Was the TF flag set by a debugger? If so, clear it now, | 713 | * Was the TF flag set by a debugger? If so, clear it now, |
715 | * so that register information is correct. | 714 | * so that register information is correct. |
716 | */ | 715 | */ |
717 | if (tsk->ptrace & PT_DTRACE) { | 716 | if (tsk->ptrace & PT_DTRACE) { |
718 | regs->eflags &= ~TF_MASK; | 717 | regs->eflags &= ~TF_MASK; |
719 | tsk->ptrace &= ~PT_DTRACE; | 718 | tsk->ptrace &= ~PT_DTRACE; |
720 | } | 719 | } |
721 | } | 720 | } |
722 | 721 | ||
723 | /* Ok, finally something we can handle */ | 722 | /* Ok, finally something we can handle */ |
724 | tsk->thread.trap_no = 1; | 723 | tsk->thread.trap_no = 1; |
725 | tsk->thread.error_code = error_code; | 724 | tsk->thread.error_code = error_code; |
726 | info.si_signo = SIGTRAP; | 725 | info.si_signo = SIGTRAP; |
727 | info.si_errno = 0; | 726 | info.si_errno = 0; |
728 | info.si_code = TRAP_BRKPT; | 727 | info.si_code = TRAP_BRKPT; |
729 | info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL; | 728 | info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL; |
730 | force_sig_info(SIGTRAP, &info, tsk); | 729 | force_sig_info(SIGTRAP, &info, tsk); |
731 | 730 | ||
732 | clear_dr7: | 731 | clear_dr7: |
733 | set_debugreg(0UL, 7); | 732 | set_debugreg(0UL, 7); |
734 | return; | 733 | return; |
735 | 734 | ||
736 | clear_TF_reenable: | 735 | clear_TF_reenable: |
737 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | 736 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
738 | regs->eflags &= ~TF_MASK; | 737 | regs->eflags &= ~TF_MASK; |
739 | } | 738 | } |
740 | 739 | ||
741 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | 740 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) |
742 | { | 741 | { |
743 | const struct exception_table_entry *fixup; | 742 | const struct exception_table_entry *fixup; |
744 | fixup = search_exception_tables(regs->rip); | 743 | fixup = search_exception_tables(regs->rip); |
745 | if (fixup) { | 744 | if (fixup) { |
746 | regs->rip = fixup->fixup; | 745 | regs->rip = fixup->fixup; |
747 | return 1; | 746 | return 1; |
748 | } | 747 | } |
749 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | 748 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); |
750 | /* Illegal floating point operation in the kernel */ | 749 | /* Illegal floating point operation in the kernel */ |
751 | current->thread.trap_no = trapnr; | 750 | current->thread.trap_no = trapnr; |
752 | die(str, regs, 0); | 751 | die(str, regs, 0); |
753 | return 0; | 752 | return 0; |
754 | } | 753 | } |
755 | 754 | ||
756 | /* | 755 | /* |
757 | * Note that we play around with the 'TS' bit in an attempt to get | 756 | * Note that we play around with the 'TS' bit in an attempt to get |
758 | * the correct behaviour even in the presence of the asynchronous | 757 | * the correct behaviour even in the presence of the asynchronous |
759 | * IRQ13 behaviour | 758 | * IRQ13 behaviour |
760 | */ | 759 | */ |
761 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) | 760 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) |
762 | { | 761 | { |
763 | void __user *rip = (void __user *)(regs->rip); | 762 | void __user *rip = (void __user *)(regs->rip); |
764 | struct task_struct * task; | 763 | struct task_struct * task; |
765 | siginfo_t info; | 764 | siginfo_t info; |
766 | unsigned short cwd, swd; | 765 | unsigned short cwd, swd; |
767 | 766 | ||
768 | conditional_sti(regs); | 767 | conditional_sti(regs); |
769 | if (!user_mode(regs) && | 768 | if (!user_mode(regs) && |
770 | kernel_math_error(regs, "kernel x87 math error", 16)) | 769 | kernel_math_error(regs, "kernel x87 math error", 16)) |
771 | return; | 770 | return; |
772 | 771 | ||
773 | /* | 772 | /* |
774 | * Save the info for the exception handler and clear the error. | 773 | * Save the info for the exception handler and clear the error. |
775 | */ | 774 | */ |
776 | task = current; | 775 | task = current; |
777 | save_init_fpu(task); | 776 | save_init_fpu(task); |
778 | task->thread.trap_no = 16; | 777 | task->thread.trap_no = 16; |
779 | task->thread.error_code = 0; | 778 | task->thread.error_code = 0; |
780 | info.si_signo = SIGFPE; | 779 | info.si_signo = SIGFPE; |
781 | info.si_errno = 0; | 780 | info.si_errno = 0; |
782 | info.si_code = __SI_FAULT; | 781 | info.si_code = __SI_FAULT; |
783 | info.si_addr = rip; | 782 | info.si_addr = rip; |
784 | /* | 783 | /* |
785 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 784 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
786 | * status. 0x3f is the exception bits in these regs, 0x200 is the | 785 | * status. 0x3f is the exception bits in these regs, 0x200 is the |
787 | * C1 reg you need in case of a stack fault, 0x040 is the stack | 786 | * C1 reg you need in case of a stack fault, 0x040 is the stack |
788 | * fault bit. We should only be taking one exception at a time, | 787 | * fault bit. We should only be taking one exception at a time, |
789 | * so if this combination doesn't produce any single exception, | 788 | * so if this combination doesn't produce any single exception, |
790 | * then we have a bad program that isn't synchronizing its FPU usage | 789 | * then we have a bad program that isn't synchronizing its FPU usage |
791 | * and it will suffer the consequences since we won't be able to | 790 | * and it will suffer the consequences since we won't be able to |
792 | * fully reproduce the context of the exception | 791 | * fully reproduce the context of the exception |
793 | */ | 792 | */ |
794 | cwd = get_fpu_cwd(task); | 793 | cwd = get_fpu_cwd(task); |
795 | swd = get_fpu_swd(task); | 794 | swd = get_fpu_swd(task); |
796 | switch (swd & ~cwd & 0x3f) { | 795 | switch (swd & ~cwd & 0x3f) { |
797 | case 0x000: | 796 | case 0x000: |
798 | default: | 797 | default: |
799 | break; | 798 | break; |
800 | case 0x001: /* Invalid Op */ | 799 | case 0x001: /* Invalid Op */ |
801 | /* | 800 | /* |
802 | * swd & 0x240 == 0x040: Stack Underflow | 801 | * swd & 0x240 == 0x040: Stack Underflow |
803 | * swd & 0x240 == 0x240: Stack Overflow | 802 | * swd & 0x240 == 0x240: Stack Overflow |
804 | * User must clear the SF bit (0x40) if set | 803 | * User must clear the SF bit (0x40) if set |
805 | */ | 804 | */ |
806 | info.si_code = FPE_FLTINV; | 805 | info.si_code = FPE_FLTINV; |
807 | break; | 806 | break; |
808 | case 0x002: /* Denormalize */ | 807 | case 0x002: /* Denormalize */ |
809 | case 0x010: /* Underflow */ | 808 | case 0x010: /* Underflow */ |
810 | info.si_code = FPE_FLTUND; | 809 | info.si_code = FPE_FLTUND; |
811 | break; | 810 | break; |
812 | case 0x004: /* Zero Divide */ | 811 | case 0x004: /* Zero Divide */ |
813 | info.si_code = FPE_FLTDIV; | 812 | info.si_code = FPE_FLTDIV; |
814 | break; | 813 | break; |
815 | case 0x008: /* Overflow */ | 814 | case 0x008: /* Overflow */ |
816 | info.si_code = FPE_FLTOVF; | 815 | info.si_code = FPE_FLTOVF; |
817 | break; | 816 | break; |
818 | case 0x020: /* Precision */ | 817 | case 0x020: /* Precision */ |
819 | info.si_code = FPE_FLTRES; | 818 | info.si_code = FPE_FLTRES; |
820 | break; | 819 | break; |
821 | } | 820 | } |
822 | force_sig_info(SIGFPE, &info, task); | 821 | force_sig_info(SIGFPE, &info, task); |
823 | } | 822 | } |
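A worked example of the (~cwd & swd) masking: the x87 power-up control word is 0x037f, whose low six bits mask every exception, so a masked fault leaves swd & ~cwd & 0x3f == 0 and si_code stays at __SI_FAULT. If a program unmasks zero-divide (clearing bit 2, giving cwd = 0x037b) and then divides by zero, ZE is set sticky in the status word:

	swd & 0x3f          == 0x04	/* ZE flag raised by the fault */
	~cwd & 0x3f         == 0x04	/* only ZE is unmasked */
	swd & ~cwd & 0x3f   == 0x04	/* -> case 0x004: FPE_FLTDIV */

and the switch reports FPE_FLTDIV to the signal handler.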
824 | 823 | ||
825 | asmlinkage void bad_intr(void) | 824 | asmlinkage void bad_intr(void) |
826 | { | 825 | { |
827 | printk("bad interrupt"); | 826 | printk("bad interrupt"); |
828 | } | 827 | } |
829 | 828 | ||
830 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | 829 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) |
831 | { | 830 | { |
832 | void __user *rip = (void __user *)(regs->rip); | 831 | void __user *rip = (void __user *)(regs->rip); |
833 | struct task_struct * task; | 832 | struct task_struct * task; |
834 | siginfo_t info; | 833 | siginfo_t info; |
835 | unsigned short mxcsr; | 834 | unsigned short mxcsr; |
836 | 835 | ||
837 | conditional_sti(regs); | 836 | conditional_sti(regs); |
838 | if (!user_mode(regs) && | 837 | if (!user_mode(regs) && |
839 | kernel_math_error(regs, "kernel simd math error", 19)) | 838 | kernel_math_error(regs, "kernel simd math error", 19)) |
840 | return; | 839 | return; |
841 | 840 | ||
842 | /* | 841 | /* |
843 | * Save the info for the exception handler and clear the error. | 842 | * Save the info for the exception handler and clear the error. |
844 | */ | 843 | */ |
845 | task = current; | 844 | task = current; |
846 | save_init_fpu(task); | 845 | save_init_fpu(task); |
847 | task->thread.trap_no = 19; | 846 | task->thread.trap_no = 19; |
848 | task->thread.error_code = 0; | 847 | task->thread.error_code = 0; |
849 | info.si_signo = SIGFPE; | 848 | info.si_signo = SIGFPE; |
850 | info.si_errno = 0; | 849 | info.si_errno = 0; |
851 | info.si_code = __SI_FAULT; | 850 | info.si_code = __SI_FAULT; |
852 | info.si_addr = rip; | 851 | info.si_addr = rip; |
853 | /* | 852 | /* |
854 | * The SIMD FPU exceptions are handled a little differently, as there | 853 | * The SIMD FPU exceptions are handled a little differently, as there |
855 | * is only a single status/control register. Thus, to determine which | 854 | * is only a single status/control register. Thus, to determine which |
856 | * unmasked exception was caught we must mask the exception mask bits | 855 | * unmasked exception was caught we must mask the exception mask bits |
857 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | 856 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. |
858 | */ | 857 | */ |
859 | mxcsr = get_fpu_mxcsr(task); | 858 | mxcsr = get_fpu_mxcsr(task); |
860 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | 859 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { |
861 | case 0x000: | 860 | case 0x000: |
862 | default: | 861 | default: |
863 | break; | 862 | break; |
864 | case 0x001: /* Invalid Op */ | 863 | case 0x001: /* Invalid Op */ |
865 | info.si_code = FPE_FLTINV; | 864 | info.si_code = FPE_FLTINV; |
866 | break; | 865 | break; |
867 | case 0x002: /* Denormalize */ | 866 | case 0x002: /* Denormalize */ |
868 | case 0x010: /* Underflow */ | 867 | case 0x010: /* Underflow */ |
869 | info.si_code = FPE_FLTUND; | 868 | info.si_code = FPE_FLTUND; |
870 | break; | 869 | break; |
871 | case 0x004: /* Zero Divide */ | 870 | case 0x004: /* Zero Divide */ |
872 | info.si_code = FPE_FLTDIV; | 871 | info.si_code = FPE_FLTDIV; |
873 | break; | 872 | break; |
874 | case 0x008: /* Overflow */ | 873 | case 0x008: /* Overflow */ |
875 | info.si_code = FPE_FLTOVF; | 874 | info.si_code = FPE_FLTOVF; |
876 | break; | 875 | break; |
877 | case 0x020: /* Precision */ | 876 | case 0x020: /* Precision */ |
878 | info.si_code = FPE_FLTRES; | 877 | info.si_code = FPE_FLTRES; |
879 | break; | 878 | break; |
880 | } | 879 | } |
881 | force_sig_info(SIGFPE, &info, task); | 880 | force_sig_info(SIGFPE, &info, task); |
882 | } | 881 | } |
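The single-register SIMD case decodes the same way: MXCSR keeps its exception flags in bits 0-5 and the corresponding mask bits in bits 7-12, so shifting the mask field right by 7 aligns each mask bit over its flag. With the reset value 0x1f80 everything is masked and the expression yields 0; unmask zero-divide by clearing bit 9 (mxcsr = 0x1d80), and a SIMD divide by zero then sets flag bit 2:

	(mxcsr & 0x1f80) >> 7    == 0x3b	/* remaining masks */
	~0x3b & (mxcsr & 0x3f)   == 0x04	/* -> FPE_FLTDIV */

matching the x87 path above.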
883 | 882 | ||
884 | asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) | 883 | asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) |
885 | { | 884 | { |
886 | } | 885 | } |
887 | 886 | ||
888 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | 887 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) |
889 | { | 888 | { |
890 | } | 889 | } |
891 | 890 | ||
892 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | 891 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) |
893 | { | 892 | { |
894 | } | 893 | } |
895 | 894 | ||
896 | /* | 895 | /* |
897 | * 'math_state_restore()' saves the current math information in the | 896 | * 'math_state_restore()' saves the current math information in the |
898 | * old math state array, and gets the new ones from the current task | 897 | * old math state array, and gets the new ones from the current task |
899 | * | 898 | * |
900 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | 899 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. |
901 | * Don't touch unless you *really* know how it works. | 900 | * Don't touch unless you *really* know how it works. |
902 | */ | 901 | */ |
903 | asmlinkage void math_state_restore(void) | 902 | asmlinkage void math_state_restore(void) |
904 | { | 903 | { |
905 | struct task_struct *me = current; | 904 | struct task_struct *me = current; |
906 | clts(); /* Allow maths ops (or we recurse) */ | 905 | clts(); /* Allow maths ops (or we recurse) */ |
907 | 906 | ||
908 | if (!used_math()) | 907 | if (!used_math()) |
909 | init_fpu(me); | 908 | init_fpu(me); |
910 | restore_fpu_checking(&me->thread.i387.fxsave); | 909 | restore_fpu_checking(&me->thread.i387.fxsave); |
911 | me->thread_info->status |= TS_USEDFPU; | 910 | me->thread_info->status |= TS_USEDFPU; |
912 | } | 911 | } |
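The clts() comment is load-bearing: with CR0.TS set, the very next FPU instruction — including the fxrstor inside restore_fpu_checking() — would raise device-not-available (#NM, vector 7), whose handling is exactly what brought us into math_state_restore() in the first place. Clearing TS before touching FPU state breaks that recursion, and setting TS_USEDFPU afterwards tells the context-switch code that this task now owns live FPU registers.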
913 | 912 | ||
914 | void do_call_debug(struct pt_regs *regs) | 913 | void do_call_debug(struct pt_regs *regs) |
915 | { | 914 | { |
916 | notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT); | 915 | notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT); |
917 | } | 916 | } |
918 | 917 | ||
919 | void __init trap_init(void) | 918 | void __init trap_init(void) |
920 | { | 919 | { |
921 | set_intr_gate(0,&divide_error); | 920 | set_intr_gate(0,&divide_error); |
922 | set_intr_gate_ist(1,&debug,DEBUG_STACK); | 921 | set_intr_gate_ist(1,&debug,DEBUG_STACK); |
923 | set_intr_gate_ist(2,&nmi,NMI_STACK); | 922 | set_intr_gate_ist(2,&nmi,NMI_STACK); |
924 | set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */ | 923 | set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */ |
925 | set_system_gate(4,&overflow); /* int4 can be called from all */ | 924 | set_system_gate(4,&overflow); /* int4 can be called from all */ |
926 | set_intr_gate(5,&bounds); | 925 | set_intr_gate(5,&bounds); |
927 | set_intr_gate(6,&invalid_op); | 926 | set_intr_gate(6,&invalid_op); |
928 | set_intr_gate(7,&device_not_available); | 927 | set_intr_gate(7,&device_not_available); |
929 | set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK); | 928 | set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK); |
930 | set_intr_gate(9,&coprocessor_segment_overrun); | 929 | set_intr_gate(9,&coprocessor_segment_overrun); |
931 | set_intr_gate(10,&invalid_TSS); | 930 | set_intr_gate(10,&invalid_TSS); |
932 | set_intr_gate(11,&segment_not_present); | 931 | set_intr_gate(11,&segment_not_present); |
933 | set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK); | 932 | set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK); |
934 | set_intr_gate(13,&general_protection); | 933 | set_intr_gate(13,&general_protection); |
935 | set_intr_gate(14,&page_fault); | 934 | set_intr_gate(14,&page_fault); |
936 | set_intr_gate(15,&spurious_interrupt_bug); | 935 | set_intr_gate(15,&spurious_interrupt_bug); |
937 | set_intr_gate(16,&coprocessor_error); | 936 | set_intr_gate(16,&coprocessor_error); |
938 | set_intr_gate(17,&alignment_check); | 937 | set_intr_gate(17,&alignment_check); |
939 | #ifdef CONFIG_X86_MCE | 938 | #ifdef CONFIG_X86_MCE |
940 | set_intr_gate_ist(18,&machine_check, MCE_STACK); | 939 | set_intr_gate_ist(18,&machine_check, MCE_STACK); |
941 | #endif | 940 | #endif |
942 | set_intr_gate(19,&simd_coprocessor_error); | 941 | set_intr_gate(19,&simd_coprocessor_error); |
943 | 942 | ||
944 | #ifdef CONFIG_IA32_EMULATION | 943 | #ifdef CONFIG_IA32_EMULATION |
945 | set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | 944 | set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); |
946 | #endif | 945 | #endif |
947 | 946 | ||
948 | set_intr_gate(KDB_VECTOR, call_debug); | 947 | set_intr_gate(KDB_VECTOR, call_debug); |
949 | 948 | ||
950 | /* | 949 | /* |
951 | * Should be a barrier for any external CPU state. | 950 | * Should be a barrier for any external CPU state. |
952 | */ | 951 | */ |
953 | cpu_init(); | 952 | cpu_init(); |
954 | } | 953 | } |
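Two independent properties are being chosen per vector above: set_system_gate differs from set_intr_gate only in descriptor privilege level (DPL 3 instead of 0, which is why int3, into and the IA32 syscall vector must be system gates to be raisable from user mode), while the _ist variants additionally assign an Interrupt Stack Table slot so the handler always enters on a known-good stack — essential for NMI and double fault, where the current kernel stack may itself be the problem. A rough sketch of the relationship (helper shape assumed, not the exact desc.h code):

	static void set_intr_gate(int nr, void *func)
	{
		_set_gate(nr, GATE_INTERRUPT, func, /*dpl*/ 0, /*ist*/ 0);
	}
	static void set_system_gate(int nr, void *func)
	{
		_set_gate(nr, GATE_INTERRUPT, func, /*dpl*/ 3, /*ist*/ 0);
	}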
955 | 954 | ||
956 | 955 | ||
957 | /* Actual parsing is done early in setup.c. */ | 956 | /* Actual parsing is done early in setup.c. */ |
958 | static int __init oops_dummy(char *s) | 957 | static int __init oops_dummy(char *s) |
959 | { | 958 | { |
960 | panic_on_oops = 1; | 959 | panic_on_oops = 1; |
961 | return -1; | 960 | return -1; |
962 | } | 961 | } |
963 | __setup("oops=", oops_dummy); | 962 | __setup("oops=", oops_dummy); |
964 | 963 | ||
965 | static int __init kstack_setup(char *s) | 964 | static int __init kstack_setup(char *s) |
966 | { | 965 | { |
967 | kstack_depth_to_print = simple_strtoul(s,NULL,0); | 966 | kstack_depth_to_print = simple_strtoul(s,NULL,0); |
968 | return 0; | 967 | return 0; |
969 | } | 968 | } |
970 | __setup("kstack=", kstack_setup); | 969 | __setup("kstack=", kstack_setup); |
971 | 970 | ||
972 | 971 |
arch/x86_64/kernel/x8664_ksyms.c
1 | #include <linux/config.h> | 1 | #include <linux/config.h> |
2 | #include <linux/module.h> | 2 | #include <linux/module.h> |
3 | #include <linux/smp.h> | 3 | #include <linux/smp.h> |
4 | #include <linux/user.h> | 4 | #include <linux/user.h> |
5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
6 | #include <linux/in6.h> | 6 | #include <linux/in6.h> |
7 | #include <linux/interrupt.h> | 7 | #include <linux/interrupt.h> |
8 | #include <linux/smp_lock.h> | 8 | #include <linux/smp_lock.h> |
9 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
10 | #include <linux/pci.h> | 10 | #include <linux/pci.h> |
11 | #include <linux/apm_bios.h> | 11 | #include <linux/apm_bios.h> |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/string.h> | 13 | #include <linux/string.h> |
14 | #include <linux/syscalls.h> | 14 | #include <linux/syscalls.h> |
15 | #include <linux/tty.h> | 15 | #include <linux/tty.h> |
16 | 16 | ||
17 | #include <asm/semaphore.h> | 17 | #include <asm/semaphore.h> |
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <asm/i387.h> | 19 | #include <asm/i387.h> |
20 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
21 | #include <asm/checksum.h> | 21 | #include <asm/checksum.h> |
22 | #include <asm/io.h> | 22 | #include <asm/io.h> |
23 | #include <asm/delay.h> | 23 | #include <asm/delay.h> |
24 | #include <asm/irq.h> | 24 | #include <asm/irq.h> |
25 | #include <asm/mmx.h> | 25 | #include <asm/mmx.h> |
26 | #include <asm/desc.h> | 26 | #include <asm/desc.h> |
27 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
28 | #include <asm/pgalloc.h> | 28 | #include <asm/pgalloc.h> |
29 | #include <asm/nmi.h> | 29 | #include <asm/nmi.h> |
30 | #include <asm/kdebug.h> | 30 | #include <asm/kdebug.h> |
31 | #include <asm/unistd.h> | 31 | #include <asm/unistd.h> |
32 | #include <asm/tlbflush.h> | 32 | #include <asm/tlbflush.h> |
33 | #include <asm/kdebug.h> | 33 | #include <asm/kdebug.h> |
34 | 34 | ||
35 | extern spinlock_t rtc_lock; | 35 | extern spinlock_t rtc_lock; |
36 | 36 | ||
37 | #ifdef CONFIG_SMP | 37 | #ifdef CONFIG_SMP |
38 | extern void __write_lock_failed(rwlock_t *rw); | 38 | extern void __write_lock_failed(rwlock_t *rw); |
39 | extern void __read_lock_failed(rwlock_t *rw); | 39 | extern void __read_lock_failed(rwlock_t *rw); |
40 | #endif | 40 | #endif |
41 | 41 | ||
42 | #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) | 42 | #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) |
43 | extern struct drive_info_struct drive_info; | 43 | extern struct drive_info_struct drive_info; |
44 | EXPORT_SYMBOL(drive_info); | 44 | EXPORT_SYMBOL(drive_info); |
45 | #endif | 45 | #endif |
46 | 46 | ||
47 | extern unsigned long get_cmos_time(void); | 47 | extern unsigned long get_cmos_time(void); |
48 | 48 | ||
49 | /* platform dependent support */ | 49 | /* platform dependent support */ |
50 | EXPORT_SYMBOL(boot_cpu_data); | 50 | EXPORT_SYMBOL(boot_cpu_data); |
51 | //EXPORT_SYMBOL(dump_fpu); | 51 | //EXPORT_SYMBOL(dump_fpu); |
52 | EXPORT_SYMBOL(__ioremap); | 52 | EXPORT_SYMBOL(__ioremap); |
53 | EXPORT_SYMBOL(ioremap_nocache); | 53 | EXPORT_SYMBOL(ioremap_nocache); |
54 | EXPORT_SYMBOL(iounmap); | 54 | EXPORT_SYMBOL(iounmap); |
55 | EXPORT_SYMBOL(kernel_thread); | 55 | EXPORT_SYMBOL(kernel_thread); |
56 | EXPORT_SYMBOL(pm_idle); | 56 | EXPORT_SYMBOL(pm_idle); |
57 | EXPORT_SYMBOL(pm_power_off); | 57 | EXPORT_SYMBOL(pm_power_off); |
58 | EXPORT_SYMBOL(get_cmos_time); | 58 | EXPORT_SYMBOL(get_cmos_time); |
59 | 59 | ||
60 | EXPORT_SYMBOL(__down_failed); | 60 | EXPORT_SYMBOL(__down_failed); |
61 | EXPORT_SYMBOL(__down_failed_interruptible); | 61 | EXPORT_SYMBOL(__down_failed_interruptible); |
62 | EXPORT_SYMBOL(__down_failed_trylock); | 62 | EXPORT_SYMBOL(__down_failed_trylock); |
63 | EXPORT_SYMBOL(__up_wakeup); | 63 | EXPORT_SYMBOL(__up_wakeup); |
64 | /* Networking helper routines. */ | 64 | /* Networking helper routines. */ |
65 | EXPORT_SYMBOL(csum_partial_copy_nocheck); | 65 | EXPORT_SYMBOL(csum_partial_copy_nocheck); |
66 | EXPORT_SYMBOL(ip_compute_csum); | 66 | EXPORT_SYMBOL(ip_compute_csum); |
67 | /* Delay loops */ | 67 | /* Delay loops */ |
68 | EXPORT_SYMBOL(__udelay); | 68 | EXPORT_SYMBOL(__udelay); |
69 | EXPORT_SYMBOL(__ndelay); | 69 | EXPORT_SYMBOL(__ndelay); |
70 | EXPORT_SYMBOL(__delay); | 70 | EXPORT_SYMBOL(__delay); |
71 | EXPORT_SYMBOL(__const_udelay); | 71 | EXPORT_SYMBOL(__const_udelay); |
72 | 72 | ||
73 | EXPORT_SYMBOL(__get_user_1); | 73 | EXPORT_SYMBOL(__get_user_1); |
74 | EXPORT_SYMBOL(__get_user_2); | 74 | EXPORT_SYMBOL(__get_user_2); |
75 | EXPORT_SYMBOL(__get_user_4); | 75 | EXPORT_SYMBOL(__get_user_4); |
76 | EXPORT_SYMBOL(__get_user_8); | 76 | EXPORT_SYMBOL(__get_user_8); |
77 | EXPORT_SYMBOL(__put_user_1); | 77 | EXPORT_SYMBOL(__put_user_1); |
78 | EXPORT_SYMBOL(__put_user_2); | 78 | EXPORT_SYMBOL(__put_user_2); |
79 | EXPORT_SYMBOL(__put_user_4); | 79 | EXPORT_SYMBOL(__put_user_4); |
80 | EXPORT_SYMBOL(__put_user_8); | 80 | EXPORT_SYMBOL(__put_user_8); |
81 | 81 | ||
82 | EXPORT_SYMBOL(strncpy_from_user); | 82 | EXPORT_SYMBOL(strncpy_from_user); |
83 | EXPORT_SYMBOL(__strncpy_from_user); | 83 | EXPORT_SYMBOL(__strncpy_from_user); |
84 | EXPORT_SYMBOL(clear_user); | 84 | EXPORT_SYMBOL(clear_user); |
85 | EXPORT_SYMBOL(__clear_user); | 85 | EXPORT_SYMBOL(__clear_user); |
86 | EXPORT_SYMBOL(copy_user_generic); | 86 | EXPORT_SYMBOL(copy_user_generic); |
87 | EXPORT_SYMBOL(copy_from_user); | 87 | EXPORT_SYMBOL(copy_from_user); |
88 | EXPORT_SYMBOL(copy_to_user); | 88 | EXPORT_SYMBOL(copy_to_user); |
89 | EXPORT_SYMBOL(copy_in_user); | 89 | EXPORT_SYMBOL(copy_in_user); |
90 | EXPORT_SYMBOL(strnlen_user); | 90 | EXPORT_SYMBOL(strnlen_user); |
91 | 91 | ||
92 | #ifdef CONFIG_PCI | 92 | #ifdef CONFIG_PCI |
93 | EXPORT_SYMBOL(pci_mem_start); | 93 | EXPORT_SYMBOL(pci_mem_start); |
94 | #endif | 94 | #endif |
95 | 95 | ||
96 | EXPORT_SYMBOL(copy_page); | 96 | EXPORT_SYMBOL(copy_page); |
97 | EXPORT_SYMBOL(clear_page); | 97 | EXPORT_SYMBOL(clear_page); |
98 | 98 | ||
99 | EXPORT_SYMBOL(cpu_pda); | 99 | EXPORT_SYMBOL(_cpu_pda); |
100 | #ifdef CONFIG_SMP | 100 | #ifdef CONFIG_SMP |
101 | EXPORT_SYMBOL(cpu_data); | 101 | EXPORT_SYMBOL(cpu_data); |
102 | EXPORT_SYMBOL(__write_lock_failed); | 102 | EXPORT_SYMBOL(__write_lock_failed); |
103 | EXPORT_SYMBOL(__read_lock_failed); | 103 | EXPORT_SYMBOL(__read_lock_failed); |
104 | 104 | ||
105 | EXPORT_SYMBOL(smp_call_function); | 105 | EXPORT_SYMBOL(smp_call_function); |
106 | EXPORT_SYMBOL(cpu_callout_map); | 106 | EXPORT_SYMBOL(cpu_callout_map); |
107 | #endif | 107 | #endif |
108 | 108 | ||
109 | #ifdef CONFIG_VT | 109 | #ifdef CONFIG_VT |
110 | EXPORT_SYMBOL(screen_info); | 110 | EXPORT_SYMBOL(screen_info); |
111 | #endif | 111 | #endif |
112 | 112 | ||
113 | EXPORT_SYMBOL(get_wchan); | 113 | EXPORT_SYMBOL(get_wchan); |
114 | 114 | ||
115 | EXPORT_SYMBOL(rtc_lock); | 115 | EXPORT_SYMBOL(rtc_lock); |
116 | 116 | ||
117 | EXPORT_SYMBOL_GPL(set_nmi_callback); | 117 | EXPORT_SYMBOL_GPL(set_nmi_callback); |
118 | EXPORT_SYMBOL_GPL(unset_nmi_callback); | 118 | EXPORT_SYMBOL_GPL(unset_nmi_callback); |
119 | 119 | ||
120 | /* Export string functions. We normally rely on gcc builtin for most of these, | 120 | /* Export string functions. We normally rely on gcc builtin for most of these, |
121 | but gcc sometimes decides not to inline them. */ | 121 | but gcc sometimes decides not to inline them. */ |
122 | #undef memcpy | 122 | #undef memcpy |
123 | #undef memset | 123 | #undef memset |
124 | #undef memmove | 124 | #undef memmove |
125 | #undef strlen | 125 | #undef strlen |
126 | 126 | ||
127 | extern void * memset(void *,int,__kernel_size_t); | 127 | extern void * memset(void *,int,__kernel_size_t); |
128 | extern size_t strlen(const char *); | 128 | extern size_t strlen(const char *); |
129 | extern void * memmove(void * dest,const void *src,size_t count); | 129 | extern void * memmove(void * dest,const void *src,size_t count); |
130 | extern void * memcpy(void *,const void *,__kernel_size_t); | 130 | extern void * memcpy(void *,const void *,__kernel_size_t); |
131 | extern void * __memcpy(void *,const void *,__kernel_size_t); | 131 | extern void * __memcpy(void *,const void *,__kernel_size_t); |
132 | 132 | ||
133 | EXPORT_SYMBOL(memset); | 133 | EXPORT_SYMBOL(memset); |
134 | EXPORT_SYMBOL(strlen); | 134 | EXPORT_SYMBOL(strlen); |
135 | EXPORT_SYMBOL(memmove); | 135 | EXPORT_SYMBOL(memmove); |
136 | EXPORT_SYMBOL(memcpy); | 136 | EXPORT_SYMBOL(memcpy); |
137 | EXPORT_SYMBOL(__memcpy); | 137 | EXPORT_SYMBOL(__memcpy); |
138 | 138 | ||
139 | #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM | 139 | #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM |
140 | /* prototypes are wrong, these are assembly with custom calling functions */ | 140 | /* prototypes are wrong, these are assembly with custom calling functions */ |
141 | extern void rwsem_down_read_failed_thunk(void); | 141 | extern void rwsem_down_read_failed_thunk(void); |
142 | extern void rwsem_wake_thunk(void); | 142 | extern void rwsem_wake_thunk(void); |
143 | extern void rwsem_downgrade_thunk(void); | 143 | extern void rwsem_downgrade_thunk(void); |
144 | extern void rwsem_down_write_failed_thunk(void); | 144 | extern void rwsem_down_write_failed_thunk(void); |
145 | EXPORT_SYMBOL(rwsem_down_read_failed_thunk); | 145 | EXPORT_SYMBOL(rwsem_down_read_failed_thunk); |
146 | EXPORT_SYMBOL(rwsem_wake_thunk); | 146 | EXPORT_SYMBOL(rwsem_wake_thunk); |
147 | EXPORT_SYMBOL(rwsem_downgrade_thunk); | 147 | EXPORT_SYMBOL(rwsem_downgrade_thunk); |
148 | EXPORT_SYMBOL(rwsem_down_write_failed_thunk); | 148 | EXPORT_SYMBOL(rwsem_down_write_failed_thunk); |
149 | #endif | 149 | #endif |
150 | 150 | ||
151 | EXPORT_SYMBOL(empty_zero_page); | 151 | EXPORT_SYMBOL(empty_zero_page); |
152 | 152 | ||
153 | EXPORT_SYMBOL(die_chain); | 153 | EXPORT_SYMBOL(die_chain); |
154 | EXPORT_SYMBOL(register_die_notifier); | 154 | EXPORT_SYMBOL(register_die_notifier); |
155 | 155 | ||
156 | #ifdef CONFIG_SMP | 156 | #ifdef CONFIG_SMP |
157 | EXPORT_SYMBOL(cpu_sibling_map); | 157 | EXPORT_SYMBOL(cpu_sibling_map); |
158 | EXPORT_SYMBOL(smp_num_siblings); | 158 | EXPORT_SYMBOL(smp_num_siblings); |
159 | #endif | 159 | #endif |
160 | 160 | ||
161 | extern void do_softirq_thunk(void); | 161 | extern void do_softirq_thunk(void); |
162 | EXPORT_SYMBOL(do_softirq_thunk); | 162 | EXPORT_SYMBOL(do_softirq_thunk); |
163 | 163 | ||
164 | #ifdef CONFIG_BUG | 164 | #ifdef CONFIG_BUG |
165 | EXPORT_SYMBOL(out_of_line_bug); | 165 | EXPORT_SYMBOL(out_of_line_bug); |
166 | #endif | 166 | #endif |
167 | 167 | ||
168 | EXPORT_SYMBOL(init_level4_pgt); | 168 | EXPORT_SYMBOL(init_level4_pgt); |
169 | 169 | ||
170 | extern unsigned long __supported_pte_mask; | 170 | extern unsigned long __supported_pte_mask; |
171 | EXPORT_SYMBOL(__supported_pte_mask); | 171 | EXPORT_SYMBOL(__supported_pte_mask); |
172 | 172 | ||
173 | #ifdef CONFIG_SMP | 173 | #ifdef CONFIG_SMP |
174 | EXPORT_SYMBOL(flush_tlb_page); | 174 | EXPORT_SYMBOL(flush_tlb_page); |
175 | #endif | 175 | #endif |
176 | 176 | ||
177 | EXPORT_SYMBOL(cpu_khz); | 177 | EXPORT_SYMBOL(cpu_khz); |
178 | 178 | ||
179 | EXPORT_SYMBOL(load_gs_index); | 179 | EXPORT_SYMBOL(load_gs_index); |
180 | 180 | ||
181 | 181 |
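The single substantive change in this file — exporting _cpu_pda rather than cpu_pda — is the visible edge of the rename performed everywhere else: the per-CPU PDA array gains a leading underscore and callers go through a cpu_pda() macro instead of indexing the array themselves. A minimal sketch of the accessor this prepares for (assumed shape; the actual definition lives in include/asm-x86_64/pda.h):

	extern struct x8664_pda _cpu_pda[];
	#define cpu_pda(i) (&_cpu_pda[i])

	/* call sites change from  cpu_pda[cpu].field
	 * to                      cpu_pda(cpu)->field,
	 * so a follow-up patch can move the storage to node-local
	 * memory behind the macro without touching users again. */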
arch/x86_64/mm/numa.c
1 | /* | 1 | /* |
2 | * Generic VM initialization for x86-64 NUMA setups. | 2 | * Generic VM initialization for x86-64 NUMA setups. |
3 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 3 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. |
4 | */ | 4 | */ |
5 | #include <linux/kernel.h> | 5 | #include <linux/kernel.h> |
6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/bootmem.h> | 9 | #include <linux/bootmem.h> |
10 | #include <linux/mmzone.h> | 10 | #include <linux/mmzone.h> |
11 | #include <linux/ctype.h> | 11 | #include <linux/ctype.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/nodemask.h> | 13 | #include <linux/nodemask.h> |
14 | 14 | ||
15 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
16 | #include <asm/proto.h> | 16 | #include <asm/proto.h> |
17 | #include <asm/dma.h> | 17 | #include <asm/dma.h> |
18 | #include <asm/numa.h> | 18 | #include <asm/numa.h> |
19 | #include <asm/acpi.h> | 19 | #include <asm/acpi.h> |
20 | 20 | ||
21 | #ifndef Dprintk | 21 | #ifndef Dprintk |
22 | #define Dprintk(x...) | 22 | #define Dprintk(x...) |
23 | #endif | 23 | #endif |
24 | 24 | ||
25 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 25 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
26 | bootmem_data_t plat_node_bdata[MAX_NUMNODES]; | 26 | bootmem_data_t plat_node_bdata[MAX_NUMNODES]; |
27 | 27 | ||
28 | int memnode_shift; | 28 | int memnode_shift; |
29 | u8 memnodemap[NODEMAPSIZE]; | 29 | u8 memnodemap[NODEMAPSIZE]; |
30 | 30 | ||
31 | unsigned char cpu_to_node[NR_CPUS] __read_mostly = { | 31 | unsigned char cpu_to_node[NR_CPUS] __read_mostly = { |
32 | [0 ... NR_CPUS-1] = NUMA_NO_NODE | 32 | [0 ... NR_CPUS-1] = NUMA_NO_NODE |
33 | }; | 33 | }; |
34 | unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | 34 | unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { |
35 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | 35 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE |
36 | }; | 36 | }; |
37 | cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; | 37 | cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; |
38 | 38 | ||
39 | int numa_off __initdata; | 39 | int numa_off __initdata; |
40 | 40 | ||
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Given a shift value, try to populate memnodemap[] | 43 | * Given a shift value, try to populate memnodemap[] |
44 | * Returns : | 44 | * Returns : |
45 | * 1 if OK | 45 | * 1 if OK |
46 | * 0 if memnodemap[] too small (or shift too small) | 46 | * 0 if memnodemap[] too small (or shift too small) |
47 | * -1 if node overlap or lost ram (shift too big) | 47 | * -1 if node overlap or lost ram (shift too big) |
48 | */ | 48 | */ |
49 | static int __init | 49 | static int __init |
50 | populate_memnodemap(const struct node *nodes, int numnodes, int shift) | 50 | populate_memnodemap(const struct node *nodes, int numnodes, int shift) |
51 | { | 51 | { |
52 | int i; | 52 | int i; |
53 | int res = -1; | 53 | int res = -1; |
54 | unsigned long addr, end; | 54 | unsigned long addr, end; |
55 | 55 | ||
56 | if (shift >= 64) | 56 | if (shift >= 64) |
57 | return -1; | 57 | return -1; |
58 | memset(memnodemap, 0xff, sizeof(memnodemap)); | 58 | memset(memnodemap, 0xff, sizeof(memnodemap)); |
59 | for (i = 0; i < numnodes; i++) { | 59 | for (i = 0; i < numnodes; i++) { |
60 | addr = nodes[i].start; | 60 | addr = nodes[i].start; |
61 | end = nodes[i].end; | 61 | end = nodes[i].end; |
62 | if (addr >= end) | 62 | if (addr >= end) |
63 | continue; | 63 | continue; |
64 | if ((end >> shift) >= NODEMAPSIZE) | 64 | if ((end >> shift) >= NODEMAPSIZE) |
65 | return 0; | 65 | return 0; |
66 | do { | 66 | do { |
67 | if (memnodemap[addr >> shift] != 0xff) | 67 | if (memnodemap[addr >> shift] != 0xff) |
68 | return -1; | 68 | return -1; |
69 | memnodemap[addr >> shift] = i; | 69 | memnodemap[addr >> shift] = i; |
70 | addr += (1UL << shift); | 70 | addr += (1UL << shift); |
71 | } while (addr < end); | 71 | } while (addr < end); |
72 | res = 1; | 72 | res = 1; |
73 | } | 73 | } |
74 | return res; | 74 | return res; |
75 | } | 75 | } |
76 | 76 | ||
77 | int __init compute_hash_shift(struct node *nodes, int numnodes) | 77 | int __init compute_hash_shift(struct node *nodes, int numnodes) |
78 | { | 78 | { |
79 | int shift = 20; | 79 | int shift = 20; |
80 | 80 | ||
81 | while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) | 81 | while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) |
82 | shift++; | 82 | shift++; |
83 | 83 | ||
84 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | 84 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", |
85 | shift); | 85 | shift); |
86 | 86 | ||
87 | if (populate_memnodemap(nodes, numnodes, shift) != 1) { | 87 | if (populate_memnodemap(nodes, numnodes, shift) != 1) { |
88 | printk(KERN_INFO | 88 | printk(KERN_INFO |
89 | "Your memory is not aligned you need to rebuild your kernel " | 89 | "Your memory is not aligned you need to rebuild your kernel " |
90 | "with a bigger NODEMAPSIZE shift=%d\n", | 90 | "with a bigger NODEMAPSIZE shift=%d\n", |
91 | shift); | 91 | shift); |
92 | return -1; | 92 | return -1; |
93 | } | 93 | } |
94 | return shift; | 94 | return shift; |
95 | } | 95 | } |
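A worked example of the shift search: take two nodes covering 0-4GB and 4-8GB. For every shift up to 32, each 1UL << shift chunk falls entirely inside one node (populate_memnodemap() returns 1, or 0 while the map is still too small — either keeps the loop going), but at shift 33 a single 8GB chunk spans both nodes, node 1 finds memnodemap[0] already claimed, and the call returns -1. The loop therefore stops at shift == 32 and the final check confirms the map is valid:

	memnodemap[0x000000000UL >> 32] == 0	/* node 0: 0-4GB */
	memnodemap[0x100000000UL >> 32] == 1	/* node 1: 4-8GB */

after which phys_to_nid() is a single table lookup on (addr >> memnode_shift).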
96 | 96 | ||
97 | #ifdef CONFIG_SPARSEMEM | 97 | #ifdef CONFIG_SPARSEMEM |
98 | int early_pfn_to_nid(unsigned long pfn) | 98 | int early_pfn_to_nid(unsigned long pfn) |
99 | { | 99 | { |
100 | return phys_to_nid(pfn << PAGE_SHIFT); | 100 | return phys_to_nid(pfn << PAGE_SHIFT); |
101 | } | 101 | } |
102 | #endif | 102 | #endif |
103 | 103 | ||
104 | /* Initialize bootmem allocator for a node */ | 104 | /* Initialize bootmem allocator for a node */ |
105 | void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | 105 | void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
106 | { | 106 | { |
107 | unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; | 107 | unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; |
108 | unsigned long nodedata_phys; | 108 | unsigned long nodedata_phys; |
109 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); | 109 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); |
110 | 110 | ||
111 | start = round_up(start, ZONE_ALIGN); | 111 | start = round_up(start, ZONE_ALIGN); |
112 | 112 | ||
113 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end); | 113 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end); |
114 | 114 | ||
115 | start_pfn = start >> PAGE_SHIFT; | 115 | start_pfn = start >> PAGE_SHIFT; |
116 | end_pfn = end >> PAGE_SHIFT; | 116 | end_pfn = end >> PAGE_SHIFT; |
117 | 117 | ||
118 | nodedata_phys = find_e820_area(start, end, pgdat_size); | 118 | nodedata_phys = find_e820_area(start, end, pgdat_size); |
119 | if (nodedata_phys == -1L) | 119 | if (nodedata_phys == -1L) |
120 | panic("Cannot find memory pgdat in node %d\n", nodeid); | 120 | panic("Cannot find memory pgdat in node %d\n", nodeid); |
121 | 121 | ||
122 | Dprintk("nodedata_phys %lx\n", nodedata_phys); | 122 | Dprintk("nodedata_phys %lx\n", nodedata_phys); |
123 | 123 | ||
124 | node_data[nodeid] = phys_to_virt(nodedata_phys); | 124 | node_data[nodeid] = phys_to_virt(nodedata_phys); |
125 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 125 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
126 | NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; | 126 | NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; |
127 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 127 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
128 | NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; | 128 | NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; |
129 | 129 | ||
130 | /* Find a place for the bootmem map */ | 130 | /* Find a place for the bootmem map */ |
131 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); | 131 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); |
132 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); | 132 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); |
133 | bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT); | 133 | bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT); |
134 | if (bootmap_start == -1L) | 134 | if (bootmap_start == -1L) |
135 | panic("Not enough continuous space for bootmap on node %d", nodeid); | 135 | panic("Not enough continuous space for bootmap on node %d", nodeid); |
136 | Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); | 136 | Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); |
137 | 137 | ||
138 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | 138 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), |
139 | bootmap_start >> PAGE_SHIFT, | 139 | bootmap_start >> PAGE_SHIFT, |
140 | start_pfn, end_pfn); | 140 | start_pfn, end_pfn); |
141 | 141 | ||
142 | e820_bootmem_free(NODE_DATA(nodeid), start, end); | 142 | e820_bootmem_free(NODE_DATA(nodeid), start, end); |
143 | 143 | ||
144 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); | 144 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); |
145 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT); | 145 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT); |
146 | node_set_online(nodeid); | 146 | node_set_online(nodeid); |
147 | } | 147 | } |
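The resulting layout at the front of each node: a page-aligned pg_data_t (pgdat_size bytes) located via find_e820_area(), then the bootmem bitmap (bootmap_pages pages) starting at the next page boundary. Both regions are immediately handed to reserve_bootmem_node() so the freshly initialized allocator can never give them out, while everything else the e820 map reports for the node is released into bootmem by e820_bootmem_free().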
148 | 148 | ||
149 | /* Initialize final allocator for a zone */ | 149 | /* Initialize final allocator for a zone */ |
150 | void __init setup_node_zones(int nodeid) | 150 | void __init setup_node_zones(int nodeid) |
151 | { | 151 | { |
152 | unsigned long start_pfn, end_pfn; | 152 | unsigned long start_pfn, end_pfn; |
153 | unsigned long zones[MAX_NR_ZONES]; | 153 | unsigned long zones[MAX_NR_ZONES]; |
154 | unsigned long holes[MAX_NR_ZONES]; | 154 | unsigned long holes[MAX_NR_ZONES]; |
155 | 155 | ||
156 | start_pfn = node_start_pfn(nodeid); | 156 | start_pfn = node_start_pfn(nodeid); |
157 | end_pfn = node_end_pfn(nodeid); | 157 | end_pfn = node_end_pfn(nodeid); |
158 | 158 | ||
159 | Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n", | 159 | Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n", |
160 | nodeid, start_pfn, end_pfn); | 160 | nodeid, start_pfn, end_pfn); |
161 | 161 | ||
162 | size_zones(zones, holes, start_pfn, end_pfn); | 162 | size_zones(zones, holes, start_pfn, end_pfn); |
163 | free_area_init_node(nodeid, NODE_DATA(nodeid), zones, | 163 | free_area_init_node(nodeid, NODE_DATA(nodeid), zones, |
164 | start_pfn, holes); | 164 | start_pfn, holes); |
165 | } | 165 | } |
166 | 166 | ||
167 | void __init numa_init_array(void) | 167 | void __init numa_init_array(void) |
168 | { | 168 | { |
169 | int rr, i; | 169 | int rr, i; |
170 | /* There are unfortunately some poorly designed mainboards around | 170 | /* There are unfortunately some poorly designed mainboards around |
171 | that only connect memory to a single CPU. This breaks the 1:1 cpu->node | 171 | that only connect memory to a single CPU. This breaks the 1:1 cpu->node |
172 | mapping. To avoid this, fill in the mapping for all possible | 172 | mapping. To avoid this, fill in the mapping for all possible |
173 | CPUs, as the number of CPUs is not known yet. | 173 | CPUs, as the number of CPUs is not known yet. |
174 | We round-robin over the existing nodes. */ | 174 | We round-robin over the existing nodes. */ |
175 | rr = first_node(node_online_map); | 175 | rr = first_node(node_online_map); |
176 | for (i = 0; i < NR_CPUS; i++) { | 176 | for (i = 0; i < NR_CPUS; i++) { |
177 | if (cpu_to_node[i] != NUMA_NO_NODE) | 177 | if (cpu_to_node[i] != NUMA_NO_NODE) |
178 | continue; | 178 | continue; |
179 | numa_set_node(i, rr); | 179 | numa_set_node(i, rr); |
180 | rr = next_node(rr, node_online_map); | 180 | rr = next_node(rr, node_online_map); |
181 | if (rr == MAX_NUMNODES) | 181 | if (rr == MAX_NUMNODES) |
182 | rr = first_node(node_online_map); | 182 | rr = first_node(node_online_map); |
183 | } | 183 | } |
184 | 184 | ||
185 | } | 185 | } |
186 | 186 | ||
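A minimal userspace sketch of the round-robin fill above, assuming two nodes with memory and one CPU already mapped by firmware (all sketch names are illustrative stand-ins for the kernel symbols):

    #include <stdio.h>

    #define SKETCH_NR_CPUS 6
    #define SKETCH_NO_NODE (-1)

    int main(void)
    {
            int online[] = { 0, 2 };        /* only nodes 0 and 2 have memory */
            int nr_online = 2;
            int cpu_to_node[SKETCH_NR_CPUS] =
                    { 0, SKETCH_NO_NODE, SKETCH_NO_NODE,
                      SKETCH_NO_NODE, SKETCH_NO_NODE, SKETCH_NO_NODE };
            int rr = 0, i;

            for (i = 0; i < SKETCH_NR_CPUS; i++) {
                    if (cpu_to_node[i] != SKETCH_NO_NODE)
                            continue;               /* already set from firmware */
                    cpu_to_node[i] = online[rr];    /* numa_set_node() equivalent */
                    rr = (rr + 1) % nr_online;      /* next_node() with wraparound */
                    printf("cpu %d -> node %d\n", i, cpu_to_node[i]);
            }
            return 0;
    }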
187 | #ifdef CONFIG_NUMA_EMU | 187 | #ifdef CONFIG_NUMA_EMU |
188 | int numa_fake __initdata = 0; | 188 | int numa_fake __initdata = 0; |
189 | 189 | ||
190 | /* Numa emulation */ | 190 | /* Numa emulation */ |
191 | static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | 191 | static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) |
192 | { | 192 | { |
193 | int i; | 193 | int i; |
194 | struct node nodes[MAX_NUMNODES]; | 194 | struct node nodes[MAX_NUMNODES]; |
195 | unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; | 195 | unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; |
196 | 196 | ||
197 | /* Kludge needed for the hash function */ | 197 | /* Kludge needed for the hash function */ |
198 | if (hweight64(sz) > 1) { | 198 | if (hweight64(sz) > 1) { |
199 | unsigned long x = 1; | 199 | unsigned long x = 1; |
200 | while ((x << 1) < sz) | 200 | while ((x << 1) < sz) |
201 | x <<= 1; | 201 | x <<= 1; |
202 | if (x < sz/2) | 202 | if (x < sz/2) |
203 | printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n"); | 203 | printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n"); |
204 | sz = x; | 204 | sz = x; |
205 | } | 205 | } |
206 | 206 | ||
207 | memset(&nodes,0,sizeof(nodes)); | 207 | memset(&nodes,0,sizeof(nodes)); |
208 | for (i = 0; i < numa_fake; i++) { | 208 | for (i = 0; i < numa_fake; i++) { |
209 | nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz; | 209 | nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz; |
210 | if (i == numa_fake-1) | 210 | if (i == numa_fake-1) |
211 | sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; | 211 | sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; |
212 | nodes[i].end = nodes[i].start + sz; | 212 | nodes[i].end = nodes[i].start + sz; |
213 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", | 213 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", |
214 | i, | 214 | i, |
215 | nodes[i].start, nodes[i].end, | 215 | nodes[i].start, nodes[i].end, |
216 | (nodes[i].end - nodes[i].start) >> 20); | 216 | (nodes[i].end - nodes[i].start) >> 20); |
217 | node_set_online(i); | 217 | node_set_online(i); |
218 | } | 218 | } |
219 | memnode_shift = compute_hash_shift(nodes, numa_fake); | 219 | memnode_shift = compute_hash_shift(nodes, numa_fake); |
220 | if (memnode_shift < 0) { | 220 | if (memnode_shift < 0) { |
221 | memnode_shift = 0; | 221 | memnode_shift = 0; |
222 | printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); | 222 | printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); |
223 | return -1; | 223 | return -1; |
224 | } | 224 | } |
225 | for_each_online_node(i) | 225 | for_each_online_node(i) |
226 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 226 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
227 | numa_init_array(); | 227 | numa_init_array(); |
228 | return 0; | 228 | return 0; |
229 | } | 229 | } |
230 | #endif | 230 | #endif |
231 | 231 | ||
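The power-of-two "kludge" above exists because memnode_shift resolves a physical address to a node with a single right shift, so the fake node size has to be rounded down to the largest power of two that fits. A worked sketch with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
            /* 1 GiB faked as 3 nodes: sz = 357913941 bytes, not a power of two. */
            unsigned long sz = (1UL << 30) / 3;
            unsigned long x = 1;
            int shift = 0;

            while ((x << 1) < sz) {         /* largest power of two below sz */
                    x <<= 1;
                    shift++;
            }
            if (x < sz / 2)
                    printf("unbalanced split\n");
            /* Prints 268435456 (256 MiB), i.e. a memnode_shift of 28. */
            printf("rounded size %lu, shift %d\n", x, shift);
            return 0;
    }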
232 | void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 232 | void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) |
233 | { | 233 | { |
234 | int i; | 234 | int i; |
235 | 235 | ||
236 | #ifdef CONFIG_NUMA_EMU | 236 | #ifdef CONFIG_NUMA_EMU |
237 | if (numa_fake && !numa_emulation(start_pfn, end_pfn)) | 237 | if (numa_fake && !numa_emulation(start_pfn, end_pfn)) |
238 | return; | 238 | return; |
239 | #endif | 239 | #endif |
240 | 240 | ||
241 | #ifdef CONFIG_ACPI_NUMA | 241 | #ifdef CONFIG_ACPI_NUMA |
242 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 242 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, |
243 | end_pfn << PAGE_SHIFT)) | 243 | end_pfn << PAGE_SHIFT)) |
244 | return; | 244 | return; |
245 | #endif | 245 | #endif |
246 | 246 | ||
247 | #ifdef CONFIG_K8_NUMA | 247 | #ifdef CONFIG_K8_NUMA |
248 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) | 248 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) |
249 | return; | 249 | return; |
250 | #endif | 250 | #endif |
251 | printk(KERN_INFO "%s\n", | 251 | printk(KERN_INFO "%s\n", |
252 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | 252 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); |
253 | 253 | ||
254 | printk(KERN_INFO "Faking a node at %016lx-%016lx\n", | 254 | printk(KERN_INFO "Faking a node at %016lx-%016lx\n", |
255 | start_pfn << PAGE_SHIFT, | 255 | start_pfn << PAGE_SHIFT, |
256 | end_pfn << PAGE_SHIFT); | 256 | end_pfn << PAGE_SHIFT); |
257 | /* setup dummy node covering all memory */ | 257 | /* setup dummy node covering all memory */ |
258 | memnode_shift = 63; | 258 | memnode_shift = 63; |
259 | memnodemap[0] = 0; | 259 | memnodemap[0] = 0; |
260 | nodes_clear(node_online_map); | 260 | nodes_clear(node_online_map); |
261 | node_set_online(0); | 261 | node_set_online(0); |
262 | for (i = 0; i < NR_CPUS; i++) | 262 | for (i = 0; i < NR_CPUS; i++) |
263 | numa_set_node(i, 0); | 263 | numa_set_node(i, 0); |
264 | node_to_cpumask[0] = cpumask_of_cpu(0); | 264 | node_to_cpumask[0] = cpumask_of_cpu(0); |
265 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); | 265 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); |
266 | } | 266 | } |
267 | 267 | ||
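The probes above are tried in order: NUMA emulation if numa=fake= was given, then the ACPI SRAT scan, then the AMD K8 northbridge scan; only when all of them decline does the dummy single-node path run. As an illustration of that last path, a 4 GiB machine booted with numa=off would log something like the following (text taken from the printk format strings above; the addresses are for this hypothetical machine):

    NUMA turned off
    Faking a node at 0000000000000000-0000000100000000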
268 | __cpuinit void numa_add_cpu(int cpu) | 268 | __cpuinit void numa_add_cpu(int cpu) |
269 | { | 269 | { |
270 | set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); | 270 | set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); |
271 | } | 271 | } |
272 | 272 | ||
273 | void __cpuinit numa_set_node(int cpu, int node) | 273 | void __cpuinit numa_set_node(int cpu, int node) |
274 | { | 274 | { |
275 | cpu_pda[cpu].nodenumber = node; | 275 | cpu_pda(cpu)->nodenumber = node; |
276 | cpu_to_node[cpu] = node; | 276 | cpu_to_node[cpu] = node; |
277 | } | 277 | } |
278 | 278 | ||
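This hunk is the commit's pattern in miniature: cpu_pda[cpu].nodenumber becomes cpu_pda(cpu)->nodenumber, an accessor macro instead of direct array indexing, so the backing storage can change later without revisiting callers. A minimal self-contained sketch of the same pattern (the sketch_* names are illustrative, not kernel symbols):

    #include <stdio.h>

    struct sketch_pda { int nodenumber; };

    static struct sketch_pda _sketch_pda[4];        /* plays the role of _cpu_pda[] */
    #define sketch_cpu_pda(i) (&_sketch_pda[i])     /* same shape as cpu_pda(i) */

    int main(void)
    {
            /* old style would have been: _sketch_pda[2].nodenumber = 1; */
            sketch_cpu_pda(2)->nodenumber = 1;
            printf("cpu 2 -> node %d\n", sketch_cpu_pda(2)->nodenumber);
            return 0;
    }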
279 | unsigned long __init numa_free_all_bootmem(void) | 279 | unsigned long __init numa_free_all_bootmem(void) |
280 | { | 280 | { |
281 | int i; | 281 | int i; |
282 | unsigned long pages = 0; | 282 | unsigned long pages = 0; |
283 | for_each_online_node(i) { | 283 | for_each_online_node(i) { |
284 | pages += free_all_bootmem_node(NODE_DATA(i)); | 284 | pages += free_all_bootmem_node(NODE_DATA(i)); |
285 | } | 285 | } |
286 | return pages; | 286 | return pages; |
287 | } | 287 | } |
288 | 288 | ||
289 | #ifdef CONFIG_SPARSEMEM | 289 | #ifdef CONFIG_SPARSEMEM |
290 | static void __init arch_sparse_init(void) | 290 | static void __init arch_sparse_init(void) |
291 | { | 291 | { |
292 | int i; | 292 | int i; |
293 | 293 | ||
294 | for_each_online_node(i) | 294 | for_each_online_node(i) |
295 | memory_present(i, node_start_pfn(i), node_end_pfn(i)); | 295 | memory_present(i, node_start_pfn(i), node_end_pfn(i)); |
296 | 296 | ||
297 | sparse_init(); | 297 | sparse_init(); |
298 | } | 298 | } |
299 | #else | 299 | #else |
300 | #define arch_sparse_init() do {} while (0) | 300 | #define arch_sparse_init() do {} while (0) |
301 | #endif | 301 | #endif |
302 | 302 | ||
303 | void __init paging_init(void) | 303 | void __init paging_init(void) |
304 | { | 304 | { |
305 | int i; | 305 | int i; |
306 | 306 | ||
307 | arch_sparse_init(); | 307 | arch_sparse_init(); |
308 | 308 | ||
309 | for_each_online_node(i) { | 309 | for_each_online_node(i) { |
310 | setup_node_zones(i); | 310 | setup_node_zones(i); |
311 | } | 311 | } |
312 | } | 312 | } |
313 | 313 | ||
314 | /* [numa=off] */ | 314 | /* [numa=off] */ |
315 | __init int numa_setup(char *opt) | 315 | __init int numa_setup(char *opt) |
316 | { | 316 | { |
317 | if (!strncmp(opt,"off",3)) | 317 | if (!strncmp(opt,"off",3)) |
318 | numa_off = 1; | 318 | numa_off = 1; |
319 | #ifdef CONFIG_NUMA_EMU | 319 | #ifdef CONFIG_NUMA_EMU |
320 | if (!strncmp(opt, "fake=", 5)) { | 320 | if (!strncmp(opt, "fake=", 5)) { |
321 | numa_fake = simple_strtoul(opt+5, NULL, 0); | 321 | numa_fake = simple_strtoul(opt+5, NULL, 0); |
322 | if (numa_fake >= MAX_NUMNODES) | 322 | if (numa_fake >= MAX_NUMNODES) |
323 | numa_fake = MAX_NUMNODES; | 323 | numa_fake = MAX_NUMNODES; |
324 | } | 324 | } |
325 | #endif | 325 | #endif |
326 | #ifdef CONFIG_ACPI_NUMA | 326 | #ifdef CONFIG_ACPI_NUMA |
327 | if (!strncmp(opt,"noacpi",6)) | 327 | if (!strncmp(opt,"noacpi",6)) |
328 | acpi_numa = -1; | 328 | acpi_numa = -1; |
329 | #endif | 329 | #endif |
330 | return 1; | 330 | return 1; |
331 | } | 331 | } |
332 | 332 | ||
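For reference, the kernel command-line forms this parser understands look like the following (fake= exists only under CONFIG_NUMA_EMU, noacpi only under CONFIG_ACPI_NUMA; fake node counts above MAX_NUMNODES are clamped):

    numa=off
    numa=fake=4
    numa=noacpi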
333 | /* | 333 | /* |
334 | * Setup early cpu_to_node. | 334 | * Setup early cpu_to_node. |
335 | * | 335 | * |
336 | * Populate cpu_to_node[] only if the x86_cpu_to_apicid[] | 336 | * Populate cpu_to_node[] only if the x86_cpu_to_apicid[] |
337 | * and apicid_to_node[] tables have valid entries for a CPU. | 337 | * and apicid_to_node[] tables have valid entries for a CPU. |
338 | * This means we skip cpu_to_node[] initialisation for NUMA | 338 | * This means we skip cpu_to_node[] initialisation for NUMA |
339 | * emulation and the fake-node case (when running a kernel compiled | 339 | * emulation and the fake-node case (when running a kernel compiled |
340 | * for NUMA on a non-NUMA box), which is OK as cpu_to_node[] | 340 | * for NUMA on a non-NUMA box), which is OK as cpu_to_node[] |
341 | * is already initialized in a round-robin manner by numa_init_array, | 341 | * is already initialized in a round-robin manner by numa_init_array, |
342 | * prior to this call, and this initialization is good enough | 342 | * prior to this call, and this initialization is good enough |
343 | * for the fake NUMA cases. | 343 | * for the fake NUMA cases. |
344 | */ | 344 | */ |
345 | void __init init_cpu_to_node(void) | 345 | void __init init_cpu_to_node(void) |
346 | { | 346 | { |
347 | int i; | 347 | int i; |
348 | for (i = 0; i < NR_CPUS; i++) { | 348 | for (i = 0; i < NR_CPUS; i++) { |
349 | u8 apicid = x86_cpu_to_apicid[i]; | 349 | u8 apicid = x86_cpu_to_apicid[i]; |
350 | if (apicid == BAD_APICID) | 350 | if (apicid == BAD_APICID) |
351 | continue; | 351 | continue; |
352 | if (apicid_to_node[apicid] == NUMA_NO_NODE) | 352 | if (apicid_to_node[apicid] == NUMA_NO_NODE) |
353 | continue; | 353 | continue; |
354 | cpu_to_node[i] = apicid_to_node[apicid]; | 354 | cpu_to_node[i] = apicid_to_node[apicid]; |
355 | } | 355 | } |
356 | } | 356 | } |
357 | 357 | ||
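The two-table walk above keyed by APIC ID, shown on its own: entries invalid at either step keep their round-robin default from numa_init_array(). A hedged userspace sketch (sentinel values and table sizes are illustrative):

    #include <stdio.h>

    #define SKETCH_BAD_APICID 0xffU
    #define SKETCH_NO_NODE    (-1)

    int main(void)
    {
            unsigned cpu_to_apicid[4] = { 0, 4, SKETCH_BAD_APICID, 12 };
            int apicid_to_node[16];
            int cpu_to_node[4] = { 0, 0, 0, 0 };    /* round-robin defaults */
            int i;

            for (i = 0; i < 16; i++)
                    apicid_to_node[i] = SKETCH_NO_NODE;
            apicid_to_node[0] = 0;
            apicid_to_node[4] = 1;                  /* apicid 12 stays unknown */

            for (i = 0; i < 4; i++) {
                    unsigned apicid = cpu_to_apicid[i];
                    if (apicid == SKETCH_BAD_APICID)
                            continue;
                    if (apicid_to_node[apicid] == SKETCH_NO_NODE)
                            continue;
                    cpu_to_node[i] = apicid_to_node[apicid];
            }
            for (i = 0; i < 4; i++)
                    printf("cpu %d -> node %d\n", i, cpu_to_node[i]);
            return 0;
    }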
358 | EXPORT_SYMBOL(cpu_to_node); | 358 | EXPORT_SYMBOL(cpu_to_node); |
359 | EXPORT_SYMBOL(node_to_cpumask); | 359 | EXPORT_SYMBOL(node_to_cpumask); |
360 | EXPORT_SYMBOL(memnode_shift); | 360 | EXPORT_SYMBOL(memnode_shift); |
361 | EXPORT_SYMBOL(memnodemap); | 361 | EXPORT_SYMBOL(memnodemap); |
362 | EXPORT_SYMBOL(node_data); | 362 | EXPORT_SYMBOL(node_data); |
363 | 363 |
include/asm-x86_64/pda.h
1 | #ifndef X86_64_PDA_H | 1 | #ifndef X86_64_PDA_H |
2 | #define X86_64_PDA_H | 2 | #define X86_64_PDA_H |
3 | 3 | ||
4 | #ifndef __ASSEMBLY__ | 4 | #ifndef __ASSEMBLY__ |
5 | #include <linux/stddef.h> | 5 | #include <linux/stddef.h> |
6 | #include <linux/types.h> | 6 | #include <linux/types.h> |
7 | #include <linux/cache.h> | 7 | #include <linux/cache.h> |
8 | #include <asm/page.h> | 8 | #include <asm/page.h> |
9 | 9 | ||
10 | /* Per-processor data structure. %gs points to it while the kernel runs */ | 10 | /* Per-processor data structure. %gs points to it while the kernel runs */ |
11 | struct x8664_pda { | 11 | struct x8664_pda { |
12 | struct task_struct *pcurrent; /* Current process */ | 12 | struct task_struct *pcurrent; /* Current process */ |
13 | unsigned long data_offset; /* Per cpu data offset from linker address */ | 13 | unsigned long data_offset; /* Per cpu data offset from linker address */ |
14 | unsigned long kernelstack; /* top of kernel stack for current */ | 14 | unsigned long kernelstack; /* top of kernel stack for current */ |
15 | unsigned long oldrsp; /* user rsp for system call */ | 15 | unsigned long oldrsp; /* user rsp for system call */ |
16 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 16 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
17 | unsigned long debugstack; /* #DB/#BP stack. */ | 17 | unsigned long debugstack; /* #DB/#BP stack. */ |
18 | #endif | 18 | #endif |
19 | int irqcount; /* Irq nesting counter. Starts with -1 */ | 19 | int irqcount; /* Irq nesting counter. Starts with -1 */ |
20 | int cpunumber; /* Logical CPU number */ | 20 | int cpunumber; /* Logical CPU number */ |
21 | char *irqstackptr; /* top of irqstack */ | 21 | char *irqstackptr; /* top of irqstack */ |
22 | int nodenumber; /* number of current node */ | 22 | int nodenumber; /* number of current node */ |
23 | unsigned int __softirq_pending; | 23 | unsigned int __softirq_pending; |
24 | unsigned int __nmi_count; /* number of NMIs on this CPU */ | 24 | unsigned int __nmi_count; /* number of NMIs on this CPU */ |
25 | struct mm_struct *active_mm; | 25 | struct mm_struct *active_mm; |
26 | int mmu_state; | 26 | int mmu_state; |
27 | unsigned apic_timer_irqs; | 27 | unsigned apic_timer_irqs; |
28 | } ____cacheline_aligned_in_smp; | 28 | } ____cacheline_aligned_in_smp; |
29 | 29 | ||
30 | extern struct x8664_pda cpu_pda[]; | 30 | extern struct x8664_pda _cpu_pda[]; |
31 | |||
32 | #define cpu_pda(i) (&_cpu_pda[i]) | ||
31 | 33 | ||
32 | /* | 34 | /* |
33 | * There is no fast way to get the base address of the PDA, all the accesses | 35 | * There is no fast way to get the base address of the PDA, all the accesses |
34 | * have to mention %fs/%gs. So it needs to be done this Torvaldian way. | 36 | * have to mention %fs/%gs. So it needs to be done this Torvaldian way. |
35 | */ | 37 | */ |
36 | #define sizeof_field(type,field) (sizeof(((type *)0)->field)) | 38 | #define sizeof_field(type,field) (sizeof(((type *)0)->field)) |
37 | #define typeof_field(type,field) typeof(((type *)0)->field) | 39 | #define typeof_field(type,field) typeof(((type *)0)->field) |
38 | 40 | ||
39 | extern void __bad_pda_field(void); | 41 | extern void __bad_pda_field(void); |
40 | 42 | ||
41 | #define pda_offset(field) offsetof(struct x8664_pda, field) | 43 | #define pda_offset(field) offsetof(struct x8664_pda, field) |
42 | 44 | ||
43 | #define pda_to_op(op,field,val) do { \ | 45 | #define pda_to_op(op,field,val) do { \ |
44 | typedef typeof_field(struct x8664_pda, field) T__; \ | 46 | typedef typeof_field(struct x8664_pda, field) T__; \ |
45 | switch (sizeof_field(struct x8664_pda, field)) { \ | 47 | switch (sizeof_field(struct x8664_pda, field)) { \ |
46 | case 2: \ | 48 | case 2: \ |
47 | asm volatile(op "w %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ | 49 | asm volatile(op "w %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ |
48 | case 4: \ | 50 | case 4: \ |
49 | asm volatile(op "l %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ | 51 | asm volatile(op "l %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ |
50 | case 8: \ | 52 | case 8: \ |
51 | asm volatile(op "q %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ | 53 | asm volatile(op "q %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ |
52 | default: __bad_pda_field(); \ | 54 | default: __bad_pda_field(); \ |
53 | } \ | 55 | } \ |
54 | } while (0) | 56 | } while (0) |
55 | 57 | ||
56 | /* | 58 | /* |
57 | * AK: PDA read accesses should neither be volatile nor have a memory clobber. | 59 | * AK: PDA read accesses should neither be volatile nor have a memory clobber. |
58 | * Unfortunately, removing them currently causes all hell to break loose. | 60 | * Unfortunately, removing them currently causes all hell to break loose. |
59 | */ | 61 | */ |
60 | #define pda_from_op(op,field) ({ \ | 62 | #define pda_from_op(op,field) ({ \ |
61 | typeof_field(struct x8664_pda, field) ret__; \ | 63 | typeof_field(struct x8664_pda, field) ret__; \ |
62 | switch (sizeof_field(struct x8664_pda, field)) { \ | 64 | switch (sizeof_field(struct x8664_pda, field)) { \ |
63 | case 2: \ | 65 | case 2: \ |
64 | asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ | 66 | asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ |
65 | case 4: \ | 67 | case 4: \ |
66 | asm volatile(op "l %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ | 68 | asm volatile(op "l %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ |
67 | case 8: \ | 69 | case 8: \ |
68 | asm volatile(op "q %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ | 70 | asm volatile(op "q %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ |
69 | default: __bad_pda_field(); \ | 71 | default: __bad_pda_field(); \ |
70 | } \ | 72 | } \ |
71 | ret__; }) | 73 | ret__; }) |
72 | 74 | ||
73 | 75 | ||
74 | #define read_pda(field) pda_from_op("mov",field) | 76 | #define read_pda(field) pda_from_op("mov",field) |
75 | #define write_pda(field,val) pda_to_op("mov",field,val) | 77 | #define write_pda(field,val) pda_to_op("mov",field,val) |
76 | #define add_pda(field,val) pda_to_op("add",field,val) | 78 | #define add_pda(field,val) pda_to_op("add",field,val) |
77 | #define sub_pda(field,val) pda_to_op("sub",field,val) | 79 | #define sub_pda(field,val) pda_to_op("sub",field,val) |
78 | #define or_pda(field,val) pda_to_op("or",field,val) | 80 | #define or_pda(field,val) pda_to_op("or",field,val) |
79 | 81 | ||
80 | #endif | 82 | #endif |
81 | 83 | ||
82 | #define PDA_STACKOFFSET (5*8) | 84 | #define PDA_STACKOFFSET (5*8) |
83 | 85 | ||
84 | #endif | 86 | #endif |
85 | 87 |
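Stepping back from the diff: read_pda()/write_pda() above compile to a single %gs-relative instruction against the current CPU's PDA, while the new cpu_pda(i) macro hands back an ordinary pointer usable for any CPU. A userspace analogue where a thread-local variable stands in for the %gs base (a sketch under that assumption, not the kernel macros):

    #include <stdio.h>
    #include <stddef.h>

    struct sketch_pda { int cpunumber; int nodenumber; };

    static __thread struct sketch_pda pda;          /* stands in for %gs */

    #define sketch_read_pda(field)      (pda.field)
    #define sketch_write_pda(field, v)  (pda.field = (v))

    int main(void)
    {
            sketch_write_pda(cpunumber, 3);
            printf("running on cpu %d\n", sketch_read_pda(cpunumber));
            /* pda_offset(field) is just offsetof() on the real struct: */
            printf("nodenumber lives at offset %zu\n",
                   offsetof(struct sketch_pda, nodenumber));
            return 0;
    }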
include/asm-x86_64/percpu.h
1 | #ifndef _ASM_X8664_PERCPU_H_ | 1 | #ifndef _ASM_X8664_PERCPU_H_ |
2 | #define _ASM_X8664_PERCPU_H_ | 2 | #define _ASM_X8664_PERCPU_H_ |
3 | #include <linux/compiler.h> | 3 | #include <linux/compiler.h> |
4 | 4 | ||
5 | /* Same as asm-generic/percpu.h, except that we store the per-cpu offset | 5 | /* Same as asm-generic/percpu.h, except that we store the per-cpu offset |
6 | in the PDA. Longer term, the PDA and every per-cpu variable | 6 | in the PDA. Longer term, the PDA and every per-cpu variable |
7 | should just be put into a single section and referenced directly | 7 | should just be put into a single section and referenced directly |
8 | from %gs. */ | 8 | from %gs. */ |
9 | 9 | ||
10 | #ifdef CONFIG_SMP | 10 | #ifdef CONFIG_SMP |
11 | 11 | ||
12 | #include <asm/pda.h> | 12 | #include <asm/pda.h> |
13 | 13 | ||
14 | #define __per_cpu_offset(cpu) (cpu_pda[cpu].data_offset) | 14 | #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) |
15 | #define __my_cpu_offset() read_pda(data_offset) | 15 | #define __my_cpu_offset() read_pda(data_offset) |
16 | 16 | ||
17 | /* Separate out the type, so (int[3], foo) works. */ | 17 | /* Separate out the type, so (int[3], foo) works. */ |
18 | #define DEFINE_PER_CPU(type, name) \ | 18 | #define DEFINE_PER_CPU(type, name) \ |
19 | __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name | 19 | __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name |
20 | 20 | ||
21 | /* var is in discarded region: offset to particular copy we want */ | 21 | /* var is in discarded region: offset to particular copy we want */ |
22 | #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) | 22 | #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) |
23 | #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) | 23 | #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) |
24 | 24 | ||
25 | /* A macro to avoid #include hell... */ | 25 | /* A macro to avoid #include hell... */ |
26 | #define percpu_modcopy(pcpudst, src, size) \ | 26 | #define percpu_modcopy(pcpudst, src, size) \ |
27 | do { \ | 27 | do { \ |
28 | unsigned int __i; \ | 28 | unsigned int __i; \ |
29 | for (__i = 0; __i < NR_CPUS; __i++) \ | 29 | for (__i = 0; __i < NR_CPUS; __i++) \ |
30 | if (cpu_possible(__i)) \ | 30 | if (cpu_possible(__i)) \ |
31 | memcpy((pcpudst)+__per_cpu_offset(__i), \ | 31 | memcpy((pcpudst)+__per_cpu_offset(__i), \ |
32 | (src), (size)); \ | 32 | (src), (size)); \ |
33 | } while (0) | 33 | } while (0) |
34 | 34 | ||
35 | extern void setup_per_cpu_areas(void); | 35 | extern void setup_per_cpu_areas(void); |
36 | 36 | ||
37 | #else /* ! SMP */ | 37 | #else /* ! SMP */ |
38 | 38 | ||
39 | #define DEFINE_PER_CPU(type, name) \ | 39 | #define DEFINE_PER_CPU(type, name) \ |
40 | __typeof__(type) per_cpu__##name | 40 | __typeof__(type) per_cpu__##name |
41 | 41 | ||
42 | #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) | 42 | #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) |
43 | #define __get_cpu_var(var) per_cpu__##var | 43 | #define __get_cpu_var(var) per_cpu__##var |
44 | 44 | ||
45 | #endif /* SMP */ | 45 | #endif /* SMP */ |
46 | 46 | ||
47 | #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name | 47 | #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name |
48 | 48 | ||
49 | #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) | 49 | #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) |
50 | #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) | 50 | #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) |
51 | 51 | ||
52 | #endif /* _ASM_X8664_PERCPU_H_ */ | 52 | #endif /* _ASM_X8664_PERCPU_H_ */ |
53 | 53 |
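To make the data_offset scheme concrete: per_cpu(var, cpu) adds the offset now fetched through cpu_pda(cpu)->data_offset to the address of the linker's template copy of the variable. A hedged userspace sketch of that arithmetic (the cross-object pointer math is exactly what RELOC_HIDE exists to hide from the compiler; all sketch_* names are mine):

    #include <stdio.h>
    #include <string.h>

    #define SKETCH_CPUS 2

    static int per_cpu__counter = 7;        /* the "linker template" copy */
    static int area[SKETCH_CPUS];           /* one live copy per CPU */
    static long sketch_offset[SKETCH_CPUS]; /* what data_offset records */

    #define sketch_per_cpu(var, cpu) \
            (*(int *)((char *)&(var) + sketch_offset[cpu]))

    int main(void)
    {
            int cpu;

            for (cpu = 0; cpu < SKETCH_CPUS; cpu++) {
                    sketch_offset[cpu] =
                            (char *)&area[cpu] - (char *)&per_cpu__counter;
                    /* percpu_modcopy(): seed each copy from the template */
                    memcpy(&area[cpu], &per_cpu__counter, sizeof(int));
            }
            sketch_per_cpu(per_cpu__counter, 1) += 5;
            printf("cpu0=%d cpu1=%d\n",                 /* cpu0=7 cpu1=12 */
                   sketch_per_cpu(per_cpu__counter, 0),
                   sketch_per_cpu(per_cpu__counter, 1));
            return 0;
    }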