Commit 4d37e7e3fd851428dede4d05d3e69d03795a744a

Authored by Zachary Amsden
Committed by Linus Torvalds
1 parent 245067d167

[PATCH] i386: inline assembler: cleanup and encapsulate descriptor and task register management

i386 inline assembler cleanup.

This change encapsulates descriptor and task register management.  It also
improves assembler generation in two cases: savesegment may store the value
in a register instead of a memory location, which allows GCC to optimize
stack variables into registers, and MOV MEM, SEG is always a 16-bit write
to memory, so the casting in math-emu is unnecessary.
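
For reference, a minimal sketch of the kind of wrappers this patch
introduces.  The real definitions presumably live in the asm-i386 headers
changed by this patch and may differ in detail; the bodies below are
illustrative only, assuming they expand to the same instructions that the
open-coded asm at the call sites used before:

    #define load_gdt(dtr)  __asm__ __volatile__("lgdt %0" : : "m" (*(dtr)))
    #define load_idt(dtr)  __asm__ __volatile__("lidt %0" : : "m" (*(dtr)))
    #define store_gdt(dtr) __asm__ ("sgdt %0" : "=m" (*(dtr)))

    /*
     * savesegment: allowing a register as well as a memory destination
     * ("=rm" rather than "=m") lets GCC keep the saved selector in a
     * register, so stack variables holding it can be optimized into
     * registers; and since a MOV from a segment register to memory is
     * always a 16-bit store, a plain 16-bit lvalue works without casts.
     */
    #define savesegment(seg, value) \
            __asm__ __volatile__("mov %%" #seg ",%0" : "=rm" (value))

With helpers like these, the open-coded lgdt/lidt/sgdt sequences in
cpu_init(), doublefault_fn() and the EFI prelog/epilog become simple
load_gdt()/load_idt()/store_gdt() calls, as the hunks in the diff below
show.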

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 11 changed files with 43 additions and 40 deletions

arch/i386/kernel/cpu/common.c
1 #include <linux/init.h> 1 #include <linux/init.h>
2 #include <linux/string.h> 2 #include <linux/string.h>
3 #include <linux/delay.h> 3 #include <linux/delay.h>
4 #include <linux/smp.h> 4 #include <linux/smp.h>
5 #include <linux/module.h> 5 #include <linux/module.h>
6 #include <linux/percpu.h> 6 #include <linux/percpu.h>
7 #include <asm/semaphore.h> 7 #include <asm/semaphore.h>
8 #include <asm/processor.h> 8 #include <asm/processor.h>
9 #include <asm/i387.h> 9 #include <asm/i387.h>
10 #include <asm/msr.h> 10 #include <asm/msr.h>
11 #include <asm/io.h> 11 #include <asm/io.h>
12 #include <asm/mmu_context.h> 12 #include <asm/mmu_context.h>
13 #ifdef CONFIG_X86_LOCAL_APIC 13 #ifdef CONFIG_X86_LOCAL_APIC
14 #include <asm/mpspec.h> 14 #include <asm/mpspec.h>
15 #include <asm/apic.h> 15 #include <asm/apic.h>
16 #include <mach_apic.h> 16 #include <mach_apic.h>
17 #endif 17 #endif
18 18
19 #include "cpu.h" 19 #include "cpu.h"
20 20
21 DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]); 21 DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
22 EXPORT_PER_CPU_SYMBOL(cpu_gdt_table); 22 EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
23 23
24 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); 24 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
25 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); 25 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
26 26
27 static int cachesize_override __devinitdata = -1; 27 static int cachesize_override __devinitdata = -1;
28 static int disable_x86_fxsr __devinitdata = 0; 28 static int disable_x86_fxsr __devinitdata = 0;
29 static int disable_x86_serial_nr __devinitdata = 1; 29 static int disable_x86_serial_nr __devinitdata = 1;
30 30
31 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; 31 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
32 32
33 extern void mcheck_init(struct cpuinfo_x86 *c); 33 extern void mcheck_init(struct cpuinfo_x86 *c);
34 34
35 extern int disable_pse; 35 extern int disable_pse;
36 36
37 static void default_init(struct cpuinfo_x86 * c) 37 static void default_init(struct cpuinfo_x86 * c)
38 { 38 {
39 /* Not much we can do here... */ 39 /* Not much we can do here... */
40 /* Check if at least it has cpuid */ 40 /* Check if at least it has cpuid */
41 if (c->cpuid_level == -1) { 41 if (c->cpuid_level == -1) {
42 /* No cpuid. It must be an ancient CPU */ 42 /* No cpuid. It must be an ancient CPU */
43 if (c->x86 == 4) 43 if (c->x86 == 4)
44 strcpy(c->x86_model_id, "486"); 44 strcpy(c->x86_model_id, "486");
45 else if (c->x86 == 3) 45 else if (c->x86 == 3)
46 strcpy(c->x86_model_id, "386"); 46 strcpy(c->x86_model_id, "386");
47 } 47 }
48 } 48 }
49 49
50 static struct cpu_dev default_cpu = { 50 static struct cpu_dev default_cpu = {
51 .c_init = default_init, 51 .c_init = default_init,
52 }; 52 };
53 static struct cpu_dev * this_cpu = &default_cpu; 53 static struct cpu_dev * this_cpu = &default_cpu;
54 54
55 static int __init cachesize_setup(char *str) 55 static int __init cachesize_setup(char *str)
56 { 56 {
57 get_option (&str, &cachesize_override); 57 get_option (&str, &cachesize_override);
58 return 1; 58 return 1;
59 } 59 }
60 __setup("cachesize=", cachesize_setup); 60 __setup("cachesize=", cachesize_setup);
61 61
62 int __devinit get_model_name(struct cpuinfo_x86 *c) 62 int __devinit get_model_name(struct cpuinfo_x86 *c)
63 { 63 {
64 unsigned int *v; 64 unsigned int *v;
65 char *p, *q; 65 char *p, *q;
66 66
67 if (cpuid_eax(0x80000000) < 0x80000004) 67 if (cpuid_eax(0x80000000) < 0x80000004)
68 return 0; 68 return 0;
69 69
70 v = (unsigned int *) c->x86_model_id; 70 v = (unsigned int *) c->x86_model_id;
71 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); 71 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
72 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); 72 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
73 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); 73 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
74 c->x86_model_id[48] = 0; 74 c->x86_model_id[48] = 0;
75 75
76 /* Intel chips right-justify this string for some dumb reason; 76 /* Intel chips right-justify this string for some dumb reason;
77 undo that brain damage */ 77 undo that brain damage */
78 p = q = &c->x86_model_id[0]; 78 p = q = &c->x86_model_id[0];
79 while ( *p == ' ' ) 79 while ( *p == ' ' )
80 p++; 80 p++;
81 if ( p != q ) { 81 if ( p != q ) {
82 while ( *p ) 82 while ( *p )
83 *q++ = *p++; 83 *q++ = *p++;
84 while ( q <= &c->x86_model_id[48] ) 84 while ( q <= &c->x86_model_id[48] )
85 *q++ = '\0'; /* Zero-pad the rest */ 85 *q++ = '\0'; /* Zero-pad the rest */
86 } 86 }
87 87
88 return 1; 88 return 1;
89 } 89 }
90 90
91 91
92 void __devinit display_cacheinfo(struct cpuinfo_x86 *c) 92 void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
93 { 93 {
94 unsigned int n, dummy, ecx, edx, l2size; 94 unsigned int n, dummy, ecx, edx, l2size;
95 95
96 n = cpuid_eax(0x80000000); 96 n = cpuid_eax(0x80000000);
97 97
98 if (n >= 0x80000005) { 98 if (n >= 0x80000005) {
99 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); 99 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
100 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", 100 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
101 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); 101 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
102 c->x86_cache_size=(ecx>>24)+(edx>>24); 102 c->x86_cache_size=(ecx>>24)+(edx>>24);
103 } 103 }
104 104
105 if (n < 0x80000006) /* Some chips just has a large L1. */ 105 if (n < 0x80000006) /* Some chips just has a large L1. */
106 return; 106 return;
107 107
108 ecx = cpuid_ecx(0x80000006); 108 ecx = cpuid_ecx(0x80000006);
109 l2size = ecx >> 16; 109 l2size = ecx >> 16;
110 110
111 /* do processor-specific cache resizing */ 111 /* do processor-specific cache resizing */
112 if (this_cpu->c_size_cache) 112 if (this_cpu->c_size_cache)
113 l2size = this_cpu->c_size_cache(c,l2size); 113 l2size = this_cpu->c_size_cache(c,l2size);
114 114
115 /* Allow user to override all this if necessary. */ 115 /* Allow user to override all this if necessary. */
116 if (cachesize_override != -1) 116 if (cachesize_override != -1)
117 l2size = cachesize_override; 117 l2size = cachesize_override;
118 118
119 if ( l2size == 0 ) 119 if ( l2size == 0 )
120 return; /* Again, no L2 cache is possible */ 120 return; /* Again, no L2 cache is possible */
121 121
122 c->x86_cache_size = l2size; 122 c->x86_cache_size = l2size;
123 123
124 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", 124 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
125 l2size, ecx & 0xFF); 125 l2size, ecx & 0xFF);
126 } 126 }
127 127
128 /* Naming convention should be: <Name> [(<Codename>)] */ 128 /* Naming convention should be: <Name> [(<Codename>)] */
129 /* This table only is used unless init_<vendor>() below doesn't set it; */ 129 /* This table only is used unless init_<vendor>() below doesn't set it; */
130 /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ 130 /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
131 131
132 /* Look up CPU names by table lookup. */ 132 /* Look up CPU names by table lookup. */
133 static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) 133 static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
134 { 134 {
135 struct cpu_model_info *info; 135 struct cpu_model_info *info;
136 136
137 if ( c->x86_model >= 16 ) 137 if ( c->x86_model >= 16 )
138 return NULL; /* Range check */ 138 return NULL; /* Range check */
139 139
140 if (!this_cpu) 140 if (!this_cpu)
141 return NULL; 141 return NULL;
142 142
143 info = this_cpu->c_models; 143 info = this_cpu->c_models;
144 144
145 while (info && info->family) { 145 while (info && info->family) {
146 if (info->family == c->x86) 146 if (info->family == c->x86)
147 return info->model_names[c->x86_model]; 147 return info->model_names[c->x86_model];
148 info++; 148 info++;
149 } 149 }
150 return NULL; /* Not found */ 150 return NULL; /* Not found */
151 } 151 }
152 152
153 153
154 void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) 154 void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
155 { 155 {
156 char *v = c->x86_vendor_id; 156 char *v = c->x86_vendor_id;
157 int i; 157 int i;
158 158
159 for (i = 0; i < X86_VENDOR_NUM; i++) { 159 for (i = 0; i < X86_VENDOR_NUM; i++) {
160 if (cpu_devs[i]) { 160 if (cpu_devs[i]) {
161 if (!strcmp(v,cpu_devs[i]->c_ident[0]) || 161 if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
162 (cpu_devs[i]->c_ident[1] && 162 (cpu_devs[i]->c_ident[1] &&
163 !strcmp(v,cpu_devs[i]->c_ident[1]))) { 163 !strcmp(v,cpu_devs[i]->c_ident[1]))) {
164 c->x86_vendor = i; 164 c->x86_vendor = i;
165 if (!early) 165 if (!early)
166 this_cpu = cpu_devs[i]; 166 this_cpu = cpu_devs[i];
167 break; 167 break;
168 } 168 }
169 } 169 }
170 } 170 }
171 } 171 }
172 172
173 173
174 static int __init x86_fxsr_setup(char * s) 174 static int __init x86_fxsr_setup(char * s)
175 { 175 {
176 disable_x86_fxsr = 1; 176 disable_x86_fxsr = 1;
177 return 1; 177 return 1;
178 } 178 }
179 __setup("nofxsr", x86_fxsr_setup); 179 __setup("nofxsr", x86_fxsr_setup);
180 180
181 181
182 /* Standard macro to see if a specific flag is changeable */ 182 /* Standard macro to see if a specific flag is changeable */
183 static inline int flag_is_changeable_p(u32 flag) 183 static inline int flag_is_changeable_p(u32 flag)
184 { 184 {
185 u32 f1, f2; 185 u32 f1, f2;
186 186
187 asm("pushfl\n\t" 187 asm("pushfl\n\t"
188 "pushfl\n\t" 188 "pushfl\n\t"
189 "popl %0\n\t" 189 "popl %0\n\t"
190 "movl %0,%1\n\t" 190 "movl %0,%1\n\t"
191 "xorl %2,%0\n\t" 191 "xorl %2,%0\n\t"
192 "pushl %0\n\t" 192 "pushl %0\n\t"
193 "popfl\n\t" 193 "popfl\n\t"
194 "pushfl\n\t" 194 "pushfl\n\t"
195 "popl %0\n\t" 195 "popl %0\n\t"
196 "popfl\n\t" 196 "popfl\n\t"
197 : "=&r" (f1), "=&r" (f2) 197 : "=&r" (f1), "=&r" (f2)
198 : "ir" (flag)); 198 : "ir" (flag));
199 199
200 return ((f1^f2) & flag) != 0; 200 return ((f1^f2) & flag) != 0;
201 } 201 }
202 202
203 203
204 /* Probe for the CPUID instruction */ 204 /* Probe for the CPUID instruction */
205 static int __devinit have_cpuid_p(void) 205 static int __devinit have_cpuid_p(void)
206 { 206 {
207 return flag_is_changeable_p(X86_EFLAGS_ID); 207 return flag_is_changeable_p(X86_EFLAGS_ID);
208 } 208 }
209 209
210 /* Do minimum CPU detection early. 210 /* Do minimum CPU detection early.
211 Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. 211 Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
212 The others are not touched to avoid unwanted side effects. */ 212 The others are not touched to avoid unwanted side effects. */
213 static void __init early_cpu_detect(void) 213 static void __init early_cpu_detect(void)
214 { 214 {
215 struct cpuinfo_x86 *c = &boot_cpu_data; 215 struct cpuinfo_x86 *c = &boot_cpu_data;
216 216
217 c->x86_cache_alignment = 32; 217 c->x86_cache_alignment = 32;
218 218
219 if (!have_cpuid_p()) 219 if (!have_cpuid_p())
220 return; 220 return;
221 221
222 /* Get vendor name */ 222 /* Get vendor name */
223 cpuid(0x00000000, &c->cpuid_level, 223 cpuid(0x00000000, &c->cpuid_level,
224 (int *)&c->x86_vendor_id[0], 224 (int *)&c->x86_vendor_id[0],
225 (int *)&c->x86_vendor_id[8], 225 (int *)&c->x86_vendor_id[8],
226 (int *)&c->x86_vendor_id[4]); 226 (int *)&c->x86_vendor_id[4]);
227 227
228 get_cpu_vendor(c, 1); 228 get_cpu_vendor(c, 1);
229 229
230 c->x86 = 4; 230 c->x86 = 4;
231 if (c->cpuid_level >= 0x00000001) { 231 if (c->cpuid_level >= 0x00000001) {
232 u32 junk, tfms, cap0, misc; 232 u32 junk, tfms, cap0, misc;
233 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 233 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
234 c->x86 = (tfms >> 8) & 15; 234 c->x86 = (tfms >> 8) & 15;
235 c->x86_model = (tfms >> 4) & 15; 235 c->x86_model = (tfms >> 4) & 15;
236 if (c->x86 == 0xf) { 236 if (c->x86 == 0xf) {
237 c->x86 += (tfms >> 20) & 0xff; 237 c->x86 += (tfms >> 20) & 0xff;
238 c->x86_model += ((tfms >> 16) & 0xF) << 4; 238 c->x86_model += ((tfms >> 16) & 0xF) << 4;
239 } 239 }
240 c->x86_mask = tfms & 15; 240 c->x86_mask = tfms & 15;
241 if (cap0 & (1<<19)) 241 if (cap0 & (1<<19))
242 c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; 242 c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
243 } 243 }
244 244
245 early_intel_workaround(c); 245 early_intel_workaround(c);
246 246
247 #ifdef CONFIG_X86_HT 247 #ifdef CONFIG_X86_HT
248 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; 248 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
249 #endif 249 #endif
250 } 250 }
251 251
252 void __devinit generic_identify(struct cpuinfo_x86 * c) 252 void __devinit generic_identify(struct cpuinfo_x86 * c)
253 { 253 {
254 u32 tfms, xlvl; 254 u32 tfms, xlvl;
255 int junk; 255 int junk;
256 256
257 if (have_cpuid_p()) { 257 if (have_cpuid_p()) {
258 /* Get vendor name */ 258 /* Get vendor name */
259 cpuid(0x00000000, &c->cpuid_level, 259 cpuid(0x00000000, &c->cpuid_level,
260 (int *)&c->x86_vendor_id[0], 260 (int *)&c->x86_vendor_id[0],
261 (int *)&c->x86_vendor_id[8], 261 (int *)&c->x86_vendor_id[8],
262 (int *)&c->x86_vendor_id[4]); 262 (int *)&c->x86_vendor_id[4]);
263 263
264 get_cpu_vendor(c, 0); 264 get_cpu_vendor(c, 0);
265 /* Initialize the standard set of capabilities */ 265 /* Initialize the standard set of capabilities */
266 /* Note that the vendor-specific code below might override */ 266 /* Note that the vendor-specific code below might override */
267 267
268 /* Intel-defined flags: level 0x00000001 */ 268 /* Intel-defined flags: level 0x00000001 */
269 if ( c->cpuid_level >= 0x00000001 ) { 269 if ( c->cpuid_level >= 0x00000001 ) {
270 u32 capability, excap; 270 u32 capability, excap;
271 cpuid(0x00000001, &tfms, &junk, &excap, &capability); 271 cpuid(0x00000001, &tfms, &junk, &excap, &capability);
272 c->x86_capability[0] = capability; 272 c->x86_capability[0] = capability;
273 c->x86_capability[4] = excap; 273 c->x86_capability[4] = excap;
274 c->x86 = (tfms >> 8) & 15; 274 c->x86 = (tfms >> 8) & 15;
275 c->x86_model = (tfms >> 4) & 15; 275 c->x86_model = (tfms >> 4) & 15;
276 if (c->x86 == 0xf) { 276 if (c->x86 == 0xf) {
277 c->x86 += (tfms >> 20) & 0xff; 277 c->x86 += (tfms >> 20) & 0xff;
278 c->x86_model += ((tfms >> 16) & 0xF) << 4; 278 c->x86_model += ((tfms >> 16) & 0xF) << 4;
279 } 279 }
280 c->x86_mask = tfms & 15; 280 c->x86_mask = tfms & 15;
281 } else { 281 } else {
282 /* Have CPUID level 0 only - unheard of */ 282 /* Have CPUID level 0 only - unheard of */
283 c->x86 = 4; 283 c->x86 = 4;
284 } 284 }
285 285
286 /* AMD-defined flags: level 0x80000001 */ 286 /* AMD-defined flags: level 0x80000001 */
287 xlvl = cpuid_eax(0x80000000); 287 xlvl = cpuid_eax(0x80000000);
288 if ( (xlvl & 0xffff0000) == 0x80000000 ) { 288 if ( (xlvl & 0xffff0000) == 0x80000000 ) {
289 if ( xlvl >= 0x80000001 ) { 289 if ( xlvl >= 0x80000001 ) {
290 c->x86_capability[1] = cpuid_edx(0x80000001); 290 c->x86_capability[1] = cpuid_edx(0x80000001);
291 c->x86_capability[6] = cpuid_ecx(0x80000001); 291 c->x86_capability[6] = cpuid_ecx(0x80000001);
292 } 292 }
293 if ( xlvl >= 0x80000004 ) 293 if ( xlvl >= 0x80000004 )
294 get_model_name(c); /* Default name */ 294 get_model_name(c); /* Default name */
295 } 295 }
296 } 296 }
297 } 297 }
298 298
299 static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 299 static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
300 { 300 {
301 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { 301 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
302 /* Disable processor serial number */ 302 /* Disable processor serial number */
303 unsigned long lo,hi; 303 unsigned long lo,hi;
304 rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); 304 rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
305 lo |= 0x200000; 305 lo |= 0x200000;
306 wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); 306 wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
307 printk(KERN_NOTICE "CPU serial number disabled.\n"); 307 printk(KERN_NOTICE "CPU serial number disabled.\n");
308 clear_bit(X86_FEATURE_PN, c->x86_capability); 308 clear_bit(X86_FEATURE_PN, c->x86_capability);
309 309
310 /* Disabling the serial number may affect the cpuid level */ 310 /* Disabling the serial number may affect the cpuid level */
311 c->cpuid_level = cpuid_eax(0); 311 c->cpuid_level = cpuid_eax(0);
312 } 312 }
313 } 313 }
314 314
315 static int __init x86_serial_nr_setup(char *s) 315 static int __init x86_serial_nr_setup(char *s)
316 { 316 {
317 disable_x86_serial_nr = 0; 317 disable_x86_serial_nr = 0;
318 return 1; 318 return 1;
319 } 319 }
320 __setup("serialnumber", x86_serial_nr_setup); 320 __setup("serialnumber", x86_serial_nr_setup);
321 321
322 322
323 323
324 /* 324 /*
325 * This does the hard work of actually picking apart the CPU stuff... 325 * This does the hard work of actually picking apart the CPU stuff...
326 */ 326 */
327 void __devinit identify_cpu(struct cpuinfo_x86 *c) 327 void __devinit identify_cpu(struct cpuinfo_x86 *c)
328 { 328 {
329 int i; 329 int i;
330 330
331 c->loops_per_jiffy = loops_per_jiffy; 331 c->loops_per_jiffy = loops_per_jiffy;
332 c->x86_cache_size = -1; 332 c->x86_cache_size = -1;
333 c->x86_vendor = X86_VENDOR_UNKNOWN; 333 c->x86_vendor = X86_VENDOR_UNKNOWN;
334 c->cpuid_level = -1; /* CPUID not detected */ 334 c->cpuid_level = -1; /* CPUID not detected */
335 c->x86_model = c->x86_mask = 0; /* So far unknown... */ 335 c->x86_model = c->x86_mask = 0; /* So far unknown... */
336 c->x86_vendor_id[0] = '\0'; /* Unset */ 336 c->x86_vendor_id[0] = '\0'; /* Unset */
337 c->x86_model_id[0] = '\0'; /* Unset */ 337 c->x86_model_id[0] = '\0'; /* Unset */
338 c->x86_num_cores = 1; 338 c->x86_num_cores = 1;
339 memset(&c->x86_capability, 0, sizeof c->x86_capability); 339 memset(&c->x86_capability, 0, sizeof c->x86_capability);
340 340
341 if (!have_cpuid_p()) { 341 if (!have_cpuid_p()) {
342 /* First of all, decide if this is a 486 or higher */ 342 /* First of all, decide if this is a 486 or higher */
343 /* It's a 486 if we can modify the AC flag */ 343 /* It's a 486 if we can modify the AC flag */
344 if ( flag_is_changeable_p(X86_EFLAGS_AC) ) 344 if ( flag_is_changeable_p(X86_EFLAGS_AC) )
345 c->x86 = 4; 345 c->x86 = 4;
346 else 346 else
347 c->x86 = 3; 347 c->x86 = 3;
348 } 348 }
349 349
350 generic_identify(c); 350 generic_identify(c);
351 351
352 printk(KERN_DEBUG "CPU: After generic identify, caps:"); 352 printk(KERN_DEBUG "CPU: After generic identify, caps:");
353 for (i = 0; i < NCAPINTS; i++) 353 for (i = 0; i < NCAPINTS; i++)
354 printk(" %08lx", c->x86_capability[i]); 354 printk(" %08lx", c->x86_capability[i]);
355 printk("\n"); 355 printk("\n");
356 356
357 if (this_cpu->c_identify) { 357 if (this_cpu->c_identify) {
358 this_cpu->c_identify(c); 358 this_cpu->c_identify(c);
359 359
360 printk(KERN_DEBUG "CPU: After vendor identify, caps:"); 360 printk(KERN_DEBUG "CPU: After vendor identify, caps:");
361 for (i = 0; i < NCAPINTS; i++) 361 for (i = 0; i < NCAPINTS; i++)
362 printk(" %08lx", c->x86_capability[i]); 362 printk(" %08lx", c->x86_capability[i]);
363 printk("\n"); 363 printk("\n");
364 } 364 }
365 365
366 /* 366 /*
367 * Vendor-specific initialization. In this section we 367 * Vendor-specific initialization. In this section we
368 * canonicalize the feature flags, meaning if there are 368 * canonicalize the feature flags, meaning if there are
369 * features a certain CPU supports which CPUID doesn't 369 * features a certain CPU supports which CPUID doesn't
370 * tell us, CPUID claiming incorrect flags, or other bugs, 370 * tell us, CPUID claiming incorrect flags, or other bugs,
371 * we handle them here. 371 * we handle them here.
372 * 372 *
373 * At the end of this section, c->x86_capability better 373 * At the end of this section, c->x86_capability better
374 * indicate the features this CPU genuinely supports! 374 * indicate the features this CPU genuinely supports!
375 */ 375 */
376 if (this_cpu->c_init) 376 if (this_cpu->c_init)
377 this_cpu->c_init(c); 377 this_cpu->c_init(c);
378 378
379 /* Disable the PN if appropriate */ 379 /* Disable the PN if appropriate */
380 squash_the_stupid_serial_number(c); 380 squash_the_stupid_serial_number(c);
381 381
382 /* 382 /*
383 * The vendor-specific functions might have changed features. Now 383 * The vendor-specific functions might have changed features. Now
384 * we do "generic changes." 384 * we do "generic changes."
385 */ 385 */
386 386
387 /* TSC disabled? */ 387 /* TSC disabled? */
388 if ( tsc_disable ) 388 if ( tsc_disable )
389 clear_bit(X86_FEATURE_TSC, c->x86_capability); 389 clear_bit(X86_FEATURE_TSC, c->x86_capability);
390 390
391 /* FXSR disabled? */ 391 /* FXSR disabled? */
392 if (disable_x86_fxsr) { 392 if (disable_x86_fxsr) {
393 clear_bit(X86_FEATURE_FXSR, c->x86_capability); 393 clear_bit(X86_FEATURE_FXSR, c->x86_capability);
394 clear_bit(X86_FEATURE_XMM, c->x86_capability); 394 clear_bit(X86_FEATURE_XMM, c->x86_capability);
395 } 395 }
396 396
397 if (disable_pse) 397 if (disable_pse)
398 clear_bit(X86_FEATURE_PSE, c->x86_capability); 398 clear_bit(X86_FEATURE_PSE, c->x86_capability);
399 399
400 /* If the model name is still unset, do table lookup. */ 400 /* If the model name is still unset, do table lookup. */
401 if ( !c->x86_model_id[0] ) { 401 if ( !c->x86_model_id[0] ) {
402 char *p; 402 char *p;
403 p = table_lookup_model(c); 403 p = table_lookup_model(c);
404 if ( p ) 404 if ( p )
405 strcpy(c->x86_model_id, p); 405 strcpy(c->x86_model_id, p);
406 else 406 else
407 /* Last resort... */ 407 /* Last resort... */
408 sprintf(c->x86_model_id, "%02x/%02x", 408 sprintf(c->x86_model_id, "%02x/%02x",
409 c->x86_vendor, c->x86_model); 409 c->x86_vendor, c->x86_model);
410 } 410 }
411 411
412 /* Now the feature flags better reflect actual CPU features! */ 412 /* Now the feature flags better reflect actual CPU features! */
413 413
414 printk(KERN_DEBUG "CPU: After all inits, caps:"); 414 printk(KERN_DEBUG "CPU: After all inits, caps:");
415 for (i = 0; i < NCAPINTS; i++) 415 for (i = 0; i < NCAPINTS; i++)
416 printk(" %08lx", c->x86_capability[i]); 416 printk(" %08lx", c->x86_capability[i]);
417 printk("\n"); 417 printk("\n");
418 418
419 /* 419 /*
420 * On SMP, boot_cpu_data holds the common feature set between 420 * On SMP, boot_cpu_data holds the common feature set between
421 * all CPUs; so make sure that we indicate which features are 421 * all CPUs; so make sure that we indicate which features are
422 * common between the CPUs. The first time this routine gets 422 * common between the CPUs. The first time this routine gets
423 * executed, c == &boot_cpu_data. 423 * executed, c == &boot_cpu_data.
424 */ 424 */
425 if ( c != &boot_cpu_data ) { 425 if ( c != &boot_cpu_data ) {
426 /* AND the already accumulated flags with these */ 426 /* AND the already accumulated flags with these */
427 for ( i = 0 ; i < NCAPINTS ; i++ ) 427 for ( i = 0 ; i < NCAPINTS ; i++ )
428 boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; 428 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
429 } 429 }
430 430
431 /* Init Machine Check Exception if available. */ 431 /* Init Machine Check Exception if available. */
432 #ifdef CONFIG_X86_MCE 432 #ifdef CONFIG_X86_MCE
433 mcheck_init(c); 433 mcheck_init(c);
434 #endif 434 #endif
435 if (c == &boot_cpu_data) 435 if (c == &boot_cpu_data)
436 sysenter_setup(); 436 sysenter_setup();
437 enable_sep_cpu(); 437 enable_sep_cpu();
438 438
439 if (c == &boot_cpu_data) 439 if (c == &boot_cpu_data)
440 mtrr_bp_init(); 440 mtrr_bp_init();
441 else 441 else
442 mtrr_ap_init(); 442 mtrr_ap_init();
443 } 443 }
444 444
445 #ifdef CONFIG_X86_HT 445 #ifdef CONFIG_X86_HT
446 void __devinit detect_ht(struct cpuinfo_x86 *c) 446 void __devinit detect_ht(struct cpuinfo_x86 *c)
447 { 447 {
448 u32 eax, ebx, ecx, edx; 448 u32 eax, ebx, ecx, edx;
449 int index_msb, tmp; 449 int index_msb, tmp;
450 int cpu = smp_processor_id(); 450 int cpu = smp_processor_id();
451 451
452 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 452 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
453 return; 453 return;
454 454
455 cpuid(1, &eax, &ebx, &ecx, &edx); 455 cpuid(1, &eax, &ebx, &ecx, &edx);
456 smp_num_siblings = (ebx & 0xff0000) >> 16; 456 smp_num_siblings = (ebx & 0xff0000) >> 16;
457 457
458 if (smp_num_siblings == 1) { 458 if (smp_num_siblings == 1) {
459 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 459 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
460 } else if (smp_num_siblings > 1 ) { 460 } else if (smp_num_siblings > 1 ) {
461 index_msb = 31; 461 index_msb = 31;
462 462
463 if (smp_num_siblings > NR_CPUS) { 463 if (smp_num_siblings > NR_CPUS) {
464 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); 464 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
465 smp_num_siblings = 1; 465 smp_num_siblings = 1;
466 return; 466 return;
467 } 467 }
468 tmp = smp_num_siblings; 468 tmp = smp_num_siblings;
469 while ((tmp & 0x80000000 ) == 0) { 469 while ((tmp & 0x80000000 ) == 0) {
470 tmp <<=1 ; 470 tmp <<=1 ;
471 index_msb--; 471 index_msb--;
472 } 472 }
473 if (smp_num_siblings & (smp_num_siblings - 1)) 473 if (smp_num_siblings & (smp_num_siblings - 1))
474 index_msb++; 474 index_msb++;
475 phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); 475 phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
476 476
477 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 477 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
478 phys_proc_id[cpu]); 478 phys_proc_id[cpu]);
479 479
480 smp_num_siblings = smp_num_siblings / c->x86_num_cores; 480 smp_num_siblings = smp_num_siblings / c->x86_num_cores;
481 481
482 tmp = smp_num_siblings; 482 tmp = smp_num_siblings;
483 index_msb = 31; 483 index_msb = 31;
484 while ((tmp & 0x80000000) == 0) { 484 while ((tmp & 0x80000000) == 0) {
485 tmp <<=1 ; 485 tmp <<=1 ;
486 index_msb--; 486 index_msb--;
487 } 487 }
488 488
489 if (smp_num_siblings & (smp_num_siblings - 1)) 489 if (smp_num_siblings & (smp_num_siblings - 1))
490 index_msb++; 490 index_msb++;
491 491
492 cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); 492 cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
493 493
494 if (c->x86_num_cores > 1) 494 if (c->x86_num_cores > 1)
495 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 495 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
496 cpu_core_id[cpu]); 496 cpu_core_id[cpu]);
497 } 497 }
498 } 498 }
499 #endif 499 #endif
500 500
501 void __devinit print_cpu_info(struct cpuinfo_x86 *c) 501 void __devinit print_cpu_info(struct cpuinfo_x86 *c)
502 { 502 {
503 char *vendor = NULL; 503 char *vendor = NULL;
504 504
505 if (c->x86_vendor < X86_VENDOR_NUM) 505 if (c->x86_vendor < X86_VENDOR_NUM)
506 vendor = this_cpu->c_vendor; 506 vendor = this_cpu->c_vendor;
507 else if (c->cpuid_level >= 0) 507 else if (c->cpuid_level >= 0)
508 vendor = c->x86_vendor_id; 508 vendor = c->x86_vendor_id;
509 509
510 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) 510 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
511 printk("%s ", vendor); 511 printk("%s ", vendor);
512 512
513 if (!c->x86_model_id[0]) 513 if (!c->x86_model_id[0])
514 printk("%d86", c->x86); 514 printk("%d86", c->x86);
515 else 515 else
516 printk("%s", c->x86_model_id); 516 printk("%s", c->x86_model_id);
517 517
518 if (c->x86_mask || c->cpuid_level >= 0) 518 if (c->x86_mask || c->cpuid_level >= 0)
519 printk(" stepping %02x\n", c->x86_mask); 519 printk(" stepping %02x\n", c->x86_mask);
520 else 520 else
521 printk("\n"); 521 printk("\n");
522 } 522 }
523 523
524 cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; 524 cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
525 525
526 /* This is hacky. :) 526 /* This is hacky. :)
527 * We're emulating future behavior. 527 * We're emulating future behavior.
528 * In the future, the cpu-specific init functions will be called implicitly 528 * In the future, the cpu-specific init functions will be called implicitly
529 * via the magic of initcalls. 529 * via the magic of initcalls.
530 * They will insert themselves into the cpu_devs structure. 530 * They will insert themselves into the cpu_devs structure.
531 * Then, when cpu_init() is called, we can just iterate over that array. 531 * Then, when cpu_init() is called, we can just iterate over that array.
532 */ 532 */
533 533
534 extern int intel_cpu_init(void); 534 extern int intel_cpu_init(void);
535 extern int cyrix_init_cpu(void); 535 extern int cyrix_init_cpu(void);
536 extern int nsc_init_cpu(void); 536 extern int nsc_init_cpu(void);
537 extern int amd_init_cpu(void); 537 extern int amd_init_cpu(void);
538 extern int centaur_init_cpu(void); 538 extern int centaur_init_cpu(void);
539 extern int transmeta_init_cpu(void); 539 extern int transmeta_init_cpu(void);
540 extern int rise_init_cpu(void); 540 extern int rise_init_cpu(void);
541 extern int nexgen_init_cpu(void); 541 extern int nexgen_init_cpu(void);
542 extern int umc_init_cpu(void); 542 extern int umc_init_cpu(void);
543 543
544 void __init early_cpu_init(void) 544 void __init early_cpu_init(void)
545 { 545 {
546 intel_cpu_init(); 546 intel_cpu_init();
547 cyrix_init_cpu(); 547 cyrix_init_cpu();
548 nsc_init_cpu(); 548 nsc_init_cpu();
549 amd_init_cpu(); 549 amd_init_cpu();
550 centaur_init_cpu(); 550 centaur_init_cpu();
551 transmeta_init_cpu(); 551 transmeta_init_cpu();
552 rise_init_cpu(); 552 rise_init_cpu();
553 nexgen_init_cpu(); 553 nexgen_init_cpu();
554 umc_init_cpu(); 554 umc_init_cpu();
555 early_cpu_detect(); 555 early_cpu_detect();
556 556
557 #ifdef CONFIG_DEBUG_PAGEALLOC 557 #ifdef CONFIG_DEBUG_PAGEALLOC
558 /* pse is not compatible with on-the-fly unmapping, 558 /* pse is not compatible with on-the-fly unmapping,
559 * disable it even if the cpus claim to support it. 559 * disable it even if the cpus claim to support it.
560 */ 560 */
561 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); 561 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
562 disable_pse = 1; 562 disable_pse = 1;
563 #endif 563 #endif
564 } 564 }
565 /* 565 /*
566 * cpu_init() initializes state that is per-CPU. Some data is already 566 * cpu_init() initializes state that is per-CPU. Some data is already
567 * initialized (naturally) in the bootstrap process, such as the GDT 567 * initialized (naturally) in the bootstrap process, such as the GDT
568 * and IDT. We reload them nevertheless, this function acts as a 568 * and IDT. We reload them nevertheless, this function acts as a
569 * 'CPU state barrier', nothing should get across. 569 * 'CPU state barrier', nothing should get across.
570 */ 570 */
571 void __devinit cpu_init(void) 571 void __devinit cpu_init(void)
572 { 572 {
573 int cpu = smp_processor_id(); 573 int cpu = smp_processor_id();
574 struct tss_struct * t = &per_cpu(init_tss, cpu); 574 struct tss_struct * t = &per_cpu(init_tss, cpu);
575 struct thread_struct *thread = &current->thread; 575 struct thread_struct *thread = &current->thread;
576 __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); 576 __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
577 577
578 if (cpu_test_and_set(cpu, cpu_initialized)) { 578 if (cpu_test_and_set(cpu, cpu_initialized)) {
579 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 579 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
580 for (;;) local_irq_enable(); 580 for (;;) local_irq_enable();
581 } 581 }
582 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 582 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
583 583
584 if (cpu_has_vme || cpu_has_tsc || cpu_has_de) 584 if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
585 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 585 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
586 if (tsc_disable && cpu_has_tsc) { 586 if (tsc_disable && cpu_has_tsc) {
587 printk(KERN_NOTICE "Disabling TSC...\n"); 587 printk(KERN_NOTICE "Disabling TSC...\n");
588 /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ 588 /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
589 clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); 589 clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
590 set_in_cr4(X86_CR4_TSD); 590 set_in_cr4(X86_CR4_TSD);
591 } 591 }
592 592
593 /* 593 /*
594 * Initialize the per-CPU GDT with the boot GDT, 594 * Initialize the per-CPU GDT with the boot GDT,
595 * and set up the GDT descriptor: 595 * and set up the GDT descriptor:
596 */ 596 */
597 memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table, 597 memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table,
598 GDT_SIZE); 598 GDT_SIZE);
599 599
600 /* Set up GDT entry for 16bit stack */ 600 /* Set up GDT entry for 16bit stack */
601 *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |= 601 *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |=
602 ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | 602 ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
603 ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | 603 ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
604 (CPU_16BIT_STACK_SIZE - 1); 604 (CPU_16BIT_STACK_SIZE - 1);
605 605
606 cpu_gdt_descr[cpu].size = GDT_SIZE - 1; 606 cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
607 cpu_gdt_descr[cpu].address = 607 cpu_gdt_descr[cpu].address =
608 (unsigned long)&per_cpu(cpu_gdt_table, cpu); 608 (unsigned long)&per_cpu(cpu_gdt_table, cpu);
609 609
610 /* 610 /*
611 * Set up the per-thread TLS descriptor cache: 611 * Set up the per-thread TLS descriptor cache:
612 */ 612 */
613 memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu), 613 memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu),
614 GDT_ENTRY_TLS_ENTRIES * 8); 614 GDT_ENTRY_TLS_ENTRIES * 8);
615 615
616 __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu])); 616 load_gdt(&cpu_gdt_descr[cpu]);
617 __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); 617 load_idt(&idt_descr);
618 618
619 /* 619 /*
620 * Delete NT 620 * Delete NT
621 */ 621 */
622 __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); 622 __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
623 623
624 /* 624 /*
625 * Set up and load the per-CPU TSS and LDT 625 * Set up and load the per-CPU TSS and LDT
626 */ 626 */
627 atomic_inc(&init_mm.mm_count); 627 atomic_inc(&init_mm.mm_count);
628 current->active_mm = &init_mm; 628 current->active_mm = &init_mm;
629 if (current->mm) 629 if (current->mm)
630 BUG(); 630 BUG();
631 enter_lazy_tlb(&init_mm, current); 631 enter_lazy_tlb(&init_mm, current);
632 632
633 load_esp0(t, thread); 633 load_esp0(t, thread);
634 set_tss_desc(cpu,t); 634 set_tss_desc(cpu,t);
635 load_TR_desc(); 635 load_TR_desc();
636 load_LDT(&init_mm.context); 636 load_LDT(&init_mm.context);
637 637
638 /* Set up doublefault TSS pointer in the GDT */ 638 /* Set up doublefault TSS pointer in the GDT */
639 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 639 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
640 640
641 /* Clear %fs and %gs. */ 641 /* Clear %fs and %gs. */
642 asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); 642 asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
643 643
644 /* Clear all 6 debug registers: */ 644 /* Clear all 6 debug registers: */
645 set_debugreg(0, 0); 645 set_debugreg(0, 0);
646 set_debugreg(0, 1); 646 set_debugreg(0, 1);
647 set_debugreg(0, 2); 647 set_debugreg(0, 2);
648 set_debugreg(0, 3); 648 set_debugreg(0, 3);
649 set_debugreg(0, 6); 649 set_debugreg(0, 6);
650 set_debugreg(0, 7); 650 set_debugreg(0, 7);
651 651
652 /* 652 /*
653 * Force FPU initialization: 653 * Force FPU initialization:
654 */ 654 */
655 current_thread_info()->status = 0; 655 current_thread_info()->status = 0;
656 clear_used_math(); 656 clear_used_math();
657 mxcsr_feature_mask_init(); 657 mxcsr_feature_mask_init();
658 } 658 }
659 659
660 #ifdef CONFIG_HOTPLUG_CPU 660 #ifdef CONFIG_HOTPLUG_CPU
661 void __devinit cpu_uninit(void) 661 void __devinit cpu_uninit(void)
662 { 662 {
663 int cpu = raw_smp_processor_id(); 663 int cpu = raw_smp_processor_id();
664 cpu_clear(cpu, cpu_initialized); 664 cpu_clear(cpu, cpu_initialized);
665 665
666 /* lazy TLB state */ 666 /* lazy TLB state */
667 per_cpu(cpu_tlbstate, cpu).state = 0; 667 per_cpu(cpu_tlbstate, cpu).state = 0;
668 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; 668 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
669 } 669 }
670 #endif 670 #endif
671 671
arch/i386/kernel/doublefault.c
1 #include <linux/mm.h> 1 #include <linux/mm.h>
2 #include <linux/sched.h> 2 #include <linux/sched.h>
3 #include <linux/init.h> 3 #include <linux/init.h>
4 #include <linux/init_task.h> 4 #include <linux/init_task.h>
5 #include <linux/fs.h> 5 #include <linux/fs.h>
6 6
7 #include <asm/uaccess.h> 7 #include <asm/uaccess.h>
8 #include <asm/pgtable.h> 8 #include <asm/pgtable.h>
9 #include <asm/processor.h> 9 #include <asm/processor.h>
10 #include <asm/desc.h> 10 #include <asm/desc.h>
11 11
12 #define DOUBLEFAULT_STACKSIZE (1024) 12 #define DOUBLEFAULT_STACKSIZE (1024)
13 static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; 13 static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
14 #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) 14 #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
15 15
16 #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000) 16 #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
17 17
18 static void doublefault_fn(void) 18 static void doublefault_fn(void)
19 { 19 {
20 struct Xgt_desc_struct gdt_desc = {0, 0}; 20 struct Xgt_desc_struct gdt_desc = {0, 0};
21 unsigned long gdt, tss; 21 unsigned long gdt, tss;
22 22
23 __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory"); 23 store_gdt(&gdt_desc);
24 gdt = gdt_desc.address; 24 gdt = gdt_desc.address;
25 25
26 printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); 26 printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
27 27
28 if (ptr_ok(gdt)) { 28 if (ptr_ok(gdt)) {
29 gdt += GDT_ENTRY_TSS << 3; 29 gdt += GDT_ENTRY_TSS << 3;
30 tss = *(u16 *)(gdt+2); 30 tss = *(u16 *)(gdt+2);
31 tss += *(u8 *)(gdt+4) << 16; 31 tss += *(u8 *)(gdt+4) << 16;
32 tss += *(u8 *)(gdt+7) << 24; 32 tss += *(u8 *)(gdt+7) << 24;
33 printk("double fault, tss at %08lx\n", tss); 33 printk("double fault, tss at %08lx\n", tss);
34 34
35 if (ptr_ok(tss)) { 35 if (ptr_ok(tss)) {
36 struct tss_struct *t = (struct tss_struct *)tss; 36 struct tss_struct *t = (struct tss_struct *)tss;
37 37
38 printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp); 38 printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
39 39
40 printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", 40 printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
41 t->eax, t->ebx, t->ecx, t->edx); 41 t->eax, t->ebx, t->ecx, t->edx);
42 printk("esi = %08lx, edi = %08lx\n", 42 printk("esi = %08lx, edi = %08lx\n",
43 t->esi, t->edi); 43 t->esi, t->edi);
44 } 44 }
45 } 45 }
46 46
47 for (;;) /* nothing */; 47 for (;;) /* nothing */;
48 } 48 }
49 49
50 struct tss_struct doublefault_tss __cacheline_aligned = { 50 struct tss_struct doublefault_tss __cacheline_aligned = {
51 .esp0 = STACK_START, 51 .esp0 = STACK_START,
52 .ss0 = __KERNEL_DS, 52 .ss0 = __KERNEL_DS,
53 .ldt = 0, 53 .ldt = 0,
54 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, 54 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
55 55
56 .eip = (unsigned long) doublefault_fn, 56 .eip = (unsigned long) doublefault_fn,
57 .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */ 57 .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */
58 .esp = STACK_START, 58 .esp = STACK_START,
59 .es = __USER_DS, 59 .es = __USER_DS,
60 .cs = __KERNEL_CS, 60 .cs = __KERNEL_CS,
61 .ss = __KERNEL_DS, 61 .ss = __KERNEL_DS,
62 .ds = __USER_DS, 62 .ds = __USER_DS,
63 63
64 .__cr3 = __pa(swapper_pg_dir) 64 .__cr3 = __pa(swapper_pg_dir)
65 }; 65 };
66 66
arch/i386/kernel/efi.c
1 /* 1 /*
2 * Extensible Firmware Interface 2 * Extensible Firmware Interface
3 * 3 *
4 * Based on Extensible Firmware Interface Specification version 1.0 4 * Based on Extensible Firmware Interface Specification version 1.0
5 * 5 *
6 * Copyright (C) 1999 VA Linux Systems 6 * Copyright (C) 1999 VA Linux Systems
7 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> 7 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
8 * Copyright (C) 1999-2002 Hewlett-Packard Co. 8 * Copyright (C) 1999-2002 Hewlett-Packard Co.
9 * David Mosberger-Tang <davidm@hpl.hp.com> 9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 * Stephane Eranian <eranian@hpl.hp.com> 10 * Stephane Eranian <eranian@hpl.hp.com>
11 * 11 *
12 * All EFI Runtime Services are not implemented yet as EFI only 12 * All EFI Runtime Services are not implemented yet as EFI only
13 * supports physical mode addressing on SoftSDV. This is to be fixed 13 * supports physical mode addressing on SoftSDV. This is to be fixed
14 * in a future version. --drummond 1999-07-20 14 * in a future version. --drummond 1999-07-20
15 * 15 *
16 * Implemented EFI runtime services and virtual mode calls. --davidm 16 * Implemented EFI runtime services and virtual mode calls. --davidm
17 * 17 *
18 * Goutham Rao: <goutham.rao@intel.com> 18 * Goutham Rao: <goutham.rao@intel.com>
19 * Skip non-WB memory and ignore empty memory ranges. 19 * Skip non-WB memory and ignore empty memory ranges.
20 */ 20 */
21 21
22 #include <linux/config.h> 22 #include <linux/config.h>
23 #include <linux/kernel.h> 23 #include <linux/kernel.h>
24 #include <linux/init.h> 24 #include <linux/init.h>
25 #include <linux/mm.h> 25 #include <linux/mm.h>
26 #include <linux/types.h> 26 #include <linux/types.h>
27 #include <linux/time.h> 27 #include <linux/time.h>
28 #include <linux/spinlock.h> 28 #include <linux/spinlock.h>
29 #include <linux/bootmem.h> 29 #include <linux/bootmem.h>
30 #include <linux/ioport.h> 30 #include <linux/ioport.h>
31 #include <linux/module.h> 31 #include <linux/module.h>
32 #include <linux/efi.h> 32 #include <linux/efi.h>
33 #include <linux/kexec.h> 33 #include <linux/kexec.h>
34 34
35 #include <asm/setup.h> 35 #include <asm/setup.h>
36 #include <asm/io.h> 36 #include <asm/io.h>
37 #include <asm/page.h> 37 #include <asm/page.h>
38 #include <asm/pgtable.h> 38 #include <asm/pgtable.h>
39 #include <asm/processor.h> 39 #include <asm/processor.h>
40 #include <asm/desc.h> 40 #include <asm/desc.h>
41 #include <asm/tlbflush.h> 41 #include <asm/tlbflush.h>
42 42
43 #define EFI_DEBUG 0 43 #define EFI_DEBUG 0
44 #define PFX "EFI: " 44 #define PFX "EFI: "
45 45
46 extern efi_status_t asmlinkage efi_call_phys(void *, ...); 46 extern efi_status_t asmlinkage efi_call_phys(void *, ...);
47 47
48 struct efi efi; 48 struct efi efi;
49 EXPORT_SYMBOL(efi); 49 EXPORT_SYMBOL(efi);
50 static struct efi efi_phys; 50 static struct efi efi_phys;
51 struct efi_memory_map memmap; 51 struct efi_memory_map memmap;
52 52
53 /* 53 /*
54 * We require an early boot_ioremap mapping mechanism initially 54 * We require an early boot_ioremap mapping mechanism initially
55 */ 55 */
56 extern void * boot_ioremap(unsigned long, unsigned long); 56 extern void * boot_ioremap(unsigned long, unsigned long);
57 57
58 /* 58 /*
59 * To make EFI call EFI runtime service in physical addressing mode we need 59 * To make EFI call EFI runtime service in physical addressing mode we need
60 * prelog/epilog before/after the invocation to disable interrupt, to 60 * prelog/epilog before/after the invocation to disable interrupt, to
61 * claim EFI runtime service handler exclusively and to duplicate a memory in 61 * claim EFI runtime service handler exclusively and to duplicate a memory in
62 * low memory space say 0 - 3G. 62 * low memory space say 0 - 3G.
63 */ 63 */
64 64
65 static unsigned long efi_rt_eflags; 65 static unsigned long efi_rt_eflags;
66 static DEFINE_SPINLOCK(efi_rt_lock); 66 static DEFINE_SPINLOCK(efi_rt_lock);
67 static pgd_t efi_bak_pg_dir_pointer[2]; 67 static pgd_t efi_bak_pg_dir_pointer[2];
68 68
69 static void efi_call_phys_prelog(void) 69 static void efi_call_phys_prelog(void)
70 { 70 {
71 unsigned long cr4; 71 unsigned long cr4;
72 unsigned long temp; 72 unsigned long temp;
73 73
74 spin_lock(&efi_rt_lock); 74 spin_lock(&efi_rt_lock);
75 local_irq_save(efi_rt_eflags); 75 local_irq_save(efi_rt_eflags);
76 76
77 /* 77 /*
78 * If I don't have PSE, I should just duplicate two entries in page 78 * If I don't have PSE, I should just duplicate two entries in page
79 * directory. If I have PSE, I just need to duplicate one entry in 79 * directory. If I have PSE, I just need to duplicate one entry in
80 * page directory. 80 * page directory.
81 */ 81 */
82 cr4 = read_cr4(); 82 cr4 = read_cr4();
83 83
84 if (cr4 & X86_CR4_PSE) { 84 if (cr4 & X86_CR4_PSE) {
85 efi_bak_pg_dir_pointer[0].pgd = 85 efi_bak_pg_dir_pointer[0].pgd =
86 swapper_pg_dir[pgd_index(0)].pgd; 86 swapper_pg_dir[pgd_index(0)].pgd;
87 swapper_pg_dir[0].pgd = 87 swapper_pg_dir[0].pgd =
88 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; 88 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
89 } else { 89 } else {
90 efi_bak_pg_dir_pointer[0].pgd = 90 efi_bak_pg_dir_pointer[0].pgd =
91 swapper_pg_dir[pgd_index(0)].pgd; 91 swapper_pg_dir[pgd_index(0)].pgd;
92 efi_bak_pg_dir_pointer[1].pgd = 92 efi_bak_pg_dir_pointer[1].pgd =
93 swapper_pg_dir[pgd_index(0x400000)].pgd; 93 swapper_pg_dir[pgd_index(0x400000)].pgd;
94 swapper_pg_dir[pgd_index(0)].pgd = 94 swapper_pg_dir[pgd_index(0)].pgd =
95 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; 95 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
96 temp = PAGE_OFFSET + 0x400000; 96 temp = PAGE_OFFSET + 0x400000;
97 swapper_pg_dir[pgd_index(0x400000)].pgd = 97 swapper_pg_dir[pgd_index(0x400000)].pgd =
98 swapper_pg_dir[pgd_index(temp)].pgd; 98 swapper_pg_dir[pgd_index(temp)].pgd;
99 } 99 }
100 100
101 /* 101 /*
102 * After the lock is released, the original page table is restored. 102 * After the lock is released, the original page table is restored.
103 */ 103 */
104 local_flush_tlb(); 104 local_flush_tlb();
105 105
106 cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); 106 cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address);
107 __asm__ __volatile__("lgdt %0":"=m" 107 load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]));
108 (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])));
109 } 108 }
110 109
111 static void efi_call_phys_epilog(void) 110 static void efi_call_phys_epilog(void)
112 { 111 {
113 unsigned long cr4; 112 unsigned long cr4;
114 113
115 cpu_gdt_descr[0].address = 114 cpu_gdt_descr[0].address =
116 (unsigned long) __va(cpu_gdt_descr[0].address); 115 (unsigned long) __va(cpu_gdt_descr[0].address);
117 __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); 116 load_gdt(&cpu_gdt_descr[0]);
118 cr4 = read_cr4(); 117 cr4 = read_cr4();
119 118
120 if (cr4 & X86_CR4_PSE) { 119 if (cr4 & X86_CR4_PSE) {
121 swapper_pg_dir[pgd_index(0)].pgd = 120 swapper_pg_dir[pgd_index(0)].pgd =
122 efi_bak_pg_dir_pointer[0].pgd; 121 efi_bak_pg_dir_pointer[0].pgd;
123 } else { 122 } else {
124 swapper_pg_dir[pgd_index(0)].pgd = 123 swapper_pg_dir[pgd_index(0)].pgd =
125 efi_bak_pg_dir_pointer[0].pgd; 124 efi_bak_pg_dir_pointer[0].pgd;
126 swapper_pg_dir[pgd_index(0x400000)].pgd = 125 swapper_pg_dir[pgd_index(0x400000)].pgd =
127 efi_bak_pg_dir_pointer[1].pgd; 126 efi_bak_pg_dir_pointer[1].pgd;
128 } 127 }
129 128
130 /* 129 /*
131 * After the lock is released, the original page table is restored. 130 * After the lock is released, the original page table is restored.
132 */ 131 */
133 local_flush_tlb(); 132 local_flush_tlb();
134 133
135 local_irq_restore(efi_rt_eflags); 134 local_irq_restore(efi_rt_eflags);
136 spin_unlock(&efi_rt_lock); 135 spin_unlock(&efi_rt_lock);
137 } 136 }
138 137
139 static efi_status_t 138 static efi_status_t
140 phys_efi_set_virtual_address_map(unsigned long memory_map_size, 139 phys_efi_set_virtual_address_map(unsigned long memory_map_size,
141 unsigned long descriptor_size, 140 unsigned long descriptor_size,
142 u32 descriptor_version, 141 u32 descriptor_version,
143 efi_memory_desc_t *virtual_map) 142 efi_memory_desc_t *virtual_map)
144 { 143 {
145 efi_status_t status; 144 efi_status_t status;
146 145
147 efi_call_phys_prelog(); 146 efi_call_phys_prelog();
148 status = efi_call_phys(efi_phys.set_virtual_address_map, 147 status = efi_call_phys(efi_phys.set_virtual_address_map,
149 memory_map_size, descriptor_size, 148 memory_map_size, descriptor_size,
150 descriptor_version, virtual_map); 149 descriptor_version, virtual_map);
151 efi_call_phys_epilog(); 150 efi_call_phys_epilog();
152 return status; 151 return status;
153 } 152 }
154 153
155 static efi_status_t 154 static efi_status_t
156 phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) 155 phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
157 { 156 {
158 efi_status_t status; 157 efi_status_t status;
159 158
160 efi_call_phys_prelog(); 159 efi_call_phys_prelog();
161 status = efi_call_phys(efi_phys.get_time, tm, tc); 160 status = efi_call_phys(efi_phys.get_time, tm, tc);
162 efi_call_phys_epilog(); 161 efi_call_phys_epilog();
163 return status; 162 return status;
164 } 163 }
165 164
166 inline int efi_set_rtc_mmss(unsigned long nowtime) 165 inline int efi_set_rtc_mmss(unsigned long nowtime)
167 { 166 {
168 int real_seconds, real_minutes; 167 int real_seconds, real_minutes;
169 efi_status_t status; 168 efi_status_t status;
170 efi_time_t eft; 169 efi_time_t eft;
171 efi_time_cap_t cap; 170 efi_time_cap_t cap;
172 171
173 spin_lock(&efi_rt_lock); 172 spin_lock(&efi_rt_lock);
174 status = efi.get_time(&eft, &cap); 173 status = efi.get_time(&eft, &cap);
175 spin_unlock(&efi_rt_lock); 174 spin_unlock(&efi_rt_lock);
176 if (status != EFI_SUCCESS) 175 if (status != EFI_SUCCESS)
177 panic("Ooops, efitime: can't read time!\n"); 176 panic("Ooops, efitime: can't read time!\n");
178 real_seconds = nowtime % 60; 177 real_seconds = nowtime % 60;
179 real_minutes = nowtime / 60; 178 real_minutes = nowtime / 60;
180 179
181 if (((abs(real_minutes - eft.minute) + 15)/30) & 1) 180 if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
182 real_minutes += 30; 181 real_minutes += 30;
183 real_minutes %= 60; 182 real_minutes %= 60;
184 183
185 eft.minute = real_minutes; 184 eft.minute = real_minutes;
186 eft.second = real_seconds; 185 eft.second = real_seconds;
187 186
188 if (status != EFI_SUCCESS) { 187 if (status != EFI_SUCCESS) {
189 printk("Ooops: efitime: can't read time!\n"); 188 printk("Ooops: efitime: can't read time!\n");
190 return -1; 189 return -1;
191 } 190 }
192 return 0; 191 return 0;
193 } 192 }
194 /* 193 /*
195 * This should only be used during kernel init and before runtime 194 * This should only be used during kernel init and before runtime
196 * services have been remapped, therefore, we'll need to call in physical 195 * services have been remapped, therefore, we'll need to call in physical
197 * mode. Note, this call isn't used later, so mark it __init. 196 * mode. Note, this call isn't used later, so mark it __init.
198 */ 197 */
199 inline unsigned long __init efi_get_time(void) 198 inline unsigned long __init efi_get_time(void)
200 { 199 {
201 efi_status_t status; 200 efi_status_t status;
202 efi_time_t eft; 201 efi_time_t eft;
203 efi_time_cap_t cap; 202 efi_time_cap_t cap;
204 203
205 status = phys_efi_get_time(&eft, &cap); 204 status = phys_efi_get_time(&eft, &cap);
206 if (status != EFI_SUCCESS) 205 if (status != EFI_SUCCESS)
207 printk("Oops: efitime: can't read time status: 0x%lx\n",status); 206 printk("Oops: efitime: can't read time status: 0x%lx\n",status);
208 207
209 return mktime(eft.year, eft.month, eft.day, eft.hour, 208 return mktime(eft.year, eft.month, eft.day, eft.hour,
210 eft.minute, eft.second); 209 eft.minute, eft.second);
211 } 210 }
212 211
213 int is_available_memory(efi_memory_desc_t * md) 212 int is_available_memory(efi_memory_desc_t * md)
214 { 213 {
215 if (!(md->attribute & EFI_MEMORY_WB)) 214 if (!(md->attribute & EFI_MEMORY_WB))
216 return 0; 215 return 0;
217 216
218 switch (md->type) { 217 switch (md->type) {
219 case EFI_LOADER_CODE: 218 case EFI_LOADER_CODE:
220 case EFI_LOADER_DATA: 219 case EFI_LOADER_DATA:
221 case EFI_BOOT_SERVICES_CODE: 220 case EFI_BOOT_SERVICES_CODE:
222 case EFI_BOOT_SERVICES_DATA: 221 case EFI_BOOT_SERVICES_DATA:
223 case EFI_CONVENTIONAL_MEMORY: 222 case EFI_CONVENTIONAL_MEMORY:
224 return 1; 223 return 1;
225 } 224 }
226 return 0; 225 return 0;
227 } 226 }
228 227
229 /* 228 /*
230 * We need to map the EFI memory map again after paging_init(). 229 * We need to map the EFI memory map again after paging_init().
231 */ 230 */
232 void __init efi_map_memmap(void) 231 void __init efi_map_memmap(void)
233 { 232 {
234 memmap.map = NULL; 233 memmap.map = NULL;
235 234
236 memmap.map = bt_ioremap((unsigned long) memmap.phys_map, 235 memmap.map = bt_ioremap((unsigned long) memmap.phys_map,
237 (memmap.nr_map * memmap.desc_size)); 236 (memmap.nr_map * memmap.desc_size));
238 if (memmap.map == NULL) 237 if (memmap.map == NULL)
239 printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); 238 printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
240 239
241 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); 240 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
242 } 241 }
243 242
244 #if EFI_DEBUG 243 #if EFI_DEBUG
245 static void __init print_efi_memmap(void) 244 static void __init print_efi_memmap(void)
246 { 245 {
247 efi_memory_desc_t *md; 246 efi_memory_desc_t *md;
248 void *p; 247 void *p;
249 int i; 248 int i;
250 249
251 for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { 250 for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
252 md = p; 251 md = p;
253 printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " 252 printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
254 "range=[0x%016llx-0x%016llx) (%lluMB)\n", 253 "range=[0x%016llx-0x%016llx) (%lluMB)\n",
255 i, md->type, md->attribute, md->phys_addr, 254 i, md->type, md->attribute, md->phys_addr,
256 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), 255 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
257 (md->num_pages >> (20 - EFI_PAGE_SHIFT))); 256 (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
258 } 257 }
259 } 258 }
260 #endif /* EFI_DEBUG */ 259 #endif /* EFI_DEBUG */
261 260
262 /* 261 /*
263 * Walks the EFI memory map and calls CALLBACK once for each EFI 262 * Walks the EFI memory map and calls CALLBACK once for each EFI
264 * memory descriptor that has memory that is available for kernel use. 263 * memory descriptor that has memory that is available for kernel use.
265 */ 264 */
266 void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) 265 void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
267 { 266 {
268 int prev_valid = 0; 267 int prev_valid = 0;
269 struct range { 268 struct range {
270 unsigned long start; 269 unsigned long start;
271 unsigned long end; 270 unsigned long end;
272 } prev, curr; 271 } prev, curr;
273 efi_memory_desc_t *md; 272 efi_memory_desc_t *md;
274 unsigned long start, end; 273 unsigned long start, end;
275 void *p; 274 void *p;
276 275
277 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 276 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
278 md = p; 277 md = p;
279 278
280 if ((md->num_pages == 0) || (!is_available_memory(md))) 279 if ((md->num_pages == 0) || (!is_available_memory(md)))
281 continue; 280 continue;
282 281
283 curr.start = md->phys_addr; 282 curr.start = md->phys_addr;
284 curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); 283 curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
285 284
286 if (!prev_valid) { 285 if (!prev_valid) {
287 prev = curr; 286 prev = curr;
288 prev_valid = 1; 287 prev_valid = 1;
289 } else { 288 } else {
290 if (curr.start < prev.start) 289 if (curr.start < prev.start)
291 printk(KERN_INFO PFX "Unordered memory map\n"); 290 printk(KERN_INFO PFX "Unordered memory map\n");
292 if (prev.end == curr.start) 291 if (prev.end == curr.start)
293 prev.end = curr.end; 292 prev.end = curr.end;
294 else { 293 else {
295 start = 294 start =
296 (unsigned long) (PAGE_ALIGN(prev.start)); 295 (unsigned long) (PAGE_ALIGN(prev.start));
297 end = (unsigned long) (prev.end & PAGE_MASK); 296 end = (unsigned long) (prev.end & PAGE_MASK);
298 if ((end > start) 297 if ((end > start)
299 && (*callback) (start, end, arg) < 0) 298 && (*callback) (start, end, arg) < 0)
300 return; 299 return;
301 prev = curr; 300 prev = curr;
302 } 301 }
303 } 302 }
304 } 303 }
305 if (prev_valid) { 304 if (prev_valid) {
306 start = (unsigned long) PAGE_ALIGN(prev.start); 305 start = (unsigned long) PAGE_ALIGN(prev.start);
307 end = (unsigned long) (prev.end & PAGE_MASK); 306 end = (unsigned long) (prev.end & PAGE_MASK);
308 if (end > start) 307 if (end > start)
309 (*callback) (start, end, arg); 308 (*callback) (start, end, arg);
310 } 309 }
311 } 310 }
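For context, a minimal sketch of how a caller could drive the walker above; the callback and counter names here (count_free_bytes, total_free) are illustrative and are not part of this change.

	static unsigned long total_free;

	/* Called once per usable range; returning a negative value stops the walk. */
	static int __init count_free_bytes(unsigned long start, unsigned long end, void *arg)
	{
		total_free += end - start;
		return 0;
	}

	/* During setup, tally how much memory EFI reports as available for the kernel: */
	efi_memmap_walk(count_free_bytes, NULL);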
312 311
313 void __init efi_init(void) 312 void __init efi_init(void)
314 { 313 {
315 efi_config_table_t *config_tables; 314 efi_config_table_t *config_tables;
316 efi_runtime_services_t *runtime; 315 efi_runtime_services_t *runtime;
317 efi_char16_t *c16; 316 efi_char16_t *c16;
318 char vendor[100] = "unknown"; 317 char vendor[100] = "unknown";
319 unsigned long num_config_tables; 318 unsigned long num_config_tables;
320 int i = 0; 319 int i = 0;
321 320
322 memset(&efi, 0, sizeof(efi) ); 321 memset(&efi, 0, sizeof(efi) );
323 memset(&efi_phys, 0, sizeof(efi_phys)); 322 memset(&efi_phys, 0, sizeof(efi_phys));
324 323
325 efi_phys.systab = EFI_SYSTAB; 324 efi_phys.systab = EFI_SYSTAB;
326 memmap.phys_map = EFI_MEMMAP; 325 memmap.phys_map = EFI_MEMMAP;
327 memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; 326 memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
328 memmap.desc_version = EFI_MEMDESC_VERSION; 327 memmap.desc_version = EFI_MEMDESC_VERSION;
329 memmap.desc_size = EFI_MEMDESC_SIZE; 328 memmap.desc_size = EFI_MEMDESC_SIZE;
330 329
331 efi.systab = (efi_system_table_t *) 330 efi.systab = (efi_system_table_t *)
332 boot_ioremap((unsigned long) efi_phys.systab, 331 boot_ioremap((unsigned long) efi_phys.systab,
333 sizeof(efi_system_table_t)); 332 sizeof(efi_system_table_t));
334 /* 333 /*
335 * Verify the EFI Table 334 * Verify the EFI Table
336 */ 335 */
337 if (efi.systab == NULL) 336 if (efi.systab == NULL)
338 printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n"); 337 printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n");
339 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) 338 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
340 printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n"); 339 printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n");
341 if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) 340 if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
342 printk(KERN_ERR PFX 341 printk(KERN_ERR PFX
343 "Warning: EFI system table major version mismatch: " 342 "Warning: EFI system table major version mismatch: "
344 "got %d.%02d, expected %d.%02d\n", 343 "got %d.%02d, expected %d.%02d\n",
345 efi.systab->hdr.revision >> 16, 344 efi.systab->hdr.revision >> 16,
346 efi.systab->hdr.revision & 0xffff, 345 efi.systab->hdr.revision & 0xffff,
347 EFI_SYSTEM_TABLE_REVISION >> 16, 346 EFI_SYSTEM_TABLE_REVISION >> 16,
348 EFI_SYSTEM_TABLE_REVISION & 0xffff); 347 EFI_SYSTEM_TABLE_REVISION & 0xffff);
349 /* 348 /*
350 * Grab some details from the system table 349 * Grab some details from the system table
351 */ 350 */
352 num_config_tables = efi.systab->nr_tables; 351 num_config_tables = efi.systab->nr_tables;
353 config_tables = (efi_config_table_t *)efi.systab->tables; 352 config_tables = (efi_config_table_t *)efi.systab->tables;
354 runtime = efi.systab->runtime; 353 runtime = efi.systab->runtime;
355 354
356 /* 355 /*
357 * Show what we know for posterity 356 * Show what we know for posterity
358 */ 357 */
359 c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); 358 c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2);
360 if (c16) { 359 if (c16) {
361 for (i = 0; i < sizeof(vendor) && *c16; ++i) 360 for (i = 0; i < sizeof(vendor) && *c16; ++i)
362 vendor[i] = *c16++; 361 vendor[i] = *c16++;
363 vendor[i] = '\0'; 362 vendor[i] = '\0';
364 } else 363 } else
365 printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); 364 printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
366 365
367 printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n", 366 printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n",
368 efi.systab->hdr.revision >> 16, 367 efi.systab->hdr.revision >> 16,
369 efi.systab->hdr.revision & 0xffff, vendor); 368 efi.systab->hdr.revision & 0xffff, vendor);
370 369
371 /* 370 /*
372 * Let's see what config tables the firmware passed to us. 371 * Let's see what config tables the firmware passed to us.
373 */ 372 */
374 config_tables = (efi_config_table_t *) 373 config_tables = (efi_config_table_t *)
375 boot_ioremap((unsigned long) config_tables, 374 boot_ioremap((unsigned long) config_tables,
376 num_config_tables * sizeof(efi_config_table_t)); 375 num_config_tables * sizeof(efi_config_table_t));
377 376
378 if (config_tables == NULL) 377 if (config_tables == NULL)
379 printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); 378 printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n");
380 379
381 for (i = 0; i < num_config_tables; i++) { 380 for (i = 0; i < num_config_tables; i++) {
382 if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { 381 if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
383 efi.mps = (void *)config_tables[i].table; 382 efi.mps = (void *)config_tables[i].table;
384 printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); 383 printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table);
385 } else 384 } else
386 if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { 385 if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
387 efi.acpi20 = __va(config_tables[i].table); 386 efi.acpi20 = __va(config_tables[i].table);
388 printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); 387 printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table);
389 } else 388 } else
390 if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { 389 if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
391 efi.acpi = __va(config_tables[i].table); 390 efi.acpi = __va(config_tables[i].table);
392 printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); 391 printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table);
393 } else 392 } else
394 if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { 393 if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
395 efi.smbios = (void *) config_tables[i].table; 394 efi.smbios = (void *) config_tables[i].table;
396 printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); 395 printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table);
397 } else 396 } else
398 if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { 397 if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
399 efi.hcdp = (void *)config_tables[i].table; 398 efi.hcdp = (void *)config_tables[i].table;
400 printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); 399 printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table);
401 } else 400 } else
402 if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { 401 if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) {
403 efi.uga = (void *)config_tables[i].table; 402 efi.uga = (void *)config_tables[i].table;
404 printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); 403 printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table);
405 } 404 }
406 } 405 }
407 printk("\n"); 406 printk("\n");
408 407
409 /* 408 /*
410 * Check out the runtime services table. We need to map 409 * Check out the runtime services table. We need to map
411 * the runtime services table so that we can grab the physical 410 * the runtime services table so that we can grab the physical
412 * address of several of the EFI runtime functions, needed to 411 * address of several of the EFI runtime functions, needed to
413 * set the firmware into virtual mode. 412 * set the firmware into virtual mode.
414 */ 413 */
415 414
416 runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long) 415 runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long)
417 runtime, 416 runtime,
418 sizeof(efi_runtime_services_t)); 417 sizeof(efi_runtime_services_t));
419 if (runtime != NULL) { 418 if (runtime != NULL) {
420 /* 419 /*
421 * We will only need *early* access to the following 420 * We will only need *early* access to the following
422 * two EFI runtime services before set_virtual_address_map 421 * two EFI runtime services before set_virtual_address_map
423 * is invoked. 422 * is invoked.
424 */ 423 */
425 efi_phys.get_time = (efi_get_time_t *) runtime->get_time; 424 efi_phys.get_time = (efi_get_time_t *) runtime->get_time;
426 efi_phys.set_virtual_address_map = 425 efi_phys.set_virtual_address_map =
427 (efi_set_virtual_address_map_t *) 426 (efi_set_virtual_address_map_t *)
428 runtime->set_virtual_address_map; 427 runtime->set_virtual_address_map;
429 } else 428 } else
430 printk(KERN_ERR PFX "Could not map the runtime service table!\n"); 429 printk(KERN_ERR PFX "Could not map the runtime service table!\n");
431 430
432 /* Map the EFI memory map for use until paging_init() */ 431 /* Map the EFI memory map for use until paging_init() */
433 memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); 432 memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
434 if (memmap.map == NULL) 433 if (memmap.map == NULL)
435 printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); 434 printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
436 435
437 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); 436 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
438 437
439 #if EFI_DEBUG 438 #if EFI_DEBUG
440 print_efi_memmap(); 439 print_efi_memmap();
441 #endif 440 #endif
442 } 441 }
443 442
444 static inline void __init check_range_for_systab(efi_memory_desc_t *md) 443 static inline void __init check_range_for_systab(efi_memory_desc_t *md)
445 { 444 {
446 if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) && 445 if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) &&
447 ((unsigned long)efi_phys.systab < md->phys_addr + 446 ((unsigned long)efi_phys.systab < md->phys_addr +
448 ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) { 447 ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) {
449 unsigned long addr; 448 unsigned long addr;
450 449
451 addr = md->virt_addr - md->phys_addr + 450 addr = md->virt_addr - md->phys_addr +
452 (unsigned long)efi_phys.systab; 451 (unsigned long)efi_phys.systab;
453 efi.systab = (efi_system_table_t *)addr; 452 efi.systab = (efi_system_table_t *)addr;
454 } 453 }
455 } 454 }
456 455
457 /* 456 /*
458 * This function will switch the EFI runtime services to virtual mode. 457 * This function will switch the EFI runtime services to virtual mode.
459 * Essentially, look through the EFI memmap and map every region that 458 * Essentially, look through the EFI memmap and map every region that
460 * has the runtime attribute bit set in its memory descriptor and update 459 * has the runtime attribute bit set in its memory descriptor and update
461 * that memory descriptor with the virtual address obtained from ioremap(). 460 * that memory descriptor with the virtual address obtained from ioremap().
462 * This enables the runtime services to be called without having to 461 * This enables the runtime services to be called without having to
463 * thunk back into physical mode for every invocation. 462 * thunk back into physical mode for every invocation.
464 */ 463 */
465 464
466 void __init efi_enter_virtual_mode(void) 465 void __init efi_enter_virtual_mode(void)
467 { 466 {
468 efi_memory_desc_t *md; 467 efi_memory_desc_t *md;
469 efi_status_t status; 468 efi_status_t status;
470 void *p; 469 void *p;
471 470
472 efi.systab = NULL; 471 efi.systab = NULL;
473 472
474 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 473 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
475 md = p; 474 md = p;
476 475
477 if (!(md->attribute & EFI_MEMORY_RUNTIME)) 476 if (!(md->attribute & EFI_MEMORY_RUNTIME))
478 continue; 477 continue;
479 478
480 md->virt_addr = (unsigned long)ioremap(md->phys_addr, 479 md->virt_addr = (unsigned long)ioremap(md->phys_addr,
481 md->num_pages << EFI_PAGE_SHIFT); 480 md->num_pages << EFI_PAGE_SHIFT);
482 if (!(unsigned long)md->virt_addr) { 481 if (!(unsigned long)md->virt_addr) {
483 printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", 482 printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
484 (unsigned long)md->phys_addr); 483 (unsigned long)md->phys_addr);
485 } 484 }
486 /* update the virtual address of the EFI system table */ 485 /* update the virtual address of the EFI system table */
487 check_range_for_systab(md); 486 check_range_for_systab(md);
488 } 487 }
489 488
490 if (!efi.systab) 489 if (!efi.systab)
491 BUG(); 490 BUG();
492 491
493 status = phys_efi_set_virtual_address_map( 492 status = phys_efi_set_virtual_address_map(
494 memmap.desc_size * memmap.nr_map, 493 memmap.desc_size * memmap.nr_map,
495 memmap.desc_size, 494 memmap.desc_size,
496 memmap.desc_version, 495 memmap.desc_version,
497 memmap.phys_map); 496 memmap.phys_map);
498 497
499 if (status != EFI_SUCCESS) { 498 if (status != EFI_SUCCESS) {
500 printk (KERN_ALERT "You are screwed! " 499 printk (KERN_ALERT "You are screwed! "
501 "Unable to switch EFI into virtual mode " 500 "Unable to switch EFI into virtual mode "
502 "(status=%lx)\n", status); 501 "(status=%lx)\n", status);
503 panic("EFI call to SetVirtualAddressMap() failed!"); 502 panic("EFI call to SetVirtualAddressMap() failed!");
504 } 503 }
505 504
506 /* 505 /*
507 * Now that EFI is in virtual mode, update the function 506 * Now that EFI is in virtual mode, update the function
508 * pointers in the runtime service table to the new virtual addresses. 507 * pointers in the runtime service table to the new virtual addresses.
509 */ 508 */
510 509
511 efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time; 510 efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time;
512 efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time; 511 efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time;
513 efi.get_wakeup_time = (efi_get_wakeup_time_t *) 512 efi.get_wakeup_time = (efi_get_wakeup_time_t *)
514 efi.systab->runtime->get_wakeup_time; 513 efi.systab->runtime->get_wakeup_time;
515 efi.set_wakeup_time = (efi_set_wakeup_time_t *) 514 efi.set_wakeup_time = (efi_set_wakeup_time_t *)
516 efi.systab->runtime->set_wakeup_time; 515 efi.systab->runtime->set_wakeup_time;
517 efi.get_variable = (efi_get_variable_t *) 516 efi.get_variable = (efi_get_variable_t *)
518 efi.systab->runtime->get_variable; 517 efi.systab->runtime->get_variable;
519 efi.get_next_variable = (efi_get_next_variable_t *) 518 efi.get_next_variable = (efi_get_next_variable_t *)
520 efi.systab->runtime->get_next_variable; 519 efi.systab->runtime->get_next_variable;
521 efi.set_variable = (efi_set_variable_t *) 520 efi.set_variable = (efi_set_variable_t *)
522 efi.systab->runtime->set_variable; 521 efi.systab->runtime->set_variable;
523 efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *) 522 efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *)
524 efi.systab->runtime->get_next_high_mono_count; 523 efi.systab->runtime->get_next_high_mono_count;
525 efi.reset_system = (efi_reset_system_t *) 524 efi.reset_system = (efi_reset_system_t *)
526 efi.systab->runtime->reset_system; 525 efi.systab->runtime->reset_system;
527 } 526 }
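As a rough illustration (not taken from this file), once the pointers above have been rewritten a runtime service can be called directly through the efi structure; the function name and error handling below are a sketch only.

	static void __init efi_print_rtc_example(void)
	{
		efi_time_t eft;
		efi_time_cap_t cap;

		/* get_time now points at the firmware's virtual-mode entry. */
		if (efi.get_time(&eft, &cap) == EFI_SUCCESS)
			printk(KERN_INFO PFX "EFI RTC reads %u-%02u-%02u\n",
			       eft.year, eft.month, eft.day);
	}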
528 527
529 void __init 528 void __init
530 efi_initialize_iomem_resources(struct resource *code_resource, 529 efi_initialize_iomem_resources(struct resource *code_resource,
531 struct resource *data_resource) 530 struct resource *data_resource)
532 { 531 {
533 struct resource *res; 532 struct resource *res;
534 efi_memory_desc_t *md; 533 efi_memory_desc_t *md;
535 void *p; 534 void *p;
536 535
537 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 536 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
538 md = p; 537 md = p;
539 538
540 if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > 539 if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
541 0x100000000ULL) 540 0x100000000ULL)
542 continue; 541 continue;
543 res = alloc_bootmem_low(sizeof(struct resource)); 542 res = alloc_bootmem_low(sizeof(struct resource));
544 switch (md->type) { 543 switch (md->type) {
545 case EFI_RESERVED_TYPE: 544 case EFI_RESERVED_TYPE:
546 res->name = "Reserved Memory"; 545 res->name = "Reserved Memory";
547 break; 546 break;
548 case EFI_LOADER_CODE: 547 case EFI_LOADER_CODE:
549 res->name = "Loader Code"; 548 res->name = "Loader Code";
550 break; 549 break;
551 case EFI_LOADER_DATA: 550 case EFI_LOADER_DATA:
552 res->name = "Loader Data"; 551 res->name = "Loader Data";
553 break; 552 break;
554 case EFI_BOOT_SERVICES_DATA: 553 case EFI_BOOT_SERVICES_DATA:
555 res->name = "BootServices Data"; 554 res->name = "BootServices Data";
556 break; 555 break;
557 case EFI_BOOT_SERVICES_CODE: 556 case EFI_BOOT_SERVICES_CODE:
558 res->name = "BootServices Code"; 557 res->name = "BootServices Code";
559 break; 558 break;
560 case EFI_RUNTIME_SERVICES_CODE: 559 case EFI_RUNTIME_SERVICES_CODE:
561 res->name = "Runtime Service Code"; 560 res->name = "Runtime Service Code";
562 break; 561 break;
563 case EFI_RUNTIME_SERVICES_DATA: 562 case EFI_RUNTIME_SERVICES_DATA:
564 res->name = "Runtime Service Data"; 563 res->name = "Runtime Service Data";
565 break; 564 break;
566 case EFI_CONVENTIONAL_MEMORY: 565 case EFI_CONVENTIONAL_MEMORY:
567 res->name = "Conventional Memory"; 566 res->name = "Conventional Memory";
568 break; 567 break;
569 case EFI_UNUSABLE_MEMORY: 568 case EFI_UNUSABLE_MEMORY:
570 res->name = "Unusable Memory"; 569 res->name = "Unusable Memory";
571 break; 570 break;
572 case EFI_ACPI_RECLAIM_MEMORY: 571 case EFI_ACPI_RECLAIM_MEMORY:
573 res->name = "ACPI Reclaim"; 572 res->name = "ACPI Reclaim";
574 break; 573 break;
575 case EFI_ACPI_MEMORY_NVS: 574 case EFI_ACPI_MEMORY_NVS:
576 res->name = "ACPI NVS"; 575 res->name = "ACPI NVS";
577 break; 576 break;
578 case EFI_MEMORY_MAPPED_IO: 577 case EFI_MEMORY_MAPPED_IO:
579 res->name = "Memory Mapped IO"; 578 res->name = "Memory Mapped IO";
580 break; 579 break;
581 case EFI_MEMORY_MAPPED_IO_PORT_SPACE: 580 case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
582 res->name = "Memory Mapped IO Port Space"; 581 res->name = "Memory Mapped IO Port Space";
583 break; 582 break;
584 default: 583 default:
585 res->name = "Reserved"; 584 res->name = "Reserved";
586 break; 585 break;
587 } 586 }
588 res->start = md->phys_addr; 587 res->start = md->phys_addr;
589 res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1); 588 res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1);
590 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 589 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
591 if (request_resource(&iomem_resource, res) < 0) 590 if (request_resource(&iomem_resource, res) < 0)
592 printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n", 591 printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n",
593 res->name, res->start, res->end); 592 res->name, res->start, res->end);
594 /* 593 /*
595 * We don't know which region contains kernel data so we try 594 * We don't know which region contains kernel data so we try
596 * it repeatedly and let the resource manager test it. 595 * it repeatedly and let the resource manager test it.
597 */ 596 */
598 if (md->type == EFI_CONVENTIONAL_MEMORY) { 597 if (md->type == EFI_CONVENTIONAL_MEMORY) {
599 request_resource(res, code_resource); 598 request_resource(res, code_resource);
600 request_resource(res, data_resource); 599 request_resource(res, data_resource);
601 #ifdef CONFIG_KEXEC 600 #ifdef CONFIG_KEXEC
602 request_resource(res, &crashk_res); 601 request_resource(res, &crashk_res);
603 #endif 602 #endif
604 } 603 }
605 } 604 }
606 } 605 }
607 606
608 /* 607 /*
609 * Convenience functions to obtain memory types and attributes 608 * Convenience functions to obtain memory types and attributes
610 */ 609 */
611 610
612 u32 efi_mem_type(unsigned long phys_addr) 611 u32 efi_mem_type(unsigned long phys_addr)
613 { 612 {
614 efi_memory_desc_t *md; 613 efi_memory_desc_t *md;
615 void *p; 614 void *p;
616 615
617 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 616 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
618 md = p; 617 md = p;
619 if ((md->phys_addr <= phys_addr) && (phys_addr < 618 if ((md->phys_addr <= phys_addr) && (phys_addr <
620 (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) 619 (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
621 return md->type; 620 return md->type;
622 } 621 }
623 return 0; 622 return 0;
624 } 623 }
625 624
626 u64 efi_mem_attributes(unsigned long phys_addr) 625 u64 efi_mem_attributes(unsigned long phys_addr)
627 { 626 {
628 efi_memory_desc_t *md; 627 efi_memory_desc_t *md;
629 void *p; 628 void *p;
630 629
631 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 630 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
632 md = p; 631 md = p;
633 if ((md->phys_addr <= phys_addr) && (phys_addr < 632 if ((md->phys_addr <= phys_addr) && (phys_addr <
634 (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) 633 (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
635 return md->attribute; 634 return md->attribute;
636 } 635 }
637 return 0; 636 return 0;
638 } 637 }
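A small sketch of how these two helpers might be combined by a caller; the helper name below is hypothetical and not part of this patch.

	/* True if the address falls in conventional memory that EFI marks write-back cacheable. */
	static int phys_addr_is_cacheable_ram(unsigned long phys_addr)
	{
		return efi_mem_type(phys_addr) == EFI_CONVENTIONAL_MEMORY &&
		       (efi_mem_attributes(phys_addr) & EFI_MEMORY_WB);
	}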
639 638
arch/i386/kernel/reboot.c
1 /* 1 /*
2 * linux/arch/i386/kernel/reboot.c 2 * linux/arch/i386/kernel/reboot.c
3 */ 3 */
4 4
5 #include <linux/config.h> 5 #include <linux/config.h>
6 #include <linux/mm.h> 6 #include <linux/mm.h>
7 #include <linux/module.h> 7 #include <linux/module.h>
8 #include <linux/delay.h> 8 #include <linux/delay.h>
9 #include <linux/init.h> 9 #include <linux/init.h>
10 #include <linux/interrupt.h> 10 #include <linux/interrupt.h>
11 #include <linux/mc146818rtc.h> 11 #include <linux/mc146818rtc.h>
12 #include <linux/efi.h> 12 #include <linux/efi.h>
13 #include <linux/dmi.h> 13 #include <linux/dmi.h>
14 #include <asm/uaccess.h> 14 #include <asm/uaccess.h>
15 #include <asm/apic.h> 15 #include <asm/apic.h>
16 #include <asm/desc.h>
16 #include "mach_reboot.h" 17 #include "mach_reboot.h"
17 #include <linux/reboot_fixups.h> 18 #include <linux/reboot_fixups.h>
18 19
19 /* 20 /*
20 * Power off function, if any 21 * Power off function, if any
21 */ 22 */
22 void (*pm_power_off)(void); 23 void (*pm_power_off)(void);
23 EXPORT_SYMBOL(pm_power_off); 24 EXPORT_SYMBOL(pm_power_off);
24 25
25 static int reboot_mode; 26 static int reboot_mode;
26 static int reboot_thru_bios; 27 static int reboot_thru_bios;
27 28
28 #ifdef CONFIG_SMP 29 #ifdef CONFIG_SMP
29 static int reboot_cpu = -1; 30 static int reboot_cpu = -1;
30 /* shamelessly grabbed from lib/vsprintf.c for readability */ 31 /* shamelessly grabbed from lib/vsprintf.c for readability */
31 #define is_digit(c) ((c) >= '0' && (c) <= '9') 32 #define is_digit(c) ((c) >= '0' && (c) <= '9')
32 #endif 33 #endif
33 static int __init reboot_setup(char *str) 34 static int __init reboot_setup(char *str)
34 { 35 {
35 while(1) { 36 while(1) {
36 switch (*str) { 37 switch (*str) {
37 case 'w': /* "warm" reboot (no memory testing etc) */ 38 case 'w': /* "warm" reboot (no memory testing etc) */
38 reboot_mode = 0x1234; 39 reboot_mode = 0x1234;
39 break; 40 break;
40 case 'c': /* "cold" reboot (with memory testing etc) */ 41 case 'c': /* "cold" reboot (with memory testing etc) */
41 reboot_mode = 0x0; 42 reboot_mode = 0x0;
42 break; 43 break;
43 case 'b': /* "bios" reboot by jumping through the BIOS */ 44 case 'b': /* "bios" reboot by jumping through the BIOS */
44 reboot_thru_bios = 1; 45 reboot_thru_bios = 1;
45 break; 46 break;
46 case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */ 47 case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
47 reboot_thru_bios = 0; 48 reboot_thru_bios = 0;
48 break; 49 break;
49 #ifdef CONFIG_SMP 50 #ifdef CONFIG_SMP
50 case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ 51 case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
51 if (is_digit(*(str+1))) { 52 if (is_digit(*(str+1))) {
52 reboot_cpu = (int) (*(str+1) - '0'); 53 reboot_cpu = (int) (*(str+1) - '0');
53 if (is_digit(*(str+2))) 54 if (is_digit(*(str+2)))
54 reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); 55 reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
55 } 56 }
56 /* we will leave sorting out the final value 57 /* we will leave sorting out the final value
57 when we are ready to reboot, since we might not 58 when we are ready to reboot, since we might not
58 have set up boot_cpu_id or smp_num_cpu */ 59 have set up boot_cpu_id or smp_num_cpu */
59 break; 60 break;
60 #endif 61 #endif
61 } 62 }
62 if((str = strchr(str,',')) != NULL) 63 if((str = strchr(str,',')) != NULL)
63 str++; 64 str++;
64 else 65 else
65 break; 66 break;
66 } 67 }
67 return 1; 68 return 1;
68 } 69 }
69 70
70 __setup("reboot=", reboot_setup); 71 __setup("reboot=", reboot_setup);
71 72
72 /* 73 /*
73 * Reboot options and system auto-detection code provided by 74 * Reboot options and system auto-detection code provided by
74 * Dell Inc. so their systems "just work". :-) 75 * Dell Inc. so their systems "just work". :-)
75 */ 76 */
76 77
77 /* 78 /*
78 * Some machines require the "reboot=b" commandline option; this quirk makes that automatic. 79 * Some machines require the "reboot=b" commandline option; this quirk makes that automatic.
79 */ 80 */
80 static int __init set_bios_reboot(struct dmi_system_id *d) 81 static int __init set_bios_reboot(struct dmi_system_id *d)
81 { 82 {
82 if (!reboot_thru_bios) { 83 if (!reboot_thru_bios) {
83 reboot_thru_bios = 1; 84 reboot_thru_bios = 1;
84 printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); 85 printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident);
85 } 86 }
86 return 0; 87 return 0;
87 } 88 }
88 89
89 static struct dmi_system_id __initdata reboot_dmi_table[] = { 90 static struct dmi_system_id __initdata reboot_dmi_table[] = {
90 { /* Handle problems with rebooting on Dell 1300's */ 91 { /* Handle problems with rebooting on Dell 1300's */
91 .callback = set_bios_reboot, 92 .callback = set_bios_reboot,
92 .ident = "Dell PowerEdge 1300", 93 .ident = "Dell PowerEdge 1300",
93 .matches = { 94 .matches = {
94 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), 95 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
95 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"), 96 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"),
96 }, 97 },
97 }, 98 },
98 { /* Handle problems with rebooting on Dell 300's */ 99 { /* Handle problems with rebooting on Dell 300's */
99 .callback = set_bios_reboot, 100 .callback = set_bios_reboot,
100 .ident = "Dell PowerEdge 300", 101 .ident = "Dell PowerEdge 300",
101 .matches = { 102 .matches = {
102 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), 103 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
103 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), 104 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
104 }, 105 },
105 }, 106 },
106 { /* Handle problems with rebooting on Dell 2400's */ 107 { /* Handle problems with rebooting on Dell 2400's */
107 .callback = set_bios_reboot, 108 .callback = set_bios_reboot,
108 .ident = "Dell PowerEdge 2400", 109 .ident = "Dell PowerEdge 2400",
109 .matches = { 110 .matches = {
110 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), 111 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
111 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), 112 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
112 }, 113 },
113 }, 114 },
114 { } 115 { }
115 }; 116 };
116 117
117 static int __init reboot_init(void) 118 static int __init reboot_init(void)
118 { 119 {
119 dmi_check_system(reboot_dmi_table); 120 dmi_check_system(reboot_dmi_table);
120 return 0; 121 return 0;
121 } 122 }
122 123
123 core_initcall(reboot_init); 124 core_initcall(reboot_init);
124 125
125 /* The following code and data reboots the machine by switching to real 126 /* The following code and data reboots the machine by switching to real
126 mode and jumping to the BIOS reset entry point, as if the CPU has 127 mode and jumping to the BIOS reset entry point, as if the CPU has
127 really been reset. The previous version asked the keyboard 128 really been reset. The previous version asked the keyboard
128 controller to pulse the CPU reset line, which is more thorough, but 129 controller to pulse the CPU reset line, which is more thorough, but
129 doesn't work with at least one type of 486 motherboard. It is easy 130 doesn't work with at least one type of 486 motherboard. It is easy
130 to stop this code working; hence the copious comments. */ 131 to stop this code working; hence the copious comments. */
131 132
132 static unsigned long long 133 static unsigned long long
133 real_mode_gdt_entries [3] = 134 real_mode_gdt_entries [3] =
134 { 135 {
135 0x0000000000000000ULL, /* Null descriptor */ 136 0x0000000000000000ULL, /* Null descriptor */
136 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ 137 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */
137 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ 138 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
138 }; 139 };
139 140
140 static struct 141 static struct
141 { 142 {
142 unsigned short size __attribute__ ((packed)); 143 unsigned short size __attribute__ ((packed));
143 unsigned long long * base __attribute__ ((packed)); 144 unsigned long long * base __attribute__ ((packed));
144 } 145 }
145 real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries }, 146 real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
146 real_mode_idt = { 0x3ff, NULL }, 147 real_mode_idt = { 0x3ff, NULL },
147 no_idt = { 0, NULL }; 148 no_idt = { 0, NULL };
148 149
149 150
150 /* This is 16-bit protected mode code to disable paging and the cache, 151 /* This is 16-bit protected mode code to disable paging and the cache,
151 switch to real mode and jump to the BIOS reset code. 152 switch to real mode and jump to the BIOS reset code.
152 153
153 The instruction that switches to real mode by writing to CR0 must be 154 The instruction that switches to real mode by writing to CR0 must be
154 followed immediately by a far jump instruction, which sets CS to a 155 followed immediately by a far jump instruction, which sets CS to a
155 valid value for real mode, and flushes the prefetch queue to avoid 156 valid value for real mode, and flushes the prefetch queue to avoid
156 running instructions that have already been decoded in protected 157 running instructions that have already been decoded in protected
157 mode. 158 mode.
158 159
159 Clears all the flags except ET, especially PG (paging), PE 160 Clears all the flags except ET, especially PG (paging), PE
160 (protected-mode enable) and TS (task switch for coprocessor state 161 (protected-mode enable) and TS (task switch for coprocessor state
161 save). Flushes the TLB after paging has been disabled. Sets CD and 162 save). Flushes the TLB after paging has been disabled. Sets CD and
162 NW, to disable the cache on a 486, and invalidates the cache. This 163 NW, to disable the cache on a 486, and invalidates the cache. This
163 is more like the state of a 486 after reset. I don't know if 164 is more like the state of a 486 after reset. I don't know if
164 something else should be done for other chips. 165 something else should be done for other chips.
165 166
166 More could be done here to set up the registers as if a CPU reset had 167 More could be done here to set up the registers as if a CPU reset had
167 occurred; hopefully real BIOSs don't assume much. */ 168 occurred; hopefully real BIOSs don't assume much. */
168 169
169 static unsigned char real_mode_switch [] = 170 static unsigned char real_mode_switch [] =
170 { 171 {
171 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 172 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */
172 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ 173 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */
173 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */ 174 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */
174 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */ 175 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */
175 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */ 176 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */
176 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */ 177 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */
177 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */ 178 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */
178 0x74, 0x02, /* jz f */ 179 0x74, 0x02, /* jz f */
179 0x0f, 0x09, /* wbinvd */ 180 0x0f, 0x09, /* wbinvd */
180 0x24, 0x10, /* f: andb $0x10,al */ 181 0x24, 0x10, /* f: andb $0x10,al */
181 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ 182 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */
182 }; 183 };
183 static unsigned char jump_to_bios [] = 184 static unsigned char jump_to_bios [] =
184 { 185 {
185 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ 186 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */
186 }; 187 };
187 188
188 /* 189 /*
189 * Switch to real mode and then execute the code 190 * Switch to real mode and then execute the code
190 * specified by the code and length parameters. 191 * specified by the code and length parameters.
191 * We assume that length will always be less than 100! 192 * We assume that length will always be less than 100!
192 */ 193 */
193 void machine_real_restart(unsigned char *code, int length) 194 void machine_real_restart(unsigned char *code, int length)
194 { 195 {
195 unsigned long flags; 196 unsigned long flags;
196 197
197 local_irq_disable(); 198 local_irq_disable();
198 199
199 /* Write zero to CMOS register number 0x0f, which the BIOS POST 200 /* Write zero to CMOS register number 0x0f, which the BIOS POST
200 routine will recognize as telling it to do a proper reboot. (Well 201 routine will recognize as telling it to do a proper reboot. (Well
201 that's what this book in front of me says -- it may only apply to 202 that's what this book in front of me says -- it may only apply to
202 the Phoenix BIOS though, it's not clear). At the same time, 203 the Phoenix BIOS though, it's not clear). At the same time,
203 disable NMIs by setting the top bit in the CMOS address register, 204 disable NMIs by setting the top bit in the CMOS address register,
204 as we're about to do peculiar things to the CPU. I'm not sure if 205 as we're about to do peculiar things to the CPU. I'm not sure if
205 `outb_p' is needed instead of just `outb'. Use it to be on the 206 `outb_p' is needed instead of just `outb'. Use it to be on the
206 safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) 207 safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
207 */ 208 */
208 209
209 spin_lock_irqsave(&rtc_lock, flags); 210 spin_lock_irqsave(&rtc_lock, flags);
210 CMOS_WRITE(0x00, 0x8f); 211 CMOS_WRITE(0x00, 0x8f);
211 spin_unlock_irqrestore(&rtc_lock, flags); 212 spin_unlock_irqrestore(&rtc_lock, flags);
212 213
213 /* Remap the kernel at virtual address zero, as well as offset zero 214 /* Remap the kernel at virtual address zero, as well as offset zero
214 from the kernel segment. This assumes the kernel segment starts at 215 from the kernel segment. This assumes the kernel segment starts at
215 virtual address PAGE_OFFSET. */ 216 virtual address PAGE_OFFSET. */
216 217
217 memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, 218 memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
218 sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); 219 sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
219 220
220 /* 221 /*
221 * Use `swapper_pg_dir' as our page directory. 222 * Use `swapper_pg_dir' as our page directory.
222 */ 223 */
223 load_cr3(swapper_pg_dir); 224 load_cr3(swapper_pg_dir);
224 225
225 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads 226 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
226 this on booting to tell it to "Bypass memory test (also warm 227 this on booting to tell it to "Bypass memory test (also warm
227 boot)". This seems like a fairly standard thing that gets set by 228 boot)". This seems like a fairly standard thing that gets set by
228 REBOOT.COM programs, and the previous reset routine did this 229 REBOOT.COM programs, and the previous reset routine did this
229 too. */ 230 too. */
230 231
231 *((unsigned short *)0x472) = reboot_mode; 232 *((unsigned short *)0x472) = reboot_mode;
232 233
233 /* For the switch to real mode, copy some code to low memory. It has 234 /* For the switch to real mode, copy some code to low memory. It has
234 to be in the first 64k because it is running in 16-bit mode, and it 235 to be in the first 64k because it is running in 16-bit mode, and it
235 has to have the same physical and virtual address, because it turns 236 has to have the same physical and virtual address, because it turns
236 off paging. Copy it near the end of the first page, out of the way 237 off paging. Copy it near the end of the first page, out of the way
237 of BIOS variables. */ 238 of BIOS variables. */
238 239
239 memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100), 240 memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100),
240 real_mode_switch, sizeof (real_mode_switch)); 241 real_mode_switch, sizeof (real_mode_switch));
241 memcpy ((void *) (0x1000 - 100), code, length); 242 memcpy ((void *) (0x1000 - 100), code, length);
242 243
243 /* Set up the IDT for real mode. */ 244 /* Set up the IDT for real mode. */
244 245
245 __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt)); 246 load_idt(&real_mode_idt);
246 247
247 /* Set up a GDT from which we can load segment descriptors for real 248 /* Set up a GDT from which we can load segment descriptors for real
248 mode. The GDT is not used in real mode; it is just needed here to 249 mode. The GDT is not used in real mode; it is just needed here to
249 prepare the descriptors. */ 250 prepare the descriptors. */
250 251
251 __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt)); 252 load_gdt(&real_mode_gdt);
252 253
253 /* Load the data segment registers, and thus the descriptors ready for 254 /* Load the data segment registers, and thus the descriptors ready for
254 real mode. The base address of each segment is 0x100, 16 times the 255 real mode. The base address of each segment is 0x100, 16 times the
255 selector value being loaded here. This is so that the segment 256 selector value being loaded here. This is so that the segment
256 registers don't have to be reloaded after switching to real mode: 257 registers don't have to be reloaded after switching to real mode:
257 the values are consistent for real mode operation already. */ 258 the values are consistent for real mode operation already. */
258 259
259 __asm__ __volatile__ ("movl $0x0010,%%eax\n" 260 __asm__ __volatile__ ("movl $0x0010,%%eax\n"
260 "\tmovl %%eax,%%ds\n" 261 "\tmovl %%eax,%%ds\n"
261 "\tmovl %%eax,%%es\n" 262 "\tmovl %%eax,%%es\n"
262 "\tmovl %%eax,%%fs\n" 263 "\tmovl %%eax,%%fs\n"
263 "\tmovl %%eax,%%gs\n" 264 "\tmovl %%eax,%%gs\n"
264 "\tmovl %%eax,%%ss" : : : "eax"); 265 "\tmovl %%eax,%%ss" : : : "eax");
265 266
266 /* Jump to the 16-bit code that we copied earlier. It disables paging 267 /* Jump to the 16-bit code that we copied earlier. It disables paging
267 and the cache, switches to real mode, and jumps to the BIOS reset 268 and the cache, switches to real mode, and jumps to the BIOS reset
268 entry point. */ 269 entry point. */
269 270
270 __asm__ __volatile__ ("ljmp $0x0008,%0" 271 __asm__ __volatile__ ("ljmp $0x0008,%0"
271 : 272 :
272 : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); 273 : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
273 } 274 }
274 #ifdef CONFIG_APM_MODULE 275 #ifdef CONFIG_APM_MODULE
275 EXPORT_SYMBOL(machine_real_restart); 276 EXPORT_SYMBOL(machine_real_restart);
276 #endif 277 #endif
277 278
278 void machine_shutdown(void) 279 void machine_shutdown(void)
279 { 280 {
280 #ifdef CONFIG_SMP 281 #ifdef CONFIG_SMP
281 int reboot_cpu_id; 282 int reboot_cpu_id;
282 283
283 /* The boot cpu is always logical cpu 0 */ 284 /* The boot cpu is always logical cpu 0 */
284 reboot_cpu_id = 0; 285 reboot_cpu_id = 0;
285 286
286 /* See if a command line override has been given */ 287 /* See if a command line override has been given */
287 if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && 288 if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
288 cpu_isset(reboot_cpu, cpu_online_map)) { 289 cpu_isset(reboot_cpu, cpu_online_map)) {
289 reboot_cpu_id = reboot_cpu; 290 reboot_cpu_id = reboot_cpu;
290 } 291 }
291 292
292 /* Make certain the cpu I'm rebooting on is online */ 293 /* Make certain the cpu I'm rebooting on is online */
293 if (!cpu_isset(reboot_cpu_id, cpu_online_map)) { 294 if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
294 reboot_cpu_id = smp_processor_id(); 295 reboot_cpu_id = smp_processor_id();
295 } 296 }
296 297
297 /* Make certain I only run on the appropriate processor */ 298 /* Make certain I only run on the appropriate processor */
298 set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); 299 set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
299 300
300 /* O.K. Now that I'm on the appropriate processor, stop 301 /* O.K. Now that I'm on the appropriate processor, stop
301 * all of the others, and disable their local APICs. 302 * all of the others, and disable their local APICs.
302 */ 303 */
303 304
304 smp_send_stop(); 305 smp_send_stop();
305 #endif /* CONFIG_SMP */ 306 #endif /* CONFIG_SMP */
306 307
307 lapic_shutdown(); 308 lapic_shutdown();
308 309
309 #ifdef CONFIG_X86_IO_APIC 310 #ifdef CONFIG_X86_IO_APIC
310 disable_IO_APIC(); 311 disable_IO_APIC();
311 #endif 312 #endif
312 } 313 }
313 314
314 void machine_emergency_restart(void) 315 void machine_emergency_restart(void)
315 { 316 {
316 if (!reboot_thru_bios) { 317 if (!reboot_thru_bios) {
317 if (efi_enabled) { 318 if (efi_enabled) {
318 efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); 319 efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
319 __asm__ __volatile__("lidt %0": :"m" (no_idt)); 320 load_idt(&no_idt);
320 __asm__ __volatile__("int3"); 321 __asm__ __volatile__("int3");
321 } 322 }
322 /* rebooting needs to touch the page at absolute addr 0 */ 323 /* rebooting needs to touch the page at absolute addr 0 */
323 *((unsigned short *)__va(0x472)) = reboot_mode; 324 *((unsigned short *)__va(0x472)) = reboot_mode;
324 for (;;) { 325 for (;;) {
325 mach_reboot_fixups(); /* for board specific fixups */ 326 mach_reboot_fixups(); /* for board specific fixups */
326 mach_reboot(); 327 mach_reboot();
327 /* That didn't work - force a triple fault.. */ 328 /* That didn't work - force a triple fault.. */
328 __asm__ __volatile__("lidt %0": :"m" (no_idt)); 329 load_idt(&no_idt);
329 __asm__ __volatile__("int3"); 330 __asm__ __volatile__("int3");
330 } 331 }
331 } 332 }
332 if (efi_enabled) 333 if (efi_enabled)
333 efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL); 334 efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL);
334 335
335 machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); 336 machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
336 } 337 }
337 338
338 void machine_restart(char * __unused) 339 void machine_restart(char * __unused)
339 { 340 {
340 machine_shutdown(); 341 machine_shutdown();
341 machine_emergency_restart(); 342 machine_emergency_restart();
342 } 343 }
343 344
344 void machine_halt(void) 345 void machine_halt(void)
345 { 346 {
346 } 347 }
347 348
348 void machine_power_off(void) 349 void machine_power_off(void)
349 { 350 {
350 machine_shutdown(); 351 machine_shutdown();
351 352
352 if (pm_power_off) 353 if (pm_power_off)
353 pm_power_off(); 354 pm_power_off();
354 } 355 }
355 356
356 357
357 358
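The open-coded lidt/lgdt asm and segment-register moves in the hunks above (and in the signal.c change below) are now hidden behind helpers from the asm-i386 headers; a rough approximation of what they expand to, for reference only:

	#define load_gdt(dtr)	__asm__ __volatile__("lgdt %0" : : "m" (*(dtr)))
	#define load_idt(dtr)	__asm__ __volatile__("lidt %0" : : "m" (*(dtr)))
	/* "=rm" lets GCC keep the saved selector in a register instead of forcing a stack slot. */
	#define savesegment(seg, value) \
		__asm__ __volatile__("mov %%" #seg ",%0" : "=rm" (value))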
arch/i386/kernel/signal.c
1 /* 1 /*
2 * linux/arch/i386/kernel/signal.c 2 * linux/arch/i386/kernel/signal.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 5 *
6 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson 6 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
7 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 7 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
8 */ 8 */
9 9
10 #include <linux/sched.h> 10 #include <linux/sched.h>
11 #include <linux/mm.h> 11 #include <linux/mm.h>
12 #include <linux/smp.h> 12 #include <linux/smp.h>
13 #include <linux/smp_lock.h> 13 #include <linux/smp_lock.h>
14 #include <linux/kernel.h> 14 #include <linux/kernel.h>
15 #include <linux/signal.h> 15 #include <linux/signal.h>
16 #include <linux/errno.h> 16 #include <linux/errno.h>
17 #include <linux/wait.h> 17 #include <linux/wait.h>
18 #include <linux/unistd.h> 18 #include <linux/unistd.h>
19 #include <linux/stddef.h> 19 #include <linux/stddef.h>
20 #include <linux/personality.h> 20 #include <linux/personality.h>
21 #include <linux/suspend.h> 21 #include <linux/suspend.h>
22 #include <linux/ptrace.h> 22 #include <linux/ptrace.h>
23 #include <linux/elf.h> 23 #include <linux/elf.h>
24 #include <asm/processor.h> 24 #include <asm/processor.h>
25 #include <asm/ucontext.h> 25 #include <asm/ucontext.h>
26 #include <asm/uaccess.h> 26 #include <asm/uaccess.h>
27 #include <asm/i387.h> 27 #include <asm/i387.h>
28 #include "sigframe.h" 28 #include "sigframe.h"
29 29
30 #define DEBUG_SIG 0 30 #define DEBUG_SIG 0
31 31
32 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) 32 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
33 33
34 /* 34 /*
35 * Atomically swap in the new signal mask, and wait for a signal. 35 * Atomically swap in the new signal mask, and wait for a signal.
36 */ 36 */
37 asmlinkage int 37 asmlinkage int
38 sys_sigsuspend(int history0, int history1, old_sigset_t mask) 38 sys_sigsuspend(int history0, int history1, old_sigset_t mask)
39 { 39 {
40 struct pt_regs * regs = (struct pt_regs *) &history0; 40 struct pt_regs * regs = (struct pt_regs *) &history0;
41 sigset_t saveset; 41 sigset_t saveset;
42 42
43 mask &= _BLOCKABLE; 43 mask &= _BLOCKABLE;
44 spin_lock_irq(&current->sighand->siglock); 44 spin_lock_irq(&current->sighand->siglock);
45 saveset = current->blocked; 45 saveset = current->blocked;
46 siginitset(&current->blocked, mask); 46 siginitset(&current->blocked, mask);
47 recalc_sigpending(); 47 recalc_sigpending();
48 spin_unlock_irq(&current->sighand->siglock); 48 spin_unlock_irq(&current->sighand->siglock);
49 49
50 regs->eax = -EINTR; 50 regs->eax = -EINTR;
51 while (1) { 51 while (1) {
52 current->state = TASK_INTERRUPTIBLE; 52 current->state = TASK_INTERRUPTIBLE;
53 schedule(); 53 schedule();
54 if (do_signal(regs, &saveset)) 54 if (do_signal(regs, &saveset))
55 return -EINTR; 55 return -EINTR;
56 } 56 }
57 } 57 }
58 58
59 asmlinkage int 59 asmlinkage int
60 sys_rt_sigsuspend(struct pt_regs regs) 60 sys_rt_sigsuspend(struct pt_regs regs)
61 { 61 {
62 sigset_t saveset, newset; 62 sigset_t saveset, newset;
63 63
64 /* XXX: Don't preclude handling different sized sigset_t's. */ 64 /* XXX: Don't preclude handling different sized sigset_t's. */
65 if (regs.ecx != sizeof(sigset_t)) 65 if (regs.ecx != sizeof(sigset_t))
66 return -EINVAL; 66 return -EINVAL;
67 67
68 if (copy_from_user(&newset, (sigset_t __user *)regs.ebx, sizeof(newset))) 68 if (copy_from_user(&newset, (sigset_t __user *)regs.ebx, sizeof(newset)))
69 return -EFAULT; 69 return -EFAULT;
70 sigdelsetmask(&newset, ~_BLOCKABLE); 70 sigdelsetmask(&newset, ~_BLOCKABLE);
71 71
72 spin_lock_irq(&current->sighand->siglock); 72 spin_lock_irq(&current->sighand->siglock);
73 saveset = current->blocked; 73 saveset = current->blocked;
74 current->blocked = newset; 74 current->blocked = newset;
75 recalc_sigpending(); 75 recalc_sigpending();
76 spin_unlock_irq(&current->sighand->siglock); 76 spin_unlock_irq(&current->sighand->siglock);
77 77
78 regs.eax = -EINTR; 78 regs.eax = -EINTR;
79 while (1) { 79 while (1) {
80 current->state = TASK_INTERRUPTIBLE; 80 current->state = TASK_INTERRUPTIBLE;
81 schedule(); 81 schedule();
82 if (do_signal(&regs, &saveset)) 82 if (do_signal(&regs, &saveset))
83 return -EINTR; 83 return -EINTR;
84 } 84 }
85 } 85 }
86 86
87 asmlinkage int 87 asmlinkage int
88 sys_sigaction(int sig, const struct old_sigaction __user *act, 88 sys_sigaction(int sig, const struct old_sigaction __user *act,
89 struct old_sigaction __user *oact) 89 struct old_sigaction __user *oact)
90 { 90 {
91 struct k_sigaction new_ka, old_ka; 91 struct k_sigaction new_ka, old_ka;
92 int ret; 92 int ret;
93 93
94 if (act) { 94 if (act) {
95 old_sigset_t mask; 95 old_sigset_t mask;
96 if (!access_ok(VERIFY_READ, act, sizeof(*act)) || 96 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
97 __get_user(new_ka.sa.sa_handler, &act->sa_handler) || 97 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
98 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) 98 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
99 return -EFAULT; 99 return -EFAULT;
100 __get_user(new_ka.sa.sa_flags, &act->sa_flags); 100 __get_user(new_ka.sa.sa_flags, &act->sa_flags);
101 __get_user(mask, &act->sa_mask); 101 __get_user(mask, &act->sa_mask);
102 siginitset(&new_ka.sa.sa_mask, mask); 102 siginitset(&new_ka.sa.sa_mask, mask);
103 } 103 }
104 104
105 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); 105 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
106 106
107 if (!ret && oact) { 107 if (!ret && oact) {
108 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || 108 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
109 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || 109 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
110 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) 110 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
111 return -EFAULT; 111 return -EFAULT;
112 __put_user(old_ka.sa.sa_flags, &oact->sa_flags); 112 __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
113 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); 113 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
114 } 114 }
115 115
116 return ret; 116 return ret;
117 } 117 }
118 118
119 asmlinkage int 119 asmlinkage int
120 sys_sigaltstack(unsigned long ebx) 120 sys_sigaltstack(unsigned long ebx)
121 { 121 {
122 /* This is needed to make gcc realize it doesn't own the "struct pt_regs" */ 122 /* This is needed to make gcc realize it doesn't own the "struct pt_regs" */
123 struct pt_regs *regs = (struct pt_regs *)&ebx; 123 struct pt_regs *regs = (struct pt_regs *)&ebx;
124 const stack_t __user *uss = (const stack_t __user *)ebx; 124 const stack_t __user *uss = (const stack_t __user *)ebx;
125 stack_t __user *uoss = (stack_t __user *)regs->ecx; 125 stack_t __user *uoss = (stack_t __user *)regs->ecx;
126 126
127 return do_sigaltstack(uss, uoss, regs->esp); 127 return do_sigaltstack(uss, uoss, regs->esp);
128 } 128 }
129 129
130 130
131 /* 131 /*
132 * Do a signal return; undo the signal stack. 132 * Do a signal return; undo the signal stack.
133 */ 133 */
134 134
135 static int 135 static int
136 restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax) 136 restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
137 { 137 {
138 unsigned int err = 0; 138 unsigned int err = 0;
139 139
140 /* Always make any pending restarted system calls return -EINTR */ 140 /* Always make any pending restarted system calls return -EINTR */
141 current_thread_info()->restart_block.fn = do_no_restart_syscall; 141 current_thread_info()->restart_block.fn = do_no_restart_syscall;
142 142
143 #define COPY(x) err |= __get_user(regs->x, &sc->x) 143 #define COPY(x) err |= __get_user(regs->x, &sc->x)
144 144
145 #define COPY_SEG(seg) \ 145 #define COPY_SEG(seg) \
146 { unsigned short tmp; \ 146 { unsigned short tmp; \
147 err |= __get_user(tmp, &sc->seg); \ 147 err |= __get_user(tmp, &sc->seg); \
148 regs->x##seg = tmp; } 148 regs->x##seg = tmp; }
149 149
150 #define COPY_SEG_STRICT(seg) \ 150 #define COPY_SEG_STRICT(seg) \
151 { unsigned short tmp; \ 151 { unsigned short tmp; \
152 err |= __get_user(tmp, &sc->seg); \ 152 err |= __get_user(tmp, &sc->seg); \
153 regs->x##seg = tmp|3; } 153 regs->x##seg = tmp|3; }
154 154
155 #define GET_SEG(seg) \ 155 #define GET_SEG(seg) \
156 { unsigned short tmp; \ 156 { unsigned short tmp; \
157 err |= __get_user(tmp, &sc->seg); \ 157 err |= __get_user(tmp, &sc->seg); \
158 loadsegment(seg,tmp); } 158 loadsegment(seg,tmp); }
159 159
160 #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \ 160 #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \
161 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ 161 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
162 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) 162 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
163 163
164 GET_SEG(gs); 164 GET_SEG(gs);
165 GET_SEG(fs); 165 GET_SEG(fs);
166 COPY_SEG(es); 166 COPY_SEG(es);
167 COPY_SEG(ds); 167 COPY_SEG(ds);
168 COPY(edi); 168 COPY(edi);
169 COPY(esi); 169 COPY(esi);
170 COPY(ebp); 170 COPY(ebp);
171 COPY(esp); 171 COPY(esp);
172 COPY(ebx); 172 COPY(ebx);
173 COPY(edx); 173 COPY(edx);
174 COPY(ecx); 174 COPY(ecx);
175 COPY(eip); 175 COPY(eip);
176 COPY_SEG_STRICT(cs); 176 COPY_SEG_STRICT(cs);
177 COPY_SEG_STRICT(ss); 177 COPY_SEG_STRICT(ss);
178 178
179 { 179 {
180 unsigned int tmpflags; 180 unsigned int tmpflags;
181 err |= __get_user(tmpflags, &sc->eflags); 181 err |= __get_user(tmpflags, &sc->eflags);
182 regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); 182 regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
183 regs->orig_eax = -1; /* disable syscall checks */ 183 regs->orig_eax = -1; /* disable syscall checks */
184 } 184 }
185 185
186 { 186 {
187 struct _fpstate __user * buf; 187 struct _fpstate __user * buf;
188 err |= __get_user(buf, &sc->fpstate); 188 err |= __get_user(buf, &sc->fpstate);
189 if (buf) { 189 if (buf) {
190 if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) 190 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
191 goto badframe; 191 goto badframe;
192 err |= restore_i387(buf); 192 err |= restore_i387(buf);
193 } else { 193 } else {
194 struct task_struct *me = current; 194 struct task_struct *me = current;
195 if (used_math()) { 195 if (used_math()) {
196 clear_fpu(me); 196 clear_fpu(me);
197 clear_used_math(); 197 clear_used_math();
198 } 198 }
199 } 199 }
200 } 200 }
201 201
202 err |= __get_user(*peax, &sc->eax); 202 err |= __get_user(*peax, &sc->eax);
203 return err; 203 return err;
204 204
205 badframe: 205 badframe:
206 return 1; 206 return 1;
207 } 207 }
208 208
209 asmlinkage int sys_sigreturn(unsigned long __unused) 209 asmlinkage int sys_sigreturn(unsigned long __unused)
210 { 210 {
211 struct pt_regs *regs = (struct pt_regs *) &__unused; 211 struct pt_regs *regs = (struct pt_regs *) &__unused;
212 struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8); 212 struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8);
213 sigset_t set; 213 sigset_t set;
214 int eax; 214 int eax;
215 215
216 if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) 216 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
217 goto badframe; 217 goto badframe;
218 if (__get_user(set.sig[0], &frame->sc.oldmask) 218 if (__get_user(set.sig[0], &frame->sc.oldmask)
219 || (_NSIG_WORDS > 1 219 || (_NSIG_WORDS > 1
220 && __copy_from_user(&set.sig[1], &frame->extramask, 220 && __copy_from_user(&set.sig[1], &frame->extramask,
221 sizeof(frame->extramask)))) 221 sizeof(frame->extramask))))
222 goto badframe; 222 goto badframe;
223 223
224 sigdelsetmask(&set, ~_BLOCKABLE); 224 sigdelsetmask(&set, ~_BLOCKABLE);
225 spin_lock_irq(&current->sighand->siglock); 225 spin_lock_irq(&current->sighand->siglock);
226 current->blocked = set; 226 current->blocked = set;
227 recalc_sigpending(); 227 recalc_sigpending();
228 spin_unlock_irq(&current->sighand->siglock); 228 spin_unlock_irq(&current->sighand->siglock);
229 229
230 if (restore_sigcontext(regs, &frame->sc, &eax)) 230 if (restore_sigcontext(regs, &frame->sc, &eax))
231 goto badframe; 231 goto badframe;
232 return eax; 232 return eax;
233 233
234 badframe: 234 badframe:
235 force_sig(SIGSEGV, current); 235 force_sig(SIGSEGV, current);
236 return 0; 236 return 0;
237 } 237 }
238 238
239 asmlinkage int sys_rt_sigreturn(unsigned long __unused) 239 asmlinkage int sys_rt_sigreturn(unsigned long __unused)
240 { 240 {
241 struct pt_regs *regs = (struct pt_regs *) &__unused; 241 struct pt_regs *regs = (struct pt_regs *) &__unused;
242 struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->esp - 4); 242 struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->esp - 4);
243 sigset_t set; 243 sigset_t set;
244 int eax; 244 int eax;
245 245
246 if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) 246 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
247 goto badframe; 247 goto badframe;
248 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) 248 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
249 goto badframe; 249 goto badframe;
250 250
251 sigdelsetmask(&set, ~_BLOCKABLE); 251 sigdelsetmask(&set, ~_BLOCKABLE);
252 spin_lock_irq(&current->sighand->siglock); 252 spin_lock_irq(&current->sighand->siglock);
253 current->blocked = set; 253 current->blocked = set;
254 recalc_sigpending(); 254 recalc_sigpending();
255 spin_unlock_irq(&current->sighand->siglock); 255 spin_unlock_irq(&current->sighand->siglock);
256 256
257 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) 257 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
258 goto badframe; 258 goto badframe;
259 259
260 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->esp) == -EFAULT) 260 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->esp) == -EFAULT)
261 goto badframe; 261 goto badframe;
262 262
263 return eax; 263 return eax;
264 264
265 badframe: 265 badframe:
266 force_sig(SIGSEGV, current); 266 force_sig(SIGSEGV, current);
267 return 0; 267 return 0;
268 } 268 }
269 269
270 /* 270 /*
271 * Set up a signal frame. 271 * Set up a signal frame.
272 */ 272 */
273 273
274 static int 274 static int
275 setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, 275 setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
276 struct pt_regs *regs, unsigned long mask) 276 struct pt_regs *regs, unsigned long mask)
277 { 277 {
278 int tmp, err = 0; 278 int tmp, err = 0;
279 279
280 tmp = 0; 280 tmp = 0;
281 __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); 281 savesegment(gs, tmp);
282 err |= __put_user(tmp, (unsigned int __user *)&sc->gs); 282 err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
283 __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); 283 savesegment(fs, tmp);
284 err |= __put_user(tmp, (unsigned int __user *)&sc->fs); 284 err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
285 285
286 err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); 286 err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
287 err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); 287 err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
288 err |= __put_user(regs->edi, &sc->edi); 288 err |= __put_user(regs->edi, &sc->edi);
289 err |= __put_user(regs->esi, &sc->esi); 289 err |= __put_user(regs->esi, &sc->esi);
290 err |= __put_user(regs->ebp, &sc->ebp); 290 err |= __put_user(regs->ebp, &sc->ebp);
291 err |= __put_user(regs->esp, &sc->esp); 291 err |= __put_user(regs->esp, &sc->esp);
292 err |= __put_user(regs->ebx, &sc->ebx); 292 err |= __put_user(regs->ebx, &sc->ebx);
293 err |= __put_user(regs->edx, &sc->edx); 293 err |= __put_user(regs->edx, &sc->edx);
294 err |= __put_user(regs->ecx, &sc->ecx); 294 err |= __put_user(regs->ecx, &sc->ecx);
295 err |= __put_user(regs->eax, &sc->eax); 295 err |= __put_user(regs->eax, &sc->eax);
296 err |= __put_user(current->thread.trap_no, &sc->trapno); 296 err |= __put_user(current->thread.trap_no, &sc->trapno);
297 err |= __put_user(current->thread.error_code, &sc->err); 297 err |= __put_user(current->thread.error_code, &sc->err);
298 err |= __put_user(regs->eip, &sc->eip); 298 err |= __put_user(regs->eip, &sc->eip);
299 err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs); 299 err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs);
300 err |= __put_user(regs->eflags, &sc->eflags); 300 err |= __put_user(regs->eflags, &sc->eflags);
301 err |= __put_user(regs->esp, &sc->esp_at_signal); 301 err |= __put_user(regs->esp, &sc->esp_at_signal);
302 err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss); 302 err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss);
303 303
304 tmp = save_i387(fpstate); 304 tmp = save_i387(fpstate);
305 if (tmp < 0) 305 if (tmp < 0)
306 err = 1; 306 err = 1;
307 else 307 else
308 err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); 308 err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
309 309
310 /* non-iBCS2 extensions.. */ 310 /* non-iBCS2 extensions.. */
311 err |= __put_user(mask, &sc->oldmask); 311 err |= __put_user(mask, &sc->oldmask);
312 err |= __put_user(current->thread.cr2, &sc->cr2); 312 err |= __put_user(current->thread.cr2, &sc->cr2);
313 313
314 return err; 314 return err;
315 } 315 }
316 316
317 /* 317 /*
318 * Determine which stack to use.. 318 * Determine which stack to use..
319 */ 319 */
320 static inline void __user * 320 static inline void __user *
321 get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) 321 get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
322 { 322 {
323 unsigned long esp; 323 unsigned long esp;
324 324
325 /* Default to using normal stack */ 325 /* Default to using normal stack */
326 esp = regs->esp; 326 esp = regs->esp;
327 327
328 /* This is the X/Open sanctioned signal stack switching. */ 328 /* This is the X/Open sanctioned signal stack switching. */
329 if (ka->sa.sa_flags & SA_ONSTACK) { 329 if (ka->sa.sa_flags & SA_ONSTACK) {
330 if (sas_ss_flags(esp) == 0) 330 if (sas_ss_flags(esp) == 0)
331 esp = current->sas_ss_sp + current->sas_ss_size; 331 esp = current->sas_ss_sp + current->sas_ss_size;
332 } 332 }
333 333
334 /* This is the legacy signal stack switching. */ 334 /* This is the legacy signal stack switching. */
335 else if ((regs->xss & 0xffff) != __USER_DS && 335 else if ((regs->xss & 0xffff) != __USER_DS &&
336 !(ka->sa.sa_flags & SA_RESTORER) && 336 !(ka->sa.sa_flags & SA_RESTORER) &&
337 ka->sa.sa_restorer) { 337 ka->sa.sa_restorer) {
338 esp = (unsigned long) ka->sa.sa_restorer; 338 esp = (unsigned long) ka->sa.sa_restorer;
339 } 339 }
340 340
341 return (void __user *)((esp - frame_size) & -8ul); 341 return (void __user *)((esp - frame_size) & -8ul);
342 } 342 }
343 343
344 /* These symbols are defined with the addresses in the vsyscall page. 344 /* These symbols are defined with the addresses in the vsyscall page.
345 See vsyscall-sigreturn.S. */ 345 See vsyscall-sigreturn.S. */
346 extern void __user __kernel_sigreturn; 346 extern void __user __kernel_sigreturn;
347 extern void __user __kernel_rt_sigreturn; 347 extern void __user __kernel_rt_sigreturn;
348 348
349 static int setup_frame(int sig, struct k_sigaction *ka, 349 static int setup_frame(int sig, struct k_sigaction *ka,
350 sigset_t *set, struct pt_regs * regs) 350 sigset_t *set, struct pt_regs * regs)
351 { 351 {
352 void __user *restorer; 352 void __user *restorer;
353 struct sigframe __user *frame; 353 struct sigframe __user *frame;
354 int err = 0; 354 int err = 0;
355 int usig; 355 int usig;
356 356
357 frame = get_sigframe(ka, regs, sizeof(*frame)); 357 frame = get_sigframe(ka, regs, sizeof(*frame));
358 358
359 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 359 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
360 goto give_sigsegv; 360 goto give_sigsegv;
361 361
362 usig = current_thread_info()->exec_domain 362 usig = current_thread_info()->exec_domain
363 && current_thread_info()->exec_domain->signal_invmap 363 && current_thread_info()->exec_domain->signal_invmap
364 && sig < 32 364 && sig < 32
365 ? current_thread_info()->exec_domain->signal_invmap[sig] 365 ? current_thread_info()->exec_domain->signal_invmap[sig]
366 : sig; 366 : sig;
367 367
368 err = __put_user(usig, &frame->sig); 368 err = __put_user(usig, &frame->sig);
369 if (err) 369 if (err)
370 goto give_sigsegv; 370 goto give_sigsegv;
371 371
372 err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); 372 err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
373 if (err) 373 if (err)
374 goto give_sigsegv; 374 goto give_sigsegv;
375 375
376 if (_NSIG_WORDS > 1) { 376 if (_NSIG_WORDS > 1) {
377 err = __copy_to_user(&frame->extramask, &set->sig[1], 377 err = __copy_to_user(&frame->extramask, &set->sig[1],
378 sizeof(frame->extramask)); 378 sizeof(frame->extramask));
379 if (err) 379 if (err)
380 goto give_sigsegv; 380 goto give_sigsegv;
381 } 381 }
382 382
383 restorer = &__kernel_sigreturn; 383 restorer = &__kernel_sigreturn;
384 if (ka->sa.sa_flags & SA_RESTORER) 384 if (ka->sa.sa_flags & SA_RESTORER)
385 restorer = ka->sa.sa_restorer; 385 restorer = ka->sa.sa_restorer;
386 386
387 /* Set up to return from userspace. */ 387 /* Set up to return from userspace. */
388 err |= __put_user(restorer, &frame->pretcode); 388 err |= __put_user(restorer, &frame->pretcode);
389 389
390 /* 390 /*
391 * This is popl %eax ; movl $,%eax ; int $0x80 391 * This is popl %eax ; movl $,%eax ; int $0x80
392 * 392 *
393 * WE DO NOT USE IT ANY MORE! It's only left here for historical 393 * WE DO NOT USE IT ANY MORE! It's only left here for historical
394 * reasons and because gdb uses it as a signature to notice 394 * reasons and because gdb uses it as a signature to notice
395 * signal handler stack frames. 395 * signal handler stack frames.
396 */ 396 */
397 err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); 397 err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
398 err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2)); 398 err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
399 err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); 399 err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
400 400
401 if (err) 401 if (err)
402 goto give_sigsegv; 402 goto give_sigsegv;
403 403
404 /* Set up registers for signal handler */ 404 /* Set up registers for signal handler */
405 regs->esp = (unsigned long) frame; 405 regs->esp = (unsigned long) frame;
406 regs->eip = (unsigned long) ka->sa.sa_handler; 406 regs->eip = (unsigned long) ka->sa.sa_handler;
407 regs->eax = (unsigned long) sig; 407 regs->eax = (unsigned long) sig;
408 regs->edx = (unsigned long) 0; 408 regs->edx = (unsigned long) 0;
409 regs->ecx = (unsigned long) 0; 409 regs->ecx = (unsigned long) 0;
410 410
411 set_fs(USER_DS); 411 set_fs(USER_DS);
412 regs->xds = __USER_DS; 412 regs->xds = __USER_DS;
413 regs->xes = __USER_DS; 413 regs->xes = __USER_DS;
414 regs->xss = __USER_DS; 414 regs->xss = __USER_DS;
415 regs->xcs = __USER_CS; 415 regs->xcs = __USER_CS;
416 416
417 /* 417 /*
418 * Clear TF when entering the signal handler, but 418 * Clear TF when entering the signal handler, but
419 * notify any tracer that was single-stepping it. 419 * notify any tracer that was single-stepping it.
420 * The tracer may want to single-step inside the 420 * The tracer may want to single-step inside the
421 * handler too. 421 * handler too.
422 */ 422 */
423 regs->eflags &= ~TF_MASK; 423 regs->eflags &= ~TF_MASK;
424 if (test_thread_flag(TIF_SINGLESTEP)) 424 if (test_thread_flag(TIF_SINGLESTEP))
425 ptrace_notify(SIGTRAP); 425 ptrace_notify(SIGTRAP);
426 426
427 #if DEBUG_SIG 427 #if DEBUG_SIG
428 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", 428 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
429 current->comm, current->pid, frame, regs->eip, frame->pretcode); 429 current->comm, current->pid, frame, regs->eip, frame->pretcode);
430 #endif 430 #endif
431 431
432 return 1; 432 return 1;
433 433
434 give_sigsegv: 434 give_sigsegv:
435 force_sigsegv(sig, current); 435 force_sigsegv(sig, current);
436 return 0; 436 return 0;
437 } 437 }
438 438
439 static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 439 static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
440 sigset_t *set, struct pt_regs * regs) 440 sigset_t *set, struct pt_regs * regs)
441 { 441 {
442 void __user *restorer; 442 void __user *restorer;
443 struct rt_sigframe __user *frame; 443 struct rt_sigframe __user *frame;
444 int err = 0; 444 int err = 0;
445 int usig; 445 int usig;
446 446
447 frame = get_sigframe(ka, regs, sizeof(*frame)); 447 frame = get_sigframe(ka, regs, sizeof(*frame));
448 448
449 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 449 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
450 goto give_sigsegv; 450 goto give_sigsegv;
451 451
452 usig = current_thread_info()->exec_domain 452 usig = current_thread_info()->exec_domain
453 && current_thread_info()->exec_domain->signal_invmap 453 && current_thread_info()->exec_domain->signal_invmap
454 && sig < 32 454 && sig < 32
455 ? current_thread_info()->exec_domain->signal_invmap[sig] 455 ? current_thread_info()->exec_domain->signal_invmap[sig]
456 : sig; 456 : sig;
457 457
458 err |= __put_user(usig, &frame->sig); 458 err |= __put_user(usig, &frame->sig);
459 err |= __put_user(&frame->info, &frame->pinfo); 459 err |= __put_user(&frame->info, &frame->pinfo);
460 err |= __put_user(&frame->uc, &frame->puc); 460 err |= __put_user(&frame->uc, &frame->puc);
461 err |= copy_siginfo_to_user(&frame->info, info); 461 err |= copy_siginfo_to_user(&frame->info, info);
462 if (err) 462 if (err)
463 goto give_sigsegv; 463 goto give_sigsegv;
464 464
465 /* Create the ucontext. */ 465 /* Create the ucontext. */
466 err |= __put_user(0, &frame->uc.uc_flags); 466 err |= __put_user(0, &frame->uc.uc_flags);
467 err |= __put_user(0, &frame->uc.uc_link); 467 err |= __put_user(0, &frame->uc.uc_link);
468 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 468 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
469 err |= __put_user(sas_ss_flags(regs->esp), 469 err |= __put_user(sas_ss_flags(regs->esp),
470 &frame->uc.uc_stack.ss_flags); 470 &frame->uc.uc_stack.ss_flags);
471 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 471 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
472 err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, 472 err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
473 regs, set->sig[0]); 473 regs, set->sig[0]);
474 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 474 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
475 if (err) 475 if (err)
476 goto give_sigsegv; 476 goto give_sigsegv;
477 477
478 /* Set up to return from userspace. */ 478 /* Set up to return from userspace. */
479 restorer = &__kernel_rt_sigreturn; 479 restorer = &__kernel_rt_sigreturn;
480 if (ka->sa.sa_flags & SA_RESTORER) 480 if (ka->sa.sa_flags & SA_RESTORER)
481 restorer = ka->sa.sa_restorer; 481 restorer = ka->sa.sa_restorer;
482 err |= __put_user(restorer, &frame->pretcode); 482 err |= __put_user(restorer, &frame->pretcode);
483 483
484 /* 484 /*
485 * This is movl $,%eax ; int $0x80 485 * This is movl $,%eax ; int $0x80
486 * 486 *
487 * WE DO NOT USE IT ANY MORE! It's only left here for historical 487 * WE DO NOT USE IT ANY MORE! It's only left here for historical
488 * reasons and because gdb uses it as a signature to notice 488 * reasons and because gdb uses it as a signature to notice
489 * signal handler stack frames. 489 * signal handler stack frames.
490 */ 490 */
491 err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); 491 err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
492 err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1)); 492 err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
493 err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); 493 err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
494 494
495 if (err) 495 if (err)
496 goto give_sigsegv; 496 goto give_sigsegv;
497 497
498 /* Set up registers for signal handler */ 498 /* Set up registers for signal handler */
499 regs->esp = (unsigned long) frame; 499 regs->esp = (unsigned long) frame;
500 regs->eip = (unsigned long) ka->sa.sa_handler; 500 regs->eip = (unsigned long) ka->sa.sa_handler;
501 regs->eax = (unsigned long) usig; 501 regs->eax = (unsigned long) usig;
502 regs->edx = (unsigned long) &frame->info; 502 regs->edx = (unsigned long) &frame->info;
503 regs->ecx = (unsigned long) &frame->uc; 503 regs->ecx = (unsigned long) &frame->uc;
504 504
505 set_fs(USER_DS); 505 set_fs(USER_DS);
506 regs->xds = __USER_DS; 506 regs->xds = __USER_DS;
507 regs->xes = __USER_DS; 507 regs->xes = __USER_DS;
508 regs->xss = __USER_DS; 508 regs->xss = __USER_DS;
509 regs->xcs = __USER_CS; 509 regs->xcs = __USER_CS;
510 510
511 /* 511 /*
512 * Clear TF when entering the signal handler, but 512 * Clear TF when entering the signal handler, but
513 * notify any tracer that was single-stepping it. 513 * notify any tracer that was single-stepping it.
514 * The tracer may want to single-step inside the 514 * The tracer may want to single-step inside the
515 * handler too. 515 * handler too.
516 */ 516 */
517 regs->eflags &= ~TF_MASK; 517 regs->eflags &= ~TF_MASK;
518 if (test_thread_flag(TIF_SINGLESTEP)) 518 if (test_thread_flag(TIF_SINGLESTEP))
519 ptrace_notify(SIGTRAP); 519 ptrace_notify(SIGTRAP);
520 520
521 #if DEBUG_SIG 521 #if DEBUG_SIG
522 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", 522 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
523 current->comm, current->pid, frame, regs->eip, frame->pretcode); 523 current->comm, current->pid, frame, regs->eip, frame->pretcode);
524 #endif 524 #endif
525 525
526 return 1; 526 return 1;
527 527
528 give_sigsegv: 528 give_sigsegv:
529 force_sigsegv(sig, current); 529 force_sigsegv(sig, current);
530 return 0; 530 return 0;
531 } 531 }
532 532
533 /* 533 /*
534 * OK, we're invoking a handler 534 * OK, we're invoking a handler
535 */ 535 */
536 536
537 static int 537 static int
538 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, 538 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
539 sigset_t *oldset, struct pt_regs * regs) 539 sigset_t *oldset, struct pt_regs * regs)
540 { 540 {
541 int ret; 541 int ret;
542 542
543 /* Are we from a system call? */ 543 /* Are we from a system call? */
544 if (regs->orig_eax >= 0) { 544 if (regs->orig_eax >= 0) {
545 /* If so, check system call restarting.. */ 545 /* If so, check system call restarting.. */
546 switch (regs->eax) { 546 switch (regs->eax) {
547 case -ERESTART_RESTARTBLOCK: 547 case -ERESTART_RESTARTBLOCK:
548 case -ERESTARTNOHAND: 548 case -ERESTARTNOHAND:
549 regs->eax = -EINTR; 549 regs->eax = -EINTR;
550 break; 550 break;
551 551
552 case -ERESTARTSYS: 552 case -ERESTARTSYS:
553 if (!(ka->sa.sa_flags & SA_RESTART)) { 553 if (!(ka->sa.sa_flags & SA_RESTART)) {
554 regs->eax = -EINTR; 554 regs->eax = -EINTR;
555 break; 555 break;
556 } 556 }
557 /* fallthrough */ 557 /* fallthrough */
558 case -ERESTARTNOINTR: 558 case -ERESTARTNOINTR:
559 regs->eax = regs->orig_eax; 559 regs->eax = regs->orig_eax;
560 regs->eip -= 2; 560 regs->eip -= 2;
561 } 561 }
562 } 562 }
563 563
564 /* 564 /*
565 * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so 565 * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
566 * that register information in the sigcontext is correct. 566 * that register information in the sigcontext is correct.
567 */ 567 */
568 if (unlikely(regs->eflags & TF_MASK) 568 if (unlikely(regs->eflags & TF_MASK)
569 && likely(current->ptrace & PT_DTRACE)) { 569 && likely(current->ptrace & PT_DTRACE)) {
570 current->ptrace &= ~PT_DTRACE; 570 current->ptrace &= ~PT_DTRACE;
571 regs->eflags &= ~TF_MASK; 571 regs->eflags &= ~TF_MASK;
572 } 572 }
573 573
574 /* Set up the stack frame */ 574 /* Set up the stack frame */
575 if (ka->sa.sa_flags & SA_SIGINFO) 575 if (ka->sa.sa_flags & SA_SIGINFO)
576 ret = setup_rt_frame(sig, ka, info, oldset, regs); 576 ret = setup_rt_frame(sig, ka, info, oldset, regs);
577 else 577 else
578 ret = setup_frame(sig, ka, oldset, regs); 578 ret = setup_frame(sig, ka, oldset, regs);
579 579
580 if (ret) { 580 if (ret) {
581 spin_lock_irq(&current->sighand->siglock); 581 spin_lock_irq(&current->sighand->siglock);
582 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); 582 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
583 if (!(ka->sa.sa_flags & SA_NODEFER)) 583 if (!(ka->sa.sa_flags & SA_NODEFER))
584 sigaddset(&current->blocked,sig); 584 sigaddset(&current->blocked,sig);
585 recalc_sigpending(); 585 recalc_sigpending();
586 spin_unlock_irq(&current->sighand->siglock); 586 spin_unlock_irq(&current->sighand->siglock);
587 } 587 }
588 588
589 return ret; 589 return ret;
590 } 590 }
591 591
592 /* 592 /*
593 * Note that 'init' is a special process: it doesn't get signals it doesn't 593 * Note that 'init' is a special process: it doesn't get signals it doesn't
594 * want to handle. Thus you cannot kill init even with a SIGKILL even by 594 * want to handle. Thus you cannot kill init even with a SIGKILL even by
595 * mistake. 595 * mistake.
596 */ 596 */
597 int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) 597 int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
598 { 598 {
599 siginfo_t info; 599 siginfo_t info;
600 int signr; 600 int signr;
601 struct k_sigaction ka; 601 struct k_sigaction ka;
602 602
603 /* 603 /*
604 * We want the common case to go fast, which 604 * We want the common case to go fast, which
605 * is why we may in certain cases get here from 605 * is why we may in certain cases get here from
606 * kernel mode. Just return without doing anything 606 * kernel mode. Just return without doing anything
607 * if so. 607 * if so.
608 */ 608 */
609 if (!user_mode(regs)) 609 if (!user_mode(regs))
610 return 1; 610 return 1;
611 611
612 if (try_to_freeze()) 612 if (try_to_freeze())
613 goto no_signal; 613 goto no_signal;
614 614
615 if (!oldset) 615 if (!oldset)
616 oldset = &current->blocked; 616 oldset = &current->blocked;
617 617
618 signr = get_signal_to_deliver(&info, &ka, regs, NULL); 618 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
619 if (signr > 0) { 619 if (signr > 0) {
620 /* Reenable any watchpoints before delivering the 620 /* Reenable any watchpoints before delivering the
621 * signal to user space. The processor register will 621 * signal to user space. The processor register will
622 * have been cleared if the watchpoint triggered 622 * have been cleared if the watchpoint triggered
623 * inside the kernel. 623 * inside the kernel.
624 */ 624 */
625 if (unlikely(current->thread.debugreg[7])) { 625 if (unlikely(current->thread.debugreg[7])) {
626 set_debugreg(current->thread.debugreg[7], 7); 626 set_debugreg(current->thread.debugreg[7], 7);
627 } 627 }
628 628
629 /* Whee! Actually deliver the signal. */ 629 /* Whee! Actually deliver the signal. */
630 return handle_signal(signr, &info, &ka, oldset, regs); 630 return handle_signal(signr, &info, &ka, oldset, regs);
631 } 631 }
632 632
633 no_signal: 633 no_signal:
634 /* Did we come from a system call? */ 634 /* Did we come from a system call? */
635 if (regs->orig_eax >= 0) { 635 if (regs->orig_eax >= 0) {
636 /* Restart the system call - no handlers present */ 636 /* Restart the system call - no handlers present */
637 if (regs->eax == -ERESTARTNOHAND || 637 if (regs->eax == -ERESTARTNOHAND ||
638 regs->eax == -ERESTARTSYS || 638 regs->eax == -ERESTARTSYS ||
639 regs->eax == -ERESTARTNOINTR) { 639 regs->eax == -ERESTARTNOINTR) {
640 regs->eax = regs->orig_eax; 640 regs->eax = regs->orig_eax;
641 regs->eip -= 2; 641 regs->eip -= 2;
642 } 642 }
643 if (regs->eax == -ERESTART_RESTARTBLOCK){ 643 if (regs->eax == -ERESTART_RESTARTBLOCK){
644 regs->eax = __NR_restart_syscall; 644 regs->eax = __NR_restart_syscall;
645 regs->eip -= 2; 645 regs->eip -= 2;
646 } 646 }
647 } 647 }
648 return 0; 648 return 0;
649 } 649 }
650 650
651 /* 651 /*
652 * notification of userspace execution resumption 652 * notification of userspace execution resumption
653 * - triggered by current->work.notify_resume 653 * - triggered by current->work.notify_resume
654 */ 654 */
655 __attribute__((regparm(3))) 655 __attribute__((regparm(3)))
656 void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, 656 void do_notify_resume(struct pt_regs *regs, sigset_t *oldset,
657 __u32 thread_info_flags) 657 __u32 thread_info_flags)
658 { 658 {
659 /* Pending single-step? */ 659 /* Pending single-step? */
660 if (thread_info_flags & _TIF_SINGLESTEP) { 660 if (thread_info_flags & _TIF_SINGLESTEP) {
661 regs->eflags |= TF_MASK; 661 regs->eflags |= TF_MASK;
662 clear_thread_flag(TIF_SINGLESTEP); 662 clear_thread_flag(TIF_SINGLESTEP);
663 } 663 }
664 /* deal with pending signal delivery */ 664 /* deal with pending signal delivery */
665 if (thread_info_flags & _TIF_SIGPENDING) 665 if (thread_info_flags & _TIF_SIGPENDING)
666 do_signal(regs,oldset); 666 do_signal(regs,oldset);
667 667
668 clear_thread_flag(TIF_IRET); 668 clear_thread_flag(TIF_IRET);
669 } 669 }
670 670
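The two setup_sigcontext() hunks above (file lines 281 and 283) replace the open-coded movl %%gs / movl %%fs reads with the savesegment() helper described in the commit message. The helper itself is defined in <asm/system.h>, which is not part of this excerpt; the sketch below is an assumed, illustrative form consistent with the commit message (the output constraint allows a register, so the selector is not forced out to memory and stack temporaries such as tmp can stay register-allocated).

	/* Illustrative sketch only; the real definition lives in <asm/system.h>
	 * and is not shown in this diff.  "=rm" lets GCC place the destination
	 * in a register or in memory, per the commit message.
	 */
	#define savesegment(seg, value) \
		asm volatile("mov %%" #seg ",%0" : "=rm" (value))

	/* Hypothetical usage, mirroring the pattern in setup_sigcontext(): */
	static inline unsigned short read_gs_selector(void)
	{
		unsigned short sel;
		savesegment(gs, sel);	/* read the current %gs selector */
		return sel;
	}

arch/i386/kernel/traps.c follows.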
arch/i386/kernel/traps.c
1 /* 1 /*
2 * linux/arch/i386/traps.c 2 * linux/arch/i386/traps.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 5 *
6 * Pentium III FXSR, SSE support 6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000 7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 */ 8 */
9 9
10 /* 10 /*
11 * 'Traps.c' handles hardware traps and faults after we have saved some 11 * 'Traps.c' handles hardware traps and faults after we have saved some
12 * state in 'asm.s'. 12 * state in 'asm.s'.
13 */ 13 */
14 #include <linux/config.h> 14 #include <linux/config.h>
15 #include <linux/sched.h> 15 #include <linux/sched.h>
16 #include <linux/kernel.h> 16 #include <linux/kernel.h>
17 #include <linux/string.h> 17 #include <linux/string.h>
18 #include <linux/errno.h> 18 #include <linux/errno.h>
19 #include <linux/timer.h> 19 #include <linux/timer.h>
20 #include <linux/mm.h> 20 #include <linux/mm.h>
21 #include <linux/init.h> 21 #include <linux/init.h>
22 #include <linux/delay.h> 22 #include <linux/delay.h>
23 #include <linux/spinlock.h> 23 #include <linux/spinlock.h>
24 #include <linux/interrupt.h> 24 #include <linux/interrupt.h>
25 #include <linux/highmem.h> 25 #include <linux/highmem.h>
26 #include <linux/kallsyms.h> 26 #include <linux/kallsyms.h>
27 #include <linux/ptrace.h> 27 #include <linux/ptrace.h>
28 #include <linux/utsname.h> 28 #include <linux/utsname.h>
29 #include <linux/kprobes.h> 29 #include <linux/kprobes.h>
30 #include <linux/kexec.h> 30 #include <linux/kexec.h>
31 31
32 #ifdef CONFIG_EISA 32 #ifdef CONFIG_EISA
33 #include <linux/ioport.h> 33 #include <linux/ioport.h>
34 #include <linux/eisa.h> 34 #include <linux/eisa.h>
35 #endif 35 #endif
36 36
37 #ifdef CONFIG_MCA 37 #ifdef CONFIG_MCA
38 #include <linux/mca.h> 38 #include <linux/mca.h>
39 #endif 39 #endif
40 40
41 #include <asm/processor.h> 41 #include <asm/processor.h>
42 #include <asm/system.h> 42 #include <asm/system.h>
43 #include <asm/uaccess.h> 43 #include <asm/uaccess.h>
44 #include <asm/io.h> 44 #include <asm/io.h>
45 #include <asm/atomic.h> 45 #include <asm/atomic.h>
46 #include <asm/debugreg.h> 46 #include <asm/debugreg.h>
47 #include <asm/desc.h> 47 #include <asm/desc.h>
48 #include <asm/i387.h> 48 #include <asm/i387.h>
49 #include <asm/nmi.h> 49 #include <asm/nmi.h>
50 50
51 #include <asm/smp.h> 51 #include <asm/smp.h>
52 #include <asm/arch_hooks.h> 52 #include <asm/arch_hooks.h>
53 #include <asm/kdebug.h> 53 #include <asm/kdebug.h>
54 54
55 #include <linux/irq.h> 55 #include <linux/irq.h>
56 #include <linux/module.h> 56 #include <linux/module.h>
57 57
58 #include "mach_traps.h" 58 #include "mach_traps.h"
59 59
60 asmlinkage int system_call(void); 60 asmlinkage int system_call(void);
61 61
62 struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, 62 struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
63 { 0, 0 }, { 0, 0 } }; 63 { 0, 0 }, { 0, 0 } };
64 64
65 /* Do we ignore FPU interrupts ? */ 65 /* Do we ignore FPU interrupts ? */
66 char ignore_fpu_irq = 0; 66 char ignore_fpu_irq = 0;
67 67
68 /* 68 /*
69 * The IDT has to be page-aligned to simplify the Pentium 69 * The IDT has to be page-aligned to simplify the Pentium
70 * F0 0F bug workaround.. We have a special link segment 70 * F0 0F bug workaround.. We have a special link segment
71 * for this. 71 * for this.
72 */ 72 */
73 struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; 73 struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
74 74
75 asmlinkage void divide_error(void); 75 asmlinkage void divide_error(void);
76 asmlinkage void debug(void); 76 asmlinkage void debug(void);
77 asmlinkage void nmi(void); 77 asmlinkage void nmi(void);
78 asmlinkage void int3(void); 78 asmlinkage void int3(void);
79 asmlinkage void overflow(void); 79 asmlinkage void overflow(void);
80 asmlinkage void bounds(void); 80 asmlinkage void bounds(void);
81 asmlinkage void invalid_op(void); 81 asmlinkage void invalid_op(void);
82 asmlinkage void device_not_available(void); 82 asmlinkage void device_not_available(void);
83 asmlinkage void coprocessor_segment_overrun(void); 83 asmlinkage void coprocessor_segment_overrun(void);
84 asmlinkage void invalid_TSS(void); 84 asmlinkage void invalid_TSS(void);
85 asmlinkage void segment_not_present(void); 85 asmlinkage void segment_not_present(void);
86 asmlinkage void stack_segment(void); 86 asmlinkage void stack_segment(void);
87 asmlinkage void general_protection(void); 87 asmlinkage void general_protection(void);
88 asmlinkage void page_fault(void); 88 asmlinkage void page_fault(void);
89 asmlinkage void coprocessor_error(void); 89 asmlinkage void coprocessor_error(void);
90 asmlinkage void simd_coprocessor_error(void); 90 asmlinkage void simd_coprocessor_error(void);
91 asmlinkage void alignment_check(void); 91 asmlinkage void alignment_check(void);
92 asmlinkage void spurious_interrupt_bug(void); 92 asmlinkage void spurious_interrupt_bug(void);
93 asmlinkage void machine_check(void); 93 asmlinkage void machine_check(void);
94 94
95 static int kstack_depth_to_print = 24; 95 static int kstack_depth_to_print = 24;
96 struct notifier_block *i386die_chain; 96 struct notifier_block *i386die_chain;
97 static DEFINE_SPINLOCK(die_notifier_lock); 97 static DEFINE_SPINLOCK(die_notifier_lock);
98 98
99 int register_die_notifier(struct notifier_block *nb) 99 int register_die_notifier(struct notifier_block *nb)
100 { 100 {
101 int err = 0; 101 int err = 0;
102 unsigned long flags; 102 unsigned long flags;
103 spin_lock_irqsave(&die_notifier_lock, flags); 103 spin_lock_irqsave(&die_notifier_lock, flags);
104 err = notifier_chain_register(&i386die_chain, nb); 104 err = notifier_chain_register(&i386die_chain, nb);
105 spin_unlock_irqrestore(&die_notifier_lock, flags); 105 spin_unlock_irqrestore(&die_notifier_lock, flags);
106 return err; 106 return err;
107 } 107 }
108 EXPORT_SYMBOL(register_die_notifier); 108 EXPORT_SYMBOL(register_die_notifier);
109 109
110 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) 110 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
111 { 111 {
112 return p > (void *)tinfo && 112 return p > (void *)tinfo &&
113 p < (void *)tinfo + THREAD_SIZE - 3; 113 p < (void *)tinfo + THREAD_SIZE - 3;
114 } 114 }
115 115
116 static inline unsigned long print_context_stack(struct thread_info *tinfo, 116 static inline unsigned long print_context_stack(struct thread_info *tinfo,
117 unsigned long *stack, unsigned long ebp) 117 unsigned long *stack, unsigned long ebp)
118 { 118 {
119 unsigned long addr; 119 unsigned long addr;
120 120
121 #ifdef CONFIG_FRAME_POINTER 121 #ifdef CONFIG_FRAME_POINTER
122 while (valid_stack_ptr(tinfo, (void *)ebp)) { 122 while (valid_stack_ptr(tinfo, (void *)ebp)) {
123 addr = *(unsigned long *)(ebp + 4); 123 addr = *(unsigned long *)(ebp + 4);
124 printk(" [<%08lx>] ", addr); 124 printk(" [<%08lx>] ", addr);
125 print_symbol("%s", addr); 125 print_symbol("%s", addr);
126 printk("\n"); 126 printk("\n");
127 ebp = *(unsigned long *)ebp; 127 ebp = *(unsigned long *)ebp;
128 } 128 }
129 #else 129 #else
130 while (valid_stack_ptr(tinfo, stack)) { 130 while (valid_stack_ptr(tinfo, stack)) {
131 addr = *stack++; 131 addr = *stack++;
132 if (__kernel_text_address(addr)) { 132 if (__kernel_text_address(addr)) {
133 printk(" [<%08lx>]", addr); 133 printk(" [<%08lx>]", addr);
134 print_symbol(" %s", addr); 134 print_symbol(" %s", addr);
135 printk("\n"); 135 printk("\n");
136 } 136 }
137 } 137 }
138 #endif 138 #endif
139 return ebp; 139 return ebp;
140 } 140 }
141 141
142 void show_trace(struct task_struct *task, unsigned long * stack) 142 void show_trace(struct task_struct *task, unsigned long * stack)
143 { 143 {
144 unsigned long ebp; 144 unsigned long ebp;
145 145
146 if (!task) 146 if (!task)
147 task = current; 147 task = current;
148 148
149 if (task == current) { 149 if (task == current) {
150 /* Grab ebp right from our regs */ 150 /* Grab ebp right from our regs */
151 asm ("movl %%ebp, %0" : "=r" (ebp) : ); 151 asm ("movl %%ebp, %0" : "=r" (ebp) : );
152 } else { 152 } else {
153 /* ebp is the last reg pushed by switch_to */ 153 /* ebp is the last reg pushed by switch_to */
154 ebp = *(unsigned long *) task->thread.esp; 154 ebp = *(unsigned long *) task->thread.esp;
155 } 155 }
156 156
157 while (1) { 157 while (1) {
158 struct thread_info *context; 158 struct thread_info *context;
159 context = (struct thread_info *) 159 context = (struct thread_info *)
160 ((unsigned long)stack & (~(THREAD_SIZE - 1))); 160 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
161 ebp = print_context_stack(context, stack, ebp); 161 ebp = print_context_stack(context, stack, ebp);
162 stack = (unsigned long*)context->previous_esp; 162 stack = (unsigned long*)context->previous_esp;
163 if (!stack) 163 if (!stack)
164 break; 164 break;
165 printk(" =======================\n"); 165 printk(" =======================\n");
166 } 166 }
167 } 167 }
168 168
169 void show_stack(struct task_struct *task, unsigned long *esp) 169 void show_stack(struct task_struct *task, unsigned long *esp)
170 { 170 {
171 unsigned long *stack; 171 unsigned long *stack;
172 int i; 172 int i;
173 173
174 if (esp == NULL) { 174 if (esp == NULL) {
175 if (task) 175 if (task)
176 esp = (unsigned long*)task->thread.esp; 176 esp = (unsigned long*)task->thread.esp;
177 else 177 else
178 esp = (unsigned long *)&esp; 178 esp = (unsigned long *)&esp;
179 } 179 }
180 180
181 stack = esp; 181 stack = esp;
182 for(i = 0; i < kstack_depth_to_print; i++) { 182 for(i = 0; i < kstack_depth_to_print; i++) {
183 if (kstack_end(stack)) 183 if (kstack_end(stack))
184 break; 184 break;
185 if (i && ((i % 8) == 0)) 185 if (i && ((i % 8) == 0))
186 printk("\n "); 186 printk("\n ");
187 printk("%08lx ", *stack++); 187 printk("%08lx ", *stack++);
188 } 188 }
189 printk("\nCall Trace:\n"); 189 printk("\nCall Trace:\n");
190 show_trace(task, esp); 190 show_trace(task, esp);
191 } 191 }
192 192
193 /* 193 /*
194 * The architecture-independent dump_stack generator 194 * The architecture-independent dump_stack generator
195 */ 195 */
196 void dump_stack(void) 196 void dump_stack(void)
197 { 197 {
198 unsigned long stack; 198 unsigned long stack;
199 199
200 show_trace(current, &stack); 200 show_trace(current, &stack);
201 } 201 }
202 202
203 EXPORT_SYMBOL(dump_stack); 203 EXPORT_SYMBOL(dump_stack);
204 204
205 void show_registers(struct pt_regs *regs) 205 void show_registers(struct pt_regs *regs)
206 { 206 {
207 int i; 207 int i;
208 int in_kernel = 1; 208 int in_kernel = 1;
209 unsigned long esp; 209 unsigned long esp;
210 unsigned short ss; 210 unsigned short ss;
211 211
212 esp = (unsigned long) (&regs->esp); 212 esp = (unsigned long) (&regs->esp);
213 ss = __KERNEL_DS; 213 ss = __KERNEL_DS;
214 if (user_mode(regs)) { 214 if (user_mode(regs)) {
215 in_kernel = 0; 215 in_kernel = 0;
216 esp = regs->esp; 216 esp = regs->esp;
217 ss = regs->xss & 0xffff; 217 ss = regs->xss & 0xffff;
218 } 218 }
219 print_modules(); 219 print_modules();
220 printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx" 220 printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx"
221 " (%s) \n", 221 " (%s) \n",
222 smp_processor_id(), 0xffff & regs->xcs, regs->eip, 222 smp_processor_id(), 0xffff & regs->xcs, regs->eip,
223 print_tainted(), regs->eflags, system_utsname.release); 223 print_tainted(), regs->eflags, system_utsname.release);
224 print_symbol("EIP is at %s\n", regs->eip); 224 print_symbol("EIP is at %s\n", regs->eip);
225 printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", 225 printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
226 regs->eax, regs->ebx, regs->ecx, regs->edx); 226 regs->eax, regs->ebx, regs->ecx, regs->edx);
227 printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", 227 printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
228 regs->esi, regs->edi, regs->ebp, esp); 228 regs->esi, regs->edi, regs->ebp, esp);
229 printk("ds: %04x es: %04x ss: %04x\n", 229 printk("ds: %04x es: %04x ss: %04x\n",
230 regs->xds & 0xffff, regs->xes & 0xffff, ss); 230 regs->xds & 0xffff, regs->xes & 0xffff, ss);
231 printk("Process %s (pid: %d, threadinfo=%p task=%p)", 231 printk("Process %s (pid: %d, threadinfo=%p task=%p)",
232 current->comm, current->pid, current_thread_info(), current); 232 current->comm, current->pid, current_thread_info(), current);
233 /* 233 /*
234 * When in-kernel, we also print out the stack and code at the 234 * When in-kernel, we also print out the stack and code at the
235 * time of the fault.. 235 * time of the fault..
236 */ 236 */
237 if (in_kernel) { 237 if (in_kernel) {
238 u8 __user *eip; 238 u8 __user *eip;
239 239
240 printk("\nStack: "); 240 printk("\nStack: ");
241 show_stack(NULL, (unsigned long*)esp); 241 show_stack(NULL, (unsigned long*)esp);
242 242
243 printk("Code: "); 243 printk("Code: ");
244 244
245 eip = (u8 __user *)regs->eip - 43; 245 eip = (u8 __user *)regs->eip - 43;
246 for (i = 0; i < 64; i++, eip++) { 246 for (i = 0; i < 64; i++, eip++) {
247 unsigned char c; 247 unsigned char c;
248 248
249 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { 249 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
250 printk(" Bad EIP value."); 250 printk(" Bad EIP value.");
251 break; 251 break;
252 } 252 }
253 if (eip == (u8 __user *)regs->eip) 253 if (eip == (u8 __user *)regs->eip)
254 printk("<%02x> ", c); 254 printk("<%02x> ", c);
255 else 255 else
256 printk("%02x ", c); 256 printk("%02x ", c);
257 } 257 }
258 } 258 }
259 printk("\n"); 259 printk("\n");
260 } 260 }
261 261
262 static void handle_BUG(struct pt_regs *regs) 262 static void handle_BUG(struct pt_regs *regs)
263 { 263 {
264 unsigned short ud2; 264 unsigned short ud2;
265 unsigned short line; 265 unsigned short line;
266 char *file; 266 char *file;
267 char c; 267 char c;
268 unsigned long eip; 268 unsigned long eip;
269 269
270 if (user_mode(regs)) 270 if (user_mode(regs))
271 goto no_bug; /* Not in kernel */ 271 goto no_bug; /* Not in kernel */
272 272
273 eip = regs->eip; 273 eip = regs->eip;
274 274
275 if (eip < PAGE_OFFSET) 275 if (eip < PAGE_OFFSET)
276 goto no_bug; 276 goto no_bug;
277 if (__get_user(ud2, (unsigned short __user *)eip)) 277 if (__get_user(ud2, (unsigned short __user *)eip))
278 goto no_bug; 278 goto no_bug;
279 if (ud2 != 0x0b0f) 279 if (ud2 != 0x0b0f)
280 goto no_bug; 280 goto no_bug;
281 if (__get_user(line, (unsigned short __user *)(eip + 2))) 281 if (__get_user(line, (unsigned short __user *)(eip + 2)))
282 goto bug; 282 goto bug;
283 if (__get_user(file, (char * __user *)(eip + 4)) || 283 if (__get_user(file, (char * __user *)(eip + 4)) ||
284 (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) 284 (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
285 file = "<bad filename>"; 285 file = "<bad filename>";
286 286
287 printk("------------[ cut here ]------------\n"); 287 printk("------------[ cut here ]------------\n");
288 printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line); 288 printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line);
289 289
290 no_bug: 290 no_bug:
291 return; 291 return;
292 292
293 /* Here we know it was a BUG but file-n-line is unavailable */ 293 /* Here we know it was a BUG but file-n-line is unavailable */
294 bug: 294 bug:
295 printk("Kernel BUG\n"); 295 printk("Kernel BUG\n");
296 } 296 }
297 297
298 /* This is gone through when something in the kernel 298 /* This is gone through when something in the kernel
299 * has done something bad and is about to be terminated. 299 * has done something bad and is about to be terminated.
300 */ 300 */
301 void die(const char * str, struct pt_regs * regs, long err) 301 void die(const char * str, struct pt_regs * regs, long err)
302 { 302 {
303 static struct { 303 static struct {
304 spinlock_t lock; 304 spinlock_t lock;
305 u32 lock_owner; 305 u32 lock_owner;
306 int lock_owner_depth; 306 int lock_owner_depth;
307 } die = { 307 } die = {
308 .lock = SPIN_LOCK_UNLOCKED, 308 .lock = SPIN_LOCK_UNLOCKED,
309 .lock_owner = -1, 309 .lock_owner = -1,
310 .lock_owner_depth = 0 310 .lock_owner_depth = 0
311 }; 311 };
312 static int die_counter; 312 static int die_counter;
313 313
314 if (die.lock_owner != raw_smp_processor_id()) { 314 if (die.lock_owner != raw_smp_processor_id()) {
315 console_verbose(); 315 console_verbose();
316 spin_lock_irq(&die.lock); 316 spin_lock_irq(&die.lock);
317 die.lock_owner = smp_processor_id(); 317 die.lock_owner = smp_processor_id();
318 die.lock_owner_depth = 0; 318 die.lock_owner_depth = 0;
319 bust_spinlocks(1); 319 bust_spinlocks(1);
320 } 320 }
321 321
322 if (++die.lock_owner_depth < 3) { 322 if (++die.lock_owner_depth < 3) {
323 int nl = 0; 323 int nl = 0;
324 handle_BUG(regs); 324 handle_BUG(regs);
325 printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); 325 printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
326 #ifdef CONFIG_PREEMPT 326 #ifdef CONFIG_PREEMPT
327 printk("PREEMPT "); 327 printk("PREEMPT ");
328 nl = 1; 328 nl = 1;
329 #endif 329 #endif
330 #ifdef CONFIG_SMP 330 #ifdef CONFIG_SMP
331 printk("SMP "); 331 printk("SMP ");
332 nl = 1; 332 nl = 1;
333 #endif 333 #endif
334 #ifdef CONFIG_DEBUG_PAGEALLOC 334 #ifdef CONFIG_DEBUG_PAGEALLOC
335 printk("DEBUG_PAGEALLOC"); 335 printk("DEBUG_PAGEALLOC");
336 nl = 1; 336 nl = 1;
337 #endif 337 #endif
338 if (nl) 338 if (nl)
339 printk("\n"); 339 printk("\n");
340 notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); 340 notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
341 show_registers(regs); 341 show_registers(regs);
342 } else 342 } else
343 printk(KERN_ERR "Recursive die() failure, output suppressed\n"); 343 printk(KERN_ERR "Recursive die() failure, output suppressed\n");
344 344
345 bust_spinlocks(0); 345 bust_spinlocks(0);
346 die.lock_owner = -1; 346 die.lock_owner = -1;
347 spin_unlock_irq(&die.lock); 347 spin_unlock_irq(&die.lock);
348 348
349 if (kexec_should_crash(current)) 349 if (kexec_should_crash(current))
350 crash_kexec(regs); 350 crash_kexec(regs);
351 351
352 if (in_interrupt()) 352 if (in_interrupt())
353 panic("Fatal exception in interrupt"); 353 panic("Fatal exception in interrupt");
354 354
355 if (panic_on_oops) { 355 if (panic_on_oops) {
356 printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); 356 printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
357 ssleep(5); 357 ssleep(5);
358 panic("Fatal exception"); 358 panic("Fatal exception");
359 } 359 }
360 do_exit(SIGSEGV); 360 do_exit(SIGSEGV);
361 } 361 }
362 362
363 static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) 363 static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
364 { 364 {
365 if (!user_mode_vm(regs)) 365 if (!user_mode_vm(regs))
366 die(str, regs, err); 366 die(str, regs, err);
367 } 367 }
368 368
369 static void do_trap(int trapnr, int signr, char *str, int vm86, 369 static void do_trap(int trapnr, int signr, char *str, int vm86,
370 struct pt_regs * regs, long error_code, siginfo_t *info) 370 struct pt_regs * regs, long error_code, siginfo_t *info)
371 { 371 {
372 struct task_struct *tsk = current; 372 struct task_struct *tsk = current;
373 tsk->thread.error_code = error_code; 373 tsk->thread.error_code = error_code;
374 tsk->thread.trap_no = trapnr; 374 tsk->thread.trap_no = trapnr;
375 375
376 if (regs->eflags & VM_MASK) { 376 if (regs->eflags & VM_MASK) {
377 if (vm86) 377 if (vm86)
378 goto vm86_trap; 378 goto vm86_trap;
379 goto trap_signal; 379 goto trap_signal;
380 } 380 }
381 381
382 if (!user_mode(regs)) 382 if (!user_mode(regs))
383 goto kernel_trap; 383 goto kernel_trap;
384 384
385 trap_signal: { 385 trap_signal: {
386 if (info) 386 if (info)
387 force_sig_info(signr, info, tsk); 387 force_sig_info(signr, info, tsk);
388 else 388 else
389 force_sig(signr, tsk); 389 force_sig(signr, tsk);
390 return; 390 return;
391 } 391 }
392 392
393 kernel_trap: { 393 kernel_trap: {
394 if (!fixup_exception(regs)) 394 if (!fixup_exception(regs))
395 die(str, regs, error_code); 395 die(str, regs, error_code);
396 return; 396 return;
397 } 397 }
398 398
399 vm86_trap: { 399 vm86_trap: {
400 int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr); 400 int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
401 if (ret) goto trap_signal; 401 if (ret) goto trap_signal;
402 return; 402 return;
403 } 403 }
404 } 404 }
405 405
406 #define DO_ERROR(trapnr, signr, str, name) \ 406 #define DO_ERROR(trapnr, signr, str, name) \
407 fastcall void do_##name(struct pt_regs * regs, long error_code) \ 407 fastcall void do_##name(struct pt_regs * regs, long error_code) \
408 { \ 408 { \
409 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 409 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
410 == NOTIFY_STOP) \ 410 == NOTIFY_STOP) \
411 return; \ 411 return; \
412 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ 412 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
413 } 413 }
414 414
415 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 415 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
416 fastcall void do_##name(struct pt_regs * regs, long error_code) \ 416 fastcall void do_##name(struct pt_regs * regs, long error_code) \
417 { \ 417 { \
418 siginfo_t info; \ 418 siginfo_t info; \
419 info.si_signo = signr; \ 419 info.si_signo = signr; \
420 info.si_errno = 0; \ 420 info.si_errno = 0; \
421 info.si_code = sicode; \ 421 info.si_code = sicode; \
422 info.si_addr = (void __user *)siaddr; \ 422 info.si_addr = (void __user *)siaddr; \
423 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 423 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
424 == NOTIFY_STOP) \ 424 == NOTIFY_STOP) \
425 return; \ 425 return; \
426 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ 426 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
427 } 427 }
428 428
429 #define DO_VM86_ERROR(trapnr, signr, str, name) \ 429 #define DO_VM86_ERROR(trapnr, signr, str, name) \
430 fastcall void do_##name(struct pt_regs * regs, long error_code) \ 430 fastcall void do_##name(struct pt_regs * regs, long error_code) \
431 { \ 431 { \
432 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 432 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
433 == NOTIFY_STOP) \ 433 == NOTIFY_STOP) \
434 return; \ 434 return; \
435 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ 435 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
436 } 436 }
437 437
438 #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 438 #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
439 fastcall void do_##name(struct pt_regs * regs, long error_code) \ 439 fastcall void do_##name(struct pt_regs * regs, long error_code) \
440 { \ 440 { \
441 siginfo_t info; \ 441 siginfo_t info; \
442 info.si_signo = signr; \ 442 info.si_signo = signr; \
443 info.si_errno = 0; \ 443 info.si_errno = 0; \
444 info.si_code = sicode; \ 444 info.si_code = sicode; \
445 info.si_addr = (void __user *)siaddr; \ 445 info.si_addr = (void __user *)siaddr; \
446 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 446 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
447 == NOTIFY_STOP) \ 447 == NOTIFY_STOP) \
448 return; \ 448 return; \
449 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ 449 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
450 } 450 }
451 451
452 DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) 452 DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip)
453 #ifndef CONFIG_KPROBES 453 #ifndef CONFIG_KPROBES
454 DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) 454 DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
455 #endif 455 #endif
456 DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) 456 DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
457 DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) 457 DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
458 DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) 458 DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
459 DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 459 DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
460 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) 460 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
461 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 461 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
462 DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 462 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
463 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) 463 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
464 DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) 464 DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
465 465
466 fastcall void do_general_protection(struct pt_regs * regs, long error_code) 466 fastcall void do_general_protection(struct pt_regs * regs, long error_code)
467 { 467 {
468 int cpu = get_cpu(); 468 int cpu = get_cpu();
469 struct tss_struct *tss = &per_cpu(init_tss, cpu); 469 struct tss_struct *tss = &per_cpu(init_tss, cpu);
470 struct thread_struct *thread = &current->thread; 470 struct thread_struct *thread = &current->thread;
471 471
472 /* 472 /*
473 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an 473 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
474 * invalid offset set (the LAZY one) and the faulting thread has 474 * invalid offset set (the LAZY one) and the faulting thread has
475 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS 475 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
476 * and we set the offset field correctly. Then we let the CPU 476 * and we set the offset field correctly. Then we let the CPU
477 * restart the faulting instruction. 477 * restart the faulting instruction.
478 */ 478 */
479 if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && 479 if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
480 thread->io_bitmap_ptr) { 480 thread->io_bitmap_ptr) {
481 memcpy(tss->io_bitmap, thread->io_bitmap_ptr, 481 memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
482 thread->io_bitmap_max); 482 thread->io_bitmap_max);
483 /* 483 /*
484 * If the previously set map extended to higher ports 484 * If the previously set map extended to higher ports
485 * than the current one, pad extra space with 0xff (no access). 485 * than the current one, pad extra space with 0xff (no access).
486 */ 486 */
487 if (thread->io_bitmap_max < tss->io_bitmap_max) 487 if (thread->io_bitmap_max < tss->io_bitmap_max)
488 memset((char *) tss->io_bitmap + 488 memset((char *) tss->io_bitmap +
489 thread->io_bitmap_max, 0xff, 489 thread->io_bitmap_max, 0xff,
490 tss->io_bitmap_max - thread->io_bitmap_max); 490 tss->io_bitmap_max - thread->io_bitmap_max);
491 tss->io_bitmap_max = thread->io_bitmap_max; 491 tss->io_bitmap_max = thread->io_bitmap_max;
492 tss->io_bitmap_base = IO_BITMAP_OFFSET; 492 tss->io_bitmap_base = IO_BITMAP_OFFSET;
493 put_cpu(); 493 put_cpu();
494 return; 494 return;
495 } 495 }
496 put_cpu(); 496 put_cpu();
497 497
498 current->thread.error_code = error_code; 498 current->thread.error_code = error_code;
499 current->thread.trap_no = 13; 499 current->thread.trap_no = 13;
500 500
501 if (regs->eflags & VM_MASK) 501 if (regs->eflags & VM_MASK)
502 goto gp_in_vm86; 502 goto gp_in_vm86;
503 503
504 if (!user_mode(regs)) 504 if (!user_mode(regs))
505 goto gp_in_kernel; 505 goto gp_in_kernel;
506 506
507 current->thread.error_code = error_code; 507 current->thread.error_code = error_code;
508 current->thread.trap_no = 13; 508 current->thread.trap_no = 13;
509 force_sig(SIGSEGV, current); 509 force_sig(SIGSEGV, current);
510 return; 510 return;
511 511
512 gp_in_vm86: 512 gp_in_vm86:
513 local_irq_enable(); 513 local_irq_enable();
514 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); 514 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
515 return; 515 return;
516 516
517 gp_in_kernel: 517 gp_in_kernel:
518 if (!fixup_exception(regs)) { 518 if (!fixup_exception(regs)) {
519 if (notify_die(DIE_GPF, "general protection fault", regs, 519 if (notify_die(DIE_GPF, "general protection fault", regs,
520 error_code, 13, SIGSEGV) == NOTIFY_STOP) 520 error_code, 13, SIGSEGV) == NOTIFY_STOP)
521 return; 521 return;
522 die("general protection fault", regs, error_code); 522 die("general protection fault", regs, error_code);
523 } 523 }
524 } 524 }
525 525
526 static void mem_parity_error(unsigned char reason, struct pt_regs * regs) 526 static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
527 { 527 {
528 printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); 528 printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
529 printk("You probably have a hardware problem with your RAM chips\n"); 529 printk("You probably have a hardware problem with your RAM chips\n");
530 530
531 /* Clear and disable the memory parity error line. */ 531 /* Clear and disable the memory parity error line. */
532 clear_mem_error(reason); 532 clear_mem_error(reason);
533 } 533 }
534 534
535 static void io_check_error(unsigned char reason, struct pt_regs * regs) 535 static void io_check_error(unsigned char reason, struct pt_regs * regs)
536 { 536 {
537 unsigned long i; 537 unsigned long i;
538 538
539 printk("NMI: IOCK error (debug interrupt?)\n"); 539 printk("NMI: IOCK error (debug interrupt?)\n");
540 show_registers(regs); 540 show_registers(regs);
541 541
542 /* Re-enable the IOCK line, wait for a few seconds */ 542 /* Re-enable the IOCK line, wait for a few seconds */
543 reason = (reason & 0xf) | 8; 543 reason = (reason & 0xf) | 8;
544 outb(reason, 0x61); 544 outb(reason, 0x61);
545 i = 2000; 545 i = 2000;
546 while (--i) udelay(1000); 546 while (--i) udelay(1000);
547 reason &= ~8; 547 reason &= ~8;
548 outb(reason, 0x61); 548 outb(reason, 0x61);
549 } 549 }
550 550
551 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) 551 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
552 { 552 {
553 #ifdef CONFIG_MCA 553 #ifdef CONFIG_MCA
554 /* Might actually be able to figure out what the guilty party 554 /* Might actually be able to figure out what the guilty party
555 * is. */ 555 * is. */
556 if( MCA_bus ) { 556 if( MCA_bus ) {
557 mca_handle_nmi(); 557 mca_handle_nmi();
558 return; 558 return;
559 } 559 }
560 #endif 560 #endif
561 printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", 561 printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
562 reason, smp_processor_id()); 562 reason, smp_processor_id());
563 printk("Dazed and confused, but trying to continue\n"); 563 printk("Dazed and confused, but trying to continue\n");
564 printk("Do you have a strange power saving mode enabled?\n"); 564 printk("Do you have a strange power saving mode enabled?\n");
565 } 565 }
566 566
567 static DEFINE_SPINLOCK(nmi_print_lock); 567 static DEFINE_SPINLOCK(nmi_print_lock);
568 568
569 void die_nmi (struct pt_regs *regs, const char *msg) 569 void die_nmi (struct pt_regs *regs, const char *msg)
570 { 570 {
571 spin_lock(&nmi_print_lock); 571 spin_lock(&nmi_print_lock);
572 /* 572 /*
573 * We are in trouble anyway, let's at least try 573 * We are in trouble anyway, let's at least try
574 * to get a message out. 574 * to get a message out.
575 */ 575 */
576 bust_spinlocks(1); 576 bust_spinlocks(1);
577 printk(msg); 577 printk(msg);
578 printk(" on CPU%d, eip %08lx, registers:\n", 578 printk(" on CPU%d, eip %08lx, registers:\n",
579 smp_processor_id(), regs->eip); 579 smp_processor_id(), regs->eip);
580 show_registers(regs); 580 show_registers(regs);
581 printk("console shuts up ...\n"); 581 printk("console shuts up ...\n");
582 console_silent(); 582 console_silent();
583 spin_unlock(&nmi_print_lock); 583 spin_unlock(&nmi_print_lock);
584 bust_spinlocks(0); 584 bust_spinlocks(0);
585 585
586 /* If we are in kernel we are probably nested up pretty bad 586 /* If we are in kernel we are probably nested up pretty bad
587 * and might as well get out now while we still can. 587 * and might as well get out now while we still can.
588 */ 588 */
589 if (!user_mode(regs)) { 589 if (!user_mode(regs)) {
590 current->thread.trap_no = 2; 590 current->thread.trap_no = 2;
591 crash_kexec(regs); 591 crash_kexec(regs);
592 } 592 }
593 593
594 do_exit(SIGSEGV); 594 do_exit(SIGSEGV);
595 } 595 }
596 596
597 static void default_do_nmi(struct pt_regs * regs) 597 static void default_do_nmi(struct pt_regs * regs)
598 { 598 {
599 unsigned char reason = 0; 599 unsigned char reason = 0;
600 600
601 /* Only the BSP gets external NMIs from the system. */ 601 /* Only the BSP gets external NMIs from the system. */
602 if (!smp_processor_id()) 602 if (!smp_processor_id())
603 reason = get_nmi_reason(); 603 reason = get_nmi_reason();
604 604
605 if (!(reason & 0xc0)) { 605 if (!(reason & 0xc0)) {
606 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) 606 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
607 == NOTIFY_STOP) 607 == NOTIFY_STOP)
608 return; 608 return;
609 #ifdef CONFIG_X86_LOCAL_APIC 609 #ifdef CONFIG_X86_LOCAL_APIC
610 /* 610 /*
611 * Ok, so this is none of the documented NMI sources, 611 * Ok, so this is none of the documented NMI sources,
612 * so it must be the NMI watchdog. 612 * so it must be the NMI watchdog.
613 */ 613 */
614 if (nmi_watchdog) { 614 if (nmi_watchdog) {
615 nmi_watchdog_tick(regs); 615 nmi_watchdog_tick(regs);
616 return; 616 return;
617 } 617 }
618 #endif 618 #endif
619 unknown_nmi_error(reason, regs); 619 unknown_nmi_error(reason, regs);
620 return; 620 return;
621 } 621 }
622 if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP) 622 if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
623 return; 623 return;
624 if (reason & 0x80) 624 if (reason & 0x80)
625 mem_parity_error(reason, regs); 625 mem_parity_error(reason, regs);
626 if (reason & 0x40) 626 if (reason & 0x40)
627 io_check_error(reason, regs); 627 io_check_error(reason, regs);
628 /* 628 /*
629 * Reassert NMI in case it became active meanwhile 629 * Reassert NMI in case it became active meanwhile
630 * as it's edge-triggered. 630 * as it's edge-triggered.
631 */ 631 */
632 reassert_nmi(); 632 reassert_nmi();
633 } 633 }
634 634
635 static int dummy_nmi_callback(struct pt_regs * regs, int cpu) 635 static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
636 { 636 {
637 return 0; 637 return 0;
638 } 638 }
639 639
640 static nmi_callback_t nmi_callback = dummy_nmi_callback; 640 static nmi_callback_t nmi_callback = dummy_nmi_callback;
641 641
642 fastcall void do_nmi(struct pt_regs * regs, long error_code) 642 fastcall void do_nmi(struct pt_regs * regs, long error_code)
643 { 643 {
644 int cpu; 644 int cpu;
645 645
646 nmi_enter(); 646 nmi_enter();
647 647
648 cpu = smp_processor_id(); 648 cpu = smp_processor_id();
649 649
650 #ifdef CONFIG_HOTPLUG_CPU 650 #ifdef CONFIG_HOTPLUG_CPU
651 if (!cpu_online(cpu)) { 651 if (!cpu_online(cpu)) {
652 nmi_exit(); 652 nmi_exit();
653 return; 653 return;
654 } 654 }
655 #endif 655 #endif
656 656
657 ++nmi_count(cpu); 657 ++nmi_count(cpu);
658 658
659 if (!nmi_callback(regs, cpu)) 659 if (!nmi_callback(regs, cpu))
660 default_do_nmi(regs); 660 default_do_nmi(regs);
661 661
662 nmi_exit(); 662 nmi_exit();
663 } 663 }
664 664
665 void set_nmi_callback(nmi_callback_t callback) 665 void set_nmi_callback(nmi_callback_t callback)
666 { 666 {
667 nmi_callback = callback; 667 nmi_callback = callback;
668 } 668 }
669 EXPORT_SYMBOL_GPL(set_nmi_callback); 669 EXPORT_SYMBOL_GPL(set_nmi_callback);
670 670
671 void unset_nmi_callback(void) 671 void unset_nmi_callback(void)
672 { 672 {
673 nmi_callback = dummy_nmi_callback; 673 nmi_callback = dummy_nmi_callback;
674 } 674 }
675 EXPORT_SYMBOL_GPL(unset_nmi_callback); 675 EXPORT_SYMBOL_GPL(unset_nmi_callback);
676 676
677 #ifdef CONFIG_KPROBES 677 #ifdef CONFIG_KPROBES
678 fastcall void do_int3(struct pt_regs *regs, long error_code) 678 fastcall void do_int3(struct pt_regs *regs, long error_code)
679 { 679 {
680 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) 680 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
681 == NOTIFY_STOP) 681 == NOTIFY_STOP)
682 return; 682 return;
683 /* This is an interrupt gate, because kprobes wants interrupts 683 /* This is an interrupt gate, because kprobes wants interrupts
684 disabled. Normal trap handlers don't. */ 684 disabled. Normal trap handlers don't. */
685 restore_interrupts(regs); 685 restore_interrupts(regs);
686 do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); 686 do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
687 } 687 }
688 #endif 688 #endif
689 689
690 /* 690 /*
691 * Our handling of the processor debug registers is non-trivial. 691 * Our handling of the processor debug registers is non-trivial.
692 * We do not clear them on entry and exit from the kernel. Therefore 692 * We do not clear them on entry and exit from the kernel. Therefore
693 * it is possible to get a watchpoint trap here from inside the kernel. 693 * it is possible to get a watchpoint trap here from inside the kernel.
694 * However, the code in ./ptrace.c has ensured that the user can 694 * However, the code in ./ptrace.c has ensured that the user can
695 * only set watchpoints on userspace addresses. Therefore the in-kernel 695 * only set watchpoints on userspace addresses. Therefore the in-kernel
696 * watchpoint trap can only occur in code which is reading/writing 696 * watchpoint trap can only occur in code which is reading/writing
697 * from user space. Such code must not hold kernel locks (since it 697 * from user space. Such code must not hold kernel locks (since it
698 * can equally take a page fault), therefore it is safe to call 698 * can equally take a page fault), therefore it is safe to call
699 * force_sig_info even though that claims and releases locks. 699 * force_sig_info even though that claims and releases locks.
700 * 700 *
701 * Code in ./signal.c ensures that the debug control register 701 * Code in ./signal.c ensures that the debug control register
702 * is restored before we deliver any signal, and therefore that 702 * is restored before we deliver any signal, and therefore that
703 * user code runs with the correct debug control register even though 703 * user code runs with the correct debug control register even though
704 * we clear it here. 704 * we clear it here.
705 * 705 *
706 * Being careful here means that we don't have to be as careful in a 706 * Being careful here means that we don't have to be as careful in a
707 * lot of more complicated places (task switching can be a bit lazy 707 * lot of more complicated places (task switching can be a bit lazy
708 * about restoring all the debug state, and ptrace doesn't have to 708 * about restoring all the debug state, and ptrace doesn't have to
709 * find every occurrence of the TF bit that could be saved away even 709 * find every occurrence of the TF bit that could be saved away even
710 * by user code) 710 * by user code)
711 */ 711 */
712 fastcall void do_debug(struct pt_regs * regs, long error_code) 712 fastcall void do_debug(struct pt_regs * regs, long error_code)
713 { 713 {
714 unsigned int condition; 714 unsigned int condition;
715 struct task_struct *tsk = current; 715 struct task_struct *tsk = current;
716 716
717 get_debugreg(condition, 6); 717 get_debugreg(condition, 6);
718 718
719 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 719 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
720 SIGTRAP) == NOTIFY_STOP) 720 SIGTRAP) == NOTIFY_STOP)
721 return; 721 return;
722 /* It's safe to allow irq's after DR6 has been saved */ 722 /* It's safe to allow irq's after DR6 has been saved */
723 if (regs->eflags & X86_EFLAGS_IF) 723 if (regs->eflags & X86_EFLAGS_IF)
724 local_irq_enable(); 724 local_irq_enable();
725 725
726 /* Mask out spurious debug traps due to lazy DR7 setting */ 726 /* Mask out spurious debug traps due to lazy DR7 setting */
727 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { 727 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
728 if (!tsk->thread.debugreg[7]) 728 if (!tsk->thread.debugreg[7])
729 goto clear_dr7; 729 goto clear_dr7;
730 } 730 }
731 731
732 if (regs->eflags & VM_MASK) 732 if (regs->eflags & VM_MASK)
733 goto debug_vm86; 733 goto debug_vm86;
734 734
735 /* Save debug status register where ptrace can see it */ 735 /* Save debug status register where ptrace can see it */
736 tsk->thread.debugreg[6] = condition; 736 tsk->thread.debugreg[6] = condition;
737 737
738 /* 738 /*
739 * Single-stepping through TF: make sure we ignore any events in 739 * Single-stepping through TF: make sure we ignore any events in
740 * kernel space (but re-enable TF when returning to user mode). 740 * kernel space (but re-enable TF when returning to user mode).
741 */ 741 */
742 if (condition & DR_STEP) { 742 if (condition & DR_STEP) {
743 /* 743 /*
744 * We already checked v86 mode above, so we can 744 * We already checked v86 mode above, so we can
745 * check for kernel mode by just checking the CPL 745 * check for kernel mode by just checking the CPL
746 * of CS. 746 * of CS.
747 */ 747 */
748 if (!user_mode(regs)) 748 if (!user_mode(regs))
749 goto clear_TF_reenable; 749 goto clear_TF_reenable;
750 } 750 }
751 751
752 /* Ok, finally something we can handle */ 752 /* Ok, finally something we can handle */
753 send_sigtrap(tsk, regs, error_code); 753 send_sigtrap(tsk, regs, error_code);
754 754
755 /* Disable additional traps. They'll be re-enabled when 755 /* Disable additional traps. They'll be re-enabled when
756 * the signal is delivered. 756 * the signal is delivered.
757 */ 757 */
758 clear_dr7: 758 clear_dr7:
759 set_debugreg(0, 7); 759 set_debugreg(0, 7);
760 return; 760 return;
761 761
762 debug_vm86: 762 debug_vm86:
763 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); 763 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
764 return; 764 return;
765 765
766 clear_TF_reenable: 766 clear_TF_reenable:
767 set_tsk_thread_flag(tsk, TIF_SINGLESTEP); 767 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
768 regs->eflags &= ~TF_MASK; 768 regs->eflags &= ~TF_MASK;
769 return; 769 return;
770 } 770 }
771 771
772 /* 772 /*
773 * Note that we play around with the 'TS' bit in an attempt to get 773 * Note that we play around with the 'TS' bit in an attempt to get
774 * the correct behaviour even in the presence of the asynchronous 774 * the correct behaviour even in the presence of the asynchronous
775 * IRQ13 behaviour 775 * IRQ13 behaviour
776 */ 776 */
777 void math_error(void __user *eip) 777 void math_error(void __user *eip)
778 { 778 {
779 struct task_struct * task; 779 struct task_struct * task;
780 siginfo_t info; 780 siginfo_t info;
781 unsigned short cwd, swd; 781 unsigned short cwd, swd;
782 782
783 /* 783 /*
784 * Save the info for the exception handler and clear the error. 784 * Save the info for the exception handler and clear the error.
785 */ 785 */
786 task = current; 786 task = current;
787 save_init_fpu(task); 787 save_init_fpu(task);
788 task->thread.trap_no = 16; 788 task->thread.trap_no = 16;
789 task->thread.error_code = 0; 789 task->thread.error_code = 0;
790 info.si_signo = SIGFPE; 790 info.si_signo = SIGFPE;
791 info.si_errno = 0; 791 info.si_errno = 0;
792 info.si_code = __SI_FAULT; 792 info.si_code = __SI_FAULT;
793 info.si_addr = eip; 793 info.si_addr = eip;
794 /* 794 /*
795 * (~cwd & swd) will mask out exceptions that are not set to unmasked 795 * (~cwd & swd) will mask out exceptions that are not set to unmasked
796 * status. 0x3f is the exception bits in these regs, 0x200 is the 796 * status. 0x3f is the exception bits in these regs, 0x200 is the
797 * C1 reg you need in case of a stack fault, 0x040 is the stack 797 * C1 reg you need in case of a stack fault, 0x040 is the stack
798 * fault bit. We should only be taking one exception at a time, 798 * fault bit. We should only be taking one exception at a time,
799 * so if this combination doesn't produce any single exception, 799 * so if this combination doesn't produce any single exception,
800 * then we have a bad program that isn't synchronizing its FPU usage 800 * then we have a bad program that isn't synchronizing its FPU usage
801 * and it will suffer the consequences since we won't be able to 801 * and it will suffer the consequences since we won't be able to
802 * fully reproduce the context of the exception 802 * fully reproduce the context of the exception
803 */ 803 */
804 cwd = get_fpu_cwd(task); 804 cwd = get_fpu_cwd(task);
805 swd = get_fpu_swd(task); 805 swd = get_fpu_swd(task);
806 switch (swd & ~cwd & 0x3f) { 806 switch (swd & ~cwd & 0x3f) {
807 case 0x000: 807 case 0x000:
808 default: 808 default:
809 break; 809 break;
810 case 0x001: /* Invalid Op */ 810 case 0x001: /* Invalid Op */
811 /* 811 /*
812 * swd & 0x240 == 0x040: Stack Underflow 812 * swd & 0x240 == 0x040: Stack Underflow
813 * swd & 0x240 == 0x240: Stack Overflow 813 * swd & 0x240 == 0x240: Stack Overflow
814 * User must clear the SF bit (0x40) if set 814 * User must clear the SF bit (0x40) if set
815 */ 815 */
816 info.si_code = FPE_FLTINV; 816 info.si_code = FPE_FLTINV;
817 break; 817 break;
818 case 0x002: /* Denormalize */ 818 case 0x002: /* Denormalize */
819 case 0x010: /* Underflow */ 819 case 0x010: /* Underflow */
820 info.si_code = FPE_FLTUND; 820 info.si_code = FPE_FLTUND;
821 break; 821 break;
822 case 0x004: /* Zero Divide */ 822 case 0x004: /* Zero Divide */
823 info.si_code = FPE_FLTDIV; 823 info.si_code = FPE_FLTDIV;
824 break; 824 break;
825 case 0x008: /* Overflow */ 825 case 0x008: /* Overflow */
826 info.si_code = FPE_FLTOVF; 826 info.si_code = FPE_FLTOVF;
827 break; 827 break;
828 case 0x020: /* Precision */ 828 case 0x020: /* Precision */
829 info.si_code = FPE_FLTRES; 829 info.si_code = FPE_FLTRES;
830 break; 830 break;
831 } 831 }
832 force_sig_info(SIGFPE, &info, task); 832 force_sig_info(SIGFPE, &info, task);
833 } 833 }
834 834
835 fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code) 835 fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
836 { 836 {
837 ignore_fpu_irq = 1; 837 ignore_fpu_irq = 1;
838 math_error((void __user *)regs->eip); 838 math_error((void __user *)regs->eip);
839 } 839 }
840 840
841 static void simd_math_error(void __user *eip) 841 static void simd_math_error(void __user *eip)
842 { 842 {
843 struct task_struct * task; 843 struct task_struct * task;
844 siginfo_t info; 844 siginfo_t info;
845 unsigned short mxcsr; 845 unsigned short mxcsr;
846 846
847 /* 847 /*
848 * Save the info for the exception handler and clear the error. 848 * Save the info for the exception handler and clear the error.
849 */ 849 */
850 task = current; 850 task = current;
851 save_init_fpu(task); 851 save_init_fpu(task);
852 task->thread.trap_no = 19; 852 task->thread.trap_no = 19;
853 task->thread.error_code = 0; 853 task->thread.error_code = 0;
854 info.si_signo = SIGFPE; 854 info.si_signo = SIGFPE;
855 info.si_errno = 0; 855 info.si_errno = 0;
856 info.si_code = __SI_FAULT; 856 info.si_code = __SI_FAULT;
857 info.si_addr = eip; 857 info.si_addr = eip;
858 /* 858 /*
859 * The SIMD FPU exceptions are handled a little differently, as there 859 * The SIMD FPU exceptions are handled a little differently, as there
860 * is only a single status/control register. Thus, to determine which 860 * is only a single status/control register. Thus, to determine which
861 * unmasked exception was caught we must mask the exception mask bits 861 * unmasked exception was caught we must mask the exception mask bits
862 * at 0x1f80, and then use these to mask the exception bits at 0x3f. 862 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
863 */ 863 */
864 mxcsr = get_fpu_mxcsr(task); 864 mxcsr = get_fpu_mxcsr(task);
865 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { 865 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
866 case 0x000: 866 case 0x000:
867 default: 867 default:
868 break; 868 break;
869 case 0x001: /* Invalid Op */ 869 case 0x001: /* Invalid Op */
870 info.si_code = FPE_FLTINV; 870 info.si_code = FPE_FLTINV;
871 break; 871 break;
872 case 0x002: /* Denormalize */ 872 case 0x002: /* Denormalize */
873 case 0x010: /* Underflow */ 873 case 0x010: /* Underflow */
874 info.si_code = FPE_FLTUND; 874 info.si_code = FPE_FLTUND;
875 break; 875 break;
876 case 0x004: /* Zero Divide */ 876 case 0x004: /* Zero Divide */
877 info.si_code = FPE_FLTDIV; 877 info.si_code = FPE_FLTDIV;
878 break; 878 break;
879 case 0x008: /* Overflow */ 879 case 0x008: /* Overflow */
880 info.si_code = FPE_FLTOVF; 880 info.si_code = FPE_FLTOVF;
881 break; 881 break;
882 case 0x020: /* Precision */ 882 case 0x020: /* Precision */
883 info.si_code = FPE_FLTRES; 883 info.si_code = FPE_FLTRES;
884 break; 884 break;
885 } 885 }
886 force_sig_info(SIGFPE, &info, task); 886 force_sig_info(SIGFPE, &info, task);
887 } 887 }
888 888
889 fastcall void do_simd_coprocessor_error(struct pt_regs * regs, 889 fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
890 long error_code) 890 long error_code)
891 { 891 {
892 if (cpu_has_xmm) { 892 if (cpu_has_xmm) {
893 /* Handle SIMD FPU exceptions on PIII+ processors. */ 893 /* Handle SIMD FPU exceptions on PIII+ processors. */
894 ignore_fpu_irq = 1; 894 ignore_fpu_irq = 1;
895 simd_math_error((void __user *)regs->eip); 895 simd_math_error((void __user *)regs->eip);
896 } else { 896 } else {
897 /* 897 /*
898 * Handle strange cache flush from user space exception 898 * Handle strange cache flush from user space exception
899 * in all other cases. This is undocumented behaviour. 899 * in all other cases. This is undocumented behaviour.
900 */ 900 */
901 if (regs->eflags & VM_MASK) { 901 if (regs->eflags & VM_MASK) {
902 handle_vm86_fault((struct kernel_vm86_regs *)regs, 902 handle_vm86_fault((struct kernel_vm86_regs *)regs,
903 error_code); 903 error_code);
904 return; 904 return;
905 } 905 }
906 current->thread.trap_no = 19; 906 current->thread.trap_no = 19;
907 current->thread.error_code = error_code; 907 current->thread.error_code = error_code;
908 die_if_kernel("cache flush denied", regs, error_code); 908 die_if_kernel("cache flush denied", regs, error_code);
909 force_sig(SIGSEGV, current); 909 force_sig(SIGSEGV, current);
910 } 910 }
911 } 911 }
912 912
913 fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, 913 fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
914 long error_code) 914 long error_code)
915 { 915 {
916 #if 0 916 #if 0
917 /* No need to warn about this any longer. */ 917 /* No need to warn about this any longer. */
918 printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); 918 printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
919 #endif 919 #endif
920 } 920 }
921 921
922 fastcall void setup_x86_bogus_stack(unsigned char * stk) 922 fastcall void setup_x86_bogus_stack(unsigned char * stk)
923 { 923 {
924 unsigned long *switch16_ptr, *switch32_ptr; 924 unsigned long *switch16_ptr, *switch32_ptr;
925 struct pt_regs *regs; 925 struct pt_regs *regs;
926 unsigned long stack_top, stack_bot; 926 unsigned long stack_top, stack_bot;
927 unsigned short iret_frame16_off; 927 unsigned short iret_frame16_off;
928 int cpu = smp_processor_id(); 928 int cpu = smp_processor_id();
929 /* reserve the space on 32bit stack for the magic switch16 pointer */ 929 /* reserve the space on 32bit stack for the magic switch16 pointer */
930 memmove(stk, stk + 8, sizeof(struct pt_regs)); 930 memmove(stk, stk + 8, sizeof(struct pt_regs));
931 switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); 931 switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
932 regs = (struct pt_regs *)stk; 932 regs = (struct pt_regs *)stk;
933 /* now the switch32 on 16bit stack */ 933 /* now the switch32 on 16bit stack */
934 stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); 934 stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
935 stack_top = stack_bot + CPU_16BIT_STACK_SIZE; 935 stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
936 switch32_ptr = (unsigned long *)(stack_top - 8); 936 switch32_ptr = (unsigned long *)(stack_top - 8);
937 iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; 937 iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
938 /* copy iret frame on 16bit stack */ 938 /* copy iret frame on 16bit stack */
939 memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20); 939 memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
940 /* fill in the switch pointers */ 940 /* fill in the switch pointers */
941 switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; 941 switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
942 switch16_ptr[1] = __ESPFIX_SS; 942 switch16_ptr[1] = __ESPFIX_SS;
943 switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + 943 switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
944 8 - CPU_16BIT_STACK_SIZE; 944 8 - CPU_16BIT_STACK_SIZE;
945 switch32_ptr[1] = __KERNEL_DS; 945 switch32_ptr[1] = __KERNEL_DS;
946 } 946 }
947 947
948 fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) 948 fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
949 { 949 {
950 unsigned long *switch32_ptr; 950 unsigned long *switch32_ptr;
951 unsigned char *stack16, *stack32; 951 unsigned char *stack16, *stack32;
952 unsigned long stack_top, stack_bot; 952 unsigned long stack_top, stack_bot;
953 int len; 953 int len;
954 int cpu = smp_processor_id(); 954 int cpu = smp_processor_id();
955 stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); 955 stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
956 stack_top = stack_bot + CPU_16BIT_STACK_SIZE; 956 stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
957 switch32_ptr = (unsigned long *)(stack_top - 8); 957 switch32_ptr = (unsigned long *)(stack_top - 8);
958 /* copy the data from 16bit stack to 32bit stack */ 958 /* copy the data from 16bit stack to 32bit stack */
959 len = CPU_16BIT_STACK_SIZE - 8 - sp; 959 len = CPU_16BIT_STACK_SIZE - 8 - sp;
960 stack16 = (unsigned char *)(stack_bot + sp); 960 stack16 = (unsigned char *)(stack_bot + sp);
961 stack32 = (unsigned char *) 961 stack32 = (unsigned char *)
962 (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); 962 (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
963 memcpy(stack32, stack16, len); 963 memcpy(stack32, stack16, len);
964 return stack32; 964 return stack32;
965 } 965 }
966 966
967 /* 967 /*
968 * 'math_state_restore()' saves the current math information in the 968 * 'math_state_restore()' saves the current math information in the
969 * old math state array, and gets the new ones from the current task 969 * old math state array, and gets the new ones from the current task
970 * 970 *
971 * Careful.. There are problems with IBM-designed IRQ13 behaviour. 971 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
972 * Don't touch unless you *really* know how it works. 972 * Don't touch unless you *really* know how it works.
973 * 973 *
974 * Must be called with kernel preemption disabled (in this case, 974 * Must be called with kernel preemption disabled (in this case,
975 * local interrupts are disabled at the call-site in entry.S). 975 * local interrupts are disabled at the call-site in entry.S).
976 */ 976 */
977 asmlinkage void math_state_restore(struct pt_regs regs) 977 asmlinkage void math_state_restore(struct pt_regs regs)
978 { 978 {
979 struct thread_info *thread = current_thread_info(); 979 struct thread_info *thread = current_thread_info();
980 struct task_struct *tsk = thread->task; 980 struct task_struct *tsk = thread->task;
981 981
982 clts(); /* Allow maths ops (or we recurse) */ 982 clts(); /* Allow maths ops (or we recurse) */
983 if (!tsk_used_math(tsk)) 983 if (!tsk_used_math(tsk))
984 init_fpu(tsk); 984 init_fpu(tsk);
985 restore_fpu(tsk); 985 restore_fpu(tsk);
986 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 986 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
987 } 987 }
988 988
989 #ifndef CONFIG_MATH_EMULATION 989 #ifndef CONFIG_MATH_EMULATION
990 990
991 asmlinkage void math_emulate(long arg) 991 asmlinkage void math_emulate(long arg)
992 { 992 {
993 printk("math-emulation not enabled and no coprocessor found.\n"); 993 printk("math-emulation not enabled and no coprocessor found.\n");
994 printk("killing %s.\n",current->comm); 994 printk("killing %s.\n",current->comm);
995 force_sig(SIGFPE,current); 995 force_sig(SIGFPE,current);
996 schedule(); 996 schedule();
997 } 997 }
998 998
999 #endif /* CONFIG_MATH_EMULATION */ 999 #endif /* CONFIG_MATH_EMULATION */
1000 1000
1001 #ifdef CONFIG_X86_F00F_BUG 1001 #ifdef CONFIG_X86_F00F_BUG
1002 void __init trap_init_f00f_bug(void) 1002 void __init trap_init_f00f_bug(void)
1003 { 1003 {
1004 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); 1004 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
1005 1005
1006 /* 1006 /*
1007 * Update the IDT descriptor and reload the IDT so that 1007 * Update the IDT descriptor and reload the IDT so that
1008 * it uses the read-only mapped virtual address. 1008 * it uses the read-only mapped virtual address.
1009 */ 1009 */
1010 idt_descr.address = fix_to_virt(FIX_F00F_IDT); 1010 idt_descr.address = fix_to_virt(FIX_F00F_IDT);
1011 __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); 1011 load_idt(&idt_descr);
1012 } 1012 }
1013 #endif 1013 #endif
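
The hunk above replaces the open-coded lidt in trap_init_f00f_bug() with the new load_idt() wrapper, which is the descriptor-register encapsulation this patch is about. A minimal sketch of the kind of wrappers the patch introduces (the real definitions live in include/asm-i386/desc.h; the exact names, constraints and the TR helper shown here are assumptions for illustration):

/* Sketch only -- assumed shape of the encapsulation macros, not the verbatim header. */
#define load_gdt(dtr)  __asm__ __volatile__("lgdt %0" : : "m" (*(dtr)))
#define load_idt(dtr)  __asm__ __volatile__("lidt %0" : : "m" (*(dtr)))
#define store_gdt(dtr) __asm__ __volatile__("sgdt %0" : "=m" (*(dtr)))
#define store_idt(dtr) __asm__ __volatile__("sidt %0" : "=m" (*(dtr)))
#define load_TR_desc() __asm__ __volatile__("ltr %w0" : : "q" (GDT_ENTRY_TSS*8))

Because the wrappers take the descriptor-table pointer by address and keep it a memory operand, load_idt(&idt_descr) generates the same lidt instruction as the inline asm it replaces; only the call site gets cleaner.
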
1014 1014
1015 #define _set_gate(gate_addr,type,dpl,addr,seg) \ 1015 #define _set_gate(gate_addr,type,dpl,addr,seg) \
1016 do { \ 1016 do { \
1017 int __d0, __d1; \ 1017 int __d0, __d1; \
1018 __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ 1018 __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
1019 "movw %4,%%dx\n\t" \ 1019 "movw %4,%%dx\n\t" \
1020 "movl %%eax,%0\n\t" \ 1020 "movl %%eax,%0\n\t" \
1021 "movl %%edx,%1" \ 1021 "movl %%edx,%1" \
1022 :"=m" (*((long *) (gate_addr))), \ 1022 :"=m" (*((long *) (gate_addr))), \
1023 "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ 1023 "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
1024 :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ 1024 :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
1025 "3" ((char *) (addr)),"2" ((seg) << 16)); \ 1025 "3" ((char *) (addr)),"2" ((seg) << 16)); \
1026 } while (0) 1026 } while (0)
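
The _set_gate() macro above packs a 386 IDT gate descriptor in registers before writing it out: the low dword holds the segment selector and offset bits 15..0, the high dword holds offset bits 31..16 plus the present bit, DPL and gate type. A plain-C sketch of the equivalent computation (illustration only; the function name is hypothetical and the kernel keeps the asm version so both dwords are assembled before either store):

/* Sketch: what the asm in _set_gate() computes and stores. */
static inline void sketch_set_gate(void *gate_addr, unsigned int type,
                                   unsigned int dpl, void *addr, unsigned int seg)
{
        unsigned long a = ((unsigned long)seg << 16) |
                          ((unsigned long)addr & 0xffff);          /* selector, offset 15..0 */
        unsigned long b = ((unsigned long)addr & 0xffff0000) |
                          0x8000 | (dpl << 13) | (type << 8);      /* offset 31..16, P, DPL, type */
        ((unsigned long *)gate_addr)[0] = a;
        ((unsigned long *)gate_addr)[1] = b;
}

For example, set_intr_gate() passes type 14 and dpl 0, giving the familiar 0x8E00 access bits of a present 32-bit interrupt gate.
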
1027 1027
1028 1028
1029 /* 1029 /*
1030 * This needs to use 'idt_table' rather than 'idt', and 1030 * This needs to use 'idt_table' rather than 'idt', and
1031 * thus use the _nonmapped_ version of the IDT, as the 1031 * thus use the _nonmapped_ version of the IDT, as the
1032 * Pentium F0 0F bugfix can have resulted in the mapped 1032 * Pentium F0 0F bugfix can have resulted in the mapped
1033 * IDT being write-protected. 1033 * IDT being write-protected.
1034 */ 1034 */
1035 void set_intr_gate(unsigned int n, void *addr) 1035 void set_intr_gate(unsigned int n, void *addr)
1036 { 1036 {
1037 _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); 1037 _set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
1038 } 1038 }
1039 1039
1040 /* 1040 /*
1041 * This routine sets up an interrupt gate at descriptor privilege level 3. 1041 * This routine sets up an interrupt gate at descriptor privilege level 3.
1042 */ 1042 */
1043 static inline void set_system_intr_gate(unsigned int n, void *addr) 1043 static inline void set_system_intr_gate(unsigned int n, void *addr)
1044 { 1044 {
1045 _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS); 1045 _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
1046 } 1046 }
1047 1047
1048 static void __init set_trap_gate(unsigned int n, void *addr) 1048 static void __init set_trap_gate(unsigned int n, void *addr)
1049 { 1049 {
1050 _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); 1050 _set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
1051 } 1051 }
1052 1052
1053 static void __init set_system_gate(unsigned int n, void *addr) 1053 static void __init set_system_gate(unsigned int n, void *addr)
1054 { 1054 {
1055 _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); 1055 _set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
1056 } 1056 }
1057 1057
1058 static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) 1058 static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
1059 { 1059 {
1060 _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); 1060 _set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
1061 } 1061 }
1062 1062
1063 1063
1064 void __init trap_init(void) 1064 void __init trap_init(void)
1065 { 1065 {
1066 #ifdef CONFIG_EISA 1066 #ifdef CONFIG_EISA
1067 void __iomem *p = ioremap(0x0FFFD9, 4); 1067 void __iomem *p = ioremap(0x0FFFD9, 4);
1068 if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { 1068 if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
1069 EISA_bus = 1; 1069 EISA_bus = 1;
1070 } 1070 }
1071 iounmap(p); 1071 iounmap(p);
1072 #endif 1072 #endif
1073 1073
1074 #ifdef CONFIG_X86_LOCAL_APIC 1074 #ifdef CONFIG_X86_LOCAL_APIC
1075 init_apic_mappings(); 1075 init_apic_mappings();
1076 #endif 1076 #endif
1077 1077
1078 set_trap_gate(0,&divide_error); 1078 set_trap_gate(0,&divide_error);
1079 set_intr_gate(1,&debug); 1079 set_intr_gate(1,&debug);
1080 set_intr_gate(2,&nmi); 1080 set_intr_gate(2,&nmi);
1081 set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ 1081 set_system_intr_gate(3, &int3); /* int3-5 can be called from all */
1082 set_system_gate(4,&overflow); 1082 set_system_gate(4,&overflow);
1083 set_system_gate(5,&bounds); 1083 set_system_gate(5,&bounds);
1084 set_trap_gate(6,&invalid_op); 1084 set_trap_gate(6,&invalid_op);
1085 set_trap_gate(7,&device_not_available); 1085 set_trap_gate(7,&device_not_available);
1086 set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); 1086 set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);
1087 set_trap_gate(9,&coprocessor_segment_overrun); 1087 set_trap_gate(9,&coprocessor_segment_overrun);
1088 set_trap_gate(10,&invalid_TSS); 1088 set_trap_gate(10,&invalid_TSS);
1089 set_trap_gate(11,&segment_not_present); 1089 set_trap_gate(11,&segment_not_present);
1090 set_trap_gate(12,&stack_segment); 1090 set_trap_gate(12,&stack_segment);
1091 set_trap_gate(13,&general_protection); 1091 set_trap_gate(13,&general_protection);
1092 set_intr_gate(14,&page_fault); 1092 set_intr_gate(14,&page_fault);
1093 set_trap_gate(15,&spurious_interrupt_bug); 1093 set_trap_gate(15,&spurious_interrupt_bug);
1094 set_trap_gate(16,&coprocessor_error); 1094 set_trap_gate(16,&coprocessor_error);
1095 set_trap_gate(17,&alignment_check); 1095 set_trap_gate(17,&alignment_check);
1096 #ifdef CONFIG_X86_MCE 1096 #ifdef CONFIG_X86_MCE
1097 set_trap_gate(18,&machine_check); 1097 set_trap_gate(18,&machine_check);
1098 #endif 1098 #endif
1099 set_trap_gate(19,&simd_coprocessor_error); 1099 set_trap_gate(19,&simd_coprocessor_error);
1100 1100
1101 set_system_gate(SYSCALL_VECTOR,&system_call); 1101 set_system_gate(SYSCALL_VECTOR,&system_call);
1102 1102
1103 /* 1103 /*
1104 * Should be a barrier for any external CPU state. 1104 * Should be a barrier for any external CPU state.
1105 */ 1105 */
1106 cpu_init(); 1106 cpu_init();
1107 1107
1108 trap_init_hook(); 1108 trap_init_hook();
1109 } 1109 }
1110 1110
1111 static int __init kstack_setup(char *s) 1111 static int __init kstack_setup(char *s)
1112 { 1112 {
1113 kstack_depth_to_print = simple_strtoul(s, NULL, 0); 1113 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
1114 return 0; 1114 return 0;
1115 } 1115 }
1116 __setup("kstack=", kstack_setup); 1116 __setup("kstack=", kstack_setup);
1117 1117
arch/i386/kernel/vm86.c
1 /* 1 /*
2 * linux/kernel/vm86.c 2 * linux/kernel/vm86.c
3 * 3 *
4 * Copyright (C) 1994 Linus Torvalds 4 * Copyright (C) 1994 Linus Torvalds
5 * 5 *
6 * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86 6 * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
7 * stack - Manfred Spraul <manfreds@colorfullife.com> 7 * stack - Manfred Spraul <manfreds@colorfullife.com>
8 * 8 *
9 * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle 9 * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle
10 * them correctly. Now the emulation will be in a 10 * them correctly. Now the emulation will be in a
11 * consistent state after stackfaults - Kasper Dupont 11 * consistent state after stackfaults - Kasper Dupont
12 * <kasperd@daimi.au.dk> 12 * <kasperd@daimi.au.dk>
13 * 13 *
14 * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont 14 * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont
15 * <kasperd@daimi.au.dk> 15 * <kasperd@daimi.au.dk>
16 * 16 *
17 * ?? ??? 2002 - Fixed premature returns from handle_vm86_fault 17 * ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
18 * caused by Kasper Dupont's changes - Stas Sergeev 18 * caused by Kasper Dupont's changes - Stas Sergeev
19 * 19 *
20 * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes. 20 * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
21 * Kasper Dupont <kasperd@daimi.au.dk> 21 * Kasper Dupont <kasperd@daimi.au.dk>
22 * 22 *
23 * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault. 23 * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
24 * Kasper Dupont <kasperd@daimi.au.dk> 24 * Kasper Dupont <kasperd@daimi.au.dk>
25 * 25 *
26 * 9 apr 2002 - Changed stack access macros to jump to a label 26 * 9 apr 2002 - Changed stack access macros to jump to a label
27 * instead of returning to userspace. This simplifies 27 * instead of returning to userspace. This simplifies
28 * do_int, and is needed by handle_vm86_fault. Kasper 28 * do_int, and is needed by handle_vm86_fault. Kasper
29 * Dupont <kasperd@daimi.au.dk> 29 * Dupont <kasperd@daimi.au.dk>
30 * 30 *
31 */ 31 */
32 32
33 #include <linux/config.h> 33 #include <linux/config.h>
34 #include <linux/errno.h> 34 #include <linux/errno.h>
35 #include <linux/interrupt.h> 35 #include <linux/interrupt.h>
36 #include <linux/sched.h> 36 #include <linux/sched.h>
37 #include <linux/kernel.h> 37 #include <linux/kernel.h>
38 #include <linux/signal.h> 38 #include <linux/signal.h>
39 #include <linux/string.h> 39 #include <linux/string.h>
40 #include <linux/mm.h> 40 #include <linux/mm.h>
41 #include <linux/smp.h> 41 #include <linux/smp.h>
42 #include <linux/smp_lock.h> 42 #include <linux/smp_lock.h>
43 #include <linux/highmem.h> 43 #include <linux/highmem.h>
44 #include <linux/ptrace.h> 44 #include <linux/ptrace.h>
45 45
46 #include <asm/uaccess.h> 46 #include <asm/uaccess.h>
47 #include <asm/io.h> 47 #include <asm/io.h>
48 #include <asm/tlbflush.h> 48 #include <asm/tlbflush.h>
49 #include <asm/irq.h> 49 #include <asm/irq.h>
50 50
51 /* 51 /*
52 * Known problems: 52 * Known problems:
53 * 53 *
54 * Interrupt handling is not guaranteed: 54 * Interrupt handling is not guaranteed:
55 * - a real x86 will disable all interrupts for one instruction 55 * - a real x86 will disable all interrupts for one instruction
56 * after a "mov ss,xx" to make stack handling atomic even without 56 * after a "mov ss,xx" to make stack handling atomic even without
57 * the 'lss' instruction. We can't guarantee this in v86 mode, 57 * the 'lss' instruction. We can't guarantee this in v86 mode,
58 * as the next instruction might result in a page fault or similar. 58 * as the next instruction might result in a page fault or similar.
59 * - a real x86 will have interrupts disabled for one instruction 59 * - a real x86 will have interrupts disabled for one instruction
60 * past the 'sti' that enables them. We don't bother with all the 60 * past the 'sti' that enables them. We don't bother with all the
61 * details yet. 61 * details yet.
62 * 62 *
63 * Let's hope these problems do not actually matter for anything. 63 * Let's hope these problems do not actually matter for anything.
64 */ 64 */
65 65
66 66
67 #define KVM86 ((struct kernel_vm86_struct *)regs) 67 #define KVM86 ((struct kernel_vm86_struct *)regs)
68 #define VMPI KVM86->vm86plus 68 #define VMPI KVM86->vm86plus
69 69
70 70
71 /* 71 /*
72 * 8- and 16-bit register defines.. 72 * 8- and 16-bit register defines..
73 */ 73 */
74 #define AL(regs) (((unsigned char *)&((regs)->eax))[0]) 74 #define AL(regs) (((unsigned char *)&((regs)->eax))[0])
75 #define AH(regs) (((unsigned char *)&((regs)->eax))[1]) 75 #define AH(regs) (((unsigned char *)&((regs)->eax))[1])
76 #define IP(regs) (*(unsigned short *)&((regs)->eip)) 76 #define IP(regs) (*(unsigned short *)&((regs)->eip))
77 #define SP(regs) (*(unsigned short *)&((regs)->esp)) 77 #define SP(regs) (*(unsigned short *)&((regs)->esp))
78 78
79 /* 79 /*
80 * virtual flags (16 and 32-bit versions) 80 * virtual flags (16 and 32-bit versions)
81 */ 81 */
82 #define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) 82 #define VFLAGS (*(unsigned short *)&(current->thread.v86flags))
83 #define VEFLAGS (current->thread.v86flags) 83 #define VEFLAGS (current->thread.v86flags)
84 84
85 #define set_flags(X,new,mask) \ 85 #define set_flags(X,new,mask) \
86 ((X) = ((X) & ~(mask)) | ((new) & (mask))) 86 ((X) = ((X) & ~(mask)) | ((new) & (mask)))
87 87
88 #define SAFE_MASK (0xDD5) 88 #define SAFE_MASK (0xDD5)
89 #define RETURN_MASK (0xDFF) 89 #define RETURN_MASK (0xDFF)
90 90
91 #define VM86_REGS_PART2 orig_eax 91 #define VM86_REGS_PART2 orig_eax
92 #define VM86_REGS_SIZE1 \ 92 #define VM86_REGS_SIZE1 \
93 ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) ) 93 ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) )
94 #define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1) 94 #define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1)
95 95
96 struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); 96 struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
97 struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) 97 struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
98 { 98 {
99 struct tss_struct *tss; 99 struct tss_struct *tss;
100 struct pt_regs *ret; 100 struct pt_regs *ret;
101 unsigned long tmp; 101 unsigned long tmp;
102 102
103 /* 103 /*
104 * This gets called from entry.S with interrupts disabled, but 104 * This gets called from entry.S with interrupts disabled, but
105 * from process context. Enable interrupts here, before trying 105 * from process context. Enable interrupts here, before trying
106 * to access user space. 106 * to access user space.
107 */ 107 */
108 local_irq_enable(); 108 local_irq_enable();
109 109
110 if (!current->thread.vm86_info) { 110 if (!current->thread.vm86_info) {
111 printk("no vm86_info: BAD\n"); 111 printk("no vm86_info: BAD\n");
112 do_exit(SIGSEGV); 112 do_exit(SIGSEGV);
113 } 113 }
114 set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); 114 set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
115 tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1); 115 tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1);
116 tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2, 116 tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2,
117 &regs->VM86_REGS_PART2, VM86_REGS_SIZE2); 117 &regs->VM86_REGS_PART2, VM86_REGS_SIZE2);
118 tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap); 118 tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
119 if (tmp) { 119 if (tmp) {
120 printk("vm86: could not access userspace vm86_info\n"); 120 printk("vm86: could not access userspace vm86_info\n");
121 do_exit(SIGSEGV); 121 do_exit(SIGSEGV);
122 } 122 }
123 123
124 tss = &per_cpu(init_tss, get_cpu()); 124 tss = &per_cpu(init_tss, get_cpu());
125 current->thread.esp0 = current->thread.saved_esp0; 125 current->thread.esp0 = current->thread.saved_esp0;
126 current->thread.sysenter_cs = __KERNEL_CS; 126 current->thread.sysenter_cs = __KERNEL_CS;
127 load_esp0(tss, &current->thread); 127 load_esp0(tss, &current->thread);
128 current->thread.saved_esp0 = 0; 128 current->thread.saved_esp0 = 0;
129 put_cpu(); 129 put_cpu();
130 130
131 loadsegment(fs, current->thread.saved_fs); 131 loadsegment(fs, current->thread.saved_fs);
132 loadsegment(gs, current->thread.saved_gs); 132 loadsegment(gs, current->thread.saved_gs);
133 ret = KVM86->regs32; 133 ret = KVM86->regs32;
134 return ret; 134 return ret;
135 } 135 }
136 136
137 static void mark_screen_rdonly(struct task_struct * tsk) 137 static void mark_screen_rdonly(struct task_struct * tsk)
138 { 138 {
139 pgd_t *pgd; 139 pgd_t *pgd;
140 pud_t *pud; 140 pud_t *pud;
141 pmd_t *pmd; 141 pmd_t *pmd;
142 pte_t *pte, *mapped; 142 pte_t *pte, *mapped;
143 int i; 143 int i;
144 144
145 preempt_disable(); 145 preempt_disable();
146 spin_lock(&tsk->mm->page_table_lock); 146 spin_lock(&tsk->mm->page_table_lock);
147 pgd = pgd_offset(tsk->mm, 0xA0000); 147 pgd = pgd_offset(tsk->mm, 0xA0000);
148 if (pgd_none_or_clear_bad(pgd)) 148 if (pgd_none_or_clear_bad(pgd))
149 goto out; 149 goto out;
150 pud = pud_offset(pgd, 0xA0000); 150 pud = pud_offset(pgd, 0xA0000);
151 if (pud_none_or_clear_bad(pud)) 151 if (pud_none_or_clear_bad(pud))
152 goto out; 152 goto out;
153 pmd = pmd_offset(pud, 0xA0000); 153 pmd = pmd_offset(pud, 0xA0000);
154 if (pmd_none_or_clear_bad(pmd)) 154 if (pmd_none_or_clear_bad(pmd))
155 goto out; 155 goto out;
156 pte = mapped = pte_offset_map(pmd, 0xA0000); 156 pte = mapped = pte_offset_map(pmd, 0xA0000);
157 for (i = 0; i < 32; i++) { 157 for (i = 0; i < 32; i++) {
158 if (pte_present(*pte)) 158 if (pte_present(*pte))
159 set_pte(pte, pte_wrprotect(*pte)); 159 set_pte(pte, pte_wrprotect(*pte));
160 pte++; 160 pte++;
161 } 161 }
162 pte_unmap(mapped); 162 pte_unmap(mapped);
163 out: 163 out:
164 spin_unlock(&tsk->mm->page_table_lock); 164 spin_unlock(&tsk->mm->page_table_lock);
165 preempt_enable(); 165 preempt_enable();
166 flush_tlb(); 166 flush_tlb();
167 } 167 }
168 168
169 169
170 170
171 static int do_vm86_irq_handling(int subfunction, int irqnumber); 171 static int do_vm86_irq_handling(int subfunction, int irqnumber);
172 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); 172 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
173 173
174 asmlinkage int sys_vm86old(struct pt_regs regs) 174 asmlinkage int sys_vm86old(struct pt_regs regs)
175 { 175 {
176 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx; 176 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx;
177 struct kernel_vm86_struct info; /* declare this _on top_, 177 struct kernel_vm86_struct info; /* declare this _on top_,
178 * this avoids wasting of stack space. 178 * this avoids wasting of stack space.
179 * This remains on the stack until we 179 * This remains on the stack until we
180 * return to 32 bit user space. 180 * return to 32 bit user space.
181 */ 181 */
182 struct task_struct *tsk; 182 struct task_struct *tsk;
183 int tmp, ret = -EPERM; 183 int tmp, ret = -EPERM;
184 184
185 tsk = current; 185 tsk = current;
186 if (tsk->thread.saved_esp0) 186 if (tsk->thread.saved_esp0)
187 goto out; 187 goto out;
188 tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); 188 tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
189 tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, 189 tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
190 (long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2); 190 (long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2);
191 ret = -EFAULT; 191 ret = -EFAULT;
192 if (tmp) 192 if (tmp)
193 goto out; 193 goto out;
194 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); 194 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
195 info.regs32 = &regs; 195 info.regs32 = &regs;
196 tsk->thread.vm86_info = v86; 196 tsk->thread.vm86_info = v86;
197 do_sys_vm86(&info, tsk); 197 do_sys_vm86(&info, tsk);
198 ret = 0; /* we never return here */ 198 ret = 0; /* we never return here */
199 out: 199 out:
200 return ret; 200 return ret;
201 } 201 }
202 202
203 203
204 asmlinkage int sys_vm86(struct pt_regs regs) 204 asmlinkage int sys_vm86(struct pt_regs regs)
205 { 205 {
206 struct kernel_vm86_struct info; /* declare this _on top_, 206 struct kernel_vm86_struct info; /* declare this _on top_,
207 * this avoids wasting of stack space. 207 * this avoids wasting of stack space.
208 * This remains on the stack until we 208 * This remains on the stack until we
209 * return to 32 bit user space. 209 * return to 32 bit user space.
210 */ 210 */
211 struct task_struct *tsk; 211 struct task_struct *tsk;
212 int tmp, ret; 212 int tmp, ret;
213 struct vm86plus_struct __user *v86; 213 struct vm86plus_struct __user *v86;
214 214
215 tsk = current; 215 tsk = current;
216 switch (regs.ebx) { 216 switch (regs.ebx) {
217 case VM86_REQUEST_IRQ: 217 case VM86_REQUEST_IRQ:
218 case VM86_FREE_IRQ: 218 case VM86_FREE_IRQ:
219 case VM86_GET_IRQ_BITS: 219 case VM86_GET_IRQ_BITS:
220 case VM86_GET_AND_RESET_IRQ: 220 case VM86_GET_AND_RESET_IRQ:
221 ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx); 221 ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx);
222 goto out; 222 goto out;
223 case VM86_PLUS_INSTALL_CHECK: 223 case VM86_PLUS_INSTALL_CHECK:
224 /* NOTE: on old vm86 stuff this will return the error 224 /* NOTE: on old vm86 stuff this will return the error
225 from access_ok(), because the subfunction is 225 from access_ok(), because the subfunction is
226 interpreted as (invalid) address to vm86_struct. 226 interpreted as (invalid) address to vm86_struct.
227 So the installation check works. 227 So the installation check works.
228 */ 228 */
229 ret = 0; 229 ret = 0;
230 goto out; 230 goto out;
231 } 231 }
232 232
233 /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ 233 /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
234 ret = -EPERM; 234 ret = -EPERM;
235 if (tsk->thread.saved_esp0) 235 if (tsk->thread.saved_esp0)
236 goto out; 236 goto out;
237 v86 = (struct vm86plus_struct __user *)regs.ecx; 237 v86 = (struct vm86plus_struct __user *)regs.ecx;
238 tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); 238 tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
239 tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, 239 tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
240 (long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2); 240 (long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2);
241 ret = -EFAULT; 241 ret = -EFAULT;
242 if (tmp) 242 if (tmp)
243 goto out; 243 goto out;
244 info.regs32 = &regs; 244 info.regs32 = &regs;
245 info.vm86plus.is_vm86pus = 1; 245 info.vm86plus.is_vm86pus = 1;
246 tsk->thread.vm86_info = (struct vm86_struct __user *)v86; 246 tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
247 do_sys_vm86(&info, tsk); 247 do_sys_vm86(&info, tsk);
248 ret = 0; /* we never return here */ 248 ret = 0; /* we never return here */
249 out: 249 out:
250 return ret; 250 return ret;
251 } 251 }
252 252
253 253
254 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) 254 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
255 { 255 {
256 struct tss_struct *tss; 256 struct tss_struct *tss;
257 /* 257 /*
258 * make sure the vm86() system call doesn't try to do anything silly 258 * make sure the vm86() system call doesn't try to do anything silly
259 */ 259 */
260 info->regs.__null_ds = 0; 260 info->regs.__null_ds = 0;
261 info->regs.__null_es = 0; 261 info->regs.__null_es = 0;
262 262
263 /* we are clearing fs,gs later just before "jmp resume_userspace", 263 /* we are clearing fs,gs later just before "jmp resume_userspace",
264 * because starting with Linux 2.1.x they are no longer saved/restored 264 * because starting with Linux 2.1.x they are no longer saved/restored
265 */ 265 */
266 266
267 /* 267 /*
268 * The eflags register is also special: we cannot trust that the user 268 * The eflags register is also special: we cannot trust that the user
269 * has set it up safely, so this makes sure interrupt etc flags are 269 * has set it up safely, so this makes sure interrupt etc flags are
270 * inherited from protected mode. 270 * inherited from protected mode.
271 */ 271 */
272 VEFLAGS = info->regs.eflags; 272 VEFLAGS = info->regs.eflags;
273 info->regs.eflags &= SAFE_MASK; 273 info->regs.eflags &= SAFE_MASK;
274 info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK; 274 info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK;
275 info->regs.eflags |= VM_MASK; 275 info->regs.eflags |= VM_MASK;
276 276
277 switch (info->cpu_type) { 277 switch (info->cpu_type) {
278 case CPU_286: 278 case CPU_286:
279 tsk->thread.v86mask = 0; 279 tsk->thread.v86mask = 0;
280 break; 280 break;
281 case CPU_386: 281 case CPU_386:
282 tsk->thread.v86mask = NT_MASK | IOPL_MASK; 282 tsk->thread.v86mask = NT_MASK | IOPL_MASK;
283 break; 283 break;
284 case CPU_486: 284 case CPU_486:
285 tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK; 285 tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK;
286 break; 286 break;
287 default: 287 default:
288 tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK; 288 tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
289 break; 289 break;
290 } 290 }
291 291
292 /* 292 /*
293 * Save old state, set default return value (%eax) to 0 293 * Save old state, set default return value (%eax) to 0
294 */ 294 */
295 info->regs32->eax = 0; 295 info->regs32->eax = 0;
296 tsk->thread.saved_esp0 = tsk->thread.esp0; 296 tsk->thread.saved_esp0 = tsk->thread.esp0;
297 asm volatile("mov %%fs,%0":"=m" (tsk->thread.saved_fs)); 297 savesegment(fs, tsk->thread.saved_fs);
298 asm volatile("mov %%gs,%0":"=m" (tsk->thread.saved_gs)); 298 savesegment(gs, tsk->thread.saved_gs);
299 299
300 tss = &per_cpu(init_tss, get_cpu()); 300 tss = &per_cpu(init_tss, get_cpu());
301 tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; 301 tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
302 if (cpu_has_sep) 302 if (cpu_has_sep)
303 tsk->thread.sysenter_cs = 0; 303 tsk->thread.sysenter_cs = 0;
304 load_esp0(tss, &tsk->thread); 304 load_esp0(tss, &tsk->thread);
305 put_cpu(); 305 put_cpu();
306 306
307 tsk->thread.screen_bitmap = info->screen_bitmap; 307 tsk->thread.screen_bitmap = info->screen_bitmap;
308 if (info->flags & VM86_SCREEN_BITMAP) 308 if (info->flags & VM86_SCREEN_BITMAP)
309 mark_screen_rdonly(tsk); 309 mark_screen_rdonly(tsk);
310 __asm__ __volatile__( 310 __asm__ __volatile__(
311 "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" 311 "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
312 "movl %0,%%esp\n\t" 312 "movl %0,%%esp\n\t"
313 "movl %1,%%ebp\n\t" 313 "movl %1,%%ebp\n\t"
314 "jmp resume_userspace" 314 "jmp resume_userspace"
315 : /* no outputs */ 315 : /* no outputs */
316 :"r" (&info->regs), "r" (tsk->thread_info) : "ax"); 316 :"r" (&info->regs), "r" (tsk->thread_info) : "ax");
317 /* we never return here */ 317 /* we never return here */
318 } 318 }
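
In do_sys_vm86() above, the open-coded "mov %%fs,%0" / "mov %%gs,%0" saves become savesegment() calls. Per the changelog, the point of the new macro is that its output constraint no longer forces a memory destination, so GCC may keep a saved segment value in a register when the destination is a stack variable (here the destinations are thread-struct fields, so memory is still used, but the call site no longer dictates that). A hedged sketch of the macro this implies (the real definition is in include/asm-i386/system.h; treat the constraint string as an assumption):

/* Sketch only -- assumed post-patch shape of savesegment().
 * "=rm" lets the compiler choose a register or a memory slot;
 * a MOV from a segment register to memory is always a 16-bit store. */
#define savesegment(seg, value) \
        asm volatile("mov %%" #seg ",%0" : "=rm" (value))
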
319 319
320 static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval) 320 static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval)
321 { 321 {
322 struct pt_regs * regs32; 322 struct pt_regs * regs32;
323 323
324 regs32 = save_v86_state(regs16); 324 regs32 = save_v86_state(regs16);
325 regs32->eax = retval; 325 regs32->eax = retval;
326 __asm__ __volatile__("movl %0,%%esp\n\t" 326 __asm__ __volatile__("movl %0,%%esp\n\t"
327 "movl %1,%%ebp\n\t" 327 "movl %1,%%ebp\n\t"
328 "jmp resume_userspace" 328 "jmp resume_userspace"
329 : : "r" (regs32), "r" (current_thread_info())); 329 : : "r" (regs32), "r" (current_thread_info()));
330 } 330 }
331 331
332 static inline void set_IF(struct kernel_vm86_regs * regs) 332 static inline void set_IF(struct kernel_vm86_regs * regs)
333 { 333 {
334 VEFLAGS |= VIF_MASK; 334 VEFLAGS |= VIF_MASK;
335 if (VEFLAGS & VIP_MASK) 335 if (VEFLAGS & VIP_MASK)
336 return_to_32bit(regs, VM86_STI); 336 return_to_32bit(regs, VM86_STI);
337 } 337 }
338 338
339 static inline void clear_IF(struct kernel_vm86_regs * regs) 339 static inline void clear_IF(struct kernel_vm86_regs * regs)
340 { 340 {
341 VEFLAGS &= ~VIF_MASK; 341 VEFLAGS &= ~VIF_MASK;
342 } 342 }
343 343
344 static inline void clear_TF(struct kernel_vm86_regs * regs) 344 static inline void clear_TF(struct kernel_vm86_regs * regs)
345 { 345 {
346 regs->eflags &= ~TF_MASK; 346 regs->eflags &= ~TF_MASK;
347 } 347 }
348 348
349 static inline void clear_AC(struct kernel_vm86_regs * regs) 349 static inline void clear_AC(struct kernel_vm86_regs * regs)
350 { 350 {
351 regs->eflags &= ~AC_MASK; 351 regs->eflags &= ~AC_MASK;
352 } 352 }
353 353
354 /* It is correct to call set_IF(regs) from the set_vflags_* 354 /* It is correct to call set_IF(regs) from the set_vflags_*
355 * functions. However someone forgot to call clear_IF(regs) 355 * functions. However someone forgot to call clear_IF(regs)
356 * in the opposite case. 356 * in the opposite case.
357 * After the command sequence CLI PUSHF STI POPF you should 357 * After the command sequence CLI PUSHF STI POPF you should
358 * end up with interrupts disabled, but you ended up with 358 * end up with interrupts disabled, but you ended up with
359 * interrupts enabled. 359 * interrupts enabled.
360 * ( I was testing my own changes, but the only bug I 360 * ( I was testing my own changes, but the only bug I
361 * could find was in a function I had not changed. ) 361 * could find was in a function I had not changed. )
362 * [KD] 362 * [KD]
363 */ 363 */
364 364
365 static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs) 365 static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
366 { 366 {
367 set_flags(VEFLAGS, eflags, current->thread.v86mask); 367 set_flags(VEFLAGS, eflags, current->thread.v86mask);
368 set_flags(regs->eflags, eflags, SAFE_MASK); 368 set_flags(regs->eflags, eflags, SAFE_MASK);
369 if (eflags & IF_MASK) 369 if (eflags & IF_MASK)
370 set_IF(regs); 370 set_IF(regs);
371 else 371 else
372 clear_IF(regs); 372 clear_IF(regs);
373 } 373 }
374 374
375 static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) 375 static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
376 { 376 {
377 set_flags(VFLAGS, flags, current->thread.v86mask); 377 set_flags(VFLAGS, flags, current->thread.v86mask);
378 set_flags(regs->eflags, flags, SAFE_MASK); 378 set_flags(regs->eflags, flags, SAFE_MASK);
379 if (flags & IF_MASK) 379 if (flags & IF_MASK)
380 set_IF(regs); 380 set_IF(regs);
381 else 381 else
382 clear_IF(regs); 382 clear_IF(regs);
383 } 383 }
384 384
385 static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) 385 static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
386 { 386 {
387 unsigned long flags = regs->eflags & RETURN_MASK; 387 unsigned long flags = regs->eflags & RETURN_MASK;
388 388
389 if (VEFLAGS & VIF_MASK) 389 if (VEFLAGS & VIF_MASK)
390 flags |= IF_MASK; 390 flags |= IF_MASK;
391 flags |= IOPL_MASK; 391 flags |= IOPL_MASK;
392 return flags | (VEFLAGS & current->thread.v86mask); 392 return flags | (VEFLAGS & current->thread.v86mask);
393 } 393 }
394 394
395 static inline int is_revectored(int nr, struct revectored_struct * bitmap) 395 static inline int is_revectored(int nr, struct revectored_struct * bitmap)
396 { 396 {
397 __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" 397 __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
398 :"=r" (nr) 398 :"=r" (nr)
399 :"m" (*bitmap),"r" (nr)); 399 :"m" (*bitmap),"r" (nr));
400 return nr; 400 return nr;
401 } 401 }
402 402
403 #define val_byte(val, n) (((__u8 *)&val)[n]) 403 #define val_byte(val, n) (((__u8 *)&val)[n])
404 404
405 #define pushb(base, ptr, val, err_label) \ 405 #define pushb(base, ptr, val, err_label) \
406 do { \ 406 do { \
407 __u8 __val = val; \ 407 __u8 __val = val; \
408 ptr--; \ 408 ptr--; \
409 if (put_user(__val, base + ptr) < 0) \ 409 if (put_user(__val, base + ptr) < 0) \
410 goto err_label; \ 410 goto err_label; \
411 } while(0) 411 } while(0)
412 412
413 #define pushw(base, ptr, val, err_label) \ 413 #define pushw(base, ptr, val, err_label) \
414 do { \ 414 do { \
415 __u16 __val = val; \ 415 __u16 __val = val; \
416 ptr--; \ 416 ptr--; \
417 if (put_user(val_byte(__val, 1), base + ptr) < 0) \ 417 if (put_user(val_byte(__val, 1), base + ptr) < 0) \
418 goto err_label; \ 418 goto err_label; \
419 ptr--; \ 419 ptr--; \
420 if (put_user(val_byte(__val, 0), base + ptr) < 0) \ 420 if (put_user(val_byte(__val, 0), base + ptr) < 0) \
421 goto err_label; \ 421 goto err_label; \
422 } while(0) 422 } while(0)
423 423
424 #define pushl(base, ptr, val, err_label) \ 424 #define pushl(base, ptr, val, err_label) \
425 do { \ 425 do { \
426 __u32 __val = val; \ 426 __u32 __val = val; \
427 ptr--; \ 427 ptr--; \
428 if (put_user(val_byte(__val, 3), base + ptr) < 0) \ 428 if (put_user(val_byte(__val, 3), base + ptr) < 0) \
429 goto err_label; \ 429 goto err_label; \
430 ptr--; \ 430 ptr--; \
431 if (put_user(val_byte(__val, 2), base + ptr) < 0) \ 431 if (put_user(val_byte(__val, 2), base + ptr) < 0) \
432 goto err_label; \ 432 goto err_label; \
433 ptr--; \ 433 ptr--; \
434 if (put_user(val_byte(__val, 1), base + ptr) < 0) \ 434 if (put_user(val_byte(__val, 1), base + ptr) < 0) \
435 goto err_label; \ 435 goto err_label; \
436 ptr--; \ 436 ptr--; \
437 if (put_user(val_byte(__val, 0), base + ptr) < 0) \ 437 if (put_user(val_byte(__val, 0), base + ptr) < 0) \
438 goto err_label; \ 438 goto err_label; \
439 } while(0) 439 } while(0)
440 440
441 #define popb(base, ptr, err_label) \ 441 #define popb(base, ptr, err_label) \
442 ({ \ 442 ({ \
443 __u8 __res; \ 443 __u8 __res; \
444 if (get_user(__res, base + ptr) < 0) \ 444 if (get_user(__res, base + ptr) < 0) \
445 goto err_label; \ 445 goto err_label; \
446 ptr++; \ 446 ptr++; \
447 __res; \ 447 __res; \
448 }) 448 })
449 449
450 #define popw(base, ptr, err_label) \ 450 #define popw(base, ptr, err_label) \
451 ({ \ 451 ({ \
452 __u16 __res; \ 452 __u16 __res; \
453 if (get_user(val_byte(__res, 0), base + ptr) < 0) \ 453 if (get_user(val_byte(__res, 0), base + ptr) < 0) \
454 goto err_label; \ 454 goto err_label; \
455 ptr++; \ 455 ptr++; \
456 if (get_user(val_byte(__res, 1), base + ptr) < 0) \ 456 if (get_user(val_byte(__res, 1), base + ptr) < 0) \
457 goto err_label; \ 457 goto err_label; \
458 ptr++; \ 458 ptr++; \
459 __res; \ 459 __res; \
460 }) 460 })
461 461
462 #define popl(base, ptr, err_label) \ 462 #define popl(base, ptr, err_label) \
463 ({ \ 463 ({ \
464 __u32 __res; \ 464 __u32 __res; \
465 if (get_user(val_byte(__res, 0), base + ptr) < 0) \ 465 if (get_user(val_byte(__res, 0), base + ptr) < 0) \
466 goto err_label; \ 466 goto err_label; \
467 ptr++; \ 467 ptr++; \
468 if (get_user(val_byte(__res, 1), base + ptr) < 0) \ 468 if (get_user(val_byte(__res, 1), base + ptr) < 0) \
469 goto err_label; \ 469 goto err_label; \
470 ptr++; \ 470 ptr++; \
471 if (get_user(val_byte(__res, 2), base + ptr) < 0) \ 471 if (get_user(val_byte(__res, 2), base + ptr) < 0) \
472 goto err_label; \ 472 goto err_label; \
473 ptr++; \ 473 ptr++; \
474 if (get_user(val_byte(__res, 3), base + ptr) < 0) \ 474 if (get_user(val_byte(__res, 3), base + ptr) < 0) \
475 goto err_label; \ 475 goto err_label; \
476 ptr++; \ 476 ptr++; \
477 __res; \ 477 __res; \
478 }) 478 })
479 479
480 /* There are so many possible reasons for this function to return 480 /* There are so many possible reasons for this function to return
481 * VM86_INTx, so adding another doesn't bother me. We can expect 481 * VM86_INTx, so adding another doesn't bother me. We can expect
482 * userspace programs to be able to handle it. (Getting a problem 482 * userspace programs to be able to handle it. (Getting a problem
483 * in userspace is always better than an Oops anyway.) [KD] 483 * in userspace is always better than an Oops anyway.) [KD]
484 */ 484 */
485 static void do_int(struct kernel_vm86_regs *regs, int i, 485 static void do_int(struct kernel_vm86_regs *regs, int i,
486 unsigned char __user * ssp, unsigned short sp) 486 unsigned char __user * ssp, unsigned short sp)
487 { 487 {
488 unsigned long __user *intr_ptr; 488 unsigned long __user *intr_ptr;
489 unsigned long segoffs; 489 unsigned long segoffs;
490 490
491 if (regs->cs == BIOSSEG) 491 if (regs->cs == BIOSSEG)
492 goto cannot_handle; 492 goto cannot_handle;
493 if (is_revectored(i, &KVM86->int_revectored)) 493 if (is_revectored(i, &KVM86->int_revectored))
494 goto cannot_handle; 494 goto cannot_handle;
495 if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored)) 495 if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored))
496 goto cannot_handle; 496 goto cannot_handle;
497 intr_ptr = (unsigned long __user *) (i << 2); 497 intr_ptr = (unsigned long __user *) (i << 2);
498 if (get_user(segoffs, intr_ptr)) 498 if (get_user(segoffs, intr_ptr))
499 goto cannot_handle; 499 goto cannot_handle;
500 if ((segoffs >> 16) == BIOSSEG) 500 if ((segoffs >> 16) == BIOSSEG)
501 goto cannot_handle; 501 goto cannot_handle;
502 pushw(ssp, sp, get_vflags(regs), cannot_handle); 502 pushw(ssp, sp, get_vflags(regs), cannot_handle);
503 pushw(ssp, sp, regs->cs, cannot_handle); 503 pushw(ssp, sp, regs->cs, cannot_handle);
504 pushw(ssp, sp, IP(regs), cannot_handle); 504 pushw(ssp, sp, IP(regs), cannot_handle);
505 regs->cs = segoffs >> 16; 505 regs->cs = segoffs >> 16;
506 SP(regs) -= 6; 506 SP(regs) -= 6;
507 IP(regs) = segoffs & 0xffff; 507 IP(regs) = segoffs & 0xffff;
508 clear_TF(regs); 508 clear_TF(regs);
509 clear_IF(regs); 509 clear_IF(regs);
510 clear_AC(regs); 510 clear_AC(regs);
511 return; 511 return;
512 512
513 cannot_handle: 513 cannot_handle:
514 return_to_32bit(regs, VM86_INTx + (i << 8)); 514 return_to_32bit(regs, VM86_INTx + (i << 8));
515 } 515 }
516 516
517 int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno) 517 int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno)
518 { 518 {
519 if (VMPI.is_vm86pus) { 519 if (VMPI.is_vm86pus) {
520 if ( (trapno==3) || (trapno==1) ) 520 if ( (trapno==3) || (trapno==1) )
521 return_to_32bit(regs, VM86_TRAP + (trapno << 8)); 521 return_to_32bit(regs, VM86_TRAP + (trapno << 8));
522 do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs)); 522 do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs));
523 return 0; 523 return 0;
524 } 524 }
525 if (trapno !=1) 525 if (trapno !=1)
526 return 1; /* we let this be handled by the calling routine */ 526 return 1; /* we let this be handled by the calling routine */
527 if (current->ptrace & PT_PTRACED) { 527 if (current->ptrace & PT_PTRACED) {
528 unsigned long flags; 528 unsigned long flags;
529 spin_lock_irqsave(&current->sighand->siglock, flags); 529 spin_lock_irqsave(&current->sighand->siglock, flags);
530 sigdelset(&current->blocked, SIGTRAP); 530 sigdelset(&current->blocked, SIGTRAP);
531 recalc_sigpending(); 531 recalc_sigpending();
532 spin_unlock_irqrestore(&current->sighand->siglock, flags); 532 spin_unlock_irqrestore(&current->sighand->siglock, flags);
533 } 533 }
534 send_sig(SIGTRAP, current, 1); 534 send_sig(SIGTRAP, current, 1);
535 current->thread.trap_no = trapno; 535 current->thread.trap_no = trapno;
536 current->thread.error_code = error_code; 536 current->thread.error_code = error_code;
537 return 0; 537 return 0;
538 } 538 }
539 539
540 void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) 540 void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
541 { 541 {
542 unsigned char opcode; 542 unsigned char opcode;
543 unsigned char __user *csp; 543 unsigned char __user *csp;
544 unsigned char __user *ssp; 544 unsigned char __user *ssp;
545 unsigned short ip, sp, orig_flags; 545 unsigned short ip, sp, orig_flags;
546 int data32, pref_done; 546 int data32, pref_done;
547 547
548 #define CHECK_IF_IN_TRAP \ 548 #define CHECK_IF_IN_TRAP \
549 if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ 549 if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
550 newflags |= TF_MASK 550 newflags |= TF_MASK
551 #define VM86_FAULT_RETURN do { \ 551 #define VM86_FAULT_RETURN do { \
552 if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \ 552 if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \
553 return_to_32bit(regs, VM86_PICRETURN); \ 553 return_to_32bit(regs, VM86_PICRETURN); \
554 if (orig_flags & TF_MASK) \ 554 if (orig_flags & TF_MASK) \
555 handle_vm86_trap(regs, 0, 1); \ 555 handle_vm86_trap(regs, 0, 1); \
556 return; } while (0) 556 return; } while (0)
557 557
558 orig_flags = *(unsigned short *)&regs->eflags; 558 orig_flags = *(unsigned short *)&regs->eflags;
559 559
560 csp = (unsigned char __user *) (regs->cs << 4); 560 csp = (unsigned char __user *) (regs->cs << 4);
561 ssp = (unsigned char __user *) (regs->ss << 4); 561 ssp = (unsigned char __user *) (regs->ss << 4);
562 sp = SP(regs); 562 sp = SP(regs);
563 ip = IP(regs); 563 ip = IP(regs);
564 564
565 data32 = 0; 565 data32 = 0;
566 pref_done = 0; 566 pref_done = 0;
567 do { 567 do {
568 switch (opcode = popb(csp, ip, simulate_sigsegv)) { 568 switch (opcode = popb(csp, ip, simulate_sigsegv)) {
569 case 0x66: /* 32-bit data */ data32=1; break; 569 case 0x66: /* 32-bit data */ data32=1; break;
570 case 0x67: /* 32-bit address */ break; 570 case 0x67: /* 32-bit address */ break;
571 case 0x2e: /* CS */ break; 571 case 0x2e: /* CS */ break;
572 case 0x3e: /* DS */ break; 572 case 0x3e: /* DS */ break;
573 case 0x26: /* ES */ break; 573 case 0x26: /* ES */ break;
574 case 0x36: /* SS */ break; 574 case 0x36: /* SS */ break;
575 case 0x65: /* GS */ break; 575 case 0x65: /* GS */ break;
576 case 0x64: /* FS */ break; 576 case 0x64: /* FS */ break;
577 case 0xf2: /* repnz */ break; 577 case 0xf2: /* repnz */ break;
578 case 0xf3: /* rep */ break; 578 case 0xf3: /* rep */ break;
579 default: pref_done = 1; 579 default: pref_done = 1;
580 } 580 }
581 } while (!pref_done); 581 } while (!pref_done);
582 582
583 switch (opcode) { 583 switch (opcode) {
584 584
585 /* pushf */ 585 /* pushf */
586 case 0x9c: 586 case 0x9c:
587 if (data32) { 587 if (data32) {
588 pushl(ssp, sp, get_vflags(regs), simulate_sigsegv); 588 pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
589 SP(regs) -= 4; 589 SP(regs) -= 4;
590 } else { 590 } else {
591 pushw(ssp, sp, get_vflags(regs), simulate_sigsegv); 591 pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
592 SP(regs) -= 2; 592 SP(regs) -= 2;
593 } 593 }
594 IP(regs) = ip; 594 IP(regs) = ip;
595 VM86_FAULT_RETURN; 595 VM86_FAULT_RETURN;
596 596
597 /* popf */ 597 /* popf */
598 case 0x9d: 598 case 0x9d:
599 { 599 {
600 unsigned long newflags; 600 unsigned long newflags;
601 if (data32) { 601 if (data32) {
602 newflags=popl(ssp, sp, simulate_sigsegv); 602 newflags=popl(ssp, sp, simulate_sigsegv);
603 SP(regs) += 4; 603 SP(regs) += 4;
604 } else { 604 } else {
605 newflags = popw(ssp, sp, simulate_sigsegv); 605 newflags = popw(ssp, sp, simulate_sigsegv);
606 SP(regs) += 2; 606 SP(regs) += 2;
607 } 607 }
608 IP(regs) = ip; 608 IP(regs) = ip;
609 CHECK_IF_IN_TRAP; 609 CHECK_IF_IN_TRAP;
610 if (data32) { 610 if (data32) {
611 set_vflags_long(newflags, regs); 611 set_vflags_long(newflags, regs);
612 } else { 612 } else {
613 set_vflags_short(newflags, regs); 613 set_vflags_short(newflags, regs);
614 } 614 }
615 VM86_FAULT_RETURN; 615 VM86_FAULT_RETURN;
616 } 616 }
617 617
618 /* int xx */ 618 /* int xx */
619 case 0xcd: { 619 case 0xcd: {
620 int intno=popb(csp, ip, simulate_sigsegv); 620 int intno=popb(csp, ip, simulate_sigsegv);
621 IP(regs) = ip; 621 IP(regs) = ip;
622 if (VMPI.vm86dbg_active) { 622 if (VMPI.vm86dbg_active) {
623 if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] ) 623 if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] )
624 return_to_32bit(regs, VM86_INTx + (intno << 8)); 624 return_to_32bit(regs, VM86_INTx + (intno << 8));
625 } 625 }
626 do_int(regs, intno, ssp, sp); 626 do_int(regs, intno, ssp, sp);
627 return; 627 return;
628 } 628 }
629 629
630 /* iret */ 630 /* iret */
631 case 0xcf: 631 case 0xcf:
632 { 632 {
633 unsigned long newip; 633 unsigned long newip;
634 unsigned long newcs; 634 unsigned long newcs;
635 unsigned long newflags; 635 unsigned long newflags;
636 if (data32) { 636 if (data32) {
637 newip=popl(ssp, sp, simulate_sigsegv); 637 newip=popl(ssp, sp, simulate_sigsegv);
638 newcs=popl(ssp, sp, simulate_sigsegv); 638 newcs=popl(ssp, sp, simulate_sigsegv);
639 newflags=popl(ssp, sp, simulate_sigsegv); 639 newflags=popl(ssp, sp, simulate_sigsegv);
640 SP(regs) += 12; 640 SP(regs) += 12;
641 } else { 641 } else {
642 newip = popw(ssp, sp, simulate_sigsegv); 642 newip = popw(ssp, sp, simulate_sigsegv);
643 newcs = popw(ssp, sp, simulate_sigsegv); 643 newcs = popw(ssp, sp, simulate_sigsegv);
644 newflags = popw(ssp, sp, simulate_sigsegv); 644 newflags = popw(ssp, sp, simulate_sigsegv);
645 SP(regs) += 6; 645 SP(regs) += 6;
646 } 646 }
647 IP(regs) = newip; 647 IP(regs) = newip;
648 regs->cs = newcs; 648 regs->cs = newcs;
649 CHECK_IF_IN_TRAP; 649 CHECK_IF_IN_TRAP;
650 if (data32) { 650 if (data32) {
651 set_vflags_long(newflags, regs); 651 set_vflags_long(newflags, regs);
652 } else { 652 } else {
653 set_vflags_short(newflags, regs); 653 set_vflags_short(newflags, regs);
654 } 654 }
655 VM86_FAULT_RETURN; 655 VM86_FAULT_RETURN;
656 } 656 }
657 657
658 /* cli */ 658 /* cli */
659 case 0xfa: 659 case 0xfa:
660 IP(regs) = ip; 660 IP(regs) = ip;
661 clear_IF(regs); 661 clear_IF(regs);
662 VM86_FAULT_RETURN; 662 VM86_FAULT_RETURN;
663 663
664 /* sti */ 664 /* sti */
665 /* 665 /*
666 * Damn. This is incorrect: the 'sti' instruction should actually 666 * Damn. This is incorrect: the 'sti' instruction should actually
667 * enable interrupts after the /next/ instruction. Not good. 667 * enable interrupts after the /next/ instruction. Not good.
668 * 668 *
669 * Probably needs some horsing around with the TF flag. Aiee.. 669 * Probably needs some horsing around with the TF flag. Aiee..
670 */ 670 */
671 case 0xfb: 671 case 0xfb:
672 IP(regs) = ip; 672 IP(regs) = ip;
673 set_IF(regs); 673 set_IF(regs);
674 VM86_FAULT_RETURN; 674 VM86_FAULT_RETURN;
675 675
676 default: 676 default:
677 return_to_32bit(regs, VM86_UNKNOWN); 677 return_to_32bit(regs, VM86_UNKNOWN);
678 } 678 }
679 679
680 return; 680 return;
681 681
682 simulate_sigsegv: 682 simulate_sigsegv:
683 /* FIXME: After a long discussion with Stas we finally 683 /* FIXME: After a long discussion with Stas we finally
684 * agreed, that this is wrong. Here we should 684 * agreed, that this is wrong. Here we should
685 * really send a SIGSEGV to the user program. 685 * really send a SIGSEGV to the user program.
686 * But how do we create the correct context? We 686 * But how do we create the correct context? We
687 * are inside a general protection fault handler 687 * are inside a general protection fault handler
688 * and have just returned from a page fault handler. 688 * and have just returned from a page fault handler.
689 * The correct context for the signal handler 689 * The correct context for the signal handler
690 * should be a mixture of the two, but how do we 690 * should be a mixture of the two, but how do we
691 * get the information? [KD] 691 * get the information? [KD]
692 */ 692 */
693 return_to_32bit(regs, VM86_UNKNOWN); 693 return_to_32bit(regs, VM86_UNKNOWN);
694 } 694 }
695 695
696 /* ---------------- vm86 special IRQ passing stuff ----------------- */ 696 /* ---------------- vm86 special IRQ passing stuff ----------------- */
697 697
698 #define VM86_IRQNAME "vm86irq" 698 #define VM86_IRQNAME "vm86irq"
699 699
700 static struct vm86_irqs { 700 static struct vm86_irqs {
701 struct task_struct *tsk; 701 struct task_struct *tsk;
702 int sig; 702 int sig;
703 } vm86_irqs[16]; 703 } vm86_irqs[16];
704 704
705 static DEFINE_SPINLOCK(irqbits_lock); 705 static DEFINE_SPINLOCK(irqbits_lock);
706 static int irqbits; 706 static int irqbits;
707 707
708 #define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \ 708 #define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \
709 | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \ 709 | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \
710 | (1 << SIGUNUSED) ) 710 | (1 << SIGUNUSED) )
711 711
712 static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs) 712 static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs)
713 { 713 {
714 int irq_bit; 714 int irq_bit;
715 unsigned long flags; 715 unsigned long flags;
716 716
717 spin_lock_irqsave(&irqbits_lock, flags); 717 spin_lock_irqsave(&irqbits_lock, flags);
718 irq_bit = 1 << intno; 718 irq_bit = 1 << intno;
719 if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk) 719 if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk)
720 goto out; 720 goto out;
721 irqbits |= irq_bit; 721 irqbits |= irq_bit;
722 if (vm86_irqs[intno].sig) 722 if (vm86_irqs[intno].sig)
723 send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1); 723 send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
724 /* 724 /*
725 * IRQ will be re-enabled when user asks for the irq (whether 725 * IRQ will be re-enabled when user asks for the irq (whether
726 * polling or as a result of the signal) 726 * polling or as a result of the signal)
727 */ 727 */
728 disable_irq_nosync(intno); 728 disable_irq_nosync(intno);
729 spin_unlock_irqrestore(&irqbits_lock, flags); 729 spin_unlock_irqrestore(&irqbits_lock, flags);
730 return IRQ_HANDLED; 730 return IRQ_HANDLED;
731 731
732 out: 732 out:
733 spin_unlock_irqrestore(&irqbits_lock, flags); 733 spin_unlock_irqrestore(&irqbits_lock, flags);
734 return IRQ_NONE; 734 return IRQ_NONE;
735 } 735 }
736 736
737 static inline void free_vm86_irq(int irqnumber) 737 static inline void free_vm86_irq(int irqnumber)
738 { 738 {
739 unsigned long flags; 739 unsigned long flags;
740 740
741 free_irq(irqnumber, NULL); 741 free_irq(irqnumber, NULL);
742 vm86_irqs[irqnumber].tsk = NULL; 742 vm86_irqs[irqnumber].tsk = NULL;
743 743
744 spin_lock_irqsave(&irqbits_lock, flags); 744 spin_lock_irqsave(&irqbits_lock, flags);
745 irqbits &= ~(1 << irqnumber); 745 irqbits &= ~(1 << irqnumber);
746 spin_unlock_irqrestore(&irqbits_lock, flags); 746 spin_unlock_irqrestore(&irqbits_lock, flags);
747 } 747 }
748 748
749 void release_vm86_irqs(struct task_struct *task) 749 void release_vm86_irqs(struct task_struct *task)
750 { 750 {
751 int i; 751 int i;
752 for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++) 752 for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++)
753 if (vm86_irqs[i].tsk == task) 753 if (vm86_irqs[i].tsk == task)
754 free_vm86_irq(i); 754 free_vm86_irq(i);
755 } 755 }
756 756
757 static inline int get_and_reset_irq(int irqnumber) 757 static inline int get_and_reset_irq(int irqnumber)
758 { 758 {
759 int bit; 759 int bit;
760 unsigned long flags; 760 unsigned long flags;
761 int ret = 0; 761 int ret = 0;
762 762
763 if (invalid_vm86_irq(irqnumber)) return 0; 763 if (invalid_vm86_irq(irqnumber)) return 0;
764 if (vm86_irqs[irqnumber].tsk != current) return 0; 764 if (vm86_irqs[irqnumber].tsk != current) return 0;
765 spin_lock_irqsave(&irqbits_lock, flags); 765 spin_lock_irqsave(&irqbits_lock, flags);
766 bit = irqbits & (1 << irqnumber); 766 bit = irqbits & (1 << irqnumber);
767 irqbits &= ~bit; 767 irqbits &= ~bit;
768 if (bit) { 768 if (bit) {
769 enable_irq(irqnumber); 769 enable_irq(irqnumber);
770 ret = 1; 770 ret = 1;
771 } 771 }
772 772
773 spin_unlock_irqrestore(&irqbits_lock, flags); 773 spin_unlock_irqrestore(&irqbits_lock, flags);
774 return ret; 774 return ret;
775 } 775 }
776 776
777 777
778 static int do_vm86_irq_handling(int subfunction, int irqnumber) 778 static int do_vm86_irq_handling(int subfunction, int irqnumber)
779 { 779 {
780 int ret; 780 int ret;
781 switch (subfunction) { 781 switch (subfunction) {
782 case VM86_GET_AND_RESET_IRQ: { 782 case VM86_GET_AND_RESET_IRQ: {
783 return get_and_reset_irq(irqnumber); 783 return get_and_reset_irq(irqnumber);
784 } 784 }
785 case VM86_GET_IRQ_BITS: { 785 case VM86_GET_IRQ_BITS: {
786 return irqbits; 786 return irqbits;
787 } 787 }
788 case VM86_REQUEST_IRQ: { 788 case VM86_REQUEST_IRQ: {
789 int sig = irqnumber >> 8; 789 int sig = irqnumber >> 8;
790 int irq = irqnumber & 255; 790 int irq = irqnumber & 255;
791 if (!capable(CAP_SYS_ADMIN)) return -EPERM; 791 if (!capable(CAP_SYS_ADMIN)) return -EPERM;
792 if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM; 792 if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM;
793 if (invalid_vm86_irq(irq)) return -EPERM; 793 if (invalid_vm86_irq(irq)) return -EPERM;
794 if (vm86_irqs[irq].tsk) return -EPERM; 794 if (vm86_irqs[irq].tsk) return -EPERM;
795 ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL); 795 ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
796 if (ret) return ret; 796 if (ret) return ret;
797 vm86_irqs[irq].sig = sig; 797 vm86_irqs[irq].sig = sig;
798 vm86_irqs[irq].tsk = current; 798 vm86_irqs[irq].tsk = current;
799 return irq; 799 return irq;
800 } 800 }
801 case VM86_FREE_IRQ: { 801 case VM86_FREE_IRQ: {
802 if (invalid_vm86_irq(irqnumber)) return -EPERM; 802 if (invalid_vm86_irq(irqnumber)) return -EPERM;
803 if (!vm86_irqs[irqnumber].tsk) return 0; 803 if (!vm86_irqs[irqnumber].tsk) return 0;
804 if (vm86_irqs[irqnumber].tsk != current) return -EPERM; 804 if (vm86_irqs[irqnumber].tsk != current) return -EPERM;
805 free_vm86_irq(irqnumber); 805 free_vm86_irq(irqnumber);
806 return 0; 806 return 0;
807 } 807 }
808 } 808 }
809 return -EINVAL; 809 return -EINVAL;
810 } 810 }
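The VM86_REQUEST_IRQ case above expects the signal number and the IRQ packed into one argument. A caller-side helper mirroring that decoding might look like the sketch below; the helper name is hypothetical and not part of the kernel.

/* Sketch only: packs (sig, irq) the way do_vm86_irq_handling() unpacks it. */
static inline int vm86_pack_irq_request(int sig, int irq)
{
	return (sig << 8) | (irq & 255);
}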
811 811
812 812
arch/i386/math-emu/get_address.c
1 /*---------------------------------------------------------------------------+ 1 /*---------------------------------------------------------------------------+
2 | get_address.c | 2 | get_address.c |
3 | | 3 | |
4 | Get the effective address from an FPU instruction. | 4 | Get the effective address from an FPU instruction. |
5 | | 5 | |
6 | Copyright (C) 1992,1993,1994,1997 | 6 | Copyright (C) 1992,1993,1994,1997 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@suburbia.net | 8 | Australia. E-mail billm@suburbia.net |
9 | | 9 | |
10 | | 10 | |
11 +---------------------------------------------------------------------------*/ 11 +---------------------------------------------------------------------------*/
12 12
13 /*---------------------------------------------------------------------------+ 13 /*---------------------------------------------------------------------------+
14 | Note: | 14 | Note: |
15 | The file contains code which accesses user memory. | 15 | The file contains code which accesses user memory. |
16 | Emulator static data may change when user memory is accessed, due to | 16 | Emulator static data may change when user memory is accessed, due to |
17 | other processes using the emulator while swapping is in progress. | 17 | other processes using the emulator while swapping is in progress. |
18 +---------------------------------------------------------------------------*/ 18 +---------------------------------------------------------------------------*/
19 19
20 20
21 #include <linux/stddef.h> 21 #include <linux/stddef.h>
22 22
23 #include <asm/uaccess.h> 23 #include <asm/uaccess.h>
24 #include <asm/desc.h> 24 #include <asm/desc.h>
25 25
26 #include "fpu_system.h" 26 #include "fpu_system.h"
27 #include "exception.h" 27 #include "exception.h"
28 #include "fpu_emu.h" 28 #include "fpu_emu.h"
29 29
30 30
31 #define FPU_WRITE_BIT 0x10 31 #define FPU_WRITE_BIT 0x10
32 32
33 static int reg_offset[] = { 33 static int reg_offset[] = {
34 offsetof(struct info,___eax), 34 offsetof(struct info,___eax),
35 offsetof(struct info,___ecx), 35 offsetof(struct info,___ecx),
36 offsetof(struct info,___edx), 36 offsetof(struct info,___edx),
37 offsetof(struct info,___ebx), 37 offsetof(struct info,___ebx),
38 offsetof(struct info,___esp), 38 offsetof(struct info,___esp),
39 offsetof(struct info,___ebp), 39 offsetof(struct info,___ebp),
40 offsetof(struct info,___esi), 40 offsetof(struct info,___esi),
41 offsetof(struct info,___edi) 41 offsetof(struct info,___edi)
42 }; 42 };
43 43
44 #define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) 44 #define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info))
45 45
46 static int reg_offset_vm86[] = { 46 static int reg_offset_vm86[] = {
47 offsetof(struct info,___cs), 47 offsetof(struct info,___cs),
48 offsetof(struct info,___vm86_ds), 48 offsetof(struct info,___vm86_ds),
49 offsetof(struct info,___vm86_es), 49 offsetof(struct info,___vm86_es),
50 offsetof(struct info,___vm86_fs), 50 offsetof(struct info,___vm86_fs),
51 offsetof(struct info,___vm86_gs), 51 offsetof(struct info,___vm86_gs),
52 offsetof(struct info,___ss), 52 offsetof(struct info,___ss),
53 offsetof(struct info,___vm86_ds) 53 offsetof(struct info,___vm86_ds)
54 }; 54 };
55 55
56 #define VM86_REG_(x) (*(unsigned short *) \ 56 #define VM86_REG_(x) (*(unsigned short *) \
57 (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) 57 (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info))
58 58
59 /* These are dummies; fs and gs are not saved on the stack. */ 59 /* These are dummies; fs and gs are not saved on the stack. */
60 #define ___FS ___ds 60 #define ___FS ___ds
61 #define ___GS ___ds 61 #define ___GS ___ds
62 62
63 static int reg_offset_pm[] = { 63 static int reg_offset_pm[] = {
64 offsetof(struct info,___cs), 64 offsetof(struct info,___cs),
65 offsetof(struct info,___ds), 65 offsetof(struct info,___ds),
66 offsetof(struct info,___es), 66 offsetof(struct info,___es),
67 offsetof(struct info,___FS), 67 offsetof(struct info,___FS),
68 offsetof(struct info,___GS), 68 offsetof(struct info,___GS),
69 offsetof(struct info,___ss), 69 offsetof(struct info,___ss),
70 offsetof(struct info,___ds) 70 offsetof(struct info,___ds)
71 }; 71 };
72 72
73 #define PM_REG_(x) (*(unsigned short *) \ 73 #define PM_REG_(x) (*(unsigned short *) \
74 (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) 74 (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info))
75 75
76 76
77 /* Decode the SIB byte. This function assumes mod != 0 */ 77 /* Decode the SIB byte. This function assumes mod != 0 */
78 static int sib(int mod, unsigned long *fpu_eip) 78 static int sib(int mod, unsigned long *fpu_eip)
79 { 79 {
80 u_char ss,index,base; 80 u_char ss,index,base;
81 long offset; 81 long offset;
82 82
83 RE_ENTRANT_CHECK_OFF; 83 RE_ENTRANT_CHECK_OFF;
84 FPU_code_access_ok(1); 84 FPU_code_access_ok(1);
85 FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */ 85 FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */
86 RE_ENTRANT_CHECK_ON; 86 RE_ENTRANT_CHECK_ON;
87 (*fpu_eip)++; 87 (*fpu_eip)++;
88 ss = base >> 6; 88 ss = base >> 6;
89 index = (base >> 3) & 7; 89 index = (base >> 3) & 7;
90 base &= 7; 90 base &= 7;
91 91
92 if ((mod == 0) && (base == 5)) 92 if ((mod == 0) && (base == 5))
93 offset = 0; /* No base register */ 93 offset = 0; /* No base register */
94 else 94 else
95 offset = REG_(base); 95 offset = REG_(base);
96 96
97 if (index == 4) 97 if (index == 4)
98 { 98 {
99 /* No index register */ 99 /* No index register */
100 /* A non-zero ss is illegal */ 100 /* A non-zero ss is illegal */
101 if ( ss ) 101 if ( ss )
102 EXCEPTION(EX_Invalid); 102 EXCEPTION(EX_Invalid);
103 } 103 }
104 else 104 else
105 { 105 {
106 offset += (REG_(index)) << ss; 106 offset += (REG_(index)) << ss;
107 } 107 }
108 108
109 if (mod == 1) 109 if (mod == 1)
110 { 110 {
111 /* 8 bit signed displacement */ 111 /* 8 bit signed displacement */
112 long displacement; 112 long displacement;
113 RE_ENTRANT_CHECK_OFF; 113 RE_ENTRANT_CHECK_OFF;
114 FPU_code_access_ok(1); 114 FPU_code_access_ok(1);
115 FPU_get_user(displacement, (signed char __user *) (*fpu_eip)); 115 FPU_get_user(displacement, (signed char __user *) (*fpu_eip));
116 offset += displacement; 116 offset += displacement;
117 RE_ENTRANT_CHECK_ON; 117 RE_ENTRANT_CHECK_ON;
118 (*fpu_eip)++; 118 (*fpu_eip)++;
119 } 119 }
120 else if (mod == 2 || base == 5) /* The second condition also has mod==0 */ 120 else if (mod == 2 || base == 5) /* The second condition also has mod==0 */
121 { 121 {
122 /* 32 bit displacement */ 122 /* 32 bit displacement */
123 long displacement; 123 long displacement;
124 RE_ENTRANT_CHECK_OFF; 124 RE_ENTRANT_CHECK_OFF;
125 FPU_code_access_ok(4); 125 FPU_code_access_ok(4);
126 FPU_get_user(displacement, (long __user *) (*fpu_eip)); 126 FPU_get_user(displacement, (long __user *) (*fpu_eip));
127 offset += displacement; 127 offset += displacement;
128 RE_ENTRANT_CHECK_ON; 128 RE_ENTRANT_CHECK_ON;
129 (*fpu_eip) += 4; 129 (*fpu_eip) += 4;
130 } 130 }
131 131
132 return offset; 132 return offset;
133 } 133 }
134 134
135 135
136 static unsigned long vm86_segment(u_char segment, 136 static unsigned long vm86_segment(u_char segment,
137 struct address *addr) 137 struct address *addr)
138 { 138 {
139 segment--; 139 segment--;
140 #ifdef PARANOID 140 #ifdef PARANOID
141 if ( segment > PREFIX_SS_ ) 141 if ( segment > PREFIX_SS_ )
142 { 142 {
143 EXCEPTION(EX_INTERNAL|0x130); 143 EXCEPTION(EX_INTERNAL|0x130);
144 math_abort(FPU_info,SIGSEGV); 144 math_abort(FPU_info,SIGSEGV);
145 } 145 }
146 #endif /* PARANOID */ 146 #endif /* PARANOID */
147 addr->selector = VM86_REG_(segment); 147 addr->selector = VM86_REG_(segment);
148 return (unsigned long)VM86_REG_(segment) << 4; 148 return (unsigned long)VM86_REG_(segment) << 4;
149 } 149 }
150 150
151 151
152 /* This should work for 16 and 32 bit protected mode. */ 152 /* This should work for 16 and 32 bit protected mode. */
153 static long pm_address(u_char FPU_modrm, u_char segment, 153 static long pm_address(u_char FPU_modrm, u_char segment,
154 struct address *addr, long offset) 154 struct address *addr, long offset)
155 { 155 {
156 struct desc_struct descriptor; 156 struct desc_struct descriptor;
157 unsigned long base_address, limit, address, seg_top; 157 unsigned long base_address, limit, address, seg_top;
158 unsigned short selector;
159 158
160 segment--; 159 segment--;
161 160
162 #ifdef PARANOID 161 #ifdef PARANOID
163 /* segment is unsigned, so this also detects if segment was 0: */ 162 /* segment is unsigned, so this also detects if segment was 0: */
164 if ( segment > PREFIX_SS_ ) 163 if ( segment > PREFIX_SS_ )
165 { 164 {
166 EXCEPTION(EX_INTERNAL|0x132); 165 EXCEPTION(EX_INTERNAL|0x132);
167 math_abort(FPU_info,SIGSEGV); 166 math_abort(FPU_info,SIGSEGV);
168 } 167 }
169 #endif /* PARANOID */ 168 #endif /* PARANOID */
170 169
171 switch ( segment ) 170 switch ( segment )
172 { 171 {
173 /* fs and gs aren't used by the kernel, so they still have their 172 /* fs and gs aren't used by the kernel, so they still have their
174 user-space values. */ 173 user-space values. */
175 case PREFIX_FS_-1: 174 case PREFIX_FS_-1:
176 /* The cast is needed here to get gcc 2.8.0 to use a 16 bit register 175 /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
177 in the assembler statement. */ 176 savesegment(fs, addr->selector);
178
179 __asm__("mov %%fs,%0":"=r" (selector));
180 addr->selector = selector;
181 break; 177 break;
182 case PREFIX_GS_-1: 178 case PREFIX_GS_-1:
183 /* The cast is needed here to get gcc 2.8.0 to use a 16 bit register 179 savesegment(gs, addr->selector);
184 in the assembler statement. */
185 __asm__("mov %%gs,%0":"=r" (selector));
186 addr->selector = selector;
187 break; 180 break;
188 default: 181 default:
189 addr->selector = PM_REG_(segment); 182 addr->selector = PM_REG_(segment);
190 } 183 }
191 184
192 descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); 185 descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
193 base_address = SEG_BASE_ADDR(descriptor); 186 base_address = SEG_BASE_ADDR(descriptor);
194 address = base_address + offset; 187 address = base_address + offset;
195 limit = base_address 188 limit = base_address
196 + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1; 189 + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1;
197 if ( limit < base_address ) limit = 0xffffffff; 190 if ( limit < base_address ) limit = 0xffffffff;
198 191
199 if ( SEG_EXPAND_DOWN(descriptor) ) 192 if ( SEG_EXPAND_DOWN(descriptor) )
200 { 193 {
201 if ( SEG_G_BIT(descriptor) ) 194 if ( SEG_G_BIT(descriptor) )
202 seg_top = 0xffffffff; 195 seg_top = 0xffffffff;
203 else 196 else
204 { 197 {
205 seg_top = base_address + (1 << 20); 198 seg_top = base_address + (1 << 20);
206 if ( seg_top < base_address ) seg_top = 0xffffffff; 199 if ( seg_top < base_address ) seg_top = 0xffffffff;
207 } 200 }
208 access_limit = 201 access_limit =
209 (address <= limit) || (address >= seg_top) ? 0 : 202 (address <= limit) || (address >= seg_top) ? 0 :
210 ((seg_top-address) >= 255 ? 255 : seg_top-address); 203 ((seg_top-address) >= 255 ? 255 : seg_top-address);
211 } 204 }
212 else 205 else
213 { 206 {
214 access_limit = 207 access_limit =
215 (address > limit) || (address < base_address) ? 0 : 208 (address > limit) || (address < base_address) ? 0 :
216 ((limit-address) >= 254 ? 255 : limit-address+1); 209 ((limit-address) >= 254 ? 255 : limit-address+1);
217 } 210 }
218 if ( SEG_EXECUTE_ONLY(descriptor) || 211 if ( SEG_EXECUTE_ONLY(descriptor) ||
219 (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) ) 212 (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) )
220 { 213 {
221 access_limit = 0; 214 access_limit = 0;
222 } 215 }
223 return address; 216 return address;
224 } 217 }
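For context, the savesegment() accessor used in the two cases above is assumed (per the changelog) to emit a plain MOV from the segment register with an output constraint that also allows a register, so GCC can keep the selector in a register instead of forcing a stack slot. A rough sketch of that shape follows; the authoritative definition lives in include/asm-i386/system.h.

/* Assumed shape only -- see include/asm-i386/system.h for the real macro. */
#define savesegment(seg, value) \
	__asm__ __volatile__("mov %%" #seg ",%0" : "=rm" (value))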
225 218
226 219
227 /* 220 /*
228 MOD R/M byte: MOD == 3 has a special use for the FPU 221 MOD R/M byte: MOD == 3 has a special use for the FPU
229 SIB byte used iff R/M = 100b 222 SIB byte used iff R/M = 100b
230 223
231 7 6 5 4 3 2 1 0 224 7 6 5 4 3 2 1 0
232 ..... ......... ......... 225 ..... ......... .........
233 MOD OPCODE(2) R/M 226 MOD OPCODE(2) R/M
234 227
235 228
236 SIB byte 229 SIB byte
237 230
238 7 6 5 4 3 2 1 0 231 7 6 5 4 3 2 1 0
239 ..... ......... ......... 232 ..... ......... .........
240 SS INDEX BASE 233 SS INDEX BASE
241 234
242 */ 235 */
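A quick worked decode of those fields (illustrative helper, not part of the file): modrm 0x44 gives mod 1 and R/M 4, i.e. an 8 bit displacement plus a SIB byte.

/* Illustrative decode of the MOD R/M layout documented above. */
static inline void example_decode_modrm(unsigned char modrm)
{
	unsigned char mod = (modrm >> 6) & 3;	/* 0x44 -> 1: 8 bit displacement */
	unsigned char rm  = modrm & 7;		/* 0x44 -> 4: SIB byte follows (mod != 3) */
	(void)mod;
	(void)rm;
}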
243 236
244 void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, 237 void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
245 struct address *addr, 238 struct address *addr,
246 fpu_addr_modes addr_modes) 239 fpu_addr_modes addr_modes)
247 { 240 {
248 u_char mod; 241 u_char mod;
249 unsigned rm = FPU_modrm & 7; 242 unsigned rm = FPU_modrm & 7;
250 long *cpu_reg_ptr; 243 long *cpu_reg_ptr;
251 int address = 0; /* Initialized just to stop compiler warnings. */ 244 int address = 0; /* Initialized just to stop compiler warnings. */
252 245
253 /* Memory accessed via the cs selector is write protected 246 /* Memory accessed via the cs selector is write protected
254 in `non-segmented' 32 bit protected mode. */ 247 in `non-segmented' 32 bit protected mode. */
255 if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) 248 if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
256 && (addr_modes.override.segment == PREFIX_CS_) ) 249 && (addr_modes.override.segment == PREFIX_CS_) )
257 { 250 {
258 math_abort(FPU_info,SIGSEGV); 251 math_abort(FPU_info,SIGSEGV);
259 } 252 }
260 253
261 addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ 254 addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
262 255
263 mod = (FPU_modrm >> 6) & 3; 256 mod = (FPU_modrm >> 6) & 3;
264 257
265 if (rm == 4 && mod != 3) 258 if (rm == 4 && mod != 3)
266 { 259 {
267 address = sib(mod, fpu_eip); 260 address = sib(mod, fpu_eip);
268 } 261 }
269 else 262 else
270 { 263 {
271 cpu_reg_ptr = & REG_(rm); 264 cpu_reg_ptr = & REG_(rm);
272 switch (mod) 265 switch (mod)
273 { 266 {
274 case 0: 267 case 0:
275 if (rm == 5) 268 if (rm == 5)
276 { 269 {
277 /* Special case: disp32 */ 270 /* Special case: disp32 */
278 RE_ENTRANT_CHECK_OFF; 271 RE_ENTRANT_CHECK_OFF;
279 FPU_code_access_ok(4); 272 FPU_code_access_ok(4);
280 FPU_get_user(address, (unsigned long __user *) (*fpu_eip)); 273 FPU_get_user(address, (unsigned long __user *) (*fpu_eip));
281 (*fpu_eip) += 4; 274 (*fpu_eip) += 4;
282 RE_ENTRANT_CHECK_ON; 275 RE_ENTRANT_CHECK_ON;
283 addr->offset = address; 276 addr->offset = address;
284 return (void __user *) address; 277 return (void __user *) address;
285 } 278 }
286 else 279 else
287 { 280 {
288 address = *cpu_reg_ptr; /* Just return the contents 281 address = *cpu_reg_ptr; /* Just return the contents
289 of the cpu register */ 282 of the cpu register */
290 addr->offset = address; 283 addr->offset = address;
291 return (void __user *) address; 284 return (void __user *) address;
292 } 285 }
293 case 1: 286 case 1:
294 /* 8 bit signed displacement */ 287 /* 8 bit signed displacement */
295 RE_ENTRANT_CHECK_OFF; 288 RE_ENTRANT_CHECK_OFF;
296 FPU_code_access_ok(1); 289 FPU_code_access_ok(1);
297 FPU_get_user(address, (signed char __user *) (*fpu_eip)); 290 FPU_get_user(address, (signed char __user *) (*fpu_eip));
298 RE_ENTRANT_CHECK_ON; 291 RE_ENTRANT_CHECK_ON;
299 (*fpu_eip)++; 292 (*fpu_eip)++;
300 break; 293 break;
301 case 2: 294 case 2:
302 /* 32 bit displacement */ 295 /* 32 bit displacement */
303 RE_ENTRANT_CHECK_OFF; 296 RE_ENTRANT_CHECK_OFF;
304 FPU_code_access_ok(4); 297 FPU_code_access_ok(4);
305 FPU_get_user(address, (long __user *) (*fpu_eip)); 298 FPU_get_user(address, (long __user *) (*fpu_eip));
306 (*fpu_eip) += 4; 299 (*fpu_eip) += 4;
307 RE_ENTRANT_CHECK_ON; 300 RE_ENTRANT_CHECK_ON;
308 break; 301 break;
309 case 3: 302 case 3:
310 /* Not legal for the FPU */ 303 /* Not legal for the FPU */
311 EXCEPTION(EX_Invalid); 304 EXCEPTION(EX_Invalid);
312 } 305 }
313 address += *cpu_reg_ptr; 306 address += *cpu_reg_ptr;
314 } 307 }
315 308
316 addr->offset = address; 309 addr->offset = address;
317 310
318 switch ( addr_modes.default_mode ) 311 switch ( addr_modes.default_mode )
319 { 312 {
320 case 0: 313 case 0:
321 break; 314 break;
322 case VM86: 315 case VM86:
323 address += vm86_segment(addr_modes.override.segment, addr); 316 address += vm86_segment(addr_modes.override.segment, addr);
324 break; 317 break;
325 case PM16: 318 case PM16:
326 case SEG32: 319 case SEG32:
327 address = pm_address(FPU_modrm, addr_modes.override.segment, 320 address = pm_address(FPU_modrm, addr_modes.override.segment,
328 addr, address); 321 addr, address);
329 break; 322 break;
330 default: 323 default:
331 EXCEPTION(EX_INTERNAL|0x133); 324 EXCEPTION(EX_INTERNAL|0x133);
332 } 325 }
333 326
334 return (void __user *)address; 327 return (void __user *)address;
335 } 328 }
336 329
337 330
338 void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, 331 void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
339 struct address *addr, 332 struct address *addr,
340 fpu_addr_modes addr_modes) 333 fpu_addr_modes addr_modes)
341 { 334 {
342 u_char mod; 335 u_char mod;
343 unsigned rm = FPU_modrm & 7; 336 unsigned rm = FPU_modrm & 7;
344 int address = 0; /* Default used for mod == 0 */ 337 int address = 0; /* Default used for mod == 0 */
345 338
346 /* Memory accessed via the cs selector is write protected 339 /* Memory accessed via the cs selector is write protected
347 in `non-segmented' 32 bit protected mode. */ 340 in `non-segmented' 32 bit protected mode. */
348 if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) 341 if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
349 && (addr_modes.override.segment == PREFIX_CS_) ) 342 && (addr_modes.override.segment == PREFIX_CS_) )
350 { 343 {
351 math_abort(FPU_info,SIGSEGV); 344 math_abort(FPU_info,SIGSEGV);
352 } 345 }
353 346
354 addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ 347 addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
355 348
356 mod = (FPU_modrm >> 6) & 3; 349 mod = (FPU_modrm >> 6) & 3;
357 350
358 switch (mod) 351 switch (mod)
359 { 352 {
360 case 0: 353 case 0:
361 if (rm == 6) 354 if (rm == 6)
362 { 355 {
363 /* Special case: disp16 */ 356 /* Special case: disp16 */
364 RE_ENTRANT_CHECK_OFF; 357 RE_ENTRANT_CHECK_OFF;
365 FPU_code_access_ok(2); 358 FPU_code_access_ok(2);
366 FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); 359 FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
367 (*fpu_eip) += 2; 360 (*fpu_eip) += 2;
368 RE_ENTRANT_CHECK_ON; 361 RE_ENTRANT_CHECK_ON;
369 goto add_segment; 362 goto add_segment;
370 } 363 }
371 break; 364 break;
372 case 1: 365 case 1:
373 /* 8 bit signed displacement */ 366 /* 8 bit signed displacement */
374 RE_ENTRANT_CHECK_OFF; 367 RE_ENTRANT_CHECK_OFF;
375 FPU_code_access_ok(1); 368 FPU_code_access_ok(1);
376 FPU_get_user(address, (signed char __user *) (*fpu_eip)); 369 FPU_get_user(address, (signed char __user *) (*fpu_eip));
377 RE_ENTRANT_CHECK_ON; 370 RE_ENTRANT_CHECK_ON;
378 (*fpu_eip)++; 371 (*fpu_eip)++;
379 break; 372 break;
380 case 2: 373 case 2:
381 /* 16 bit displacement */ 374 /* 16 bit displacement */
382 RE_ENTRANT_CHECK_OFF; 375 RE_ENTRANT_CHECK_OFF;
383 FPU_code_access_ok(2); 376 FPU_code_access_ok(2);
384 FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); 377 FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
385 (*fpu_eip) += 2; 378 (*fpu_eip) += 2;
386 RE_ENTRANT_CHECK_ON; 379 RE_ENTRANT_CHECK_ON;
387 break; 380 break;
388 case 3: 381 case 3:
389 /* Not legal for the FPU */ 382 /* Not legal for the FPU */
390 EXCEPTION(EX_Invalid); 383 EXCEPTION(EX_Invalid);
391 break; 384 break;
392 } 385 }
393 switch ( rm ) 386 switch ( rm )
394 { 387 {
395 case 0: 388 case 0:
396 address += FPU_info->___ebx + FPU_info->___esi; 389 address += FPU_info->___ebx + FPU_info->___esi;
397 break; 390 break;
398 case 1: 391 case 1:
399 address += FPU_info->___ebx + FPU_info->___edi; 392 address += FPU_info->___ebx + FPU_info->___edi;
400 break; 393 break;
401 case 2: 394 case 2:
402 address += FPU_info->___ebp + FPU_info->___esi; 395 address += FPU_info->___ebp + FPU_info->___esi;
403 if ( addr_modes.override.segment == PREFIX_DEFAULT ) 396 if ( addr_modes.override.segment == PREFIX_DEFAULT )
404 addr_modes.override.segment = PREFIX_SS_; 397 addr_modes.override.segment = PREFIX_SS_;
405 break; 398 break;
406 case 3: 399 case 3:
407 address += FPU_info->___ebp + FPU_info->___edi; 400 address += FPU_info->___ebp + FPU_info->___edi;
408 if ( addr_modes.override.segment == PREFIX_DEFAULT ) 401 if ( addr_modes.override.segment == PREFIX_DEFAULT )
409 addr_modes.override.segment = PREFIX_SS_; 402 addr_modes.override.segment = PREFIX_SS_;
410 break; 403 break;
411 case 4: 404 case 4:
412 address += FPU_info->___esi; 405 address += FPU_info->___esi;
413 break; 406 break;
414 case 5: 407 case 5:
415 address += FPU_info->___edi; 408 address += FPU_info->___edi;
416 break; 409 break;
417 case 6: 410 case 6:
418 address += FPU_info->___ebp; 411 address += FPU_info->___ebp;
419 if ( addr_modes.override.segment == PREFIX_DEFAULT ) 412 if ( addr_modes.override.segment == PREFIX_DEFAULT )
420 addr_modes.override.segment = PREFIX_SS_; 413 addr_modes.override.segment = PREFIX_SS_;
421 break; 414 break;
422 case 7: 415 case 7:
423 address += FPU_info->___ebx; 416 address += FPU_info->___ebx;
424 break; 417 break;
425 } 418 }
426 419
427 add_segment: 420 add_segment:
428 address &= 0xffff; 421 address &= 0xffff;
429 422
430 addr->offset = address; 423 addr->offset = address;
431 424
432 switch ( addr_modes.default_mode ) 425 switch ( addr_modes.default_mode )
433 { 426 {
434 case 0: 427 case 0:
435 break; 428 break;
436 case VM86: 429 case VM86:
437 address += vm86_segment(addr_modes.override.segment, addr); 430 address += vm86_segment(addr_modes.override.segment, addr);
438 break; 431 break;
439 case PM16: 432 case PM16:
440 case SEG32: 433 case SEG32:
441 address = pm_address(FPU_modrm, addr_modes.override.segment, 434 address = pm_address(FPU_modrm, addr_modes.override.segment,
442 addr, address); 435 addr, address);
443 break; 436 break;
444 default: 437 default:
445 EXCEPTION(EX_INTERNAL|0x131); 438 EXCEPTION(EX_INTERNAL|0x131);
446 } 439 }
447 440
448 return (void __user *)address ; 441 return (void __user *)address ;
449 } 442 }
450 443
arch/i386/power/cpu.c
1 /* 1 /*
2 * Suspend support specific for i386. 2 * Suspend support specific for i386.
3 * 3 *
4 * Distribute under GPLv2 4 * Distribute under GPLv2
5 * 5 *
6 * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> 6 * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
7 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> 7 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
8 */ 8 */
9 9
10 #include <linux/config.h> 10 #include <linux/config.h>
11 #include <linux/kernel.h> 11 #include <linux/kernel.h>
12 #include <linux/module.h> 12 #include <linux/module.h>
13 #include <linux/init.h> 13 #include <linux/init.h>
14 #include <linux/types.h> 14 #include <linux/types.h>
15 #include <linux/spinlock.h> 15 #include <linux/spinlock.h>
16 #include <linux/poll.h> 16 #include <linux/poll.h>
17 #include <linux/delay.h> 17 #include <linux/delay.h>
18 #include <linux/sysrq.h> 18 #include <linux/sysrq.h>
19 #include <linux/proc_fs.h> 19 #include <linux/proc_fs.h>
20 #include <linux/irq.h> 20 #include <linux/irq.h>
21 #include <linux/pm.h> 21 #include <linux/pm.h>
22 #include <linux/device.h> 22 #include <linux/device.h>
23 #include <linux/suspend.h> 23 #include <linux/suspend.h>
24 #include <linux/acpi.h> 24 #include <linux/acpi.h>
25 25
26 #include <asm/uaccess.h> 26 #include <asm/uaccess.h>
27 #include <asm/acpi.h> 27 #include <asm/acpi.h>
28 #include <asm/tlbflush.h> 28 #include <asm/tlbflush.h>
29 #include <asm/processor.h> 29 #include <asm/processor.h>
30 30
31 static struct saved_context saved_context; 31 static struct saved_context saved_context;
32 32
33 unsigned long saved_context_ebx; 33 unsigned long saved_context_ebx;
34 unsigned long saved_context_esp, saved_context_ebp; 34 unsigned long saved_context_esp, saved_context_ebp;
35 unsigned long saved_context_esi, saved_context_edi; 35 unsigned long saved_context_esi, saved_context_edi;
36 unsigned long saved_context_eflags; 36 unsigned long saved_context_eflags;
37 37
38 void __save_processor_state(struct saved_context *ctxt) 38 void __save_processor_state(struct saved_context *ctxt)
39 { 39 {
40 kernel_fpu_begin(); 40 kernel_fpu_begin();
41 41
42 /* 42 /*
43 * descriptor tables 43 * descriptor tables
44 */ 44 */
45 asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit)); 45 store_gdt(&ctxt->gdt_limit);
46 asm volatile ("sidt %0" : "=m" (ctxt->idt_limit)); 46 store_idt(&ctxt->idt_limit);
47 asm volatile ("str %0" : "=m" (ctxt->tr)); 47 store_tr(ctxt->tr);
48 48
49 /* 49 /*
50 * segment registers 50 * segment registers
51 */ 51 */
52 asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); 52 savesegment(es, ctxt->es);
53 asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); 53 savesegment(fs, ctxt->fs);
54 asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs)); 54 savesegment(gs, ctxt->gs);
55 asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss)); 55 savesegment(ss, ctxt->ss);
56 56
57 /* 57 /*
58 * control registers 58 * control registers
59 */ 59 */
60 ctxt->cr0 = read_cr0(); 60 ctxt->cr0 = read_cr0();
61 ctxt->cr2 = read_cr2(); 61 ctxt->cr2 = read_cr2();
62 ctxt->cr3 = read_cr3(); 62 ctxt->cr3 = read_cr3();
63 ctxt->cr4 = read_cr4(); 63 ctxt->cr4 = read_cr4();
64 } 64 }
65 65
66 void save_processor_state(void) 66 void save_processor_state(void)
67 { 67 {
68 __save_processor_state(&saved_context); 68 __save_processor_state(&saved_context);
69 } 69 }
70 70
71 static void 71 static void
72 do_fpu_end(void) 72 do_fpu_end(void)
73 { 73 {
74 /* restore FPU regs if necessary */ 74 /* restore FPU regs if necessary */
75 /* Do it out of line so that gcc does not move cr0 load to some stupid place */ 75 /* Do it out of line so that gcc does not move cr0 load to some stupid place */
76 kernel_fpu_end(); 76 kernel_fpu_end();
77 mxcsr_feature_mask_init(); 77 mxcsr_feature_mask_init();
78 } 78 }
79 79
80 80
81 static void fix_processor_context(void) 81 static void fix_processor_context(void)
82 { 82 {
83 int cpu = smp_processor_id(); 83 int cpu = smp_processor_id();
84 struct tss_struct * t = &per_cpu(init_tss, cpu); 84 struct tss_struct * t = &per_cpu(init_tss, cpu);
85 85
86 set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has the concept of a busy TSS or some similar stupidity. */ 86 set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has the concept of a busy TSS or some similar stupidity. */
87 per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff; 87 per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff;
88 88
89 load_TR_desc(); /* This does ltr */ 89 load_TR_desc(); /* This does ltr */
90 load_LDT(&current->active_mm->context); /* This does lldt */ 90 load_LDT(&current->active_mm->context); /* This does lldt */
91 91
92 /* 92 /*
93 * Now maybe reload the debug registers 93 * Now maybe reload the debug registers
94 */ 94 */
95 if (current->thread.debugreg[7]){ 95 if (current->thread.debugreg[7]){
96 set_debugreg(current->thread.debugreg[0], 0); 96 set_debugreg(current->thread.debugreg[0], 0);
97 set_debugreg(current->thread.debugreg[1], 1); 97 set_debugreg(current->thread.debugreg[1], 1);
98 set_debugreg(current->thread.debugreg[2], 2); 98 set_debugreg(current->thread.debugreg[2], 2);
99 set_debugreg(current->thread.debugreg[3], 3); 99 set_debugreg(current->thread.debugreg[3], 3);
100 /* no 4 and 5 */ 100 /* no 4 and 5 */
101 set_debugreg(current->thread.debugreg[6], 6); 101 set_debugreg(current->thread.debugreg[6], 6);
102 set_debugreg(current->thread.debugreg[7], 7); 102 set_debugreg(current->thread.debugreg[7], 7);
103 } 103 }
104 104
105 } 105 }
106 106
107 void __restore_processor_state(struct saved_context *ctxt) 107 void __restore_processor_state(struct saved_context *ctxt)
108 { 108 {
109 /* 109 /*
110 * control registers 110 * control registers
111 */ 111 */
112 write_cr4(ctxt->cr4); 112 write_cr4(ctxt->cr4);
113 write_cr3(ctxt->cr3); 113 write_cr3(ctxt->cr3);
114 write_cr2(ctxt->cr2); 114 write_cr2(ctxt->cr2);
115 write_cr0(ctxt->cr0); 115 write_cr0(ctxt->cr0);
116 116
117 /* 117 /*
118 * now restore the descriptor tables to their proper values 118 * now restore the descriptor tables to their proper values
119 * ltr is done in fix_processor_context(). 119 * ltr is done in fix_processor_context().
120 */ 120 */
121 asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); 121 load_gdt(&ctxt->gdt_limit);
122 asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); 122 load_idt(&ctxt->idt_limit);
123 123
124 /* 124 /*
125 * segment registers 125 * segment registers
126 */ 126 */
127 asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); 127 loadsegment(es, ctxt->es);
128 asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); 128 loadsegment(fs, ctxt->fs);
129 asm volatile ("movw %0, %%gs" :: "r" (ctxt->gs)); 129 loadsegment(gs, ctxt->gs);
130 asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); 130 loadsegment(ss, ctxt->ss);
131 131
132 /* 132 /*
133 * sysenter MSRs 133 * sysenter MSRs
134 */ 134 */
135 if (boot_cpu_has(X86_FEATURE_SEP)) 135 if (boot_cpu_has(X86_FEATURE_SEP))
136 enable_sep_cpu(); 136 enable_sep_cpu();
137 137
138 fix_processor_context(); 138 fix_processor_context();
139 do_fpu_end(); 139 do_fpu_end();
140 mtrr_ap_init(); 140 mtrr_ap_init();
141 } 141 }
142 142
143 void restore_processor_state(void) 143 void restore_processor_state(void)
144 { 144 {
145 __restore_processor_state(&saved_context); 145 __restore_processor_state(&saved_context);
146 } 146 }
147 147
148 /* Needed by apm.c */ 148 /* Needed by apm.c */
149 EXPORT_SYMBOL(save_processor_state); 149 EXPORT_SYMBOL(save_processor_state);
150 EXPORT_SYMBOL(restore_processor_state); 150 EXPORT_SYMBOL(restore_processor_state);
151 151
include/asm-i386/desc.h
1 #ifndef __ARCH_DESC_H 1 #ifndef __ARCH_DESC_H
2 #define __ARCH_DESC_H 2 #define __ARCH_DESC_H
3 3
4 #include <asm/ldt.h> 4 #include <asm/ldt.h>
5 #include <asm/segment.h> 5 #include <asm/segment.h>
6 6
7 #define CPU_16BIT_STACK_SIZE 1024 7 #define CPU_16BIT_STACK_SIZE 1024
8 8
9 #ifndef __ASSEMBLY__ 9 #ifndef __ASSEMBLY__
10 10
11 #include <linux/preempt.h> 11 #include <linux/preempt.h>
12 #include <linux/smp.h> 12 #include <linux/smp.h>
13 #include <linux/percpu.h> 13 #include <linux/percpu.h>
14 14
15 #include <asm/mmu.h> 15 #include <asm/mmu.h>
16 16
17 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; 17 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
18 DECLARE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]); 18 DECLARE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
19 19
20 DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); 20 DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
21 21
22 struct Xgt_desc_struct { 22 struct Xgt_desc_struct {
23 unsigned short size; 23 unsigned short size;
24 unsigned long address __attribute__((packed)); 24 unsigned long address __attribute__((packed));
25 unsigned short pad; 25 unsigned short pad;
26 } __attribute__ ((packed)); 26 } __attribute__ ((packed));
27 27
28 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; 28 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
29 29
30 #define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8)) 30 #define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
31 #define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8)) 31 #define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
32 32
33 #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
34 #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
35 #define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
36 #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
37
38 #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
39 #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
40 #define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
41 #define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
42
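A minimal usage sketch for the new accessors (illustrative only, not part of this patch): the table accessors take a pointer to an Xgt_desc_struct, while the tr/ldt ones take a plain selector variable. Reloading TR/LDT is deliberately omitted here since load_TR_desc()/load_LDT_desc() below already cover the normal paths.

/* Illustrative only: exercising the accessors defined above. */
static inline void example_store_and_reload_tables(void)
{
	struct Xgt_desc_struct gdt_descr, idt_copy;
	unsigned short tr_sel, ldt_sel;

	store_gdt(&gdt_descr);	/* sgdt: limit + linear base */
	store_idt(&idt_copy);	/* sidt */
	store_tr(tr_sel);	/* str: TSS selector */
	store_ldt(ldt_sel);	/* sldt: LDT selector */

	load_gdt(&gdt_descr);	/* lgdt the saved descriptor back */
	load_idt(&idt_copy);	/* lidt */
}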
33 /* 43 /*
34 * This is the ldt that every process will get unless we need 44 * This is the ldt that every process will get unless we need
35 * something other than this. 45 * something other than this.
36 */ 46 */
37 extern struct desc_struct default_ldt[]; 47 extern struct desc_struct default_ldt[];
38 extern void set_intr_gate(unsigned int irq, void * addr); 48 extern void set_intr_gate(unsigned int irq, void * addr);
39 49
40 #define _set_tssldt_desc(n,addr,limit,type) \ 50 #define _set_tssldt_desc(n,addr,limit,type) \
41 __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ 51 __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
42 "movw %%ax,2(%2)\n\t" \ 52 "movw %%ax,2(%2)\n\t" \
43 "rorl $16,%%eax\n\t" \ 53 "rorl $16,%%eax\n\t" \
44 "movb %%al,4(%2)\n\t" \ 54 "movb %%al,4(%2)\n\t" \
45 "movb %4,5(%2)\n\t" \ 55 "movb %4,5(%2)\n\t" \
46 "movb $0,6(%2)\n\t" \ 56 "movb $0,6(%2)\n\t" \
47 "movb %%ah,7(%2)\n\t" \ 57 "movb %%ah,7(%2)\n\t" \
48 "rorl $16,%%eax" \ 58 "rorl $16,%%eax" \
49 : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type)) 59 : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))
50 60
51 static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr) 61 static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
52 { 62 {
53 _set_tssldt_desc(&per_cpu(cpu_gdt_table, cpu)[entry], (int)addr, 63 _set_tssldt_desc(&per_cpu(cpu_gdt_table, cpu)[entry], (int)addr,
54 offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89); 64 offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
55 } 65 }
56 66
57 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) 67 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
58 68
59 static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) 69 static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
60 { 70 {
61 _set_tssldt_desc(&per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); 71 _set_tssldt_desc(&per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
62 } 72 }
63 73
64 #define LDT_entry_a(info) \ 74 #define LDT_entry_a(info) \
65 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) 75 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
66 76
67 #define LDT_entry_b(info) \ 77 #define LDT_entry_b(info) \
68 (((info)->base_addr & 0xff000000) | \ 78 (((info)->base_addr & 0xff000000) | \
69 (((info)->base_addr & 0x00ff0000) >> 16) | \ 79 (((info)->base_addr & 0x00ff0000) >> 16) | \
70 ((info)->limit & 0xf0000) | \ 80 ((info)->limit & 0xf0000) | \
71 (((info)->read_exec_only ^ 1) << 9) | \ 81 (((info)->read_exec_only ^ 1) << 9) | \
72 ((info)->contents << 10) | \ 82 ((info)->contents << 10) | \
73 (((info)->seg_not_present ^ 1) << 15) | \ 83 (((info)->seg_not_present ^ 1) << 15) | \
74 ((info)->seg_32bit << 22) | \ 84 ((info)->seg_32bit << 22) | \
75 ((info)->limit_in_pages << 23) | \ 85 ((info)->limit_in_pages << 23) | \
76 ((info)->useable << 20) | \ 86 ((info)->useable << 20) | \
77 0x7000) 87 0x7000)
78 88
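LDT_entry_a() and LDT_entry_b() pack the fields of a struct user_desc (defined in asm-i386/ldt.h, assumed available here) into the low and high 32-bit words of a hardware segment descriptor. A hedged sketch of how they are normally combined; example_fill_ldt() is illustrative and not part of this header.

static inline void example_fill_ldt(struct desc_struct *desc,
				    const struct user_desc *info)
{
	desc->a = LDT_entry_a(info);	/* limit 15..0 and base 15..0 */
	desc->b = LDT_entry_b(info);	/* base 31..16, access flags, limit 19..16 */
}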
79 #define LDT_empty(info) (\ 89 #define LDT_empty(info) (\
80 (info)->base_addr == 0 && \ 90 (info)->base_addr == 0 && \
81 (info)->limit == 0 && \ 91 (info)->limit == 0 && \
82 (info)->contents == 0 && \ 92 (info)->contents == 0 && \
83 (info)->read_exec_only == 1 && \ 93 (info)->read_exec_only == 1 && \
84 (info)->seg_32bit == 0 && \ 94 (info)->seg_32bit == 0 && \
85 (info)->limit_in_pages == 0 && \ 95 (info)->limit_in_pages == 0 && \
86 (info)->seg_not_present == 1 && \ 96 (info)->seg_not_present == 1 && \
87 (info)->useable == 0 ) 97 (info)->useable == 0 )
88 98
89 #if TLS_SIZE != 24 99 #if TLS_SIZE != 24
90 # error update this code. 100 # error update this code.
91 #endif 101 #endif
92 102
93 static inline void load_TLS(struct thread_struct *t, unsigned int cpu) 103 static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
94 { 104 {
95 #define C(i) per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] 105 #define C(i) per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
96 C(0); C(1); C(2); 106 C(0); C(1); C(2);
97 #undef C 107 #undef C
98 } 108 }
99 109
100 static inline void clear_LDT(void) 110 static inline void clear_LDT(void)
101 { 111 {
102 int cpu = get_cpu(); 112 int cpu = get_cpu();
103 113
104 set_ldt_desc(cpu, &default_ldt[0], 5); 114 set_ldt_desc(cpu, &default_ldt[0], 5);
105 load_LDT_desc(); 115 load_LDT_desc();
106 put_cpu(); 116 put_cpu();
107 } 117 }
108 118
109 /* 119 /*
110 * load one particular LDT into the current CPU 120 * load one particular LDT into the current CPU
111 */ 121 */
112 static inline void load_LDT_nolock(mm_context_t *pc, int cpu) 122 static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
113 { 123 {
114 void *segments = pc->ldt; 124 void *segments = pc->ldt;
115 int count = pc->size; 125 int count = pc->size;
116 126
117 if (likely(!count)) { 127 if (likely(!count)) {
118 segments = &default_ldt[0]; 128 segments = &default_ldt[0];
119 count = 5; 129 count = 5;
120 } 130 }
121 131
122 set_ldt_desc(cpu, segments, count); 132 set_ldt_desc(cpu, segments, count);
123 load_LDT_desc(); 133 load_LDT_desc();
124 } 134 }
125 135
126 static inline void load_LDT(mm_context_t *pc) 136 static inline void load_LDT(mm_context_t *pc)
127 { 137 {
128 int cpu = get_cpu(); 138 int cpu = get_cpu();
129 load_LDT_nolock(pc, cpu); 139 load_LDT_nolock(pc, cpu);
130 put_cpu(); 140 put_cpu();
131 } 141 }
132 142
133 static inline unsigned long get_desc_base(unsigned long *desc) 143 static inline unsigned long get_desc_base(unsigned long *desc)
134 { 144 {
135 unsigned long base; 145 unsigned long base;
136 base = ((desc[0] >> 16) & 0x0000ffff) | 146 base = ((desc[0] >> 16) & 0x0000ffff) |
137 ((desc[1] << 16) & 0x00ff0000) | 147 ((desc[1] << 16) & 0x00ff0000) |
138 (desc[1] & 0xff000000); 148 (desc[1] & 0xff000000);
139 return base; 149 return base;
140 } 150 }
141 151
142 #endif /* !__ASSEMBLY__ */ 152 #endif /* !__ASSEMBLY__ */
143 153
144 #endif 154 #endif
145 155
include/asm-i386/system.h
1 #ifndef __ASM_SYSTEM_H 1 #ifndef __ASM_SYSTEM_H
2 #define __ASM_SYSTEM_H 2 #define __ASM_SYSTEM_H
3 3
4 #include <linux/config.h> 4 #include <linux/config.h>
5 #include <linux/kernel.h> 5 #include <linux/kernel.h>
6 #include <asm/segment.h> 6 #include <asm/segment.h>
7 #include <asm/cpufeature.h> 7 #include <asm/cpufeature.h>
8 #include <linux/bitops.h> /* for LOCK_PREFIX */ 8 #include <linux/bitops.h> /* for LOCK_PREFIX */
9 9
10 #ifdef __KERNEL__ 10 #ifdef __KERNEL__
11 11
12 struct task_struct; /* one of the stranger aspects of C forward declarations.. */ 12 struct task_struct; /* one of the stranger aspects of C forward declarations.. */
13 extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); 13 extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
14 14
15 #define switch_to(prev,next,last) do { \ 15 #define switch_to(prev,next,last) do { \
16 unsigned long esi,edi; \ 16 unsigned long esi,edi; \
17 asm volatile("pushfl\n\t" \ 17 asm volatile("pushfl\n\t" \
18 "pushl %%ebp\n\t" \ 18 "pushl %%ebp\n\t" \
19 "movl %%esp,%0\n\t" /* save ESP */ \ 19 "movl %%esp,%0\n\t" /* save ESP */ \
20 "movl %5,%%esp\n\t" /* restore ESP */ \ 20 "movl %5,%%esp\n\t" /* restore ESP */ \
21 "movl $1f,%1\n\t" /* save EIP */ \ 21 "movl $1f,%1\n\t" /* save EIP */ \
22 "pushl %6\n\t" /* restore EIP */ \ 22 "pushl %6\n\t" /* restore EIP */ \
23 "jmp __switch_to\n" \ 23 "jmp __switch_to\n" \
24 "1:\t" \ 24 "1:\t" \
25 "popl %%ebp\n\t" \ 25 "popl %%ebp\n\t" \
26 "popfl" \ 26 "popfl" \
27 :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ 27 :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
28 "=a" (last),"=S" (esi),"=D" (edi) \ 28 "=a" (last),"=S" (esi),"=D" (edi) \
29 :"m" (next->thread.esp),"m" (next->thread.eip), \ 29 :"m" (next->thread.esp),"m" (next->thread.eip), \
30 "2" (prev), "d" (next)); \ 30 "2" (prev), "d" (next)); \
31 } while (0) 31 } while (0)
32 32
33 #define _set_base(addr,base) do { unsigned long __pr; \ 33 #define _set_base(addr,base) do { unsigned long __pr; \
34 __asm__ __volatile__ ("movw %%dx,%1\n\t" \ 34 __asm__ __volatile__ ("movw %%dx,%1\n\t" \
35 "rorl $16,%%edx\n\t" \ 35 "rorl $16,%%edx\n\t" \
36 "movb %%dl,%2\n\t" \ 36 "movb %%dl,%2\n\t" \
37 "movb %%dh,%3" \ 37 "movb %%dh,%3" \
38 :"=&d" (__pr) \ 38 :"=&d" (__pr) \
39 :"m" (*((addr)+2)), \ 39 :"m" (*((addr)+2)), \
40 "m" (*((addr)+4)), \ 40 "m" (*((addr)+4)), \
41 "m" (*((addr)+7)), \ 41 "m" (*((addr)+7)), \
42 "0" (base) \ 42 "0" (base) \
43 ); } while(0) 43 ); } while(0)
44 44
45 #define _set_limit(addr,limit) do { unsigned long __lr; \ 45 #define _set_limit(addr,limit) do { unsigned long __lr; \
46 __asm__ __volatile__ ("movw %%dx,%1\n\t" \ 46 __asm__ __volatile__ ("movw %%dx,%1\n\t" \
47 "rorl $16,%%edx\n\t" \ 47 "rorl $16,%%edx\n\t" \
48 "movb %2,%%dh\n\t" \ 48 "movb %2,%%dh\n\t" \
49 "andb $0xf0,%%dh\n\t" \ 49 "andb $0xf0,%%dh\n\t" \
50 "orb %%dh,%%dl\n\t" \ 50 "orb %%dh,%%dl\n\t" \
51 "movb %%dl,%2" \ 51 "movb %%dl,%2" \
52 :"=&d" (__lr) \ 52 :"=&d" (__lr) \
53 :"m" (*(addr)), \ 53 :"m" (*(addr)), \
54 "m" (*((addr)+6)), \ 54 "m" (*((addr)+6)), \
55 "0" (limit) \ 55 "0" (limit) \
56 ); } while(0) 56 ); } while(0)
57 57
58 #define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) 58 #define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
59 #define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 ) 59 #define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 )
60 60
61 static inline unsigned long _get_base(char * addr) 61 static inline unsigned long _get_base(char * addr)
62 { 62 {
63 unsigned long __base; 63 unsigned long __base;
64 __asm__("movb %3,%%dh\n\t" 64 __asm__("movb %3,%%dh\n\t"
65 "movb %2,%%dl\n\t" 65 "movb %2,%%dl\n\t"
66 "shll $16,%%edx\n\t" 66 "shll $16,%%edx\n\t"
67 "movw %1,%%dx" 67 "movw %1,%%dx"
68 :"=&d" (__base) 68 :"=&d" (__base)
69 :"m" (*((addr)+2)), 69 :"m" (*((addr)+2)),
70 "m" (*((addr)+4)), 70 "m" (*((addr)+4)),
71 "m" (*((addr)+7))); 71 "m" (*((addr)+7)));
72 return __base; 72 return __base;
73 } 73 }
74 74
75 #define get_base(ldt) _get_base( ((char *)&(ldt)) ) 75 #define get_base(ldt) _get_base( ((char *)&(ldt)) )
76 76
77 /* 77 /*
78 * Load a segment. Fall back on loading the zero 78 * Load a segment. Fall back on loading the zero
79 * segment if something goes wrong.. 79 * segment if something goes wrong..
80 */ 80 */
81 #define loadsegment(seg,value) \ 81 #define loadsegment(seg,value) \
82 asm volatile("\n" \ 82 asm volatile("\n" \
83 "1:\t" \ 83 "1:\t" \
84 "mov %0,%%" #seg "\n" \ 84 "mov %0,%%" #seg "\n" \
85 "2:\n" \ 85 "2:\n" \
86 ".section .fixup,\"ax\"\n" \ 86 ".section .fixup,\"ax\"\n" \
87 "3:\t" \ 87 "3:\t" \
88 "pushl $0\n\t" \ 88 "pushl $0\n\t" \
89 "popl %%" #seg "\n\t" \ 89 "popl %%" #seg "\n\t" \
90 "jmp 2b\n" \ 90 "jmp 2b\n" \
91 ".previous\n" \ 91 ".previous\n" \
92 ".section __ex_table,\"a\"\n\t" \ 92 ".section __ex_table,\"a\"\n\t" \
93 ".align 4\n\t" \ 93 ".align 4\n\t" \
94 ".long 1b,3b\n" \ 94 ".long 1b,3b\n" \
95 ".previous" \ 95 ".previous" \
96 : :"m" (value)) 96 : :"rm" (value))
97 97
98 /* 98 /*
99 * Save a segment register away 99 * Save a segment register away
100 */ 100 */
101 #define savesegment(seg, value) \ 101 #define savesegment(seg, value) \
102 asm volatile("mov %%" #seg ",%0":"=m" (value)) 102 asm volatile("mov %%" #seg ",%0":"=rm" (value))
103 103
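loadsegment() recovers from a bad selector by loading the null segment through the fixup section, and savesegment() now uses an "rm" constraint so the selector may live in a register and the surrounding variable can stay out of memory. A minimal sketch of typical use; example_switch_fs() and its argument are hypothetical.

static inline void example_switch_fs(unsigned int new_sel)
{
	unsigned int old_sel;

	savesegment(fs, old_sel);		/* MOV %fs into a register or memory slot */
	if (old_sel != new_sel)
		loadsegment(fs, new_sel);	/* falls back to the null selector on fault */
}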
104 /* 104 /*
105 * Clear and set 'TS' bit respectively 105 * Clear and set 'TS' bit respectively
106 */ 106 */
107 #define clts() __asm__ __volatile__ ("clts") 107 #define clts() __asm__ __volatile__ ("clts")
108 #define read_cr0() ({ \ 108 #define read_cr0() ({ \
109 unsigned int __dummy; \ 109 unsigned int __dummy; \
110 __asm__ __volatile__( \ 110 __asm__ __volatile__( \
111 "movl %%cr0,%0\n\t" \ 111 "movl %%cr0,%0\n\t" \
112 :"=r" (__dummy)); \ 112 :"=r" (__dummy)); \
113 __dummy; \ 113 __dummy; \
114 }) 114 })
115 #define write_cr0(x) \ 115 #define write_cr0(x) \
116 __asm__ __volatile__("movl %0,%%cr0": :"r" (x)); 116 __asm__ __volatile__("movl %0,%%cr0": :"r" (x));
117 117
118 #define read_cr2() ({ \ 118 #define read_cr2() ({ \
119 unsigned int __dummy; \ 119 unsigned int __dummy; \
120 __asm__ __volatile__( \ 120 __asm__ __volatile__( \
121 "movl %%cr2,%0\n\t" \ 121 "movl %%cr2,%0\n\t" \
122 :"=r" (__dummy)); \ 122 :"=r" (__dummy)); \
123 __dummy; \ 123 __dummy; \
124 }) 124 })
125 #define write_cr2(x) \ 125 #define write_cr2(x) \
126 __asm__ __volatile__("movl %0,%%cr2": :"r" (x)); 126 __asm__ __volatile__("movl %0,%%cr2": :"r" (x));
127 127
128 #define read_cr3() ({ \ 128 #define read_cr3() ({ \
129 unsigned int __dummy; \ 129 unsigned int __dummy; \
130 __asm__ ( \ 130 __asm__ ( \
131 "movl %%cr3,%0\n\t" \ 131 "movl %%cr3,%0\n\t" \
132 :"=r" (__dummy)); \ 132 :"=r" (__dummy)); \
133 __dummy; \ 133 __dummy; \
134 }) 134 })
135 #define write_cr3(x) \ 135 #define write_cr3(x) \
136 __asm__ __volatile__("movl %0,%%cr3": :"r" (x)); 136 __asm__ __volatile__("movl %0,%%cr3": :"r" (x));
137 137
138 #define read_cr4() ({ \ 138 #define read_cr4() ({ \
139 unsigned int __dummy; \ 139 unsigned int __dummy; \
140 __asm__( \ 140 __asm__( \
141 "movl %%cr4,%0\n\t" \ 141 "movl %%cr4,%0\n\t" \
142 :"=r" (__dummy)); \ 142 :"=r" (__dummy)); \
143 __dummy; \ 143 __dummy; \
144 }) 144 })
145 #define write_cr4(x) \ 145 #define write_cr4(x) \
146 __asm__ __volatile__("movl %0,%%cr4": :"r" (x)); 146 __asm__ __volatile__("movl %0,%%cr4": :"r" (x));
147 #define stts() write_cr0(8 | read_cr0()) 147 #define stts() write_cr0(8 | read_cr0())
148 148
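clts() and stts() toggle the TS bit (bit 3, mask 0x8) in CR0, the hook for lazy FPU switching: with TS set, the next FPU instruction traps so the kernel can restore the right FPU state first. stts() is simply write_cr0(8 | read_cr0()), as defined above. A small, hypothetical illustration:

static inline int example_fpu_trap_pending(void)
{
	/* TS set => the next FPU/MMX/SSE instruction raises #NM (device-not-available) */
	return (read_cr0() & 8) != 0;
}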
149 #endif /* __KERNEL__ */ 149 #endif /* __KERNEL__ */
150 150
151 #define wbinvd() \ 151 #define wbinvd() \
152 __asm__ __volatile__ ("wbinvd": : :"memory"); 152 __asm__ __volatile__ ("wbinvd": : :"memory");
153 153
154 static inline unsigned long get_limit(unsigned long segment) 154 static inline unsigned long get_limit(unsigned long segment)
155 { 155 {
156 unsigned long __limit; 156 unsigned long __limit;
157 __asm__("lsll %1,%0" 157 __asm__("lsll %1,%0"
158 :"=r" (__limit):"r" (segment)); 158 :"=r" (__limit):"r" (segment));
159 return __limit+1; 159 return __limit+1;
160 } 160 }
161 161
162 #define nop() __asm__ __volatile__ ("nop") 162 #define nop() __asm__ __volatile__ ("nop")
163 163
164 #define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) 164 #define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
165 165
166 #define tas(ptr) (xchg((ptr),1)) 166 #define tas(ptr) (xchg((ptr),1))
167 167
168 struct __xchg_dummy { unsigned long a[100]; }; 168 struct __xchg_dummy { unsigned long a[100]; };
169 #define __xg(x) ((struct __xchg_dummy *)(x)) 169 #define __xg(x) ((struct __xchg_dummy *)(x))
170 170
171 171
172 /* 172 /*
173 * The semantics of CMPXCHG8B are a bit strange, this is why 173 * The semantics of CMPXCHG8B are a bit strange, this is why
174 * there is a loop and the loading of %%eax and %%edx has to 174 * there is a loop and the loading of %%eax and %%edx has to
175 * be inside. This inlines well in most cases, the cached 175 * be inside. This inlines well in most cases, the cached
176 * cost is around ~38 cycles. (in the future we might want 176 * cost is around ~38 cycles. (in the future we might want
177 * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that 177 * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
178 * might have an implicit FPU-save as a cost, so it's not 178 * might have an implicit FPU-save as a cost, so it's not
179 * clear which path to go.) 179 * clear which path to go.)
180 * 180 *
181 * cmpxchg8b must be used with the lock prefix here to allow 181 * cmpxchg8b must be used with the lock prefix here to allow
182 * the instruction to be executed atomically, see page 3-102 182 * the instruction to be executed atomically, see page 3-102
183 * of the instruction set reference 24319102.pdf. We need 183 * of the instruction set reference 24319102.pdf. We need
184 * the reader side to see the coherent 64bit value. 184 * the reader side to see the coherent 64bit value.
185 */ 185 */
186 static inline void __set_64bit (unsigned long long * ptr, 186 static inline void __set_64bit (unsigned long long * ptr,
187 unsigned int low, unsigned int high) 187 unsigned int low, unsigned int high)
188 { 188 {
189 __asm__ __volatile__ ( 189 __asm__ __volatile__ (
190 "\n1:\t" 190 "\n1:\t"
191 "movl (%0), %%eax\n\t" 191 "movl (%0), %%eax\n\t"
192 "movl 4(%0), %%edx\n\t" 192 "movl 4(%0), %%edx\n\t"
193 "lock cmpxchg8b (%0)\n\t" 193 "lock cmpxchg8b (%0)\n\t"
194 "jnz 1b" 194 "jnz 1b"
195 : /* no outputs */ 195 : /* no outputs */
196 : "D"(ptr), 196 : "D"(ptr),
197 "b"(low), 197 "b"(low),
198 "c"(high) 198 "c"(high)
199 : "ax","dx","memory"); 199 : "ax","dx","memory");
200 } 200 }
201 201
202 static inline void __set_64bit_constant (unsigned long long *ptr, 202 static inline void __set_64bit_constant (unsigned long long *ptr,
203 unsigned long long value) 203 unsigned long long value)
204 { 204 {
205 __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); 205 __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
206 } 206 }
207 #define ll_low(x) *(((unsigned int*)&(x))+0) 207 #define ll_low(x) *(((unsigned int*)&(x))+0)
208 #define ll_high(x) *(((unsigned int*)&(x))+1) 208 #define ll_high(x) *(((unsigned int*)&(x))+1)
209 209
210 static inline void __set_64bit_var (unsigned long long *ptr, 210 static inline void __set_64bit_var (unsigned long long *ptr,
211 unsigned long long value) 211 unsigned long long value)
212 { 212 {
213 __set_64bit(ptr,ll_low(value), ll_high(value)); 213 __set_64bit(ptr,ll_low(value), ll_high(value));
214 } 214 }
215 215
216 #define set_64bit(ptr,value) \ 216 #define set_64bit(ptr,value) \
217 (__builtin_constant_p(value) ? \ 217 (__builtin_constant_p(value) ? \
218 __set_64bit_constant(ptr, value) : \ 218 __set_64bit_constant(ptr, value) : \
219 __set_64bit_var(ptr, value) ) 219 __set_64bit_var(ptr, value) )
220 220
221 #define _set_64bit(ptr,value) \ 221 #define _set_64bit(ptr,value) \
222 (__builtin_constant_p(value) ? \ 222 (__builtin_constant_p(value) ? \
223 __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ 223 __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
224 __set_64bit(ptr, ll_low(value), ll_high(value)) ) 224 __set_64bit(ptr, ll_low(value), ll_high(value)) )
225 225
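set_64bit() performs an atomic 64-bit store on a 32-bit processor by looping on lock cmpxchg8b until the store sticks, as the comment above explains. A minimal usage sketch; example_counter and example_publish() are hypothetical.

static unsigned long long example_counter;

static inline void example_publish(unsigned long long val)
{
	/* concurrent readers see either the old or the new value, never a torn half */
	set_64bit(&example_counter, val);
}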
226 /* 226 /*
227 * Note: no "lock" prefix even on SMP: xchg always implies lock anyway 227 * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
228 * Note 2: xchg has a side effect, so the volatile attribute is necessary, 228 * Note 2: xchg has a side effect, so the volatile attribute is necessary,
229 * but strictly the primitive is invalid, since *ptr is an output argument. --ANK 229 * but strictly the primitive is invalid, since *ptr is an output argument. --ANK
230 */ 230 */
231 static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) 231 static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
232 { 232 {
233 switch (size) { 233 switch (size) {
234 case 1: 234 case 1:
235 __asm__ __volatile__("xchgb %b0,%1" 235 __asm__ __volatile__("xchgb %b0,%1"
236 :"=q" (x) 236 :"=q" (x)
237 :"m" (*__xg(ptr)), "0" (x) 237 :"m" (*__xg(ptr)), "0" (x)
238 :"memory"); 238 :"memory");
239 break; 239 break;
240 case 2: 240 case 2:
241 __asm__ __volatile__("xchgw %w0,%1" 241 __asm__ __volatile__("xchgw %w0,%1"
242 :"=r" (x) 242 :"=r" (x)
243 :"m" (*__xg(ptr)), "0" (x) 243 :"m" (*__xg(ptr)), "0" (x)
244 :"memory"); 244 :"memory");
245 break; 245 break;
246 case 4: 246 case 4:
247 __asm__ __volatile__("xchgl %0,%1" 247 __asm__ __volatile__("xchgl %0,%1"
248 :"=r" (x) 248 :"=r" (x)
249 :"m" (*__xg(ptr)), "0" (x) 249 :"m" (*__xg(ptr)), "0" (x)
250 :"memory"); 250 :"memory");
251 break; 251 break;
252 } 252 }
253 return x; 253 return x;
254 } 254 }
255 255
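xchg() atomically swaps a value into *ptr and returns the previous contents; tas() builds the classic test-and-set on top of it. A hedged sketch of a busy-wait lock; example_lock and the helpers are illustrative only, not a replacement for the kernel's spinlocks.

static int example_lock;	/* 0 = free, 1 = held */

static inline void example_acquire(void)
{
	/* spin until the previous value was 0, i.e. this CPU flipped it 0 -> 1 */
	while (tas(&example_lock))
		;
}

static inline void example_release(void)
{
	example_lock = 0;	/* a real lock would also order prior stores */
}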
256 /* 256 /*
257 * Atomic compare and exchange. Compare OLD with MEM, if identical, 257 * Atomic compare and exchange. Compare OLD with MEM, if identical,
258 * store NEW in MEM. Return the initial value in MEM. Success is 258 * store NEW in MEM. Return the initial value in MEM. Success is
259 * indicated by comparing RETURN with OLD. 259 * indicated by comparing RETURN with OLD.
260 */ 260 */
261 261
262 #ifdef CONFIG_X86_CMPXCHG 262 #ifdef CONFIG_X86_CMPXCHG
263 #define __HAVE_ARCH_CMPXCHG 1 263 #define __HAVE_ARCH_CMPXCHG 1
264 #endif 264 #endif
265 265
266 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, 266 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
267 unsigned long new, int size) 267 unsigned long new, int size)
268 { 268 {
269 unsigned long prev; 269 unsigned long prev;
270 switch (size) { 270 switch (size) {
271 case 1: 271 case 1:
272 __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" 272 __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
273 : "=a"(prev) 273 : "=a"(prev)
274 : "q"(new), "m"(*__xg(ptr)), "0"(old) 274 : "q"(new), "m"(*__xg(ptr)), "0"(old)
275 : "memory"); 275 : "memory");
276 return prev; 276 return prev;
277 case 2: 277 case 2:
278 __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" 278 __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
279 : "=a"(prev) 279 : "=a"(prev)
280 : "q"(new), "m"(*__xg(ptr)), "0"(old) 280 : "q"(new), "m"(*__xg(ptr)), "0"(old)
281 : "memory"); 281 : "memory");
282 return prev; 282 return prev;
283 case 4: 283 case 4:
284 __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" 284 __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
285 : "=a"(prev) 285 : "=a"(prev)
286 : "q"(new), "m"(*__xg(ptr)), "0"(old) 286 : "q"(new), "m"(*__xg(ptr)), "0"(old)
287 : "memory"); 287 : "memory");
288 return prev; 288 return prev;
289 } 289 }
290 return old; 290 return old;
291 } 291 }
292 292
293 #define cmpxchg(ptr,o,n)\ 293 #define cmpxchg(ptr,o,n)\
294 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ 294 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
295 (unsigned long)(n),sizeof(*(ptr)))) 295 (unsigned long)(n),sizeof(*(ptr))))
296 296
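cmpxchg() stores the new value only if memory still holds the expected old value and always returns what was there, so lock-free updates are written as read/compute/retry loops. A minimal sketch; example_stat and example_add() are hypothetical.

static unsigned long example_stat;

static inline void example_add(unsigned long delta)
{
	unsigned long old, new;

	do {
		old = example_stat;
		new = old + delta;
		/* retry if another CPU updated example_stat in the meantime */
	} while (cmpxchg(&example_stat, old, new) != old);
}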
297 #ifdef __KERNEL__ 297 #ifdef __KERNEL__
298 struct alt_instr { 298 struct alt_instr {
299 __u8 *instr; /* original instruction */ 299 __u8 *instr; /* original instruction */
300 __u8 *replacement; 300 __u8 *replacement;
301 __u8 cpuid; /* cpuid bit set for replacement */ 301 __u8 cpuid; /* cpuid bit set for replacement */
302 __u8 instrlen; /* length of original instruction */ 302 __u8 instrlen; /* length of original instruction */
303 __u8 replacementlen; /* length of new instruction, <= instrlen */ 303 __u8 replacementlen; /* length of new instruction, <= instrlen */
304 __u8 pad; 304 __u8 pad;
305 }; 305 };
306 #endif 306 #endif
307 307
308 /* 308 /*
309 * Alternative instructions for different CPU types or capabilities. 309 * Alternative instructions for different CPU types or capabilities.
310 * 310 *
311 * This allows the use of optimized instructions even on generic binary 311 * This allows the use of optimized instructions even on generic binary
312 * kernels. 312 * kernels.
313 * 313 *
314 * The length of oldinstr must be greater than or equal to that of newinstr. 314 * The length of oldinstr must be greater than or equal to that of newinstr.
315 * It can be padded with nops as needed. 315 * It can be padded with nops as needed.
316 * 316 *
317 * For non-barrier-like inlines please define new variants 317 * For non-barrier-like inlines please define new variants
318 * without volatile and memory clobber. 318 * without volatile and memory clobber.
319 */ 319 */
320 #define alternative(oldinstr, newinstr, feature) \ 320 #define alternative(oldinstr, newinstr, feature) \
321 asm volatile ("661:\n\t" oldinstr "\n662:\n" \ 321 asm volatile ("661:\n\t" oldinstr "\n662:\n" \
322 ".section .altinstructions,\"a\"\n" \ 322 ".section .altinstructions,\"a\"\n" \
323 " .align 4\n" \ 323 " .align 4\n" \
324 " .long 661b\n" /* label */ \ 324 " .long 661b\n" /* label */ \
325 " .long 663f\n" /* new instruction */ \ 325 " .long 663f\n" /* new instruction */ \
326 " .byte %c0\n" /* feature bit */ \ 326 " .byte %c0\n" /* feature bit */ \
327 " .byte 662b-661b\n" /* sourcelen */ \ 327 " .byte 662b-661b\n" /* sourcelen */ \
328 " .byte 664f-663f\n" /* replacementlen */ \ 328 " .byte 664f-663f\n" /* replacementlen */ \
329 ".previous\n" \ 329 ".previous\n" \
330 ".section .altinstr_replacement,\"ax\"\n" \ 330 ".section .altinstr_replacement,\"ax\"\n" \
331 "663:\n\t" newinstr "\n664:\n" /* replacement */ \ 331 "663:\n\t" newinstr "\n664:\n" /* replacement */ \
332 ".previous" :: "i" (feature) : "memory") 332 ".previous" :: "i" (feature) : "memory")
333 333
334 /* 334 /*
335 * Alternative inline assembly with input. 335 * Alternative inline assembly with input.
336 * 336 *
337 * Peculiarities: 337 * Peculiarities:
338 * No memory clobber here. 338 * No memory clobber here.
339 * Argument numbers start with 1. 339 * Argument numbers start with 1.
340 * Best is to use constraints that are fixed size (like (%1) ... "r") 340 * Best is to use constraints that are fixed size (like (%1) ... "r")
341 * If you use variable-sized constraints like "m" or "g" in the 341 * If you use variable-sized constraints like "m" or "g" in the
342 * replacement, make sure to pad to the worst case length. 342 * replacement, make sure to pad to the worst case length.
343 */ 343 */
344 #define alternative_input(oldinstr, newinstr, feature, input...) \ 344 #define alternative_input(oldinstr, newinstr, feature, input...) \
345 asm volatile ("661:\n\t" oldinstr "\n662:\n" \ 345 asm volatile ("661:\n\t" oldinstr "\n662:\n" \
346 ".section .altinstructions,\"a\"\n" \ 346 ".section .altinstructions,\"a\"\n" \
347 " .align 4\n" \ 347 " .align 4\n" \
348 " .long 661b\n" /* label */ \ 348 " .long 661b\n" /* label */ \
349 " .long 663f\n" /* new instruction */ \ 349 " .long 663f\n" /* new instruction */ \
350 " .byte %c0\n" /* feature bit */ \ 350 " .byte %c0\n" /* feature bit */ \
351 " .byte 662b-661b\n" /* sourcelen */ \ 351 " .byte 662b-661b\n" /* sourcelen */ \
352 " .byte 664f-663f\n" /* replacementlen */ \ 352 " .byte 664f-663f\n" /* replacementlen */ \
353 ".previous\n" \ 353 ".previous\n" \
354 ".section .altinstr_replacement,\"ax\"\n" \ 354 ".section .altinstr_replacement,\"ax\"\n" \
355 "663:\n\t" newinstr "\n664:\n" /* replacement */ \ 355 "663:\n\t" newinstr "\n664:\n" /* replacement */ \
356 ".previous" :: "i" (feature), ##input) 356 ".previous" :: "i" (feature), ##input)
357 357
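alternative_input() patches in newinstr at boot on CPUs that have the given feature bit and otherwise leaves oldinstr in place, forwarding the extra operands to both. A hedged sketch in the spirit of the constraints described above; example_prefetch() is illustrative and the nop padding is only an assumption about instruction lengths.

static inline void example_prefetch(const void *x)
{
	/* prefetchnta on SSE-capable CPUs, harmless nops elsewhere */
	alternative_input("nop; nop; nop; nop",		/* oldinstr, >= newinstr length */
			  "prefetchnta (%1)",		/* newinstr, fixed-size "r" operand */
			  X86_FEATURE_XMM,
			  "r" (x));
}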
358 /* 358 /*
359 * Force strict CPU ordering. 359 * Force strict CPU ordering.
360 * And yes, this is required on UP too when we're talking 360 * And yes, this is required on UP too when we're talking
361 * to devices. 361 * to devices.
362 * 362 *
363 * For now, "wmb()" doesn't actually do anything, as all 363 * For now, "wmb()" doesn't actually do anything, as all
364 * Intel CPUs follow what Intel calls a *Processor Order*, 364 * Intel CPUs follow what Intel calls a *Processor Order*,
365 * in which all writes are seen in the program order even 365 * in which all writes are seen in the program order even
366 * outside the CPU. 366 * outside the CPU.
367 * 367 *
368 * I expect future Intel CPUs to have a weaker ordering, 368 * I expect future Intel CPUs to have a weaker ordering,
369 * but I'd also expect them to finally get their act together 369 * but I'd also expect them to finally get their act together
370 * and add some real memory barriers if so. 370 * and add some real memory barriers if so.
371 * 371 *
372 * Some non-Intel clones support out-of-order stores. wmb() ceases to be a 372 * Some non-Intel clones support out-of-order stores. wmb() ceases to be a
373 * nop for these. 373 * nop for these.
374 */ 374 */
375 375
376 376
377 /* 377 /*
378 * Actually only lfence would be needed for mb() because all stores done 378 * Actually only lfence would be needed for mb() because all stores done
379 * by the kernel should be already ordered. But keep a full barrier for now. 379 * by the kernel should be already ordered. But keep a full barrier for now.
380 */ 380 */
381 381
382 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) 382 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
383 #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) 383 #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
384 384
385 /** 385 /**
386 * read_barrier_depends - Flush all pending reads that subsequent reads 386 * read_barrier_depends - Flush all pending reads that subsequent reads
387 * depend on. 387 * depend on.
388 * 388 *
389 * No data-dependent reads from memory-like regions are ever reordered 389 * No data-dependent reads from memory-like regions are ever reordered
390 * over this barrier. All reads preceding this primitive are guaranteed 390 * over this barrier. All reads preceding this primitive are guaranteed
391 * to access memory (but not necessarily other CPUs' caches) before any 391 * to access memory (but not necessarily other CPUs' caches) before any
392 * reads following this primitive that depend on the data returned by 392 * reads following this primitive that depend on the data returned by
393 * any of the preceding reads. This primitive is much lighter weight than 393 * any of the preceding reads. This primitive is much lighter weight than
394 * rmb() on most CPUs, and is never heavier weight than is 394 * rmb() on most CPUs, and is never heavier weight than is
395 * rmb(). 395 * rmb().
396 * 396 *
397 * These ordering constraints are respected by both the local CPU 397 * These ordering constraints are respected by both the local CPU
398 * and the compiler. 398 * and the compiler.
399 * 399 *
400 * Ordering is not guaranteed by anything other than these primitives, 400 * Ordering is not guaranteed by anything other than these primitives,
401 * not even by data dependencies. See the documentation for 401 * not even by data dependencies. See the documentation for
402 * memory_barrier() for examples and URLs to more information. 402 * memory_barrier() for examples and URLs to more information.
403 * 403 *
404 * For example, the following code would force ordering (the initial 404 * For example, the following code would force ordering (the initial
405 * value of "a" is zero, "b" is one, and "p" is "&a"): 405 * value of "a" is zero, "b" is one, and "p" is "&a"):
406 * 406 *
407 * <programlisting> 407 * <programlisting>
408 * CPU 0 CPU 1 408 * CPU 0 CPU 1
409 * 409 *
410 * b = 2; 410 * b = 2;
411 * memory_barrier(); 411 * memory_barrier();
412 * p = &b; q = p; 412 * p = &b; q = p;
413 * read_barrier_depends(); 413 * read_barrier_depends();
414 * d = *q; 414 * d = *q;
415 * </programlisting> 415 * </programlisting>
416 * 416 *
417 * because the read of "*q" depends on the read of "p" and these 417 * because the read of "*q" depends on the read of "p" and these
418 * two reads are separated by a read_barrier_depends(). However, 418 * two reads are separated by a read_barrier_depends(). However,
419 * the following code, with the same initial values for "a" and "b": 419 * the following code, with the same initial values for "a" and "b":
420 * 420 *
421 * <programlisting> 421 * <programlisting>
422 * CPU 0 CPU 1 422 * CPU 0 CPU 1
423 * 423 *
424 * a = 2; 424 * a = 2;
425 * memory_barrier(); 425 * memory_barrier();
426 * b = 3; y = b; 426 * b = 3; y = b;
427 * read_barrier_depends(); 427 * read_barrier_depends();
428 * x = a; 428 * x = a;
429 * </programlisting> 429 * </programlisting>
430 * 430 *
431 * does not enforce ordering, since there is no data dependency between 431 * does not enforce ordering, since there is no data dependency between
432 * the read of "a" and the read of "b". Therefore, on some CPUs, such 432 * the read of "a" and the read of "b". Therefore, on some CPUs, such
433 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() 433 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
434 * in cases like this where there are no data dependencies. 434 * in cases like this where there are no data dependencies.
435 **/ 435 **/
436 436
437 #define read_barrier_depends() do { } while(0) 437 #define read_barrier_depends() do { } while(0)
438 438
439 #ifdef CONFIG_X86_OOSTORE 439 #ifdef CONFIG_X86_OOSTORE
440 /* Actually there are no OOO store capable CPUs for now that do SSE, 440 /* Actually there are no OOO store capable CPUs for now that do SSE,
441 but make it already a possibility. */ 441 but make it already a possibility. */
442 #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) 442 #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
443 #else 443 #else
444 #define wmb() __asm__ __volatile__ ("": : :"memory") 444 #define wmb() __asm__ __volatile__ ("": : :"memory")
445 #endif 445 #endif
446 446
447 #ifdef CONFIG_SMP 447 #ifdef CONFIG_SMP
448 #define smp_mb() mb() 448 #define smp_mb() mb()
449 #define smp_rmb() rmb() 449 #define smp_rmb() rmb()
450 #define smp_wmb() wmb() 450 #define smp_wmb() wmb()
451 #define smp_read_barrier_depends() read_barrier_depends() 451 #define smp_read_barrier_depends() read_barrier_depends()
452 #define set_mb(var, value) do { xchg(&var, value); } while (0) 452 #define set_mb(var, value) do { xchg(&var, value); } while (0)
453 #else 453 #else
454 #define smp_mb() barrier() 454 #define smp_mb() barrier()
455 #define smp_rmb() barrier() 455 #define smp_rmb() barrier()
456 #define smp_wmb() barrier() 456 #define smp_wmb() barrier()
457 #define smp_read_barrier_depends() do { } while(0) 457 #define smp_read_barrier_depends() do { } while(0)
458 #define set_mb(var, value) do { var = value; barrier(); } while (0) 458 #define set_mb(var, value) do { var = value; barrier(); } while (0)
459 #endif 459 #endif
460 460
461 #define set_wmb(var, value) do { var = value; wmb(); } while (0) 461 #define set_wmb(var, value) do { var = value; wmb(); } while (0)
462 462
463 /* interrupt control.. */ 463 /* interrupt control.. */
464 #define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0) 464 #define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
465 #define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0) 465 #define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
466 #define local_irq_disable() __asm__ __volatile__("cli": : :"memory") 466 #define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
467 #define local_irq_enable() __asm__ __volatile__("sti": : :"memory") 467 #define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
468 /* used in the idle loop; sti takes one instruction cycle to complete */ 468 /* used in the idle loop; sti takes one instruction cycle to complete */
469 #define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") 469 #define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
470 /* used when interrupts are already enabled or to shutdown the processor */ 470 /* used when interrupts are already enabled or to shutdown the processor */
471 #define halt() __asm__ __volatile__("hlt": : :"memory") 471 #define halt() __asm__ __volatile__("hlt": : :"memory")
472 472
473 #define irqs_disabled() \ 473 #define irqs_disabled() \
474 ({ \ 474 ({ \
475 unsigned long flags; \ 475 unsigned long flags; \
476 local_save_flags(flags); \ 476 local_save_flags(flags); \
477 !(flags & (1<<9)); \ 477 !(flags & (1<<9)); \
478 }) 478 })
479 479
480 /* For spinlocks etc */ 480 /* For spinlocks etc */
481 #define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") 481 #define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
482 482
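local_irq_save() saves EFLAGS and disables interrupts on the local CPU, and local_irq_restore() puts the saved flags back, so the pair nests correctly whether or not interrupts were enabled on entry. A minimal sketch; example_shared and example_update() are hypothetical.

static unsigned long example_shared;

static inline void example_update(unsigned long val)
{
	unsigned long flags;

	local_irq_save(flags);		/* pushfl ; popl flags ; cli */
	example_shared = val;		/* not racing against local interrupt handlers */
	local_irq_restore(flags);	/* pushl flags ; popfl */
}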
483 /* 483 /*
484 * disable hlt during certain critical i/o operations 484 * disable hlt during certain critical i/o operations
485 */ 485 */
486 #define HAVE_DISABLE_HLT 486 #define HAVE_DISABLE_HLT
487 void disable_hlt(void); 487 void disable_hlt(void);
488 void enable_hlt(void); 488 void enable_hlt(void);
489 489
490 extern int es7000_plat; 490 extern int es7000_plat;
491 void cpu_idle_wait(void); 491 void cpu_idle_wait(void);
492 492
493 extern unsigned long arch_align_stack(unsigned long sp); 493 extern unsigned long arch_align_stack(unsigned long sp);
494 494
495 #endif 495 #endif
496 496