Commit 4bb0d3ec3e5b1e9e2399cdc641b3b6521ac9cdaa

Authored by Zachary Amsden
Committed by Linus Torvalds
1 parent 2a0694d15d

[PATCH] i386: inline asm cleanup

i386 inline asm cleanup.  Use CR/DR accessor functions.

This also includes a potential bugfix.  In addition, some CR accessors really
should be volatile: reads from CR0 (numeric state may change in an exception
handler), writes to CR4 (flipping CR4.TSD) and reads from CR2 (the page fault
address) must not be reordered or optimized away.  I did not add a memory
clobber to the CR3 / CR4 / CR0 updates, as none was there to begin with, and
in no case should kernel memory be clobbered, except when doing a TLB flush,
which already has a memory clobber.
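
As a minimal illustration of the volatile form, here is a condensed sketch of
the read_cr2() accessor this patch adds to include/asm-i386/system.h (see the
hunk below); without __volatile__ the compiler could reorder or coalesce the
read relative to surrounding code, which is exactly the hazard for CR2:

#define read_cr2() ({						\
	unsigned int __dummy;					\
	/* volatile: CR2 holds the faulting address */		\
	__asm__ __volatile__("movl %%cr2,%0\n\t"		\
		:"=r" (__dummy));				\
	__dummy;						\
})

do_page_fault() in arch/i386/mm/fault.c then simply does address = read_cr2().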

I noticed that page invalidation does not have a memory clobber.  I can't find
a bug caused by this, but there is definitely potential for one here:

#define __flush_tlb_single(addr) \
	__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 15 changed files with 84 additions and 83 deletions

arch/i386/kernel/cpu/common.c
... ... @@ -642,12 +642,12 @@
642 642 asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
643 643  
644 644 /* Clear all 6 debug registers: */
645   -
646   -#define CD(register) set_debugreg(0, register)
647   -
648   - CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
649   -
650   -#undef CD
  645 + set_debugreg(0, 0);
  646 + set_debugreg(0, 1);
  647 + set_debugreg(0, 2);
  648 + set_debugreg(0, 3);
  649 + set_debugreg(0, 6);
  650 + set_debugreg(0, 7);
651 651  
652 652 /*
653 653 * Force FPU initialization:
arch/i386/kernel/cpu/cpufreq/longhaul.c
... ... @@ -64,8 +64,6 @@
64 64 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
65 65  
66 66  
67   -#define __hlt() __asm__ __volatile__("hlt": : :"memory")
68   -
69 67 /* Clock ratios multiplied by 10 */
70 68 static int clock_ratio[32];
71 69 static int eblcr_table[32];
72 70  
... ... @@ -168,11 +166,9 @@
168 166 outb(0xFE,0x21); /* TMR0 only */
169 167 outb(0xFF,0x80); /* delay */
170 168  
171   - local_irq_enable();
172   -
173   - __hlt();
  169 + safe_halt();
174 170 wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
175   - __hlt();
  171 + halt();
176 172  
177 173 local_irq_disable();
178 174  
... ... @@ -251,9 +247,7 @@
251 247 bcr2.bits.CLOCKMUL = clock_ratio_index;
252 248 local_irq_disable();
253 249 wrmsrl (MSR_VIA_BCR2, bcr2.val);
254   - local_irq_enable();
255   -
256   - __hlt();
  250 + safe_halt();
257 251  
258 252 /* Disable software clock multiplier */
259 253 rdmsrl (MSR_VIA_BCR2, bcr2.val);
arch/i386/kernel/cpu/cyrix.c
... ... @@ -132,11 +132,7 @@
132 132 setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
133 133 /* set 'Not Write-through' */
134 134 cr0 = 0x20000000;
135   - __asm__("movl %%cr0,%%eax\n\t"
136   - "orl %0,%%eax\n\t"
137   - "movl %%eax,%%cr0\n"
138   - : : "r" (cr0)
139   - :"ax");
  135 + write_cr0(read_cr0() | cr0);
140 136 /* CCR2 bit 2: lock NW bit and set WT1 */
141 137 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
142 138 }
arch/i386/kernel/efi.c
... ... @@ -79,7 +79,7 @@
79 79 * directory. If I have PSE, I just need to duplicate one entry in
80 80 * page directory.
81 81 */
82   - __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
  82 + cr4 = read_cr4();
83 83  
84 84 if (cr4 & X86_CR4_PSE) {
85 85 efi_bak_pg_dir_pointer[0].pgd =
... ... @@ -115,7 +115,7 @@
115 115 cpu_gdt_descr[0].address =
116 116 (unsigned long) __va(cpu_gdt_descr[0].address);
117 117 __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
118   - __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
  118 + cr4 = read_cr4();
119 119  
120 120 if (cr4 & X86_CR4_PSE) {
121 121 swapper_pg_dir[pgd_index(0)].pgd =
arch/i386/kernel/machine_kexec.c
... ... @@ -17,13 +17,7 @@
17 17 #include <asm/apic.h>
18 18 #include <asm/cpufeature.h>
19 19 #include <asm/desc.h>
20   -
21   -static inline unsigned long read_cr3(void)
22   -{
23   - unsigned long cr3;
24   - asm volatile("movl %%cr3,%0": "=r"(cr3));
25   - return cr3;
26   -}
  20 +#include <asm/system.h>
27 21  
28 22 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
29 23  
arch/i386/kernel/process.c
... ... @@ -313,16 +313,12 @@
313 313 printk(" DS: %04x ES: %04x\n",
314 314 0xffff & regs->xds,0xffff & regs->xes);
315 315  
316   - __asm__("movl %%cr0, %0": "=r" (cr0));
317   - __asm__("movl %%cr2, %0": "=r" (cr2));
318   - __asm__("movl %%cr3, %0": "=r" (cr3));
319   - /* This could fault if %cr4 does not exist */
320   - __asm__("1: movl %%cr4, %0 \n"
321   - "2: \n"
322   - ".section __ex_table,\"a\" \n"
323   - ".long 1b,2b \n"
324   - ".previous \n"
325   - : "=r" (cr4): "0" (0));
  316 + cr0 = read_cr0();
  317 + cr2 = read_cr2();
  318 + cr3 = read_cr3();
  319 + if (current_cpu_data.x86 > 4) {
  320 + cr4 = read_cr4();
  321 + }
326 322 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
327 323 show_trace(NULL, &regs->esp);
328 324 }
arch/i386/kernel/smp.c
... ... @@ -576,7 +576,7 @@
576 576 local_irq_disable();
577 577 disable_local_APIC();
578 578 if (cpu_data[smp_processor_id()].hlt_works_ok)
579   - for(;;) __asm__("hlt");
  579 + for(;;) halt();
580 580 for (;;);
581 581 }
582 582  
arch/i386/mm/fault.c
... ... @@ -233,7 +233,7 @@
233 233 int write, si_code;
234 234  
235 235 /* get the address */
236   - __asm__("movl %%cr2,%0":"=r" (address));
  236 + address = read_cr2();
237 237  
238 238 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
239 239 SIGSEGV) == NOTIFY_STOP)
... ... @@ -453,7 +453,7 @@
453 453 printk(" at virtual address %08lx\n",address);
454 454 printk(KERN_ALERT " printing eip:\n");
455 455 printk("%08lx\n", regs->eip);
456   - asm("movl %%cr3,%0":"=r" (page));
  456 + page = read_cr3();
457 457 page = ((unsigned long *) __va(page))[address >> 22];
458 458 printk(KERN_ALERT "*pde = %08lx\n", page);
459 459 /*
... ... @@ -526,7 +526,7 @@
526 526 pmd_t *pmd, *pmd_k;
527 527 pte_t *pte_k;
528 528  
529   - asm("movl %%cr3,%0":"=r" (pgd_paddr));
  529 + pgd_paddr = read_cr3();
530 530 pgd = index + (pgd_t *)__va(pgd_paddr);
531 531 pgd_k = init_mm.pgd + index;
532 532  
arch/i386/mm/pageattr.c
... ... @@ -62,7 +62,7 @@
62 62 {
63 63 /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
64 64 if (boot_cpu_data.x86_model >= 4)
65   - asm volatile("wbinvd":::"memory");
  65 + wbinvd();
66 66 /* Flush all to work around Errata in early athlons regarding
67 67 * large page flushing.
68 68 */
arch/i386/power/cpu.c
... ... @@ -57,10 +57,10 @@
57 57 /*
58 58 * control registers
59 59 */
60   - asm volatile ("movl %%cr0, %0" : "=r" (ctxt->cr0));
61   - asm volatile ("movl %%cr2, %0" : "=r" (ctxt->cr2));
62   - asm volatile ("movl %%cr3, %0" : "=r" (ctxt->cr3));
63   - asm volatile ("movl %%cr4, %0" : "=r" (ctxt->cr4));
  60 + ctxt->cr0 = read_cr0();
  61 + ctxt->cr2 = read_cr2();
  62 + ctxt->cr3 = read_cr3();
  63 + ctxt->cr4 = read_cr4();
64 64 }
65 65  
66 66 void save_processor_state(void)
... ... @@ -109,10 +109,10 @@
109 109 /*
110 110 * control registers
111 111 */
112   - asm volatile ("movl %0, %%cr4" :: "r" (ctxt->cr4));
113   - asm volatile ("movl %0, %%cr3" :: "r" (ctxt->cr3));
114   - asm volatile ("movl %0, %%cr2" :: "r" (ctxt->cr2));
115   - asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0));
  112 + write_cr4(ctxt->cr4);
  113 + write_cr3(ctxt->cr3);
  114 + write_cr2(ctxt->cr2);
  115 + write_cr0(ctxt->cr0);
116 116  
117 117 /*
118 118 * now restore the descriptor tables to their proper values
include/asm-i386/agp.h
... ... @@ -19,7 +19,7 @@
19 19 /* Could use CLFLUSH here if the cpu supports it. But then it would
20 20 need to be called for each cacheline of the whole page so it may not be
21 21 worth it. Would need a page for it. */
22   -#define flush_agp_cache() asm volatile("wbinvd":::"memory")
  22 +#define flush_agp_cache() wbinvd()
23 23  
24 24 /* Convert a physical address to an address suitable for the GART. */
25 25 #define phys_to_gart(x) (x)
include/asm-i386/bugs.h
... ... @@ -118,7 +118,10 @@
118 118 printk("disabled\n");
119 119 return;
120 120 }
121   - __asm__ __volatile__("hlt ; hlt ; hlt ; hlt");
  121 + halt();
  122 + halt();
  123 + halt();
  124 + halt();
122 125 printk("OK.\n");
123 126 }
124 127  
include/asm-i386/processor.h
... ... @@ -203,10 +203,8 @@
203 203 return edx;
204 204 }
205 205  
206   -#define load_cr3(pgdir) \
207   - asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir)))
  206 +#define load_cr3(pgdir) write_cr3(__pa(pgdir))
208 207  
209   -
210 208 /*
211 209 * Intel CPU features in CR4
212 210 */
213 211  
214 212  
215 213  
... ... @@ -232,22 +230,20 @@
232 230  
233 231 static inline void set_in_cr4 (unsigned long mask)
234 232 {
  233 + unsigned cr4;
235 234 mmu_cr4_features |= mask;
236   - __asm__("movl %%cr4,%%eax\n\t"
237   - "orl %0,%%eax\n\t"
238   - "movl %%eax,%%cr4\n"
239   - : : "irg" (mask)
240   - :"ax");
  235 + cr4 = read_cr4();
  236 + cr4 |= mask;
  237 + write_cr4(cr4);
241 238 }
242 239  
243 240 static inline void clear_in_cr4 (unsigned long mask)
244 241 {
  242 + unsigned cr4;
245 243 mmu_cr4_features &= ~mask;
246   - __asm__("movl %%cr4,%%eax\n\t"
247   - "andl %0,%%eax\n\t"
248   - "movl %%eax,%%cr4\n"
249   - : : "irg" (~mask)
250   - :"ax");
  244 + cr4 = read_cr4();
  245 + cr4 &= ~mask;
  246 + write_cr4(cr4);
251 247 }
252 248  
253 249 /*
include/asm-i386/system.h
... ... @@ -107,14 +107,34 @@
107 107 #define clts() __asm__ __volatile__ ("clts")
108 108 #define read_cr0() ({ \
109 109 unsigned int __dummy; \
110   - __asm__( \
  110 + __asm__ __volatile__( \
111 111 "movl %%cr0,%0\n\t" \
112 112 :"=r" (__dummy)); \
113 113 __dummy; \
114 114 })
115 115 #define write_cr0(x) \
116   - __asm__("movl %0,%%cr0": :"r" (x));
  116 + __asm__ __volatile__("movl %0,%%cr0": :"r" (x));
117 117  
  118 +#define read_cr2() ({ \
  119 + unsigned int __dummy; \
  120 + __asm__ __volatile__( \
  121 + "movl %%cr2,%0\n\t" \
  122 + :"=r" (__dummy)); \
  123 + __dummy; \
  124 +})
  125 +#define write_cr2(x) \
  126 + __asm__ __volatile__("movl %0,%%cr2": :"r" (x));
  127 +
  128 +#define read_cr3() ({ \
  129 + unsigned int __dummy; \
  130 + __asm__ ( \
  131 + "movl %%cr3,%0\n\t" \
  132 + :"=r" (__dummy)); \
  133 + __dummy; \
  134 +})
  135 +#define write_cr3(x) \
  136 + __asm__ __volatile__("movl %0,%%cr3": :"r" (x));
  137 +
118 138 #define read_cr4() ({ \
119 139 unsigned int __dummy; \
120 140 __asm__( \
... ... @@ -123,7 +143,7 @@
123 143 __dummy; \
124 144 })
125 145 #define write_cr4(x) \
126   - __asm__("movl %0,%%cr4": :"r" (x));
  146 + __asm__ __volatile__("movl %0,%%cr4": :"r" (x));
127 147 #define stts() write_cr0(8 | read_cr0())
128 148  
129 149 #endif /* __KERNEL__ */
... ... @@ -447,6 +467,8 @@
447 467 #define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
448 468 /* used in the idle loop; sti takes one instruction cycle to complete */
449 469 #define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
  470 +/* used when interrupts are already enabled or to shutdown the processor */
  471 +#define halt() __asm__ __volatile__("hlt": : :"memory")
450 472  
451 473 #define irqs_disabled() \
452 474 ({ \
include/asm-i386/xor.h
... ... @@ -535,14 +535,14 @@
535 535  
536 536 #define XMMS_SAVE do { \
537 537 preempt_disable(); \
  538 + cr0 = read_cr0(); \
  539 + clts(); \
538 540 __asm__ __volatile__ ( \
539   - "movl %%cr0,%0 ;\n\t" \
540   - "clts ;\n\t" \
541   - "movups %%xmm0,(%1) ;\n\t" \
542   - "movups %%xmm1,0x10(%1) ;\n\t" \
543   - "movups %%xmm2,0x20(%1) ;\n\t" \
544   - "movups %%xmm3,0x30(%1) ;\n\t" \
545   - : "=&r" (cr0) \
  541 + "movups %%xmm0,(%0) ;\n\t" \
  542 + "movups %%xmm1,0x10(%0) ;\n\t" \
  543 + "movups %%xmm2,0x20(%0) ;\n\t" \
  544 + "movups %%xmm3,0x30(%0) ;\n\t" \
  545 + : \
546 546 : "r" (xmm_save) \
547 547 : "memory"); \
548 548 } while(0)
549 549  
550 550  
... ... @@ -550,14 +550,14 @@
550 550 #define XMMS_RESTORE do { \
551 551 __asm__ __volatile__ ( \
552 552 "sfence ;\n\t" \
553   - "movups (%1),%%xmm0 ;\n\t" \
554   - "movups 0x10(%1),%%xmm1 ;\n\t" \
555   - "movups 0x20(%1),%%xmm2 ;\n\t" \
556   - "movups 0x30(%1),%%xmm3 ;\n\t" \
557   - "movl %0,%%cr0 ;\n\t" \
  553 + "movups (%0),%%xmm0 ;\n\t" \
  554 + "movups 0x10(%0),%%xmm1 ;\n\t" \
  555 + "movups 0x20(%0),%%xmm2 ;\n\t" \
  556 + "movups 0x30(%0),%%xmm3 ;\n\t" \
558 557 : \
559   - : "r" (cr0), "r" (xmm_save) \
  558 + : "r" (xmm_save) \
560 559 : "memory"); \
  560 + write_cr0(cr0); \
561 561 preempt_enable(); \
562 562 } while(0)
563 563