Commit 139ec7c416248b9ea227d21839235344edfee1e0

Authored by Rusty Russell
Committed by Andi Kleen
1 parent d3561b7fa0

[PATCH] paravirt: Patch inline replacements for paravirt intercepts

It turns out that the most frequently called ops, by several orders of
magnitude, are the interrupt-manipulation ops.  These are obvious candidates
for patching, so mark them up and create infrastructure for it.

The method used is that the ops structure has a patch function, which
is called for each site that needs to be patched: it returns the length
of the replacement code it emitted, and the rest of the site is NOP-padded.
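
As a sketch of that flow (condensed from the apply_paravirt() and
nop_out() hunks added below; the debug clobbering and the trailing
sync_core() are omitted, and the wrapper name is ours):

	/* One record per patchable call site, emitted into .parainstructions */
	struct paravirt_patch {
		u8 *instr;      /* original instructions */
		u8 instrtype;   /* type of this instruction */
		u8 len;         /* length of original instruction */
		u16 clobbers;   /* what registers you may clobber */
	};

	static void patch_sites(struct paravirt_patch *start,
				struct paravirt_patch *end)
	{
		struct paravirt_patch *p;

		for (p = start; p < end; p++) {
			/* The backend reports how many bytes it emitted... */
			unsigned used = paravirt_ops.patch(p->instrtype,
							   p->clobbers,
							   p->instr, p->len);
			/* ...and the generic code NOP-pads whatever is left. */
			nop_out(p->instr + used, p->len - used);
		}
	}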

Usually we can spare a register (%eax) for the binary-patched code to
use, but in a couple of critical places in entry.S we can't: there we make
the clobbers explicit at the call site, and manually clobber the
allowed registers in debug mode as an extra check.
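
The debug check appends real "not %reg" instructions for every register
a site claims it may clobber, so a site that lies about its clobbers
breaks loudly even on native hardware.  A condensed sketch of the hunk
added to apply_paravirt() below (the helper name is ours; the real code
open-codes this under CONFIG_DEBUG_PARAVIRT):

	static unsigned clobber_declared_regs(struct paravirt_patch *p,
					      unsigned used)
	{
		int i;

		/* Bit 0 = %eax, bit 1 = %ecx, bit 2 = %edx (CLBR_* masks). */
		for (i = 0; i < 3; i++) {
			if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
				p->instr[used]     = 0xf7;      /* group-3 opcode */
				p->instr[used + 1] = 0xd0 | i;  /* NOT %eax/%ecx/%edx */
				used += 2;
			}
		}
		return used;
	}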

And:

Don't abuse CONFIG_DEBUG_KERNEL, add CONFIG_DEBUG_PARAVIRT.

And:

AK:  Fix warnings in x86-64 alternative.c build

And:

AK: Fix compilation with defconfig

And:

From: Andrew Morton <akpm@osdl.org>

Some binutils versions still like to emit references to
__stop_parainstructions and __start_parainstructions.

And:

AK: Fix warnings about unused variables when PARAVIRT is disabled.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>

Showing 14 changed files with 459 additions and 188 deletions

arch/i386/Kconfig.debug
... ... @@ -85,5 +85,15 @@
85 85 option saves about 4k and might cause you much additional grey
86 86 hair.
87 87  
  88 +config DEBUG_PARAVIRT
  89 + bool "Enable some paravirtualization debugging"
  90 + default y
  91 + depends on PARAVIRT && DEBUG_KERNEL
  92 + help
  93 + Currently deliberately clobbers regs which are allowed to be
  94 + clobbered in inlined paravirt hooks, even in native mode.
  95 + If turning this off solves a problem, then DISABLE_INTERRUPTS() or
  96 + ENABLE_INTERRUPTS() is lying about what registers can be clobbered.
  97 +
88 98 endmenu
arch/i386/kernel/alternative.c
... ... @@ -124,6 +124,20 @@
124 124  
125 125 #endif /* CONFIG_X86_64 */
126 126  
  127 +static void nop_out(void *insns, unsigned int len)
  128 +{
  129 + unsigned char **noptable = find_nop_table();
  130 +
  131 + while (len > 0) {
  132 + unsigned int noplen = len;
  133 + if (noplen > ASM_NOP_MAX)
  134 + noplen = ASM_NOP_MAX;
  135 + memcpy(insns, noptable[noplen], noplen);
  136 + insns += noplen;
  137 + len -= noplen;
  138 + }
  139 +}
  140 +
127 141 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
128 142 extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
129 143 extern u8 *__smp_locks[], *__smp_locks_end[];
130 144  
... ... @@ -138,10 +152,9 @@
138 152  
139 153 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
140 154 {
141   - unsigned char **noptable = find_nop_table();
142 155 struct alt_instr *a;
143 156 u8 *instr;
144   - int diff, i, k;
  157 + int diff;
145 158  
146 159 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
147 160 for (a = start; a < end; a++) {
... ... @@ -159,13 +172,7 @@
159 172 #endif
160 173 memcpy(instr, a->replacement, a->replacementlen);
161 174 diff = a->instrlen - a->replacementlen;
162   - /* Pad the rest with nops */
163   - for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
164   - k = diff;
165   - if (k > ASM_NOP_MAX)
166   - k = ASM_NOP_MAX;
167   - memcpy(a->instr + i, noptable[k], k);
168   - }
  175 + nop_out(instr + a->replacementlen, diff);
169 176 }
170 177 }
171 178  
... ... @@ -209,7 +216,6 @@
209 216  
210 217 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
211 218 {
212   - unsigned char **noptable = find_nop_table();
213 219 u8 **ptr;
214 220  
215 221 for (ptr = start; ptr < end; ptr++) {
... ... @@ -217,7 +223,7 @@
217 223 continue;
218 224 if (*ptr > text_end)
219 225 continue;
220   - **ptr = noptable[1][0];
  226 + nop_out(*ptr, 1);
221 227 };
222 228 }
223 229  
... ... @@ -343,6 +349,40 @@
343 349  
344 350 #endif
345 351  
  352 +#ifdef CONFIG_PARAVIRT
  353 +void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
  354 +{
  355 + struct paravirt_patch *p;
  356 +
  357 + for (p = start; p < end; p++) {
  358 + unsigned int used;
  359 +
  360 + used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
  361 + p->len);
  362 +#ifdef CONFIG_DEBUG_PARAVIRT
  363 + {
  364 + int i;
  365 + /* Deliberately clobber regs using "not %reg" to find bugs. */
  366 + for (i = 0; i < 3; i++) {
  367 + if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
  368 + memcpy(p->instr + used, "\xf7\xd0", 2);
  369 + p->instr[used+1] |= i;
  370 + used += 2;
  371 + }
  372 + }
  373 + }
  374 +#endif
  375 + /* Pad the rest with nops */
  376 + nop_out(p->instr + used, p->len - used);
  377 + }
  378 +
  379 + /* Sync to be conservative, in case we patched following instructions */
  380 + sync_core();
  381 +}
  382 +extern struct paravirt_patch __start_parainstructions[],
  383 + __stop_parainstructions[];
  384 +#endif /* CONFIG_PARAVIRT */
  385 +
346 386 void __init alternative_instructions(void)
347 387 {
348 388 unsigned long flags;
... ... @@ -390,6 +430,7 @@
390 430 alternatives_smp_switch(0);
391 431 }
392 432 #endif
  433 + apply_paravirt(__start_parainstructions, __stop_parainstructions);
393 434 local_irq_restore(flags);
394 435 }
arch/i386/kernel/entry.S
... ... @@ -53,6 +53,19 @@
53 53 #include <asm/dwarf2.h>
54 54 #include "irq_vectors.h"
55 55  
  56 +/*
  57 + * We use macros for low-level operations which need to be overridden
  58 + * for paravirtualization. The following will never clobber any registers:
  59 + * INTERRUPT_RETURN (aka. "iret")
  60 + * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
  61 + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  62 + *
  63 + * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  64 + * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
  65 + * Allowing a register to be clobbered can shrink the paravirt replacement
  66 + * enough to patch inline, increasing performance.
  67 + */
  68 +
56 69 #define nr_syscalls ((syscall_table_size)/4)
57 70  
58 71 CF_MASK = 0x00000001
59 72  
... ... @@ -63,9 +76,9 @@
63 76 VM_MASK = 0x00020000
64 77  
65 78 #ifdef CONFIG_PREEMPT
66   -#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
  79 +#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
67 80 #else
68   -#define preempt_stop
  81 +#define preempt_stop(clobbers)
69 82 #define resume_kernel restore_nocheck
70 83 #endif
71 84  
... ... @@ -226,7 +239,7 @@
226 239 ALIGN
227 240 RING0_PTREGS_FRAME
228 241 ret_from_exception:
229   - preempt_stop
  242 + preempt_stop(CLBR_ANY)
230 243 ret_from_intr:
231 244 GET_THREAD_INFO(%ebp)
232 245 check_userspace:
... ... @@ -237,7 +250,7 @@
237 250 jb resume_kernel # not returning to v8086 or userspace
238 251  
239 252 ENTRY(resume_userspace)
240   - DISABLE_INTERRUPTS # make sure we don't miss an interrupt
  253 + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
241 254 # setting need_resched or sigpending
242 255 # between sampling and the iret
243 256 movl TI_flags(%ebp), %ecx
... ... @@ -248,7 +261,7 @@
248 261  
249 262 #ifdef CONFIG_PREEMPT
250 263 ENTRY(resume_kernel)
251   - DISABLE_INTERRUPTS
  264 + DISABLE_INTERRUPTS(CLBR_ANY)
252 265 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
253 266 jnz restore_nocheck
254 267 need_resched:
... ... @@ -277,7 +290,7 @@
277 290 * No need to follow this irqs on/off section: the syscall
278 291 * disabled irqs and here we enable it straight after entry:
279 292 */
280   - ENABLE_INTERRUPTS
  293 + ENABLE_INTERRUPTS(CLBR_NONE)
281 294 pushl $(__USER_DS)
282 295 CFI_ADJUST_CFA_OFFSET 4
283 296 /*CFI_REL_OFFSET ss, 0*/
... ... @@ -322,7 +335,7 @@
322 335 jae syscall_badsys
323 336 call *sys_call_table(,%eax,4)
324 337 movl %eax,PT_EAX(%esp)
325   - DISABLE_INTERRUPTS
  338 + DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
326 339 TRACE_IRQS_OFF
327 340 movl TI_flags(%ebp), %ecx
328 341 testw $_TIF_ALLWORK_MASK, %cx
... ... @@ -364,7 +377,7 @@
364 377 call *sys_call_table(,%eax,4)
365 378 movl %eax,PT_EAX(%esp) # store the return value
366 379 syscall_exit:
367   - DISABLE_INTERRUPTS # make sure we don't miss an interrupt
  380 + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
368 381 # setting need_resched or sigpending
369 382 # between sampling and the iret
370 383 TRACE_IRQS_OFF
... ... @@ -393,7 +406,7 @@
393 406 .section .fixup,"ax"
394 407 iret_exc:
395 408 TRACE_IRQS_ON
396   - ENABLE_INTERRUPTS
  409 + ENABLE_INTERRUPTS(CLBR_NONE)
397 410 pushl $0 # no error code
398 411 pushl $do_iret_error
399 412 jmp error_code
... ... @@ -436,7 +449,7 @@
436 449 CFI_ADJUST_CFA_OFFSET 4
437 450 pushl %eax
438 451 CFI_ADJUST_CFA_OFFSET 4
439   - DISABLE_INTERRUPTS
  452 + DISABLE_INTERRUPTS(CLBR_EAX)
440 453 TRACE_IRQS_OFF
441 454 lss (%esp), %esp
442 455 CFI_ADJUST_CFA_OFFSET -8
... ... @@ -451,7 +464,7 @@
451 464 jz work_notifysig
452 465 work_resched:
453 466 call schedule
454   - DISABLE_INTERRUPTS # make sure we don't miss an interrupt
  467 + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
455 468 # setting need_resched or sigpending
456 469 # between sampling and the iret
457 470 TRACE_IRQS_OFF
... ... @@ -509,7 +522,7 @@
509 522 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
510 523 jz work_pending
511 524 TRACE_IRQS_ON
512   - ENABLE_INTERRUPTS # could let do_syscall_trace() call
  525 + ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
513 526 # schedule() instead
514 527 movl %esp, %eax
515 528 movl $1, %edx
... ... @@ -693,7 +706,7 @@
693 706 GET_CR0_INTO_EAX
694 707 testl $0x4, %eax # EM (math emulation bit)
695 708 jne device_not_available_emulate
696   - preempt_stop
  709 + preempt_stop(CLBR_ANY)
697 710 call math_state_restore
698 711 jmp ret_from_exception
699 712 device_not_available_emulate:
arch/i386/kernel/module.c
... ... @@ -108,7 +108,8 @@
108 108 const Elf_Shdr *sechdrs,
109 109 struct module *me)
110 110 {
111   - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
  111 + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
  112 + *para = NULL;
112 113 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
113 114  
114 115 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
... ... @@ -118,6 +119,8 @@
118 119 alt = s;
119 120 if (!strcmp(".smp_locks", secstrings + s->sh_name))
120 121 locks= s;
  122 + if (!strcmp(".parainstructions", secstrings + s->sh_name))
  123 + para = s;
121 124 }
122 125  
123 126 if (alt) {
... ... @@ -132,6 +135,12 @@
132 135 lseg, lseg + locks->sh_size,
133 136 tseg, tseg + text->sh_size);
134 137 }
  138 +
  139 + if (para) {
  140 + void *pseg = (void *)para->sh_addr;
  141 + apply_paravirt(pseg, pseg + para->sh_size);
  142 + }
  143 +
135 144 return 0;
136 145 }
137 146  
arch/i386/kernel/paravirt.c
... ... @@ -45,6 +45,49 @@
45 45 return paravirt_ops.memory_setup();
46 46 }
47 47  
  48 +/* Simple instruction patching code. */
  49 +#define DEF_NATIVE(name, code) \
  50 + extern const char start_##name[], end_##name[]; \
  51 + asm("start_" #name ": " code "; end_" #name ":")
  52 +DEF_NATIVE(cli, "cli");
  53 +DEF_NATIVE(sti, "sti");
  54 +DEF_NATIVE(popf, "push %eax; popf");
  55 +DEF_NATIVE(pushf, "pushf; pop %eax");
  56 +DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
  57 +DEF_NATIVE(iret, "iret");
  58 +DEF_NATIVE(sti_sysexit, "sti; sysexit");
  59 +
  60 +static const struct native_insns
  61 +{
  62 + const char *start, *end;
  63 +} native_insns[] = {
  64 + [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
  65 + [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
  66 + [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
  67 + [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
  68 + [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
  69 + [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
  70 + [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
  71 +};
  72 +
  73 +static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
  74 +{
  75 + unsigned int insn_len;
  76 +
  77 + /* Don't touch it if we don't have a replacement */
  78 + if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start)
  79 + return len;
  80 +
  81 + insn_len = native_insns[type].end - native_insns[type].start;
  82 +
  83 + /* Similarly if we can't fit replacement. */
  84 + if (len < insn_len)
  85 + return len;
  86 +
  87 + memcpy(insns, native_insns[type].start, insn_len);
  88 + return insn_len;
  89 +}
  90 +
48 91 static fastcall unsigned long native_get_debugreg(int regno)
49 92 {
50 93 unsigned long val = 0; /* Damn you, gcc! */
... ... @@ -349,6 +392,7 @@
349 392 .paravirt_enabled = 0,
350 393 .kernel_rpl = 0,
351 394  
  395 + .patch = native_patch,
352 396 .banner = default_banner,
353 397 .arch_setup = native_nop,
354 398 .memory_setup = machine_specific_memory_setup,
arch/i386/kernel/vmlinux.lds.S
... ... @@ -165,6 +165,12 @@
165 165 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
166 166 *(.altinstr_replacement)
167 167 }
  168 + . = ALIGN(4);
  169 + __start_parainstructions = .;
  170 + .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
  171 + *(.parainstructions)
  172 + }
  173 + __stop_parainstructions = .;
168 174 /* .exit.text is discard at runtime, not link time, to deal with references
169 175 from .altinstructions and .eh_frame */
170 176 .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
include/asm-i386/alternative.h
... ... @@ -4,7 +4,7 @@
4 4 #ifdef __KERNEL__
5 5  
6 6 #include <asm/types.h>
7   -
  7 +#include <linux/stddef.h>
8 8 #include <linux/types.h>
9 9  
10 10 struct alt_instr {
... ... @@ -116,6 +116,17 @@
116 116  
117 117 #else /* ! CONFIG_SMP */
118 118 #define LOCK_PREFIX ""
  119 +#endif
  120 +
  121 +struct paravirt_patch;
  122 +#ifdef CONFIG_PARAVIRT
  123 +void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
  124 +#else
  125 +static inline void
  126 +apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
  127 +{}
  128 +#define __start_parainstructions NULL
  129 +#define __stop_parainstructions NULL
119 130 #endif
120 131  
121 132 #endif /* _I386_ALTERNATIVE_H */
include/asm-i386/desc.h
... ... @@ -81,6 +81,10 @@
81 81 #undef C
82 82 }
83 83  
  84 +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
  85 +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
  86 +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
  87 +
84 88 static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
85 89 {
86 90 __u32 *lp = (__u32 *)((char *)dt + entry*8);
... ... @@ -88,26 +92,6 @@
88 92 *(lp+1) = entry_b;
89 93 }
90 94  
91   -#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
92   -#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
93   -#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
94   -
95   -static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
96   -{
97   - __u32 a, b;
98   - pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
99   - write_idt_entry(idt_table, gate, a, b);
100   -}
101   -
102   -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
103   -{
104   - __u32 a, b;
105   - pack_descriptor(&a, &b, (unsigned long)addr,
106   - offsetof(struct tss_struct, __cacheline_filler) - 1,
107   - DESCTYPE_TSS, 0);
108   - write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
109   -}
110   -
111 95 #define set_ldt native_set_ldt
112 96 #endif /* CONFIG_PARAVIRT */
113 97  
... ... @@ -127,6 +111,23 @@
127 111 __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
128 112 }
129 113 }
  114 +
  115 +static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
  116 +{
  117 + __u32 a, b;
  118 + pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
  119 + write_idt_entry(idt_table, gate, a, b);
  120 +}
  121 +
  122 +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
  123 +{
  124 + __u32 a, b;
  125 + pack_descriptor(&a, &b, (unsigned long)addr,
  126 + offsetof(struct tss_struct, __cacheline_filler) - 1,
  127 + DESCTYPE_TSS, 0);
  128 + write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
  129 +}
  130 +
130 131  
131 132 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
132 133  
include/asm-i386/irqflags.h
... ... @@ -79,8 +79,8 @@
79 79 }
80 80  
81 81 #else
82   -#define DISABLE_INTERRUPTS cli
83   -#define ENABLE_INTERRUPTS sti
  82 +#define DISABLE_INTERRUPTS(clobbers) cli
  83 +#define ENABLE_INTERRUPTS(clobbers) sti
84 84 #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
85 85 #define INTERRUPT_RETURN iret
86 86 #define GET_CR0_INTO_EAX movl %cr0, %eax
include/asm-i386/paravirt.h
... ... @@ -3,8 +3,26 @@
3 3 /* Various instructions on x86 need to be replaced for
4 4 * para-virtualization: those hooks are defined here. */
5 5 #include <linux/linkage.h>
  6 +#include <linux/stringify.h>
6 7  
7 8 #ifdef CONFIG_PARAVIRT
  9 +/* These are the most performance critical ops, so we want to be able to patch
  10 + * callers */
  11 +#define PARAVIRT_IRQ_DISABLE 0
  12 +#define PARAVIRT_IRQ_ENABLE 1
  13 +#define PARAVIRT_RESTORE_FLAGS 2
  14 +#define PARAVIRT_SAVE_FLAGS 3
  15 +#define PARAVIRT_SAVE_FLAGS_IRQ_DISABLE 4
  16 +#define PARAVIRT_INTERRUPT_RETURN 5
  17 +#define PARAVIRT_STI_SYSEXIT 6
  18 +
  19 +/* Bitmask of what can be clobbered: usually at least eax. */
  20 +#define CLBR_NONE 0x0
  21 +#define CLBR_EAX 0x1
  22 +#define CLBR_ECX 0x2
  23 +#define CLBR_EDX 0x4
  24 +#define CLBR_ANY 0x7
  25 +
8 26 #ifndef __ASSEMBLY__
9 27 struct thread_struct;
10 28 struct Xgt_desc_struct;
... ... @@ -15,6 +33,15 @@
15 33 int paravirt_enabled;
16 34 const char *name;
17 35  
  36 + /*
  37 + * Patch may replace one of the defined code sequences with arbitrary
  38 + * code, subject to the same register constraints. This generally
  39 + * means the code is not free to clobber any registers other than EAX.
  40 + * The patch function should return the number of bytes of code
  41 + * generated, as we nop pad the rest in generic code.
  42 + */
  43 + unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len);
  44 +
18 45 void (*arch_setup)(void);
19 46 char *(*memory_setup)(void);
20 47 void (*init_IRQ)(void);
... ... @@ -147,35 +174,6 @@
147 174 #define read_cr4_safe(x) paravirt_ops.read_cr4_safe()
148 175 #define write_cr4(x) paravirt_ops.write_cr4(x)
149 176  
150   -static inline unsigned long __raw_local_save_flags(void)
151   -{
152   - return paravirt_ops.save_fl();
153   -}
154   -
155   -static inline void raw_local_irq_restore(unsigned long flags)
156   -{
157   - return paravirt_ops.restore_fl(flags);
158   -}
159   -
160   -static inline void raw_local_irq_disable(void)
161   -{
162   - paravirt_ops.irq_disable();
163   -}
164   -
165   -static inline void raw_local_irq_enable(void)
166   -{
167   - paravirt_ops.irq_enable();
168   -}
169   -
170   -static inline unsigned long __raw_local_irq_save(void)
171   -{
172   - unsigned long flags = paravirt_ops.save_fl();
173   -
174   - paravirt_ops.irq_disable();
175   -
176   - return flags;
177   -}
178   -
179 177 static inline void raw_safe_halt(void)
180 178 {
181 179 paravirt_ops.safe_halt();
182 180  
... ... @@ -267,15 +265,134 @@
267 265 #endif
268 266 }
269 267  
270   -#define CLI_STRING "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax"
271   -#define STI_STRING "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax"
  268 +/* These all sit in the .parainstructions section to tell us what to patch. */
  269 +struct paravirt_patch {
  270 + u8 *instr; /* original instructions */
  271 + u8 instrtype; /* type of this instruction */
  272 + u8 len; /* length of original instruction */
  273 + u16 clobbers; /* what registers you may clobber */
  274 +};
  275 +
  276 +#define paravirt_alt(insn_string, typenum, clobber) \
  277 + "771:\n\t" insn_string "\n" "772:\n" \
  278 + ".pushsection .parainstructions,\"a\"\n" \
  279 + " .long 771b\n" \
  280 + " .byte " __stringify(typenum) "\n" \
  281 + " .byte 772b-771b\n" \
  282 + " .short " __stringify(clobber) "\n" \
  283 + ".popsection"
  284 +
  285 +static inline unsigned long __raw_local_save_flags(void)
  286 +{
  287 + unsigned long f;
  288 +
  289 + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
  290 + "call *%1;"
  291 + "popl %%edx; popl %%ecx",
  292 + PARAVIRT_SAVE_FLAGS, CLBR_NONE)
  293 + : "=a"(f): "m"(paravirt_ops.save_fl)
  294 + : "memory", "cc");
  295 + return f;
  296 +}
  297 +
  298 +static inline void raw_local_irq_restore(unsigned long f)
  299 +{
  300 + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
  301 + "call *%1;"
  302 + "popl %%edx; popl %%ecx",
  303 + PARAVIRT_RESTORE_FLAGS, CLBR_EAX)
  304 + : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f)
  305 + : "memory", "cc");
  306 +}
  307 +
  308 +static inline void raw_local_irq_disable(void)
  309 +{
  310 + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
  311 + "call *%0;"
  312 + "popl %%edx; popl %%ecx",
  313 + PARAVIRT_IRQ_DISABLE, CLBR_EAX)
  314 + : : "m" (paravirt_ops.irq_disable)
  315 + : "memory", "eax", "cc");
  316 +}
  317 +
  318 +static inline void raw_local_irq_enable(void)
  319 +{
  320 + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
  321 + "call *%0;"
  322 + "popl %%edx; popl %%ecx",
  323 + PARAVIRT_IRQ_ENABLE, CLBR_EAX)
  324 + : : "m" (paravirt_ops.irq_enable)
  325 + : "memory", "eax", "cc");
  326 +}
  327 +
  328 +static inline unsigned long __raw_local_irq_save(void)
  329 +{
  330 + unsigned long f;
  331 +
  332 + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
  333 + "call *%1; pushl %%eax;"
  334 + "call *%2; popl %%eax;"
  335 + "popl %%edx; popl %%ecx",
  336 + PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
  337 + CLBR_NONE)
  338 + : "=a"(f)
  339 + : "m" (paravirt_ops.save_fl),
  340 + "m" (paravirt_ops.irq_disable)
  341 + : "memory", "cc");
  342 + return f;
  343 +}
  344 +
  345 +#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \
  346 + "call *paravirt_ops+%c[irq_disable];" \
  347 + "popl %%edx; popl %%ecx", \
  348 + PARAVIRT_IRQ_DISABLE, CLBR_EAX)
  349 +
  350 +#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \
  351 + "call *paravirt_ops+%c[irq_enable];" \
  352 + "popl %%edx; popl %%ecx", \
  353 + PARAVIRT_IRQ_ENABLE, CLBR_EAX)
  354 +#define CLI_STI_CLOBBERS , "%eax"
  355 +#define CLI_STI_INPUT_ARGS \
  356 + , \
  357 + [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \
  358 + [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable))
  359 +
272 360 #else /* __ASSEMBLY__ */
273 361  
274   -#define INTERRUPT_RETURN jmp *%cs:paravirt_ops+PARAVIRT_iret
275   -#define DISABLE_INTERRUPTS pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax
276   -#define ENABLE_INTERRUPTS pushl %eax; pushl %ecx; pushl %edx; call *%cs:paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax
277   -#define ENABLE_INTERRUPTS_SYSEXIT jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit
278   -#define GET_CR0_INTO_EAX call *paravirt_ops+PARAVIRT_read_cr0
  362 +#define PARA_PATCH(ptype, clobbers, ops) \
  363 +771:; \
  364 + ops; \
  365 +772:; \
  366 + .pushsection .parainstructions,"a"; \
  367 + .long 771b; \
  368 + .byte ptype; \
  369 + .byte 772b-771b; \
  370 + .short clobbers; \
  371 + .popsection
  372 +
  373 +#define INTERRUPT_RETURN \
  374 + PARA_PATCH(PARAVIRT_INTERRUPT_RETURN, CLBR_ANY, \
  375 + jmp *%cs:paravirt_ops+PARAVIRT_iret)
  376 +
  377 +#define DISABLE_INTERRUPTS(clobbers) \
  378 + PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers, \
  379 + pushl %ecx; pushl %edx; \
  380 + call *paravirt_ops+PARAVIRT_irq_disable; \
  381 + popl %edx; popl %ecx) \
  382 +
  383 +#define ENABLE_INTERRUPTS(clobbers) \
  384 + PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers, \
  385 + pushl %ecx; pushl %edx; \
  386 + call *%cs:paravirt_ops+PARAVIRT_irq_enable; \
  387 + popl %edx; popl %ecx)
  388 +
  389 +#define ENABLE_INTERRUPTS_SYSEXIT \
  390 + PARA_PATCH(PARAVIRT_STI_SYSEXIT, CLBR_ANY, \
  391 + jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit)
  392 +
  393 +#define GET_CR0_INTO_EAX \
  394 + call *paravirt_ops+PARAVIRT_read_cr0
  395 +
279 396 #endif /* __ASSEMBLY__ */
280 397 #endif /* CONFIG_PARAVIRT */
281 398 #endif /* __ASM_PARAVIRT_H */
include/asm-i386/processor.h
... ... @@ -156,59 +156,6 @@
156 156 : "0" (*eax), "2" (*ecx));
157 157 }
158 158  
159   -/*
160   - * Generic CPUID function
161   - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
162   - * resulting in stale register contents being returned.
163   - */
164   -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
165   -{
166   - *eax = op;
167   - *ecx = 0;
168   - __cpuid(eax, ebx, ecx, edx);
169   -}
170   -
171   -/* Some CPUID calls want 'count' to be placed in ecx */
172   -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
173   - int *edx)
174   -{
175   - *eax = op;
176   - *ecx = count;
177   - __cpuid(eax, ebx, ecx, edx);
178   -}
179   -
180   -/*
181   - * CPUID functions returning a single datum
182   - */
183   -static inline unsigned int cpuid_eax(unsigned int op)
184   -{
185   - unsigned int eax, ebx, ecx, edx;
186   -
187   - cpuid(op, &eax, &ebx, &ecx, &edx);
188   - return eax;
189   -}
190   -static inline unsigned int cpuid_ebx(unsigned int op)
191   -{
192   - unsigned int eax, ebx, ecx, edx;
193   -
194   - cpuid(op, &eax, &ebx, &ecx, &edx);
195   - return ebx;
196   -}
197   -static inline unsigned int cpuid_ecx(unsigned int op)
198   -{
199   - unsigned int eax, ebx, ecx, edx;
200   -
201   - cpuid(op, &eax, &ebx, &ecx, &edx);
202   - return ecx;
203   -}
204   -static inline unsigned int cpuid_edx(unsigned int op)
205   -{
206   - unsigned int eax, ebx, ecx, edx;
207   -
208   - cpuid(op, &eax, &ebx, &ecx, &edx);
209   - return edx;
210   -}
211   -
212 159 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
213 160  
214 161 /*
... ... @@ -491,22 +438,6 @@
491 438 .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
492 439 }
493 440  
494   -#ifdef CONFIG_PARAVIRT
495   -#include <asm/paravirt.h>
496   -#else
497   -#define paravirt_enabled() 0
498   -#define __cpuid native_cpuid
499   -
500   -static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
501   -{
502   - tss->esp0 = thread->esp0;
503   - /* This can only happen when SEP is enabled, no need to test "SEP"arately */
504   - if (unlikely(tss->ss1 != thread->sysenter_cs)) {
505   - tss->ss1 = thread->sysenter_cs;
506   - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
507   - }
508   -}
509   -
510 441 #define start_thread(regs, new_eip, new_esp) do { \
511 442 __asm__("movl %0,%%fs": :"r" (0)); \
512 443 regs->xgs = 0; \
... ... @@ -519,36 +450,6 @@
519 450 regs->esp = new_esp; \
520 451 } while (0)
521 452  
522   -/*
523   - * These special macros can be used to get or set a debugging register
524   - */
525   -#define get_debugreg(var, register) \
526   - __asm__("movl %%db" #register ", %0" \
527   - :"=r" (var))
528   -#define set_debugreg(value, register) \
529   - __asm__("movl %0,%%db" #register \
530   - : /* no output */ \
531   - :"r" (value))
532   -
533   -#define set_iopl_mask native_set_iopl_mask
534   -#endif /* CONFIG_PARAVIRT */
535   -
536   -/*
537   - * Set IOPL bits in EFLAGS from given mask
538   - */
539   -static fastcall inline void native_set_iopl_mask(unsigned mask)
540   -{
541   - unsigned int reg;
542   - __asm__ __volatile__ ("pushfl;"
543   - "popl %0;"
544   - "andl %1, %0;"
545   - "orl %2, %0;"
546   - "pushl %0;"
547   - "popfl"
548   - : "=&r" (reg)
549   - : "i" (~X86_EFLAGS_IOPL), "r" (mask));
550   -}
551   -
552 453 /* Forward declaration, a strange C thing */
553 454 struct task_struct;
554 455 struct mm_struct;
... ... @@ -639,6 +540,105 @@
639 540 }
640 541  
641 542 #define cpu_relax() rep_nop()
  543 +
  544 +#ifdef CONFIG_PARAVIRT
  545 +#include <asm/paravirt.h>
  546 +#else
  547 +#define paravirt_enabled() 0
  548 +#define __cpuid native_cpuid
  549 +
  550 +static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
  551 +{
  552 + tss->esp0 = thread->esp0;
  553 + /* This can only happen when SEP is enabled, no need to test "SEP"arately */
  554 + if (unlikely(tss->ss1 != thread->sysenter_cs)) {
  555 + tss->ss1 = thread->sysenter_cs;
  556 + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
  557 + }
  558 +}
  559 +
  560 +/*
  561 + * These special macros can be used to get or set a debugging register
  562 + */
  563 +#define get_debugreg(var, register) \
  564 + __asm__("movl %%db" #register ", %0" \
  565 + :"=r" (var))
  566 +#define set_debugreg(value, register) \
  567 + __asm__("movl %0,%%db" #register \
  568 + : /* no output */ \
  569 + :"r" (value))
  570 +
  571 +#define set_iopl_mask native_set_iopl_mask
  572 +#endif /* CONFIG_PARAVIRT */
  573 +
  574 +/*
  575 + * Set IOPL bits in EFLAGS from given mask
  576 + */
  577 +static fastcall inline void native_set_iopl_mask(unsigned mask)
  578 +{
  579 + unsigned int reg;
  580 + __asm__ __volatile__ ("pushfl;"
  581 + "popl %0;"
  582 + "andl %1, %0;"
  583 + "orl %2, %0;"
  584 + "pushl %0;"
  585 + "popfl"
  586 + : "=&r" (reg)
  587 + : "i" (~X86_EFLAGS_IOPL), "r" (mask));
  588 +}
  589 +
  590 +/*
  591 + * Generic CPUID function
  592 + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
  593 + * resulting in stale register contents being returned.
  594 + */
  595 +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
  596 +{
  597 + *eax = op;
  598 + *ecx = 0;
  599 + __cpuid(eax, ebx, ecx, edx);
  600 +}
  601 +
  602 +/* Some CPUID calls want 'count' to be placed in ecx */
  603 +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
  604 + int *edx)
  605 +{
  606 + *eax = op;
  607 + *ecx = count;
  608 + __cpuid(eax, ebx, ecx, edx);
  609 +}
  610 +
  611 +/*
  612 + * CPUID functions returning a single datum
  613 + */
  614 +static inline unsigned int cpuid_eax(unsigned int op)
  615 +{
  616 + unsigned int eax, ebx, ecx, edx;
  617 +
  618 + cpuid(op, &eax, &ebx, &ecx, &edx);
  619 + return eax;
  620 +}
  621 +static inline unsigned int cpuid_ebx(unsigned int op)
  622 +{
  623 + unsigned int eax, ebx, ecx, edx;
  624 +
  625 + cpuid(op, &eax, &ebx, &ecx, &edx);
  626 + return ebx;
  627 +}
  628 +static inline unsigned int cpuid_ecx(unsigned int op)
  629 +{
  630 + unsigned int eax, ebx, ecx, edx;
  631 +
  632 + cpuid(op, &eax, &ebx, &ecx, &edx);
  633 + return ecx;
  634 +}
  635 +static inline unsigned int cpuid_edx(unsigned int op)
  636 +{
  637 + unsigned int eax, ebx, ecx, edx;
  638 +
  639 + cpuid(op, &eax, &ebx, &ecx, &edx);
  640 + return edx;
  641 +}
642 642  
643 643 /* generic versions from gas */
644 644 #define GENERIC_NOP1 ".byte 0x90\n"
include/asm-i386/spinlock.h
... ... @@ -12,6 +12,8 @@
12 12 #else
13 13 #define CLI_STRING "cli"
14 14 #define STI_STRING "sti"
  15 +#define CLI_STI_CLOBBERS
  16 +#define CLI_STI_INPUT_ARGS
15 17 #endif /* CONFIG_PARAVIRT */
16 18  
17 19 /*
18 20  
19 21  
20 22  
21 23  
... ... @@ -57,25 +59,28 @@
57 59 {
58 60 asm volatile(
59 61 "\n1:\t"
60   - LOCK_PREFIX " ; decb %0\n\t"
  62 + LOCK_PREFIX " ; decb %[slock]\n\t"
61 63 "jns 5f\n"
62 64 "2:\t"
63   - "testl $0x200, %1\n\t"
  65 + "testl $0x200, %[flags]\n\t"
64 66 "jz 4f\n\t"
65 67 STI_STRING "\n"
66 68 "3:\t"
67 69 "rep;nop\n\t"
68   - "cmpb $0, %0\n\t"
  70 + "cmpb $0, %[slock]\n\t"
69 71 "jle 3b\n\t"
70 72 CLI_STRING "\n\t"
71 73 "jmp 1b\n"
72 74 "4:\t"
73 75 "rep;nop\n\t"
74   - "cmpb $0, %0\n\t"
  76 + "cmpb $0, %[slock]\n\t"
75 77 "jg 1b\n\t"
76 78 "jmp 4b\n"
77 79 "5:\n\t"
78   - : "+m" (lock->slock) : "r" (flags) : "memory");
  80 + : [slock] "+m" (lock->slock)
  81 + : [flags] "r" (flags)
  82 + CLI_STI_INPUT_ARGS
  83 + : "memory" CLI_STI_CLOBBERS);
79 84 }
80 85 #endif
81 86  
include/asm-x86_64/alternative.h
... ... @@ -4,6 +4,7 @@
4 4 #ifdef __KERNEL__
5 5  
6 6 #include <linux/types.h>
  7 +#include <linux/stddef.h>
7 8 #include <asm/cpufeature.h>
8 9  
9 10 struct alt_instr {
... ... @@ -131,6 +132,17 @@
131 132  
132 133 #else /* ! CONFIG_SMP */
133 134 #define LOCK_PREFIX ""
  135 +#endif
  136 +
  137 +struct paravirt_patch;
  138 +#ifdef CONFIG_PARAVIRT
  139 +void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
  140 +#else
  141 +static inline void
  142 +apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
  143 +{}
  144 +#define __start_parainstructions NULL
  145 +#define __stop_parainstructions NULL
134 146 #endif
135 147  
136 148 #endif /* _X86_64_ALTERNATIVE_H */
scripts/mod/modpost.c
... ... @@ -911,6 +911,7 @@
911 911 ".toc1", /* used by ppc64 */
912 912 ".stab",
913 913 ".rodata",
  914 + ".parainstructions",
914 915 ".text.lock",
915 916 "__bug_table", /* used by powerpc for BUG() */
916 917 ".pci_fixup_header",
... ... @@ -931,6 +932,7 @@
931 932 ".altinstructions",
932 933 ".eh_frame",
933 934 ".debug",
  935 + ".parainstructions",
934 936 NULL
935 937 };
936 938 /* part of section name */