Commit af65d64845a90c8f2fc90b97e2148ff74672e979

Authored by Roland McGrath
Committed by Ingo Molnar
1 parent 00f8b1bc0e

x86 vDSO: consolidate vdso32

This makes x86_64's ia32 emulation support share the sources used in the
32-bit kernel for the 32-bit vDSO and much of its setup code.

The 32-bit vDSO mapping now behaves the same on x86_64 as on native 32-bit.
The abi.syscall32 sysctl on x86_64 now takes the same values that
vm.vdso_enabled takes on the 32-bit kernel.  That is, 1 means a randomized
vDSO location, 2 means the fixed old address.  The CONFIG_COMPAT_VDSO
option is now available to make this the default setting, the same meaning
it has for the 32-bit kernel.  (This does not affect the 64-bit vDSO.)

The argument vdso32=[012] can be used on both 32-bit and 64-bit kernels to
set this parameter at boot time.  The vdso=[012] argument still does this
same thing on the 32-bit kernel.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Showing 10 changed files with 146 additions and 60 deletions Side-by-side Diff

Documentation/kernel-parameters.txt
... ... @@ -1988,6 +1988,11 @@
1988 1988 vdso=1: enable VDSO (default)
1989 1989 vdso=0: disable VDSO mapping
1990 1990  
  1991 + vdso32= [X86-32,X86-64]
  1992 + vdso32=2: enable compat VDSO (default with COMPAT_VDSO)
  1993 + vdso32=1: enable 32-bit VDSO (default)
  1994 + vdso32=0: disable 32-bit VDSO mapping
  1995 +
1991 1996 vector= [IA-64,SMP]
1992 1997 vector=percpu: enable percpu vector domain
1993 1998  
arch/x86/Kconfig
... ... @@ -1191,9 +1191,9 @@
1191 1191 config COMPAT_VDSO
1192 1192 bool "Compat VDSO support"
1193 1193 default y
1194   - depends on X86_32
  1194 + depends on X86_32 || IA32_EMULATION
1195 1195 help
1196   - Map the VDSO to the predictable old-style address too.
  1196 + Map the 32-bit VDSO to the predictable old-style address too.
1197 1197 ---help---
1198 1198 Say N here if you are running a sufficiently recent glibc
1199 1199 version (2.3.3 or later), to remove the high-mapped
arch/x86/ia32/Makefile
... ... @@ -3,7 +3,7 @@
3 3 #
4 4  
5 5 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \
6   - ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o
  6 + ia32_binfmt.o fpu32.o ptrace32.o
7 7  
8 8 sysv-$(CONFIG_SYSVIPC) := ipc32.o
9 9 obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
arch/x86/ia32/ia32_binfmt.c
... ... @@ -26,7 +26,7 @@
26 26 #include <asm/i387.h>
27 27 #include <asm/uaccess.h>
28 28 #include <asm/ia32.h>
29   -#include <asm/vsyscall32.h>
  29 +#include <asm/vdso.h>
30 30  
31 31 #undef ELF_ARCH
32 32 #undef ELF_CLASS
33 33  
... ... @@ -47,14 +47,13 @@
47 47 #define AT_SYSINFO 32
48 48 #define AT_SYSINFO_EHDR 33
49 49  
50   -int sysctl_vsyscall32 = 1;
  50 +extern int sysctl_vsyscall32;
51 51  
52 52 #undef ARCH_DLINFO
53 53 #define ARCH_DLINFO do { \
54 54 if (sysctl_vsyscall32) { \
55   - current->mm->context.vdso = (void *)VSYSCALL32_BASE; \
56   - NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
57   - NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \
  55 + NEW_AUX_ENT(AT_SYSINFO, (u32)VDSO_ENTRY); \
  56 + NEW_AUX_ENT(AT_SYSINFO_EHDR, (u32)VDSO_CURRENT_BASE); \
58 57 } \
59 58 } while(0)
60 59  
arch/x86/ia32/ia32_signal.c
... ... @@ -31,7 +31,7 @@
31 31 #include <asm/sigcontext32.h>
32 32 #include <asm/fpu32.h>
33 33 #include <asm/proto.h>
34   -#include <asm/vsyscall32.h>
  34 +#include <asm/vdso.h>
35 35  
36 36 #define DEBUG_SIG 0
37 37  
38 38  
... ... @@ -465,13 +465,16 @@
465 465 goto give_sigsegv;
466 466 }
467 467  
468   - /* Return stub is in 32bit vsyscall page */
469   - if (current->binfmt->hasvdso)
470   - restorer = VSYSCALL32_SIGRETURN;
471   - else
472   - restorer = (void *)&frame->retcode;
473   - if (ka->sa.sa_flags & SA_RESTORER)
  468 + if (ka->sa.sa_flags & SA_RESTORER) {
474 469 restorer = ka->sa.sa_restorer;
  470 + } else {
  471 + /* Return stub is in 32bit vsyscall page */
  472 + if (current->binfmt->hasvdso)
  473 + restorer = VDSO32_SYMBOL(current->mm->context.vdso,
  474 + sigreturn);
  475 + else
  476 + restorer = (void *)&frame->retcode;
  477 + }
475 478 err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
476 479  
477 480 /*
... ... @@ -519,7 +522,7 @@
519 522 {
520 523 struct rt_sigframe __user *frame;
521 524 struct exec_domain *ed = current_thread_info()->exec_domain;
522   - void __user *restorer = VSYSCALL32_RTSIGRETURN;
  525 + void __user *restorer;
523 526 int err = 0;
524 527  
525 528 /* __copy_to_user optimizes that into a single 8 byte store */
... ... @@ -564,6 +567,9 @@
564 567  
565 568 if (ka->sa.sa_flags & SA_RESTORER)
566 569 restorer = ka->sa.sa_restorer;
  570 + else
  571 + restorer = VDSO32_SYMBOL(current->mm->context.vdso,
  572 + rt_sigreturn);
567 573 err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
568 574  
569 575 /*
arch/x86/vdso/Makefile
... ... @@ -15,7 +15,7 @@
15 15  
16 16 # files to link into kernel
17 17 obj-$(VDSO64-y) += vma.o vdso.o
18   -obj-$(CONFIG_X86_32) += vdso32.o vdso32-setup.o
  18 +obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
19 19  
20 20 vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
21 21  
arch/x86/vdso/vdso32-setup.c
... ... @@ -24,6 +24,7 @@
24 24 #include <asm/elf.h>
25 25 #include <asm/tlbflush.h>
26 26 #include <asm/vdso.h>
  27 +#include <asm/proto.h>
27 28  
28 29 enum {
29 30 VDSO_DISABLED = 0,
30 31  
31 32  
... ... @@ -37,14 +38,24 @@
37 38 #define VDSO_DEFAULT VDSO_ENABLED
38 39 #endif
39 40  
  41 +#ifdef CONFIG_X86_64
  42 +#define vdso_enabled sysctl_vsyscall32
  43 +#define arch_setup_additional_pages syscall32_setup_pages
  44 +#endif
  45 +
40 46 /*
  47 + * This is the difference between the prelinked addresses in the vDSO images
  48 + * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
  49 + * in the user address space.
  50 + */
  51 +#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
  52 +
  53 +/*
41 54 * Should the kernel map a VDSO page into processes and pass its
42 55 * address down to glibc upon exec()?
43 56 */
44 57 unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
45 58  
46   -EXPORT_SYMBOL_GPL(vdso_enabled);
47   -
48 59 static int __init vdso_setup(char *s)
49 60 {
50 61 vdso_enabled = simple_strtoul(s, NULL, 0);
51 62  
52 63  
... ... @@ -52,10 +63,19 @@
52 63 return 1;
53 64 }
54 65  
55   -__setup("vdso=", vdso_setup);
  66 +/*
  67 + * For consistency, the argument vdso32=[012] affects the 32-bit vDSO
  68 + * behavior on both 64-bit and 32-bit kernels.
  69 + * On 32-bit kernels, vdso=[012] means the same thing.
  70 + */
  71 +__setup("vdso32=", vdso_setup);
56 72  
57   -extern asmlinkage void ia32_sysenter_target(void);
  73 +#ifdef CONFIG_X86_32
  74 +__setup_param("vdso=", vdso32_setup, vdso_setup, 0);
58 75  
  76 +EXPORT_SYMBOL_GPL(vdso_enabled);
  77 +#endif
  78 +
59 79 static __init void reloc_symtab(Elf32_Ehdr *ehdr,
60 80 unsigned offset, unsigned size)
61 81 {
... ... @@ -79,7 +99,7 @@
79 99 case STT_FUNC:
80 100 case STT_SECTION:
81 101 case STT_FILE:
82   - sym->st_value += VDSO_HIGH_BASE;
  102 + sym->st_value += VDSO_ADDR_ADJUST;
83 103 }
84 104 }
85 105 }
... ... @@ -105,7 +125,7 @@
105 125 case DT_VERNEED:
106 126 case DT_ADDRRNGLO ... DT_ADDRRNGHI:
107 127 /* definitely pointers needing relocation */
108   - dyn->d_un.d_ptr += VDSO_HIGH_BASE;
  128 + dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
109 129 break;
110 130  
111 131 case DT_ENCODING ... OLD_DT_LOOS-1:
... ... @@ -114,7 +134,7 @@
114 134 they're even */
115 135 if (dyn->d_tag >= DT_ENCODING &&
116 136 (dyn->d_tag & 1) == 0)
117   - dyn->d_un.d_ptr += VDSO_HIGH_BASE;
  137 + dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
118 138 break;
119 139  
120 140 case DT_VERDEFNUM:
121 141  
122 142  
... ... @@ -143,15 +163,15 @@
143 163 int i;
144 164  
145 165 BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
146   - !elf_check_arch(ehdr) ||
  166 + !elf_check_arch_ia32(ehdr) ||
147 167 ehdr->e_type != ET_DYN);
148 168  
149   - ehdr->e_entry += VDSO_HIGH_BASE;
  169 + ehdr->e_entry += VDSO_ADDR_ADJUST;
150 170  
151 171 /* rebase phdrs */
152 172 phdr = (void *)ehdr + ehdr->e_phoff;
153 173 for (i = 0; i < ehdr->e_phnum; i++) {
154   - phdr[i].p_vaddr += VDSO_HIGH_BASE;
  174 + phdr[i].p_vaddr += VDSO_ADDR_ADJUST;
155 175  
156 176 /* relocate dynamic stuff */
157 177 if (phdr[i].p_type == PT_DYNAMIC)
... ... @@ -164,7 +184,7 @@
164 184 if (!(shdr[i].sh_flags & SHF_ALLOC))
165 185 continue;
166 186  
167   - shdr[i].sh_addr += VDSO_HIGH_BASE;
  187 + shdr[i].sh_addr += VDSO_ADDR_ADJUST;
168 188  
169 189 if (shdr[i].sh_type == SHT_SYMTAB ||
170 190 shdr[i].sh_type == SHT_DYNSYM)
... ... @@ -173,6 +193,45 @@
173 193 }
174 194 }
175 195  
  196 +/*
  197 + * These symbols are defined by vdso32.S to mark the bounds
  198 + * of the ELF DSO images included therein.
  199 + */
  200 +extern const char vdso32_default_start, vdso32_default_end;
  201 +extern const char vdso32_sysenter_start, vdso32_sysenter_end;
  202 +static struct page *vdso32_pages[1];
  203 +
  204 +#ifdef CONFIG_X86_64
  205 +
  206 +static int use_sysenter __read_mostly = -1;
  207 +
  208 +#define vdso32_sysenter() (use_sysenter > 0)
  209 +
  210 +/* May not be __init: called during resume */
  211 +void syscall32_cpu_init(void)
  212 +{
  213 + if (use_sysenter < 0)
  214 + use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
  215 +
  216 + /* Load these always in case some future AMD CPU supports
  217 + SYSENTER from compat mode too. */
  218 + checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
  219 + checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
  220 + checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
  221 +
  222 + wrmsrl(MSR_CSTAR, ia32_cstar_target);
  223 +}
  224 +
  225 +#define compat_uses_vma 1
  226 +
  227 +static inline void map_compat_vdso(int map)
  228 +{
  229 +}
  230 +
  231 +#else /* CONFIG_X86_32 */
  232 +
  233 +#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
  234 +
176 235 void enable_sep_cpu(void)
177 236 {
178 237 int cpu = get_cpu();
... ... @@ -210,13 +269,7 @@
210 269 return 0;
211 270 }
212 271  
213   -/*
214   - * These symbols are defined by vsyscall.o to mark the bounds
215   - * of the ELF DSO images included therein.
216   - */
217   -extern const char vsyscall_int80_start, vsyscall_int80_end;
218   -extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
219   -static struct page *syscall_pages[1];
  272 +#define compat_uses_vma 0
220 273  
221 274 static void map_compat_vdso(int map)
222 275 {
223 276  
224 277  
225 278  
226 279  
227 280  
228 281  
... ... @@ -227,31 +280,35 @@
227 280  
228 281 vdso_mapped = map;
229 282  
230   - __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
  283 + __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
231 284 map ? PAGE_READONLY_EXEC : PAGE_NONE);
232 285  
233 286 /* flush stray tlbs */
234 287 flush_tlb_all();
235 288 }
236 289  
  290 +#endif /* CONFIG_X86_64 */
  291 +
237 292 int __init sysenter_setup(void)
238 293 {
239 294 void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
240 295 const void *vsyscall;
241 296 size_t vsyscall_len;
242 297  
243   - syscall_pages[0] = virt_to_page(syscall_page);
  298 + vdso32_pages[0] = virt_to_page(syscall_page);
244 299  
  300 +#ifdef CONFIG_X86_32
245 301 gate_vma_init();
246 302  
247 303 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
  304 +#endif
248 305  
249   - if (!boot_cpu_has(X86_FEATURE_SEP)) {
250   - vsyscall = &vsyscall_int80_start;
251   - vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
  306 + if (!vdso32_sysenter()) {
  307 + vsyscall = &vdso32_default_start;
  308 + vsyscall_len = &vdso32_default_end - &vdso32_default_start;
252 309 } else {
253   - vsyscall = &vsyscall_sysenter_start;
254   - vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
  310 + vsyscall = &vdso32_sysenter_start;
  311 + vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
255 312 }
256 313  
257 314 memcpy(syscall_page, vsyscall, vsyscall_len);
258 315  
... ... @@ -284,7 +341,9 @@
284 341 ret = addr;
285 342 goto up_fail;
286 343 }
  344 + }
287 345  
  346 + if (compat_uses_vma || !compat) {
288 347 /*
289 348 * MAYWRITE to allow gdb to COW and set breakpoints
290 349 *
... ... @@ -298,7 +357,7 @@
298 357 VM_READ|VM_EXEC|
299 358 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
300 359 VM_ALWAYSDUMP,
301   - syscall_pages);
  360 + vdso32_pages);
302 361  
303 362 if (ret)
304 363 goto up_fail;
... ... @@ -314,6 +373,12 @@
314 373 return ret;
315 374 }
316 375  
  376 +#ifdef CONFIG_X86_64
  377 +
  378 +__initcall(sysenter_setup);
  379 +
  380 +#else /* CONFIG_X86_32 */
  381 +
317 382 const char *arch_vma_name(struct vm_area_struct *vma)
318 383 {
319 384 if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
... ... @@ -342,4 +407,6 @@
342 407 {
343 408 return 0;
344 409 }
  410 +
  411 +#endif /* CONFIG_X86_64 */
arch/x86/vdso/vdso32.S
... ... @@ -2,15 +2,19 @@
2 2  
3 3 __INITDATA
4 4  
5   - .globl vsyscall_int80_start, vsyscall_int80_end
6   -vsyscall_int80_start:
  5 + .globl vdso32_default_start, vdso32_default_end
  6 +vdso32_default_start:
  7 +#ifdef CONFIG_X86_32
7 8 .incbin "arch/x86/vdso/vdso32-int80.so"
8   -vsyscall_int80_end:
  9 +#else
  10 + .incbin "arch/x86/vdso/vdso32-syscall.so"
  11 +#endif
  12 +vdso32_default_end:
9 13  
10   - .globl vsyscall_sysenter_start, vsyscall_sysenter_end
11   -vsyscall_sysenter_start:
  14 + .globl vdso32_sysenter_start, vdso32_sysenter_end
  15 +vdso32_sysenter_start:
12 16 .incbin "arch/x86/vdso/vdso32-sysenter.so"
13   -vsyscall_sysenter_end:
  17 +vdso32_sysenter_end:
14 18  
15 19 __FINIT
arch/x86/xen/setup.c
... ... @@ -62,8 +62,8 @@
62 62 */
63 63 static void fiddle_vdso(void)
64 64 {
65   - extern char vsyscall_int80_start;
66   - u32 *mask = VDSO32_SYMBOL(&vsyscall_int80_start, NOTE_MASK);
  65 + extern const char vdso32_default_start;
  66 + u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK);
67 67 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
68 68 }
69 69  
include/asm-x86/elf.h
... ... @@ -74,17 +74,19 @@
74 74  
75 75 #ifdef __KERNEL__
76 76  
  77 +/*
  78 + * This is used to ensure we don't load something for the wrong architecture.
  79 + */
  80 +#define elf_check_arch_ia32(x) \
  81 + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
  82 +
77 83 #ifdef CONFIG_X86_32
78 84 #include <asm/processor.h>
79 85 #include <asm/system.h> /* for savesegment */
80 86 #include <asm/desc.h>
81 87 #include <asm/vdso.h>
82 88  
83   -/*
84   - * This is used to ensure we don't load something for the wrong architecture.
85   - */
86   -#define elf_check_arch(x) \
87   - (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
  89 +#define elf_check_arch(x) elf_check_arch_ia32(x)
88 90  
89 91 /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx
90 92 contains a pointer to a function which might be registered using `atexit'.
91 93  
... ... @@ -247,11 +249,7 @@
247 249 #define ELF_CORE_XFPREG_TYPE NT_PRXFPREG
248 250  
249 251 #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO))
250   -#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso)
251 252  
252   -#define VDSO_ENTRY \
253   - ((unsigned long) VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall))
254   -
255 253 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
256 254  
257 255 #define ARCH_DLINFO \
... ... @@ -262,6 +260,8 @@
262 260  
263 261 #else /* CONFIG_X86_32 */
264 262  
  263 +#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */
  264 +
265 265 /* 1GB for 64bit, 8MB for 32bit */
266 266 #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff)
267 267  
... ... @@ -271,6 +271,11 @@
271 271 } while (0)
272 272  
273 273 #endif /* !CONFIG_X86_32 */
  274 +
  275 +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso)
  276 +
  277 +#define VDSO_ENTRY \
  278 + ((unsigned long) VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall))
274 279  
275 280 struct linux_binprm;
276 281