Commit a0abcf2e8f8017051830f738ac1bf5ef42703243

Authored by Linus Torvalds

Merge branch 'x86/vdso' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next

Pull x86 vdso updates from Peter Anvin:
 "Vdso cleanups and improvements largely from Andy Lutomirski.  This
  makes the vdso a lot less "special""

* 'x86/vdso' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso, build: Make LE access macros clearer, host-safe
  x86/vdso, build: Fix cross-compilation from big-endian architectures
  x86/vdso, build: When vdso2c fails, unlink the output
  x86, vdso: Fix an OOPS accessing the HPET mapping w/o an HPET
  x86, mm: Replace arch_vma_name with vm_ops->name for vsyscalls
  x86, mm: Improve _install_special_mapping and fix x86 vdso naming
  mm, fs: Add vm_ops->name as an alternative to arch_vma_name
  x86, vdso: Fix an OOPS accessing the HPET mapping w/o an HPET
  x86, vdso: Remove vestiges of VDSO_PRELINK and some outdated comments
  x86, vdso: Move the vvar and hpet mappings next to the 64-bit vDSO
  x86, vdso: Move the 32-bit vdso special pages after the text
  x86, vdso: Reimplement vdso.so preparation in build-time C
  x86, vdso: Move syscall and sysenter setup into kernel/cpu/common.c
  x86, vdso: Clean up 32-bit vs 64-bit vdso params
  x86, mm: Ensure correct alignment of the fixmap

Showing 40 changed files
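
The common thread through most of the hunks below: the old VDSO32_SYMBOL() macro and the generated *-syms.lds files are gone, replaced by per-image struct vdso_image objects whose sym_* fields are plain byte offsets emitted at build time by the new vdso2c host tool. The before/after pattern, as it appears in the signal-frame hunks (fragments for orientation, not a self-contained program):

	/* Before: symbol resolved via linker-script magic in *-syms.lds */
	restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);

	/* After: byte offset recorded in the selected image by vdso2c */
	restorer = current->mm->context.vdso +
		selected_vdso32->sym___kernel_sigreturn;
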

arch/x86/ia32/ia32_signal.c
... ... @@ -383,8 +383,8 @@
383 383 } else {
384 384 /* Return stub is in 32bit vsyscall page */
385 385 if (current->mm->context.vdso)
386   - restorer = VDSO32_SYMBOL(current->mm->context.vdso,
387   - sigreturn);
  386 + restorer = current->mm->context.vdso +
  387 + selected_vdso32->sym___kernel_sigreturn;
388 388 else
389 389 restorer = &frame->retcode;
390 390 }
... ... @@ -462,8 +462,8 @@
462 462 if (ksig->ka.sa.sa_flags & SA_RESTORER)
463 463 restorer = ksig->ka.sa.sa_restorer;
464 464 else
465   - restorer = VDSO32_SYMBOL(current->mm->context.vdso,
466   - rt_sigreturn);
  465 + restorer = current->mm->context.vdso +
  466 + selected_vdso32->sym___kernel_rt_sigreturn;
467 467 put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
468 468  
469 469 /*
arch/x86/include/asm/elf.h
... ... @@ -75,7 +75,12 @@
75 75  
76 76 #include <asm/vdso.h>
77 77  
78   -extern unsigned int vdso_enabled;
  78 +#ifdef CONFIG_X86_64
  79 +extern unsigned int vdso64_enabled;
  80 +#endif
  81 +#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
  82 +extern unsigned int vdso32_enabled;
  83 +#endif
79 84  
80 85 /*
81 86 * This is used to ensure we don't load something for the wrong architecture.
82 87  
... ... @@ -269,9 +274,9 @@
269 274  
270 275 struct task_struct;
271 276  
272   -#define ARCH_DLINFO_IA32(vdso_enabled) \
  277 +#define ARCH_DLINFO_IA32 \
273 278 do { \
274   - if (vdso_enabled) { \
  279 + if (vdso32_enabled) { \
275 280 NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
276 281 NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
277 282 } \
... ... @@ -281,7 +286,7 @@
281 286  
282 287 #define STACK_RND_MASK (0x7ff)
283 288  
284   -#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled)
  289 +#define ARCH_DLINFO ARCH_DLINFO_IA32
285 290  
286 291 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
287 292  
288 293  
289 294  
290 295  
291 296  
... ... @@ -292,16 +297,17 @@
292 297  
293 298 #define ARCH_DLINFO \
294 299 do { \
295   - if (vdso_enabled) \
  300 + if (vdso64_enabled) \
296 301 NEW_AUX_ENT(AT_SYSINFO_EHDR, \
297   - (unsigned long)current->mm->context.vdso); \
  302 + (unsigned long __force)current->mm->context.vdso); \
298 303 } while (0)
299 304  
  305 +/* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */
300 306 #define ARCH_DLINFO_X32 \
301 307 do { \
302   - if (vdso_enabled) \
  308 + if (vdso64_enabled) \
303 309 NEW_AUX_ENT(AT_SYSINFO_EHDR, \
304   - (unsigned long)current->mm->context.vdso); \
  310 + (unsigned long __force)current->mm->context.vdso); \
305 311 } while (0)
306 312  
307 313 #define AT_SYSINFO 32
... ... @@ -310,7 +316,7 @@
310 316 if (test_thread_flag(TIF_X32)) \
311 317 ARCH_DLINFO_X32; \
312 318 else \
313   - ARCH_DLINFO_IA32(sysctl_vsyscall32)
  319 + ARCH_DLINFO_IA32
314 320  
315 321 #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
316 322  
317 323  
... ... @@ -319,18 +325,17 @@
319 325 #define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso)
320 326  
321 327 #define VDSO_ENTRY \
322   - ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall))
  328 + ((unsigned long)current->mm->context.vdso + \
  329 + selected_vdso32->sym___kernel_vsyscall)
323 330  
324 331 struct linux_binprm;
325 332  
326 333 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
327 334 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
328 335 int uses_interp);
329   -extern int x32_setup_additional_pages(struct linux_binprm *bprm,
330   - int uses_interp);
331   -
332   -extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
333   -#define compat_arch_setup_additional_pages syscall32_setup_pages
  336 +extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
  337 + int uses_interp);
  338 +#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
334 339  
335 340 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
336 341 #define arch_randomize_brk arch_randomize_brk
arch/x86/include/asm/fixmap.h
... ... @@ -24,7 +24,7 @@
24 24 #include <linux/threads.h>
25 25 #include <asm/kmap_types.h>
26 26 #else
27   -#include <asm/vsyscall.h>
  27 +#include <uapi/asm/vsyscall.h>
28 28 #endif
29 29  
30 30 /*
... ... @@ -41,7 +41,8 @@
41 41 extern unsigned long __FIXADDR_TOP;
42 42 #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
43 43 #else
44   -#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
  44 +#define FIXADDR_TOP (round_up(VSYSCALL_ADDR + PAGE_SIZE, 1<<PMD_SHIFT) - \
  45 + PAGE_SIZE)
45 46 #endif
46 47  
47 48  
... ... @@ -68,11 +69,7 @@
68 69 #ifdef CONFIG_X86_32
69 70 FIX_HOLE,
70 71 #else
71   - VSYSCALL_LAST_PAGE,
72   - VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
73   - + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
74   - VVAR_PAGE,
75   - VSYSCALL_HPET,
  72 + VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
76 73 #ifdef CONFIG_PARAVIRT_CLOCK
77 74 PVCLOCK_FIXMAP_BEGIN,
78 75 PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
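
The new FIXADDR_TOP and VSYSCALL_PAGE definitions look circular but are consistent: the single remaining vsyscall fixmap slot must map back exactly to the fixed ABI address, which the BUILD_BUG_ON() in vsyscall_64.c (later in this diff) enforces. A stand-alone check of the arithmetic (hosted C on a 64-bit machine; the constants mirror the x86-64 definitions above, assuming 4 KiB pages and 2 MiB PMDs):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PMD_SHIFT	21
#define round_up(x, y)	((((x) - 1) | ((y) - 1)) + 1)	/* y is a power of 2 */

#define VSYSCALL_ADDR	(-10UL << 20)
#define FIXADDR_TOP	(round_up(VSYSCALL_ADDR + PAGE_SIZE, 1UL << PMD_SHIFT) - \
			 PAGE_SIZE)
#define VSYSCALL_PAGE	((FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT)
#define fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))

int main(void)
{
	/* Prints: VSYSCALL_PAGE=511 fix_to_virt=0xffffffffff600000 */
	printf("VSYSCALL_PAGE=%lu fix_to_virt=%#lx\n",
	       (unsigned long)VSYSCALL_PAGE,
	       (unsigned long)fix_to_virt(VSYSCALL_PAGE));
	return fix_to_virt(VSYSCALL_PAGE) == VSYSCALL_ADDR ? 0 : 1;
}
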
arch/x86/include/asm/mmu.h
... ... @@ -18,7 +18,7 @@
18 18 #endif
19 19  
20 20 struct mutex lock;
21   - void *vdso;
  21 + void __user *vdso;
22 22 } mm_context_t;
23 23  
24 24 #ifdef CONFIG_SMP
arch/x86/include/asm/proto.h
... ... @@ -12,8 +12,6 @@
12 12 void ia32_cstar_target(void);
13 13 void ia32_sysenter_target(void);
14 14  
15   -void syscall32_cpu_init(void);
16   -
17 15 void x86_configure_nx(void);
18 16 void x86_report_nx(void);
19 17  
arch/x86/include/asm/vdso.h
... ... @@ -3,63 +3,51 @@
3 3  
4 4 #include <asm/page_types.h>
5 5 #include <linux/linkage.h>
  6 +#include <linux/init.h>
6 7  
7   -#ifdef __ASSEMBLER__
  8 +#ifndef __ASSEMBLER__
8 9  
9   -#define DEFINE_VDSO_IMAGE(symname, filename) \
10   -__PAGE_ALIGNED_DATA ; \
11   - .globl symname##_start, symname##_end ; \
12   - .align PAGE_SIZE ; \
13   - symname##_start: ; \
14   - .incbin filename ; \
15   - symname##_end: ; \
16   - .align PAGE_SIZE /* extra data here leaks to userspace. */ ; \
17   - \
18   -.previous ; \
19   - \
20   - .globl symname##_pages ; \
21   - .bss ; \
22   - .align 8 ; \
23   - .type symname##_pages, @object ; \
24   - symname##_pages: ; \
25   - .zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * (BITS_PER_LONG / 8) ; \
26   - .size symname##_pages, .-symname##_pages
  10 +#include <linux/mm_types.h>
27 11  
28   -#else
  12 +struct vdso_image {
  13 + void *data;
  14 + unsigned long size; /* Always a multiple of PAGE_SIZE */
29 15  
30   -#define DECLARE_VDSO_IMAGE(symname) \
31   - extern char symname##_start[], symname##_end[]; \
32   - extern struct page *symname##_pages[]
  16 + /* text_mapping.pages is big enough for data/size page pointers */
  17 + struct vm_special_mapping text_mapping;
33 18  
34   -#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
  19 + unsigned long alt, alt_len;
35 20  
36   -#include <asm/vdso32.h>
  21 + unsigned long sym_end_mapping; /* Total size of the mapping */
37 22  
38   -DECLARE_VDSO_IMAGE(vdso32_int80);
39   -#ifdef CONFIG_COMPAT
40   -DECLARE_VDSO_IMAGE(vdso32_syscall);
  23 + unsigned long sym_vvar_page;
  24 + unsigned long sym_hpet_page;
  25 + unsigned long sym_VDSO32_NOTE_MASK;
  26 + unsigned long sym___kernel_sigreturn;
  27 + unsigned long sym___kernel_rt_sigreturn;
  28 + unsigned long sym___kernel_vsyscall;
  29 + unsigned long sym_VDSO32_SYSENTER_RETURN;
  30 +};
  31 +
  32 +#ifdef CONFIG_X86_64
  33 +extern const struct vdso_image vdso_image_64;
41 34 #endif
42   -DECLARE_VDSO_IMAGE(vdso32_sysenter);
43 35  
44   -/*
45   - * Given a pointer to the vDSO image, find the pointer to VDSO32_name
46   - * as that symbol is defined in the vDSO sources or linker script.
47   - */
48   -#define VDSO32_SYMBOL(base, name) \
49   -({ \
50   - extern const char VDSO32_##name[]; \
51   - (void __user *)(VDSO32_##name + (unsigned long)(base)); \
52   -})
  36 +#ifdef CONFIG_X86_X32
  37 +extern const struct vdso_image vdso_image_x32;
53 38 #endif
54 39  
55   -/*
56   - * These symbols are defined with the addresses in the vsyscall page.
57   - * See vsyscall-sigreturn.S.
58   - */
59   -extern void __user __kernel_sigreturn;
60   -extern void __user __kernel_rt_sigreturn;
  40 +#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
  41 +extern const struct vdso_image vdso_image_32_int80;
  42 +#ifdef CONFIG_COMPAT
  43 +extern const struct vdso_image vdso_image_32_syscall;
  44 +#endif
  45 +extern const struct vdso_image vdso_image_32_sysenter;
61 46  
62   -void __init patch_vdso32(void *vdso, size_t len);
  47 +extern const struct vdso_image *selected_vdso32;
  48 +#endif
  49 +
  50 +extern void __init init_vdso_image(const struct vdso_image *image);
63 51  
64 52 #endif /* __ASSEMBLER__ */
65 53  
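
For orientation, a condensed sketch of how these struct vdso_image fields are consumed by map_vdso() in arch/x86/vdso/vma.c further down in this diff (simplified: locking, error handling and the remap_pfn_range() calls that back sym_vvar_page/sym_hpet_page are omitted):

	addr = get_unmapped_area(NULL, 0, image->sym_end_mapping, 0, 0);
	current->mm->context.vdso = (void __user *)addr;

	/* [addr, addr + image->size): the vDSO text, backed by
	 * image->text_mapping.pages and shown as "[vdso]" in maps */
	_install_special_mapping(mm, addr, image->size,
				 VM_READ|VM_EXEC|
				 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				 &image->text_mapping);

	/* [addr + image->size, addr + image->sym_end_mapping): the
	 * "[vvar]" area; sym_vvar_page and sym_hpet_page are offsets
	 * from addr into this range */
	_install_special_mapping(mm, addr + image->size,
				 image->sym_end_mapping - image->size,
				 VM_READ, &vvar_mapping);
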
arch/x86/include/asm/vdso32.h
1   -#ifndef _ASM_X86_VDSO32_H
2   -#define _ASM_X86_VDSO32_H
3   -
4   -#define VDSO_BASE_PAGE 0
5   -#define VDSO_VVAR_PAGE 1
6   -#define VDSO_HPET_PAGE 2
7   -#define VDSO_PAGES 3
8   -#define VDSO_PREV_PAGES 2
9   -#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
10   -
11   -#endif
arch/x86/include/asm/vvar.h
... ... @@ -29,30 +29,12 @@
29 29  
30 30 #else
31 31  
32   -#ifdef BUILD_VDSO32
  32 +extern char __vvar_page;
33 33  
34 34 #define DECLARE_VVAR(offset, type, name) \
35 35 extern type vvar_ ## name __attribute__((visibility("hidden")));
36 36  
37 37 #define VVAR(name) (vvar_ ## name)
38   -
39   -#else
40   -
41   -extern char __vvar_page;
42   -
43   -/* Base address of vvars. This is not ABI. */
44   -#ifdef CONFIG_X86_64
45   -#define VVAR_ADDRESS (-10*1024*1024 - 4096)
46   -#else
47   -#define VVAR_ADDRESS (&__vvar_page)
48   -#endif
49   -
50   -#define DECLARE_VVAR(offset, type, name) \
51   - static type const * const vvaraddr_ ## name = \
52   - (void *)(VVAR_ADDRESS + (offset));
53   -
54   -#define VVAR(name) (*vvaraddr_ ## name)
55   -#endif
56 38  
57 39 #define DEFINE_VVAR(type, name) \
58 40 type name \
arch/x86/include/uapi/asm/vsyscall.h
... ... @@ -7,12 +7,7 @@
7 7 __NR_vgetcpu,
8 8 };
9 9  
10   -#define VSYSCALL_START (-10UL << 20)
11   -#define VSYSCALL_SIZE 1024
12   -#define VSYSCALL_END (-2UL << 20)
13   -#define VSYSCALL_MAPPED_PAGES 1
14   -#define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
15   -
  10 +#define VSYSCALL_ADDR (-10UL << 20)
16 11  
17 12 #endif /* _UAPI_ASM_X86_VSYSCALL_H */
arch/x86/kernel/cpu/common.c
... ... @@ -20,6 +20,7 @@
20 20 #include <asm/processor.h>
21 21 #include <asm/debugreg.h>
22 22 #include <asm/sections.h>
  23 +#include <asm/vsyscall.h>
23 24 #include <linux/topology.h>
24 25 #include <linux/cpumask.h>
25 26 #include <asm/pgtable.h>
... ... @@ -952,6 +953,38 @@
952 953 vgetcpu_mode = VGETCPU_RDTSCP;
953 954 else
954 955 vgetcpu_mode = VGETCPU_LSL;
  956 +}
  957 +
  958 +/* May not be __init: called during resume */
  959 +static void syscall32_cpu_init(void)
  960 +{
  961 + /* Load these always in case some future AMD CPU supports
  962 + SYSENTER from compat mode too. */
  963 + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
  964 + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
  965 + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
  966 +
  967 + wrmsrl(MSR_CSTAR, ia32_cstar_target);
  968 +}
  969 +#endif
  970 +
  971 +#ifdef CONFIG_X86_32
  972 +void enable_sep_cpu(void)
  973 +{
  974 + int cpu = get_cpu();
  975 + struct tss_struct *tss = &per_cpu(init_tss, cpu);
  976 +
  977 + if (!boot_cpu_has(X86_FEATURE_SEP)) {
  978 + put_cpu();
  979 + return;
  980 + }
  981 +
  982 + tss->x86_tss.ss1 = __KERNEL_CS;
  983 + tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
  984 + wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
  985 + wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
  986 + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
  987 + put_cpu();
955 988 }
956 989 #endif
957 990  
arch/x86/kernel/hpet.c
... ... @@ -74,9 +74,6 @@
74 74 static inline void hpet_set_mapping(void)
75 75 {
76 76 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
77   -#ifdef CONFIG_X86_64
78   - __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE);
79   -#endif
80 77 }
81 78  
82 79 static inline void hpet_clear_mapping(void)
arch/x86/kernel/signal.c
... ... @@ -298,7 +298,8 @@
298 298 }
299 299  
300 300 if (current->mm->context.vdso)
301   - restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
  301 + restorer = current->mm->context.vdso +
  302 + selected_vdso32->sym___kernel_sigreturn;
302 303 else
303 304 restorer = &frame->retcode;
304 305 if (ksig->ka.sa.sa_flags & SA_RESTORER)
... ... @@ -361,7 +362,8 @@
361 362 save_altstack_ex(&frame->uc.uc_stack, regs->sp);
362 363  
363 364 /* Set up to return from userspace. */
364   - restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
  365 + restorer = current->mm->context.vdso +
  366 + selected_vdso32->sym___kernel_sigreturn;
365 367 if (ksig->ka.sa.sa_flags & SA_RESTORER)
366 368 restorer = ksig->ka.sa.sa_restorer;
367 369 put_user_ex(restorer, &frame->pretcode);
arch/x86/kernel/vsyscall_64.c
... ... @@ -91,7 +91,7 @@
91 91 {
92 92 int nr;
93 93  
94   - if ((addr & ~0xC00UL) != VSYSCALL_START)
  94 + if ((addr & ~0xC00UL) != VSYSCALL_ADDR)
95 95 return -EINVAL;
96 96  
97 97 nr = (addr & 0xC00UL) >> 10;
98 98  
99 99  
100 100  
... ... @@ -330,24 +330,17 @@
330 330 {
331 331 extern char __vsyscall_page;
332 332 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
333   - unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
334 333  
335   - __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
  334 + __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
336 335 vsyscall_mode == NATIVE
337 336 ? PAGE_KERNEL_VSYSCALL
338 337 : PAGE_KERNEL_VVAR);
339   - BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
340   - (unsigned long)VSYSCALL_START);
341   -
342   - __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
343   - BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
344   - (unsigned long)VVAR_ADDRESS);
  338 + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
  339 + (unsigned long)VSYSCALL_ADDR);
345 340 }
346 341  
347 342 static int __init vsyscall_init(void)
348 343 {
349   - BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE));
350   -
351 344 cpu_notifier_register_begin();
352 345  
353 346 on_each_cpu(cpu_vsyscall_init, NULL, 1);
arch/x86/mm/fault.c
... ... @@ -18,7 +18,8 @@
18 18 #include <asm/traps.h> /* dotraplinkage, ... */
19 19 #include <asm/pgalloc.h> /* pgd_*(), ... */
20 20 #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
21   -#include <asm/fixmap.h> /* VSYSCALL_START */
  21 +#include <asm/fixmap.h> /* VSYSCALL_ADDR */
  22 +#include <asm/vsyscall.h> /* emulate_vsyscall */
22 23  
23 24 #define CREATE_TRACE_POINTS
24 25 #include <asm/trace/exceptions.h>
... ... @@ -771,7 +772,7 @@
771 772 * emulation.
772 773 */
773 774 if (unlikely((error_code & PF_INSTR) &&
774   - ((address & ~0xfff) == VSYSCALL_START))) {
  775 + ((address & ~0xfff) == VSYSCALL_ADDR))) {
775 776 if (emulate_vsyscall(regs, address))
776 777 return;
777 778 }
arch/x86/mm/init_64.c
... ... @@ -1055,8 +1055,8 @@
1055 1055 after_bootmem = 1;
1056 1056  
1057 1057 /* Register memory areas for /proc/kcore */
1058   - kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
1059   - VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
  1058 + kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
  1059 + PAGE_SIZE, KCORE_OTHER);
1060 1060  
1061 1061 mem_init_print_info(NULL);
1062 1062 }
1063 1063  
1064 1064  
... ... @@ -1185,11 +1185,19 @@
1185 1185 * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
1186 1186 * not need special handling anymore:
1187 1187 */
  1188 +static const char *gate_vma_name(struct vm_area_struct *vma)
  1189 +{
  1190 + return "[vsyscall]";
  1191 +}
  1192 +static struct vm_operations_struct gate_vma_ops = {
  1193 + .name = gate_vma_name,
  1194 +};
1188 1195 static struct vm_area_struct gate_vma = {
1189   - .vm_start = VSYSCALL_START,
1190   - .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE),
  1196 + .vm_start = VSYSCALL_ADDR,
  1197 + .vm_end = VSYSCALL_ADDR + PAGE_SIZE,
1191 1198 .vm_page_prot = PAGE_READONLY_EXEC,
1192   - .vm_flags = VM_READ | VM_EXEC
  1199 + .vm_flags = VM_READ | VM_EXEC,
  1200 + .vm_ops = &gate_vma_ops,
1193 1201 };
1194 1202  
1195 1203 struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
... ... @@ -1218,16 +1226,7 @@
1218 1226 */
1219 1227 int in_gate_area_no_mm(unsigned long addr)
1220 1228 {
1221   - return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
1222   -}
1223   -
1224   -const char *arch_vma_name(struct vm_area_struct *vma)
1225   -{
1226   - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
1227   - return "[vdso]";
1228   - if (vma == &gate_vma)
1229   - return "[vsyscall]";
1230   - return NULL;
  1229 + return (addr & PAGE_MASK) == VSYSCALL_ADDR;
1231 1230 }
1232 1231  
1233 1232 static unsigned long probe_memory_block_size(void)
arch/x86/mm/ioremap.c
... ... @@ -367,6 +367,12 @@
367 367 {
368 368 pmd_t *pmd;
369 369  
  370 +#ifdef CONFIG_X86_64
  371 + BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
  372 +#else
  373 + WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
  374 +#endif
  375 +
370 376 early_ioremap_setup();
371 377  
372 378 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
arch/x86/mm/pgtable.c
... ... @@ -456,9 +456,9 @@
456 456 {
457 457 #ifdef CONFIG_X86_32
458 458 BUG_ON(fixmaps_set > 0);
459   - printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
460   - (int)-reserve);
461   - __FIXADDR_TOP = -reserve - PAGE_SIZE;
  459 + __FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
  460 + printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
  461 + -reserve, __FIXADDR_TOP + PAGE_SIZE);
462 462 #endif
463 463 }
464 464  
arch/x86/um/vdso/vma.c
... ... @@ -12,7 +12,7 @@
12 12 #include <asm/page.h>
13 13 #include <linux/init.h>
14 14  
15   -unsigned int __read_mostly vdso_enabled = 1;
  15 +static unsigned int __read_mostly vdso_enabled = 1;
16 16 unsigned long um_vdso_addr;
17 17  
18 18 extern unsigned long task_size;
arch/x86/vdso/.gitignore
1 1 vdso.lds
2   -vdso-syms.lds
3 2 vdsox32.lds
4   -vdsox32-syms.lds
5   -vdso32-syms.lds
6 3 vdso32-syscall-syms.lds
7 4 vdso32-sysenter-syms.lds
8 5 vdso32-int80-syms.lds
  6 +vdso-image-*.c
  7 +vdso2c
arch/x86/vdso/Makefile
... ... @@ -24,16 +24,31 @@
24 24  
25 25 # files to link into kernel
26 26 obj-y += vma.o
27   -obj-$(VDSO64-y) += vdso.o
28   -obj-$(VDSOX32-y) += vdsox32.o
29   -obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
30 27  
  28 +# vDSO images to build
  29 +vdso_img-$(VDSO64-y) += 64
  30 +vdso_img-$(VDSOX32-y) += x32
  31 +vdso_img-$(VDSO32-y) += 32-int80
  32 +vdso_img-$(CONFIG_COMPAT) += 32-syscall
  33 +vdso_img-$(VDSO32-y) += 32-sysenter
  34 +
  35 +obj-$(VDSO32-y) += vdso32-setup.o
  36 +
31 37 vobjs := $(foreach F,$(vobj64s),$(obj)/$F)
32 38  
33 39 $(obj)/vdso.o: $(obj)/vdso.so
34 40  
35   -targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
  41 +targets += vdso.lds $(vobjs-y)
36 42  
  43 +# Build the vDSO image C files and link them in.
  44 +vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
  45 +vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
  46 +vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
  47 +obj-y += $(vdso_img_objs)
  48 +targets += $(vdso_img_cfiles)
  49 +targets += $(vdso_img_sodbg)
  50 +.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c)
  51 +
37 52 export CPPFLAGS_vdso.lds += -P -C
38 53  
39 54 VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
40 55  
41 56  
... ... @@ -41,15 +56,19 @@
41 56 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
42 57 $(DISABLE_LTO)
43 58  
44   -$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
45   -
46   -$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
  59 +$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
47 60 $(call if_changed,vdso)
48 61  
49   -$(obj)/%.so: OBJCOPYFLAGS := -S
50   -$(obj)/%.so: $(obj)/%.so.dbg FORCE
51   - $(call if_changed,objcopy)
  62 +hostprogs-y += vdso2c
52 63  
  64 +quiet_cmd_vdso2c = VDSO2C $@
  65 +define cmd_vdso2c
  66 + $(obj)/vdso2c $< $@
  67 +endef
  68 +
  69 +$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE
  70 + $(call if_changed,vdso2c)
  71 +
53 72 #
54 73 # Don't omit frame pointers for ease of userspace debugging, but do
55 74 # optimize sibling calls.
56 75  
... ... @@ -68,23 +87,7 @@
68 87 CFLAGS_REMOVE_vgetcpu.o = -pg
69 88 CFLAGS_REMOVE_vvar.o = -pg
70 89  
71   -targets += vdso-syms.lds
72   -obj-$(VDSO64-y) += vdso-syms.lds
73   -
74 90 #
75   -# Match symbols in the DSO that look like VDSO*; produce a file of constants.
76   -#
77   -sed-vdsosym := -e 's/^00*/0/' \
78   - -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
79   -quiet_cmd_vdsosym = VDSOSYM $@
80   -define cmd_vdsosym
81   - $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
82   -endef
83   -
84   -$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
85   - $(call if_changed,vdsosym)
86   -
87   -#
88 91 # X32 processes use x32 vDSO to access 64bit kernel data.
89 92 #
90 93 # Build x32 vDSO image:
... ... @@ -94,9 +97,6 @@
94 97 # so that it can reach 64bit address space with 64bit pointers.
95 98 #
96 99  
97   -targets += vdsox32-syms.lds
98   -obj-$(VDSOX32-y) += vdsox32-syms.lds
99   -
100 100 CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
101 101 VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
102 102 -Wl,-soname=linux-vdso.so.1 \
103 103  
104 104  
... ... @@ -113,17 +113,14 @@
113 113 $(obj)/%-x32.o: $(obj)/%.o FORCE
114 114 $(call if_changed,x32)
115 115  
116   -targets += vdsox32.so vdsox32.so.dbg vdsox32.lds $(vobjx32s-y)
  116 +targets += vdsox32.lds $(vobjx32s-y)
117 117  
118   -$(obj)/vdsox32.o: $(src)/vdsox32.S $(obj)/vdsox32.so
119   -
120 118 $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
121 119 $(call if_changed,vdso)
122 120  
123 121 #
124 122 # Build multiple 32-bit vDSO images to choose from at boot time.
125 123 #
126   -obj-$(VDSO32-y) += vdso32-syms.lds
127 124 vdso32.so-$(VDSO32-y) += int80
128 125 vdso32.so-$(CONFIG_COMPAT) += syscall
129 126 vdso32.so-$(VDSO32-y) += sysenter
130 127  
131 128  
... ... @@ -138,11 +135,9 @@
138 135 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
139 136  
140 137 targets += vdso32/vdso32.lds
141   -targets += $(vdso32-images) $(vdso32-images:=.dbg)
142 138 targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
  139 +targets += vdso32/vclock_gettime.o
143 140  
144   -extra-y += $(vdso32-images)
145   -
146 141 $(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
147 142  
148 143 KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
... ... @@ -166,27 +161,6 @@
166 161 $(obj)/vdso32/%.o
167 162 $(call if_changed,vdso)
168 163  
169   -# Make vdso32-*-syms.lds from each image, and then make sure they match.
170   -# The only difference should be that some do not define VDSO32_SYSENTER_RETURN.
171   -
172   -targets += vdso32-syms.lds $(vdso32.so-y:%=vdso32-%-syms.lds)
173   -
174   -quiet_cmd_vdso32sym = VDSOSYM $@
175   -define cmd_vdso32sym
176   - if LC_ALL=C sort -u $(filter-out FORCE,$^) > $(@D)/.tmp_$(@F) && \
177   - $(foreach H,$(filter-out FORCE,$^),\
178   - if grep -q VDSO32_SYSENTER_RETURN $H; \
179   - then diff -u $(@D)/.tmp_$(@F) $H; \
180   - else sed /VDSO32_SYSENTER_RETURN/d $(@D)/.tmp_$(@F) | \
181   - diff -u - $H; fi &&) : ;\
182   - then mv -f $(@D)/.tmp_$(@F) $@; \
183   - else rm -f $(@D)/.tmp_$(@F); exit 1; \
184   - fi
185   -endef
186   -
187   -$(obj)/vdso32-syms.lds: $(vdso32.so-y:%=$(obj)/vdso32-%-syms.lds) FORCE
188   - $(call if_changed,vdso32sym)
189   -
190 164 #
191 165 # The DSO images are built using a special linker script.
192 166 #
... ... @@ -197,7 +171,7 @@
197 171 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
198 172  
199 173 VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
200   - $(LTO_CFLAGS)
  174 + -Wl,-Bsymbolic $(LTO_CFLAGS)
201 175 GCOV_PROFILE := n
202 176  
203 177 #
arch/x86/vdso/vclock_gettime.c
... ... @@ -30,9 +30,12 @@
30 30 extern time_t __vdso_time(time_t *t);
31 31  
32 32 #ifdef CONFIG_HPET_TIMER
33   -static inline u32 read_hpet_counter(const volatile void *addr)
  33 +extern u8 hpet_page
  34 + __attribute__((visibility("hidden")));
  35 +
  36 +static notrace cycle_t vread_hpet(void)
34 37 {
35   - return *(const volatile u32 *) (addr + HPET_COUNTER);
  38 + return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
36 39 }
37 40 #endif
38 41  
... ... @@ -43,11 +46,6 @@
43 46 #include <asm/fixmap.h>
44 47 #include <asm/pvclock.h>
45 48  
46   -static notrace cycle_t vread_hpet(void)
47   -{
48   - return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
49   -}
50   -
51 49 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
52 50 {
53 51 long ret;
... ... @@ -137,16 +135,6 @@
137 135  
138 136 #else
139 137  
140   -extern u8 hpet_page
141   - __attribute__((visibility("hidden")));
142   -
143   -#ifdef CONFIG_HPET_TIMER
144   -static notrace cycle_t vread_hpet(void)
145   -{
146   - return read_hpet_counter((const void *)(&hpet_page));
147   -}
148   -#endif
149   -
150 138 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
151 139 {
152 140 long ret;
... ... @@ -154,7 +142,7 @@
154 142 asm(
155 143 "mov %%ebx, %%edx \n"
156 144 "mov %2, %%ebx \n"
157   - "call VDSO32_vsyscall \n"
  145 + "call __kernel_vsyscall \n"
158 146 "mov %%edx, %%ebx \n"
159 147 : "=a" (ret)
160 148 : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
... ... @@ -169,7 +157,7 @@
169 157 asm(
170 158 "mov %%ebx, %%edx \n"
171 159 "mov %2, %%ebx \n"
172   - "call VDSO32_vsyscall \n"
  160 + "call __kernel_vsyscall \n"
173 161 "mov %%edx, %%ebx \n"
174 162 : "=a" (ret)
175 163 : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
arch/x86/vdso/vdso-layout.lds.S
  1 +#include <asm/vdso.h>
  2 +
1 3 /*
2 4 * Linker script for vDSO. This is an ELF shared object prelinked to
3 5 * its virtual address, and with only one read-only segment.
... ... @@ -6,20 +8,6 @@
6 8  
7 9 SECTIONS
8 10 {
9   -#ifdef BUILD_VDSO32
10   -#include <asm/vdso32.h>
11   -
12   - hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
13   -
14   - vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
15   -
16   - /* Place all vvars at the offsets in asm/vvar.h. */
17   -#define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset;
18   -#define __VVAR_KERNEL_LDS
19   -#include <asm/vvar.h>
20   -#undef __VVAR_KERNEL_LDS
21   -#undef EMIT_VVAR
22   -#endif
23 11 . = SIZEOF_HEADERS;
24 12  
25 13 .hash : { *(.hash) } :text
26 14  
... ... @@ -60,9 +48,29 @@
60 48 .text : { *(.text*) } :text =0x90909090,
61 49  
62 50 /*
63   - * The comma above works around a bug in gold:
64   - * https://sourceware.org/bugzilla/show_bug.cgi?id=16804
  51 + * The remainder of the vDSO consists of special pages that are
  52 + * shared between the kernel and userspace. It needs to be at the
  53 + * end so that it doesn't overlap the mapping of the actual
  54 + * vDSO image.
65 55 */
  56 +
  57 + . = ALIGN(PAGE_SIZE);
  58 + vvar_page = .;
  59 +
  60 + /* Place all vvars at the offsets in asm/vvar.h. */
  61 +#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
  62 +#define __VVAR_KERNEL_LDS
  63 +#include <asm/vvar.h>
  64 +#undef __VVAR_KERNEL_LDS
  65 +#undef EMIT_VVAR
  66 +
  67 + . = vvar_page + PAGE_SIZE;
  68 +
  69 + hpet_page = .;
  70 + . = . + PAGE_SIZE;
  71 +
  72 + . = ALIGN(PAGE_SIZE);
  73 + end_mapping = .;
66 74  
67 75 /DISCARD/ : {
68 76 *(.discard)
arch/x86/vdso/vdso.S
1   -#include <asm/vdso.h>
2   -
3   -DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so")
arch/x86/vdso/vdso.lds.S
1 1 /*
2 2 * Linker script for 64-bit vDSO.
3 3 * We #include the file to define the layout details.
4   - * Here we only choose the prelinked virtual address.
5 4 *
6 5 * This file defines the version script giving the user-exported symbols in
7   - * the DSO. We can define local symbols here called VDSO* to make their
8   - * values visible using the asm-x86/vdso.h macros from the kernel proper.
  6 + * the DSO.
9 7 */
10 8  
11   -#define VDSO_PRELINK 0xffffffffff700000
12 9 #include "vdso-layout.lds.S"
13 10  
14 11 /*
... ... @@ -28,6 +25,4 @@
28 25 local: *;
29 26 };
30 27 }
31   -
32   -VDSO64_PRELINK = VDSO_PRELINK;
arch/x86/vdso/vdso2c.c
  1 +#include <inttypes.h>
  2 +#include <stdint.h>
  3 +#include <unistd.h>
  4 +#include <stdarg.h>
  5 +#include <stdlib.h>
  6 +#include <stdio.h>
  7 +#include <string.h>
  8 +#include <fcntl.h>
  9 +#include <err.h>
  10 +
  11 +#include <sys/mman.h>
  12 +#include <sys/types.h>
  13 +
  14 +#include <linux/elf.h>
  15 +#include <linux/types.h>
  16 +
  17 +const char *outfilename;
  18 +
  19 +/* Symbols that we need in vdso2c. */
  20 +enum {
  21 + sym_vvar_page,
  22 + sym_hpet_page,
  23 + sym_end_mapping,
  24 +};
  25 +
  26 +const int special_pages[] = {
  27 + sym_vvar_page,
  28 + sym_hpet_page,
  29 +};
  30 +
  31 +char const * const required_syms[] = {
  32 + [sym_vvar_page] = "vvar_page",
  33 + [sym_hpet_page] = "hpet_page",
  34 + [sym_end_mapping] = "end_mapping",
  35 + "VDSO32_NOTE_MASK",
  36 + "VDSO32_SYSENTER_RETURN",
  37 + "__kernel_vsyscall",
  38 + "__kernel_sigreturn",
  39 + "__kernel_rt_sigreturn",
  40 +};
  41 +
  42 +__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
  43 +static void fail(const char *format, ...)
  44 +{
  45 + va_list ap;
  46 + va_start(ap, format);
  47 + fprintf(stderr, "Error: ");
  48 + vfprintf(stderr, format, ap);
  49 + unlink(outfilename);
  50 + exit(1);
  51 + va_end(ap);
  52 +}
  53 +
  54 +/*
  55 + * Evil macros to do a little-endian read.
  56 + */
  57 +#define GLE(x, bits, ifnot) \
  58 + __builtin_choose_expr( \
  59 + (sizeof(x) == bits/8), \
  60 + (__typeof__(x))le##bits##toh(x), ifnot)
  61 +
  62 +extern void bad_get_le(uint64_t);
  63 +#define LAST_LE(x) \
  64 + __builtin_choose_expr(sizeof(x) == 1, (x), bad_get_le(x))
  65 +
  66 +#define GET_LE(x) \
  67 + GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_LE(x))))
  68 +
  69 +#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
  70 +
  71 +#define BITS 64
  72 +#define GOFUNC go64
  73 +#define Elf_Ehdr Elf64_Ehdr
  74 +#define Elf_Shdr Elf64_Shdr
  75 +#define Elf_Phdr Elf64_Phdr
  76 +#define Elf_Sym Elf64_Sym
  77 +#define Elf_Dyn Elf64_Dyn
  78 +#include "vdso2c.h"
  79 +#undef BITS
  80 +#undef GOFUNC
  81 +#undef Elf_Ehdr
  82 +#undef Elf_Shdr
  83 +#undef Elf_Phdr
  84 +#undef Elf_Sym
  85 +#undef Elf_Dyn
  86 +
  87 +#define BITS 32
  88 +#define GOFUNC go32
  89 +#define Elf_Ehdr Elf32_Ehdr
  90 +#define Elf_Shdr Elf32_Shdr
  91 +#define Elf_Phdr Elf32_Phdr
  92 +#define Elf_Sym Elf32_Sym
  93 +#define Elf_Dyn Elf32_Dyn
  94 +#include "vdso2c.h"
  95 +#undef BITS
  96 +#undef GOFUNC
  97 +#undef Elf_Ehdr
  98 +#undef Elf_Shdr
  99 +#undef Elf_Phdr
  100 +#undef Elf_Sym
  101 +#undef Elf_Dyn
  102 +
  103 +static void go(void *addr, size_t len, FILE *outfile, const char *name)
  104 +{
  105 + Elf64_Ehdr *hdr = (Elf64_Ehdr *)addr;
  106 +
  107 + if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
  108 + go64(addr, len, outfile, name);
  109 + } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
  110 + go32(addr, len, outfile, name);
  111 + } else {
  112 + fail("unknown ELF class\n");
  113 + }
  114 +}
  115 +
  116 +int main(int argc, char **argv)
  117 +{
  118 + int fd;
  119 + off_t len;
  120 + void *addr;
  121 + FILE *outfile;
  122 + char *name, *tmp;
  123 + int namelen;
  124 +
  125 + if (argc != 3) {
  126 + printf("Usage: vdso2c INPUT OUTPUT\n");
  127 + return 1;
  128 + }
  129 +
  130 + /*
  131 + * Figure out the struct name. If we're writing to a .so file,
  132 + * generate raw output insted.
  133 + */
  134 + name = strdup(argv[2]);
  135 + namelen = strlen(name);
  136 + if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
  137 + name = NULL;
  138 + } else {
  139 + tmp = strrchr(name, '/');
  140 + if (tmp)
  141 + name = tmp + 1;
  142 + tmp = strchr(name, '.');
  143 + if (tmp)
  144 + *tmp = '\0';
  145 + for (tmp = name; *tmp; tmp++)
  146 + if (*tmp == '-')
  147 + *tmp = '_';
  148 + }
  149 +
  150 + fd = open(argv[1], O_RDONLY);
  151 + if (fd == -1)
  152 + err(1, "%s", argv[1]);
  153 +
  154 + len = lseek(fd, 0, SEEK_END);
  155 + if (len == (off_t)-1)
  156 + err(1, "lseek");
  157 +
  158 + addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
  159 + if (addr == MAP_FAILED)
  160 + err(1, "mmap");
  161 +
  162 + outfilename = argv[2];
  163 + outfile = fopen(outfilename, "w");
  164 + if (!outfile)
  165 + err(1, "%s", argv[2]);
  166 +
  167 + go(addr, (size_t)len, outfile, name);
  168 +
  169 + munmap(addr, len);
  170 + fclose(outfile);
  171 +
  172 + return 0;
  173 +}
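
The GLE()/GET_LE() macros above are what make vdso2c host-safe: __builtin_choose_expr() selects the le*toh() conversion whose width matches the operand, so every ELF field is read as little-endian no matter what the build host is. A stand-alone sketch of the same trick (plain hosted C, assuming glibc's <endian.h> helpers as vdso2c itself does; the bad_get_le() compile-time trap is dropped for brevity):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define GLE(x, bits, ifnot)						\
	__builtin_choose_expr(sizeof(x) == bits/8,			\
			      (__typeof__(x))le##bits##toh(x), ifnot)
#define GET_LE(x)	GLE(x, 64, GLE(x, 32, GLE(x, 16, (x))))

int main(void)
{
	/* Two little-endian fields, laid out as they would be in the
	 * vdso image file. */
	const unsigned char raw[6] = { 0x34, 0x12, 0x78, 0x56, 0x34, 0x12 };
	uint16_t f16;
	uint32_t f32;

	memcpy(&f16, raw, sizeof(f16));
	memcpy(&f32, raw + 2, sizeof(f32));

	/* Prints "1234 12345678" on both little- and big-endian hosts. */
	printf("%x %x\n", GET_LE(f16), GET_LE(f32));
	return 0;
}
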
arch/x86/vdso/vdso2c.h
  1 +/*
  2 + * This file is included twice from vdso2c.c. It generates code for 32-bit
  3 + * and 64-bit vDSOs. We need both for 64-bit builds, since 32-bit vDSOs
  4 + * are built for 32-bit userspace.
  5 + */
  6 +
  7 +static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
  8 +{
  9 + int found_load = 0;
  10 + unsigned long load_size = -1; /* Work around bogus warning */
  11 + unsigned long data_size;
  12 + Elf_Ehdr *hdr = (Elf_Ehdr *)addr;
  13 + int i;
  14 + unsigned long j;
  15 + Elf_Shdr *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
  16 + *alt_sec = NULL;
  17 + Elf_Dyn *dyn = 0, *dyn_end = 0;
  18 + const char *secstrings;
  19 + uint64_t syms[NSYMS] = {};
  20 +
  21 + Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(hdr->e_phoff));
  22 +
  23 + /* Walk the segment table. */
  24 + for (i = 0; i < GET_LE(hdr->e_phnum); i++) {
  25 + if (GET_LE(pt[i].p_type) == PT_LOAD) {
  26 + if (found_load)
  27 + fail("multiple PT_LOAD segs\n");
  28 +
  29 + if (GET_LE(pt[i].p_offset) != 0 ||
  30 + GET_LE(pt[i].p_vaddr) != 0)
  31 + fail("PT_LOAD in wrong place\n");
  32 +
  33 + if (GET_LE(pt[i].p_memsz) != GET_LE(pt[i].p_filesz))
  34 + fail("cannot handle memsz != filesz\n");
  35 +
  36 + load_size = GET_LE(pt[i].p_memsz);
  37 + found_load = 1;
  38 + } else if (GET_LE(pt[i].p_type) == PT_DYNAMIC) {
  39 + dyn = addr + GET_LE(pt[i].p_offset);
  40 + dyn_end = addr + GET_LE(pt[i].p_offset) +
  41 + GET_LE(pt[i].p_memsz);
  42 + }
  43 + }
  44 + if (!found_load)
  45 + fail("no PT_LOAD seg\n");
  46 + data_size = (load_size + 4095) / 4096 * 4096;
  47 +
  48 + /* Walk the dynamic table */
  49 + for (i = 0; dyn + i < dyn_end &&
  50 + GET_LE(dyn[i].d_tag) != DT_NULL; i++) {
  51 + typeof(dyn[i].d_tag) tag = GET_LE(dyn[i].d_tag);
  52 + if (tag == DT_REL || tag == DT_RELSZ ||
  53 + tag == DT_RELENT || tag == DT_TEXTREL)
  54 + fail("vdso image contains dynamic relocations\n");
  55 + }
  56 +
  57 + /* Walk the section table */
  58 + secstrings_hdr = addr + GET_LE(hdr->e_shoff) +
  59 + GET_LE(hdr->e_shentsize)*GET_LE(hdr->e_shstrndx);
  60 + secstrings = addr + GET_LE(secstrings_hdr->sh_offset);
  61 + for (i = 0; i < GET_LE(hdr->e_shnum); i++) {
  62 + Elf_Shdr *sh = addr + GET_LE(hdr->e_shoff) +
  63 + GET_LE(hdr->e_shentsize) * i;
  64 + if (GET_LE(sh->sh_type) == SHT_SYMTAB)
  65 + symtab_hdr = sh;
  66 +
  67 + if (!strcmp(secstrings + GET_LE(sh->sh_name),
  68 + ".altinstructions"))
  69 + alt_sec = sh;
  70 + }
  71 +
  72 + if (!symtab_hdr)
  73 + fail("no symbol table\n");
  74 +
  75 + strtab_hdr = addr + GET_LE(hdr->e_shoff) +
  76 + GET_LE(hdr->e_shentsize) * GET_LE(symtab_hdr->sh_link);
  77 +
  78 + /* Walk the symbol table */
  79 + for (i = 0;
  80 + i < GET_LE(symtab_hdr->sh_size) / GET_LE(symtab_hdr->sh_entsize);
  81 + i++) {
  82 + int k;
  83 + Elf_Sym *sym = addr + GET_LE(symtab_hdr->sh_offset) +
  84 + GET_LE(symtab_hdr->sh_entsize) * i;
  85 + const char *name = addr + GET_LE(strtab_hdr->sh_offset) +
  86 + GET_LE(sym->st_name);
  87 + for (k = 0; k < NSYMS; k++) {
  88 + if (!strcmp(name, required_syms[k])) {
  89 + if (syms[k]) {
  90 + fail("duplicate symbol %s\n",
  91 + required_syms[k]);
  92 + }
  93 + syms[k] = GET_LE(sym->st_value);
  94 + }
  95 + }
  96 + }
  97 +
  98 + /* Validate mapping addresses. */
  99 + for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
  100 + if (!syms[i])
  101 + continue; /* The mapping isn't used; ignore it. */
  102 +
  103 + if (syms[i] % 4096)
  104 + fail("%s must be a multiple of 4096\n",
  105 + required_syms[i]);
  106 + if (syms[i] < data_size)
  107 + fail("%s must be after the text mapping\n",
  108 + required_syms[i]);
  109 + if (syms[sym_end_mapping] < syms[i] + 4096)
  110 + fail("%s overruns end_mapping\n", required_syms[i]);
  111 + }
  112 + if (syms[sym_end_mapping] % 4096)
  113 + fail("end_mapping must be a multiple of 4096\n");
  114 +
  115 + /* Remove sections. */
  116 + hdr->e_shoff = 0;
  117 + hdr->e_shentsize = 0;
  118 + hdr->e_shnum = 0;
  119 + hdr->e_shstrndx = htole16(SHN_UNDEF);
  120 +
  121 + if (!name) {
  122 + fwrite(addr, load_size, 1, outfile);
  123 + return;
  124 + }
  125 +
  126 + fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
  127 + fprintf(outfile, "#include <linux/linkage.h>\n");
  128 + fprintf(outfile, "#include <asm/page_types.h>\n");
  129 + fprintf(outfile, "#include <asm/vdso.h>\n");
  130 + fprintf(outfile, "\n");
  131 + fprintf(outfile,
  132 + "static unsigned char raw_data[%lu] __page_aligned_data = {",
  133 + data_size);
  134 + for (j = 0; j < load_size; j++) {
  135 + if (j % 10 == 0)
  136 + fprintf(outfile, "\n\t");
  137 + fprintf(outfile, "0x%02X, ", (int)((unsigned char *)addr)[j]);
  138 + }
  139 + fprintf(outfile, "\n};\n\n");
  140 +
  141 + fprintf(outfile, "static struct page *pages[%lu];\n\n",
  142 + data_size / 4096);
  143 +
  144 + fprintf(outfile, "const struct vdso_image %s = {\n", name);
  145 + fprintf(outfile, "\t.data = raw_data,\n");
  146 + fprintf(outfile, "\t.size = %lu,\n", data_size);
  147 + fprintf(outfile, "\t.text_mapping = {\n");
  148 + fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
  149 + fprintf(outfile, "\t\t.pages = pages,\n");
  150 + fprintf(outfile, "\t},\n");
  151 + if (alt_sec) {
  152 + fprintf(outfile, "\t.alt = %lu,\n",
  153 + (unsigned long)GET_LE(alt_sec->sh_offset));
  154 + fprintf(outfile, "\t.alt_len = %lu,\n",
  155 + (unsigned long)GET_LE(alt_sec->sh_size));
  156 + }
  157 + for (i = 0; i < NSYMS; i++) {
  158 + if (syms[i])
  159 + fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n",
  160 + required_syms[i], syms[i]);
  161 + }
  162 + fprintf(outfile, "};\n");
  163 +}
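
For reference, the abridged shape of a generated vdso-image-*.c file produced by the fprintf() calls above (the numeric values here are placeholders; real offsets come from the particular build, and a .sym_* line is only emitted for symbols actually present in that image):

/* AUTOMATICALLY GENERATED -- DO NOT EDIT */

#include <linux/linkage.h>
#include <asm/page_types.h>
#include <asm/vdso.h>

static unsigned char raw_data[8192] __page_aligned_data = {
	0x7F, 0x45, 0x4C, 0x46, /* ...the stripped vdso64.so.dbg bytes... */
};

static struct page *pages[2];

const struct vdso_image vdso_image_64 = {
	.data = raw_data,
	.size = 8192,
	.text_mapping = {
		.name = "[vdso]",
		.pages = pages,
	},
	.alt = 3328,			/* offset of .altinstructions */
	.alt_len = 64,
	.sym_vvar_page = 0x2000,
	.sym_hpet_page = 0x3000,
	.sym_end_mapping = 0x4000,
};
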
arch/x86/vdso/vdso32-setup.c
... ... @@ -8,27 +8,12 @@
8 8  
9 9 #include <linux/init.h>
10 10 #include <linux/smp.h>
11   -#include <linux/thread_info.h>
12   -#include <linux/sched.h>
13   -#include <linux/gfp.h>
14   -#include <linux/string.h>
15   -#include <linux/elf.h>
16   -#include <linux/mm.h>
17   -#include <linux/err.h>
18   -#include <linux/module.h>
19   -#include <linux/slab.h>
  11 +#include <linux/kernel.h>
  12 +#include <linux/mm_types.h>
20 13  
21 14 #include <asm/cpufeature.h>
22   -#include <asm/msr.h>
23   -#include <asm/pgtable.h>
24   -#include <asm/unistd.h>
25   -#include <asm/elf.h>
26   -#include <asm/tlbflush.h>
  15 +#include <asm/processor.h>
27 16 #include <asm/vdso.h>
28   -#include <asm/proto.h>
29   -#include <asm/fixmap.h>
30   -#include <asm/hpet.h>
31   -#include <asm/vvar.h>
32 17  
33 18 #ifdef CONFIG_COMPAT_VDSO
34 19 #define VDSO_DEFAULT 0
35 20  
36 21  
37 22  
38 23  
... ... @@ -36,22 +21,17 @@
36 21 #define VDSO_DEFAULT 1
37 22 #endif
38 23  
39   -#ifdef CONFIG_X86_64
40   -#define vdso_enabled sysctl_vsyscall32
41   -#define arch_setup_additional_pages syscall32_setup_pages
42   -#endif
43   -
44 24 /*
45 25 * Should the kernel map a VDSO page into processes and pass its
46 26 * address down to glibc upon exec()?
47 27 */
48   -unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
  28 +unsigned int __read_mostly vdso32_enabled = VDSO_DEFAULT;
49 29  
50   -static int __init vdso_setup(char *s)
  30 +static int __init vdso32_setup(char *s)
51 31 {
52   - vdso_enabled = simple_strtoul(s, NULL, 0);
  32 + vdso32_enabled = simple_strtoul(s, NULL, 0);
53 33  
54   - if (vdso_enabled > 1)
  34 + if (vdso32_enabled > 1)
55 35 pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
56 36  
57 37 return 1;
58 38  
59 39  
60 40  
61 41  
62 42  
63 43  
64 44  
65 45  
66 46  
67 47  
68 48  
... ... @@ -62,177 +42,45 @@
62 42 * behavior on both 64-bit and 32-bit kernels.
63 43 * On 32-bit kernels, vdso=[012] means the same thing.
64 44 */
65   -__setup("vdso32=", vdso_setup);
  45 +__setup("vdso32=", vdso32_setup);
66 46  
67 47 #ifdef CONFIG_X86_32
68   -__setup_param("vdso=", vdso32_setup, vdso_setup, 0);
69   -
70   -EXPORT_SYMBOL_GPL(vdso_enabled);
  48 +__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
71 49 #endif
72 50  
73   -static struct page **vdso32_pages;
74   -static unsigned vdso32_size;
75   -
76 51 #ifdef CONFIG_X86_64
77 52  
78 53 #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
79 54 #define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))
80 55  
81   -/* May not be __init: called during resume */
82   -void syscall32_cpu_init(void)
83   -{
84   - /* Load these always in case some future AMD CPU supports
85   - SYSENTER from compat mode too. */
86   - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
87   - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
88   - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
89   -
90   - wrmsrl(MSR_CSTAR, ia32_cstar_target);
91   -}
92   -
93 56 #else /* CONFIG_X86_32 */
94 57  
95 58 #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
96 59 #define vdso32_syscall() (0)
97 60  
98   -void enable_sep_cpu(void)
99   -{
100   - int cpu = get_cpu();
101   - struct tss_struct *tss = &per_cpu(init_tss, cpu);
102   -
103   - if (!boot_cpu_has(X86_FEATURE_SEP)) {
104   - put_cpu();
105   - return;
106   - }
107   -
108   - tss->x86_tss.ss1 = __KERNEL_CS;
109   - tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
110   - wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
111   - wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
112   - wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
113   - put_cpu();
114   -}
115   -
116 61 #endif /* CONFIG_X86_64 */
117 62  
  63 +#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
  64 +const struct vdso_image *selected_vdso32;
  65 +#endif
  66 +
118 67 int __init sysenter_setup(void)
119 68 {
120   - char *vdso32_start, *vdso32_end;
121   - int npages, i;
122   -
123 69 #ifdef CONFIG_COMPAT
124   - if (vdso32_syscall()) {
125   - vdso32_start = vdso32_syscall_start;
126   - vdso32_end = vdso32_syscall_end;
127   - vdso32_pages = vdso32_syscall_pages;
128   - } else
  70 + if (vdso32_syscall())
  71 + selected_vdso32 = &vdso_image_32_syscall;
  72 + else
129 73 #endif
130   - if (vdso32_sysenter()) {
131   - vdso32_start = vdso32_sysenter_start;
132   - vdso32_end = vdso32_sysenter_end;
133   - vdso32_pages = vdso32_sysenter_pages;
134   - } else {
135   - vdso32_start = vdso32_int80_start;
136   - vdso32_end = vdso32_int80_end;
137   - vdso32_pages = vdso32_int80_pages;
138   - }
  74 + if (vdso32_sysenter())
  75 + selected_vdso32 = &vdso_image_32_sysenter;
  76 + else
  77 + selected_vdso32 = &vdso_image_32_int80;
139 78  
140   - npages = ((vdso32_end - vdso32_start) + PAGE_SIZE - 1) / PAGE_SIZE;
141   - vdso32_size = npages << PAGE_SHIFT;
142   - for (i = 0; i < npages; i++)
143   - vdso32_pages[i] = virt_to_page(vdso32_start + i*PAGE_SIZE);
  79 + init_vdso_image(selected_vdso32);
144 80  
145   - patch_vdso32(vdso32_start, vdso32_size);
146   -
147 81 return 0;
148 82 }
149 83  
150   -/* Setup a VMA at program startup for the vsyscall page */
151   -int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
152   -{
153   - struct mm_struct *mm = current->mm;
154   - unsigned long addr;
155   - int ret = 0;
156   - struct vm_area_struct *vma;
157   -
158   -#ifdef CONFIG_X86_X32_ABI
159   - if (test_thread_flag(TIF_X32))
160   - return x32_setup_additional_pages(bprm, uses_interp);
161   -#endif
162   -
163   - if (vdso_enabled != 1) /* Other values all mean "disabled" */
164   - return 0;
165   -
166   - down_write(&mm->mmap_sem);
167   -
168   - addr = get_unmapped_area(NULL, 0, vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0);
169   - if (IS_ERR_VALUE(addr)) {
170   - ret = addr;
171   - goto up_fail;
172   - }
173   -
174   - addr += VDSO_OFFSET(VDSO_PREV_PAGES);
175   -
176   - current->mm->context.vdso = (void *)addr;
177   -
178   - /*
179   - * MAYWRITE to allow gdb to COW and set breakpoints
180   - */
181   - ret = install_special_mapping(mm,
182   - addr,
183   - vdso32_size,
184   - VM_READ|VM_EXEC|
185   - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
186   - vdso32_pages);
187   -
188   - if (ret)
189   - goto up_fail;
190   -
191   - vma = _install_special_mapping(mm,
192   - addr - VDSO_OFFSET(VDSO_PREV_PAGES),
193   - VDSO_OFFSET(VDSO_PREV_PAGES),
194   - VM_READ,
195   - NULL);
196   -
197   - if (IS_ERR(vma)) {
198   - ret = PTR_ERR(vma);
199   - goto up_fail;
200   - }
201   -
202   - ret = remap_pfn_range(vma,
203   - addr - VDSO_OFFSET(VDSO_VVAR_PAGE),
204   - __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
205   - PAGE_SIZE,
206   - PAGE_READONLY);
207   -
208   - if (ret)
209   - goto up_fail;
210   -
211   -#ifdef CONFIG_HPET_TIMER
212   - if (hpet_address) {
213   - ret = io_remap_pfn_range(vma,
214   - addr - VDSO_OFFSET(VDSO_HPET_PAGE),
215   - hpet_address >> PAGE_SHIFT,
216   - PAGE_SIZE,
217   - pgprot_noncached(PAGE_READONLY));
218   -
219   - if (ret)
220   - goto up_fail;
221   - }
222   -#endif
223   -
224   - current_thread_info()->sysenter_return =
225   - VDSO32_SYMBOL(addr, SYSENTER_RETURN);
226   -
227   - up_fail:
228   - if (ret)
229   - current->mm->context.vdso = NULL;
230   -
231   - up_write(&mm->mmap_sem);
232   -
233   - return ret;
234   -}
235   -
236 84 #ifdef CONFIG_X86_64
237 85  
238 86 subsys_initcall(sysenter_setup);
... ... @@ -244,7 +92,7 @@
244 92 static struct ctl_table abi_table2[] = {
245 93 {
246 94 .procname = "vsyscall32",
247   - .data = &sysctl_vsyscall32,
  95 + .data = &vdso32_enabled,
248 96 .maxlen = sizeof(int),
249 97 .mode = 0644,
250 98 .proc_handler = proc_dointvec
... ... @@ -270,13 +118,6 @@
270 118 #endif
271 119  
272 120 #else /* CONFIG_X86_32 */
273   -
274   -const char *arch_vma_name(struct vm_area_struct *vma)
275   -{
276   - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
277   - return "[vdso]";
278   - return NULL;
279   -}
280 121  
281 122 struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
282 123 {
arch/x86/vdso/vdso32.S
1   -#include <asm/vdso.h>
2   -
3   -DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so")
4   -
5   -#ifdef CONFIG_COMPAT
6   -DEFINE_VDSO_IMAGE(vdso32_syscall, "arch/x86/vdso/vdso32-syscall.so")
7   -#endif
8   -
9   -DEFINE_VDSO_IMAGE(vdso32_sysenter, "arch/x86/vdso/vdso32-sysenter.so")
arch/x86/vdso/vdso32/vdso32.lds.S
1 1 /*
2 2 * Linker script for 32-bit vDSO.
3 3 * We #include the file to define the layout details.
4   - * Here we only choose the prelinked virtual address.
5 4 *
6 5 * This file defines the version script giving the user-exported symbols in
7   - * the DSO. We can define local symbols here called VDSO* to make their
8   - * values visible using the asm-x86/vdso.h macros from the kernel proper.
  6 + * the DSO.
9 7 */
10 8  
11 9 #include <asm/page.h>
12 10  
13 11 #define BUILD_VDSO32
14   -#define VDSO_PRELINK 0
15 12  
16 13 #include "../vdso-layout.lds.S"
17 14  
... ... @@ -38,14 +35,4 @@
38 35 local: *;
39 36 };
40 37 }
41   -
42   -/*
43   - * Symbols we define here called VDSO* get their values into vdso32-syms.h.
44   - */
45   -VDSO32_vsyscall = __kernel_vsyscall;
46   -VDSO32_sigreturn = __kernel_sigreturn;
47   -VDSO32_rt_sigreturn = __kernel_rt_sigreturn;
48   -VDSO32_clock_gettime = clock_gettime;
49   -VDSO32_gettimeofday = gettimeofday;
50   -VDSO32_time = time;
arch/x86/vdso/vdsox32.S
1   -#include <asm/vdso.h>
2   -
3   -DEFINE_VDSO_IMAGE(vdsox32, "arch/x86/vdso/vdsox32.so")
arch/x86/vdso/vdsox32.lds.S
1 1 /*
2 2 * Linker script for x32 vDSO.
3 3 * We #include the file to define the layout details.
4   - * Here we only choose the prelinked virtual address.
5 4 *
6 5 * This file defines the version script giving the user-exported symbols in
7   - * the DSO. We can define local symbols here called VDSO* to make their
8   - * values visible using the asm-x86/vdso.h macros from the kernel proper.
  6 + * the DSO.
9 7 */
10 8  
11   -#define VDSO_PRELINK 0
12 9 #include "vdso-layout.lds.S"
13 10  
14 11 /*
... ... @@ -24,6 +21,4 @@
24 21 local: *;
25 22 };
26 23 }
27   -
28   -VDSOX32_PRELINK = VDSO_PRELINK;
arch/x86/vdso/vma.c
... ... @@ -15,115 +15,51 @@
15 15 #include <asm/proto.h>
16 16 #include <asm/vdso.h>
17 17 #include <asm/page.h>
  18 +#include <asm/hpet.h>
18 19  
19 20 #if defined(CONFIG_X86_64)
20   -unsigned int __read_mostly vdso_enabled = 1;
  21 +unsigned int __read_mostly vdso64_enabled = 1;
21 22  
22   -DECLARE_VDSO_IMAGE(vdso);
23 23 extern unsigned short vdso_sync_cpuid;
24   -static unsigned vdso_size;
25   -
26   -#ifdef CONFIG_X86_X32_ABI
27   -DECLARE_VDSO_IMAGE(vdsox32);
28   -static unsigned vdsox32_size;
29 24 #endif
30   -#endif
31 25  
32   -#if defined(CONFIG_X86_32) || defined(CONFIG_X86_X32_ABI) || \
33   - defined(CONFIG_COMPAT)
34   -void __init patch_vdso32(void *vdso, size_t len)
  26 +void __init init_vdso_image(const struct vdso_image *image)
35 27 {
36   - Elf32_Ehdr *hdr = vdso;
37   - Elf32_Shdr *sechdrs, *alt_sec = 0;
38   - char *secstrings;
39   - void *alt_data;
40 28 int i;
  29 + int npages = (image->size) / PAGE_SIZE;
41 30  
42   - BUG_ON(len < sizeof(Elf32_Ehdr));
43   - BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0);
  31 + BUG_ON(image->size % PAGE_SIZE != 0);
  32 + for (i = 0; i < npages; i++)
  33 + image->text_mapping.pages[i] =
  34 + virt_to_page(image->data + i*PAGE_SIZE);
44 35  
45   - sechdrs = (void *)hdr + hdr->e_shoff;
46   - secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
47   -
48   - for (i = 1; i < hdr->e_shnum; i++) {
49   - Elf32_Shdr *shdr = &sechdrs[i];
50   - if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) {
51   - alt_sec = shdr;
52   - goto found;
53   - }
54   - }
55   -
56   - /* If we get here, it's probably a bug. */
57   - pr_warning("patch_vdso32: .altinstructions not found\n");
58   - return; /* nothing to patch */
59   -
60   -found:
61   - alt_data = (void *)hdr + alt_sec->sh_offset;
62   - apply_alternatives(alt_data, alt_data + alt_sec->sh_size);
  36 + apply_alternatives((struct alt_instr *)(image->data + image->alt),
  37 + (struct alt_instr *)(image->data + image->alt +
  38 + image->alt_len));
63 39 }
64   -#endif
65 40  
66 41 #if defined(CONFIG_X86_64)
67   -static void __init patch_vdso64(void *vdso, size_t len)
68   -{
69   - Elf64_Ehdr *hdr = vdso;
70   - Elf64_Shdr *sechdrs, *alt_sec = 0;
71   - char *secstrings;
72   - void *alt_data;
73   - int i;
74   -
75   - BUG_ON(len < sizeof(Elf64_Ehdr));
76   - BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0);
77   -
78   - sechdrs = (void *)hdr + hdr->e_shoff;
79   - secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
80   -
81   - for (i = 1; i < hdr->e_shnum; i++) {
82   - Elf64_Shdr *shdr = &sechdrs[i];
83   - if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) {
84   - alt_sec = shdr;
85   - goto found;
86   - }
87   - }
88   -
89   - /* If we get here, it's probably a bug. */
90   - pr_warning("patch_vdso64: .altinstructions not found\n");
91   - return; /* nothing to patch */
92   -
93   -found:
94   - alt_data = (void *)hdr + alt_sec->sh_offset;
95   - apply_alternatives(alt_data, alt_data + alt_sec->sh_size);
96   -}
97   -
98 42 static int __init init_vdso(void)
99 43 {
100   - int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
101   - int i;
  44 + init_vdso_image(&vdso_image_64);
102 45  
103   - patch_vdso64(vdso_start, vdso_end - vdso_start);
104   -
105   - vdso_size = npages << PAGE_SHIFT;
106   - for (i = 0; i < npages; i++)
107   - vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE);
108   -
109 46 #ifdef CONFIG_X86_X32_ABI
110   - patch_vdso32(vdsox32_start, vdsox32_end - vdsox32_start);
111   - npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE;
112   - vdsox32_size = npages << PAGE_SHIFT;
113   - for (i = 0; i < npages; i++)
114   - vdsox32_pages[i] = virt_to_page(vdsox32_start + i*PAGE_SIZE);
  47 + init_vdso_image(&vdso_image_x32);
115 48 #endif
116 49  
117 50 return 0;
118 51 }
119 52 subsys_initcall(init_vdso);
  53 +#endif
120 54  
121 55 struct linux_binprm;
122 56  
123 57 /* Put the vdso above the (randomized) stack with another randomized offset.
124 58 This way there is no hole in the middle of address space.
125 59 To save memory make sure it is still in the same PTE as the stack top.
126   - This doesn't give that many random bits */
  60 + This doesn't give that many random bits.
  61 +
  62 + Only used for the 64-bit and x32 vdsos. */
127 63 static unsigned long vdso_addr(unsigned long start, unsigned len)
128 64 {
129 65 unsigned long addr, end;
130 66  
131 67  
132 68  
133 69  
134 70  
135 71  
136 72  
137 73  
138 74  
139 75  
140 76  
141 77  
142 78  
143 79  
144 80  
... ... @@ -149,61 +85,149 @@
149 85 return addr;
150 86 }
151 87  
152   -/* Setup a VMA at program startup for the vsyscall page.
153   - Not called for compat tasks */
154   -static int setup_additional_pages(struct linux_binprm *bprm,
155   - int uses_interp,
156   - struct page **pages,
157   - unsigned size)
  88 +static int map_vdso(const struct vdso_image *image, bool calculate_addr)
158 89 {
159 90 struct mm_struct *mm = current->mm;
  91 + struct vm_area_struct *vma;
160 92 unsigned long addr;
161   - int ret;
  93 + int ret = 0;
  94 + static struct page *no_pages[] = {NULL};
  95 + static struct vm_special_mapping vvar_mapping = {
  96 + .name = "[vvar]",
  97 + .pages = no_pages,
  98 + };
162 99  
163   - if (!vdso_enabled)
164   - return 0;
  100 + if (calculate_addr) {
  101 + addr = vdso_addr(current->mm->start_stack,
  102 + image->sym_end_mapping);
  103 + } else {
  104 + addr = 0;
  105 + }
165 106  
166 107 down_write(&mm->mmap_sem);
167   - addr = vdso_addr(mm->start_stack, size);
168   - addr = get_unmapped_area(NULL, addr, size, 0, 0);
  108 +
  109 + addr = get_unmapped_area(NULL, addr, image->sym_end_mapping, 0, 0);
169 110 if (IS_ERR_VALUE(addr)) {
170 111 ret = addr;
171 112 goto up_fail;
172 113 }
173 114  
174   - current->mm->context.vdso = (void *)addr;
  115 + current->mm->context.vdso = (void __user *)addr;
175 116  
176   - ret = install_special_mapping(mm, addr, size,
177   - VM_READ|VM_EXEC|
178   - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
179   - pages);
180   - if (ret) {
181   - current->mm->context.vdso = NULL;
  117 + /*
  118 + * MAYWRITE to allow gdb to COW and set breakpoints
  119 + */
  120 + vma = _install_special_mapping(mm,
  121 + addr,
  122 + image->size,
  123 + VM_READ|VM_EXEC|
  124 + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
  125 + &image->text_mapping);
  126 +
  127 + if (IS_ERR(vma)) {
  128 + ret = PTR_ERR(vma);
182 129 goto up_fail;
183 130 }
184 131  
  132 + vma = _install_special_mapping(mm,
  133 + addr + image->size,
  134 + image->sym_end_mapping - image->size,
  135 + VM_READ,
  136 + &vvar_mapping);
  137 +
  138 + if (IS_ERR(vma)) {
  139 + ret = PTR_ERR(vma);
  140 + goto up_fail;
  141 + }
  142 +
  143 + if (image->sym_vvar_page)
  144 + ret = remap_pfn_range(vma,
  145 + addr + image->sym_vvar_page,
  146 + __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
  147 + PAGE_SIZE,
  148 + PAGE_READONLY);
  149 +
  150 + if (ret)
  151 + goto up_fail;
  152 +
  153 +#ifdef CONFIG_HPET_TIMER
  154 + if (hpet_address && image->sym_hpet_page) {
  155 + ret = io_remap_pfn_range(vma,
  156 + addr + image->sym_hpet_page,
  157 + hpet_address >> PAGE_SHIFT,
  158 + PAGE_SIZE,
  159 + pgprot_noncached(PAGE_READONLY));
  160 +
  161 + if (ret)
  162 + goto up_fail;
  163 + }
  164 +#endif
  165 +
185 166 up_fail:
  167 + if (ret)
  168 + current->mm->context.vdso = NULL;
  169 +
186 170 up_write(&mm->mmap_sem);
187 171 return ret;
188 172 }
189 173  
  174 +#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
  175 +static int load_vdso32(void)
  176 +{
  177 + int ret;
  178 +
  179 + if (vdso32_enabled != 1) /* Other values all mean "disabled" */
  180 + return 0;
  181 +
  182 + ret = map_vdso(selected_vdso32, false);
  183 + if (ret)
  184 + return ret;
  185 +
  186 + if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
  187 + current_thread_info()->sysenter_return =
  188 + current->mm->context.vdso +
  189 + selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
  190 +
  191 + return 0;
  192 +}
  193 +#endif
  194 +
  195 +#ifdef CONFIG_X86_64
190 196 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
191 197 {
192   - return setup_additional_pages(bprm, uses_interp, vdso_pages,
193   - vdso_size);
  198 + if (!vdso64_enabled)
  199 + return 0;
  200 +
  201 + return map_vdso(&vdso_image_64, true);
194 202 }
195 203  
  204 +#ifdef CONFIG_COMPAT
  205 +int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
  206 + int uses_interp)
  207 +{
196 208 #ifdef CONFIG_X86_X32_ABI
197   -int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
  209 + if (test_thread_flag(TIF_X32)) {
  210 + if (!vdso64_enabled)
  211 + return 0;
  212 +
  213 + return map_vdso(&vdso_image_x32, true);
  214 + }
  215 +#endif
  216 +
  217 + return load_vdso32();
  218 +}
  219 +#endif
  220 +#else
  221 +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
198 222 {
199   - return setup_additional_pages(bprm, uses_interp, vdsox32_pages,
200   - vdsox32_size);
  223 + return load_vdso32();
201 224 }
202 225 #endif
203 226  
  227 +#ifdef CONFIG_X86_64
204 228 static __init int vdso_setup(char *s)
205 229 {
206   - vdso_enabled = simple_strtoul(s, NULL, 0);
  230 + vdso64_enabled = simple_strtoul(s, NULL, 0);
207 231 return 0;
208 232 }
209 233 __setup("vdso=", vdso_setup);
... ... @@ -1494,7 +1494,7 @@
1494 1494 page->private = (unsigned long)user_pgd;
1495 1495  
1496 1496 if (user_pgd != NULL) {
1497   - user_pgd[pgd_index(VSYSCALL_START)] =
  1497 + user_pgd[pgd_index(VSYSCALL_ADDR)] =
1498 1498 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
1499 1499 ret = 0;
1500 1500 }
... ... @@ -2062,8 +2062,7 @@
2062 2062 case FIX_KMAP_BEGIN ... FIX_KMAP_END:
2063 2063 # endif
2064 2064 #else
2065   - case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
2066   - case VVAR_PAGE:
  2065 + case VSYSCALL_PAGE:
2067 2066 #endif
2068 2067 case FIX_TEXT_POKE0:
2069 2068 case FIX_TEXT_POKE1:
... ... @@ -2104,8 +2103,7 @@
2104 2103 #ifdef CONFIG_X86_64
2105 2104 /* Replicate changes to map the vsyscall page into the user
2106 2105 pagetable vsyscall mapping. */
2107   - if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
2108   - idx == VVAR_PAGE) {
  2106 + if (idx == VSYSCALL_PAGE) {
2109 2107 unsigned long vaddr = __fix_to_virt(idx);
2110 2108 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
2111 2109 }
arch/x86/xen/setup.c
... ... @@ -525,10 +525,17 @@
525 525 static void __init fiddle_vdso(void)
526 526 {
527 527 #ifdef CONFIG_X86_32
  528 + /*
  529 + * This could be called before selected_vdso32 is initialized, so
  530 + * just fiddle with both possible images. vdso_image_32_syscall
  531 + * can't be selected, since it only exists on 64-bit systems.
  532 + */
528 533 u32 *mask;
529   - mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
  534 + mask = vdso_image_32_int80.data +
  535 + vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
530 536 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
531   - mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
  537 + mask = vdso_image_32_sysenter.data +
  538 + vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
532 539 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
533 540 #endif
534 541 }
... ... @@ -1108,6 +1108,14 @@
1108 1108 /* Any vsyscall mappings? */
1109 1109 if (vma == get_gate_vma(vma->vm_mm))
1110 1110 return true;
  1111 +
  1112 + /*
  1113 + * Assume that all vmas with a .name op should always be dumped.
  1114 + * If this changes, a new vm_ops field can easily be added.
  1115 + */
  1116 + if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
  1117 + return true;
  1118 +
1111 1119 /*
1112 1120 * arch_vma_name() returns non-NULL for special architecture mappings,
1113 1121 * such as vDSO sections.
... ... @@ -300,6 +300,12 @@
300 300 goto done;
301 301 }
302 302  
  303 + if (vma->vm_ops && vma->vm_ops->name) {
  304 + name = vma->vm_ops->name(vma);
  305 + if (name)
  306 + goto done;
  307 + }
  308 +
303 309 name = arch_vma_name(vma);
304 310 if (!name) {
305 311 pid_t tid;
... ... @@ -239,6 +239,12 @@
239 239 */
240 240 int (*access)(struct vm_area_struct *vma, unsigned long addr,
241 241 void *buf, int len, int write);
  242 +
  243 + /* Called by the /proc/PID/maps code to ask the vma whether it
  244 + * has a special name. Returning non-NULL will also cause this
  245 + * vma to be dumped unconditionally. */
  246 + const char *(*name)(struct vm_area_struct *vma);
  247 +
242 248 #ifdef CONFIG_NUMA
243 249 /*
244 250 * set_policy() op must add a reference to any non-NULL @new mempolicy
... ... @@ -1783,7 +1789,9 @@
1783 1789 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
1784 1790 extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
1785 1791 unsigned long addr, unsigned long len,
1786   - unsigned long flags, struct page **pages);
  1792 + unsigned long flags,
  1793 + const struct vm_special_mapping *spec);
  1794 +/* This is an obsolete alternative to _install_special_mapping. */
1787 1795 extern int install_special_mapping(struct mm_struct *mm,
1788 1796 unsigned long addr, unsigned long len,
1789 1797 unsigned long flags, struct page **pages);
include/linux/mm_types.h
... ... @@ -510,5 +510,11 @@
510 510 }
511 511 #endif
512 512  
  513 +struct vm_special_mapping
  514 +{
  515 + const char *name;
  516 + struct page **pages;
  517 +};
  518 +
513 519 #endif /* _LINUX_MM_TYPES_H */
... ... @@ -1418,8 +1418,13 @@
1418 1418 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1419 1419 {
1420 1420 .procname = "vdso_enabled",
  1421 +#ifdef CONFIG_X86_32
  1422 + .data = &vdso32_enabled,
  1423 + .maxlen = sizeof(vdso32_enabled),
  1424 +#else
1421 1425 .data = &vdso_enabled,
1422 1426 .maxlen = sizeof(vdso_enabled),
  1427 +#endif
1423 1428 .mode = 0644,
1424 1429 .proc_handler = proc_dointvec,
1425 1430 .extra1 = &zero,
... ... @@ -2871,7 +2871,32 @@
2871 2871 return 1;
2872 2872 }
2873 2873  
  2874 +static int special_mapping_fault(struct vm_area_struct *vma,
  2875 + struct vm_fault *vmf);
2874 2876  
  2877 +/*
  2878 + * Having a close hook prevents vma merging regardless of flags.
  2879 + */
  2880 +static void special_mapping_close(struct vm_area_struct *vma)
  2881 +{
  2882 +}
  2883 +
  2884 +static const char *special_mapping_name(struct vm_area_struct *vma)
  2885 +{
  2886 + return ((struct vm_special_mapping *)vma->vm_private_data)->name;
  2887 +}
  2888 +
  2889 +static const struct vm_operations_struct special_mapping_vmops = {
  2890 + .close = special_mapping_close,
  2891 + .fault = special_mapping_fault,
  2892 + .name = special_mapping_name,
  2893 +};
  2894 +
  2895 +static const struct vm_operations_struct legacy_special_mapping_vmops = {
  2896 + .close = special_mapping_close,
  2897 + .fault = special_mapping_fault,
  2898 +};
  2899 +
2875 2900 static int special_mapping_fault(struct vm_area_struct *vma,
2876 2901 struct vm_fault *vmf)
2877 2902 {
... ... @@ -2886,7 +2911,13 @@
2886 2911 */
2887 2912 pgoff = vmf->pgoff - vma->vm_pgoff;
2888 2913  
2889   - for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
  2914 + if (vma->vm_ops == &legacy_special_mapping_vmops)
  2915 + pages = vma->vm_private_data;
  2916 + else
  2917 + pages = ((struct vm_special_mapping *)vma->vm_private_data)->
  2918 + pages;
  2919 +
  2920 + for (; pgoff && *pages; ++pages)
2890 2921 pgoff--;
2891 2922  
2892 2923 if (*pages) {
2893 2924  
... ... @@ -2899,31 +2930,12 @@
2899 2930 return VM_FAULT_SIGBUS;
2900 2931 }
2901 2932  
2902   -/*
2903   - * Having a close hook prevents vma merging regardless of flags.
2904   - */
2905   -static void special_mapping_close(struct vm_area_struct *vma)
  2933 +static struct vm_area_struct *__install_special_mapping(
  2934 + struct mm_struct *mm,
  2935 + unsigned long addr, unsigned long len,
  2936 + unsigned long vm_flags, const struct vm_operations_struct *ops,
  2937 + void *priv)
2906 2938 {
2907   -}
2908   -
2909   -static const struct vm_operations_struct special_mapping_vmops = {
2910   - .close = special_mapping_close,
2911   - .fault = special_mapping_fault,
2912   -};
2913   -
2914   -/*
2915   - * Called with mm->mmap_sem held for writing.
2916   - * Insert a new vma covering the given region, with the given flags.
2917   - * Its pages are supplied by the given array of struct page *.
2918   - * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
2919   - * The region past the last page supplied will always produce SIGBUS.
2920   - * The array pointer and the pages it points to are assumed to stay alive
2921   - * for as long as this mapping might exist.
2922   - */
2923   -struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
2924   - unsigned long addr, unsigned long len,
2925   - unsigned long vm_flags, struct page **pages)
2926   -{
2927 2939 int ret;
2928 2940 struct vm_area_struct *vma;
2929 2941  
... ... @@ -2939,8 +2951,8 @@
2939 2951 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
2940 2952 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2941 2953  
2942   - vma->vm_ops = &special_mapping_vmops;
2943   - vma->vm_private_data = pages;
  2954 + vma->vm_ops = ops;
  2955 + vma->vm_private_data = priv;
2944 2956  
2945 2957 ret = insert_vm_struct(mm, vma);
2946 2958 if (ret)
2947 2959  
... ... @@ -2957,12 +2969,31 @@
2957 2969 return ERR_PTR(ret);
2958 2970 }
2959 2971  
  2972 +/*
  2973 + * Called with mm->mmap_sem held for writing.
  2974 + * Insert a new vma covering the given region, with the given flags.
  2975 + * Its pages are supplied by the given array of struct page *.
  2976 + * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
  2977 + * The region past the last page supplied will always produce SIGBUS.
  2978 + * The array pointer and the pages it points to are assumed to stay alive
  2979 + * for as long as this mapping might exist.
  2980 + */
  2981 +struct vm_area_struct *_install_special_mapping(
  2982 + struct mm_struct *mm,
  2983 + unsigned long addr, unsigned long len,
  2984 + unsigned long vm_flags, const struct vm_special_mapping *spec)
  2985 +{
  2986 + return __install_special_mapping(mm, addr, len, vm_flags,
  2987 + &special_mapping_vmops, (void *)spec);
  2988 +}
  2989 +
2960 2990 int install_special_mapping(struct mm_struct *mm,
2961 2991 unsigned long addr, unsigned long len,
2962 2992 unsigned long vm_flags, struct page **pages)
2963 2993 {
2964   - struct vm_area_struct *vma = _install_special_mapping(mm,
2965   - addr, len, vm_flags, pages);
  2994 + struct vm_area_struct *vma = __install_special_mapping(
  2995 + mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
  2996 + (void *)pages);
2966 2997  
2967 2998 return PTR_ERR_OR_ZERO(vma);
2968 2999 }