Commit c6f21243ce1e8d81ad8361da4d2eaa5947b667c4

Authored by Linus Torvalds

Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 vdso changes from Peter Anvin:
 "This is the revamp of the 32-bit vdso and the associated cleanups.

  This adds timekeeping support to the 32-bit vdso that we already have
  in the 64-bit vdso.  Although 32-bit x86 is legacy, it is likely to
  remain in the embedded space for a very long time to come.

  This removes the traditional COMPAT_VDSO support; the configuration
  variable is reused for simply removing the 32-bit vdso, which will
  produce correct results but obviously suffer a performance penalty.
  Only one beta version of glibc was affected, but that version was
  unfortunately included in one OpenSUSE release.

  This is not the end of the vdso cleanups.  Stefani and Andy have
  agreed to continue work for the next kernel cycle; in fact Andy has
  already produced another set of cleanups that came too late for this
  cycle.

  An incidental, but arguably important, change is that this ensures
  that unused space in the VVAR page is properly zeroed.  It wasn't
  before, and would contain whatever garbage was left in memory by BIOS
  or the bootloader.  Since the VVAR page is accessible to user space
  this had the potential of information leaks"
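
To get a feel for what the new 32-bit fast path means in practice, here is a minimal userspace sketch (not part of the merge): built with gcc -m32 (add -lrt on older glibc), and assuming a C library new enough to dispatch clock_gettime() through the vDSO's __vdso_clock_gettime, the loop below never needs to enter the kernel on its hot path.

    /* Hypothetical test program, not from this series. */
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec ts;
            int i;

            /* Served by the vDSO when available; otherwise glibc falls back to the syscall. */
            for (i = 0; i < 1000000; i++)
                    clock_gettime(CLOCK_MONOTONIC, &ts);

            printf("last reading: %ld.%09ld\n", (long)ts.tv_sec, (long)ts.tv_nsec);
            return 0;
    }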

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  x86, vdso: Fix the symbol versions on the 32-bit vDSO
  x86, vdso, build: Don't rebuild 32-bit vdsos on every make
  x86, vdso: Actually discard the .discard sections
  x86, vdso: Fix size of get_unmapped_area()
  x86, vdso: Finish removing VDSO32_PRELINK
  x86, vdso: Move more vdso definitions into vdso.h
  x86: Load the 32-bit vdso in place, just like the 64-bit vdsos
  x86, vdso32: handle 32 bit vDSO larger than one page
  x86, vdso32: Disable stack protector, adjust optimizations
  x86, vdso: Zero-pad the VVAR page
  x86, vdso: Add 32 bit VDSO time support for 64 bit kernel
  x86, vdso: Add 32 bit VDSO time support for 32 bit kernel
  x86, vdso: Patch alternatives in the 32-bit VDSO
  x86, vdso: Introduce VVAR macro for vdso32
  x86, vdso: Cleanup __vdso_gettimeofday()
  x86, vdso: Replace VVAR(vsyscall_gtod_data) by gtod macro
  x86, vdso: __vdso_clock_gettime() cleanup
  x86, vdso: Revamp vclock_gettime.c
  mm: Add new func _install_special_mapping() to mmap.c
  x86, vdso: Make vsyscall_gtod_data handling x86 generic
  ...

Showing 30 changed files

Documentation/kernel-parameters.txt
... ... @@ -3424,14 +3424,24 @@
3424 3424 of CONFIG_HIGHPTE.
3425 3425  
3426 3426 vdso= [X86,SH]
3427   - vdso=2: enable compat VDSO (default with COMPAT_VDSO)
3428   - vdso=1: enable VDSO (default)
  3427 + On X86_32, this is an alias for vdso32=. Otherwise:
  3428 +
  3429 + vdso=1: enable VDSO (the default)
3429 3430 vdso=0: disable VDSO mapping
3430 3431  
3431   - vdso32= [X86]
3432   - vdso32=2: enable compat VDSO (default with COMPAT_VDSO)
3433   - vdso32=1: enable 32-bit VDSO (default)
3434   - vdso32=0: disable 32-bit VDSO mapping
  3432 + vdso32= [X86] Control the 32-bit vDSO
  3433 + vdso32=1: enable 32-bit VDSO
  3434 + vdso32=0 or vdso32=2: disable 32-bit VDSO
  3435 +
  3436 + See the help text for CONFIG_COMPAT_VDSO for more
  3437 + details. If CONFIG_COMPAT_VDSO is set, the default is
  3438 + vdso32=0; otherwise, the default is vdso32=1.
  3439 +
  3440 + For compatibility with older kernels, vdso32=2 is an
  3441 + alias for vdso32=0.
  3442 +
  3443 + Try vdso32=0 if you encounter an error that says:
  3444 + dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
3435 3445  
3436 3446 vector= [IA-64,SMP]
3437 3447 vector=percpu: enable percpu vector domain
arch/x86/Kconfig
... ... @@ -107,9 +107,9 @@
107 107 select HAVE_ARCH_SOFT_DIRTY
108 108 select CLOCKSOURCE_WATCHDOG
109 109 select GENERIC_CLOCKEVENTS
110   - select ARCH_CLOCKSOURCE_DATA if X86_64
  110 + select ARCH_CLOCKSOURCE_DATA
111 111 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
112   - select GENERIC_TIME_VSYSCALL if X86_64
  112 + select GENERIC_TIME_VSYSCALL
113 113 select KTIME_SCALAR if X86_32
114 114 select GENERIC_STRNCPY_FROM_USER
115 115 select GENERIC_STRNLEN_USER
... ... @@ -1848,17 +1848,29 @@
1848 1848 If unsure, say N.
1849 1849  
1850 1850 config COMPAT_VDSO
1851   - def_bool y
1852   - prompt "Compat VDSO support"
  1851 + def_bool n
  1852 + prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
1853 1853 depends on X86_32 || IA32_EMULATION
1854 1854 ---help---
1855   - Map the 32-bit VDSO to the predictable old-style address too.
  1855 + Certain buggy versions of glibc will crash if they are
  1856 + presented with a 32-bit vDSO that is not mapped at the address
  1857 + indicated in its segment table.
1856 1858  
1857   - Say N here if you are running a sufficiently recent glibc
1858   - version (2.3.3 or later), to remove the high-mapped
1859   - VDSO mapping and to exclusively use the randomized VDSO.
  1859 + The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a
  1860 + and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and
  1861 + 49ad572a70b8aeb91e57483a11dd1b77e31c4468. Glibc 2.3.3 is
  1862 + the only released version with the bug, but OpenSUSE 9
  1863 + contains a buggy "glibc 2.3.2".
1860 1864  
1861   - If unsure, say Y.
  1865 + The symptom of the bug is that everything crashes on startup, saying:
  1866 + dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
  1867 +
  1868 + Saying Y here changes the default value of the vdso32 boot
  1869 + option from 1 to 0, which turns off the 32-bit vDSO entirely.
  1870 + This works around the glibc bug but hurts performance.
  1871 +
  1872 + If unsure, say N: if you are compiling your own kernel, you
  1873 + are unlikely to be using a buggy version of glibc.
1862 1874  
1863 1875 config CMDLINE_BOOL
1864 1876 bool "Built-in kernel command line"
arch/x86/include/asm/clocksource.h
... ... @@ -3,8 +3,6 @@
3 3 #ifndef _ASM_X86_CLOCKSOURCE_H
4 4 #define _ASM_X86_CLOCKSOURCE_H
5 5  
6   -#ifdef CONFIG_X86_64
7   -
8 6 #define VCLOCK_NONE 0 /* No vDSO clock available. */
9 7 #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
10 8 #define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
... ... @@ -13,8 +11,6 @@
13 11 struct arch_clocksource_data {
14 12 int vclock_mode;
15 13 };
16   -
17   -#endif /* CONFIG_X86_64 */
18 14  
19 15 #endif /* _ASM_X86_CLOCKSOURCE_H */
arch/x86/include/asm/elf.h
... ... @@ -281,15 +281,11 @@
281 281  
282 282 #define STACK_RND_MASK (0x7ff)
283 283  
284   -#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO))
285   -
286 284 #define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled)
287 285  
288 286 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
289 287  
290 288 #else /* CONFIG_X86_32 */
291   -
292   -#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */
293 289  
294 290 /* 1GB for 64bit, 8MB for 32bit */
295 291 #define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
arch/x86/include/asm/fixmap.h
... ... @@ -40,15 +40,8 @@
40 40 */
41 41 extern unsigned long __FIXADDR_TOP;
42 42 #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
43   -
44   -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
45   -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
46 43 #else
47 44 #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
48   -
49   -/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
50   -#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
51   -#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
52 45 #endif
53 46  
54 47  
... ... @@ -74,7 +67,6 @@
74 67 enum fixed_addresses {
75 68 #ifdef CONFIG_X86_32
76 69 FIX_HOLE,
77   - FIX_VDSO,
78 70 #else
79 71 VSYSCALL_LAST_PAGE,
80 72 VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
arch/x86/include/asm/pgtable_types.h
... ... @@ -214,13 +214,8 @@
214 214 #ifdef CONFIG_X86_64
215 215 #define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
216 216 #else
217   -/*
218   - * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
219   - * bits are combined, this will alow user to access the high address mapped
220   - * VDSO in the presence of CONFIG_COMPAT_VDSO
221   - */
222 217 #define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
223   -#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
  218 +#define PDE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */
224 219 #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
225 220 #endif
226 221  
arch/x86/include/asm/vdso.h
1 1 #ifndef _ASM_X86_VDSO_H
2 2 #define _ASM_X86_VDSO_H
3 3  
  4 +#include <asm/page_types.h>
  5 +#include <linux/linkage.h>
  6 +
  7 +#ifdef __ASSEMBLER__
  8 +
  9 +#define DEFINE_VDSO_IMAGE(symname, filename) \
  10 +__PAGE_ALIGNED_DATA ; \
  11 + .globl symname##_start, symname##_end ; \
  12 + .align PAGE_SIZE ; \
  13 + symname##_start: ; \
  14 + .incbin filename ; \
  15 + symname##_end: ; \
  16 + .align PAGE_SIZE /* extra data here leaks to userspace. */ ; \
  17 + \
  18 +.previous ; \
  19 + \
  20 + .globl symname##_pages ; \
  21 + .bss ; \
  22 + .align 8 ; \
  23 + .type symname##_pages, @object ; \
  24 + symname##_pages: ; \
  25 + .zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * (BITS_PER_LONG / 8) ; \
  26 + .size symname##_pages, .-symname##_pages
  27 +
  28 +#else
  29 +
  30 +#define DECLARE_VDSO_IMAGE(symname) \
  31 + extern char symname##_start[], symname##_end[]; \
  32 + extern struct page *symname##_pages[]
  33 +
4 34 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
5   -extern const char VDSO32_PRELINK[];
6 35  
  36 +#include <asm/vdso32.h>
  37 +
  38 +DECLARE_VDSO_IMAGE(vdso32_int80);
  39 +#ifdef CONFIG_COMPAT
  40 +DECLARE_VDSO_IMAGE(vdso32_syscall);
  41 +#endif
  42 +DECLARE_VDSO_IMAGE(vdso32_sysenter);
  43 +
7 44 /*
8 45 * Given a pointer to the vDSO image, find the pointer to VDSO32_name
9 46 * as that symbol is defined in the vDSO sources or linker script.
... ... @@ -11,8 +48,7 @@
11 48 #define VDSO32_SYMBOL(base, name) \
12 49 ({ \
13 50 extern const char VDSO32_##name[]; \
14   - (void __user *)(VDSO32_##name - VDSO32_PRELINK + \
15   - (unsigned long)(base)); \
  51 + (void __user *)(VDSO32_##name + (unsigned long)(base)); \
16 52 })
17 53 #endif
18 54  
... ... @@ -23,13 +59,9 @@
23 59 extern void __user __kernel_sigreturn;
24 60 extern void __user __kernel_rt_sigreturn;
25 61  
26   -/*
27   - * These symbols are defined by vdso32.S to mark the bounds
28   - * of the ELF DSO images included therein.
29   - */
30   -extern const char vdso32_int80_start, vdso32_int80_end;
31   -extern const char vdso32_syscall_start, vdso32_syscall_end;
32   -extern const char vdso32_sysenter_start, vdso32_sysenter_end;
  62 +void __init patch_vdso32(void *vdso, size_t len);
  63 +
  64 +#endif /* __ASSEMBLER__ */
33 65  
34 66 #endif /* _ASM_X86_VDSO_H */
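
For reference (not part of the patch), DECLARE_VDSO_IMAGE() expands to the pair of declarations that each vDSO image previously spelled out by hand; for example, DECLARE_VDSO_IMAGE(vdso32_int80) becomes:

    /* Expansion of DECLARE_VDSO_IMAGE(vdso32_int80), shown for illustration only. */
    extern char vdso32_int80_start[], vdso32_int80_end[];
    extern struct page *vdso32_int80_pages[];

The assembly-side DEFINE_VDSO_IMAGE() generates the matching page-aligned _start/_end labels around the .incbin'd image and reserves one pointer-sized slot per image page in the _pages array.
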
arch/x86/include/asm/vdso32.h
  1 +#ifndef _ASM_X86_VDSO32_H
  2 +#define _ASM_X86_VDSO32_H
  3 +
  4 +#define VDSO_BASE_PAGE 0
  5 +#define VDSO_VVAR_PAGE 1
  6 +#define VDSO_HPET_PAGE 2
  7 +#define VDSO_PAGES 3
  8 +#define VDSO_PREV_PAGES 2
  9 +#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
  10 +
  11 +#endif
arch/x86/include/asm/vgtod.h
1 1 #ifndef _ASM_X86_VGTOD_H
2 2 #define _ASM_X86_VGTOD_H
3 3  
4   -#include <asm/vsyscall.h>
  4 +#include <linux/compiler.h>
5 5 #include <linux/clocksource.h>
6 6  
  7 +#ifdef BUILD_VDSO32_64
  8 +typedef u64 gtod_long_t;
  9 +#else
  10 +typedef unsigned long gtod_long_t;
  11 +#endif
  12 +/*
  13 + * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
  14 + * so be carefull by modifying this structure.
  15 + */
7 16 struct vsyscall_gtod_data {
8   - seqcount_t seq;
  17 + unsigned seq;
9 18  
10   - struct { /* extract of a clocksource struct */
11   - int vclock_mode;
12   - cycle_t cycle_last;
13   - cycle_t mask;
14   - u32 mult;
15   - u32 shift;
16   - } clock;
  19 + int vclock_mode;
  20 + cycle_t cycle_last;
  21 + cycle_t mask;
  22 + u32 mult;
  23 + u32 shift;
17 24  
18 25 /* open coded 'struct timespec' */
19   - time_t wall_time_sec;
20 26 u64 wall_time_snsec;
  27 + gtod_long_t wall_time_sec;
  28 + gtod_long_t monotonic_time_sec;
21 29 u64 monotonic_time_snsec;
22   - time_t monotonic_time_sec;
  30 + gtod_long_t wall_time_coarse_sec;
  31 + gtod_long_t wall_time_coarse_nsec;
  32 + gtod_long_t monotonic_time_coarse_sec;
  33 + gtod_long_t monotonic_time_coarse_nsec;
23 34  
24   - struct timezone sys_tz;
25   - struct timespec wall_time_coarse;
26   - struct timespec monotonic_time_coarse;
  35 + int tz_minuteswest;
  36 + int tz_dsttime;
27 37 };
28 38 extern struct vsyscall_gtod_data vsyscall_gtod_data;
  39 +
  40 +static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
  41 +{
  42 + unsigned ret;
  43 +
  44 +repeat:
  45 + ret = ACCESS_ONCE(s->seq);
  46 + if (unlikely(ret & 1)) {
  47 + cpu_relax();
  48 + goto repeat;
  49 + }
  50 + smp_rmb();
  51 + return ret;
  52 +}
  53 +
  54 +static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
  55 + unsigned start)
  56 +{
  57 + smp_rmb();
  58 + return unlikely(s->seq != start);
  59 +}
  60 +
  61 +static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
  62 +{
  63 + ++s->seq;
  64 + smp_wmb();
  65 +}
  66 +
  67 +static inline void gtod_write_end(struct vsyscall_gtod_data *s)
  68 +{
  69 + smp_wmb();
  70 + ++s->seq;
  71 +}
29 72  
30 73 #endif /* _ASM_X86_VGTOD_H */
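
The helpers above open-code the usual seqlock protocol: the writer leaves seq odd for the duration of an update, and a reader retries if it started during an update or if seq changed underneath it. A minimal reader sketch (the helper name below is made up; it mirrors what do_realtime() in vclock_gettime.c does later in this series):

    /* Illustrative reader of vsyscall_gtod_data; helper name is hypothetical. */
    static void read_wall_time(gtod_long_t *sec, u64 *snsec)
    {
            const struct vsyscall_gtod_data *s = &vsyscall_gtod_data;
            unsigned seq;

            do {
                    seq    = gtod_read_begin(s);    /* spins while an update is in flight */
                    *sec   = s->wall_time_sec;
                    *snsec = s->wall_time_snsec;
            } while (gtod_read_retry(s, seq));
    }
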
arch/x86/include/asm/vvar.h
... ... @@ -16,8 +16,8 @@
16 16 * you mess up, the linker will catch it.)
17 17 */
18 18  
19   -/* Base address of vvars. This is not ABI. */
20   -#define VVAR_ADDRESS (-10*1024*1024 - 4096)
  19 +#ifndef _ASM_X86_VVAR_H
  20 +#define _ASM_X86_VVAR_H
21 21  
22 22 #if defined(__VVAR_KERNEL_LDS)
23 23  
... ... @@ -29,16 +29,35 @@
29 29  
30 30 #else
31 31  
  32 +#ifdef BUILD_VDSO32
  33 +
32 34 #define DECLARE_VVAR(offset, type, name) \
  35 + extern type vvar_ ## name __attribute__((visibility("hidden")));
  36 +
  37 +#define VVAR(name) (vvar_ ## name)
  38 +
  39 +#else
  40 +
  41 +extern char __vvar_page;
  42 +
  43 +/* Base address of vvars. This is not ABI. */
  44 +#ifdef CONFIG_X86_64
  45 +#define VVAR_ADDRESS (-10*1024*1024 - 4096)
  46 +#else
  47 +#define VVAR_ADDRESS (&__vvar_page)
  48 +#endif
  49 +
  50 +#define DECLARE_VVAR(offset, type, name) \
33 51 static type const * const vvaraddr_ ## name = \
34 52 (void *)(VVAR_ADDRESS + (offset));
35 53  
  54 +#define VVAR(name) (*vvaraddr_ ## name)
  55 +#endif
  56 +
36 57 #define DEFINE_VVAR(type, name) \
37 58 type name \
38 59 __attribute__((section(".vvar_" #name), aligned(16))) __visible
39 60  
40   -#define VVAR(name) (*vvaraddr_ ## name)
41   -
42 61 #endif
43 62  
44 63 /* DECLARE_VVAR(offset, type, name) */
... ... @@ -48,4 +67,6 @@
48 67 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
49 68  
50 69 #undef DECLARE_VVAR
  70 +
  71 +#endif
arch/x86/kernel/Makefile
... ... @@ -26,7 +26,7 @@
26 26 obj-y += probe_roms.o
27 27 obj-$(CONFIG_X86_32) += i386_ksyms_32.o
28 28 obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
29   -obj-y += syscall_$(BITS).o
  29 +obj-y += syscall_$(BITS).o vsyscall_gtod.o
30 30 obj-$(CONFIG_X86_64) += vsyscall_64.o
31 31 obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
32 32 obj-$(CONFIG_SYSFS) += ksysfs.o
arch/x86/kernel/hpet.c
... ... @@ -752,9 +752,7 @@
752 752 .mask = HPET_MASK,
753 753 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
754 754 .resume = hpet_resume_counter,
755   -#ifdef CONFIG_X86_64
756 755 .archdata = { .vclock_mode = VCLOCK_HPET },
757   -#endif
758 756 };
759 757  
760 758 static int hpet_clocksource_register(void)
arch/x86/kernel/tsc.c
... ... @@ -984,9 +984,7 @@
984 984 .mask = CLOCKSOURCE_MASK(64),
985 985 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
986 986 CLOCK_SOURCE_MUST_VERIFY,
987   -#ifdef CONFIG_X86_64
988 987 .archdata = { .vclock_mode = VCLOCK_TSC },
989   -#endif
990 988 };
991 989  
992 990 void mark_tsc_unstable(char *reason)
arch/x86/kernel/vmlinux.lds.S
... ... @@ -147,7 +147,6 @@
147 147 _edata = .;
148 148 } :data
149 149  
150   -#ifdef CONFIG_X86_64
151 150  
152 151 . = ALIGN(PAGE_SIZE);
153 152 __vvar_page = .;
154 153  
... ... @@ -165,11 +164,14 @@
165 164 #undef __VVAR_KERNEL_LDS
166 165 #undef EMIT_VVAR
167 166  
  167 + /*
  168 + * Pad the rest of the page with zeros. Otherwise the loader
  169 + * can leave garbage here.
  170 + */
  171 + . = __vvar_beginning_hack + PAGE_SIZE;
168 172 } :data
169 173  
170 174 . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
171   -
172   -#endif /* CONFIG_X86_64 */
173 175  
174 176 /* Init code and data - will be freed after init */
175 177 . = ALIGN(PAGE_SIZE);
arch/x86/kernel/vsyscall_64.c
... ... @@ -47,14 +47,12 @@
47 47 #include <asm/segment.h>
48 48 #include <asm/desc.h>
49 49 #include <asm/topology.h>
50   -#include <asm/vgtod.h>
51 50 #include <asm/traps.h>
52 51  
53 52 #define CREATE_TRACE_POINTS
54 53 #include "vsyscall_trace.h"
55 54  
56 55 DEFINE_VVAR(int, vgetcpu_mode);
57   -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
58 56  
59 57 static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
60 58  
... ... @@ -77,48 +75,6 @@
77 75 }
78 76 early_param("vsyscall", vsyscall_setup);
79 77  
80   -void update_vsyscall_tz(void)
81   -{
82   - vsyscall_gtod_data.sys_tz = sys_tz;
83   -}
84   -
85   -void update_vsyscall(struct timekeeper *tk)
86   -{
87   - struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
88   -
89   - write_seqcount_begin(&vdata->seq);
90   -
91   - /* copy vsyscall data */
92   - vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
93   - vdata->clock.cycle_last = tk->clock->cycle_last;
94   - vdata->clock.mask = tk->clock->mask;
95   - vdata->clock.mult = tk->mult;
96   - vdata->clock.shift = tk->shift;
97   -
98   - vdata->wall_time_sec = tk->xtime_sec;
99   - vdata->wall_time_snsec = tk->xtime_nsec;
100   -
101   - vdata->monotonic_time_sec = tk->xtime_sec
102   - + tk->wall_to_monotonic.tv_sec;
103   - vdata->monotonic_time_snsec = tk->xtime_nsec
104   - + (tk->wall_to_monotonic.tv_nsec
105   - << tk->shift);
106   - while (vdata->monotonic_time_snsec >=
107   - (((u64)NSEC_PER_SEC) << tk->shift)) {
108   - vdata->monotonic_time_snsec -=
109   - ((u64)NSEC_PER_SEC) << tk->shift;
110   - vdata->monotonic_time_sec++;
111   - }
112   -
113   - vdata->wall_time_coarse.tv_sec = tk->xtime_sec;
114   - vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
115   -
116   - vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse,
117   - tk->wall_to_monotonic);
118   -
119   - write_seqcount_end(&vdata->seq);
120   -}
121   -
122 78 static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
123 79 const char *message)
124 80 {
... ... @@ -374,7 +330,6 @@
374 330 {
375 331 extern char __vsyscall_page;
376 332 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
377   - extern char __vvar_page;
378 333 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
379 334  
380 335 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
arch/x86/kernel/vsyscall_gtod.c
  1 +/*
  2 + * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
  3 + * Copyright 2003 Andi Kleen, SuSE Labs.
  4 + *
  5 + * Modified for x86 32 bit architecture by
  6 + * Stefani Seibold <stefani@seibold.net>
  7 + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
  8 + *
  9 + * Thanks to hpa@transmeta.com for some useful hint.
  10 + * Special thanks to Ingo Molnar for his early experience with
  11 + * a different vsyscall implementation for Linux/IA32 and for the name.
  12 + *
  13 + */
  14 +
  15 +#include <linux/timekeeper_internal.h>
  16 +#include <asm/vgtod.h>
  17 +#include <asm/vvar.h>
  18 +
  19 +DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
  20 +
  21 +void update_vsyscall_tz(void)
  22 +{
  23 + vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
  24 + vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
  25 +}
  26 +
  27 +void update_vsyscall(struct timekeeper *tk)
  28 +{
  29 + struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
  30 +
  31 + gtod_write_begin(vdata);
  32 +
  33 + /* copy vsyscall data */
  34 + vdata->vclock_mode = tk->clock->archdata.vclock_mode;
  35 + vdata->cycle_last = tk->clock->cycle_last;
  36 + vdata->mask = tk->clock->mask;
  37 + vdata->mult = tk->mult;
  38 + vdata->shift = tk->shift;
  39 +
  40 + vdata->wall_time_sec = tk->xtime_sec;
  41 + vdata->wall_time_snsec = tk->xtime_nsec;
  42 +
  43 + vdata->monotonic_time_sec = tk->xtime_sec
  44 + + tk->wall_to_monotonic.tv_sec;
  45 + vdata->monotonic_time_snsec = tk->xtime_nsec
  46 + + (tk->wall_to_monotonic.tv_nsec
  47 + << tk->shift);
  48 + while (vdata->monotonic_time_snsec >=
  49 + (((u64)NSEC_PER_SEC) << tk->shift)) {
  50 + vdata->monotonic_time_snsec -=
  51 + ((u64)NSEC_PER_SEC) << tk->shift;
  52 + vdata->monotonic_time_sec++;
  53 + }
  54 +
  55 + vdata->wall_time_coarse_sec = tk->xtime_sec;
  56 + vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift);
  57 +
  58 + vdata->monotonic_time_coarse_sec =
  59 + vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
  60 + vdata->monotonic_time_coarse_nsec =
  61 + vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
  62 +
  63 + while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
  64 + vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
  65 + vdata->monotonic_time_coarse_sec++;
  66 + }
  67 +
  68 + gtod_write_end(vdata);
  69 +}
arch/x86/tools/relocs.c
... ... @@ -69,8 +69,8 @@
69 69 "__per_cpu_load|"
70 70 "init_per_cpu__.*|"
71 71 "__end_rodata_hpage_align|"
72   - "__vvar_page|"
73 72 #endif
  73 + "__vvar_page|"
74 74 "_end)$"
75 75 };
76 76  
arch/x86/vdso/Makefile
... ... @@ -23,7 +23,8 @@
23 23 vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y))
24 24  
25 25 # files to link into kernel
26   -obj-$(VDSO64-y) += vma.o vdso.o
  26 +obj-y += vma.o
  27 +obj-$(VDSO64-y) += vdso.o
27 28 obj-$(VDSOX32-y) += vdsox32.o
28 29 obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
29 30  
... ... @@ -138,7 +139,7 @@
138 139  
139 140 targets += vdso32/vdso32.lds
140 141 targets += $(vdso32-images) $(vdso32-images:=.dbg)
141   -targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
  142 +targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
142 143  
143 144 extra-y += $(vdso32-images)
144 145  
145 146  
... ... @@ -148,8 +149,19 @@
148 149 $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
149 150 $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
150 151  
  152 +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
  153 +KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
  154 +KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
  155 +KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
  156 +KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
  157 +KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
  158 +KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
  159 +KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
  160 +$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
  161 +
151 162 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
152 163 $(obj)/vdso32/vdso32.lds \
  164 + $(obj)/vdso32/vclock_gettime.o \
153 165 $(obj)/vdso32/note.o \
154 166 $(obj)/vdso32/%.o
155 167 $(call if_changed,vdso)
arch/x86/vdso/vclock_gettime.c
... ... @@ -4,6 +4,9 @@
4 4 *
5 5 * Fast user context implementation of clock_gettime, gettimeofday, and time.
6 6 *
  7 + * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
  8 + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
  9 + *
7 10 * The code should have no internal unresolved relocations.
8 11 * Check with readelf after changing.
9 12 */
... ... @@ -11,56 +14,55 @@
11 14 /* Disable profiling for userspace code: */
12 15 #define DISABLE_BRANCH_PROFILING
13 16  
14   -#include <linux/kernel.h>
15   -#include <linux/posix-timers.h>
16   -#include <linux/time.h>
17   -#include <linux/string.h>
18   -#include <asm/vsyscall.h>
19   -#include <asm/fixmap.h>
  17 +#include <uapi/linux/time.h>
20 18 #include <asm/vgtod.h>
21   -#include <asm/timex.h>
22 19 #include <asm/hpet.h>
  20 +#include <asm/vvar.h>
23 21 #include <asm/unistd.h>
24   -#include <asm/io.h>
25   -#include <asm/pvclock.h>
  22 +#include <asm/msr.h>
  23 +#include <linux/math64.h>
  24 +#include <linux/time.h>
26 25  
27 26 #define gtod (&VVAR(vsyscall_gtod_data))
28 27  
29   -notrace static cycle_t vread_tsc(void)
  28 +extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
  29 +extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
  30 +extern time_t __vdso_time(time_t *t);
  31 +
  32 +#ifdef CONFIG_HPET_TIMER
  33 +static inline u32 read_hpet_counter(const volatile void *addr)
30 34 {
31   - cycle_t ret;
32   - u64 last;
  35 + return *(const volatile u32 *) (addr + HPET_COUNTER);
  36 +}
  37 +#endif
33 38  
34   - /*
35   - * Empirically, a fence (of type that depends on the CPU)
36   - * before rdtsc is enough to ensure that rdtsc is ordered
37   - * with respect to loads. The various CPU manuals are unclear
38   - * as to whether rdtsc can be reordered with later loads,
39   - * but no one has ever seen it happen.
40   - */
41   - rdtsc_barrier();
42   - ret = (cycle_t)vget_cycles();
  39 +#ifndef BUILD_VDSO32
43 40  
44   - last = VVAR(vsyscall_gtod_data).clock.cycle_last;
  41 +#include <linux/kernel.h>
  42 +#include <asm/vsyscall.h>
  43 +#include <asm/fixmap.h>
  44 +#include <asm/pvclock.h>
45 45  
46   - if (likely(ret >= last))
47   - return ret;
  46 +static notrace cycle_t vread_hpet(void)
  47 +{
  48 + return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
  49 +}
48 50  
49   - /*
50   - * GCC likes to generate cmov here, but this branch is extremely
51   - * predictable (it's just a funciton of time and the likely is
52   - * very likely) and there's a data dependence, so force GCC
53   - * to generate a branch instead. I don't barrier() because
54   - * we don't actually need a barrier, and if this function
55   - * ever gets inlined it will generate worse code.
56   - */
57   - asm volatile ("");
58   - return last;
  51 +notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
  52 +{
  53 + long ret;
  54 + asm("syscall" : "=a" (ret) :
  55 + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
  56 + return ret;
59 57 }
60 58  
61   -static notrace cycle_t vread_hpet(void)
  59 +notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
62 60 {
63   - return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
  61 + long ret;
  62 +
  63 + asm("syscall" : "=a" (ret) :
  64 + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
  65 + return ret;
64 66 }
65 67  
66 68 #ifdef CONFIG_PARAVIRT_CLOCK
... ... @@ -124,7 +126,7 @@
124 126 *mode = VCLOCK_NONE;
125 127  
126 128 /* refer to tsc.c read_tsc() comment for rationale */
127   - last = VVAR(vsyscall_gtod_data).clock.cycle_last;
  129 + last = gtod->cycle_last;
128 130  
129 131 if (likely(ret >= last))
130 132 return ret;
131 133  
... ... @@ -133,11 +135,30 @@
133 135 }
134 136 #endif
135 137  
  138 +#else
  139 +
  140 +extern u8 hpet_page
  141 + __attribute__((visibility("hidden")));
  142 +
  143 +#ifdef CONFIG_HPET_TIMER
  144 +static notrace cycle_t vread_hpet(void)
  145 +{
  146 + return read_hpet_counter((const void *)(&hpet_page));
  147 +}
  148 +#endif
  149 +
136 150 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
137 151 {
138 152 long ret;
139   - asm("syscall" : "=a" (ret) :
140   - "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
  153 +
  154 + asm(
  155 + "mov %%ebx, %%edx \n"
  156 + "mov %2, %%ebx \n"
  157 + "call VDSO32_vsyscall \n"
  158 + "mov %%edx, %%ebx \n"
  159 + : "=a" (ret)
  160 + : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
  161 + : "memory", "edx");
141 162 return ret;
142 163 }
143 164  
... ... @@ -145,28 +166,79 @@
145 166 {
146 167 long ret;
147 168  
148   - asm("syscall" : "=a" (ret) :
149   - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
  169 + asm(
  170 + "mov %%ebx, %%edx \n"
  171 + "mov %2, %%ebx \n"
  172 + "call VDSO32_vsyscall \n"
  173 + "mov %%edx, %%ebx \n"
  174 + : "=a" (ret)
  175 + : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
  176 + : "memory", "edx");
150 177 return ret;
151 178 }
152 179  
  180 +#ifdef CONFIG_PARAVIRT_CLOCK
153 181  
  182 +static notrace cycle_t vread_pvclock(int *mode)
  183 +{
  184 + *mode = VCLOCK_NONE;
  185 + return 0;
  186 +}
  187 +#endif
  188 +
  189 +#endif
  190 +
  191 +notrace static cycle_t vread_tsc(void)
  192 +{
  193 + cycle_t ret;
  194 + u64 last;
  195 +
  196 + /*
  197 + * Empirically, a fence (of type that depends on the CPU)
  198 + * before rdtsc is enough to ensure that rdtsc is ordered
  199 + * with respect to loads. The various CPU manuals are unclear
  200 + * as to whether rdtsc can be reordered with later loads,
  201 + * but no one has ever seen it happen.
  202 + */
  203 + rdtsc_barrier();
  204 + ret = (cycle_t)__native_read_tsc();
  205 +
  206 + last = gtod->cycle_last;
  207 +
  208 + if (likely(ret >= last))
  209 + return ret;
  210 +
  211 + /*
  212 + * GCC likes to generate cmov here, but this branch is extremely
  213 + * predictable (it's just a funciton of time and the likely is
  214 + * very likely) and there's a data dependence, so force GCC
  215 + * to generate a branch instead. I don't barrier() because
  216 + * we don't actually need a barrier, and if this function
  217 + * ever gets inlined it will generate worse code.
  218 + */
  219 + asm volatile ("");
  220 + return last;
  221 +}
  222 +
154 223 notrace static inline u64 vgetsns(int *mode)
155 224 {
156   - long v;
  225 + u64 v;
157 226 cycles_t cycles;
158   - if (gtod->clock.vclock_mode == VCLOCK_TSC)
  227 +
  228 + if (gtod->vclock_mode == VCLOCK_TSC)
159 229 cycles = vread_tsc();
160   - else if (gtod->clock.vclock_mode == VCLOCK_HPET)
  230 +#ifdef CONFIG_HPET_TIMER
  231 + else if (gtod->vclock_mode == VCLOCK_HPET)
161 232 cycles = vread_hpet();
  233 +#endif
162 234 #ifdef CONFIG_PARAVIRT_CLOCK
163   - else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
  235 + else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
164 236 cycles = vread_pvclock(mode);
165 237 #endif
166 238 else
167 239 return 0;
168   - v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
169   - return v * gtod->clock.mult;
  240 + v = (cycles - gtod->cycle_last) & gtod->mask;
  241 + return v * gtod->mult;
170 242 }
171 243  
172 244 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
... ... @@ -176,106 +248,102 @@
176 248 u64 ns;
177 249 int mode;
178 250  
179   - ts->tv_nsec = 0;
180 251 do {
181   - seq = raw_read_seqcount_begin(&gtod->seq);
182   - mode = gtod->clock.vclock_mode;
  252 + seq = gtod_read_begin(gtod);
  253 + mode = gtod->vclock_mode;
183 254 ts->tv_sec = gtod->wall_time_sec;
184 255 ns = gtod->wall_time_snsec;
185 256 ns += vgetsns(&mode);
186   - ns >>= gtod->clock.shift;
187   - } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
  257 + ns >>= gtod->shift;
  258 + } while (unlikely(gtod_read_retry(gtod, seq)));
188 259  
189   - timespec_add_ns(ts, ns);
  260 + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
  261 + ts->tv_nsec = ns;
  262 +
190 263 return mode;
191 264 }
192 265  
193   -notrace static int do_monotonic(struct timespec *ts)
  266 +notrace static int __always_inline do_monotonic(struct timespec *ts)
194 267 {
195 268 unsigned long seq;
196 269 u64 ns;
197 270 int mode;
198 271  
199   - ts->tv_nsec = 0;
200 272 do {
201   - seq = raw_read_seqcount_begin(&gtod->seq);
202   - mode = gtod->clock.vclock_mode;
  273 + seq = gtod_read_begin(gtod);
  274 + mode = gtod->vclock_mode;
203 275 ts->tv_sec = gtod->monotonic_time_sec;
204 276 ns = gtod->monotonic_time_snsec;
205 277 ns += vgetsns(&mode);
206   - ns >>= gtod->clock.shift;
207   - } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
208   - timespec_add_ns(ts, ns);
  278 + ns >>= gtod->shift;
  279 + } while (unlikely(gtod_read_retry(gtod, seq)));
209 280  
  281 + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
  282 + ts->tv_nsec = ns;
  283 +
210 284 return mode;
211 285 }
212 286  
213   -notrace static int do_realtime_coarse(struct timespec *ts)
  287 +notrace static void do_realtime_coarse(struct timespec *ts)
214 288 {
215 289 unsigned long seq;
216 290 do {
217   - seq = raw_read_seqcount_begin(&gtod->seq);
218   - ts->tv_sec = gtod->wall_time_coarse.tv_sec;
219   - ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
220   - } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
221   - return 0;
  291 + seq = gtod_read_begin(gtod);
  292 + ts->tv_sec = gtod->wall_time_coarse_sec;
  293 + ts->tv_nsec = gtod->wall_time_coarse_nsec;
  294 + } while (unlikely(gtod_read_retry(gtod, seq)));
222 295 }
223 296  
224   -notrace static int do_monotonic_coarse(struct timespec *ts)
  297 +notrace static void do_monotonic_coarse(struct timespec *ts)
225 298 {
226 299 unsigned long seq;
227 300 do {
228   - seq = raw_read_seqcount_begin(&gtod->seq);
229   - ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
230   - ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
231   - } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
232   -
233   - return 0;
  301 + seq = gtod_read_begin(gtod);
  302 + ts->tv_sec = gtod->monotonic_time_coarse_sec;
  303 + ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
  304 + } while (unlikely(gtod_read_retry(gtod, seq)));
234 305 }
235 306  
236 307 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
237 308 {
238   - int ret = VCLOCK_NONE;
239   -
240 309 switch (clock) {
241 310 case CLOCK_REALTIME:
242   - ret = do_realtime(ts);
  311 + if (do_realtime(ts) == VCLOCK_NONE)
  312 + goto fallback;
243 313 break;
244 314 case CLOCK_MONOTONIC:
245   - ret = do_monotonic(ts);
  315 + if (do_monotonic(ts) == VCLOCK_NONE)
  316 + goto fallback;
246 317 break;
247 318 case CLOCK_REALTIME_COARSE:
248   - return do_realtime_coarse(ts);
  319 + do_realtime_coarse(ts);
  320 + break;
249 321 case CLOCK_MONOTONIC_COARSE:
250   - return do_monotonic_coarse(ts);
  322 + do_monotonic_coarse(ts);
  323 + break;
  324 + default:
  325 + goto fallback;
251 326 }
252 327  
253   - if (ret == VCLOCK_NONE)
254   - return vdso_fallback_gettime(clock, ts);
255 328 return 0;
  329 +fallback:
  330 + return vdso_fallback_gettime(clock, ts);
256 331 }
257 332 int clock_gettime(clockid_t, struct timespec *)
258 333 __attribute__((weak, alias("__vdso_clock_gettime")));
259 334  
260 335 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
261 336 {
262   - long ret = VCLOCK_NONE;
263   -
264 337 if (likely(tv != NULL)) {
265   - BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
266   - offsetof(struct timespec, tv_nsec) ||
267   - sizeof(*tv) != sizeof(struct timespec));
268   - ret = do_realtime((struct timespec *)tv);
  338 + if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
  339 + return vdso_fallback_gtod(tv, tz);
269 340 tv->tv_usec /= 1000;
270 341 }
271 342 if (unlikely(tz != NULL)) {
272   - /* Avoid memcpy. Some old compilers fail to inline it */
273   - tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
274   - tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
  343 + tz->tz_minuteswest = gtod->tz_minuteswest;
  344 + tz->tz_dsttime = gtod->tz_dsttime;
275 345 }
276 346  
277   - if (ret == VCLOCK_NONE)
278   - return vdso_fallback_gtod(tv, tz);
279 347 return 0;
280 348 }
281 349 int gettimeofday(struct timeval *, struct timezone *)
... ... @@ -287,8 +355,8 @@
287 355 */
288 356 notrace time_t __vdso_time(time_t *t)
289 357 {
290   - /* This is atomic on x86_64 so we don't need any locks. */
291   - time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
  358 + /* This is atomic on x86 so we don't need any locks. */
  359 + time_t result = ACCESS_ONCE(gtod->wall_time_sec);
292 360  
293 361 if (t)
294 362 *t = result;
arch/x86/vdso/vdso-layout.lds.S
... ... @@ -6,8 +6,26 @@
6 6  
7 7 SECTIONS
8 8 {
9   - . = VDSO_PRELINK + SIZEOF_HEADERS;
  9 +#ifdef BUILD_VDSO32
  10 +#include <asm/vdso32.h>
10 11  
  12 + .hpet_sect : {
  13 + hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
  14 + } :text :hpet_sect
  15 +
  16 + .vvar_sect : {
  17 + vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
  18 +
  19 + /* Place all vvars at the offsets in asm/vvar.h. */
  20 +#define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset;
  21 +#define __VVAR_KERNEL_LDS
  22 +#include <asm/vvar.h>
  23 +#undef __VVAR_KERNEL_LDS
  24 +#undef EMIT_VVAR
  25 + } :text :vvar_sect
  26 +#endif
  27 + . = SIZEOF_HEADERS;
  28 +
11 29 .hash : { *(.hash) } :text
12 30 .gnu.hash : { *(.gnu.hash) }
13 31 .dynsym : { *(.dynsym) }
... ... @@ -44,6 +62,11 @@
44 62 . = ALIGN(0x100);
45 63  
46 64 .text : { *(.text*) } :text =0x90909090
  65 +
  66 + /DISCARD/ : {
  67 + *(.discard)
  68 + *(.discard.*)
  69 + }
47 70 }
48 71  
49 72 /*
... ... @@ -61,5 +84,9 @@
61 84 dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
62 85 note PT_NOTE FLAGS(4); /* PF_R */
63 86 eh_frame_hdr PT_GNU_EH_FRAME;
  87 +#ifdef BUILD_VDSO32
  88 + vvar_sect PT_NULL FLAGS(4); /* PF_R */
  89 + hpet_sect PT_NULL FLAGS(4); /* PF_R */
  90 +#endif
64 91 }
arch/x86/vdso/vdso.S
1   -#include <asm/page_types.h>
2   -#include <linux/linkage.h>
  1 +#include <asm/vdso.h>
3 2  
4   -__PAGE_ALIGNED_DATA
5   -
6   - .globl vdso_start, vdso_end
7   - .align PAGE_SIZE
8   -vdso_start:
9   - .incbin "arch/x86/vdso/vdso.so"
10   -vdso_end:
11   - .align PAGE_SIZE /* extra data here leaks to userspace. */
12   -
13   -.previous
14   -
15   - .globl vdso_pages
16   - .bss
17   - .align 8
18   - .type vdso_pages, @object
19   -vdso_pages:
20   - .zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
21   - .size vdso_pages, .-vdso_pages
  3 +DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so")
arch/x86/vdso/vdso32-setup.c
... ... @@ -16,6 +16,7 @@
16 16 #include <linux/mm.h>
17 17 #include <linux/err.h>
18 18 #include <linux/module.h>
  19 +#include <linux/slab.h>
19 20  
20 21 #include <asm/cpufeature.h>
21 22 #include <asm/msr.h>
... ... @@ -25,17 +26,14 @@
25 26 #include <asm/tlbflush.h>
26 27 #include <asm/vdso.h>
27 28 #include <asm/proto.h>
  29 +#include <asm/fixmap.h>
  30 +#include <asm/hpet.h>
  31 +#include <asm/vvar.h>
28 32  
29   -enum {
30   - VDSO_DISABLED = 0,
31   - VDSO_ENABLED = 1,
32   - VDSO_COMPAT = 2,
33   -};
34   -
35 33 #ifdef CONFIG_COMPAT_VDSO
36   -#define VDSO_DEFAULT VDSO_COMPAT
  34 +#define VDSO_DEFAULT 0
37 35 #else
38   -#define VDSO_DEFAULT VDSO_ENABLED
  36 +#define VDSO_DEFAULT 1
39 37 #endif
40 38  
41 39 #ifdef CONFIG_X86_64
... ... @@ -44,13 +42,6 @@
44 42 #endif
45 43  
46 44 /*
47   - * This is the difference between the prelinked addresses in the vDSO images
48   - * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
49   - * in the user address space.
50   - */
51   -#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
52   -
53   -/*
54 45 * Should the kernel map a VDSO page into processes and pass its
55 46 * address down to glibc upon exec()?
56 47 */
... ... @@ -60,6 +51,9 @@
60 51 {
61 52 vdso_enabled = simple_strtoul(s, NULL, 0);
62 53  
  54 + if (vdso_enabled > 1)
  55 + pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
  56 +
63 57 return 1;
64 58 }
65 59  
66 60  
... ... @@ -76,125 +70,9 @@
76 70 EXPORT_SYMBOL_GPL(vdso_enabled);
77 71 #endif
78 72  
79   -static __init void reloc_symtab(Elf32_Ehdr *ehdr,
80   - unsigned offset, unsigned size)
81   -{
82   - Elf32_Sym *sym = (void *)ehdr + offset;
83   - unsigned nsym = size / sizeof(*sym);
84   - unsigned i;
  73 +static struct page **vdso32_pages;
  74 +static unsigned vdso32_size;
85 75  
86   - for(i = 0; i < nsym; i++, sym++) {
87   - if (sym->st_shndx == SHN_UNDEF ||
88   - sym->st_shndx == SHN_ABS)
89   - continue; /* skip */
90   -
91   - if (sym->st_shndx > SHN_LORESERVE) {
92   - printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
93   - sym->st_shndx);
94   - continue;
95   - }
96   -
97   - switch(ELF_ST_TYPE(sym->st_info)) {
98   - case STT_OBJECT:
99   - case STT_FUNC:
100   - case STT_SECTION:
101   - case STT_FILE:
102   - sym->st_value += VDSO_ADDR_ADJUST;
103   - }
104   - }
105   -}
106   -
107   -static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
108   -{
109   - Elf32_Dyn *dyn = (void *)ehdr + offset;
110   -
111   - for(; dyn->d_tag != DT_NULL; dyn++)
112   - switch(dyn->d_tag) {
113   - case DT_PLTGOT:
114   - case DT_HASH:
115   - case DT_STRTAB:
116   - case DT_SYMTAB:
117   - case DT_RELA:
118   - case DT_INIT:
119   - case DT_FINI:
120   - case DT_REL:
121   - case DT_DEBUG:
122   - case DT_JMPREL:
123   - case DT_VERSYM:
124   - case DT_VERDEF:
125   - case DT_VERNEED:
126   - case DT_ADDRRNGLO ... DT_ADDRRNGHI:
127   - /* definitely pointers needing relocation */
128   - dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
129   - break;
130   -
131   - case DT_ENCODING ... OLD_DT_LOOS-1:
132   - case DT_LOOS ... DT_HIOS-1:
133   - /* Tags above DT_ENCODING are pointers if
134   - they're even */
135   - if (dyn->d_tag >= DT_ENCODING &&
136   - (dyn->d_tag & 1) == 0)
137   - dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
138   - break;
139   -
140   - case DT_VERDEFNUM:
141   - case DT_VERNEEDNUM:
142   - case DT_FLAGS_1:
143   - case DT_RELACOUNT:
144   - case DT_RELCOUNT:
145   - case DT_VALRNGLO ... DT_VALRNGHI:
146   - /* definitely not pointers */
147   - break;
148   -
149   - case OLD_DT_LOOS ... DT_LOOS-1:
150   - case DT_HIOS ... DT_VALRNGLO-1:
151   - default:
152   - if (dyn->d_tag > DT_ENCODING)
153   - printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
154   - dyn->d_tag);
155   - break;
156   - }
157   -}
158   -
159   -static __init void relocate_vdso(Elf32_Ehdr *ehdr)
160   -{
161   - Elf32_Phdr *phdr;
162   - Elf32_Shdr *shdr;
163   - int i;
164   -
165   - BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
166   - !elf_check_arch_ia32(ehdr) ||
167   - ehdr->e_type != ET_DYN);
168   -
169   - ehdr->e_entry += VDSO_ADDR_ADJUST;
170   -
171   - /* rebase phdrs */
172   - phdr = (void *)ehdr + ehdr->e_phoff;
173   - for (i = 0; i < ehdr->e_phnum; i++) {
174   - phdr[i].p_vaddr += VDSO_ADDR_ADJUST;
175   -
176   - /* relocate dynamic stuff */
177   - if (phdr[i].p_type == PT_DYNAMIC)
178   - reloc_dyn(ehdr, phdr[i].p_offset);
179   - }
180   -
181   - /* rebase sections */
182   - shdr = (void *)ehdr + ehdr->e_shoff;
183   - for(i = 0; i < ehdr->e_shnum; i++) {
184   - if (!(shdr[i].sh_flags & SHF_ALLOC))
185   - continue;
186   -
187   - shdr[i].sh_addr += VDSO_ADDR_ADJUST;
188   -
189   - if (shdr[i].sh_type == SHT_SYMTAB ||
190   - shdr[i].sh_type == SHT_DYNSYM)
191   - reloc_symtab(ehdr, shdr[i].sh_offset,
192   - shdr[i].sh_size);
193   - }
194   -}
195   -
196   -static struct page *vdso32_pages[1];
197   -
198 76 #ifdef CONFIG_X86_64
199 77  
200 78 #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
... ... @@ -212,12 +90,6 @@
212 90 wrmsrl(MSR_CSTAR, ia32_cstar_target);
213 91 }
214 92  
215   -#define compat_uses_vma 1
216   -
217   -static inline void map_compat_vdso(int map)
218   -{
219   -}
220   -
221 93 #else /* CONFIG_X86_32 */
222 94  
223 95 #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
... ... @@ -241,65 +113,37 @@
241 113 put_cpu();
242 114 }
243 115  
244   -static struct vm_area_struct gate_vma;
245   -
246   -static int __init gate_vma_init(void)
247   -{
248   - gate_vma.vm_mm = NULL;
249   - gate_vma.vm_start = FIXADDR_USER_START;
250   - gate_vma.vm_end = FIXADDR_USER_END;
251   - gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
252   - gate_vma.vm_page_prot = __P101;
253   -
254   - return 0;
255   -}
256   -
257   -#define compat_uses_vma 0
258   -
259   -static void map_compat_vdso(int map)
260   -{
261   - static int vdso_mapped;
262   -
263   - if (map == vdso_mapped)
264   - return;
265   -
266   - vdso_mapped = map;
267   -
268   - __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
269   - map ? PAGE_READONLY_EXEC : PAGE_NONE);
270   -
271   - /* flush stray tlbs */
272   - flush_tlb_all();
273   -}
274   -
275 116 #endif /* CONFIG_X86_64 */
276 117  
277 118 int __init sysenter_setup(void)
278 119 {
279   - void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
280   - const void *vsyscall;
281   - size_t vsyscall_len;
  120 + char *vdso32_start, *vdso32_end;
  121 + int npages, i;
282 122  
283   - vdso32_pages[0] = virt_to_page(syscall_page);
284   -
285   -#ifdef CONFIG_X86_32
286   - gate_vma_init();
287   -#endif
288   -
  123 +#ifdef CONFIG_COMPAT
289 124 if (vdso32_syscall()) {
290   - vsyscall = &vdso32_syscall_start;
291   - vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
292   - } else if (vdso32_sysenter()){
293   - vsyscall = &vdso32_sysenter_start;
294   - vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
  125 + vdso32_start = vdso32_syscall_start;
  126 + vdso32_end = vdso32_syscall_end;
  127 + vdso32_pages = vdso32_syscall_pages;
  128 + } else
  129 +#endif
  130 + if (vdso32_sysenter()) {
  131 + vdso32_start = vdso32_sysenter_start;
  132 + vdso32_end = vdso32_sysenter_end;
  133 + vdso32_pages = vdso32_sysenter_pages;
295 134 } else {
296   - vsyscall = &vdso32_int80_start;
297   - vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
  135 + vdso32_start = vdso32_int80_start;
  136 + vdso32_end = vdso32_int80_end;
  137 + vdso32_pages = vdso32_int80_pages;
298 138 }
299 139  
300   - memcpy(syscall_page, vsyscall, vsyscall_len);
301   - relocate_vdso(syscall_page);
  140 + npages = ((vdso32_end - vdso32_start) + PAGE_SIZE - 1) / PAGE_SIZE;
  141 + vdso32_size = npages << PAGE_SHIFT;
  142 + for (i = 0; i < npages; i++)
  143 + vdso32_pages[i] = virt_to_page(vdso32_start + i*PAGE_SIZE);
302 144  
  145 + patch_vdso32(vdso32_start, vdso32_size);
  146 +
303 147 return 0;
304 148 }
305 149  
... ... @@ -309,48 +153,73 @@
309 153 struct mm_struct *mm = current->mm;
310 154 unsigned long addr;
311 155 int ret = 0;
312   - bool compat;
  156 + struct vm_area_struct *vma;
313 157  
314 158 #ifdef CONFIG_X86_X32_ABI
315 159 if (test_thread_flag(TIF_X32))
316 160 return x32_setup_additional_pages(bprm, uses_interp);
317 161 #endif
318 162  
319   - if (vdso_enabled == VDSO_DISABLED)
  163 + if (vdso_enabled != 1) /* Other values all mean "disabled" */
320 164 return 0;
321 165  
322 166 down_write(&mm->mmap_sem);
323 167  
324   - /* Test compat mode once here, in case someone
325   - changes it via sysctl */
326   - compat = (vdso_enabled == VDSO_COMPAT);
  168 + addr = get_unmapped_area(NULL, 0, vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0);
  169 + if (IS_ERR_VALUE(addr)) {
  170 + ret = addr;
  171 + goto up_fail;
  172 + }
327 173  
328   - map_compat_vdso(compat);
  174 + addr += VDSO_OFFSET(VDSO_PREV_PAGES);
329 175  
330   - if (compat)
331   - addr = VDSO_HIGH_BASE;
332   - else {
333   - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
334   - if (IS_ERR_VALUE(addr)) {
335   - ret = addr;
336   - goto up_fail;
337   - }
  176 + current->mm->context.vdso = (void *)addr;
  177 +
  178 + /*
  179 + * MAYWRITE to allow gdb to COW and set breakpoints
  180 + */
  181 + ret = install_special_mapping(mm,
  182 + addr,
  183 + vdso32_size,
  184 + VM_READ|VM_EXEC|
  185 + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
  186 + vdso32_pages);
  187 +
  188 + if (ret)
  189 + goto up_fail;
  190 +
  191 + vma = _install_special_mapping(mm,
  192 + addr - VDSO_OFFSET(VDSO_PREV_PAGES),
  193 + VDSO_OFFSET(VDSO_PREV_PAGES),
  194 + VM_READ,
  195 + NULL);
  196 +
  197 + if (IS_ERR(vma)) {
  198 + ret = PTR_ERR(vma);
  199 + goto up_fail;
338 200 }
339 201  
340   - current->mm->context.vdso = (void *)addr;
  202 + ret = remap_pfn_range(vma,
  203 + addr - VDSO_OFFSET(VDSO_VVAR_PAGE),
  204 + __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
  205 + PAGE_SIZE,
  206 + PAGE_READONLY);
341 207  
342   - if (compat_uses_vma || !compat) {
343   - /*
344   - * MAYWRITE to allow gdb to COW and set breakpoints
345   - */
346   - ret = install_special_mapping(mm, addr, PAGE_SIZE,
347   - VM_READ|VM_EXEC|
348   - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
349   - vdso32_pages);
  208 + if (ret)
  209 + goto up_fail;
350 210  
  211 +#ifdef CONFIG_HPET_TIMER
  212 + if (hpet_address) {
  213 + ret = io_remap_pfn_range(vma,
  214 + addr - VDSO_OFFSET(VDSO_HPET_PAGE),
  215 + hpet_address >> PAGE_SHIFT,
  216 + PAGE_SIZE,
  217 + pgprot_noncached(PAGE_READONLY));
  218 +
351 219 if (ret)
352 220 goto up_fail;
353 221 }
  222 +#endif
354 223  
355 224 current_thread_info()->sysenter_return =
356 225 VDSO32_SYMBOL(addr, SYSENTER_RETURN);
357 226  
... ... @@ -411,20 +280,12 @@
411 280  
412 281 struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
413 282 {
414   - /*
415   - * Check to see if the corresponding task was created in compat vdso
416   - * mode.
417   - */
418   - if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
419   - return &gate_vma;
420 283 return NULL;
421 284 }
422 285  
423 286 int in_gate_area(struct mm_struct *mm, unsigned long addr)
424 287 {
425   - const struct vm_area_struct *vma = get_gate_vma(mm);
426   -
427   - return vma && addr >= vma->vm_start && addr < vma->vm_end;
  288 + return 0;
428 289 }
429 290  
430 291 int in_gate_area_no_mm(unsigned long addr)
arch/x86/vdso/vdso32.S
1   -#include <linux/init.h>
  1 +#include <asm/vdso.h>
2 2  
3   -__INITDATA
  3 +DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so")
4 4  
5   - .globl vdso32_int80_start, vdso32_int80_end
6   -vdso32_int80_start:
7   - .incbin "arch/x86/vdso/vdso32-int80.so"
8   -vdso32_int80_end:
9   -
10   - .globl vdso32_syscall_start, vdso32_syscall_end
11   -vdso32_syscall_start:
12 5 #ifdef CONFIG_COMPAT
13   - .incbin "arch/x86/vdso/vdso32-syscall.so"
  6 +DEFINE_VDSO_IMAGE(vdso32_syscall, "arch/x86/vdso/vdso32-syscall.so")
14 7 #endif
15   -vdso32_syscall_end:
16 8  
17   - .globl vdso32_sysenter_start, vdso32_sysenter_end
18   -vdso32_sysenter_start:
19   - .incbin "arch/x86/vdso/vdso32-sysenter.so"
20   -vdso32_sysenter_end:
21   -
22   -__FINIT
  9 +DEFINE_VDSO_IMAGE(vdso32_sysenter, "arch/x86/vdso/vdso32-sysenter.so")
arch/x86/vdso/vdso32/vclock_gettime.c
  1 +#define BUILD_VDSO32
  2 +
  3 +#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
  4 +#undef CONFIG_OPTIMIZE_INLINING
  5 +#endif
  6 +
  7 +#undef CONFIG_X86_PPRO_FENCE
  8 +
  9 +#ifdef CONFIG_X86_64
  10 +
  11 +/*
  12 + * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
  13 + * configuration
  14 + */
  15 +#undef CONFIG_64BIT
  16 +#undef CONFIG_X86_64
  17 +#undef CONFIG_ILLEGAL_POINTER_VALUE
  18 +#undef CONFIG_SPARSEMEM_VMEMMAP
  19 +#undef CONFIG_NR_CPUS
  20 +
  21 +#define CONFIG_X86_32 1
  22 +#define CONFIG_PAGE_OFFSET 0
  23 +#define CONFIG_ILLEGAL_POINTER_VALUE 0
  24 +#define CONFIG_NR_CPUS 1
  25 +
  26 +#define BUILD_VDSO32_64
  27 +
  28 +#endif
  29 +
  30 +#include "../vclock_gettime.c"
arch/x86/vdso/vdso32/vdso32.lds.S
... ... @@ -8,7 +8,11 @@
8 8 * values visible using the asm-x86/vdso.h macros from the kernel proper.
9 9 */
10 10  
  11 +#include <asm/page.h>
  12 +
  13 +#define BUILD_VDSO32
11 14 #define VDSO_PRELINK 0
  15 +
12 16 #include "../vdso-layout.lds.S"
13 17  
14 18 /* The ELF entry point can be used to set the AT_SYSINFO value. */
... ... @@ -19,6 +23,13 @@
19 23 */
20 24 VERSION
21 25 {
  26 + LINUX_2.6 {
  27 + global:
  28 + __vdso_clock_gettime;
  29 + __vdso_gettimeofday;
  30 + __vdso_time;
  31 + };
  32 +
22 33 LINUX_2.5 {
23 34 global:
24 35 __kernel_vsyscall;
25 36  
... ... @@ -31,8 +42,10 @@
31 42 /*
32 43 * Symbols we define here called VDSO* get their values into vdso32-syms.h.
33 44 */
34   -VDSO32_PRELINK = VDSO_PRELINK;
35 45 VDSO32_vsyscall = __kernel_vsyscall;
36 46 VDSO32_sigreturn = __kernel_sigreturn;
37 47 VDSO32_rt_sigreturn = __kernel_rt_sigreturn;
  48 +VDSO32_clock_gettime = clock_gettime;
  49 +VDSO32_gettimeofday = gettimeofday;
  50 +VDSO32_time = time;
arch/x86/vdso/vdsox32.S
1   -#include <asm/page_types.h>
2   -#include <linux/linkage.h>
  1 +#include <asm/vdso.h>
3 2  
4   -__PAGE_ALIGNED_DATA
5   -
6   - .globl vdsox32_start, vdsox32_end
7   - .align PAGE_SIZE
8   -vdsox32_start:
9   - .incbin "arch/x86/vdso/vdsox32.so"
10   -vdsox32_end:
11   - .align PAGE_SIZE /* extra data here leaks to userspace. */
12   -
13   -.previous
14   -
15   - .globl vdsox32_pages
16   - .bss
17   - .align 8
18   - .type vdsox32_pages, @object
19   -vdsox32_pages:
20   - .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
21   - .size vdsox32_pages, .-vdsox32_pages
  3 +DEFINE_VDSO_IMAGE(vdsox32, "arch/x86/vdso/vdsox32.so")
arch/x86/vdso/vma.c
... ... @@ -16,20 +16,22 @@
16 16 #include <asm/vdso.h>
17 17 #include <asm/page.h>
18 18  
  19 +#if defined(CONFIG_X86_64)
19 20 unsigned int __read_mostly vdso_enabled = 1;
20 21  
21   -extern char vdso_start[], vdso_end[];
  22 +DECLARE_VDSO_IMAGE(vdso);
22 23 extern unsigned short vdso_sync_cpuid;
23   -
24   -extern struct page *vdso_pages[];
25 24 static unsigned vdso_size;
26 25  
27 26 #ifdef CONFIG_X86_X32_ABI
28   -extern char vdsox32_start[], vdsox32_end[];
29   -extern struct page *vdsox32_pages[];
  27 +DECLARE_VDSO_IMAGE(vdsox32);
30 28 static unsigned vdsox32_size;
  29 +#endif
  30 +#endif
31 31  
32   -static void __init patch_vdsox32(void *vdso, size_t len)
  32 +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_X32_ABI) || \
  33 + defined(CONFIG_COMPAT)
  34 +void __init patch_vdso32(void *vdso, size_t len)
33 35 {
34 36 Elf32_Ehdr *hdr = vdso;
35 37 Elf32_Shdr *sechdrs, *alt_sec = 0;
... ... @@ -52,7 +54,7 @@
52 54 }
53 55  
54 56 /* If we get here, it's probably a bug. */
55   - pr_warning("patch_vdsox32: .altinstructions not found\n");
  57 + pr_warning("patch_vdso32: .altinstructions not found\n");
56 58 return; /* nothing to patch */
57 59  
58 60 found:
... ... @@ -61,6 +63,7 @@
61 63 }
62 64 #endif
63 65  
  66 +#if defined(CONFIG_X86_64)
64 67 static void __init patch_vdso64(void *vdso, size_t len)
65 68 {
66 69 Elf64_Ehdr *hdr = vdso;
... ... @@ -104,7 +107,7 @@
104 107 vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE);
105 108  
106 109 #ifdef CONFIG_X86_X32_ABI
107   - patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start);
  110 + patch_vdso32(vdsox32_start, vdsox32_end - vdsox32_start);
108 111 npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE;
109 112 vdsox32_size = npages << PAGE_SHIFT;
110 113 for (i = 0; i < npages; i++)
... ... @@ -204,4 +207,5 @@
204 207 return 0;
205 208 }
206 209 __setup("vdso=", vdso_setup);
  210 +#endif
arch/x86/xen/mmu.c
... ... @@ -2058,7 +2058,6 @@
2058 2058 case FIX_RO_IDT:
2059 2059 #ifdef CONFIG_X86_32
2060 2060 case FIX_WP_TEST:
2061   - case FIX_VDSO:
2062 2061 # ifdef CONFIG_HIGHMEM
2063 2062 case FIX_KMAP_BEGIN ... FIX_KMAP_END:
2064 2063 # endif
include/linux/mm.h
... ... @@ -1756,6 +1756,9 @@
1756 1756 extern struct file *get_mm_exe_file(struct mm_struct *mm);
1757 1757  
1758 1758 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
  1759 +extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
  1760 + unsigned long addr, unsigned long len,
  1761 + unsigned long flags, struct page **pages);
1759 1762 extern int install_special_mapping(struct mm_struct *mm,
1760 1763 unsigned long addr, unsigned long len,
1761 1764 unsigned long flags, struct page **pages);
mm/mmap.c
... ... @@ -2918,7 +2918,7 @@
2918 2918 * The array pointer and the pages it points to are assumed to stay alive
2919 2919 * for as long as this mapping might exist.
2920 2920 */
2921   -int install_special_mapping(struct mm_struct *mm,
  2921 +struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
2922 2922 unsigned long addr, unsigned long len,
2923 2923 unsigned long vm_flags, struct page **pages)
2924 2924 {
... ... @@ -2927,7 +2927,7 @@
2927 2927  
2928 2928 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2929 2929 if (unlikely(vma == NULL))
2930   - return -ENOMEM;
  2930 + return ERR_PTR(-ENOMEM);
2931 2931  
2932 2932 INIT_LIST_HEAD(&vma->anon_vma_chain);
2933 2933 vma->vm_mm = mm;
2934 2934  
... ... @@ -2948,11 +2948,23 @@
2948 2948  
2949 2949 perf_event_mmap(vma);
2950 2950  
2951   - return 0;
  2951 + return vma;
2952 2952  
2953 2953 out:
2954 2954 kmem_cache_free(vm_area_cachep, vma);
2955   - return ret;
  2955 + return ERR_PTR(ret);
  2956 +}
  2957 +
  2958 +int install_special_mapping(struct mm_struct *mm,
  2959 + unsigned long addr, unsigned long len,
  2960 + unsigned long vm_flags, struct page **pages)
  2961 +{
  2962 + struct vm_area_struct *vma = _install_special_mapping(mm,
  2963 + addr, len, vm_flags, pages);
  2964 +
  2965 + if (IS_ERR(vma))
  2966 + return PTR_ERR(vma);
  2967 + return 0;
2956 2968 }
2957 2969  
2958 2970 static DEFINE_MUTEX(mm_all_locks_mutex);
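
The new _install_special_mapping() returns the freshly created VMA (or an ERR_PTR on failure) so that callers such as the vdso32 setup code above can keep operating on it, while the int-returning install_special_mapping() is kept as a thin wrapper for existing users. A condensed sketch of the new calling convention, with a made-up helper name:

    /* Illustrative caller, modelled on arch_setup_additional_pages() above. */
    static int map_readonly_area(struct mm_struct *mm, unsigned long addr,
                                 unsigned long len)
    {
            struct vm_area_struct *vma;

            vma = _install_special_mapping(mm, addr, len, VM_READ, NULL);
            if (IS_ERR(vma))
                    return PTR_ERR(vma);

            /* The caller can now e.g. remap_pfn_range() pages into vma. */
            return 0;
    }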