Commit 1dbf527c51c6c20c19869c8125cb5b87c3d09506

Authored by Jeremy Fitzhardinge
Committed by Andi Kleen
1 parent d4f7a2c18e

[PATCH] i386: Make COMPAT_VDSO runtime selectable.

Now that relocation of the VDSO for COMPAT_VDSO users is done at
runtime rather than compile time, it is possible to enable/disable
compat mode at runtime.

This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the
kernel command line, or via sysctl.  (Switching on a running system
shouldn't be done lightly; any process which was relying on the compat
VDSO will be upset if it goes away.)
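
For example, a minimal userspace sketch for flipping a running system into
compat mode (this assumes the vdso_enabled sysctl is exposed as
/proc/sys/vm/vdso_enabled on i386; check where the sysctl is actually
registered before relying on that path):

	/* Hypothetical helper: select the compat VDSO at runtime.
	 * The /proc path is an assumption; adjust it to wherever
	 * vdso_enabled is registered in the sysctl tree. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/vdso_enabled", "w");

		if (!f) {
			perror("vdso_enabled");
			return 1;
		}
		/* 0 = no VDSO, 1 = normal VDSO (default), 2 = compat VDSO */
		fprintf(f, "2\n");
		return fclose(f) ? 1 : 0;
	}

Booting with vdso=2 on the kernel command line gives the same behaviour
from startup.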

The COMPAT_VDSO config option still exists, but if enabled it just
makes vdso_enabled default to VDSO_COMPAT.
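
Whether a particular process actually got the compat vDSO can also be
checked from userspace, since the kernel advertises the vDSO base through
the auxiliary vector; under compat mode every process sees the same fixed
high address.  A small sketch (assumes glibc's getauxval() and the i386
AT_SYSINFO_EHDR auxv entry):

	/* Print the vDSO base handed to this process.  With vdso=2 it is
	 * the fixed fixmap address shared by all processes; with the
	 * normal VDSO it is a per-exec mmap address. */
	#include <stdio.h>
	#include <elf.h>
	#include <sys/auxv.h>

	int main(void)
	{
		unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

		printf("vDSO mapped at %#lx\n", vdso);
		return 0;
	}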

From: Hugh Dickins <hugh@veritas.com>

Fix oops from i386-make-compat_vdso-runtime-selectable.patch.

Even mingetty at system startup finds it easy to trigger an oops
while reading /proc/PID/maps: though it has a good hold on the mm
itself, that cannot stop exit_mm() from resetting tsk->mm to NULL.

(It is usually show_map()'s call to get_gate_vma() which oopses,
and I expect we could change that to check priv->tail_vma instead;
but no matter, even m_start()'s call just after get_task_mm() is racy.)

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Zachary Amsden <zach@vmware.com>
Cc: "Jan Beulich" <JBeulich@novell.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>

Showing 3 changed files with 95 additions and 53 deletions

Documentation/kernel-parameters.txt
@@ -1820,6 +1820,7 @@
 			[USBHID] The interval which mice are to be polled at.
 
 	vdso=		[IA-32,SH]
+			vdso=2: enable compat VDSO (default with COMPAT_VDSO)
 			vdso=1: enable VDSO (default)
 			vdso=0: disable VDSO mapping
 
arch/i386/kernel/sysenter.c
@@ -23,16 +23,25 @@
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
 #include <asm/elf.h>
+#include <asm/tlbflush.h>
 
+enum {
+	VDSO_DISABLED = 0,
+	VDSO_ENABLED = 1,
+	VDSO_COMPAT = 2,
+};
+
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_DEFAULT	VDSO_COMPAT
+#else
+#define VDSO_DEFAULT	VDSO_ENABLED
+#endif
+
 /*
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
  */
-#ifdef CONFIG_PARAVIRT
-unsigned int __read_mostly vdso_enabled = 0;
-#else
-unsigned int __read_mostly vdso_enabled = 1;
-#endif
+unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
 
 EXPORT_SYMBOL_GPL(vdso_enabled);
 
@@ -47,7 +56,6 @@
 
 extern asmlinkage void sysenter_entry(void);
 
-#ifdef CONFIG_COMPAT_VDSO
 static __init void reloc_symtab(Elf32_Ehdr *ehdr,
 				unsigned offset, unsigned size)
 {
@@ -164,11 +172,6 @@
 					 shdr[i].sh_size);
 	}
 }
-#else
-static inline void relocate_vdso(Elf32_Ehdr *ehdr)
-{
-}
-#endif /* COMPAT_VDSO */
 
 void enable_sep_cpu(void)
 {
@@ -188,6 +191,25 @@
 	put_cpu();
 }
 
+static struct vm_area_struct gate_vma;
+
+static int __init gate_vma_init(void)
+{
+	gate_vma.vm_mm = NULL;
+	gate_vma.vm_start = FIXADDR_USER_START;
+	gate_vma.vm_end = FIXADDR_USER_END;
+	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+	gate_vma.vm_page_prot = __P101;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	gate_vma.vm_flags |= VM_ALWAYSDUMP;
+	return 0;
+}
+
 /*
  * These symbols are defined by vsyscall.o to mark the bounds
  * of the ELF DSO images included therein.
@@ -196,6 +218,22 @@
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
 static struct page *syscall_pages[1];
 
+static void map_compat_vdso(int map)
+{
+	static int vdso_mapped;
+
+	if (map == vdso_mapped)
+		return;
+
+	vdso_mapped = map;
+
+	__set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
+		     map ? PAGE_READONLY_EXEC : PAGE_NONE);
+
+	/* flush stray tlbs */
+	flush_tlb_all();
+}
+
 int __init sysenter_setup(void)
 {
 	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
@@ -204,10 +242,9 @@
 
 	syscall_pages[0] = virt_to_page(syscall_page);
 
-#ifdef CONFIG_COMPAT_VDSO
-	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC);
+	gate_vma_init();
+
 	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
-#endif
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
 		vsyscall = &vsyscall_int80_start;
@@ -226,42 +263,57 @@
 /* Defined in vsyscall-sysenter.S */
 extern void SYSENTER_RETURN;
 
-#ifdef __HAVE_ARCH_GATE_AREA
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long addr;
 	int ret;
+	bool compat;
 
 	down_write(&mm->mmap_sem);
-	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
-	if (IS_ERR_VALUE(addr)) {
-		ret = addr;
-		goto up_fail;
-	}
 
-	/*
-	 * MAYWRITE to allow gdb to COW and set breakpoints
-	 *
-	 * Make sure the vDSO gets into every core dump.
-	 * Dumping its contents makes post-mortem fully interpretable later
-	 * without matching up the same kernel and hardware config to see
-	 * what PC values meant.
-	 */
-	ret = install_special_mapping(mm, addr, PAGE_SIZE,
-				      VM_READ|VM_EXEC|
-				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
-				      VM_ALWAYSDUMP,
-				      syscall_pages);
-	if (ret)
-		goto up_fail;
+	/* Test compat mode once here, in case someone
+	   changes it via sysctl */
+	compat = (vdso_enabled == VDSO_COMPAT);
 
+	map_compat_vdso(compat);
+
+	if (compat)
+		addr = VDSO_HIGH_BASE;
+	else {
+		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+		if (IS_ERR_VALUE(addr)) {
+			ret = addr;
+			goto up_fail;
+		}
+
+		/*
+		 * MAYWRITE to allow gdb to COW and set breakpoints
+		 *
+		 * Make sure the vDSO gets into every core dump.
+		 * Dumping its contents makes post-mortem fully
+		 * interpretable later without matching up the same
		 * kernel and hardware config to see what PC values
+		 * meant.
+		 */
+		ret = install_special_mapping(mm, addr, PAGE_SIZE,
+					      VM_READ|VM_EXEC|
+					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+					      VM_ALWAYSDUMP,
+					      syscall_pages);
+
+		if (ret)
+			goto up_fail;
+	}
+
 	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
-		(void *)VDSO_SYM(&SYSENTER_RETURN);
-up_fail:
+		(void *)VDSO_SYM(&SYSENTER_RETURN);
+
+  up_fail:
 	up_write(&mm->mmap_sem);
+
 	return ret;
 }
 
@@ -274,6 +326,11 @@
 
 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
 {
+	struct mm_struct *mm = tsk->mm;
+
+	/* Check to see if this task was created in compat vdso mode */
+	if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
+		return &gate_vma;
 	return NULL;
 }
 
@@ -286,18 +343,4 @@
 {
 	return 0;
 }
-#else /* !__HAVE_ARCH_GATE_AREA */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
-{
-	/*
-	 * If not creating userspace VMA, simply set vdso to point to
-	 * fixmap page.
-	 */
-	current->mm->context.vdso = (void *)VDSO_HIGH_BASE;
-	current_thread_info()->sysenter_return =
-		(void *)VDSO_SYM(&SYSENTER_RETURN);
-
-	return 0;
-}
-#endif /* __HAVE_ARCH_GATE_AREA */
include/asm-i386/page.h
@@ -143,9 +143,7 @@
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
-#ifndef CONFIG_COMPAT_VDSO
 #define __HAVE_ARCH_GATE_AREA 1
-#endif
 #endif /* __KERNEL__ */
 
 #endif /* _I386_PAGE_H */