Commit 505569d208e61ab14f4b87957be0970ab33eb319

Authored by Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "Misc fixes: two vdso fixes, two kbuild fixes and a boot failure fix
  with certain odd memory mappings"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, vdso: Use asm volatile in __getcpu
  x86/build: Clean auto-generated processor feature files
  x86: Fix mkcapflags.sh bash-ism
  x86: Fix step size adjustment during initial memory mapping
  x86_64, vdso: Fix the vdso address randomization algorithm

Showing 6 changed files

arch/x86/boot/Makefile
... ... @@ -51,6 +51,7 @@
51 51 $(obj)/cpustr.h: $(obj)/mkcpustr FORCE
52 52 $(call if_changed,cpustr)
53 53 endif
  54 +clean-files += cpustr.h
54 55  
55 56 # ---------------------------------------------------------------------------
56 57  
arch/x86/include/asm/vgtod.h
... ... @@ -80,9 +80,11 @@
80 80  
81 81 /*
82 82 * Load per CPU data from GDT. LSL is faster than RDTSCP and
83   - * works on all CPUs.
  83 + * works on all CPUs. This is volatile so that it orders
  84 + * correctly wrt barrier() and to keep gcc from cleverly
  85 + * hoisting it out of the calling function.
84 86 */
85   - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
  87 + asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
86 88  
87 89 return p;
88 90 }
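The new comment above captures why the __getcpu fix matters: GCC treats an asm statement that has outputs but no "volatile" as a pure function of its inputs, so it may merge duplicate copies or hoist one out of the calling function, defeating the intended ordering against barrier(). The stand-alone userspace sketch below demonstrates that compiler behaviour with RDTSC instead of the kernel's LSL on __PER_CPU_SEG, so it can run outside the kernel; it is an illustration of the optimization hazard, not kernel code.

/*
 * Userspace sketch (not kernel code): an asm with outputs and no
 * "volatile" is assumed to be a pure function of its inputs, so GCC
 * may merge or hoist repeated uses at -O2. RDTSC stands in for the
 * LSL-based per-CPU read used by __getcpu().
 */
#include <stdio.h>
#include <stdint.h>

/* Without volatile: the two calls in main() may be folded into one RDTSC. */
static inline uint64_t read_tsc_foldable(void)
{
	uint32_t lo, hi;
	asm("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

/* With volatile: every call emits its own RDTSC, in program order. */
static inline uint64_t read_tsc_ordered(void)
{
	uint32_t lo, hi;
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint64_t a = read_tsc_foldable();
	uint64_t b = read_tsc_foldable();	/* may simply reuse 'a' */
	uint64_t c = read_tsc_ordered();
	uint64_t d = read_tsc_ordered();	/* always a second read */

	printf("non-volatile delta: %llu\n", (unsigned long long)(b - a));
	printf("volatile delta:     %llu\n", (unsigned long long)(d - c));
	return 0;
}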
arch/x86/kernel/cpu/Makefile
... ... @@ -66,4 +66,5 @@
66 66 $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
67 67 $(call if_changed,mkcapflags)
68 68 endif
  69 +clean-files += capflags.c
arch/x86/kernel/cpu/mkcapflags.sh
... ... @@ -28,7 +28,7 @@
28 28 # If the /* comment */ starts with a quote string, grab that.
29 29 VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')"
30 30 [ -z "$VALUE" ] && VALUE="\"$NAME\""
31   - [ "$VALUE" == '""' ] && continue
  31 + [ "$VALUE" = '""' ] && continue
32 32  
33 33 # Name is uppercase, VALUE is all lowercase
34 34 VALUE="$(echo "$VALUE" | tr A-Z a-z)"
arch/x86/mm/init.c
... ... @@ -438,20 +438,20 @@
438 438 static unsigned long __init get_new_step_size(unsigned long step_size)
439 439 {
440 440 /*
441   - * Explain why we shift by 5 and why we don't have to worry about
442   - * 'step_size << 5' overflowing:
443   - *
444   - * initial mapped size is PMD_SIZE (2M).
  441 + * Initial mapped size is PMD_SIZE (2M).
445 442 * We can not set step_size to be PUD_SIZE (1G) yet.
446 443 * In worse case, when we cross the 1G boundary, and
447 444 * PG_LEVEL_2M is not set, we will need 1+1+512 pages (2M + 8k)
448   - * to map 1G range with PTE. Use 5 as shift for now.
  445 + * to map 1G range with PTE. Hence we use one less than the
  446 + * difference of page table level shifts.
449 447 *
450   - * Don't need to worry about overflow, on 32bit, when step_size
451   - * is 0, round_down() returns 0 for start, and that turns it
452   - * into 0x100000000ULL.
  448 + * Don't need to worry about overflow in the top-down case, on 32bit,
  449 + * when step_size is 0, round_down() returns 0 for start, and that
  450 + * turns it into 0x100000000ULL.
  451 + * In the bottom-up case, round_up(x, 0) returns 0 though too, which
  452 + * needs to be taken into consideration by the code below.
453 453 */
454   - return step_size << 5;
  454 + return step_size << (PMD_SHIFT - PAGE_SHIFT - 1);
455 455 }
456 456  
457 457 /**
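For reference, with x86_64's 4K pages (PAGE_SHIFT = 12) and 2M PMD entries (PMD_SHIFT = 21), the new expression shifts by 21 - 12 - 1 = 8, so each round multiplies step_size by 256 instead of the old factor of 32. A minimal stand-alone sketch of that arithmetic, with the two constants restated locally as assumptions rather than pulled from kernel headers:

/*
 * Stand-alone sketch of the step-size growth in get_new_step_size().
 * PAGE_SHIFT and PMD_SHIFT are the usual x86_64 values, restated here
 * as assumptions rather than taken from kernel headers.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PMD_SIZE	(1UL << PMD_SHIFT)

static unsigned long get_new_step_size(unsigned long step_size)
{
	/* The formula from the diff: grow by 2^(21 - 12 - 1) = 256x per round. */
	return step_size << (PMD_SHIFT - PAGE_SHIFT - 1);
}

int main(void)
{
	unsigned long step = PMD_SIZE;

	for (int i = 0; i < 4; i++) {
		printf("round %d: step_size = %lu MiB\n", i, step >> 20);
		step = get_new_step_size(step);
	}
	return 0;
}

Starting from PMD_SIZE this prints 2 MiB, 512 MiB, then 131072 MiB (128 GiB), matching the comment's intent: the memory mapped in one round comfortably covers the page-table pages needed to map the next, 256x larger round at 4K granularity.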
... ... @@ -471,7 +471,6 @@
471 471 unsigned long step_size;
472 472 unsigned long addr;
473 473 unsigned long mapped_ram_size = 0;
474   - unsigned long new_mapped_ram_size;
475 474  
476 475 /* xen has big range in reserved near end of ram, skip it at first.*/
477 476 addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE);
... ... @@ -496,14 +495,12 @@
496 495 start = map_start;
497 496 } else
498 497 start = map_start;
499   - new_mapped_ram_size = init_range_memory_mapping(start,
  498 + mapped_ram_size += init_range_memory_mapping(start,
500 499 last_start);
501 500 last_start = start;
502 501 min_pfn_mapped = last_start >> PAGE_SHIFT;
503   - /* only increase step_size after big range get mapped */
504   - if (new_mapped_ram_size > mapped_ram_size)
  502 + if (mapped_ram_size >= step_size)
505 503 step_size = get_new_step_size(step_size);
506   - mapped_ram_size += new_mapped_ram_size;
507 504 }
508 505  
509 506 if (real_end < map_end)
... ... @@ -524,7 +521,7 @@
524 521 static void __init memory_map_bottom_up(unsigned long map_start,
525 522 unsigned long map_end)
526 523 {
527   - unsigned long next, new_mapped_ram_size, start;
  524 + unsigned long next, start;
528 525 unsigned long mapped_ram_size = 0;
529 526 /* step_size need to be small so pgt_buf from BRK could cover it */
530 527 unsigned long step_size = PMD_SIZE;
... ... @@ -539,19 +536,19 @@
539 536 * for page table.
540 537 */
541 538 while (start < map_end) {
542   - if (map_end - start > step_size) {
  539 + if (step_size && map_end - start > step_size) {
543 540 next = round_up(start + 1, step_size);
544 541 if (next > map_end)
545 542 next = map_end;
546   - } else
  543 + } else {
547 544 next = map_end;
  545 + }
548 546  
549   - new_mapped_ram_size = init_range_memory_mapping(start, next);
  547 + mapped_ram_size += init_range_memory_mapping(start, next);
550 548 start = next;
551 549  
552   - if (new_mapped_ram_size > mapped_ram_size)
  550 + if (mapped_ram_size >= step_size)
553 551 step_size = get_new_step_size(step_size);
554   - mapped_ram_size += new_mapped_ram_size;
555 552 }
556 553 }
557 554  
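Taken together, the loop changes above replace the old heuristic (grow only when a single round mapped more than everything mapped so far) with a simpler rule: grow step_size once the running total reaches the current step, and guard the round_up() call because round_up(x, 0) returns 0 once step_size shifts out to zero. The following toy simulation of the bottom-up walk shows the effect; init_range_memory_mapping() is replaced by plain size accounting, and the constants and example range are assumptions chosen purely for illustration.

/*
 * Toy simulation of the reworked memory_map_bottom_up() walk. Only the
 * address arithmetic is kept; init_range_memory_mapping() is replaced
 * by size accounting. PAGE_SHIFT/PMD_SHIFT and the example range are
 * assumptions for illustration, not values read from the kernel.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PMD_SIZE	(1UL << PMD_SHIFT)

/* Same behaviour as the kernel's round_up(): round_up(x, 0) yields 0. */
#define round_up(x, y)	((((x) - 1) | ((y) - 1)) + 1)

static unsigned long get_new_step_size(unsigned long step_size)
{
	return step_size << (PMD_SHIFT - PAGE_SHIFT - 1);
}

int main(void)
{
	unsigned long map_start = 0x100000;		/* 1M, example only */
	unsigned long map_end = 4UL << 30;		/* 4G, example only */
	unsigned long start = map_start;
	unsigned long step_size = PMD_SIZE;
	unsigned long mapped_ram_size = 0;
	unsigned long next;

	while (start < map_end) {
		/* The new step_size check avoids round_up(x, 0) == 0. */
		if (step_size && map_end - start > step_size) {
			next = round_up(start + 1, step_size);
			if (next > map_end)
				next = map_end;
		} else {
			next = map_end;
		}

		printf("map [%#010lx, %#010lx)\n", start, next);
		mapped_ram_size += next - start;
		start = next;

		/* Grow the step once enough memory is mapped to back the
		 * page tables that the next, larger step will need. */
		if (mapped_ram_size >= step_size)
			step_size = get_new_step_size(step_size);
	}
	return 0;
}

With these example values the walk maps [1M,2M), [2M,4M), [4M,512M), [512M,1G) and finally [1G,4G), with step_size growing from 2M to 512M and then 128G along the way.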
arch/x86/vdso/vma.c
... ... @@ -41,12 +41,17 @@
41 41  
42 42 struct linux_binprm;
43 43  
44   -/* Put the vdso above the (randomized) stack with another randomized offset.
45   - This way there is no hole in the middle of address space.
46   - To save memory make sure it is still in the same PTE as the stack top.
47   - This doesn't give that many random bits.
48   -
49   - Only used for the 64-bit and x32 vdsos. */
  44 +/*
  45 + * Put the vdso above the (randomized) stack with another randomized
  46 + * offset. This way there is no hole in the middle of address space.
  47 + * To save memory make sure it is still in the same PTE as the stack
  48 + * top. This doesn't give that many random bits.
  49 + *
  50 + * Note that this algorithm is imperfect: the distribution of the vdso
  51 + * start address within a PMD is biased toward the end.
  52 + *
  53 + * Only used for the 64-bit and x32 vdsos.
  54 + */
50 55 static unsigned long vdso_addr(unsigned long start, unsigned len)
51 56 {
52 57 #ifdef CONFIG_X86_32
... ... @@ -54,22 +59,30 @@
54 59 #else
55 60 unsigned long addr, end;
56 61 unsigned offset;
57   - end = (start + PMD_SIZE - 1) & PMD_MASK;
  62 +
  63 + /*
  64 + * Round up the start address. It can start out unaligned as a result
  65 + * of stack start randomization.
  66 + */
  67 + start = PAGE_ALIGN(start);
  68 +
  69 + /* Round the lowest possible end address up to a PMD boundary. */
  70 + end = (start + len + PMD_SIZE - 1) & PMD_MASK;
58 71 if (end >= TASK_SIZE_MAX)
59 72 end = TASK_SIZE_MAX;
60 73 end -= len;
61   - /* This loses some more bits than a modulo, but is cheaper */
62   - offset = get_random_int() & (PTRS_PER_PTE - 1);
63   - addr = start + (offset << PAGE_SHIFT);
64   - if (addr >= end)
65   - addr = end;
66 74  
  75 + if (end > start) {
  76 + offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
  77 + addr = start + (offset << PAGE_SHIFT);
  78 + } else {
  79 + addr = start;
  80 + }
  81 +
67 82 /*
68   - * page-align it here so that get_unmapped_area doesn't
69   - * align it wrongfully again to the next page. addr can come in 4K
70   - * unaligned here as a result of stack start randomization.
  83 + * Forcibly align the final address in case we have a hardware
  84 + * issue that requires alignment for performance reasons.
71 85 */
72   - addr = PAGE_ALIGN(addr);
73 86 addr = align_vdso_addr(addr);
74 87  
75 88 return addr;
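The rewritten vdso_addr() above draws the offset uniformly from every page-aligned slot between start and end inclusive, instead of masking a random value with PTRS_PER_PTE - 1 and clamping, which is what biased the old distribution toward the end of the PMD. Below is a userspace sketch of the same arithmetic: PAGE_SHIFT, PMD_SIZE and TASK_SIZE_MAX are restated locally as assumptions, rand() stands in for get_random_int(), align_vdso_addr() is omitted, and the stack-top and vdso-size values are made-up examples.

/*
 * Userspace sketch of the new vdso_addr() placement arithmetic. The
 * constants are the usual x86_64 values restated as assumptions, and
 * rand() replaces get_random_int(); this illustrates the algorithm,
 * it is not the kernel implementation.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)
#define PMD_SIZE	(1UL << 21)
#define PMD_MASK	(~(PMD_SIZE - 1))
#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)

static unsigned long vdso_addr(unsigned long start, unsigned len)
{
	unsigned long addr, end;
	unsigned offset;

	/* The stack top can be unaligned; round it up to a page first. */
	start = PAGE_ALIGN(start);

	/* Lowest possible end of the mapping, rounded up to a PMD. */
	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;

	if (end > start) {
		/* Uniform choice over all page-aligned slots in [start, end]. */
		offset = rand() % (((end - start) >> PAGE_SHIFT) + 1);
		addr = start + ((unsigned long)offset << PAGE_SHIFT);
	} else {
		addr = start;
	}
	return addr;
}

int main(void)
{
	unsigned long stack_top = 0x7ffd12345678UL;	/* example value */
	unsigned vdso_len = 2 * PAGE_SIZE;		/* example size */

	srand(time(NULL));
	for (int i = 0; i < 4; i++)
		printf("candidate vdso address: %#lx\n",
		       vdso_addr(stack_top, vdso_len));
	return 0;
}

Each run prints a handful of page-aligned candidates in the PMD just above the example stack top, all equally likely.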