Commit fb1ae635772d679eb312fa447290fc02cd0e4cf1

Authored by Linus Torvalds

Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip:
  x86: Fix double enable_IR_x2apic() call on SMP kernel on !SMP boards
  x86: Increase CONFIG_NODES_SHIFT max to 10
  ibft, x86: Change reserve_ibft_region() to find_ibft_region()
  x86, hpet: Fix bug in RTC emulation
  x86, hpet: Erratum workaround for read after write of HPET comparator
  bootmem, x86: Fix 32bit numa system without RAM on node 0
  nobootmem, x86: Fix 32bit numa system without RAM on node 0
  x86: Handle overlapping mptables
  x86: Make e820_remove_range to handle all covered case
  x86-32, resume: do a global tlb flush in S4 resume

Showing 10 changed files Side-by-side Diff

arch/x86/Kconfig
... ... @@ -1216,8 +1216,8 @@
1216 1216  
1217 1217 config NODES_SHIFT
1218 1218 int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
1219   - range 1 9
1220   - default "9" if MAXSMP
  1219 + range 1 10
  1220 + default "10" if MAXSMP
1221 1221 default "6" if X86_64
1222 1222 default "4" if X86_NUMAQ
1223 1223 default "3"
arch/x86/kernel/apic/apic.c
... ... @@ -1640,8 +1640,10 @@
1640 1640 }
1641 1641 #endif
1642 1642  
  1643 +#ifndef CONFIG_SMP
1643 1644 enable_IR_x2apic();
1644 1645 default_setup_apic_routing();
  1646 +#endif
1645 1647  
1646 1648 verify_local_APIC();
1647 1649 connect_bsp_APIC();
arch/x86/kernel/e820.c
... ... @@ -519,29 +519,45 @@
519 519 printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
520 520 (unsigned long long) start,
521 521 (unsigned long long) end);
522   - e820_print_type(old_type);
  522 + if (checktype)
  523 + e820_print_type(old_type);
523 524 printk(KERN_CONT "\n");
524 525  
525 526 for (i = 0; i < e820.nr_map; i++) {
526 527 struct e820entry *ei = &e820.map[i];
527 528 u64 final_start, final_end;
  529 + u64 ei_end;
528 530  
529 531 if (checktype && ei->type != old_type)
530 532 continue;
  533 +
  534 + ei_end = ei->addr + ei->size;
531 535 /* totally covered? */
532   - if (ei->addr >= start &&
533   - (ei->addr + ei->size) <= (start + size)) {
  536 + if (ei->addr >= start && ei_end <= end) {
534 537 real_removed_size += ei->size;
535 538 memset(ei, 0, sizeof(struct e820entry));
536 539 continue;
537 540 }
  541 +
  542 + /* new range is totally covered? */
  543 + if (ei->addr < start && ei_end > end) {
  544 + e820_add_region(end, ei_end - end, ei->type);
  545 + ei->size = start - ei->addr;
  546 + real_removed_size += size;
  547 + continue;
  548 + }
  549 +
538 550 /* partially covered */
539 551 final_start = max(start, ei->addr);
540   - final_end = min(start + size, ei->addr + ei->size);
  552 + final_end = min(end, ei_end);
541 553 if (final_start >= final_end)
542 554 continue;
543 555 real_removed_size += final_end - final_start;
544 556  
  557 + /*
  558 + * left range could be head or tail, so need to update
  559 + * size at first.
  560 + */
545 561 ei->size -= final_end - final_start;
546 562 if (ei->addr < final_start)
547 563 continue;
arch/x86/kernel/hpet.c
... ... @@ -400,9 +400,15 @@
400 400 * then we might have a real hardware problem. We can not do
401 401 * much about it here, but at least alert the user/admin with
402 402 * a prominent warning.
  403 + * An erratum on some chipsets (ICH9,..), results in comparator read
  404 + * immediately following a write returning old value. Workaround
  405 + * for this is to read this value second time, when first
  406 + * read returns old value.
403 407 */
404   - WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
  408 + if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) {
  409 + WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
405 410 KERN_WARNING "hpet: compare register read back failed.\n");
  411 + }
406 412  
407 413 return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
408 414 }
... ... @@ -1144,6 +1150,7 @@
1144 1150 do_div(clc, freq);
1145 1151 clc >>= hpet_clockevent.shift;
1146 1152 hpet_pie_delta = clc;
  1153 + hpet_pie_limit = 0;
1147 1154 }
1148 1155 return 1;
1149 1156 }
arch/x86/kernel/mpparse.c
... ... @@ -664,7 +664,7 @@
664 664 {
665 665 unsigned long size = get_mpc_size(mpf->physptr);
666 666  
667   - reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
  667 + reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc");
668 668 }
669 669  
670 670 static int __init smp_scan_config(unsigned long base, unsigned long length)
... ... @@ -693,7 +693,7 @@
693 693 mpf, (u64)virt_to_phys(mpf));
694 694  
695 695 mem = virt_to_phys(mpf);
696   - reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
  696 + reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf");
697 697 if (mpf->physptr)
698 698 smp_reserve_memory(mpf);
699 699  
arch/x86/kernel/setup.c
... ... @@ -607,6 +607,16 @@
607 607 early_param("elfcorehdr", setup_elfcorehdr);
608 608 #endif
609 609  
  610 +static __init void reserve_ibft_region(void)
  611 +{
  612 + unsigned long addr, size = 0;
  613 +
  614 + addr = find_ibft_region(&size);
  615 +
  616 + if (size)
  617 + reserve_early_overlap_ok(addr, addr + size, "ibft");
  618 +}
  619 +
610 620 #ifdef CONFIG_X86_RESERVE_LOW_64K
611 621 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
612 622 {
... ... @@ -909,6 +919,8 @@
909 919 */
910 920 find_smp_config();
911 921  
  922 + reserve_ibft_region();
  923 +
912 924 reserve_trampoline_memory();
913 925  
914 926 #ifdef CONFIG_ACPI_SLEEP
... ... @@ -975,8 +987,6 @@
975 987 #endif
976 988  
977 989 dma32_reserve_bootmem();
978   -
979   - reserve_ibft_region();
980 990  
981 991 #ifdef CONFIG_KVM_CLOCK
982 992 kvmclock_init();
arch/x86/power/hibernate_asm_32.S
... ... @@ -27,10 +27,17 @@
27 27 ret
28 28  
29 29 ENTRY(restore_image)
  30 + movl mmu_cr4_features, %ecx
30 31 movl resume_pg_dir, %eax
31 32 subl $__PAGE_OFFSET, %eax
32 33 movl %eax, %cr3
33 34  
  35 + jecxz 1f # cr4 Pentium and higher, skip if zero
  36 + andl $~(X86_CR4_PGE), %ecx
  37 + movl %ecx, %cr4; # turn off PGE
  38 + movl %cr3, %eax; # flush TLB
  39 + movl %eax, %cr3
  40 +1:
34 41 movl restore_pblist, %edx
35 42 .p2align 4,,7
36 43  
37 44  
... ... @@ -54,15 +61,7 @@
54 61 movl $swapper_pg_dir, %eax
55 62 subl $__PAGE_OFFSET, %eax
56 63 movl %eax, %cr3
57   - /* Flush TLB, including "global" things (vmalloc) */
58 64 movl mmu_cr4_features, %ecx
59   - jecxz 1f # cr4 Pentium and higher, skip if zero
60   - movl %ecx, %edx
61   - andl $~(X86_CR4_PGE), %edx
62   - movl %edx, %cr4; # turn off PGE
63   -1:
64   - movl %cr3, %eax; # flush TLB
65   - movl %eax, %cr3
66 65 jecxz 1f # cr4 Pentium and higher, skip if zero
67 66 movl %ecx, %cr4; # turn PGE back on
68 67 1:
drivers/firmware/iscsi_ibft_find.c
... ... @@ -51,7 +51,7 @@
51 51 * Routine used to find the iSCSI Boot Format Table. The logical
52 52 * kernel address is set in the ibft_addr global variable.
53 53 */
54   -void __init reserve_ibft_region(void)
  54 +unsigned long __init find_ibft_region(unsigned long *sizep)
55 55 {
56 56 unsigned long pos;
57 57 unsigned int len = 0;
... ... @@ -77,7 +77,12 @@
77 77 }
78 78 }
79 79 }
80   - if (ibft_addr)
81   - reserve_bootmem(pos, PAGE_ALIGN(len), BOOTMEM_DEFAULT);
  80 + if (ibft_addr) {
  81 + *sizep = PAGE_ALIGN(len);
  82 + return pos;
  83 + }
  84 +
  85 + *sizep = 0;
  86 + return 0;
82 87 }
include/linux/iscsi_ibft.h
... ... @@ -42,9 +42,13 @@
42 42 * mapped address is set in the ibft_addr variable.
43 43 */
44 44 #ifdef CONFIG_ISCSI_IBFT_FIND
45   -extern void __init reserve_ibft_region(void);
  45 +unsigned long find_ibft_region(unsigned long *sizep);
46 46 #else
47   -static inline void reserve_ibft_region(void) { }
  47 +static inline unsigned long find_ibft_region(unsigned long *sizep)
  48 +{
  49 + *sizep = 0;
  50 + return 0;
  51 +}
48 52 #endif
49 53  
50 54 #endif /* ISCSI_IBFT_H */
mm/bootmem.c
... ... @@ -304,9 +304,22 @@
304 304 unsigned long __init free_all_bootmem(void)
305 305 {
306 306 #ifdef CONFIG_NO_BOOTMEM
307   - return free_all_memory_core_early(NODE_DATA(0)->node_id);
  307 + /*
  308 + * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
  309 + * because in some case like Node0 doesnt have RAM installed
  310 + * low ram will be on Node1
  311 + * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
  312 + * will be used instead of only Node0 related
  313 + */
  314 + return free_all_memory_core_early(MAX_NUMNODES);
308 315 #else
309   - return free_all_bootmem_core(NODE_DATA(0)->bdata);
  316 + unsigned long total_pages = 0;
  317 + bootmem_data_t *bdata;
  318 +
  319 + list_for_each_entry(bdata, &bdata_list, list)
  320 + total_pages += free_all_bootmem_core(bdata);
  321 +
  322 + return total_pages;
310 323 #endif
311 324 }
312 325