Commit c4211f42d3e66875298a5e26a75109878c80f15b
Committed by
H. Peter Anvin
1 parent
d8dfe60d6d
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
x86/tlb: add tlb_flushall_shift for specific CPU
Testing shows that different CPU types (micro-architectures and NUMA modes) have different balance points between flushing the whole TLB and issuing multiple invlpg instructions. There are also cases where the TLB flush change does not help at all. This patch provides an interface that gives x86 vendor developers a chance to set a different shift for each CPU type. For example, on the machines I have at hand, the balance point is 16 entries on Romley-EP, 8 entries on Bloomfield NHM-EP, and 256 entries on an IVB mobile CPU, but on a model 15 Core 2 Xeon using invlpg does not help at all. For untested machines, do a conservative optimization, the same as for NHM CPUs. Signed-off-by: Alex Shi <alex.shi@intel.com> Link: http://lkml.kernel.org/r/1340845344-27557-5-git-send-email-alex.shi@intel.com Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Showing 5 changed files with 53 additions and 7 deletions Side-by-side Diff
arch/x86/include/asm/processor.h
... | ... | @@ -72,6 +72,8 @@ |
72 | 72 | extern u16 __read_mostly tlb_lld_4k[NR_INFO]; |
73 | 73 | extern u16 __read_mostly tlb_lld_2m[NR_INFO]; |
74 | 74 | extern u16 __read_mostly tlb_lld_4m[NR_INFO]; |
75 | +extern s8 __read_mostly tlb_flushall_shift; | |
76 | + | |
75 | 77 | /* |
76 | 78 | * CPU type and hardware bug flags. Kept separately for each CPU. |
77 | 79 | * Members of this structure are referenced in head.S, so think twice |
arch/x86/kernel/cpu/common.c
... | ... | @@ -459,16 +459,26 @@ |
459 | 459 | u16 __read_mostly tlb_lld_2m[NR_INFO]; |
460 | 460 | u16 __read_mostly tlb_lld_4m[NR_INFO]; |
461 | 461 | |
462 | +/* | |
463 | + * tlb_flushall_shift shows the balance point in replacing cr3 write | |
464 | + * with multiple 'invlpg'. It will do this replacement when | |
465 | + * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. | |
466 | + * If tlb_flushall_shift is -1, means the replacement will be disabled. | |
467 | + */ | |
468 | +s8 __read_mostly tlb_flushall_shift = -1; | |
469 | + | |
462 | 470 | void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) |
463 | 471 | { |
464 | 472 | if (this_cpu->c_detect_tlb) |
465 | 473 | this_cpu->c_detect_tlb(c); |
466 | 474 | |
467 | 475 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ |
468 | - "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n", | |
476 | + "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | |
477 | + "tlb_flushall_shift is 0x%x\n", | |
469 | 478 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], |
470 | 479 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], |
471 | - tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES]); | |
480 | + tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | |
481 | + tlb_flushall_shift); | |
472 | 482 | } |
473 | 483 | |
474 | 484 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
arch/x86/kernel/cpu/intel.c
... | ... | @@ -610,6 +610,39 @@ |
610 | 610 | } |
611 | 611 | } |
612 | 612 | |
613 | +static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) | |
614 | +{ | |
615 | + if (!cpu_has_invlpg) { | |
616 | + tlb_flushall_shift = -1; | |
617 | + return; | |
618 | + } | |
619 | + switch ((c->x86 << 8) + c->x86_model) { | |
620 | + case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | |
621 | + case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | |
622 | + case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | |
623 | + case 0x61d: /* six-core 45 nm xeon "Dunnington" */ | |
624 | + tlb_flushall_shift = -1; | |
625 | + break; | |
626 | + case 0x61a: /* 45 nm nehalem, "Bloomfield" */ | |
627 | + case 0x61e: /* 45 nm nehalem, "Lynnfield" */ | |
628 | + case 0x625: /* 32 nm nehalem, "Clarkdale" */ | |
629 | + case 0x62c: /* 32 nm nehalem, "Gulftown" */ | |
630 | + case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ | |
631 | + case 0x62f: /* 32 nm Xeon E7 */ | |
632 | + tlb_flushall_shift = 6; | |
633 | + break; | |
634 | + case 0x62a: /* SandyBridge */ | |
635 | + case 0x62d: /* SandyBridge, "Romely-EP" */ | |
636 | + tlb_flushall_shift = 5; | |
637 | + break; | |
638 | + case 0x63a: /* Ivybridge */ | |
639 | + tlb_flushall_shift = 1; | |
640 | + break; | |
641 | + default: | |
642 | + tlb_flushall_shift = 6; | |
643 | + } | |
644 | +} | |
645 | + | |
613 | 646 | static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) |
614 | 647 | { |
615 | 648 | int i, j, n; |
... | ... | @@ -630,6 +663,7 @@ |
630 | 663 | for (j = 1 ; j < 16 ; j++) |
631 | 664 | intel_tlb_lookup(desc[j]); |
632 | 665 | } |
666 | + intel_tlb_flushall_shift_set(c); | |
633 | 667 | } |
634 | 668 | |
635 | 669 | static const struct cpu_dev __cpuinitconst intel_cpu_dev = { |
arch/x86/mm/tlb.c
... | ... | @@ -316,8 +316,6 @@ |
316 | 316 | preempt_enable(); |
317 | 317 | } |
318 | 318 | |
319 | -#define FLUSHALL_BAR 16 | |
320 | - | |
321 | 319 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
322 | 320 | static inline unsigned long has_large_page(struct mm_struct *mm, |
323 | 321 | unsigned long start, unsigned long end) |
... | ... | @@ -352,7 +350,7 @@ |
352 | 350 | { |
353 | 351 | struct mm_struct *mm; |
354 | 352 | |
355 | - if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB) { | |
353 | + if (vma->vm_flags & VM_HUGETLB || tlb_flushall_shift == -1) { | |
356 | 354 | flush_all: |
357 | 355 | flush_tlb_mm(vma->vm_mm); |
358 | 356 | return; |
... | ... | @@ -373,7 +371,8 @@ |
373 | 371 | act_entries = tlb_entries > mm->total_vm ? |
374 | 372 | mm->total_vm : tlb_entries; |
375 | 373 | |
376 | - if ((end - start)/PAGE_SIZE > act_entries/FLUSHALL_BAR) | |
374 | + if ((end - start) >> PAGE_SHIFT > | |
375 | + act_entries >> tlb_flushall_shift) | |
377 | 376 | local_flush_tlb(); |
378 | 377 | else { |
379 | 378 | if (has_large_page(mm, start, end)) { |
include/asm-generic/tlb.h
... | ... | @@ -113,7 +113,8 @@ |
113 | 113 | |
114 | 114 | void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm); |
115 | 115 | void tlb_flush_mmu(struct mmu_gather *tlb); |
116 | -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); | |
116 | +void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, | |
117 | + unsigned long end); | |
117 | 118 | int __tlb_remove_page(struct mmu_gather *tlb, struct page *page); |
118 | 119 | |
119 | 120 | /* tlb_remove_page |