Commit 12031a624af7816ec7660b82be648aa3703b4ebe

Authored by Yinghai Lu
Committed by Thomas Gleixner
1 parent f5098d62c1

x86: mtrr cleanup for converting continuous to discrete - auto detect v4

Loop through mtrr chunk_size and gran_size from 1M to 2G to find the
optimal values, so the user does not need to add mtrr_chunk_size and
mtrr_gran_size to the kernel command line.
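
In outline, the search works like the following stand-alone sketch
(simplified types and a dummy cost function; the real code replays each
candidate layout through x86_setup_var_mtrrs()/x86_get_mtrr_mem_range()
and compares the resulting RAM coverage):

    /* simplified user-space sketch of the chunk/gran search, not kernel code */
    #include <stdio.h>

    /* stand-in for the real coverage check; returns lost coverage in kB */
    static unsigned long try_layout(unsigned long long chunk,
                                    unsigned long long gran)
    {
            return (unsigned long)((chunk ^ gran) >> 20);   /* dummy cost */
    }

    int main(void)
    {
            unsigned long long gran, chunk, best_gran = 0, best_chunk = 0;
            unsigned long best_loss = ~0UL;

            /* gran_size: 1M, 2M, ..., 2G; chunk_size: gran_size, ..., 4G */
            for (gran = 1ULL << 20; gran < (1ULL << 32); gran <<= 1) {
                    for (chunk = gran; chunk < (1ULL << 33); chunk <<= 1) {
                            unsigned long loss = try_layout(chunk, gran);

                            if (loss < best_loss) {
                                    best_loss = loss;
                                    best_gran = gran;
                                    best_chunk = chunk;
                            }
                    }
            }
            printf("gran_size: %lluM chunk_size: %lluM lose: %luK\n",
                   best_gran >> 20, best_chunk >> 20, best_loss);
            return 0;
    }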

If no optimal value is found, print out the full list of results to help
the user select a less optimal value manually.
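
When no optimal pair exists, each candidate appears on one line in this
form (sample values only, following the printk format in the patch):

     gran_size: 64M     chunk_size: 128M     num_reg: 6     lose cover RAM: 0M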

Add mtrr_spare_reg_nr= so the user can set it to 2 instead of 1 if the
graphics card needs more entries.
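
For example, on the kernel command line (the value is illustrative):

    mtrr_spare_reg_nr=2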

v2: find the one with more spare entries
v3: fix hole_basek offset
v4: tighten the comparison between range and range_new
    stop the loop at 4G

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Gabriel C <nix.or.die@googlemail.com>
Cc: Mika Fischer <mika.fischer@zoopnet.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 3 changed files with 484 additions and 148 deletions

Documentation/kernel-parameters.txt
... ... @@ -610,8 +610,17 @@
610 610 that could hold holes aka. UC entries.
611 611  
612 612 mtrr_gran_size=nn[KMG] [X86]
613   - used for mtrr cleanup. It is granity of mtrr block.
614   - Big value could prevent small alignment use up MTRRs.
  613 + Used for mtrr cleanup. It is the granularity
  614 + of mtrr blocks. Default is 1.
  615 + A large value can prevent ranges with small
  616 + alignment from using up MTRRs.
  617 +
  618 + mtrr_spare_reg_nr=n [X86]
  619 + Format: <integer>
  620 + Range: 0-7 : spare reg number
  621 + Default : 1
  622 + Used for mtrr cleanup. It is the number of spare
  623 + mtrr entries. Set to 2 or more if your graphics card needs more.
615 624  
616 625 disable_mtrr_trim [X86, Intel and AMD only]
617 626 By default the kernel will trim any uncacheable
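
For example, a boot line using these parameters could look like this
(the sizes are illustrative, not recommendations):

    mtrr_gran_size=64M mtrr_chunk_size=128M mtrr_spare_reg_nr=2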
arch/x86/Kconfig
... ... @@ -1117,6 +1117,15 @@
1117 1117 help
1118 1118 Enable mtrr cleanup default value
1119 1119  
  1120 +config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
  1121 + int "MTRR cleanup spare reg num (0-7)"
  1122 + range 0 7
  1123 + default "1"
  1124 + depends on MTRR_SANITIZER
  1125 + help
  1126 + Default number of spare MTRR entries for mtrr cleanup;
  1127 + it can be changed at boot time via mtrr_spare_reg_nr=.
  1128 +
1120 1129 config X86_PAT
1121 1130 bool
1122 1131 prompt "x86 PAT support"
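
With this option, the resulting .config could carry, for example
(the value is illustrative):

    CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=2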
arch/x86/kernel/cpu/mtrr/main.c
... ... @@ -610,28 +610,6 @@
610 610 .resume = mtrr_restore,
611 611 };
612 612  
613   -#ifdef CONFIG_MTRR_SANITIZER
614   -static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
615   -#else
616   -static int enable_mtrr_cleanup __initdata = -1;
617   -#endif
618   -
619   -static int __init disable_mtrr_cleanup_setup(char *str)
620   -{
621   - if (enable_mtrr_cleanup != -1)
622   - enable_mtrr_cleanup = 0;
623   - return 0;
624   -}
625   -early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
626   -
627   -static int __init enable_mtrr_cleanup_setup(char *str)
628   -{
629   - if (enable_mtrr_cleanup != -1)
630   - enable_mtrr_cleanup = 1;
631   - return 0;
632   -}
633   -early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
634   -
635 613 /* should be related to MTRR_VAR_RANGES nums */
636 614 #define RANGE_NUM 256
637 615  
... ... @@ -702,13 +680,15 @@
702 680 continue;
703 681 }
704 682  
705   - if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
  683 + if (start <= range[j].start && end < range[j].end &&
  684 + range[j].start < end + 1) {
706 685 range[j].start = end + 1;
707 686 continue;
708 687 }
709 688  
710 689  
711   - if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
  690 + if (start > range[j].start && end >= range[j].end &&
  691 + range[j].end > start - 1) {
712 692 range[j].end = start - 1;
713 693 continue;
714 694 }
... ... @@ -743,18 +723,123 @@
743 723 return start1 - start2;
744 724 }
745 725  
  726 +struct var_mtrr_range_state {
  727 + unsigned long base_pfn;
  728 + unsigned long size_pfn;
  729 + mtrr_type type;
  730 +};
  731 +
  732 +struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
  733 +
  734 +static int __init
  735 +x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
  736 + unsigned long extra_remove_base,
  737 + unsigned long extra_remove_size)
  738 +{
  739 + unsigned long i, base, size;
  740 + mtrr_type type;
  741 +
  742 + for (i = 0; i < num_var_ranges; i++) {
  743 + type = range_state[i].type;
  744 + if (type != MTRR_TYPE_WRBACK)
  745 + continue;
  746 + base = range_state[i].base_pfn;
  747 + size = range_state[i].size_pfn;
  748 + nr_range = add_range_with_merge(range, nr_range, base,
  749 + base + size - 1);
  750 + }
  751 + printk(KERN_DEBUG "After WB checking\n");
  752 + for (i = 0; i < nr_range; i++)
  753 + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
  754 + range[i].start, range[i].end + 1);
  755 +
  756 + /* take out UC ranges */
  757 + for (i = 0; i < num_var_ranges; i++) {
  758 + type = range_state[i].type;
  759 + if (type != MTRR_TYPE_UNCACHABLE)
  760 + continue;
  761 + size = range_state[i].size_pfn;
  762 + if (!size)
  763 + continue;
  764 + base = range_state[i].base_pfn;
  765 + subtract_range(range, base, base + size - 1);
  766 + }
  767 + if (extra_remove_size)
  768 + subtract_range(range, extra_remove_base,
  769 + extra_remove_base + extra_remove_size - 1);
  770 +
  771 + /* get new range num */
  772 + nr_range = 0;
  773 + for (i = 0; i < RANGE_NUM; i++) {
  774 + if (!range[i].end)
  775 + continue;
  776 + nr_range++;
  777 + }
  778 + printk(KERN_DEBUG "After UC checking\n");
  779 + for (i = 0; i < nr_range; i++)
  780 + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
  781 + range[i].start, range[i].end + 1);
  782 +
  783 + /* sort the ranges */
  784 + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
  785 + printk(KERN_DEBUG "After sorting\n");
  786 + for (i = 0; i < nr_range; i++)
  787 + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
  788 + range[i].start, range[i].end + 1);
  789 +
  790 + /* clear entries that are not used */
  791 + for (i = nr_range; i < RANGE_NUM; i++)
  792 + memset(&range[i], 0, sizeof(range[i]));
  793 +
  794 + return nr_range;
  795 +}
  796 +
  797 +static struct res_range __initdata range[RANGE_NUM];
  798 +
  799 +#ifdef CONFIG_MTRR_SANITIZER
  800 +
  801 +static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
  802 +{
  803 + unsigned long sum;
  804 + int i;
  805 +
  806 + sum = 0;
  807 + for (i = 0; i < nr_range; i++)
  808 + sum += range[i].end + 1 - range[i].start;
  809 +
  810 + return sum;
  811 +}
  812 +
  813 +static int enable_mtrr_cleanup __initdata =
  814 + CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
  815 +
  816 +static int __init disable_mtrr_cleanup_setup(char *str)
  817 +{
  818 + if (enable_mtrr_cleanup != -1)
  819 + enable_mtrr_cleanup = 0;
  820 + return 0;
  821 +}
  822 +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
  823 +
  824 +static int __init enable_mtrr_cleanup_setup(char *str)
  825 +{
  826 + if (enable_mtrr_cleanup != -1)
  827 + enable_mtrr_cleanup = 1;
  828 + return 0;
  829 +}
  830 +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
  831 +
746 832 struct var_mtrr_state {
747 833 unsigned long range_startk;
748 834 unsigned long range_sizek;
749 835 unsigned long chunk_sizek;
750 836 unsigned long gran_sizek;
751 837 unsigned int reg;
752   - unsigned int address_bits;
753 838 };
754 839  
755 840 static void __init
756 841 set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
757   - unsigned char type, unsigned address_bits)
  842 + unsigned char type, unsigned int address_bits)
758 843 {
759 844 u32 base_lo, base_hi, mask_lo, mask_hi;
760 845 u64 base, mask;
... ... @@ -781,10 +866,34 @@
781 866 fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
782 867 }
783 868  
  869 +static void __init
  870 +save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
  871 + unsigned char type)
  872 +{
  873 + range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
  874 + range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
  875 + range_state[reg].type = type;
  876 +}
  877 +
  878 +static void __init
  879 +set_var_mtrr_all(unsigned int address_bits)
  880 +{
  881 + unsigned long basek, sizek;
  882 + unsigned char type;
  883 + unsigned int reg;
  884 +
  885 + for (reg = 0; reg < num_var_ranges; reg++) {
  886 + basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
  887 + sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
  888 + type = range_state[reg].type;
  889 +
  890 + set_var_mtrr(reg, basek, sizek, type, address_bits);
  891 + }
  892 +}
  893 +
784 894 static unsigned int __init
785 895 range_to_mtrr(unsigned int reg, unsigned long range_startk,
786   - unsigned long range_sizek, unsigned char type,
787   - unsigned address_bits)
  896 + unsigned long range_sizek, unsigned char type)
788 897 {
789 898 if (!range_sizek || (reg >= num_var_ranges))
790 899 return reg;
... ... @@ -803,12 +912,13 @@
803 912 align = max_align;
804 913  
805 914 sizek = 1 << align;
806   - printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
  915 + printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, "
  916 + "range: %ldMB, type %s\n",
807 917 reg, range_startk >> 10, sizek >> 10,
808 918 (type == MTRR_TYPE_UNCACHABLE)?"UC":
809 919 ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
810 920 );
811   - set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
  921 + save_var_mtrr(reg++, range_startk, sizek, type);
812 922 range_startk += sizek;
813 923 range_sizek -= sizek;
814 924 if (reg >= num_var_ranges)
... ... @@ -817,10 +927,12 @@
817 927 return reg;
818 928 }
819 929  
820   -static void __init
821   -range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
  930 +static unsigned __init
  931 +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
  932 + unsigned long sizek)
822 933 {
823 934 unsigned long hole_basek, hole_sizek;
  935 + unsigned long second_basek, second_sizek;
824 936 unsigned long range0_basek, range0_sizek;
825 937 unsigned long range_basek, range_sizek;
826 938 unsigned long chunk_sizek;
... ... @@ -828,64 +940,95 @@
828 940  
829 941 hole_basek = 0;
830 942 hole_sizek = 0;
  943 + second_basek = 0;
  944 + second_sizek = 0;
831 945 chunk_sizek = state->chunk_sizek;
832 946 gran_sizek = state->gran_sizek;
833 947  
834 948 /* align with gran size, prevent small block used up MTRRs */
835 949 range_basek = ALIGN(state->range_startk, gran_sizek);
836 950 if ((range_basek > basek) && basek)
837   - return;
838   - range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek);
  951 + return second_sizek;
  952 + state->range_sizek -= (range_basek - state->range_startk);
  953 + range_sizek = ALIGN(state->range_sizek, gran_sizek);
839 954  
840   - while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) {
  955 + while (range_sizek > state->range_sizek) {
841 956 range_sizek -= gran_sizek;
842 957 if (!range_sizek)
843   - return;
  958 + return 0;
844 959 }
845   - state->range_startk = range_basek;
846 960 state->range_sizek = range_sizek;
847 961  
848 962 /* try to append some small hole */
849 963 range0_basek = state->range_startk;
850 964 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
851 965 if (range0_sizek == state->range_sizek) {
852   - printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
853   - state->reg = range_to_mtrr(state->reg, range0_basek,
854   - state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
855   - return;
856   - } else if (basek) {
857   - while (range0_basek + range0_sizek - chunk_sizek > basek) {
  966 + printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10,
  967 + (range0_basek + state->range_sizek)<<10);
  968 + state->reg = range_to_mtrr(state->reg, range0_basek,
  969 + state->range_sizek, MTRR_TYPE_WRBACK);
  970 + return 0;
  971 + }
  972 +
  973 + range0_sizek -= chunk_sizek;
  974 + if (range0_sizek && sizek) {
  975 + while (range0_basek + range0_sizek > (basek + sizek)) {
858 976 range0_sizek -= chunk_sizek;
859 977 if (!range0_sizek)
860 978 break;
861 979 }
862 980 }
863 981  
  982 + if (range0_sizek) {
  983 + printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10,
  984 + (range0_basek + range0_sizek)<<10);
  985 + state->reg = range_to_mtrr(state->reg, range0_basek,
  986 + range0_sizek, MTRR_TYPE_WRBACK);
864 987  
865   - if (range0_sizek > chunk_sizek)
866   - range0_sizek -= chunk_sizek;
867   - printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
868   - state->reg = range_to_mtrr(state->reg, range0_basek,
869   - range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
  988 + }
870 989  
871 990 range_basek = range0_basek + range0_sizek;
872 991 range_sizek = chunk_sizek;
873 992  
874   - if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) &&
875   - (range_basek + range_sizek <= basek)) {
876   - hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
877   - hole_basek = range_basek + range_sizek - hole_sizek;
878   - } else
  993 + if (range_basek + range_sizek > basek &&
  994 + range_basek + range_sizek <= (basek + sizek)) {
  995 + /* one hole */
  996 + second_basek = basek;
  997 + second_sizek = range_basek + range_sizek - basek;
  998 + }
  999 +
  1000 + /* if this is the last piece, only one hole can be near the end */
  1001 + if ((second_basek || !basek) &&
  1002 + range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
  1003 + (chunk_sizek >> 1)) {
  1004 + /*
  1005 + * one hole, either in the middle
  1006 + * or at the end
  1007 + */
  1008 + hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
  1009 + - second_sizek;
  1010 + hole_basek = range_basek + range_sizek - hole_sizek
  1011 + - second_sizek;
  1012 + } else {
  1013 + /* fallback for big hole, or several holes */
879 1014 range_sizek = state->range_sizek - range0_sizek;
  1015 + second_basek = 0;
  1016 + second_sizek = 0;
  1017 + }
880 1018  
881   - printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
882   - state->reg = range_to_mtrr(state->reg, range_basek,
883   - range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
  1019 + printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
  1020 + (range_basek + range_sizek)<<10);
  1021 + state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
  1022 + MTRR_TYPE_WRBACK);
884 1023 if (hole_sizek) {
885   - printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
886   - state->reg = range_to_mtrr(state->reg, hole_basek,
887   - hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
  1024 + printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10,
  1025 + (hole_basek + hole_sizek)<<10);
  1026 + state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
  1027 + MTRR_TYPE_UNCACHABLE);
  1028 +
888 1029 }
  1030 +
  1031 + return second_sizek;
889 1032 }
890 1033  
891 1034 static void __init
... ... @@ -893,6 +1036,7 @@
893 1036 unsigned long size_pfn)
894 1037 {
895 1038 unsigned long basek, sizek;
  1039 + unsigned long second_sizek = 0;
896 1040  
897 1041 if (state->reg >= num_var_ranges)
898 1042 return;
... ... @@ -901,21 +1045,19 @@
901 1045 sizek = size_pfn << (PAGE_SHIFT - 10);
902 1046  
903 1047 /* See if I can merge with the last range */
904   - if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
  1048 + if ((basek <= 1024) ||
  1049 + (state->range_startk + state->range_sizek == basek)) {
905 1050 unsigned long endk = basek + sizek;
906 1051 state->range_sizek = endk - state->range_startk;
907 1052 return;
908 1053 }
909 1054 /* Write the range mtrrs */
910   - if (state->range_sizek != 0) {
911   - range_to_mtrr_with_hole(state, basek);
  1055 + if (state->range_sizek != 0)
  1056 + second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
912 1057  
913   - state->range_startk = 0;
914   - state->range_sizek = 0;
915   - }
916 1058 /* Allocate an msr */
917   - state->range_startk = basek;
918   - state->range_sizek = sizek;
  1059 + state->range_startk = basek + second_sizek;
  1060 + state->range_sizek = sizek - second_sizek;
919 1061 }
920 1062  
921 1063 /* mininum size of mtrr block that can take hole */
... ... @@ -931,7 +1073,7 @@
931 1073 early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
932 1074  
933 1075 /* granity of mtrr of block */
934   -static u64 mtrr_gran_size __initdata = (1ULL<<20);
  1076 +static u64 mtrr_gran_size __initdata;
935 1077  
936 1078 static int __init parse_mtrr_gran_size_opt(char *p)
937 1079 {
... ... @@ -942,91 +1084,84 @@
942 1084 }
943 1085 early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
944 1086  
945   -static void __init
  1087 +static int nr_mtrr_spare_reg __initdata =
  1088 + CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
  1089 +
  1090 +static int __init parse_mtrr_spare_reg(char *arg)
  1091 +{
  1092 + if (arg)
  1093 + nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
  1094 + return 0;
  1095 +}
  1096 +
  1097 +early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
  1098 +
  1099 +static int __init
946 1100 x86_setup_var_mtrrs(struct res_range *range, int nr_range,
947   - unsigned address_bits)
  1101 + u64 chunk_size, u64 gran_size)
948 1102 {
949 1103 struct var_mtrr_state var_state;
950 1104 int i;
  1105 + int num_reg;
951 1106  
952 1107 var_state.range_startk = 0;
953 1108 var_state.range_sizek = 0;
954 1109 var_state.reg = 0;
955   - var_state.address_bits = address_bits;
956   - var_state.chunk_sizek = mtrr_chunk_size >> 10;
957   - var_state.gran_sizek = mtrr_gran_size >> 10;
  1110 + var_state.chunk_sizek = chunk_size >> 10;
  1111 + var_state.gran_sizek = gran_size >> 10;
958 1112  
  1113 + memset(range_state, 0, sizeof(range_state));
  1114 +
959 1115 /* Write the range etc */
960 1116 for (i = 0; i < nr_range; i++)
961   - set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
  1117 + set_var_mtrr_range(&var_state, range[i].start,
  1118 + range[i].end - range[i].start + 1);
962 1119  
963 1120 /* Write the last range */
964   - range_to_mtrr_with_hole(&var_state, 0);
965   - printk(KERN_INFO "DONE variable MTRRs\n");
  1121 + if (var_state.range_sizek != 0)
  1122 + range_to_mtrr_with_hole(&var_state, 0, 0);
  1123 + printk(KERN_DEBUG "DONE variable MTRRs\n");
  1124 +
  1125 + num_reg = var_state.reg;
966 1126 /* Clear out the extra MTRR's */
967 1127 while (var_state.reg < num_var_ranges) {
968   - set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits);
  1128 + save_var_mtrr(var_state.reg, 0, 0, 0);
969 1129 var_state.reg++;
970 1130 }
  1131 +
  1132 + return num_reg;
971 1133 }
972 1134  
973   -static int __init
974   -x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
975   - unsigned long extra_remove_base,
976   - unsigned long extra_remove_size)
977   -{
978   - unsigned long i, base, size;
979   - mtrr_type type;
  1135 +struct mtrr_cleanup_result {
  1136 + unsigned long gran_sizek;
  1137 + unsigned long chunk_sizek;
  1138 + unsigned long lose_cover_sizek;
  1139 + unsigned int num_reg;
  1140 + int bad;
  1141 +};
980 1142  
981   - for (i = 0; i < num_var_ranges; i++) {
982   - mtrr_if->get(i, &base, &size, &type);
983   - if (type != MTRR_TYPE_WRBACK)
984   - continue;
985   - nr_range = add_range_with_merge(range, nr_range, base, base + size - 1);
986   - }
987   - printk(KERN_INFO "After WB checking\n");
988   - for (i = 0; i < nr_range; i++)
989   - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
  1143 +/*
  1144 + * gran_size: 1M, 2M, ..., 2G
  1145 + * chunk size: gran_size, ..., 4G
  1146 + * so we need 13 + 12 + ... + 2 = (2 + 13) * 12 / 2 = 90 entries
  1147 + */
  1148 +#define NUM_RESULT 90
  1149 +#define PSHIFT (PAGE_SHIFT - 10)
990 1150  
991   - /* take out UC ranges */
992   - for (i = 0; i < num_var_ranges; i++) {
993   - mtrr_if->get(i, &base, &size, &type);
994   - if (type != MTRR_TYPE_UNCACHABLE)
995   - continue;
996   - if (!size)
997   - continue;
998   - subtract_range(range, base, base + size - 1);
999   - }
1000   - if (extra_remove_size)
1001   - subtract_range(range, extra_remove_base, extra_remove_base + extra_remove_size - 1);
  1151 +static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
  1152 +static struct res_range __initdata range_new[RANGE_NUM];
  1153 +static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1002 1154  
1003   - /* get new range num */
1004   - nr_range = 0;
1005   - for (i = 0; i < RANGE_NUM; i++) {
1006   - if (!range[i].end)
1007   - continue;
1008   - nr_range++;
1009   - }
1010   - printk(KERN_INFO "After UC checking\n");
1011   - for (i = 0; i < nr_range; i++)
1012   - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
1013   -
1014   - /* sort the ranges */
1015   - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
1016   - printk(KERN_INFO "After sorting\n");
1017   - for (i = 0; i < nr_range; i++)
1018   - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
1019   -
1020   - return nr_range;
1021   -}
1022   -
1023 1155 static int __init mtrr_cleanup(unsigned address_bits)
1024 1156 {
1025 1157 unsigned long extra_remove_base, extra_remove_size;
1026 1158 unsigned long i, base, size, def, dummy;
1027   - struct res_range range[RANGE_NUM];
1028 1159 mtrr_type type;
1029   - int nr_range;
  1160 + int nr_range, nr_range_new;
  1161 + u64 chunk_size, gran_size;
  1162 + unsigned long range_sums, range_sums_new;
  1163 + int index_good;
  1164 + int num_reg_good;
1030 1165  
1031 1166 /* extra one for all 0 */
1032 1167 int num[MTRR_NUM_TYPES + 1];
... ... @@ -1038,10 +1173,20 @@
1038 1173 if (def != MTRR_TYPE_UNCACHABLE)
1039 1174 return 0;
1040 1175  
  1176 + /* get it and store it aside */
  1177 + memset(range_state, 0, sizeof(range_state));
  1178 + for (i = 0; i < num_var_ranges; i++) {
  1179 + mtrr_if->get(i, &base, &size, &type);
  1180 + range_state[i].base_pfn = base;
  1181 + range_state[i].size_pfn = size;
  1182 + range_state[i].type = type;
  1183 + }
  1184 +
1041 1185 /* check entries number */
1042 1186 memset(num, 0, sizeof(num));
1043 1187 for (i = 0; i < num_var_ranges; i++) {
1044   - mtrr_if->get(i, &base, &size, &type);
  1188 + type = range_state[i].type;
  1189 + size = range_state[i].size_pfn;
1045 1190 if (type >= MTRR_NUM_TYPES)
1046 1191 continue;
1047 1192 if (!size)
1048 1193  
1049 1194  
1050 1195  
1051 1196  
1052 1197  
... ... @@ -1062,16 +1207,173 @@
1062 1207 extra_remove_size = 0;
1063 1208 if (mtrr_tom2) {
1064 1209 extra_remove_base = 1 << (32 - PAGE_SHIFT);
1065   - extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base;
  1210 + extra_remove_size =
  1211 + (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
1066 1212 }
1067   - nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);
  1213 + nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
  1214 + extra_remove_size);
  1215 + range_sums = sum_ranges(range, nr_range);
  1216 + printk(KERN_INFO "total RAM covered: %ldM\n",
  1217 + range_sums >> (20 - PAGE_SHIFT));
1068 1218  
1069   - /* convert ranges to var ranges state */
1070   - x86_setup_var_mtrrs(range, nr_range, address_bits);
  1219 + if (mtrr_chunk_size && mtrr_gran_size) {
  1220 + int num_reg;
1071 1221  
1072   - return 1;
  1222 + /* convert ranges to var ranges state */
  1223 + num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
  1224 + mtrr_gran_size);
  1225 +
  1226 + /* we got new setting in range_state, check it */
  1227 + memset(range_new, 0, sizeof(range_new));
  1228 + nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
  1229 + extra_remove_base,
  1230 + extra_remove_size);
  1231 + range_sums_new = sum_ranges(range_new, nr_range_new);
  1232 +
  1233 + i = 0;
  1234 + result[i].chunk_sizek = mtrr_chunk_size >> 10;
  1235 + result[i].gran_sizek = mtrr_gran_size >> 10;
  1236 + result[i].num_reg = num_reg;
  1237 + if (range_sums < range_sums_new) {
  1238 + result[i].lose_cover_sizek =
  1239 + (range_sums_new - range_sums) << PSHIFT;
  1240 + result[i].bad = 1;
  1241 + } else
  1242 + result[i].lose_cover_sizek =
  1243 + (range_sums - range_sums_new) << PSHIFT;
  1244 +
  1245 + printk(KERN_INFO " %sgran_size: %ldM \tchunk_size: %ldM \t",
  1246 + result[i].bad?" BAD ":"", result[i].gran_sizek >> 10,
  1247 + result[i].chunk_sizek >> 10);
  1248 + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n",
  1249 + result[i].num_reg, result[i].bad?"-":"",
  1250 + result[i].lose_cover_sizek >> 10);
  1251 + if (!result[i].bad) {
  1252 + set_var_mtrr_all(address_bits);
  1253 + return 1;
  1254 + }
  1255 + printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
  1256 + "will find optimal one\n");
  1257 + memset(result, 0, sizeof(result[0]));
  1258 + }
  1259 +
  1260 + i = 0;
  1261 + memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
  1262 + memset(result, 0, sizeof(result));
  1263 + for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
  1264 + for (chunk_size = gran_size; chunk_size < (1ULL<<33);
  1265 + chunk_size <<= 1) {
  1266 + int num_reg;
  1267 +
  1268 + printk(KERN_INFO
  1269 + "\ngran_size: %lldM chunk_size_size: %lldM\n",
  1270 + gran_size >> 20, chunk_size >> 20);
  1271 + if (i >= NUM_RESULT)
  1272 + continue;
  1273 +
  1274 + /* convert ranges to var ranges state */
  1275 + num_reg = x86_setup_var_mtrrs(range, nr_range,
  1276 + chunk_size, gran_size);
  1277 +
  1278 + /* we got new setting in range_state, check it */
  1279 + memset(range_new, 0, sizeof(range_new));
  1280 + nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
  1281 + extra_remove_base, extra_remove_size);
  1282 + range_sums_new = sum_ranges(range_new, nr_range_new);
  1283 +
  1284 + result[i].chunk_sizek = chunk_size >> 10;
  1285 + result[i].gran_sizek = gran_size >> 10;
  1286 + result[i].num_reg = num_reg;
  1287 + if (range_sums < range_sums_new) {
  1288 + result[i].lose_cover_sizek =
  1289 + (range_sums_new - range_sums) << PSHIFT;
  1290 + result[i].bad = 1;
  1291 + } else
  1292 + result[i].lose_cover_sizek =
  1293 + (range_sums - range_sums_new) << PSHIFT;
  1294 +
  1295 + /* double check it */
  1296 + if (!result[i].bad && !result[i].lose_cover_sizek) {
  1297 + if (nr_range_new != nr_range ||
  1298 + memcmp(range, range_new, sizeof(range)))
  1299 + result[i].bad = 1;
  1300 + }
  1301 +
  1302 + if (!result[i].bad && (range_sums - range_sums_new <
  1303 + min_loss_pfn[num_reg])) {
  1304 + min_loss_pfn[num_reg] =
  1305 + range_sums - range_sums_new;
  1306 + }
  1307 + i++;
  1308 + }
  1309 + }
  1310 +
  1311 + /* print out all */
  1312 + for (i = 0; i < NUM_RESULT; i++) {
  1313 + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
  1314 + result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
  1315 + result[i].chunk_sizek >> 10);
  1316 + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n",
  1317 + result[i].num_reg, result[i].bad?"-":"",
  1318 + result[i].lose_cover_sizek >> 10);
  1319 + }
  1320 +
  1321 + /* try to find the optimal index */
  1322 + if (nr_mtrr_spare_reg >= num_var_ranges)
  1323 + nr_mtrr_spare_reg = num_var_ranges - 1;
  1324 + num_reg_good = -1;
  1325 + for (i = 1; i < num_var_ranges + 1 - nr_mtrr_spare_reg; i++) {
  1326 + if (!min_loss_pfn[i]) {
  1327 + num_reg_good = i;
  1328 + break;
  1329 + }
  1330 + }
  1331 +
  1332 + index_good = -1;
  1333 + if (num_reg_good != -1) {
  1334 + for (i = 0; i < NUM_RESULT; i++) {
  1335 + if (!result[i].bad &&
  1336 + result[i].num_reg == num_reg_good &&
  1337 + !result[i].lose_cover_sizek) {
  1338 + index_good = i;
  1339 + break;
  1340 + }
  1341 + }
  1342 + }
  1343 +
  1344 + if (index_good != -1) {
  1345 + printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
  1346 + i = index_good;
  1347 + printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
  1348 + result[i].gran_sizek >> 10,
  1349 + result[i].chunk_sizek >> 10);
  1350 + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %ldM \n",
  1351 + result[i].num_reg,
  1352 + result[i].lose_cover_sizek >> 10);
  1353 + /* convert ranges to var ranges state */
  1354 + chunk_size = result[i].chunk_sizek;
  1355 + chunk_size <<= 10;
  1356 + gran_size = result[i].gran_sizek;
  1357 + gran_size <<= 10;
  1358 + x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
  1359 + set_var_mtrr_all(address_bits);
  1360 + return 1;
  1361 + }
  1362 +
  1363 + printk(KERN_INFO "mtrr_cleanup: cannot find optimal value\n");
  1364 + printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
  1365 +
  1366 + return 0;
1073 1367 }
  1368 +#else
  1369 +static int __init mtrr_cleanup(unsigned address_bits)
  1370 +{
  1371 + return 0;
  1372 +}
  1373 +#endif
1074 1374  
  1375 +static int __initdata changed_by_mtrr_cleanup;
  1376 +
1075 1377 static int disable_mtrr_trim;
1076 1378  
1077 1379 static int __init disable_mtrr_trim_setup(char *str)
... ... @@ -1111,7 +1413,8 @@
1111 1413 return 0;
1112 1414 }
1113 1415  
1114   -static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
  1416 +static u64 __init real_trim_memory(unsigned long start_pfn,
  1417 + unsigned long limit_pfn)
1115 1418 {
1116 1419 u64 trim_start, trim_size;
1117 1420 trim_start = start_pfn;
... ... @@ -1138,9 +1441,8 @@
1138 1441 {
1139 1442 unsigned long i, base, size, highest_pfn = 0, def, dummy;
1140 1443 mtrr_type type;
1141   - struct res_range range[RANGE_NUM];
1142 1444 int nr_range;
1143   - u64 total_real_trim_size;
  1445 + u64 total_trim_size;
1144 1446  
1145 1447 /* extra one for all 0 */
1146 1448 int num[MTRR_NUM_TYPES + 1];
... ... @@ -1155,11 +1457,22 @@
1155 1457 if (def != MTRR_TYPE_UNCACHABLE)
1156 1458 return 0;
1157 1459  
1158   - /* Find highest cached pfn */
  1460 + /* get it and store it aside */
  1461 + memset(range_state, 0, sizeof(range_state));
1159 1462 for (i = 0; i < num_var_ranges; i++) {
1160 1463 mtrr_if->get(i, &base, &size, &type);
  1464 + range_state[i].base_pfn = base;
  1465 + range_state[i].size_pfn = size;
  1466 + range_state[i].type = type;
  1467 + }
  1468 +
  1469 + /* Find highest cached pfn */
  1470 + for (i = 0; i < num_var_ranges; i++) {
  1471 + type = range_state[i].type;
1161 1472 if (type != MTRR_TYPE_WRBACK)
1162 1473 continue;
  1474 + base = range_state[i].base_pfn;
  1475 + size = range_state[i].size_pfn;
1163 1476 if (highest_pfn < base + size)
1164 1477 highest_pfn = base + size;
1165 1478 }
... ... @@ -1177,9 +1490,10 @@
1177 1490 /* check entries number */
1178 1491 memset(num, 0, sizeof(num));
1179 1492 for (i = 0; i < num_var_ranges; i++) {
1180   - mtrr_if->get(i, &base, &size, &type);
  1493 + type = range_state[i].type;
1181 1494 if (type >= MTRR_NUM_TYPES)
1182 1495 continue;
  1496 + size = range_state[i].size_pfn;
1183 1497 if (!size)
1184 1498 type = MTRR_NUM_TYPES;
1185 1499 num[type]++;
... ... @@ -1205,26 +1519,28 @@
1205 1519 }
1206 1520 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1207 1521  
1208   - total_real_trim_size = 0;
  1522 + total_trim_size = 0;
1209 1523 /* check the head */
1210 1524 if (range[0].start)
1211   - total_real_trim_size += real_trim_memory(0, range[0].start);
  1525 + total_trim_size += real_trim_memory(0, range[0].start);
1212 1526 /* check the holes */
1213 1527 for (i = 0; i < nr_range - 1; i++) {
1214 1528 if (range[i].end + 1 < range[i+1].start)
1215   - total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
  1529 + total_trim_size += real_trim_memory(range[i].end + 1,
  1530 + range[i+1].start);
1216 1531 }
1217 1532 /* check the top */
1218 1533 i = nr_range - 1;
1219 1534 if (range[i].end + 1 < end_pfn)
1220   - total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
  1535 + total_trim_size += real_trim_memory(range[i].end + 1,
  1536 + end_pfn);
1221 1537  
1222   - if (total_real_trim_size) {
  1538 + if (total_trim_size) {
1223 1539 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
1224 1540 " all of memory, losing %lluMB of RAM.\n",
1225   - total_real_trim_size >> 20);
  1541 + total_trim_size >> 20);
1226 1542  
1227   - if (enable_mtrr_cleanup < 1)
  1543 + if (!changed_by_mtrr_cleanup)
1228 1544 WARN_ON(1);
1229 1545  
1230 1546 printk(KERN_INFO "update e820 for mtrr\n");
... ... @@ -1314,8 +1630,10 @@
1314 1630 if (use_intel()) {
1315 1631 get_mtrr_state();
1316 1632  
1317   - if (mtrr_cleanup(phys_addr))
  1633 + if (mtrr_cleanup(phys_addr)) {
  1634 + changed_by_mtrr_cleanup = 1;
1318 1635 mtrr_if->set_all();
  1636 + }
1319 1637  
1320 1638 }
1321 1639 }
... ... @@ -1355,7 +1673,7 @@
1355 1673 if (!mtrr_if)
1356 1674 return 0;
1357 1675 if (use_intel()) {
1358   - if (enable_mtrr_cleanup < 1)
  1676 + if (!changed_by_mtrr_cleanup)
1359 1677 mtrr_state_warn();
1360 1678 } else {
1361 1679 /* The CPUs haven't MTRR and seem to not support SMP. They have