Commit 08677214e318297f228237be0042aac754f48f1d
Committed by
H. Peter Anvin
1 parent
c252a5bb1f
Exists in
master
and in
7 other branches
x86: Make 64 bit use early_res instead of bootmem before slab
Finally we can use early_res to replace bootmem for x86_64 now. CONFIG_NO_BOOTMEM can still be used to enable or disable it. -v2: fix 32-bit compile failure around MAX_DMA32_PFN -v3: folded in bug fix from the LKML message below Signed-off-by: Yinghai Lu <yinghai@kernel.org> LKML-Reference: <4B747239.4070907@kernel.org> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Showing 13 changed files with 454 additions and 23 deletions. Side-by-side Diff
arch/x86/Kconfig
... | ... | @@ -568,6 +568,19 @@ |
568 | 568 | Enable to debug paravirt_ops internals. Specifically, BUG if |
569 | 569 | a paravirt_op is missing when it is called. |
570 | 570 | |
571 | +config NO_BOOTMEM | |
572 | + default y | |
573 | + bool "Disable Bootmem code" | |
574 | + depends on X86_64 | |
575 | + ---help--- | |
576 | + Use early_res directly instead of bootmem before slab is ready. | |
577 | + - allocator (buddy) [generic] | |
578 | + - early allocator (bootmem) [generic] | |
579 | + - very early allocator (reserve_early*()) [x86] | |
580 | + - very very early allocator (early brk model) [x86] | |
581 | + So reduce one layer between early allocator to final allocator | |
582 | + | |
583 | + | |
571 | 584 | config MEMTEST |
572 | 585 | bool "Memtest" |
573 | 586 | ---help--- |
arch/x86/include/asm/e820.h
... | ... | @@ -117,6 +117,12 @@ |
117 | 117 | extern void early_res_to_bootmem(u64 start, u64 end); |
118 | 118 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); |
119 | 119 | |
120 | +void reserve_early_without_check(u64 start, u64 end, char *name); | |
121 | +u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, | |
122 | + u64 size, u64 align); | |
123 | +#include <linux/range.h> | |
124 | +int get_free_all_memory_range(struct range **rangep, int nodeid); | |
125 | + | |
120 | 126 | extern unsigned long e820_end_of_ram_pfn(void); |
121 | 127 | extern unsigned long e820_end_of_low_ram_pfn(void); |
122 | 128 | extern int e820_find_active_region(const struct e820entry *ei, |
arch/x86/kernel/e820.c
... | ... | @@ -977,6 +977,25 @@ |
977 | 977 | __reserve_early(start, end, name, 0); |
978 | 978 | } |
979 | 979 | |
980 | +void __init reserve_early_without_check(u64 start, u64 end, char *name) | |
981 | +{ | |
982 | + struct early_res *r; | |
983 | + | |
984 | + if (start >= end) | |
985 | + return; | |
986 | + | |
987 | + __check_and_double_early_res(end); | |
988 | + | |
989 | + r = &early_res[early_res_count]; | |
990 | + | |
991 | + r->start = start; | |
992 | + r->end = end; | |
993 | + r->overlap_ok = 0; | |
994 | + if (name) | |
995 | + strncpy(r->name, name, sizeof(r->name) - 1); | |
996 | + early_res_count++; | |
997 | +} | |
998 | + | |
980 | 999 | void __init free_early(u64 start, u64 end) |
981 | 1000 | { |
982 | 1001 | struct early_res *r; |
... | ... | @@ -991,6 +1010,94 @@ |
991 | 1010 | drop_range(i); |
992 | 1011 | } |
993 | 1012 | |
1013 | +#ifdef CONFIG_NO_BOOTMEM | |
1014 | +static void __init subtract_early_res(struct range *range, int az) | |
1015 | +{ | |
1016 | + int i, count; | |
1017 | + u64 final_start, final_end; | |
1018 | + int idx = 0; | |
1019 | + | |
1020 | + count = 0; | |
1021 | + for (i = 0; i < max_early_res && early_res[i].end; i++) | |
1022 | + count++; | |
1023 | + | |
1024 | + /* need to skip first one ?*/ | |
1025 | + if (early_res != early_res_x) | |
1026 | + idx = 1; | |
1027 | + | |
1028 | +#if 1 | |
1029 | + printk(KERN_INFO "Subtract (%d early reservations)\n", count); | |
1030 | +#endif | |
1031 | + for (i = idx; i < count; i++) { | |
1032 | + struct early_res *r = &early_res[i]; | |
1033 | +#if 0 | |
1034 | + printk(KERN_INFO " #%d [%010llx - %010llx] %15s", i, | |
1035 | + r->start, r->end, r->name); | |
1036 | +#endif | |
1037 | + final_start = PFN_DOWN(r->start); | |
1038 | + final_end = PFN_UP(r->end); | |
1039 | + if (final_start >= final_end) { | |
1040 | +#if 0 | |
1041 | + printk(KERN_CONT "\n"); | |
1042 | +#endif | |
1043 | + continue; | |
1044 | + } | |
1045 | +#if 0 | |
1046 | + printk(KERN_CONT " subtract pfn [%010llx - %010llx]\n", | |
1047 | + final_start, final_end); | |
1048 | +#endif | |
1049 | + subtract_range(range, az, final_start, final_end); | |
1050 | + } | |
1051 | + | |
1052 | +} | |
1053 | + | |
1054 | +int __init get_free_all_memory_range(struct range **rangep, int nodeid) | |
1055 | +{ | |
1056 | + int i, count; | |
1057 | + u64 start = 0, end; | |
1058 | + u64 size; | |
1059 | + u64 mem; | |
1060 | + struct range *range; | |
1061 | + int nr_range; | |
1062 | + | |
1063 | + count = 0; | |
1064 | + for (i = 0; i < max_early_res && early_res[i].end; i++) | |
1065 | + count++; | |
1066 | + | |
1067 | + count *= 2; | |
1068 | + | |
1069 | + size = sizeof(struct range) * count; | |
1070 | +#ifdef MAX_DMA32_PFN | |
1071 | + if (max_pfn_mapped > MAX_DMA32_PFN) | |
1072 | + start = MAX_DMA32_PFN << PAGE_SHIFT; | |
1073 | +#endif | |
1074 | + end = max_pfn_mapped << PAGE_SHIFT; | |
1075 | + mem = find_e820_area(start, end, size, sizeof(struct range)); | |
1076 | + if (mem == -1ULL) | |
1077 | + panic("can not find more space for range free"); | |
1078 | + | |
1079 | + range = __va(mem); | |
1080 | + /* use early_node_map[] and early_res to get range array at first */ | |
1081 | + memset(range, 0, size); | |
1082 | + nr_range = 0; | |
1083 | + | |
1084 | + /* need to go over early_node_map to find out good range for node */ | |
1085 | + nr_range = add_from_early_node_map(range, count, nr_range, nodeid); | |
1086 | + subtract_early_res(range, count); | |
1087 | + nr_range = clean_sort_range(range, count); | |
1088 | + | |
1089 | + /* need to clear it ? */ | |
1090 | + if (nodeid == MAX_NUMNODES) { | |
1091 | + memset(&early_res[0], 0, | |
1092 | + sizeof(struct early_res) * max_early_res); | |
1093 | + early_res = NULL; | |
1094 | + max_early_res = 0; | |
1095 | + } | |
1096 | + | |
1097 | + *rangep = range; | |
1098 | + return nr_range; | |
1099 | +} | |
1100 | +#else | |
994 | 1101 | void __init early_res_to_bootmem(u64 start, u64 end) |
995 | 1102 | { |
996 | 1103 | int i, count; |
... | ... | @@ -1028,6 +1135,7 @@ |
1028 | 1135 | max_early_res = 0; |
1029 | 1136 | early_res_count = 0; |
1030 | 1137 | } |
1138 | +#endif | |
1031 | 1139 | |
1032 | 1140 | /* Check for already reserved areas */ |
1033 | 1141 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) |
1034 | 1142 | |
1035 | 1143 | |
1036 | 1144 | |
1037 | 1145 | |
1038 | 1146 | |
... | ... | @@ -1083,31 +1191,56 @@ |
1083 | 1191 | |
1084 | 1192 | /* |
1085 | 1193 | * Find a free area with specified alignment in a specific range. |
1194 | + * only with the area.between start to end is active range from early_node_map | |
1195 | + * so they are good as RAM | |
1086 | 1196 | */ |
1197 | +u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, | |
1198 | + u64 size, u64 align) | |
1199 | +{ | |
1200 | + u64 addr, last; | |
1201 | + | |
1202 | + addr = round_up(ei_start, align); | |
1203 | + if (addr < start) | |
1204 | + addr = round_up(start, align); | |
1205 | + if (addr >= ei_last) | |
1206 | + goto out; | |
1207 | + while (bad_addr(&addr, size, align) && addr+size <= ei_last) | |
1208 | + ; | |
1209 | + last = addr + size; | |
1210 | + if (last > ei_last) | |
1211 | + goto out; | |
1212 | + if (last > end) | |
1213 | + goto out; | |
1214 | + | |
1215 | + return addr; | |
1216 | + | |
1217 | +out: | |
1218 | + return -1ULL; | |
1219 | +} | |
1220 | + | |
1221 | +/* | |
1222 | + * Find a free area with specified alignment in a specific range. | |
1223 | + */ | |
1087 | 1224 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) |
1088 | 1225 | { |
1089 | 1226 | int i; |
1090 | 1227 | |
1091 | 1228 | for (i = 0; i < e820.nr_map; i++) { |
1092 | 1229 | struct e820entry *ei = &e820.map[i]; |
1093 | - u64 addr, last; | |
1094 | - u64 ei_last; | |
1230 | + u64 addr; | |
1231 | + u64 ei_start, ei_last; | |
1095 | 1232 | |
1096 | 1233 | if (ei->type != E820_RAM) |
1097 | 1234 | continue; |
1098 | - addr = round_up(ei->addr, align); | |
1235 | + | |
1099 | 1236 | ei_last = ei->addr + ei->size; |
1100 | - if (addr < start) | |
1101 | - addr = round_up(start, align); | |
1102 | - if (addr >= ei_last) | |
1237 | + ei_start = ei->addr; | |
1238 | + addr = find_early_area(ei_start, ei_last, start, end, | |
1239 | + size, align); | |
1240 | + | |
1241 | + if (addr == -1ULL) | |
1103 | 1242 | continue; |
1104 | - while (bad_addr(&addr, size, align) && addr+size <= ei_last) | |
1105 | - ; | |
1106 | - last = addr + size; | |
1107 | - if (last > ei_last) | |
1108 | - continue; | |
1109 | - if (last > end) | |
1110 | - continue; | |
1243 | + | |
1111 | 1244 | return addr; |
1112 | 1245 | } |
1113 | 1246 | return -1ULL; |
arch/x86/kernel/setup.c
arch/x86/mm/init_64.c
... | ... | @@ -572,6 +572,7 @@ |
572 | 572 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
573 | 573 | int acpi, int k8) |
574 | 574 | { |
575 | +#ifndef CONFIG_NO_BOOTMEM | |
575 | 576 | unsigned long bootmap_size, bootmap; |
576 | 577 | |
577 | 578 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
... | ... | @@ -585,6 +586,9 @@ |
585 | 586 | 0, end_pfn); |
586 | 587 | e820_register_active_regions(0, start_pfn, end_pfn); |
587 | 588 | free_bootmem_with_active_regions(0, end_pfn); |
589 | +#else | |
590 | + e820_register_active_regions(0, start_pfn, end_pfn); | |
591 | +#endif | |
588 | 592 | } |
589 | 593 | #endif |
590 | 594 |
arch/x86/mm/numa_64.c
... | ... | @@ -198,11 +198,13 @@ |
198 | 198 | void __init |
199 | 199 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
200 | 200 | { |
201 | - unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | |
201 | + unsigned long start_pfn, last_pfn, nodedata_phys; | |
202 | 202 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
203 | - unsigned long bootmap_start, nodedata_phys; | |
204 | - void *bootmap; | |
205 | 203 | int nid; |
204 | +#ifndef CONFIG_NO_BOOTMEM | |
205 | + unsigned long bootmap_start, bootmap_pages, bootmap_size; | |
206 | + void *bootmap; | |
207 | +#endif | |
206 | 208 | |
207 | 209 | if (!end) |
208 | 210 | return; |
... | ... | @@ -216,7 +218,7 @@ |
216 | 218 | |
217 | 219 | start = roundup(start, ZONE_ALIGN); |
218 | 220 | |
219 | - printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | |
221 | + printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, | |
220 | 222 | start, end); |
221 | 223 | |
222 | 224 | start_pfn = start >> PAGE_SHIFT; |
223 | 225 | |
... | ... | @@ -235,10 +237,13 @@ |
235 | 237 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); |
236 | 238 | |
237 | 239 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
238 | - NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | |
240 | + NODE_DATA(nodeid)->node_id = nodeid; | |
239 | 241 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
240 | 242 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
241 | 243 | |
244 | +#ifndef CONFIG_NO_BOOTMEM | |
245 | + NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | |
246 | + | |
242 | 247 | /* |
243 | 248 | * Find a place for the bootmem map |
244 | 249 | * nodedata_phys could be on other nodes by alloc_bootmem, |
... | ... | @@ -275,6 +280,7 @@ |
275 | 280 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); |
276 | 281 | |
277 | 282 | free_bootmem_with_active_regions(nodeid, end); |
283 | +#endif | |
278 | 284 | |
279 | 285 | node_set_online(nodeid); |
280 | 286 | } |
... | ... | @@ -732,6 +738,10 @@ |
732 | 738 | |
733 | 739 | for_each_online_node(i) |
734 | 740 | pages += free_all_bootmem_node(NODE_DATA(i)); |
741 | + | |
742 | +#ifdef CONFIG_NO_BOOTMEM | |
743 | + pages += free_all_memory_core_early(MAX_NUMNODES); | |
744 | +#endif | |
735 | 745 | |
736 | 746 | return pages; |
737 | 747 | } |
include/linux/bootmem.h
... | ... | @@ -23,6 +23,7 @@ |
23 | 23 | extern unsigned long saved_max_pfn; |
24 | 24 | #endif |
25 | 25 | |
26 | +#ifndef CONFIG_NO_BOOTMEM | |
26 | 27 | /* |
27 | 28 | * node_bootmem_map is a map pointer - the bits represent all physical |
28 | 29 | * memory pages (including holes) on the node. |
... | ... | @@ -37,6 +38,7 @@ |
37 | 38 | } bootmem_data_t; |
38 | 39 | |
39 | 40 | extern bootmem_data_t bootmem_node_data[]; |
41 | +#endif | |
40 | 42 | |
41 | 43 | extern unsigned long bootmem_bootmap_pages(unsigned long); |
42 | 44 | |
... | ... | @@ -46,6 +48,7 @@ |
46 | 48 | unsigned long endpfn); |
47 | 49 | extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); |
48 | 50 | |
51 | +unsigned long free_all_memory_core_early(int nodeid); | |
49 | 52 | extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); |
50 | 53 | extern unsigned long free_all_bootmem(void); |
51 | 54 | |
... | ... | @@ -81,6 +84,10 @@ |
81 | 84 | unsigned long align, |
82 | 85 | unsigned long goal); |
83 | 86 | extern void *__alloc_bootmem_node(pg_data_t *pgdat, |
87 | + unsigned long size, | |
88 | + unsigned long align, | |
89 | + unsigned long goal); | |
90 | +void *__alloc_bootmem_node_high(pg_data_t *pgdat, | |
84 | 91 | unsigned long size, |
85 | 92 | unsigned long align, |
86 | 93 | unsigned long goal); |
include/linux/mm.h
... | ... | @@ -12,6 +12,7 @@ |
12 | 12 | #include <linux/prio_tree.h> |
13 | 13 | #include <linux/debug_locks.h> |
14 | 14 | #include <linux/mm_types.h> |
15 | +#include <linux/range.h> | |
15 | 16 | |
16 | 17 | struct mempolicy; |
17 | 18 | struct anon_vma; |
... | ... | @@ -1049,6 +1050,10 @@ |
1049 | 1050 | extern unsigned long find_min_pfn_with_active_regions(void); |
1050 | 1051 | extern void free_bootmem_with_active_regions(int nid, |
1051 | 1052 | unsigned long max_low_pfn); |
1053 | +int add_from_early_node_map(struct range *range, int az, | |
1054 | + int nr_range, int nid); | |
1055 | +void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, | |
1056 | + u64 goal, u64 limit); | |
1052 | 1057 | typedef int (*work_fn_t)(unsigned long, unsigned long, void *); |
1053 | 1058 | extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); |
1054 | 1059 | extern void sparse_memory_present_with_active_regions(int nid); |
include/linux/mmzone.h
... | ... | @@ -620,7 +620,9 @@ |
620 | 620 | struct page_cgroup *node_page_cgroup; |
621 | 621 | #endif |
622 | 622 | #endif |
623 | +#ifndef CONFIG_NO_BOOTMEM | |
623 | 624 | struct bootmem_data *bdata; |
625 | +#endif | |
624 | 626 | #ifdef CONFIG_MEMORY_HOTPLUG |
625 | 627 | /* |
626 | 628 | * Must be held any time you expect node_start_pfn, node_present_pages |
mm/bootmem.c
... | ... | @@ -13,6 +13,7 @@ |
13 | 13 | #include <linux/bootmem.h> |
14 | 14 | #include <linux/module.h> |
15 | 15 | #include <linux/kmemleak.h> |
16 | +#include <linux/range.h> | |
16 | 17 | |
17 | 18 | #include <asm/bug.h> |
18 | 19 | #include <asm/io.h> |
... | ... | @@ -32,6 +33,7 @@ |
32 | 33 | unsigned long saved_max_pfn; |
33 | 34 | #endif |
34 | 35 | |
36 | +#ifndef CONFIG_NO_BOOTMEM | |
35 | 37 | bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; |
36 | 38 | |
37 | 39 | static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); |
... | ... | @@ -142,7 +144,7 @@ |
142 | 144 | min_low_pfn = start; |
143 | 145 | return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); |
144 | 146 | } |
145 | - | |
147 | +#endif | |
146 | 148 | /* |
147 | 149 | * free_bootmem_late - free bootmem pages directly to page allocator |
148 | 150 | * @addr: starting address of the range |
... | ... | @@ -167,6 +169,60 @@ |
167 | 169 | } |
168 | 170 | } |
169 | 171 | |
172 | +#ifdef CONFIG_NO_BOOTMEM | |
173 | +static void __init __free_pages_memory(unsigned long start, unsigned long end) | |
174 | +{ | |
175 | + int i; | |
176 | + unsigned long start_aligned, end_aligned; | |
177 | + int order = ilog2(BITS_PER_LONG); | |
178 | + | |
179 | + start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); | |
180 | + end_aligned = end & ~(BITS_PER_LONG - 1); | |
181 | + | |
182 | + if (end_aligned <= start_aligned) { | |
183 | +#if 1 | |
184 | + printk(KERN_DEBUG " %lx - %lx\n", start, end); | |
185 | +#endif | |
186 | + for (i = start; i < end; i++) | |
187 | + __free_pages_bootmem(pfn_to_page(i), 0); | |
188 | + | |
189 | + return; | |
190 | + } | |
191 | + | |
192 | +#if 1 | |
193 | + printk(KERN_DEBUG " %lx %lx - %lx %lx\n", | |
194 | + start, start_aligned, end_aligned, end); | |
195 | +#endif | |
196 | + for (i = start; i < start_aligned; i++) | |
197 | + __free_pages_bootmem(pfn_to_page(i), 0); | |
198 | + | |
199 | + for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG) | |
200 | + __free_pages_bootmem(pfn_to_page(i), order); | |
201 | + | |
202 | + for (i = end_aligned; i < end; i++) | |
203 | + __free_pages_bootmem(pfn_to_page(i), 0); | |
204 | +} | |
205 | + | |
206 | +unsigned long __init free_all_memory_core_early(int nodeid) | |
207 | +{ | |
208 | + int i; | |
209 | + u64 start, end; | |
210 | + unsigned long count = 0; | |
211 | + struct range *range = NULL; | |
212 | + int nr_range; | |
213 | + | |
214 | + nr_range = get_free_all_memory_range(&range, nodeid); | |
215 | + | |
216 | + for (i = 0; i < nr_range; i++) { | |
217 | + start = range[i].start; | |
218 | + end = range[i].end; | |
219 | + count += end - start; | |
220 | + __free_pages_memory(start, end); | |
221 | + } | |
222 | + | |
223 | + return count; | |
224 | +} | |
225 | +#else | |
170 | 226 | static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) |
171 | 227 | { |
172 | 228 | int aligned; |
... | ... | @@ -227,6 +283,7 @@ |
227 | 283 | |
228 | 284 | return count; |
229 | 285 | } |
286 | +#endif | |
230 | 287 | |
231 | 288 | /** |
232 | 289 | * free_all_bootmem_node - release a node's free pages to the buddy allocator |
233 | 290 | |
... | ... | @@ -237,7 +294,12 @@ |
237 | 294 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) |
238 | 295 | { |
239 | 296 | register_page_bootmem_info_node(pgdat); |
297 | +#ifdef CONFIG_NO_BOOTMEM | |
298 | + /* free_all_memory_core_early(MAX_NUMNODES) will be called later */ | |
299 | + return 0; | |
300 | +#else | |
240 | 301 | return free_all_bootmem_core(pgdat->bdata); |
302 | +#endif | |
241 | 303 | } |
242 | 304 | |
243 | 305 | /** |
244 | 306 | |
245 | 307 | |
... | ... | @@ -247,9 +309,14 @@ |
247 | 309 | */ |
248 | 310 | unsigned long __init free_all_bootmem(void) |
249 | 311 | { |
312 | +#ifdef CONFIG_NO_BOOTMEM | |
313 | + return free_all_memory_core_early(NODE_DATA(0)->node_id); | |
314 | +#else | |
250 | 315 | return free_all_bootmem_core(NODE_DATA(0)->bdata); |
316 | +#endif | |
251 | 317 | } |
252 | 318 | |
319 | +#ifndef CONFIG_NO_BOOTMEM | |
253 | 320 | static void __init __free(bootmem_data_t *bdata, |
254 | 321 | unsigned long sidx, unsigned long eidx) |
255 | 322 | { |
... | ... | @@ -344,6 +411,7 @@ |
344 | 411 | } |
345 | 412 | BUG(); |
346 | 413 | } |
414 | +#endif | |
347 | 415 | |
348 | 416 | /** |
349 | 417 | * free_bootmem_node - mark a page range as usable |
... | ... | @@ -358,6 +426,12 @@ |
358 | 426 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
359 | 427 | unsigned long size) |
360 | 428 | { |
429 | +#ifdef CONFIG_NO_BOOTMEM | |
430 | + free_early(physaddr, physaddr + size); | |
431 | +#if 0 | |
432 | + printk(KERN_DEBUG "free %lx %lx\n", physaddr, size); | |
433 | +#endif | |
434 | +#else | |
361 | 435 | unsigned long start, end; |
362 | 436 | |
363 | 437 | kmemleak_free_part(__va(physaddr), size); |
... | ... | @@ -366,6 +440,7 @@ |
366 | 440 | end = PFN_DOWN(physaddr + size); |
367 | 441 | |
368 | 442 | mark_bootmem_node(pgdat->bdata, start, end, 0, 0); |
443 | +#endif | |
369 | 444 | } |
370 | 445 | |
371 | 446 | /** |
... | ... | @@ -379,6 +454,12 @@ |
379 | 454 | */ |
380 | 455 | void __init free_bootmem(unsigned long addr, unsigned long size) |
381 | 456 | { |
457 | +#ifdef CONFIG_NO_BOOTMEM | |
458 | + free_early(addr, addr + size); | |
459 | +#if 0 | |
460 | + printk(KERN_DEBUG "free %lx %lx\n", addr, size); | |
461 | +#endif | |
462 | +#else | |
382 | 463 | unsigned long start, end; |
383 | 464 | |
384 | 465 | kmemleak_free_part(__va(addr), size); |
... | ... | @@ -387,6 +468,7 @@ |
387 | 468 | end = PFN_DOWN(addr + size); |
388 | 469 | |
389 | 470 | mark_bootmem(start, end, 0, 0); |
471 | +#endif | |
390 | 472 | } |
391 | 473 | |
392 | 474 | /** |
393 | 475 | |
... | ... | @@ -403,12 +485,17 @@ |
403 | 485 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
404 | 486 | unsigned long size, int flags) |
405 | 487 | { |
488 | +#ifdef CONFIG_NO_BOOTMEM | |
489 | + panic("no bootmem"); | |
490 | + return 0; | |
491 | +#else | |
406 | 492 | unsigned long start, end; |
407 | 493 | |
408 | 494 | start = PFN_DOWN(physaddr); |
409 | 495 | end = PFN_UP(physaddr + size); |
410 | 496 | |
411 | 497 | return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); |
498 | +#endif | |
412 | 499 | } |
413 | 500 | |
414 | 501 | /** |
415 | 502 | |
416 | 503 | |
... | ... | @@ -424,14 +511,20 @@ |
424 | 511 | int __init reserve_bootmem(unsigned long addr, unsigned long size, |
425 | 512 | int flags) |
426 | 513 | { |
514 | +#ifdef CONFIG_NO_BOOTMEM | |
515 | + panic("no bootmem"); | |
516 | + return 0; | |
517 | +#else | |
427 | 518 | unsigned long start, end; |
428 | 519 | |
429 | 520 | start = PFN_DOWN(addr); |
430 | 521 | end = PFN_UP(addr + size); |
431 | 522 | |
432 | 523 | return mark_bootmem(start, end, 1, flags); |
524 | +#endif | |
433 | 525 | } |
434 | 526 | |
527 | +#ifndef CONFIG_NO_BOOTMEM | |
435 | 528 | static unsigned long __init align_idx(struct bootmem_data *bdata, |
436 | 529 | unsigned long idx, unsigned long step) |
437 | 530 | { |
438 | 531 | |
... | ... | @@ -582,12 +675,33 @@ |
582 | 675 | #endif |
583 | 676 | return NULL; |
584 | 677 | } |
678 | +#endif | |
585 | 679 | |
586 | 680 | static void * __init ___alloc_bootmem_nopanic(unsigned long size, |
587 | 681 | unsigned long align, |
588 | 682 | unsigned long goal, |
589 | 683 | unsigned long limit) |
590 | 684 | { |
685 | +#ifdef CONFIG_NO_BOOTMEM | |
686 | + void *ptr; | |
687 | + | |
688 | + if (WARN_ON_ONCE(slab_is_available())) | |
689 | + return kzalloc(size, GFP_NOWAIT); | |
690 | + | |
691 | +restart: | |
692 | + | |
693 | + ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit); | |
694 | + | |
695 | + if (ptr) | |
696 | + return ptr; | |
697 | + | |
698 | + if (goal != 0) { | |
699 | + goal = 0; | |
700 | + goto restart; | |
701 | + } | |
702 | + | |
703 | + return NULL; | |
704 | +#else | |
591 | 705 | bootmem_data_t *bdata; |
592 | 706 | void *region; |
593 | 707 | |
... | ... | @@ -613,6 +727,7 @@ |
613 | 727 | } |
614 | 728 | |
615 | 729 | return NULL; |
730 | +#endif | |
616 | 731 | } |
617 | 732 | |
618 | 733 | /** |
... | ... | @@ -631,7 +746,13 @@ |
631 | 746 | void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, |
632 | 747 | unsigned long goal) |
633 | 748 | { |
634 | - return ___alloc_bootmem_nopanic(size, align, goal, 0); | |
749 | + unsigned long limit = 0; | |
750 | + | |
751 | +#ifdef CONFIG_NO_BOOTMEM | |
752 | + limit = -1UL; | |
753 | +#endif | |
754 | + | |
755 | + return ___alloc_bootmem_nopanic(size, align, goal, limit); | |
635 | 756 | } |
636 | 757 | |
637 | 758 | static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, |
638 | 759 | |
... | ... | @@ -665,9 +786,16 @@ |
665 | 786 | void * __init __alloc_bootmem(unsigned long size, unsigned long align, |
666 | 787 | unsigned long goal) |
667 | 788 | { |
668 | - return ___alloc_bootmem(size, align, goal, 0); | |
789 | + unsigned long limit = 0; | |
790 | + | |
791 | +#ifdef CONFIG_NO_BOOTMEM | |
792 | + limit = -1UL; | |
793 | +#endif | |
794 | + | |
795 | + return ___alloc_bootmem(size, align, goal, limit); | |
669 | 796 | } |
670 | 797 | |
798 | +#ifndef CONFIG_NO_BOOTMEM | |
671 | 799 | static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, |
672 | 800 | unsigned long size, unsigned long align, |
673 | 801 | unsigned long goal, unsigned long limit) |
... | ... | @@ -684,6 +812,7 @@ |
684 | 812 | |
685 | 813 | return ___alloc_bootmem(size, align, goal, limit); |
686 | 814 | } |
815 | +#endif | |
687 | 816 | |
688 | 817 | /** |
689 | 818 | * __alloc_bootmem_node - allocate boot memory from a specific node |
690 | 819 | |
691 | 820 | |
... | ... | @@ -706,9 +835,48 @@ |
706 | 835 | if (WARN_ON_ONCE(slab_is_available())) |
707 | 836 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
708 | 837 | |
838 | +#ifdef CONFIG_NO_BOOTMEM | |
839 | + return __alloc_memory_core_early(pgdat->node_id, size, align, | |
840 | + goal, -1ULL); | |
841 | +#else | |
709 | 842 | return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); |
843 | +#endif | |
710 | 844 | } |
711 | 845 | |
846 | +void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, | |
847 | + unsigned long align, unsigned long goal) | |
848 | +{ | |
849 | +#ifdef MAX_DMA32_PFN | |
850 | + unsigned long end_pfn; | |
851 | + | |
852 | + if (WARN_ON_ONCE(slab_is_available())) | |
853 | + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | |
854 | + | |
855 | + /* update goal according ...MAX_DMA32_PFN */ | |
856 | + end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages; | |
857 | + | |
858 | + if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) && | |
859 | + (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) { | |
860 | + void *ptr; | |
861 | + unsigned long new_goal; | |
862 | + | |
863 | + new_goal = MAX_DMA32_PFN << PAGE_SHIFT; | |
864 | +#ifdef CONFIG_NO_BOOTMEM | |
865 | + ptr = __alloc_memory_core_early(pgdat->node_id, size, align, | |
866 | + new_goal, -1ULL); | |
867 | +#else | |
868 | + ptr = alloc_bootmem_core(pgdat->bdata, size, align, | |
869 | + new_goal, 0); | |
870 | +#endif | |
871 | + if (ptr) | |
872 | + return ptr; | |
873 | + } | |
874 | +#endif | |
875 | + | |
876 | + return __alloc_bootmem_node(pgdat, size, align, goal); | |
877 | + | |
878 | +} | |
879 | + | |
712 | 880 | #ifdef CONFIG_SPARSEMEM |
713 | 881 | /** |
714 | 882 | * alloc_bootmem_section - allocate boot memory from a specific section |
... | ... | @@ -720,6 +888,16 @@ |
720 | 888 | void * __init alloc_bootmem_section(unsigned long size, |
721 | 889 | unsigned long section_nr) |
722 | 890 | { |
891 | +#ifdef CONFIG_NO_BOOTMEM | |
892 | + unsigned long pfn, goal, limit; | |
893 | + | |
894 | + pfn = section_nr_to_pfn(section_nr); | |
895 | + goal = pfn << PAGE_SHIFT; | |
896 | + limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; | |
897 | + | |
898 | + return __alloc_memory_core_early(early_pfn_to_nid(pfn), size, | |
899 | + SMP_CACHE_BYTES, goal, limit); | |
900 | +#else | |
723 | 901 | bootmem_data_t *bdata; |
724 | 902 | unsigned long pfn, goal, limit; |
725 | 903 | |
... | ... | @@ -729,6 +907,7 @@ |
729 | 907 | bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; |
730 | 908 | |
731 | 909 | return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); |
910 | +#endif | |
732 | 911 | } |
733 | 912 | #endif |
734 | 913 | |
735 | 914 | |
... | ... | @@ -740,11 +919,16 @@ |
740 | 919 | if (WARN_ON_ONCE(slab_is_available())) |
741 | 920 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
742 | 921 | |
922 | +#ifdef CONFIG_NO_BOOTMEM | |
923 | + ptr = __alloc_memory_core_early(pgdat->node_id, size, align, | |
924 | + goal, -1ULL); | |
925 | +#else | |
743 | 926 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); |
744 | 927 | if (ptr) |
745 | 928 | return ptr; |
746 | 929 | |
747 | 930 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); |
931 | +#endif | |
748 | 932 | if (ptr) |
749 | 933 | return ptr; |
750 | 934 | |
751 | 935 | |
... | ... | @@ -795,7 +979,12 @@ |
795 | 979 | if (WARN_ON_ONCE(slab_is_available())) |
796 | 980 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
797 | 981 | |
982 | +#ifdef CONFIG_NO_BOOTMEM | |
983 | + return __alloc_memory_core_early(pgdat->node_id, size, align, | |
984 | + goal, ARCH_LOW_ADDRESS_LIMIT); | |
985 | +#else | |
798 | 986 | return ___alloc_bootmem_node(pgdat->bdata, size, align, |
799 | 987 | goal, ARCH_LOW_ADDRESS_LIMIT); |
988 | +#endif | |
800 | 989 | } |
mm/page_alloc.c
... | ... | @@ -3435,6 +3435,59 @@ |
3435 | 3435 | } |
3436 | 3436 | } |
3437 | 3437 | |
3438 | +int __init add_from_early_node_map(struct range *range, int az, | |
3439 | + int nr_range, int nid) | |
3440 | +{ | |
3441 | + int i; | |
3442 | + u64 start, end; | |
3443 | + | |
3444 | + /* need to go over early_node_map to find out good range for node */ | |
3445 | + for_each_active_range_index_in_nid(i, nid) { | |
3446 | + start = early_node_map[i].start_pfn; | |
3447 | + end = early_node_map[i].end_pfn; | |
3448 | + nr_range = add_range(range, az, nr_range, start, end); | |
3449 | + } | |
3450 | + return nr_range; | |
3451 | +} | |
3452 | + | |
3453 | +void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, | |
3454 | + u64 goal, u64 limit) | |
3455 | +{ | |
3456 | + int i; | |
3457 | + void *ptr; | |
3458 | + | |
3459 | + /* need to go over early_node_map to find out good range for node */ | |
3460 | + for_each_active_range_index_in_nid(i, nid) { | |
3461 | + u64 addr; | |
3462 | + u64 ei_start, ei_last; | |
3463 | + | |
3464 | + ei_last = early_node_map[i].end_pfn; | |
3465 | + ei_last <<= PAGE_SHIFT; | |
3466 | + ei_start = early_node_map[i].start_pfn; | |
3467 | + ei_start <<= PAGE_SHIFT; | |
3468 | + addr = find_early_area(ei_start, ei_last, | |
3469 | + goal, limit, size, align); | |
3470 | + | |
3471 | + if (addr == -1ULL) | |
3472 | + continue; | |
3473 | + | |
3474 | +#if 0 | |
3475 | + printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n", | |
3476 | + nid, | |
3477 | + ei_start, ei_last, goal, limit, size, | |
3478 | + align, addr); | |
3479 | +#endif | |
3480 | + | |
3481 | + ptr = phys_to_virt(addr); | |
3482 | + memset(ptr, 0, size); | |
3483 | + reserve_early_without_check(addr, addr + size, "BOOTMEM"); | |
3484 | + return ptr; | |
3485 | + } | |
3486 | + | |
3487 | + return NULL; | |
3488 | +} | |
3489 | + | |
3490 | + | |
3438 | 3491 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) |
3439 | 3492 | { |
3440 | 3493 | int i; |
... | ... | @@ -4467,7 +4520,11 @@ |
4467 | 4520 | } |
4468 | 4521 | |
4469 | 4522 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
4470 | -struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] }; | |
4523 | +struct pglist_data __refdata contig_page_data = { | |
4524 | +#ifndef CONFIG_NO_BOOTMEM | |
4525 | + .bdata = &bootmem_node_data[0] | |
4526 | +#endif | |
4527 | + }; | |
4471 | 4528 | EXPORT_SYMBOL(contig_page_data); |
4472 | 4529 | #endif |
4473 | 4530 |
mm/percpu.c
... | ... | @@ -1929,7 +1929,10 @@ |
1929 | 1929 | } |
1930 | 1930 | /* copy and return the unused part */ |
1931 | 1931 | memcpy(ptr, __per_cpu_load, ai->static_size); |
1932 | +#ifndef CONFIG_NO_BOOTMEM | |
1933 | + /* fix partial free ! */ | |
1932 | 1934 | free_fn(ptr + size_sum, ai->unit_size - size_sum); |
1935 | +#endif | |
1933 | 1936 | } |
1934 | 1937 | } |
1935 | 1938 |
mm/sparse-vmemmap.c