Commit 08677214e318297f228237be0042aac754f48f1d

Authored by Yinghai Lu
Committed by H. Peter Anvin
1 parent c252a5bb1f

x86: Make 64 bit use early_res instead of bootmem before slab

Finally, we can use early_res to replace bootmem on x86_64.

CONFIG_NO_BOOTMEM can still be used to turn this on or off.

-v2: fix 32-bit build breakage around MAX_DMA32_PFN
-v3: folded in the bug fix from the LKML message referenced below

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B747239.4070907@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
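
To make the shape of the change easier to follow: below is the CONFIG_NO_BOOTMEM allocation path, condensed from the ___alloc_bootmem_nopanic() hunk in mm/bootmem.c further down (debug output omitted, and the goto-based retry rewritten as a straight-line fallback). Once slab is up the call degrades to kzalloc(); before that, memory comes straight out of the active early_node_map ranges and the result is recorded in early_res.

	#ifdef CONFIG_NO_BOOTMEM
	static void * __init ___alloc_bootmem_nopanic(unsigned long size,
						      unsigned long align,
						      unsigned long goal,
						      unsigned long limit)
	{
		void *ptr;

		/* too late for early_res: fall back to the slab allocator */
		if (WARN_ON_ONCE(slab_is_available()))
			return kzalloc(size, GFP_NOWAIT);

		/* honor the goal if possible, then retry anywhere below limit */
		ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
						goal, limit);
		if (!ptr && goal)
			ptr = __alloc_memory_core_early(MAX_NUMNODES, size,
							align, 0, limit);
		return ptr;
	}
	#endif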

Showing 13 changed files with 454 additions and 23 deletions

arch/x86/Kconfig
... ... @@ -568,6 +568,19 @@
568 568 Enable to debug paravirt_ops internals. Specifically, BUG if
569 569 a paravirt_op is missing when it is called.
570 570  
  571 +config NO_BOOTMEM
  572 + default y
  573 + bool "Disable Bootmem code"
  574 + depends on X86_64
  575 + ---help---
  576 + Use early_res directly instead of bootmem before slab is ready.
  577 + - allocator (buddy) [generic]
  578 + - early allocator (bootmem) [generic]
  579 + - very early allocator (reserve_early*()) [x86]
  580 + - very very early allocator (early brk model) [x86]
  581 + This removes one layer between the early allocator and the final allocator.
  582 +
  583 +
571 584 config MEMTEST
572 585 bool "Memtest"
573 586 ---help---
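
The layering listed in the help text above collapses at the freeing end as well. A condensed sketch from the mm/bootmem.c hunks below: free ranges are computed as early_node_map minus early_res and handed straight to the buddy allocator, with no bootmem bitmap in between.

	#ifdef CONFIG_NO_BOOTMEM
	unsigned long __init free_all_memory_core_early(int nodeid)
	{
		struct range *range = NULL;
		unsigned long count = 0;
		int i, nr_range;

		/* active ranges minus early reservations, sorted and merged */
		nr_range = get_free_all_memory_range(&range, nodeid);

		for (i = 0; i < nr_range; i++) {
			count += range[i].end - range[i].start;
			/* hand whole pfn ranges to the buddy allocator */
			__free_pages_memory(range[i].start, range[i].end);
		}
		return count;
	}
	#endif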
arch/x86/include/asm/e820.h
... ... @@ -117,6 +117,12 @@
117 117 extern void early_res_to_bootmem(u64 start, u64 end);
118 118 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
119 119  
  120 +void reserve_early_without_check(u64 start, u64 end, char *name);
  121 +u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
  122 + u64 size, u64 align);
  123 +#include <linux/range.h>
  124 +int get_free_all_memory_range(struct range **rangep, int nodeid);
  125 +
120 126 extern unsigned long e820_end_of_ram_pfn(void);
121 127 extern unsigned long e820_end_of_low_ram_pfn(void);
122 128 extern int e820_find_active_region(const struct e820entry *ei,
arch/x86/kernel/e820.c
... ... @@ -977,6 +977,25 @@
977 977 __reserve_early(start, end, name, 0);
978 978 }
979 979  
  980 +void __init reserve_early_without_check(u64 start, u64 end, char *name)
  981 +{
  982 + struct early_res *r;
  983 +
  984 + if (start >= end)
  985 + return;
  986 +
  987 + __check_and_double_early_res(end);
  988 +
  989 + r = &early_res[early_res_count];
  990 +
  991 + r->start = start;
  992 + r->end = end;
  993 + r->overlap_ok = 0;
  994 + if (name)
  995 + strncpy(r->name, name, sizeof(r->name) - 1);
  996 + early_res_count++;
  997 +}
  998 +
980 999 void __init free_early(u64 start, u64 end)
981 1000 {
982 1001 struct early_res *r;
... ... @@ -991,6 +1010,94 @@
991 1010 drop_range(i);
992 1011 }
993 1012  
  1013 +#ifdef CONFIG_NO_BOOTMEM
  1014 +static void __init subtract_early_res(struct range *range, int az)
  1015 +{
  1016 + int i, count;
  1017 + u64 final_start, final_end;
  1018 + int idx = 0;
  1019 +
  1020 + count = 0;
  1021 + for (i = 0; i < max_early_res && early_res[i].end; i++)
  1022 + count++;
  1023 +
  1024 + /* need to skip the first one? */
  1025 + if (early_res != early_res_x)
  1026 + idx = 1;
  1027 +
  1028 +#if 1
  1029 + printk(KERN_INFO "Subtract (%d early reservations)\n", count);
  1030 +#endif
  1031 + for (i = idx; i < count; i++) {
  1032 + struct early_res *r = &early_res[i];
  1033 +#if 0
  1034 + printk(KERN_INFO " #%d [%010llx - %010llx] %15s", i,
  1035 + r->start, r->end, r->name);
  1036 +#endif
  1037 + final_start = PFN_DOWN(r->start);
  1038 + final_end = PFN_UP(r->end);
  1039 + if (final_start >= final_end) {
  1040 +#if 0
  1041 + printk(KERN_CONT "\n");
  1042 +#endif
  1043 + continue;
  1044 + }
  1045 +#if 0
  1046 + printk(KERN_CONT " subtract pfn [%010llx - %010llx]\n",
  1047 + final_start, final_end);
  1048 +#endif
  1049 + subtract_range(range, az, final_start, final_end);
  1050 + }
  1051 +
  1052 +}
  1053 +
  1054 +int __init get_free_all_memory_range(struct range **rangep, int nodeid)
  1055 +{
  1056 + int i, count;
  1057 + u64 start = 0, end;
  1058 + u64 size;
  1059 + u64 mem;
  1060 + struct range *range;
  1061 + int nr_range;
  1062 +
  1063 + count = 0;
  1064 + for (i = 0; i < max_early_res && early_res[i].end; i++)
  1065 + count++;
  1066 +
  1067 + count *= 2;
  1068 +
  1069 + size = sizeof(struct range) * count;
  1070 +#ifdef MAX_DMA32_PFN
  1071 + if (max_pfn_mapped > MAX_DMA32_PFN)
  1072 + start = MAX_DMA32_PFN << PAGE_SHIFT;
  1073 +#endif
  1074 + end = max_pfn_mapped << PAGE_SHIFT;
  1075 + mem = find_e820_area(start, end, size, sizeof(struct range));
  1076 + if (mem == -1ULL)
  1077 + panic("cannot find space for the free range array");
  1078 +
  1079 + range = __va(mem);
  1080 + /* use early_node_map[] and early_res to get range array at first */
  1081 + memset(range, 0, size);
  1082 + nr_range = 0;
  1083 +
  1084 + /* need to go over early_node_map to find out good range for node */
  1085 + nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
  1086 + subtract_early_res(range, count);
  1087 + nr_range = clean_sort_range(range, count);
  1088 +
  1089 + /* need to clear it ? */
  1090 + if (nodeid == MAX_NUMNODES) {
  1091 + memset(&early_res[0], 0,
  1092 + sizeof(struct early_res) * max_early_res);
  1093 + early_res = NULL;
  1094 + max_early_res = 0;
  1095 + }
  1096 +
  1097 + *rangep = range;
  1098 + return nr_range;
  1099 +}
  1100 +#else
994 1101 void __init early_res_to_bootmem(u64 start, u64 end)
995 1102 {
996 1103 int i, count;
... ... @@ -1028,6 +1135,7 @@
1028 1135 max_early_res = 0;
1029 1136 early_res_count = 0;
1030 1137 }
  1138 +#endif
1031 1139  
1032 1140 /* Check for already reserved areas */
1033 1141 static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
1034 1142  
1035 1143  
1036 1144  
1037 1145  
1038 1146  
... ... @@ -1083,31 +1191,56 @@
1083 1191  
1084 1192 /*
1085 1193 * Find a free area with specified alignment in a specific range.
  1194 + * Only the area between start and end that is an active range from
  1195 + * early_node_map is used, so it is known to be good RAM.
1086 1196 */
  1197 +u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
  1198 + u64 size, u64 align)
  1199 +{
  1200 + u64 addr, last;
  1201 +
  1202 + addr = round_up(ei_start, align);
  1203 + if (addr < start)
  1204 + addr = round_up(start, align);
  1205 + if (addr >= ei_last)
  1206 + goto out;
  1207 + while (bad_addr(&addr, size, align) && addr+size <= ei_last)
  1208 + ;
  1209 + last = addr + size;
  1210 + if (last > ei_last)
  1211 + goto out;
  1212 + if (last > end)
  1213 + goto out;
  1214 +
  1215 + return addr;
  1216 +
  1217 +out:
  1218 + return -1ULL;
  1219 +}
  1220 +
  1221 +/*
  1222 + * Find a free area with specified alignment in a specific range.
  1223 + */
1087 1224 u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
1088 1225 {
1089 1226 int i;
1090 1227  
1091 1228 for (i = 0; i < e820.nr_map; i++) {
1092 1229 struct e820entry *ei = &e820.map[i];
1093   - u64 addr, last;
1094   - u64 ei_last;
  1230 + u64 addr;
  1231 + u64 ei_start, ei_last;
1095 1232  
1096 1233 if (ei->type != E820_RAM)
1097 1234 continue;
1098   - addr = round_up(ei->addr, align);
  1235 +
1099 1236 ei_last = ei->addr + ei->size;
1100   - if (addr < start)
1101   - addr = round_up(start, align);
1102   - if (addr >= ei_last)
  1237 + ei_start = ei->addr;
  1238 + addr = find_early_area(ei_start, ei_last, start, end,
  1239 + size, align);
  1240 +
  1241 + if (addr == -1ULL)
1103 1242 continue;
1104   - while (bad_addr(&addr, size, align) && addr+size <= ei_last)
1105   - ;
1106   - last = addr + size;
1107   - if (last > ei_last)
1108   - continue;
1109   - if (last > end)
1110   - continue;
  1243 +
1111 1244 return addr;
1112 1245 }
1113 1246 return -1ULL;
arch/x86/kernel/setup.c
... ... @@ -967,7 +967,9 @@
967 967 #endif
968 968  
969 969 initmem_init(0, max_pfn, acpi, k8);
  970 +#ifndef CONFIG_NO_BOOTMEM
970 971 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
  972 +#endif
971 973  
972 974 dma32_reserve_bootmem();
973 975  
arch/x86/mm/init_64.c
... ... @@ -572,6 +572,7 @@
572 572 void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
573 573 int acpi, int k8)
574 574 {
  575 +#ifndef CONFIG_NO_BOOTMEM
575 576 unsigned long bootmap_size, bootmap;
576 577  
577 578 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
... ... @@ -585,6 +586,9 @@
585 586 0, end_pfn);
586 587 e820_register_active_regions(0, start_pfn, end_pfn);
587 588 free_bootmem_with_active_regions(0, end_pfn);
  589 +#else
  590 + e820_register_active_regions(0, start_pfn, end_pfn);
  591 +#endif
588 592 }
589 593 #endif
590 594  
arch/x86/mm/numa_64.c
... ... @@ -198,11 +198,13 @@
198 198 void __init
199 199 setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
200 200 {
201   - unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
  201 + unsigned long start_pfn, last_pfn, nodedata_phys;
202 202 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
203   - unsigned long bootmap_start, nodedata_phys;
204   - void *bootmap;
205 203 int nid;
  204 +#ifndef CONFIG_NO_BOOTMEM
  205 + unsigned long bootmap_start, bootmap_pages, bootmap_size;
  206 + void *bootmap;
  207 +#endif
206 208  
207 209 if (!end)
208 210 return;
... ... @@ -216,7 +218,7 @@
216 218  
217 219 start = roundup(start, ZONE_ALIGN);
218 220  
219   - printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
  221 + printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
220 222 start, end);
221 223  
222 224 start_pfn = start >> PAGE_SHIFT;
223 225  
... ... @@ -235,10 +237,13 @@
235 237 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
236 238  
237 239 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
238   - NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
  240 + NODE_DATA(nodeid)->node_id = nodeid;
239 241 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
240 242 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
241 243  
  244 +#ifndef CONFIG_NO_BOOTMEM
  245 + NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
  246 +
242 247 /*
243 248 * Find a place for the bootmem map
244 249 * nodedata_phys could be on other nodes by alloc_bootmem,
... ... @@ -275,6 +280,7 @@
275 280 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
276 281  
277 282 free_bootmem_with_active_regions(nodeid, end);
  283 +#endif
278 284  
279 285 node_set_online(nodeid);
280 286 }
... ... @@ -732,6 +738,10 @@
732 738  
733 739 for_each_online_node(i)
734 740 pages += free_all_bootmem_node(NODE_DATA(i));
  741 +
  742 +#ifdef CONFIG_NO_BOOTMEM
  743 + pages += free_all_memory_core_early(MAX_NUMNODES);
  744 +#endif
735 745  
736 746 return pages;
737 747 }
include/linux/bootmem.h
... ... @@ -23,6 +23,7 @@
23 23 extern unsigned long saved_max_pfn;
24 24 #endif
25 25  
  26 +#ifndef CONFIG_NO_BOOTMEM
26 27 /*
27 28 * node_bootmem_map is a map pointer - the bits represent all physical
28 29 * memory pages (including holes) on the node.
... ... @@ -37,6 +38,7 @@
37 38 } bootmem_data_t;
38 39  
39 40 extern bootmem_data_t bootmem_node_data[];
  41 +#endif
40 42  
41 43 extern unsigned long bootmem_bootmap_pages(unsigned long);
42 44  
... ... @@ -46,6 +48,7 @@
46 48 unsigned long endpfn);
47 49 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
48 50  
  51 +unsigned long free_all_memory_core_early(int nodeid);
49 52 extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
50 53 extern unsigned long free_all_bootmem(void);
51 54  
... ... @@ -81,6 +84,10 @@
81 84 unsigned long align,
82 85 unsigned long goal);
83 86 extern void *__alloc_bootmem_node(pg_data_t *pgdat,
  87 + unsigned long size,
  88 + unsigned long align,
  89 + unsigned long goal);
  90 +void *__alloc_bootmem_node_high(pg_data_t *pgdat,
84 91 unsigned long size,
85 92 unsigned long align,
86 93 unsigned long goal);
include/linux/mm.h
... ... @@ -12,6 +12,7 @@
12 12 #include <linux/prio_tree.h>
13 13 #include <linux/debug_locks.h>
14 14 #include <linux/mm_types.h>
  15 +#include <linux/range.h>
15 16  
16 17 struct mempolicy;
17 18 struct anon_vma;
... ... @@ -1049,6 +1050,10 @@
1049 1050 extern unsigned long find_min_pfn_with_active_regions(void);
1050 1051 extern void free_bootmem_with_active_regions(int nid,
1051 1052 unsigned long max_low_pfn);
  1053 +int add_from_early_node_map(struct range *range, int az,
  1054 + int nr_range, int nid);
  1055 +void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
  1056 + u64 goal, u64 limit);
1052 1057 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
1053 1058 extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
1054 1059 extern void sparse_memory_present_with_active_regions(int nid);
include/linux/mmzone.h
... ... @@ -620,7 +620,9 @@
620 620 struct page_cgroup *node_page_cgroup;
621 621 #endif
622 622 #endif
  623 +#ifndef CONFIG_NO_BOOTMEM
623 624 struct bootmem_data *bdata;
  625 +#endif
624 626 #ifdef CONFIG_MEMORY_HOTPLUG
625 627 /*
626 628 * Must be held any time you expect node_start_pfn, node_present_pages
mm/bootmem.c
... ... @@ -13,6 +13,7 @@
13 13 #include <linux/bootmem.h>
14 14 #include <linux/module.h>
15 15 #include <linux/kmemleak.h>
  16 +#include <linux/range.h>
16 17  
17 18 #include <asm/bug.h>
18 19 #include <asm/io.h>
... ... @@ -32,6 +33,7 @@
32 33 unsigned long saved_max_pfn;
33 34 #endif
34 35  
  36 +#ifndef CONFIG_NO_BOOTMEM
35 37 bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
36 38  
37 39 static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
... ... @@ -142,7 +144,7 @@
142 144 min_low_pfn = start;
143 145 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
144 146 }
145   -
  147 +#endif
146 148 /*
147 149 * free_bootmem_late - free bootmem pages directly to page allocator
148 150 * @addr: starting address of the range
... ... @@ -167,6 +169,60 @@
167 169 }
168 170 }
169 171  
  172 +#ifdef CONFIG_NO_BOOTMEM
  173 +static void __init __free_pages_memory(unsigned long start, unsigned long end)
  174 +{
  175 + int i;
  176 + unsigned long start_aligned, end_aligned;
  177 + int order = ilog2(BITS_PER_LONG);
  178 +
  179 + start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
  180 + end_aligned = end & ~(BITS_PER_LONG - 1);
  181 +
  182 + if (end_aligned <= start_aligned) {
  183 +#if 1
  184 + printk(KERN_DEBUG " %lx - %lx\n", start, end);
  185 +#endif
  186 + for (i = start; i < end; i++)
  187 + __free_pages_bootmem(pfn_to_page(i), 0);
  188 +
  189 + return;
  190 + }
  191 +
  192 +#if 1
  193 + printk(KERN_DEBUG " %lx %lx - %lx %lx\n",
  194 + start, start_aligned, end_aligned, end);
  195 +#endif
  196 + for (i = start; i < start_aligned; i++)
  197 + __free_pages_bootmem(pfn_to_page(i), 0);
  198 +
  199 + for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
  200 + __free_pages_bootmem(pfn_to_page(i), order);
  201 +
  202 + for (i = end_aligned; i < end; i++)
  203 + __free_pages_bootmem(pfn_to_page(i), 0);
  204 +}
  205 +
  206 +unsigned long __init free_all_memory_core_early(int nodeid)
  207 +{
  208 + int i;
  209 + u64 start, end;
  210 + unsigned long count = 0;
  211 + struct range *range = NULL;
  212 + int nr_range;
  213 +
  214 + nr_range = get_free_all_memory_range(&range, nodeid);
  215 +
  216 + for (i = 0; i < nr_range; i++) {
  217 + start = range[i].start;
  218 + end = range[i].end;
  219 + count += end - start;
  220 + __free_pages_memory(start, end);
  221 + }
  222 +
  223 + return count;
  224 +}
  225 +#else
170 226 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
171 227 {
172 228 int aligned;
... ... @@ -227,6 +283,7 @@
227 283  
228 284 return count;
229 285 }
  286 +#endif
230 287  
231 288 /**
232 289 * free_all_bootmem_node - release a node's free pages to the buddy allocator
233 290  
... ... @@ -237,7 +294,12 @@
237 294 unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
238 295 {
239 296 register_page_bootmem_info_node(pgdat);
  297 +#ifdef CONFIG_NO_BOOTMEM
  298 + /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
  299 + return 0;
  300 +#else
240 301 return free_all_bootmem_core(pgdat->bdata);
  302 +#endif
241 303 }
242 304  
243 305 /**
244 306  
245 307  
... ... @@ -247,9 +309,14 @@
247 309 */
248 310 unsigned long __init free_all_bootmem(void)
249 311 {
  312 +#ifdef CONFIG_NO_BOOTMEM
  313 + return free_all_memory_core_early(NODE_DATA(0)->node_id);
  314 +#else
250 315 return free_all_bootmem_core(NODE_DATA(0)->bdata);
  316 +#endif
251 317 }
252 318  
  319 +#ifndef CONFIG_NO_BOOTMEM
253 320 static void __init __free(bootmem_data_t *bdata,
254 321 unsigned long sidx, unsigned long eidx)
255 322 {
... ... @@ -344,6 +411,7 @@
344 411 }
345 412 BUG();
346 413 }
  414 +#endif
347 415  
348 416 /**
349 417 * free_bootmem_node - mark a page range as usable
... ... @@ -358,6 +426,12 @@
358 426 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
359 427 unsigned long size)
360 428 {
  429 +#ifdef CONFIG_NO_BOOTMEM
  430 + free_early(physaddr, physaddr + size);
  431 +#if 0
  432 + printk(KERN_DEBUG "free %lx %lx\n", physaddr, size);
  433 +#endif
  434 +#else
361 435 unsigned long start, end;
362 436  
363 437 kmemleak_free_part(__va(physaddr), size);
... ... @@ -366,6 +440,7 @@
366 440 end = PFN_DOWN(physaddr + size);
367 441  
368 442 mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
  443 +#endif
369 444 }
370 445  
371 446 /**
... ... @@ -379,6 +454,12 @@
379 454 */
380 455 void __init free_bootmem(unsigned long addr, unsigned long size)
381 456 {
  457 +#ifdef CONFIG_NO_BOOTMEM
  458 + free_early(addr, addr + size);
  459 +#if 0
  460 + printk(KERN_DEBUG "free %lx %lx\n", addr, size);
  461 +#endif
  462 +#else
382 463 unsigned long start, end;
383 464  
384 465 kmemleak_free_part(__va(addr), size);
... ... @@ -387,6 +468,7 @@
387 468 end = PFN_DOWN(addr + size);
388 469  
389 470 mark_bootmem(start, end, 0, 0);
  471 +#endif
390 472 }
391 473  
392 474 /**
393 475  
... ... @@ -403,12 +485,17 @@
403 485 int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
404 486 unsigned long size, int flags)
405 487 {
  488 +#ifdef CONFIG_NO_BOOTMEM
  489 + panic("no bootmem");
  490 + return 0;
  491 +#else
406 492 unsigned long start, end;
407 493  
408 494 start = PFN_DOWN(physaddr);
409 495 end = PFN_UP(physaddr + size);
410 496  
411 497 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
  498 +#endif
412 499 }
413 500  
414 501 /**
415 502  
416 503  
... ... @@ -424,14 +511,20 @@
424 511 int __init reserve_bootmem(unsigned long addr, unsigned long size,
425 512 int flags)
426 513 {
  514 +#ifdef CONFIG_NO_BOOTMEM
  515 + panic("no bootmem");
  516 + return 0;
  517 +#else
427 518 unsigned long start, end;
428 519  
429 520 start = PFN_DOWN(addr);
430 521 end = PFN_UP(addr + size);
431 522  
432 523 return mark_bootmem(start, end, 1, flags);
  524 +#endif
433 525 }
434 526  
  527 +#ifndef CONFIG_NO_BOOTMEM
435 528 static unsigned long __init align_idx(struct bootmem_data *bdata,
436 529 unsigned long idx, unsigned long step)
437 530 {
438 531  
... ... @@ -582,12 +675,33 @@
582 675 #endif
583 676 return NULL;
584 677 }
  678 +#endif
585 679  
586 680 static void * __init ___alloc_bootmem_nopanic(unsigned long size,
587 681 unsigned long align,
588 682 unsigned long goal,
589 683 unsigned long limit)
590 684 {
  685 +#ifdef CONFIG_NO_BOOTMEM
  686 + void *ptr;
  687 +
  688 + if (WARN_ON_ONCE(slab_is_available()))
  689 + return kzalloc(size, GFP_NOWAIT);
  690 +
  691 +restart:
  692 +
  693 + ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
  694 +
  695 + if (ptr)
  696 + return ptr;
  697 +
  698 + if (goal != 0) {
  699 + goal = 0;
  700 + goto restart;
  701 + }
  702 +
  703 + return NULL;
  704 +#else
591 705 bootmem_data_t *bdata;
592 706 void *region;
593 707  
... ... @@ -613,6 +727,7 @@
613 727 }
614 728  
615 729 return NULL;
  730 +#endif
616 731 }
617 732  
618 733 /**
... ... @@ -631,7 +746,13 @@
631 746 void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
632 747 unsigned long goal)
633 748 {
634   - return ___alloc_bootmem_nopanic(size, align, goal, 0);
  749 + unsigned long limit = 0;
  750 +
  751 +#ifdef CONFIG_NO_BOOTMEM
  752 + limit = -1UL;
  753 +#endif
  754 +
  755 + return ___alloc_bootmem_nopanic(size, align, goal, limit);
635 756 }
636 757  
637 758 static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
638 759  
... ... @@ -665,9 +786,16 @@
665 786 void * __init __alloc_bootmem(unsigned long size, unsigned long align,
666 787 unsigned long goal)
667 788 {
668   - return ___alloc_bootmem(size, align, goal, 0);
  789 + unsigned long limit = 0;
  790 +
  791 +#ifdef CONFIG_NO_BOOTMEM
  792 + limit = -1UL;
  793 +#endif
  794 +
  795 + return ___alloc_bootmem(size, align, goal, limit);
669 796 }
670 797  
  798 +#ifndef CONFIG_NO_BOOTMEM
671 799 static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
672 800 unsigned long size, unsigned long align,
673 801 unsigned long goal, unsigned long limit)
... ... @@ -684,6 +812,7 @@
684 812  
685 813 return ___alloc_bootmem(size, align, goal, limit);
686 814 }
  815 +#endif
687 816  
688 817 /**
689 818 * __alloc_bootmem_node - allocate boot memory from a specific node
690 819  
691 820  
... ... @@ -706,9 +835,48 @@
706 835 if (WARN_ON_ONCE(slab_is_available()))
707 836 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
708 837  
  838 +#ifdef CONFIG_NO_BOOTMEM
  839 + return __alloc_memory_core_early(pgdat->node_id, size, align,
  840 + goal, -1ULL);
  841 +#else
709 842 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
  843 +#endif
710 844 }
711 845  
  846 +void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
  847 + unsigned long align, unsigned long goal)
  848 +{
  849 +#ifdef MAX_DMA32_PFN
  850 + unsigned long end_pfn;
  851 +
  852 + if (WARN_ON_ONCE(slab_is_available()))
  853 + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
  854 +
  855 + /* update goal according to MAX_DMA32_PFN */
  856 + end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
  857 +
  858 + if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
  859 + (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
  860 + void *ptr;
  861 + unsigned long new_goal;
  862 +
  863 + new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
  864 +#ifdef CONFIG_NO_BOOTMEM
  865 + ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
  866 + new_goal, -1ULL);
  867 +#else
  868 + ptr = alloc_bootmem_core(pgdat->bdata, size, align,
  869 + new_goal, 0);
  870 +#endif
  871 + if (ptr)
  872 + return ptr;
  873 + }
  874 +#endif
  875 +
  876 + return __alloc_bootmem_node(pgdat, size, align, goal);
  877 +
  878 +}
  879 +
712 880 #ifdef CONFIG_SPARSEMEM
713 881 /**
714 882 * alloc_bootmem_section - allocate boot memory from a specific section
... ... @@ -720,6 +888,16 @@
720 888 void * __init alloc_bootmem_section(unsigned long size,
721 889 unsigned long section_nr)
722 890 {
  891 +#ifdef CONFIG_NO_BOOTMEM
  892 + unsigned long pfn, goal, limit;
  893 +
  894 + pfn = section_nr_to_pfn(section_nr);
  895 + goal = pfn << PAGE_SHIFT;
  896 + limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
  897 +
  898 + return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
  899 + SMP_CACHE_BYTES, goal, limit);
  900 +#else
723 901 bootmem_data_t *bdata;
724 902 unsigned long pfn, goal, limit;
725 903  
... ... @@ -729,6 +907,7 @@
729 907 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
730 908  
731 909 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
  910 +#endif
732 911 }
733 912 #endif
734 913  
735 914  
... ... @@ -740,11 +919,16 @@
740 919 if (WARN_ON_ONCE(slab_is_available()))
741 920 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
742 921  
  922 +#ifdef CONFIG_NO_BOOTMEM
  923 + ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
  924 + goal, -1ULL);
  925 +#else
743 926 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
744 927 if (ptr)
745 928 return ptr;
746 929  
747 930 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
  931 +#endif
748 932 if (ptr)
749 933 return ptr;
750 934  
751 935  
... ... @@ -795,7 +979,12 @@
795 979 if (WARN_ON_ONCE(slab_is_available()))
796 980 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
797 981  
  982 +#ifdef CONFIG_NO_BOOTMEM
  983 + return __alloc_memory_core_early(pgdat->node_id, size, align,
  984 + goal, ARCH_LOW_ADDRESS_LIMIT);
  985 +#else
798 986 return ___alloc_bootmem_node(pgdat->bdata, size, align,
799 987 goal, ARCH_LOW_ADDRESS_LIMIT);
  988 +#endif
800 989 }
mm/page_alloc.c
... ... @@ -3435,6 +3435,59 @@
3435 3435 }
3436 3436 }
3437 3437  
  3438 +int __init add_from_early_node_map(struct range *range, int az,
  3439 + int nr_range, int nid)
  3440 +{
  3441 + int i;
  3442 + u64 start, end;
  3443 +
  3444 + /* need to go over early_node_map to find out good range for node */
  3445 + for_each_active_range_index_in_nid(i, nid) {
  3446 + start = early_node_map[i].start_pfn;
  3447 + end = early_node_map[i].end_pfn;
  3448 + nr_range = add_range(range, az, nr_range, start, end);
  3449 + }
  3450 + return nr_range;
  3451 +}
  3452 +
  3453 +void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
  3454 + u64 goal, u64 limit)
  3455 +{
  3456 + int i;
  3457 + void *ptr;
  3458 +
  3459 + /* need to go over early_node_map to find out good range for node */
  3460 + for_each_active_range_index_in_nid(i, nid) {
  3461 + u64 addr;
  3462 + u64 ei_start, ei_last;
  3463 +
  3464 + ei_last = early_node_map[i].end_pfn;
  3465 + ei_last <<= PAGE_SHIFT;
  3466 + ei_start = early_node_map[i].start_pfn;
  3467 + ei_start <<= PAGE_SHIFT;
  3468 + addr = find_early_area(ei_start, ei_last,
  3469 + goal, limit, size, align);
  3470 +
  3471 + if (addr == -1ULL)
  3472 + continue;
  3473 +
  3474 +#if 0
  3475 + printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
  3476 + nid,
  3477 + ei_start, ei_last, goal, limit, size,
  3478 + align, addr);
  3479 +#endif
  3480 +
  3481 + ptr = phys_to_virt(addr);
  3482 + memset(ptr, 0, size);
  3483 + reserve_early_without_check(addr, addr + size, "BOOTMEM");
  3484 + return ptr;
  3485 + }
  3486 +
  3487 + return NULL;
  3488 +}
  3489 +
  3490 +
3438 3491 void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3439 3492 {
3440 3493 int i;
... ... @@ -4467,7 +4520,11 @@
4467 4520 }
4468 4521  
4469 4522 #ifndef CONFIG_NEED_MULTIPLE_NODES
4470   -struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] };
  4523 +struct pglist_data __refdata contig_page_data = {
  4524 +#ifndef CONFIG_NO_BOOTMEM
  4525 + .bdata = &bootmem_node_data[0]
  4526 +#endif
  4527 + };
4471 4528 EXPORT_SYMBOL(contig_page_data);
4472 4529 #endif
4473 4530  
mm/percpu.c
... ... @@ -1929,7 +1929,10 @@
1929 1929 }
1930 1930 /* copy and return the unused part */
1931 1931 memcpy(ptr, __per_cpu_load, ai->static_size);
  1932 +#ifndef CONFIG_NO_BOOTMEM
  1933 + /* fix partial free ! */
1932 1934 free_fn(ptr + size_sum, ai->unit_size - size_sum);
  1935 +#endif
1933 1936 }
1934 1937 }
1935 1938  
mm/sparse-vmemmap.c
... ... @@ -40,7 +40,7 @@
40 40 unsigned long align,
41 41 unsigned long goal)
42 42 {
43   - return __alloc_bootmem_node(NODE_DATA(node), size, align, goal);
  43 + return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
44 44 }
45 45  
46 46