Commit 46723bfa540f0a1e494476a1734d03626a0bd1e0
Committed by
Linus Torvalds
1 parent
24d335ca36
Exists in
master
and in
20 other branches
memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap
For removing memmap region of sparse-vmemmap which is allocated by bootmem, memmap region of sparse-vmemmap needs to be registered by get_page_bootmem(). So the patch searches pages of virtual mapping and registers the pages by get_page_bootmem(). NOTE: register_page_bootmem_memmap() is not implemented for ia64, ppc, s390, and sparc. So introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node() when platform doesn't support it. It's implemented by adding a new Kconfig option named CONFIG_HAVE_BOOTMEM_INFO_NODE, which will be automatically selected by memory-hotplug feature fully supported archs (currently only on x86_64). Since we have 2 config options called MEMORY_HOTPLUG and MEMORY_HOTREMOVE used for memory hot-add and hot-remove separately, and code in function register_page_bootmem_info_node() is only used for collecting information for hot-remove, so reside it under MEMORY_HOTREMOVE. Besides page_isolation.c selected by MEMORY_ISOLATION under MEMORY_HOTPLUG is also such case, move it too. [mhocko@suse.cz: put register_page_bootmem_memmap inside CONFIG_MEMORY_HOTPLUG_SPARSE] [linfeng@cn.fujitsu.com: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node()] [mhocko@suse.cz: remove the arch specific functions without any implementation] [linfeng@cn.fujitsu.com: mm/Kconfig: move auto selects from MEMORY_HOTPLUG to MEMORY_HOTREMOVE as needed] [rientjes@google.com: fix defined but not used warning] Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com> Reviewed-by: Wu Jianguo <wujianguo@huawei.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Jiang Liu <jiang.liu@huawei.com> Cc: Jianguo Wu <wujianguo@huawei.com> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Signed-off-by: Michal Hocko <mhocko@suse.cz> Signed-off-by: Lin Feng <linfeng@cn.fujitsu.com> Signed-off-by: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 8 changed files with 111 additions and 13 deletions Side-by-side Diff
arch/ia64/mm/discontig.c
arch/powerpc/mm/init_64.c
arch/sparc/mm/init_64.c
arch/x86/mm/init_64.c
... | ... | @@ -1034,6 +1034,66 @@ |
1034 | 1034 | return 0; |
1035 | 1035 | } |
1036 | 1036 | |
1037 | +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) | |
1038 | +void register_page_bootmem_memmap(unsigned long section_nr, | |
1039 | + struct page *start_page, unsigned long size) | |
1040 | +{ | |
1041 | + unsigned long addr = (unsigned long)start_page; | |
1042 | + unsigned long end = (unsigned long)(start_page + size); | |
1043 | + unsigned long next; | |
1044 | + pgd_t *pgd; | |
1045 | + pud_t *pud; | |
1046 | + pmd_t *pmd; | |
1047 | + unsigned int nr_pages; | |
1048 | + struct page *page; | |
1049 | + | |
1050 | + for (; addr < end; addr = next) { | |
1051 | + pte_t *pte = NULL; | |
1052 | + | |
1053 | + pgd = pgd_offset_k(addr); | |
1054 | + if (pgd_none(*pgd)) { | |
1055 | + next = (addr + PAGE_SIZE) & PAGE_MASK; | |
1056 | + continue; | |
1057 | + } | |
1058 | + get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO); | |
1059 | + | |
1060 | + pud = pud_offset(pgd, addr); | |
1061 | + if (pud_none(*pud)) { | |
1062 | + next = (addr + PAGE_SIZE) & PAGE_MASK; | |
1063 | + continue; | |
1064 | + } | |
1065 | + get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); | |
1066 | + | |
1067 | + if (!cpu_has_pse) { | |
1068 | + next = (addr + PAGE_SIZE) & PAGE_MASK; | |
1069 | + pmd = pmd_offset(pud, addr); | |
1070 | + if (pmd_none(*pmd)) | |
1071 | + continue; | |
1072 | + get_page_bootmem(section_nr, pmd_page(*pmd), | |
1073 | + MIX_SECTION_INFO); | |
1074 | + | |
1075 | + pte = pte_offset_kernel(pmd, addr); | |
1076 | + if (pte_none(*pte)) | |
1077 | + continue; | |
1078 | + get_page_bootmem(section_nr, pte_page(*pte), | |
1079 | + SECTION_INFO); | |
1080 | + } else { | |
1081 | + next = pmd_addr_end(addr, end); | |
1082 | + | |
1083 | + pmd = pmd_offset(pud, addr); | |
1084 | + if (pmd_none(*pmd)) | |
1085 | + continue; | |
1086 | + | |
1087 | + nr_pages = 1 << (get_order(PMD_SIZE)); | |
1088 | + page = pmd_page(*pmd); | |
1089 | + while (nr_pages--) | |
1090 | + get_page_bootmem(section_nr, page++, | |
1091 | + SECTION_INFO); | |
1092 | + } | |
1093 | + } | |
1094 | +} | |
1095 | +#endif | |
1096 | + | |
1037 | 1097 | void __meminit vmemmap_populate_print_last(void) |
1038 | 1098 | { |
1039 | 1099 | if (p_start) { |
include/linux/memory_hotplug.h
... | ... | @@ -174,17 +174,16 @@ |
174 | 174 | #endif /* CONFIG_NUMA */ |
175 | 175 | #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ |
176 | 176 | |
177 | -#ifdef CONFIG_SPARSEMEM_VMEMMAP | |
177 | +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE | |
178 | +extern void register_page_bootmem_info_node(struct pglist_data *pgdat); | |
179 | +#else | |
178 | 180 | static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) |
179 | 181 | { |
180 | 182 | } |
181 | -static inline void put_page_bootmem(struct page *page) | |
182 | -{ | |
183 | -} | |
184 | -#else | |
185 | -extern void register_page_bootmem_info_node(struct pglist_data *pgdat); | |
186 | -extern void put_page_bootmem(struct page *page); | |
187 | 183 | #endif |
184 | +extern void put_page_bootmem(struct page *page); | |
185 | +extern void get_page_bootmem(unsigned long ingo, struct page *page, | |
186 | + unsigned long type); | |
188 | 187 | |
189 | 188 | /* |
190 | 189 | * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug |
include/linux/mm.h
... | ... | @@ -1718,7 +1718,8 @@ |
1718 | 1718 | unsigned long pages, int node); |
1719 | 1719 | int vmemmap_populate(struct page *start_page, unsigned long pages, int node); |
1720 | 1720 | void vmemmap_populate_print_last(void); |
1721 | - | |
1721 | +void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, | |
1722 | + unsigned long size); | |
1722 | 1723 | |
1723 | 1724 | enum mf_flags { |
1724 | 1725 | MF_COUNT_INCREASED = 1 << 0, |
mm/Kconfig
... | ... | @@ -162,10 +162,16 @@ |
162 | 162 | Say Y here if you want to hotplug a whole node. |
163 | 163 | Say N here if you want kernel to use memory on all nodes evenly. |
164 | 164 | |
165 | +# | |
166 | +# Only be set on architectures that have completely implemented memory hotplug | |
167 | +# feature. If you are not sure, don't touch it. | |
168 | +# | |
169 | +config HAVE_BOOTMEM_INFO_NODE | |
170 | + def_bool n | |
171 | + | |
165 | 172 | # eventually, we can have this option just 'select SPARSEMEM' |
166 | 173 | config MEMORY_HOTPLUG |
167 | 174 | bool "Allow for memory hot-add" |
168 | - select MEMORY_ISOLATION | |
169 | 175 | depends on SPARSEMEM || X86_64_ACPI_NUMA |
170 | 176 | depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG |
171 | 177 | depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) |
... | ... | @@ -176,6 +182,8 @@ |
176 | 182 | |
177 | 183 | config MEMORY_HOTREMOVE |
178 | 184 | bool "Allow for memory hot remove" |
185 | + select MEMORY_ISOLATION | |
186 | + select HAVE_BOOTMEM_INFO_NODE if X86_64 | |
179 | 187 | depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE |
180 | 188 | depends on MIGRATION |
181 | 189 |
mm/memory_hotplug.c
... | ... | @@ -91,9 +91,8 @@ |
91 | 91 | } |
92 | 92 | |
93 | 93 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
94 | -#ifndef CONFIG_SPARSEMEM_VMEMMAP | |
95 | -static void get_page_bootmem(unsigned long info, struct page *page, | |
96 | - unsigned long type) | |
94 | +void get_page_bootmem(unsigned long info, struct page *page, | |
95 | + unsigned long type) | |
97 | 96 | { |
98 | 97 | page->lru.next = (struct list_head *) type; |
99 | 98 | SetPagePrivate(page); |
... | ... | @@ -128,6 +127,8 @@ |
128 | 127 | |
129 | 128 | } |
130 | 129 | |
130 | +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE | |
131 | +#ifndef CONFIG_SPARSEMEM_VMEMMAP | |
131 | 132 | static void register_page_bootmem_info_section(unsigned long start_pfn) |
132 | 133 | { |
133 | 134 | unsigned long *usemap, mapsize, section_nr, i; |
134 | 135 | |
... | ... | @@ -161,7 +162,33 @@ |
161 | 162 | get_page_bootmem(section_nr, page, MIX_SECTION_INFO); |
162 | 163 | |
163 | 164 | } |
165 | +#else /* CONFIG_SPARSEMEM_VMEMMAP */ | |
166 | +static void register_page_bootmem_info_section(unsigned long start_pfn) | |
167 | +{ | |
168 | + unsigned long *usemap, mapsize, section_nr, i; | |
169 | + struct mem_section *ms; | |
170 | + struct page *page, *memmap; | |
164 | 171 | |
172 | + if (!pfn_valid(start_pfn)) | |
173 | + return; | |
174 | + | |
175 | + section_nr = pfn_to_section_nr(start_pfn); | |
176 | + ms = __nr_to_section(section_nr); | |
177 | + | |
178 | + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); | |
179 | + | |
180 | + register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION); | |
181 | + | |
182 | + usemap = __nr_to_section(section_nr)->pageblock_flags; | |
183 | + page = virt_to_page(usemap); | |
184 | + | |
185 | + mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; | |
186 | + | |
187 | + for (i = 0; i < mapsize; i++, page++) | |
188 | + get_page_bootmem(section_nr, page, MIX_SECTION_INFO); | |
189 | +} | |
190 | +#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | |
191 | + | |
165 | 192 | void register_page_bootmem_info_node(struct pglist_data *pgdat) |
166 | 193 | { |
167 | 194 | unsigned long i, pfn, end_pfn, nr_pages; |
... | ... | @@ -203,7 +230,7 @@ |
203 | 230 | register_page_bootmem_info_section(pfn); |
204 | 231 | } |
205 | 232 | } |
206 | -#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | |
233 | +#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ | |
207 | 234 | |
208 | 235 | static void grow_zone_span(struct zone *zone, unsigned long start_pfn, |
209 | 236 | unsigned long end_pfn) |