Commit 46723bfa540f0a1e494476a1734d03626a0bd1e0

Authored by Yasuaki Ishimatsu
Committed by Linus Torvalds
1 parent 24d335ca36

memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap

To remove a sparse-vmemmap memmap region that was allocated from
bootmem, the pages backing that region first need to be registered via
get_page_bootmem().  So the patch walks the virtual mapping of the
memmap and registers each page it finds with get_page_bootmem().
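
For reference, a sketch of the registration helper, consistent with the
mm/memory_hotplug.c hunk below (the private-data and refcount lines
follow the usual shape of this helper and are assumptions here; see the
diff for the authoritative body):

	void get_page_bootmem(unsigned long info, struct page *page,
			      unsigned long type)
	{
		page->lru.next = (struct list_head *) type; /* SECTION_INFO or MIX_SECTION_INFO */
		SetPagePrivate(page);                       /* mark as bootmem-registered */
		set_page_private(page, info);               /* stash the section number */
		atomic_inc(&page->_count);                  /* pin until hot-remove releases it */
	}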

NOTE: register_page_bootmem_memmap() is not implemented for ia64,
      ppc, s390, and sparc.  So introduce CONFIG_HAVE_BOOTMEM_INFO_NODE
      and stub out register_page_bootmem_info_node() on platforms that
      don't support it.

      This is implemented via a new Kconfig option,
      CONFIG_HAVE_BOOTMEM_INFO_NODE, which is automatically selected by
      architectures that fully support the memory-hotplug feature
      (currently only x86_64).
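
      In practice this is the usual header stub pattern; as the
      include/linux/memory_hotplug.h hunk below shows, platforms
      without the option get an empty inline:

	#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
	extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
	#else
	static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
	{
	}
	#endif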

      Since we have two config options, MEMORY_HOTPLUG and
      MEMORY_HOTREMOVE, covering memory hot-add and hot-remove
      separately, and the code in register_page_bootmem_info_node()
      only collects information for hot-remove, place it under
      MEMORY_HOTREMOVE.

      Likewise, page_isolation.c, selected via MEMORY_ISOLATION under
      MEMORY_HOTPLUG, is in the same situation, so move that select
      too.

[mhocko@suse.cz: put register_page_bootmem_memmap inside CONFIG_MEMORY_HOTPLUG_SPARSE]
[linfeng@cn.fujitsu.com: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node()]
[mhocko@suse.cz: remove the arch specific functions without any implementation]
[linfeng@cn.fujitsu.com: mm/Kconfig: move auto selects from MEMORY_HOTPLUG to MEMORY_HOTREMOVE as needed]
[rientjes@google.com: fix defined but not used warning]
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Wu Jianguo <wujianguo@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Lin Feng <linfeng@cn.fujitsu.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 8 changed files with 111 additions and 13 deletions

arch/ia64/mm/discontig.c
... ... @@ -822,5 +822,6 @@
822 822 {
823 823 return vmemmap_populate_basepages(start_page, size, node);
824 824 }
  825 +
825 826 #endif
arch/powerpc/mm/init_64.c
... ... @@ -297,5 +297,6 @@
297 297  
298 298 return 0;
299 299 }
  300 +
300 301 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
arch/sparc/mm/init_64.c
... ... @@ -2235,6 +2235,7 @@
2235 2235 node_start = 0;
2236 2236 }
2237 2237 }
  2238 +
2238 2239 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
2239 2240  
2240 2241 static void prot_init_common(unsigned long page_none,
arch/x86/mm/init_64.c
... ... @@ -1034,6 +1034,66 @@
1034 1034 return 0;
1035 1035 }
1036 1036  
  1037 +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
  1038 +void register_page_bootmem_memmap(unsigned long section_nr,
  1039 + struct page *start_page, unsigned long size)
  1040 +{
  1041 + unsigned long addr = (unsigned long)start_page;
  1042 + unsigned long end = (unsigned long)(start_page + size);
  1043 + unsigned long next;
  1044 + pgd_t *pgd;
  1045 + pud_t *pud;
  1046 + pmd_t *pmd;
  1047 + unsigned int nr_pages;
  1048 + struct page *page;
  1049 +
  1050 + for (; addr < end; addr = next) {
  1051 + pte_t *pte = NULL;
  1052 +
  1053 + pgd = pgd_offset_k(addr);
  1054 + if (pgd_none(*pgd)) {
  1055 + next = (addr + PAGE_SIZE) & PAGE_MASK;
  1056 + continue;
  1057 + }
  1058 + get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
  1059 +
  1060 + pud = pud_offset(pgd, addr);
  1061 + if (pud_none(*pud)) {
  1062 + next = (addr + PAGE_SIZE) & PAGE_MASK;
  1063 + continue;
  1064 + }
  1065 + get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
  1066 +
  1067 + if (!cpu_has_pse) {
  1068 + next = (addr + PAGE_SIZE) & PAGE_MASK;
  1069 + pmd = pmd_offset(pud, addr);
  1070 + if (pmd_none(*pmd))
  1071 + continue;
  1072 + get_page_bootmem(section_nr, pmd_page(*pmd),
  1073 + MIX_SECTION_INFO);
  1074 +
  1075 + pte = pte_offset_kernel(pmd, addr);
  1076 + if (pte_none(*pte))
  1077 + continue;
  1078 + get_page_bootmem(section_nr, pte_page(*pte),
  1079 + SECTION_INFO);
  1080 + } else {
  1081 + next = pmd_addr_end(addr, end);
  1082 +
  1083 + pmd = pmd_offset(pud, addr);
  1084 + if (pmd_none(*pmd))
  1085 + continue;
  1086 +
  1087 + nr_pages = 1 << (get_order(PMD_SIZE));
  1088 + page = pmd_page(*pmd);
  1089 + while (nr_pages--)
  1090 + get_page_bootmem(section_nr, page++,
  1091 + SECTION_INFO);
  1092 + }
  1093 + }
  1094 +}
  1095 +#endif
  1096 +
1037 1097 void __meminit vmemmap_populate_print_last(void)
1038 1098 {
1039 1099 if (p_start) {
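
The markers left by this walk are what a later hot-remove pass keys
off.  A minimal hypothetical consumer (release_registered_page() is
illustrative only, not part of this patch):

	/* Hypothetical sketch: a hot-remove pass can recognize pages
	 * registered above by the type stored in page->lru.next and
	 * drop the pin via put_page_bootmem(). */
	static void release_registered_page(struct page *page)
	{
		unsigned long type = (unsigned long) page->lru.next;

		if (type == SECTION_INFO || type == MIX_SECTION_INFO)
			put_page_bootmem(page);
	}
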
include/linux/memory_hotplug.h
... ... @@ -174,17 +174,16 @@
174 174 #endif /* CONFIG_NUMA */
175 175 #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
176 176  
177   -#ifdef CONFIG_SPARSEMEM_VMEMMAP
  177 +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
  178 +extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
  179 +#else
178 180 static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
179 181 {
180 182 }
181   -static inline void put_page_bootmem(struct page *page)
182   -{
183   -}
184   -#else
185   -extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
186   -extern void put_page_bootmem(struct page *page);
187 183 #endif
  184 +extern void put_page_bootmem(struct page *page);
  185 +extern void get_page_bootmem(unsigned long info, struct page *page,
  186 + unsigned long type);
188 187  
189 188 /*
190 189 * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
include/linux/mm.h
... ... @@ -1718,7 +1718,8 @@
1718 1718 unsigned long pages, int node);
1719 1719 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
1720 1720 void vmemmap_populate_print_last(void);
1721   -
  1721 +void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  1722 + unsigned long size);
1722 1723  
1723 1724 enum mf_flags {
1724 1725 MF_COUNT_INCREASED = 1 << 0,
mm/Kconfig
... ... @@ -162,10 +162,16 @@
162 162 Say Y here if you want to hotplug a whole node.
163 163 Say N here if you want kernel to use memory on all nodes evenly.
164 164  
  165 +#
  166 +# Only be set on architectures that have completely implemented memory hotplug
  167 +# feature. If you are not sure, don't touch it.
  168 +#
  169 +config HAVE_BOOTMEM_INFO_NODE
  170 + def_bool n
  171 +
165 172 # eventually, we can have this option just 'select SPARSEMEM'
166 173 config MEMORY_HOTPLUG
167 174 bool "Allow for memory hot-add"
168   - select MEMORY_ISOLATION
169 175 depends on SPARSEMEM || X86_64_ACPI_NUMA
170 176 depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG
171 177 depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390)
... ... @@ -176,6 +182,8 @@
176 182  
177 183 config MEMORY_HOTREMOVE
178 184 bool "Allow for memory hot remove"
  185 + select MEMORY_ISOLATION
  186 + select HAVE_BOOTMEM_INFO_NODE if X86_64
179 187 depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
180 188 depends on MIGRATION
181 189  
mm/memory_hotplug.c
... ... @@ -91,9 +91,8 @@
91 91 }
92 92  
93 93 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
94   -#ifndef CONFIG_SPARSEMEM_VMEMMAP
95   -static void get_page_bootmem(unsigned long info, struct page *page,
96   - unsigned long type)
  94 +void get_page_bootmem(unsigned long info, struct page *page,
  95 + unsigned long type)
97 96 {
98 97 page->lru.next = (struct list_head *) type;
99 98 SetPagePrivate(page);
... ... @@ -128,6 +127,8 @@
128 127  
129 128 }
130 129  
  130 +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
  131 +#ifndef CONFIG_SPARSEMEM_VMEMMAP
131 132 static void register_page_bootmem_info_section(unsigned long start_pfn)
132 133 {
133 134 unsigned long *usemap, mapsize, section_nr, i;
134 135  
... ... @@ -161,7 +162,33 @@
161 162 get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
162 163  
163 164 }
  165 +#else /* CONFIG_SPARSEMEM_VMEMMAP */
  166 +static void register_page_bootmem_info_section(unsigned long start_pfn)
  167 +{
  168 + unsigned long *usemap, mapsize, section_nr, i;
  169 + struct mem_section *ms;
  170 + struct page *page, *memmap;
164 171  
  172 + if (!pfn_valid(start_pfn))
  173 + return;
  174 +
  175 + section_nr = pfn_to_section_nr(start_pfn);
  176 + ms = __nr_to_section(section_nr);
  177 +
  178 + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  179 +
  180 + register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
  181 +
  182 + usemap = __nr_to_section(section_nr)->pageblock_flags;
  183 + page = virt_to_page(usemap);
  184 +
  185 + mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
  186 +
  187 + for (i = 0; i < mapsize; i++, page++)
  188 + get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
  189 +}
  190 +#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
  191 +
165 192 void register_page_bootmem_info_node(struct pglist_data *pgdat)
166 193 {
167 194 unsigned long i, pfn, end_pfn, nr_pages;
... ... @@ -203,7 +230,7 @@
203 230 register_page_bootmem_info_section(pfn);
204 231 }
205 232 }
206   -#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
  233 +#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
207 234  
208 235 static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
209 236 unsigned long end_pfn)