Commit 89528127fa5f4aca0483203c87c945555d057770

Authored by Paul Gortmaker
Committed by Greg Kroah-Hartman
1 parent 9ff1f838e9

powerpc: fix compile fail in hugetlb cmdline parsing

Commit 9fb48c744ba6a4bf58b666f4e6fdac3008ea1bd4

    "params: add 3rd arg to option handler callback signature"

added an extra arg to the option handler callback, but didn't catch
all the use cases needing it, causing this compile fail in
mpc85xx_defconfig:

 arch/powerpc/mm/hugetlbpage.c:316:4: error: passing argument 7 of
 'parse_args' from incompatible pointer type [-Werror]

 include/linux/moduleparam.h:317:12: note: expected
	 'int (*)(char *, char *, const char *)' but argument is of type
	 'int (*)(char *, char *)'

This function has no need to printk out the "doing" value, so
just add the third arg as "unused".
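
Put differently, the referenced commit changed the callback type that
parse_args() expects. A minimal sketch of the before/after, reconstructed
from the error output above (the commit calls the new argument "doing";
handlers that don't use it, like the one in this file, just name it
"unused"):

    /* before 9fb48c744ba6: option handlers took two arguments */
    int (*handler)(char *param, char *val);

    /* after: parse_args() also passes a string naming what is being
     * parsed, so every handler gains a third argument */
    int (*handler)(char *param, char *val, const char *doing);

    /* this file's handler simply ignores the new value: */
    static int __init do_gpage_early_setup(char *param, char *val,
                                           const char *unused);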

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jim Cromie <jim.cromie@gmail.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Becky Bruce <beckyb@kernel.crashing.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 1 changed file with 2 additions and 1 deletion

arch/powerpc/mm/hugetlbpage.c
/*
 * PPC Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/mm.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/export.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <linux/moduleparam.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/setup.h>

#define PAGE_SHIFT_64K	16
#define PAGE_SHIFT_16M	24
#define PAGE_SHIFT_16G	34

unsigned int HPAGE_SHIFT;

/*
 * Tracks gpages after the device tree is scanned and before the
 * huge_boot_pages list is ready. On non-Freescale implementations, this is
 * just used to track 16G pages and so is a single array. FSL-based
 * implementations may have more than one gpage size, so we need multiple
 * arrays
 */
#ifdef CONFIG_PPC_FSL_BOOK3E
#define MAX_NUMBER_GPAGES	128
struct psize_gpages {
	u64 gpage_list[MAX_NUMBER_GPAGES];
	unsigned int nr_gpages;
};
static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
#else
#define MAX_NUMBER_GPAGES	1024
static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#endif

static inline int shift_to_mmu_psize(unsigned int shift)
{
	int psize;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
		if (mmu_psize_defs[psize].shift == shift)
			return psize;
	return -1;
}

static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
	if (mmu_psize_defs[mmu_psize].shift)
		return mmu_psize_defs[mmu_psize].shift;
	BUG();
}

#define hugepd_none(hpd)	((hpd).pd == 0)

pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pdshift = PGDIR_SHIFT;

	if (shift)
		*shift = 0;

	pg = pgdir + pgd_index(ea);
	if (is_hugepd(pg)) {
		hpdp = (hugepd_t *)pg;
	} else if (!pgd_none(*pg)) {
		pdshift = PUD_SHIFT;
		pu = pud_offset(pg, ea);
		if (is_hugepd(pu))
			hpdp = (hugepd_t *)pu;
		else if (!pud_none(*pu)) {
			pdshift = PMD_SHIFT;
			pm = pmd_offset(pu, ea);
			if (is_hugepd(pm))
				hpdp = (hugepd_t *)pm;
			else if (!pmd_none(*pm)) {
				return pte_offset_kernel(pm, ea);
			}
		}
	}

	if (!hpdp)
		return NULL;

	if (shift)
		*shift = hugepd_shift(*hpdp);
	return hugepte_offset(hpdp, ea, pdshift);
}
EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
}

static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			   unsigned long address, unsigned pdshift, unsigned pshift)
{
	struct kmem_cache *cachep;
	pte_t *new;

#ifdef CONFIG_PPC_FSL_BOOK3E
	int i;
	int num_hugepd = 1 << (pshift - pdshift);
	cachep = hugepte_cache;
#else
	cachep = PGT_CACHE(pdshift - pshift);
#endif

	new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);

	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);

	if (! new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC_FSL_BOOK3E
	/*
	 * We have multiple higher-level entries that point to the same
	 * actual pte location. Fill in each as we go and backtrack on error.
	 * We need all of these so the DTLB pgtable walk code can find the
	 * right higher-level entry without knowing if it's a hugepage or not.
	 */
	for (i = 0; i < num_hugepd; i++, hpdp++) {
		if (unlikely(!hugepd_none(*hpdp)))
			break;
		else
			hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
	}
	/* If we bailed from the for loop early, an error occurred, clean up */
	if (i < num_hugepd) {
		for (i = i - 1 ; i >= 0; i--, hpdp--)
			hpdp->pd = 0;
		kmem_cache_free(cachep, new);
	}
#else
	if (!hugepd_none(*hpdp))
		kmem_cache_free(cachep, new);
	else
		hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
	spin_unlock(&mm->page_table_lock);
	return 0;
}

/*
 * These macros define how to determine which level of the page table holds
 * the hpdp.
 */
#ifdef CONFIG_PPC_FSL_BOOK3E
#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
#define HUGEPD_PUD_SHIFT PUD_SHIFT
#else
#define HUGEPD_PGD_SHIFT PUD_SHIFT
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pshift = __ffs(sz);
	unsigned pdshift = PGDIR_SHIFT;

	addr &= ~(sz-1);

	pg = pgd_offset(mm, addr);

	if (pshift >= HUGEPD_PGD_SHIFT) {
		hpdp = (hugepd_t *)pg;
	} else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, pg, addr);
		if (pshift >= HUGEPD_PUD_SHIFT) {
			hpdp = (hugepd_t *)pu;
		} else {
			pdshift = PMD_SHIFT;
			pm = pmd_alloc(mm, pu, addr);
			hpdp = (hugepd_t *)pm;
		}
	}

	if (!hpdp)
		return NULL;

	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
		return NULL;

	return hugepte_offset(hpdp, addr, pdshift);
}

#ifdef CONFIG_PPC_FSL_BOOK3E
/* Build list of addresses of gigantic pages. This function is used in early
 * boot before the buddy or bootmem allocator is setup.
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
	unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
	int i;

	if (addr == 0)
		return;

	gpage_freearray[idx].nr_gpages = number_of_pages;

	for (i = 0; i < number_of_pages; i++) {
		gpage_freearray[idx].gpage_list[i] = addr;
		addr += page_size;
	}
}

/*
 * Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;
	int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
	int nr_gpages = gpage_freearray[idx].nr_gpages;

	if (nr_gpages == 0)
		return 0;

#ifdef CONFIG_HIGHMEM
	/*
	 * If gpages can be in highmem we can't use the trick of storing the
	 * data structure in the page; allocate space for this
	 */
	m = alloc_bootmem(sizeof(struct huge_bootmem_page));
	m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
#else
	m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
#endif

	list_add(&m->list, &huge_boot_pages);
	gpage_freearray[idx].nr_gpages = nr_gpages;
	gpage_freearray[idx].gpage_list[nr_gpages] = 0;
	m->hstate = hstate;

	return 1;
}
/*
 * Scan the command line hugepagesz= options for gigantic pages; store those in
 * a list that we use to allocate the memory once all options are parsed.
 */

unsigned long gpage_npages[MMU_PAGE_COUNT];

-static int __init do_gpage_early_setup(char *param, char *val)
+static int __init do_gpage_early_setup(char *param, char *val,
+				       const char *unused)
{
	static phys_addr_t size;
	unsigned long npages;

	/*
	 * The hugepagesz and hugepages cmdline options are interleaved. We
	 * use the size variable to keep track of whether or not this was done
	 * properly and skip over instances where it is incorrect. Other
	 * command-line parsing code will issue warnings, so we don't need to.
	 *
	 */
	if ((strcmp(param, "default_hugepagesz") == 0) ||
	    (strcmp(param, "hugepagesz") == 0)) {
		size = memparse(val, NULL);
	} else if (strcmp(param, "hugepages") == 0) {
		if (size != 0) {
			if (sscanf(val, "%lu", &npages) <= 0)
				npages = 0;
			gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
			size = 0;
		}
	}
	return 0;
}


/*
 * This function allocates physical space for pages that are larger than the
 * buddy allocator can handle. We want to allocate these in highmem because
 * the amount of lowmem is limited. This means that this function MUST be
 * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
 * allocate to grab highmem.
 */
void __init reserve_hugetlb_gpages(void)
{
	static __initdata char cmdline[COMMAND_LINE_SIZE];
	phys_addr_t size, base;
	int i;

	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
	parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
			&do_gpage_early_setup);

	/*
	 * Walk gpage list in reverse, allocating larger page sizes first.
	 * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
	 * When we reach the point in the list where pages are no longer
	 * considered gpages, we're done.
	 */
	for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
		if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
			continue;
		else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
			break;

		size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
		base = memblock_alloc_base(size * gpage_npages[i], size,
					   MEMBLOCK_ALLOC_ANYWHERE);
		add_gpage(base, size, gpage_npages[i]);
	}
}

#else /* !PPC_FSL_BOOK3E */

/* Build list of addresses of gigantic pages. This function is used in early
 * boot before the buddy or bootmem allocator is setup.
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
	if (!addr)
		return;
	while (number_of_pages > 0) {
		gpage_freearray[nr_gpages] = addr;
		nr_gpages++;
		number_of_pages--;
		addr += page_size;
	}
}

/* Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;
	if (nr_gpages == 0)
		return 0;
	m = phys_to_virt(gpage_freearray[--nr_gpages]);
	gpage_freearray[nr_gpages] = 0;
	list_add(&m->list, &huge_boot_pages);
	m->hstate = hstate;
	return 1;
}
#endif

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}

#ifdef CONFIG_PPC_FSL_BOOK3E
#define HUGEPD_FREELIST_SIZE \
	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))

struct hugepd_freelist {
	struct rcu_head rcu;
	unsigned int index;
	void *ptes[0];
};

static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);

static void hugepd_free_rcu_callback(struct rcu_head *head)
{
	struct hugepd_freelist *batch =
		container_of(head, struct hugepd_freelist, rcu);
	unsigned int i;

	for (i = 0; i < batch->index; i++)
		kmem_cache_free(hugepte_cache, batch->ptes[i]);

	free_page((unsigned long)batch);
}

static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
{
	struct hugepd_freelist **batchp;

	batchp = &__get_cpu_var(hugepd_freelist_cur);

	if (atomic_read(&tlb->mm->mm_users) < 2 ||
	    cpumask_equal(mm_cpumask(tlb->mm),
			  cpumask_of(smp_processor_id()))) {
		kmem_cache_free(hugepte_cache, hugepte);
		return;
	}

	if (*batchp == NULL) {
		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
		(*batchp)->index = 0;
	}

	(*batchp)->ptes[(*batchp)->index++] = hugepte;
	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
		*batchp = NULL;
	}
}
#endif

static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
			      unsigned long start, unsigned long end,
			      unsigned long floor, unsigned long ceiling)
{
	pte_t *hugepte = hugepd_page(*hpdp);
	int i;

	unsigned long pdmask = ~((1UL << pdshift) - 1);
	unsigned int num_hugepd = 1;

#ifdef CONFIG_PPC_FSL_BOOK3E
	/* Note: On fsl the hpdp may be the first of several */
	num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
#else
	unsigned int shift = hugepd_shift(*hpdp);
#endif

	start &= pdmask;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= pdmask;
		if (! ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	for (i = 0; i < num_hugepd; i++, hpdp++)
		hpdp->pd = 0;

	tlb->need_flush = 1;

#ifdef CONFIG_PPC_FSL_BOOK3E
	hugepd_free(tlb, hugepte);
#else
	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#endif
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		pmd = pmd_offset(pud, addr);
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
#ifdef CONFIG_PPC_FSL_BOOK3E
		/*
		 * Increment next by the size of the huge mapping since
		 * there may be more than one entry at this level for a
		 * single hugepage, but all of them point to
		 * the same kmem cache that holds the hugepte.
		 */
		next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
#endif
		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
				  addr, next, floor, ceiling);
	} while (addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		pud = pud_offset(pgd, addr);
		next = pud_addr_end(addr, end);
		if (!is_hugepd(pud)) {
			if (pud_none_or_clear_bad(pud))
				continue;
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
		} else {
#ifdef CONFIG_PPC_FSL_BOOK3E
			/*
			 * Increment next by the size of the huge mapping since
			 * there may be more than one entry at this level for a
			 * single hugepage, but all of them point to
			 * the same kmem cache that holds the hugepte.
			 */
			next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
#endif
			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
}

/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	/*
	 * Because there are a number of different possible pagetable
	 * layouts for hugepage ranges, we limit knowledge of how
	 * things should be laid out to the allocation path
	 * (huge_pte_alloc(), above). Everything else works out the
	 * structure as it goes from information in the hugepd
	 * pointers. That means that we can't here use the
	 * optimization used in the normal page free_pgd_range(), of
	 * checking whether we're actually covering a large enough
	 * range to have to do anything at the top level of the walk
	 * instead of at the bottom.
	 *
	 * To make sense of this, you should probably go read the big
	 * block comment at the top of the normal free_pgd_range(),
	 * too.
	 */

	do {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset(tlb->mm, addr);
		if (!is_hugepd(pgd)) {
			if (pgd_none_or_clear_bad(pgd))
				continue;
			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
		} else {
#ifdef CONFIG_PPC_FSL_BOOK3E
			/*
			 * Increment next by the size of the huge mapping since
			 * there may be more than one entry at the pgd level
			 * for a single hugepage, but all of them point to the
			 * same kmem cache that holds the hugepte.
			 */
			next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
#endif
			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (addr = next, addr != end);
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;
	unsigned shift;
	unsigned long mask;

	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);

	/* Verify it is a huge page else bail. */
	if (!ptep || !shift)
		return ERR_PTR(-EINVAL);

	mask = (1UL << shift) - 1;
	page = pte_page(*ptep);
	if (page)
		page += (address & mask) / PAGE_SIZE;

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}

static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask;
	unsigned long pte_end;
	struct page *head, *page, *tail;
	pte_t pte;
	int refs;

	pte_end = (addr + sz) & ~(sz-1);
	if (pte_end < end)
		end = pte_end;

	pte = *ptep;
	mask = _PAGE_PRESENT | _PAGE_USER;
	if (write)
		mask |= _PAGE_RW;

	if ((pte_val(pte) & mask) != mask)
		return 0;

	/* hugepages are never "special" */
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);

	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
	tail = page;
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
		/* Could be optimized better */
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	/*
	 * Any tail page need their mapcount reference taken before we
	 * return.
	 */
	while (refs--) {
		if (PageTail(tail))
			get_huge_page_tail(tail);
		tail++;
	}

	return 1;
}

static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
				      unsigned long sz)
{
	unsigned long __boundary = (addr + sz) & ~(sz-1);
	return (__boundary - 1 < end - 1) ? __boundary : end;
}

int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
	       unsigned long addr, unsigned long end,
	       int write, struct page **pages, int *nr)
{
	pte_t *ptep;
	unsigned long sz = 1UL << hugepd_shift(*hugepd);
	unsigned long next;

	ptep = hugepte_offset(hugepd, addr, pdshift);
	do {
		next = hugepte_addr_end(addr, end, sz);
		if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
			return 0;
	} while (ptep++, addr = next, addr != end);

	return 1;
}

#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	struct hstate *hstate = hstate_file(file);
	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}
#endif

unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
#ifdef CONFIG_PPC_MM_SLICES
	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);

	return 1UL << mmu_psize_to_shift(psize);
#else
	if (!is_vm_hugetlb_page(vma))
		return PAGE_SIZE;

	return huge_page_size(hstate_vma(vma));
#endif
}

static inline bool is_power_of_4(unsigned long x)
{
	if (is_power_of_2(x))
		return (__ilog2(x) % 2) ? false : true;
	return false;
}

static int __init add_huge_page_size(unsigned long long size)
{
	int shift = __ffs(size);
	int mmu_psize;

	/* Check that it is a page size supported by the hardware and
	 * that it fits within pagetable and slice limits. */
#ifdef CONFIG_PPC_FSL_BOOK3E
	if ((size < PAGE_SIZE) || !is_power_of_4(size))
		return -EINVAL;
#else
	if (!is_power_of_2(size)
	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
		return -EINVAL;
#endif

	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
		return -EINVAL;

#ifdef CONFIG_SPU_FS_64K_LS
	/* Disable support for 64K huge pages when 64K SPU local store
	 * support is enabled as the current implementation conflicts.
	 */
	if (shift == PAGE_SHIFT_64K)
		return -EINVAL;
#endif /* CONFIG_SPU_FS_64K_LS */

	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);

	/* Return if huge page size has already been setup */
	if (size_to_hstate(size))
		return 0;

	hugetlb_add_hstate(shift - PAGE_SHIFT);

	return 0;
}

static int __init hugepage_setup_sz(char *str)
{
	unsigned long long size;

	size = memparse(str, &str);

	if (add_huge_page_size(size) != 0)
		printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);

	return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);

#ifdef CONFIG_PPC_FSL_BOOK3E
struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{
	int psize;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		/* Don't treat normal page sizes as huge... */
		if (shift != PAGE_SHIFT)
			if (add_huge_page_size(1ULL << shift) < 0)
				continue;
	}

	/*
	 * Create a kmem cache for hugeptes. The bottom bits in the pte have
	 * size information encoded in them, so align them to allow this
	 */
	hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
					  HUGEPD_SHIFT_MASK + 1, 0, NULL);
	if (hugepte_cache == NULL)
		panic("%s: Unable to create kmem cache for hugeptes\n",
		      __func__);

	/* Default hpage size = 4M */
	if (mmu_psize_defs[MMU_PAGE_4M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
	else
		panic("%s: Unable to set default huge page size\n", __func__);


	return 0;
}
#else
static int __init hugetlbpage_init(void)
{
	int psize;

	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
		return -ENODEV;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;
		unsigned pdshift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		if (add_huge_page_size(1ULL << shift) < 0)
			continue;

		if (shift < PMD_SHIFT)
			pdshift = PMD_SHIFT;
		else if (shift < PUD_SHIFT)
			pdshift = PUD_SHIFT;
		else
			pdshift = PGDIR_SHIFT;

		pgtable_cache_add(pdshift - shift, NULL);
		if (!PGT_CACHE(pdshift - shift))
			panic("hugetlbpage_init(): could not create "
			      "pgtable cache for %d bit pagesize\n", shift);
	}

	/* Set default large page size. Currently, we pick 16M or 1M
	 * depending on what is available
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;

	return 0;
}
#endif
module_init(hugetlbpage_init);

void flush_dcache_icache_hugepage(struct page *page)
{
	int i;
	void *start;

	BUG_ON(!PageCompound(page));

	for (i = 0; i < (1UL << compound_order(page)); i++) {
		if (!PageHighMem(page)) {
			__flush_dcache_icache(page_address(page+i));
		} else {
			start = kmap_atomic(page+i);
			__flush_dcache_icache(start);
			kunmap_atomic(start);
		}
	}
}