Eric Lee / smarc-ti-linux-kernel | Embedian Git Server

Blame view

mm/hugetlb.c 33.6 KB

1da177e4c Linus Torvalds Linux-2.6.12-rc2	1 2 3 4 5 6 7 8 9	/* * Generic hugetlb support. * (C) William Irwin, April 2004 */ #include <linux/gfp.h> #include <linux/list.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h>
1da177e4c Linus Torvalds Linux-2.6.12-rc2	10 11 12	#include <linux/sysctl.h> #include <linux/highmem.h> #include <linux/nodemask.h>
63551ae0f David Gibson [PATCH] Hugepage ...	13	#include <linux/pagemap.h>
5da7ca860 Christoph Lameter [PATCH] Add NUMA ...	14	#include <linux/mempolicy.h>
aea47ff36 Christoph Lameter [PATCH] mm: make ...	15	#include <linux/cpuset.h>
3935baa9b David Gibson [PATCH] hugepage:...	16	#include <linux/mutex.h>
5da7ca860 Christoph Lameter [PATCH] Add NUMA ...	17
63551ae0f David Gibson [PATCH] Hugepage ...	18 19 20 21	#include <asm/page.h> #include <asm/pgtable.h> #include <linux/hugetlb.h>
7835e98b2 Nick Piggin [PATCH] remove se...	22	#include "internal.h"
1da177e4c Linus Torvalds Linux-2.6.12-rc2	23 24	const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
a43a8c39b Kenneth W Chen [PATCH] tightenin...	25	static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
7893d1d50 Adam Litke hugetlb: Try to g...	26	static unsigned long surplus_huge_pages;
064d9efe9 Nishanth Aravamudan hugetlb: fix over...	27	static unsigned long nr_overcommit_huge_pages;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	28	unsigned long max_huge_pages;
064d9efe9 Nishanth Aravamudan hugetlb: fix over...	29	unsigned long sysctl_overcommit_huge_pages;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	30 31 32	static struct list_head hugepage_freelists[MAX_NUMNODES]; static unsigned int nr_huge_pages_node[MAX_NUMNODES]; static unsigned int free_huge_pages_node[MAX_NUMNODES];
7893d1d50 Adam Litke hugetlb: Try to g...	33	static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
396faf030 Mel Gorman Allow huge page a...	34 35	static gfp_t htlb_alloc_mask = GFP_HIGHUSER; unsigned long hugepages_treat_as_movable;
63b4613c3 Nishanth Aravamudan hugetlb: fix huge...	36	static int hugetlb_next_nid;
396faf030 Mel Gorman Allow huge page a...	37
3935baa9b David Gibson [PATCH] hugepage:...	38 39 40 41	/* * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages */ static DEFINE_SPINLOCK(hugetlb_lock);
0bd0f9fb1 Eric Paris [PATCH] hugetlb: ...	42
79ac6ba40 David Gibson [PATCH] hugepage:...	43 44 45 46 47 48 49	static void clear_huge_page(struct page *page, unsigned long addr) { int i; might_sleep(); for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) { cond_resched();
281e0e3b3 Ralf Baechle hugetlb: fix clea...	50	clear_user_highpage(page + i, addr + i * PAGE_SIZE);
79ac6ba40 David Gibson [PATCH] hugepage:...	51 52 53 54	} } static void copy_huge_page(struct page dst, struct page src,
9de455b20 Atsushi Nemoto [PATCH] Pass vma ...	55	unsigned long addr, struct vm_area_struct *vma)
79ac6ba40 David Gibson [PATCH] hugepage:...	56 57 58 59 60 61	{ int i; might_sleep(); for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) { cond_resched();
9de455b20 Atsushi Nemoto [PATCH] Pass vma ...	62	copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
79ac6ba40 David Gibson [PATCH] hugepage:...	63 64	} }
1da177e4c Linus Torvalds Linux-2.6.12-rc2	65 66 67 68 69 70 71	static void enqueue_huge_page(struct page *page) { int nid = page_to_nid(page); list_add(&page->lru, &hugepage_freelists[nid]); free_huge_pages++; free_huge_pages_node[nid]++; }
348e1e04b Nishanth Aravamudan hugetlb: fix pool...	72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90	static struct page dequeue_huge_page(void) { int nid; struct page page = NULL; for (nid = 0; nid < MAX_NUMNODES; ++nid) { if (!list_empty(&hugepage_freelists[nid])) { page = list_entry(hugepage_freelists[nid].next, struct page, lru); list_del(&page->lru); free_huge_pages--; free_huge_pages_node[nid]--; break; } } return page; } static struct page dequeue_huge_page_vma(struct vm_area_struct vma,
5da7ca860 Christoph Lameter [PATCH] Add NUMA ...	91	unsigned long address)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	92	{
31a5c6e4f Nishanth Aravamudan hugetlb: remove u...	93	int nid;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	94	struct page *page = NULL;
480eccf9a Lee Schermerhorn Fix NUMA Memory P...	95	struct mempolicy *mpol;
19770b326 Mel Gorman mm: filter based ...	96	nodemask_t *nodemask;
396faf030 Mel Gorman Allow huge page a...	97	struct zonelist *zonelist = huge_zonelist(vma, address,
19770b326 Mel Gorman mm: filter based ...	98	htlb_alloc_mask, &mpol, &nodemask);
dd1a239f6 Mel Gorman mm: have zonelist...	99 100	struct zone zone; struct zoneref z;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	101
19770b326 Mel Gorman mm: filter based ...	102 103	for_each_zone_zonelist_nodemask(zone, z, zonelist, MAX_NR_ZONES - 1, nodemask) {
54a6eb5c4 Mel Gorman mm: use two zonel...	104 105	nid = zone_to_nid(zone); if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
3abf7afd4 Andrew Morton dequeue_huge_page...	106 107 108 109 110 111	!list_empty(&hugepage_freelists[nid])) { page = list_entry(hugepage_freelists[nid].next, struct page, lru); list_del(&page->lru); free_huge_pages--; free_huge_pages_node[nid]--;
e4e574b76 Adam Litke hugetlb: Try to g...	112 113	if (vma && vma->vm_flags & VM_MAYSHARE) resv_huge_pages--;
5ab3ee7b1 Ken Chen fix hugetlb page ...	114	break;
3abf7afd4 Andrew Morton dequeue_huge_page...	115	}
1da177e4c Linus Torvalds Linux-2.6.12-rc2	116	}
52cd3b074 Lee Schermerhorn mempolicy: rework...	117	mpol_cond_put(mpol);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	118 119	return page; }
6af2acb66 Adam Litke hugetlb: Move upd...	120 121 122 123 124 125 126 127 128 129 130 131	static void update_and_free_page(struct page *page) { int i; nr_huge_pages--; nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { page[i].flags &= ~(1 << PG_locked \| 1 << PG_error \| 1 << PG_referenced \| 1 << PG_dirty \| 1 << PG_active \| 1 << PG_reserved \| 1 << PG_private \| 1<< PG_writeback); } set_compound_page_dtor(page, NULL); set_page_refcounted(page);
7f2e9525b Gerald Schaefer hugetlbfs: common...	132	arch_release_hugepage(page);
6af2acb66 Adam Litke hugetlb: Move upd...	133 134	__free_pages(page, HUGETLB_PAGE_ORDER); }
27a85ef1b David Gibson [PATCH] hugepage:...	135 136	static void free_huge_page(struct page *page) {
7893d1d50 Adam Litke hugetlb: Try to g...	137	int nid = page_to_nid(page);
c79fb75e5 Adam Litke hugetlb: fix quot...	138	struct address_space *mapping;
27a85ef1b David Gibson [PATCH] hugepage:...	139
c79fb75e5 Adam Litke hugetlb: fix quot...	140	mapping = (struct address_space *) page_private(page);
e5df70ab1 Andy Whitcroft hugetlb: ensure w...	141	set_page_private(page, 0);
7893d1d50 Adam Litke hugetlb: Try to g...	142	BUG_ON(page_count(page));
27a85ef1b David Gibson [PATCH] hugepage:...	143 144 145	INIT_LIST_HEAD(&page->lru); spin_lock(&hugetlb_lock);
7893d1d50 Adam Litke hugetlb: Try to g...	146 147 148 149 150 151 152	if (surplus_huge_pages_node[nid]) { update_and_free_page(page); surplus_huge_pages--; surplus_huge_pages_node[nid]--; } else { enqueue_huge_page(page); }
27a85ef1b David Gibson [PATCH] hugepage:...	153	spin_unlock(&hugetlb_lock);
c79fb75e5 Adam Litke hugetlb: fix quot...	154	if (mapping)
9a119c056 Adam Litke hugetlb: allow bu...	155	hugetlb_put_quota(mapping, 1);
27a85ef1b David Gibson [PATCH] hugepage:...	156	}
7893d1d50 Adam Litke hugetlb: Try to g...	157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190	/* * Increment or decrement surplus_huge_pages. Keep node-specific counters * balanced by operating on them in a round-robin fashion. * Returns 1 if an adjustment was made. / static int adjust_pool_surplus(int delta) { static int prev_nid; int nid = prev_nid; int ret = 0; VM_BUG_ON(delta != -1 && delta != 1); do { nid = next_node(nid, node_online_map); if (nid == MAX_NUMNODES) nid = first_node(node_online_map); / To shrink on this node, there must be a surplus page / if (delta < 0 && !surplus_huge_pages_node[nid]) continue; / Surplus cannot exceed the total number of pages */ if (delta > 0 && surplus_huge_pages_node[nid] >= nr_huge_pages_node[nid]) continue; surplus_huge_pages += delta; surplus_huge_pages_node[nid] += delta; ret = 1; break; } while (nid != prev_nid); prev_nid = nid; return ret; }
63b4613c3 Nishanth Aravamudan hugetlb: fix huge...	191	static struct page *alloc_fresh_huge_page_node(int nid)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	192	{
1da177e4c Linus Torvalds Linux-2.6.12-rc2	193	struct page *page;
f96efd585 Joe Jin hugetlb: fix race...	194
63b4613c3 Nishanth Aravamudan hugetlb: fix huge...	195	page = alloc_pages_node(nid,
551883ae8 Nishanth Aravamudan page allocator: e...	196 197	htlb_alloc_mask\|__GFP_COMP\|__GFP_THISNODE\| __GFP_REPEAT\|__GFP_NOWARN,
63b4613c3 Nishanth Aravamudan hugetlb: fix huge...	198	HUGETLB_PAGE_ORDER);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	199	if (page) {
7f2e9525b Gerald Schaefer hugetlbfs: common...	200 201	if (arch_prepare_hugepage(page)) { __free_pages(page, HUGETLB_PAGE_ORDER);
7b8ee84d8 Harvey Harrison mm: fix integer a...	202	return NULL;
7f2e9525b Gerald Schaefer hugetlbfs: common...	203	}
33f2ef89f Andy Whitcroft [PATCH] mm: make ...	204	set_compound_page_dtor(page, free_huge_page);
0bd0f9fb1 Eric Paris [PATCH] hugetlb: ...	205	spin_lock(&hugetlb_lock);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	206	nr_huge_pages++;
63b4613c3 Nishanth Aravamudan hugetlb: fix huge...	207	nr_huge_pages_node[nid]++;
0bd0f9fb1 Eric Paris [PATCH] hugetlb: ...	208	spin_unlock(&hugetlb_lock);
a482289d4 Nick Piggin [PATCH] hugepage ...	209	put_page(page); /* free it into the hugepage allocator */
1da177e4c Linus Torvalds Linux-2.6.12-rc2	210	}
63b4613c3 Nishanth Aravamudan hugetlb: fix huge...	211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243	return page; } static int alloc_fresh_huge_page(void) { struct page page; int start_nid; int next_nid; int ret = 0; start_nid = hugetlb_next_nid; do { page = alloc_fresh_huge_page_node(hugetlb_next_nid); if (page) ret = 1; / * Use a helper variable to find the next node and then * copy it back to hugetlb_next_nid afterwards: * otherwise there's a window in which a racer might * pass invalid nid MAX_NUMNODES to alloc_pages_node. * But we don't need to use a spin_lock here: it really * doesn't matter if occasionally a racer chooses the * same nid as we do. Move nid forward in the mask even * if we just successfully allocated a hugepage so that * the next caller gets hugepages on the next node. */ next_nid = next_node(hugetlb_next_nid, node_online_map); if (next_nid == MAX_NUMNODES) next_nid = first_node(node_online_map); hugetlb_next_nid = next_nid; } while (!page && hugetlb_next_nid != start_nid);
3b1163006 Adam Litke Subject: [PATCH] ...	244 245 246 247	if (ret) count_vm_event(HTLB_BUDDY_PGALLOC); else count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
63b4613c3 Nishanth Aravamudan hugetlb: fix huge...	248	return ret;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	249	}
7893d1d50 Adam Litke hugetlb: Try to g...	250 251 252 253	static struct page alloc_buddy_huge_page(struct vm_area_struct vma, unsigned long address) { struct page *page;
d1c3fb1f8 Nishanth Aravamudan hugetlb: introduc...	254	unsigned int nid;
7893d1d50 Adam Litke hugetlb: Try to g...	255
d1c3fb1f8 Nishanth Aravamudan hugetlb: introduc...	256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287	/* * Assume we will successfully allocate the surplus page to * prevent racing processes from causing the surplus to exceed * overcommit * * This however introduces a different race, where a process B * tries to grow the static hugepage pool while alloc_pages() is * called by process A. B will only examine the per-node * counters in determining if surplus huge pages can be * converted to normal huge pages in adjust_pool_surplus(). A * won't be able to increment the per-node counter, until the * lock is dropped by B, but B doesn't drop hugetlb_lock until * no more huge pages can be converted from surplus to normal * state (and doesn't try to convert again). Thus, we have a * case where a surplus huge page exists, the pool is grown, and * the surplus huge page still exists after, even though it * should just have been converted to a normal huge page. This * does not leak memory, though, as the hugepage will be freed * once it is out of use. It also does not allow the counters to * go out of whack in adjust_pool_surplus() as we don't modify * the node values until we've gotten the hugepage and only the * per-node value is checked there. */ spin_lock(&hugetlb_lock); if (surplus_huge_pages >= nr_overcommit_huge_pages) { spin_unlock(&hugetlb_lock); return NULL; } else { nr_huge_pages++; surplus_huge_pages++; } spin_unlock(&hugetlb_lock);
551883ae8 Nishanth Aravamudan page allocator: e...	288 289	page = alloc_pages(htlb_alloc_mask\|__GFP_COMP\| __GFP_REPEAT\|__GFP_NOWARN,
7893d1d50 Adam Litke hugetlb: Try to g...	290	HUGETLB_PAGE_ORDER);
d1c3fb1f8 Nishanth Aravamudan hugetlb: introduc...	291 292	spin_lock(&hugetlb_lock);
7893d1d50 Adam Litke hugetlb: Try to g...	293	if (page) {
2668db911 Adam Litke hugetlb: correct ...	294 295 296 297 298 299	/* * This page is now managed by the hugetlb allocator and has * no users -- drop the buddy allocator's reference. */ put_page_testzero(page); VM_BUG_ON(page_count(page));
d1c3fb1f8 Nishanth Aravamudan hugetlb: introduc...	300	nid = page_to_nid(page);
7893d1d50 Adam Litke hugetlb: Try to g...	301	set_compound_page_dtor(page, free_huge_page);
d1c3fb1f8 Nishanth Aravamudan hugetlb: introduc...	302 303 304 305 306	/* * We incremented the global counters already */ nr_huge_pages_node[nid]++; surplus_huge_pages_node[nid]++;
3b1163006 Adam Litke Subject: [PATCH] ...	307	__count_vm_event(HTLB_BUDDY_PGALLOC);
d1c3fb1f8 Nishanth Aravamudan hugetlb: introduc...	308 309 310	} else { nr_huge_pages--; surplus_huge_pages--;
3b1163006 Adam Litke Subject: [PATCH] ...	311	__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
7893d1d50 Adam Litke hugetlb: Try to g...	312	}
d1c3fb1f8 Nishanth Aravamudan hugetlb: introduc...	313	spin_unlock(&hugetlb_lock);
7893d1d50 Adam Litke hugetlb: Try to g...	314 315 316	return page; }
e4e574b76 Adam Litke hugetlb: Try to g...	317 318 319 320 321 322 323 324 325 326 327 328	/* * Increase the hugetlb pool such that it can accomodate a reservation * of size 'delta'. / static int gather_surplus_pages(int delta) { struct list_head surplus_list; struct page page, *tmp; int ret, i; int needed, allocated; needed = (resv_huge_pages + delta) - free_huge_pages;
ac09b3a15 Adam Litke hugetlb: close a ...	329 330	if (needed <= 0) { resv_huge_pages += delta;
e4e574b76 Adam Litke hugetlb: Try to g...	331	return 0;
ac09b3a15 Adam Litke hugetlb: close a ...	332	}
e4e574b76 Adam Litke hugetlb: Try to g...	333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369	allocated = 0; INIT_LIST_HEAD(&surplus_list); ret = -ENOMEM; retry: spin_unlock(&hugetlb_lock); for (i = 0; i < needed; i++) { page = alloc_buddy_huge_page(NULL, 0); if (!page) { /* * We were not able to allocate enough pages to * satisfy the entire reservation so we free what * we've allocated so far. / spin_lock(&hugetlb_lock); needed = 0; goto free; } list_add(&page->lru, &surplus_list); } allocated += needed; / * After retaking hugetlb_lock, we need to recalculate 'needed' * because either resv_huge_pages or free_huge_pages may have changed. / spin_lock(&hugetlb_lock); needed = (resv_huge_pages + delta) - (free_huge_pages + allocated); if (needed > 0) goto retry; / * The surplus_list now contains _at_least_ the number of extra pages * needed to accomodate the reservation. Add the appropriate number * of pages to the hugetlb pool and free the extras back to the buddy
ac09b3a15 Adam Litke hugetlb: close a ...	370 371 372	* allocator. Commit the entire reservation here to prevent another * process from stealing the pages as they are added to the pool but * before they are reserved.
e4e574b76 Adam Litke hugetlb: Try to g...	373 374	*/ needed += allocated;
ac09b3a15 Adam Litke hugetlb: close a ...	375	resv_huge_pages += delta;
e4e574b76 Adam Litke hugetlb: Try to g...	376 377	ret = 0; free:
19fc3f0ac Adam Litke hugetlb: decrease...	378	/* Free the needed pages to the hugetlb pool */
e4e574b76 Adam Litke hugetlb: Try to g...	379	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
19fc3f0ac Adam Litke hugetlb: decrease...	380 381	if ((--needed) < 0) break;
e4e574b76 Adam Litke hugetlb: Try to g...	382	list_del(&page->lru);
19fc3f0ac Adam Litke hugetlb: decrease...	383 384 385 386 387 388 389 390	enqueue_huge_page(page); } /* Free unnecessary surplus pages to the buddy allocator */ if (!list_empty(&surplus_list)) { spin_unlock(&hugetlb_lock); list_for_each_entry_safe(page, tmp, &surplus_list, lru) { list_del(&page->lru);
af767cbdd Adam Litke hugetlb: fix dyna...	391	/*
2668db911 Adam Litke hugetlb: correct ...	392 393 394	* The page has a reference count of zero already, so * call free_huge_page directly instead of using * put_page. This must be done with hugetlb_lock
af767cbdd Adam Litke hugetlb: fix dyna...	395 396 397	* unlocked which is safe because free_huge_page takes * hugetlb_lock before deciding how to free the page. */
2668db911 Adam Litke hugetlb: correct ...	398	free_huge_page(page);
af767cbdd Adam Litke hugetlb: fix dyna...	399	}
19fc3f0ac Adam Litke hugetlb: decrease...	400	spin_lock(&hugetlb_lock);
e4e574b76 Adam Litke hugetlb: Try to g...	401 402 403 404 405 406 407 408 409 410	} return ret; } /* * When releasing a hugetlb pool reservation, any surplus pages that were * allocated to satisfy the reservation must be explicitly freed if they were * never used. */
8cde045c7 Adrian Bunk mm/hugetlb.c: mak...	411	static void return_unused_surplus_pages(unsigned long unused_resv_pages)
e4e574b76 Adam Litke hugetlb: Try to g...	412 413 414 415	{ static int nid = -1; struct page *page; unsigned long nr_pages;
11320d17c Nishanth Aravamudan hugetlb: fix pote...	416 417 418 419 420 421 422	/* * We want to release as many surplus pages as possible, spread * evenly across all nodes. Iterate across all nodes until we * can no longer free unreserved surplus pages. This occurs when * the nodes with surplus pages have no free pages. */ unsigned long remaining_iterations = num_online_nodes();
ac09b3a15 Adam Litke hugetlb: close a ...	423 424	/* Uncommit the reservation */ resv_huge_pages -= unused_resv_pages;
e4e574b76 Adam Litke hugetlb: Try to g...	425	nr_pages = min(unused_resv_pages, surplus_huge_pages);
11320d17c Nishanth Aravamudan hugetlb: fix pote...	426	while (remaining_iterations-- && nr_pages) {
e4e574b76 Adam Litke hugetlb: Try to g...	427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443	nid = next_node(nid, node_online_map); if (nid == MAX_NUMNODES) nid = first_node(node_online_map); if (!surplus_huge_pages_node[nid]) continue; if (!list_empty(&hugepage_freelists[nid])) { page = list_entry(hugepage_freelists[nid].next, struct page, lru); list_del(&page->lru); update_and_free_page(page); free_huge_pages--; free_huge_pages_node[nid]--; surplus_huge_pages--; surplus_huge_pages_node[nid]--; nr_pages--;
11320d17c Nishanth Aravamudan hugetlb: fix pote...	444	remaining_iterations = num_online_nodes();
e4e574b76 Adam Litke hugetlb: Try to g...	445 446 447	} } }
348ea204c Adam Litke hugetlb: split al...	448 449 450	static struct page alloc_huge_page_shared(struct vm_area_struct vma, unsigned long addr)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	451	{
348ea204c Adam Litke hugetlb: split al...	452	struct page *page;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	453 454	spin_lock(&hugetlb_lock);
348e1e04b Nishanth Aravamudan hugetlb: fix pool...	455	page = dequeue_huge_page_vma(vma, addr);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	456	spin_unlock(&hugetlb_lock);
90d8b7e61 Adam Litke hugetlb: enforce ...	457	return page ? page : ERR_PTR(-VM_FAULT_OOM);
348ea204c Adam Litke hugetlb: split al...	458	}
b45b5bd65 David Gibson [PATCH] hugepage:...	459
348ea204c Adam Litke hugetlb: split al...	460 461 462 463	static struct page alloc_huge_page_private(struct vm_area_struct vma, unsigned long addr) { struct page *page = NULL;
7893d1d50 Adam Litke hugetlb: Try to g...	464
90d8b7e61 Adam Litke hugetlb: enforce ...	465 466	if (hugetlb_get_quota(vma->vm_file->f_mapping, 1)) return ERR_PTR(-VM_FAULT_SIGBUS);
348ea204c Adam Litke hugetlb: split al...	467 468	spin_lock(&hugetlb_lock); if (free_huge_pages > resv_huge_pages)
348e1e04b Nishanth Aravamudan hugetlb: fix pool...	469	page = dequeue_huge_page_vma(vma, addr);
348ea204c Adam Litke hugetlb: split al...	470	spin_unlock(&hugetlb_lock);
68842c9b9 Ken Chen hugetlbfs: fix qu...	471	if (!page) {
7893d1d50 Adam Litke hugetlb: Try to g...	472	page = alloc_buddy_huge_page(vma, addr);
68842c9b9 Ken Chen hugetlbfs: fix qu...	473 474 475 476 477 478	if (!page) { hugetlb_put_quota(vma->vm_file->f_mapping, 1); return ERR_PTR(-VM_FAULT_OOM); } } return page;
348ea204c Adam Litke hugetlb: split al...	479 480 481 482 483 484	} static struct page alloc_huge_page(struct vm_area_struct vma, unsigned long addr) { struct page *page;
2fc39cec6 Adam Litke hugetlb: debit qu...	485	struct address_space *mapping = vma->vm_file->f_mapping;
348ea204c Adam Litke hugetlb: split al...	486 487 488 489	if (vma->vm_flags & VM_MAYSHARE) page = alloc_huge_page_shared(vma, addr); else page = alloc_huge_page_private(vma, addr);
90d8b7e61 Adam Litke hugetlb: enforce ...	490 491	if (!IS_ERR(page)) {
348ea204c Adam Litke hugetlb: split al...	492	set_page_refcounted(page);
2fc39cec6 Adam Litke hugetlb: debit qu...	493	set_page_private(page, (unsigned long) mapping);
90d8b7e61 Adam Litke hugetlb: enforce ...	494 495	} return page;
b45b5bd65 David Gibson [PATCH] hugepage:...	496	}
1da177e4c Linus Torvalds Linux-2.6.12-rc2	497 498 499	static int __init hugetlb_init(void) { unsigned long i;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	500
3c726f8de Benjamin Herrenschmidt [PATCH] ppc64: su...	501 502	if (HPAGE_SHIFT == 0) return 0;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	503 504	for (i = 0; i < MAX_NUMNODES; ++i) INIT_LIST_HEAD(&hugepage_freelists[i]);
63b4613c3 Nishanth Aravamudan hugetlb: fix huge...	505	hugetlb_next_nid = first_node(node_online_map);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	506	for (i = 0; i < max_huge_pages; ++i) {
a482289d4 Nick Piggin [PATCH] hugepage ...	507	if (!alloc_fresh_huge_page())
1da177e4c Linus Torvalds Linux-2.6.12-rc2	508	break;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	509 510 511 512 513 514 515 516 517 518 519 520 521 522 523	} max_huge_pages = free_huge_pages = nr_huge_pages = i; printk("Total HugeTLB memory allocated, %ld ", free_huge_pages); return 0; } module_init(hugetlb_init); static int __init hugetlb_setup(char *s) { if (sscanf(s, "%lu", &max_huge_pages) <= 0) max_huge_pages = 0; return 1; } __setup("hugepages=", hugetlb_setup);
8a6301127 Ken Chen pretend cpuset ha...	524 525 526 527 528 529 530 531 532 533	static unsigned int cpuset_mems_nr(unsigned int *array) { int node; unsigned int nr = 0; for_each_node_mask(node, cpuset_current_mems_allowed) nr += array[node]; return nr; }
1da177e4c Linus Torvalds Linux-2.6.12-rc2	534	#ifdef CONFIG_SYSCTL
#ifdef CONFIG_HIGHMEM
/*
 * Release free huge pages that are not in highmem until nr_huge_pages has
 * dropped to @count.  Highmem pages are left on the free lists.
 */
static void try_to_free_low(unsigned long count)
{
	int i;

	for (i = 0; i < MAX_NUMNODES; ++i) {
		struct page *page, *next;

		list_for_each_entry_safe(page, next,
					 &hugepage_freelists[i], lru) {
			int nid;

			if (count >= nr_huge_pages)
				return;
			if (PageHighMem(page))
				continue;
			/* Look up the node before the page is given back. */
			nid = page_to_nid(page);
			list_del(&page->lru);
			update_and_free_page(page);
			free_huge_pages--;
			free_huge_pages_node[nid]--;
		}
	}
}
#else
/* Without CONFIG_HIGHMEM this is a no-op. */
static inline void try_to_free_low(unsigned long count)
{
}
#endif
7893d1d50 Adam Litke hugetlb: Try to g...	558	#define persistent_huge_pages (nr_huge_pages - surplus_huge_pages)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	559 560	static unsigned long set_max_huge_pages(unsigned long count) {
7893d1d50 Adam Litke hugetlb: Try to g...	561	unsigned long min_count, ret;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	562
7893d1d50 Adam Litke hugetlb: Try to g...	563 564 565 566	/* * Increase the pool size * First take pages out of surplus state. Then make up the * remaining difference by allocating fresh huge pages.
d1c3fb1f8 Nishanth Aravamudan hugetlb: introduc...	567 568 569 570 571 572	* * We might race with alloc_buddy_huge_page() here and be unable * to convert a surplus huge page to a normal huge page. That is * not critical, though, it just means the overall size of the * pool might be one hugepage larger than it needs to be, but * within all the constraints specified by the sysctls.
7893d1d50 Adam Litke hugetlb: Try to g...	573	*/
1da177e4c Linus Torvalds Linux-2.6.12-rc2	574	spin_lock(&hugetlb_lock);
7893d1d50 Adam Litke hugetlb: Try to g...	575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593	while (surplus_huge_pages && count > persistent_huge_pages) { if (!adjust_pool_surplus(-1)) break; } while (count > persistent_huge_pages) { int ret; /* * If this allocation races such that we no longer need the * page, free_huge_page will handle it by freeing the page * and reducing the surplus. */ spin_unlock(&hugetlb_lock); ret = alloc_fresh_huge_page(); spin_lock(&hugetlb_lock); if (!ret) goto out; }
7893d1d50 Adam Litke hugetlb: Try to g...	594 595 596 597 598 599 600	/* * Decrease the pool size * First return free pages to the buddy allocator (being careful * to keep enough around to satisfy reservations). Then place * pages into surplus state as needed so the pool will shrink * to the desired size as pages become free.
d1c3fb1f8 Nishanth Aravamudan hugetlb: introduc...	601 602 603 604 605 606 607 608	* * By placing pages into the surplus state independent of the * overcommit value, we are allowing the surplus pool size to * exceed overcommit. There are few sane options here. Since * alloc_buddy_huge_page() is checking the global counter, * though, we'll note that we're not allowed to exceed surplus * and won't grow the pool anywhere else. Not until one of the * sysctls are changed, or the surplus pages go out of use.
7893d1d50 Adam Litke hugetlb: Try to g...	609	*/
6b0c880df Adam Litke hugetlb: fix pool...	610 611	min_count = resv_huge_pages + nr_huge_pages - free_huge_pages; min_count = max(count, min_count);
7893d1d50 Adam Litke hugetlb: Try to g...	612 613	try_to_free_low(min_count); while (min_count < persistent_huge_pages) {
348e1e04b Nishanth Aravamudan hugetlb: fix pool...	614	struct page *page = dequeue_huge_page();
1da177e4c Linus Torvalds Linux-2.6.12-rc2	615 616 617 618	if (!page) break; update_and_free_page(page); }
7893d1d50 Adam Litke hugetlb: Try to g...	619 620 621 622 623 624	while (count < persistent_huge_pages) { if (!adjust_pool_surplus(1)) break; } out: ret = persistent_huge_pages;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	625	spin_unlock(&hugetlb_lock);
7893d1d50 Adam Litke hugetlb: Try to g...	626	return ret;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	627 628 629 630 631 632 633 634 635 636	} int hugetlb_sysctl_handler(struct ctl_table table, int write, struct file file, void __user buffer, size_t length, loff_t *ppos) { proc_doulongvec_minmax(table, write, file, buffer, length, ppos); max_huge_pages = set_max_huge_pages(max_huge_pages); return 0; }
396faf030 Mel Gorman Allow huge page a...	637 638 639 640 641 642 643 644 645 646 647 648	int hugetlb_treat_movable_handler(struct ctl_table table, int write, struct file file, void __user buffer, size_t length, loff_t *ppos) { proc_dointvec(table, write, file, buffer, length, ppos); if (hugepages_treat_as_movable) htlb_alloc_mask = GFP_HIGHUSER_MOVABLE; else htlb_alloc_mask = GFP_HIGHUSER; return 0; }
a3d0c6aa1 Nishanth Aravamudan hugetlb: add lock...	649 650 651 652	int hugetlb_overcommit_handler(struct ctl_table table, int write, struct file file, void __user buffer, size_t length, loff_t *ppos) {
a3d0c6aa1 Nishanth Aravamudan hugetlb: add lock...	653	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
064d9efe9 Nishanth Aravamudan hugetlb: fix over...	654 655	spin_lock(&hugetlb_lock); nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
a3d0c6aa1 Nishanth Aravamudan hugetlb: add lock...	656 657 658	spin_unlock(&hugetlb_lock); return 0; }
1da177e4c Linus Torvalds Linux-2.6.12-rc2	659 660 661 662 663 664 665 666 667	#endif /* CONFIG_SYSCTL / int hugetlb_report_meminfo(char buf) { return sprintf(buf, "HugePages_Total: %5lu " "HugePages_Free: %5lu "
a43a8c39b Kenneth W Chen [PATCH] tightenin...	668 669	"HugePages_Rsvd: %5lu "
7893d1d50 Adam Litke hugetlb: Try to g...	670 671	"HugePages_Surp: %5lu "
1da177e4c Linus Torvalds Linux-2.6.12-rc2	672 673 674 675	"Hugepagesize: %5lu kB ", nr_huge_pages, free_huge_pages,
a43a8c39b Kenneth W Chen [PATCH] tightenin...	676	resv_huge_pages,
7893d1d50 Adam Litke hugetlb: Try to g...	677	surplus_huge_pages,
1da177e4c Linus Torvalds Linux-2.6.12-rc2	678 679 680 681 682 683 684 685	HPAGE_SIZE/1024); } int hugetlb_report_node_meminfo(int nid, char *buf) { return sprintf(buf, "Node %d HugePages_Total: %5u "
a1de09195 Nishanth Aravamudan hugetlb: indicate...	686 687 688 689	"Node %d HugePages_Free: %5u " "Node %d HugePages_Surp: %5u ",
1da177e4c Linus Torvalds Linux-2.6.12-rc2	690	nid, nr_huge_pages_node[nid],
a1de09195 Nishanth Aravamudan hugetlb: indicate...	691 692	nid, free_huge_pages_node[nid], nid, surplus_huge_pages_node[nid]);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	693	}
1da177e4c Linus Torvalds Linux-2.6.12-rc2	694 695 696 697 698	/* Return the number pages of memory we physically have, in PAGE_SIZE units. / unsigned long hugetlb_total_pages(void) { return nr_huge_pages (HPAGE_SIZE / PAGE_SIZE); }
1da177e4c Linus Torvalds Linux-2.6.12-rc2	699 700 701 702 703 704 705	/* * We cannot handle pagefaults against hugetlb pages at all. They cause * handle_mm_fault() to try to instantiate regular-sized pages in the * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get * this far. */
d0217ac04 Nick Piggin mm: fault feedbac...	706	static int hugetlb_vm_op_fault(struct vm_area_struct vma, struct vm_fault vmf)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	707 708	{ BUG();
d0217ac04 Nick Piggin mm: fault feedbac...	709	return 0;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	710 711 712	} struct vm_operations_struct hugetlb_vm_ops = {
d0217ac04 Nick Piggin mm: fault feedbac...	713	.fault = hugetlb_vm_op_fault,
1da177e4c Linus Torvalds Linux-2.6.12-rc2	714	};
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	715 716	static pte_t make_huge_pte(struct vm_area_struct vma, struct page page, int writable)
63551ae0f David Gibson [PATCH] Hugepage ...	717 718	{ pte_t entry;
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	719	if (writable) {
63551ae0f David Gibson [PATCH] Hugepage ...	720 721 722	entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); } else {
7f2e9525b Gerald Schaefer hugetlbfs: common...	723	entry = huge_pte_wrprotect(mk_pte(page, vma->vm_page_prot));
63551ae0f David Gibson [PATCH] Hugepage ...	724 725 726 727 728 729	} entry = pte_mkyoung(entry); entry = pte_mkhuge(entry); return entry; }
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	730 731 732 733	static void set_huge_ptep_writable(struct vm_area_struct vma, unsigned long address, pte_t ptep) { pte_t entry;
7f2e9525b Gerald Schaefer hugetlbfs: common...	734 735	entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep))); if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) {
8dab5241d Benjamin Herrenschmidt Rework ptep_set_a...	736	update_mmu_cache(vma, address, entry);
8dab5241d Benjamin Herrenschmidt Rework ptep_set_a...	737	}
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	738	}
63551ae0f David Gibson [PATCH] Hugepage ...	739 740 741 742 743	int copy_hugetlb_page_range(struct mm_struct dst, struct mm_struct src, struct vm_area_struct vma) { pte_t src_pte, dst_pte, entry; struct page ptepage;
1c59827d1 Hugh Dickins [PATCH] mm: huget...	744	unsigned long addr;
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	745 746 747	int cow; cow = (vma->vm_flags & (VM_SHARED \| VM_MAYWRITE)) == VM_MAYWRITE;
63551ae0f David Gibson [PATCH] Hugepage ...	748
1c59827d1 Hugh Dickins [PATCH] mm: huget...	749	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
c74df32c7 Hugh Dickins [PATCH] mm: ptd_a...	750 751 752	src_pte = huge_pte_offset(src, addr); if (!src_pte) continue;
63551ae0f David Gibson [PATCH] Hugepage ...	753 754 755	dst_pte = huge_pte_alloc(dst, addr); if (!dst_pte) goto nomem;
c5c99429f Larry Woodman fix hugepages lea...	756 757 758 759	/* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) continue;
c74df32c7 Hugh Dickins [PATCH] mm: ptd_a...	760	spin_lock(&dst->page_table_lock);
464787581 Nick Piggin hugetlb: fix lock...	761	spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
7f2e9525b Gerald Schaefer hugetlbfs: common...	762	if (!huge_pte_none(huge_ptep_get(src_pte))) {
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	763	if (cow)
7f2e9525b Gerald Schaefer hugetlbfs: common...	764 765	huge_ptep_set_wrprotect(src, addr, src_pte); entry = huge_ptep_get(src_pte);
1c59827d1 Hugh Dickins [PATCH] mm: huget...	766 767	ptepage = pte_page(entry); get_page(ptepage);
1c59827d1 Hugh Dickins [PATCH] mm: huget...	768 769 770	set_huge_pte_at(dst, addr, dst_pte, entry); } spin_unlock(&src->page_table_lock);
c74df32c7 Hugh Dickins [PATCH] mm: ptd_a...	771	spin_unlock(&dst->page_table_lock);
63551ae0f David Gibson [PATCH] Hugepage ...	772 773 774 775 776 777	} return 0; nomem: return -ENOMEM; }
502717f4e Kenneth W Chen [PATCH] hugetlb: ...	778 779	void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
63551ae0f David Gibson [PATCH] Hugepage ...	780 781 782	{ struct mm_struct *mm = vma->vm_mm; unsigned long address;
c7546f8f0 David Gibson [PATCH] Fix hugep...	783	pte_t *ptep;
63551ae0f David Gibson [PATCH] Hugepage ...	784 785	pte_t pte; struct page *page;
fe1668ae5 Kenneth W Chen [PATCH] enforce p...	786	struct page *tmp;
c0a499c2c Kenneth W Chen [PATCH] __unmap_h...	787 788 789 790 791	/* * A page gathering list, protected by per file i_mmap_lock. The * lock is used to avoid list corruption from multiple unmapping * of the same page since we are using page->lru. */
fe1668ae5 Kenneth W Chen [PATCH] enforce p...	792	LIST_HEAD(page_list);
63551ae0f David Gibson [PATCH] Hugepage ...	793 794 795 796	WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~HPAGE_MASK); BUG_ON(end & ~HPAGE_MASK);
508034a32 Hugh Dickins [PATCH] mm: unmap...	797	spin_lock(&mm->page_table_lock);
63551ae0f David Gibson [PATCH] Hugepage ...	798	for (address = start; address < end; address += HPAGE_SIZE) {
c7546f8f0 David Gibson [PATCH] Fix hugep...	799	ptep = huge_pte_offset(mm, address);
4c8872659 Adam Litke [PATCH] hugetlb: ...	800	if (!ptep)
c7546f8f0 David Gibson [PATCH] Fix hugep...	801	continue;
39dde65c9 Kenneth W Chen [PATCH] shared pa...	802 803	if (huge_pmd_unshare(mm, &address, ptep)) continue;
c7546f8f0 David Gibson [PATCH] Fix hugep...	804	pte = huge_ptep_get_and_clear(mm, address, ptep);
7f2e9525b Gerald Schaefer hugetlbfs: common...	805	if (huge_pte_none(pte))
63551ae0f David Gibson [PATCH] Hugepage ...	806	continue;
c7546f8f0 David Gibson [PATCH] Fix hugep...	807
63551ae0f David Gibson [PATCH] Hugepage ...	808	page = pte_page(pte);
6649a3863 Ken Chen [PATCH] hugetlb: ...	809 810	if (pte_dirty(pte)) set_page_dirty(page);
fe1668ae5 Kenneth W Chen [PATCH] enforce p...	811	list_add(&page->lru, &page_list);
63551ae0f David Gibson [PATCH] Hugepage ...	812	}
1da177e4c Linus Torvalds Linux-2.6.12-rc2	813	spin_unlock(&mm->page_table_lock);
508034a32 Hugh Dickins [PATCH] mm: unmap...	814	flush_tlb_range(vma, start, end);
fe1668ae5 Kenneth W Chen [PATCH] enforce p...	815 816 817 818	list_for_each_entry_safe(page, tmp, &page_list, lru) { list_del(&page->lru); put_page(page); }
1da177e4c Linus Torvalds Linux-2.6.12-rc2	819	}
63551ae0f David Gibson [PATCH] Hugepage ...	820
502717f4e Kenneth W Chen [PATCH] hugetlb: ...	821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837	void unmap_hugepage_range(struct vm_area_struct vma, unsigned long start, unsigned long end) { / * It is undesirable to test vma->vm_file as it should be non-null * for valid hugetlb area. However, vm_file will be NULL in the error * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails, * do_mmap_pgoff() nullifies vma->vm_file before calling this function * to clean up. Since no pte has actually been setup, it is safe to * do nothing in this case. */ if (vma->vm_file) { spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); __unmap_hugepage_range(vma, start, end); spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); } }
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	838 839 840 841	static int hugetlb_cow(struct mm_struct mm, struct vm_area_struct vma, unsigned long address, pte_t ptep, pte_t pte) { struct page old_page, *new_page;
79ac6ba40 David Gibson [PATCH] hugepage:...	842	int avoidcopy;
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	843 844 845 846 847 848 849 850	old_page = pte_page(pte); /* If no-one else is actually using this page, avoid the copy * and just make the page writable */ avoidcopy = (page_count(old_page) == 1); if (avoidcopy) { set_huge_ptep_writable(vma, address, ptep);
83c54070e Nick Piggin mm: fault feedbac...	851	return 0;
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	852 853 854	} page_cache_get(old_page);
5da7ca860 Christoph Lameter [PATCH] Add NUMA ...	855	new_page = alloc_huge_page(vma, address);
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	856
2fc39cec6 Adam Litke hugetlb: debit qu...	857	if (IS_ERR(new_page)) {
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	858	page_cache_release(old_page);
2fc39cec6 Adam Litke hugetlb: debit qu...	859	return -PTR_ERR(new_page);
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	860 861 862	} spin_unlock(&mm->page_table_lock);
9de455b20 Atsushi Nemoto [PATCH] Pass vma ...	863	copy_huge_page(new_page, old_page, address, vma);
0ed361dec Nick Piggin mm: fix PageUptod...	864	__SetPageUptodate(new_page);
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	865 866 867	spin_lock(&mm->page_table_lock); ptep = huge_pte_offset(mm, address & HPAGE_MASK);
7f2e9525b Gerald Schaefer hugetlbfs: common...	868	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	869	/* Break COW */
8fe627ec5 Gerald Schaefer hugetlbfs: add mi...	870	huge_ptep_clear_flush(vma, address, ptep);
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	871 872 873 874 875 876 877	set_huge_pte_at(mm, address, ptep, make_huge_pte(vma, new_page, 1)); /* Make the old page be freed below */ new_page = old_page; } page_cache_release(new_page); page_cache_release(old_page);
83c54070e Nick Piggin mm: fault feedbac...	878	return 0;
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	879	}
a1ed3dda0 Robert P. J. Day MM: Make needless...	880	static int hugetlb_no_page(struct mm_struct mm, struct vm_area_struct vma,
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	881	unsigned long address, pte_t *ptep, int write_access)
ac9b9c667 Hugh Dickins [PATCH] Fix handl...	882 883	{ int ret = VM_FAULT_SIGBUS;
4c8872659 Adam Litke [PATCH] hugetlb: ...	884 885	unsigned long idx; unsigned long size;
4c8872659 Adam Litke [PATCH] hugetlb: ...	886 887	struct page page; struct address_space mapping;
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	888	pte_t new_pte;
4c8872659 Adam Litke [PATCH] hugetlb: ...	889
4c8872659 Adam Litke [PATCH] hugetlb: ...	890 891 892 893 894 895 896 897	mapping = vma->vm_file->f_mapping; idx = ((address - vma->vm_start) >> HPAGE_SHIFT) + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); /* * Use page lock to guard against racing truncation * before we get page_table_lock. */
6bda666a0 Christoph Lameter [PATCH] hugepages...	898 899 900	retry: page = find_lock_page(mapping, idx); if (!page) {
ebed4bfc8 Hugh Dickins [PATCH] hugetlb: ...	901 902 903	size = i_size_read(mapping->host) >> HPAGE_SHIFT; if (idx >= size) goto out;
6bda666a0 Christoph Lameter [PATCH] hugepages...	904	page = alloc_huge_page(vma, address);
2fc39cec6 Adam Litke hugetlb: debit qu...	905 906	if (IS_ERR(page)) { ret = -PTR_ERR(page);
6bda666a0 Christoph Lameter [PATCH] hugepages...	907 908	goto out; }
79ac6ba40 David Gibson [PATCH] hugepage:...	909	clear_huge_page(page, address);
0ed361dec Nick Piggin mm: fix PageUptod...	910	__SetPageUptodate(page);
ac9b9c667 Hugh Dickins [PATCH] Fix handl...	911
6bda666a0 Christoph Lameter [PATCH] hugepages...	912 913	if (vma->vm_flags & VM_SHARED) { int err;
45c682a68 Ken Chen hugetlb: fix i_bl...	914	struct inode *inode = mapping->host;
6bda666a0 Christoph Lameter [PATCH] hugepages...	915 916 917 918	err = add_to_page_cache(page, mapping, idx, GFP_KERNEL); if (err) { put_page(page);
6bda666a0 Christoph Lameter [PATCH] hugepages...	919 920 921 922	if (err == -EEXIST) goto retry; goto out; }
45c682a68 Ken Chen hugetlb: fix i_bl...	923 924 925 926	spin_lock(&inode->i_lock); inode->i_blocks += BLOCKS_PER_HUGEPAGE; spin_unlock(&inode->i_lock);
6bda666a0 Christoph Lameter [PATCH] hugepages...	927 928 929	} else lock_page(page); }
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	930
ac9b9c667 Hugh Dickins [PATCH] Fix handl...	931	spin_lock(&mm->page_table_lock);
4c8872659 Adam Litke [PATCH] hugetlb: ...	932 933 934	size = i_size_read(mapping->host) >> HPAGE_SHIFT; if (idx >= size) goto backout;
83c54070e Nick Piggin mm: fault feedbac...	935	ret = 0;
7f2e9525b Gerald Schaefer hugetlbfs: common...	936	if (!huge_pte_none(huge_ptep_get(ptep)))
4c8872659 Adam Litke [PATCH] hugetlb: ...	937	goto backout;
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	938 939 940 941 942 943 944 945	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))); set_huge_pte_at(mm, address, ptep, new_pte); if (write_access && !(vma->vm_flags & VM_SHARED)) { /* Optimization, do the COW without a second fault */ ret = hugetlb_cow(mm, vma, address, ptep, new_pte); }
ac9b9c667 Hugh Dickins [PATCH] Fix handl...	946	spin_unlock(&mm->page_table_lock);
4c8872659 Adam Litke [PATCH] hugetlb: ...	947 948	unlock_page(page); out:
ac9b9c667 Hugh Dickins [PATCH] Fix handl...	949	return ret;
4c8872659 Adam Litke [PATCH] hugetlb: ...	950 951 952	backout: spin_unlock(&mm->page_table_lock);
4c8872659 Adam Litke [PATCH] hugetlb: ...	953 954 955	unlock_page(page); put_page(page); goto out;
ac9b9c667 Hugh Dickins [PATCH] Fix handl...	956	}
86e5216f8 Adam Litke [PATCH] Hugetlb: ...	957 958 959 960 961	int hugetlb_fault(struct mm_struct mm, struct vm_area_struct vma, unsigned long address, int write_access) { pte_t *ptep; pte_t entry;
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	962	int ret;
3935baa9b David Gibson [PATCH] hugepage:...	963	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
86e5216f8 Adam Litke [PATCH] Hugetlb: ...	964 965 966 967	ptep = huge_pte_alloc(mm, address); if (!ptep) return VM_FAULT_OOM;
3935baa9b David Gibson [PATCH] hugepage:...	968 969 970 971 972 973	/* * Serialize hugepage allocation and instantiation, so that we don't * get spurious allocation failures if two CPUs race to instantiate * the same page in the page cache. */ mutex_lock(&hugetlb_instantiation_mutex);
7f2e9525b Gerald Schaefer hugetlbfs: common...	974 975	entry = huge_ptep_get(ptep); if (huge_pte_none(entry)) {
3935baa9b David Gibson [PATCH] hugepage:...	976 977 978 979	ret = hugetlb_no_page(mm, vma, address, ptep, write_access); mutex_unlock(&hugetlb_instantiation_mutex); return ret; }
86e5216f8 Adam Litke [PATCH] Hugetlb: ...	980
83c54070e Nick Piggin mm: fault feedbac...	981	ret = 0;
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	982 983 984	spin_lock(&mm->page_table_lock); /* Check for a racing update before calling hugetlb_cow */
7f2e9525b Gerald Schaefer hugetlbfs: common...	985	if (likely(pte_same(entry, huge_ptep_get(ptep))))
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	986 987 988	if (write_access && !pte_write(entry)) ret = hugetlb_cow(mm, vma, address, ptep, entry); spin_unlock(&mm->page_table_lock);
3935baa9b David Gibson [PATCH] hugepage:...	989	mutex_unlock(&hugetlb_instantiation_mutex);
1e8f889b1 David Gibson [PATCH] Hugetlb: ...	990 991	return ret;
86e5216f8 Adam Litke [PATCH] Hugetlb: ...	992	}
63551ae0f David Gibson [PATCH] Hugepage ...	993 994	int follow_hugetlb_page(struct mm_struct mm, struct vm_area_struct vma, struct page pages, struct vm_area_struct vmas,
5b23dbe81 Adam Litke hugetlb: follow_h...	995 996	unsigned long position, int length, int i, int write)
63551ae0f David Gibson [PATCH] Hugepage ...	997	{
d5d4b0aa4 Kenneth W Chen [PATCH] optimize ...	998 999	unsigned long pfn_offset; unsigned long vaddr = *position;
63551ae0f David Gibson [PATCH] Hugepage ...	1000	int remainder = *length;
1c59827d1 Hugh Dickins [PATCH] mm: huget...	1001	spin_lock(&mm->page_table_lock);
63551ae0f David Gibson [PATCH] Hugepage ...	1002	while (vaddr < vma->vm_end && remainder) {
4c8872659 Adam Litke [PATCH] hugetlb: ...	1003 1004	pte_t pte; struct page page;
63551ae0f David Gibson [PATCH] Hugepage ...	1005
4c8872659 Adam Litke [PATCH] hugetlb: ...	1006 1007 1008 1009 1010 1011	/* * Some archs (sparc64, sh) have multiple pte_ts to each hugepage. We have to make * sure we get the * first, for the page indexing below to work. */ pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
63551ae0f David Gibson [PATCH] Hugepage ...	1012
7f2e9525b Gerald Schaefer hugetlbfs: common...	1013 1014	if (!pte \|\| huge_pte_none(huge_ptep_get(pte)) \|\| (write && !pte_write(huge_ptep_get(pte)))) {
4c8872659 Adam Litke [PATCH] hugetlb: ...	1015	int ret;
63551ae0f David Gibson [PATCH] Hugepage ...	1016
4c8872659 Adam Litke [PATCH] hugetlb: ...	1017	spin_unlock(&mm->page_table_lock);
5b23dbe81 Adam Litke hugetlb: follow_h...	1018	ret = hugetlb_fault(mm, vma, vaddr, write);
4c8872659 Adam Litke [PATCH] hugetlb: ...	1019	spin_lock(&mm->page_table_lock);
a89182c76 Adam Litke Fix VM_FAULT flag...	1020	if (!(ret & VM_FAULT_ERROR))
4c8872659 Adam Litke [PATCH] hugetlb: ...	1021	continue;
63551ae0f David Gibson [PATCH] Hugepage ...	1022
4c8872659 Adam Litke [PATCH] hugetlb: ...	1023 1024 1025 1026 1027	remainder = 0; if (!i) i = -EFAULT; break; }
d5d4b0aa4 Kenneth W Chen [PATCH] optimize ...	1028	pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
7f2e9525b Gerald Schaefer hugetlbfs: common...	1029	page = pte_page(huge_ptep_get(pte));
d5d4b0aa4 Kenneth W Chen [PATCH] optimize ...	1030	same_page:
d6692183a Kenneth W Chen [PATCH] fix extra...	1031 1032	if (pages) { get_page(page);
d5d4b0aa4 Kenneth W Chen [PATCH] optimize ...	1033	pages[i] = page + pfn_offset;
d6692183a Kenneth W Chen [PATCH] fix extra...	1034	}
63551ae0f David Gibson [PATCH] Hugepage ...	1035 1036 1037 1038 1039	if (vmas) vmas[i] = vma; vaddr += PAGE_SIZE;
d5d4b0aa4 Kenneth W Chen [PATCH] optimize ...	1040	++pfn_offset;
63551ae0f David Gibson [PATCH] Hugepage ...	1041 1042	--remainder; ++i;
d5d4b0aa4 Kenneth W Chen [PATCH] optimize ...	1043 1044 1045 1046 1047 1048 1049 1050	if (vaddr < vma->vm_end && remainder && pfn_offset < HPAGE_SIZE/PAGE_SIZE) { /* * We use pfn_offset to avoid touching the pageframes * of this compound page. */ goto same_page; }
63551ae0f David Gibson [PATCH] Hugepage ...	1051	}
1c59827d1 Hugh Dickins [PATCH] mm: huget...	1052	spin_unlock(&mm->page_table_lock);
63551ae0f David Gibson [PATCH] Hugepage ...	1053 1054 1055 1056 1057	length = remainder; position = vaddr; return i; }
8f860591f Zhang, Yanmin [PATCH] Enable mp...	1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068	void hugetlb_change_protection(struct vm_area_struct vma, unsigned long address, unsigned long end, pgprot_t newprot) { struct mm_struct mm = vma->vm_mm; unsigned long start = address; pte_t *ptep; pte_t pte; BUG_ON(address >= end); flush_cache_range(vma, address, end);
39dde65c9 Kenneth W Chen [PATCH] shared pa...	1069	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
8f860591f Zhang, Yanmin [PATCH] Enable mp...	1070 1071 1072 1073 1074	spin_lock(&mm->page_table_lock); for (; address < end; address += HPAGE_SIZE) { ptep = huge_pte_offset(mm, address); if (!ptep) continue;
39dde65c9 Kenneth W Chen [PATCH] shared pa...	1075 1076	if (huge_pmd_unshare(mm, &address, ptep)) continue;
7f2e9525b Gerald Schaefer hugetlbfs: common...	1077	if (!huge_pte_none(huge_ptep_get(ptep))) {
8f860591f Zhang, Yanmin [PATCH] Enable mp...	1078 1079 1080	pte = huge_ptep_get_and_clear(mm, address, ptep); pte = pte_mkhuge(pte_modify(pte, newprot)); set_huge_pte_at(mm, address, ptep, pte);
8f860591f Zhang, Yanmin [PATCH] Enable mp...	1081 1082 1083	} } spin_unlock(&mm->page_table_lock);
39dde65c9 Kenneth W Chen [PATCH] shared pa...	1084	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
8f860591f Zhang, Yanmin [PATCH] Enable mp...	1085 1086 1087	flush_tlb_range(vma, start, end); }
a43a8c39b Kenneth W Chen [PATCH] tightenin...	1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141	struct file_region { struct list_head link; long from; long to; }; static long region_add(struct list_head head, long f, long t) { struct file_region rg, nrg, trg; /* Locate the region we are either in or before. / list_for_each_entry(rg, head, link) if (f <= rg->to) break; / Round our left edge to the current segment if it encloses us. / if (f > rg->from) f = rg->from; / Check for and consume any regions we now overlap with. / nrg = rg; list_for_each_entry_safe(rg, trg, rg->link.prev, link) { if (&rg->link == head) break; if (rg->from > t) break; / If this area reaches higher then extend our area to * include it completely. If this is not the first area * which we intend to reuse, free it. / if (rg->to > t) t = rg->to; if (rg != nrg) { list_del(&rg->link); kfree(rg); } } nrg->from = f; nrg->to = t; return 0; } static long region_chg(struct list_head head, long f, long t) { struct file_region rg, nrg; long chg = 0; /* Locate the region we are before or in. / list_for_each_entry(rg, head, link) if (f <= rg->to) break; / If we are below the current region then a new region is required. * Subtle, allocate a new region at the position but make it zero
183ff22bb Simon Arlott spelling fixes: mm/	1142	* size such that we can guarantee to record the reservation. */
a43a8c39b Kenneth W Chen [PATCH] tightenin...	1143 1144	if (&rg->link == head \|\| t < rg->from) { nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
c80544dc0 Stephen Hemminger sparse pointer us...	1145	if (!nrg)
a43a8c39b Kenneth W Chen [PATCH] tightenin...	1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213	return -ENOMEM; nrg->from = f; nrg->to = f; INIT_LIST_HEAD(&nrg->link); list_add(&nrg->link, rg->link.prev); return t - f; } /* Round our left edge to the current segment if it encloses us. / if (f > rg->from) f = rg->from; chg = t - f; / Check for and consume any regions we now overlap with. / list_for_each_entry(rg, rg->link.prev, link) { if (&rg->link == head) break; if (rg->from > t) return chg; / We overlap with this area, if it extends futher than * us then we must extend ourselves. Account for its * existing reservation. / if (rg->to > t) { chg += rg->to - t; t = rg->to; } chg -= rg->to - rg->from; } return chg; } static long region_truncate(struct list_head head, long end) { struct file_region rg, trg; long chg = 0; /* Locate the region we are either in or before. / list_for_each_entry(rg, head, link) if (end <= rg->to) break; if (&rg->link == head) return 0; / If we are in the middle of a region then adjust it. / if (end > rg->from) { chg = rg->to - end; rg->to = end; rg = list_entry(rg->link.next, typeof(rg), link); } /* Drop any remaining regions. */ list_for_each_entry_safe(rg, trg, rg->link.prev, link) { if (&rg->link == head) break; chg += rg->to - rg->from; list_del(&rg->link); kfree(rg); } return chg; } static int hugetlb_acct_memory(long delta) { int ret = -ENOMEM; spin_lock(&hugetlb_lock);
8a6301127 Ken Chen pretend cpuset ha...	1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230	/* * When cpuset is configured, it breaks the strict hugetlb page * reservation as the accounting is done on a global variable. Such * reservation is completely rubbish in the presence of cpuset because * the reservation is not checked against page availability for the * current cpuset. Application can still potentially OOM'ed by kernel * with lack of free htlb page in cpuset that the task is in. * Attempt to enforce strict accounting with cpuset is almost * impossible (or too ugly) because cpuset is too fluid that * task or memory node can be dynamically moved between cpusets. * * The change of semantics for shared hugetlb mapping with cpuset is * undesirable. However, in order to preserve some of the semantics, * we fall back to check against current free page availability as * a best attempt and hopefully to minimize the impact of changing * semantics that cpuset has. */
e4e574b76 Adam Litke hugetlb: Try to g...	1231 1232 1233	if (delta > 0) { if (gather_surplus_pages(delta) < 0) goto out;
ac09b3a15 Adam Litke hugetlb: close a ...	1234 1235	if (delta > cpuset_mems_nr(free_huge_pages_node)) { return_unused_surplus_pages(delta);
e4e574b76 Adam Litke hugetlb: Try to g...	1236	goto out;
ac09b3a15 Adam Litke hugetlb: close a ...	1237	}
e4e574b76 Adam Litke hugetlb: Try to g...	1238 1239 1240	} ret = 0;
e4e574b76 Adam Litke hugetlb: Try to g...	1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255	if (delta < 0) return_unused_surplus_pages((unsigned long) -delta); out: spin_unlock(&hugetlb_lock); return ret; } int hugetlb_reserve_pages(struct inode *inode, long from, long to) { long ret, chg; chg = region_chg(&inode->i_mapping->private_list, from, to); if (chg < 0) return chg;
8a6301127 Ken Chen pretend cpuset ha...	1256
90d8b7e61 Adam Litke hugetlb: enforce ...	1257 1258	if (hugetlb_get_quota(inode->i_mapping, chg)) return -ENOSPC;
a43a8c39b Kenneth W Chen [PATCH] tightenin...	1259	ret = hugetlb_acct_memory(chg);
68842c9b9 Ken Chen hugetlbfs: fix qu...	1260 1261	if (ret < 0) { hugetlb_put_quota(inode->i_mapping, chg);
a43a8c39b Kenneth W Chen [PATCH] tightenin...	1262	return ret;
68842c9b9 Ken Chen hugetlbfs: fix qu...	1263	}
a43a8c39b Kenneth W Chen [PATCH] tightenin...	1264 1265 1266 1267 1268 1269 1270	region_add(&inode->i_mapping->private_list, from, to); return 0; } void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) { long chg = region_truncate(&inode->i_mapping->private_list, offset);
45c682a68 Ken Chen hugetlb: fix i_bl...	1271 1272 1273 1274	spin_lock(&inode->i_lock); inode->i_blocks -= BLOCKS_PER_HUGEPAGE * freed; spin_unlock(&inode->i_lock);
90d8b7e61 Adam Litke hugetlb: enforce ...	1275 1276	hugetlb_put_quota(inode->i_mapping, (chg - freed)); hugetlb_acct_memory(-(chg - freed));
a43a8c39b Kenneth W Chen [PATCH] tightenin...	1277	}