Blame view
mm/page_alloc.c
83.2 KB
/*
 *  linux/mm/page_alloc.c
 *
 *  Manages the free list, the system allocates free pages here.
 *  Note that kmalloc() lives in slab.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
 *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
 *  Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002
 *          (lots of bits borrowed from Ingo Molnar & Andrew Morton)
 */

#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
#include <linux/vmalloc.h>
#include <linux/mempolicy.h>
#include <linux/stop_machine.h>
#include <linux/sort.h>
#include <linux/pfn.h>

#include <asm/tlbflush.h>
#include <asm/div64.h>
#include "internal.h"

/*
 * MCD - HACK: Find somewhere to initialize this EARLY, or make this
 * initializer cleaner
 */
nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
EXPORT_SYMBOL(node_online_map);
nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
EXPORT_SYMBOL(node_possible_map);
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;

static void __free_pages_ok(struct page *page, unsigned int order);

/*
 * results with 256, 32 in the lowmem_reserve sysctl:
 *	1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
 *	1G machine -> (16M dma, 784M normal, 224M high)
 *	NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
 *	HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
 *	HIGHMEM allocation will leave (224M+784M)/256 of ram reserved in ZONE_DMA
 *
 * TBD: should special case ZONE_DMA32 machines here - in those we normally
 * don't need any ZONE_NORMAL reservation
 */
int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
	 256,
#ifdef CONFIG_ZONE_DMA32
	 256,
#endif
#ifdef CONFIG_HIGHMEM
	 32
#endif
};
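/*
 * A worked check of the ratio arithmetic described above, for the 1G
 * example split (16M DMA, 784M Normal, 224M HighMem) with 4KiB pages.
 * Userspace sketch only; the values and names are illustrative:
 */
#if 0
#include <stdio.h>

int main(void)
{
	long normal = 784L << 8, high = 224L << 8;	/* 1M = 256 pages */

	printf("NORMAL alloc: %ld pages kept in ZONE_DMA (784M/256 ~ 3M)\n",
	       normal / 256);
	printf("HIGHMEM alloc: %ld pages kept in ZONE_NORMAL (224M/32 = 7M)\n",
	       high / 32);
	printf("HIGHMEM alloc: %ld pages kept in ZONE_DMA ((224M+784M)/256 ~ 4M)\n",
	       (high + normal) / 256);
	return 0;
}
#endif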

EXPORT_SYMBOL(totalram_pages);

/*
 * Used by page_zone() to look up the address of the struct zone whose
 * id is encoded in the upper bits of page->flags
 */
struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
EXPORT_SYMBOL(zone_table);

static char *zone_names[MAX_NR_ZONES] = {
	 "DMA",
#ifdef CONFIG_ZONE_DMA32
	 "DMA32",
#endif
	 "Normal",
#ifdef CONFIG_HIGHMEM
	 "HighMem"
#endif
};

int min_free_kbytes = 1024;

unsigned long __meminitdata nr_kernel_pages;
unsigned long __meminitdata nr_all_pages;
static unsigned long __initdata dma_reserve;

#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
  /*
   * MAX_ACTIVE_REGIONS determines the maximum number of distinct
   * ranges of memory (RAM) that may be registered with add_active_range().
   * Ranges passed to add_active_range() will be merged if possible
   * so the number of times add_active_range() can be called is
   * related to the number of nodes and the number of holes
   */
  #ifdef CONFIG_MAX_ACTIVE_REGIONS
    /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */
    #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
  #else
    #if MAX_NUMNODES >= 32
      /* If there can be many nodes, allow up to 50 holes per node */
      #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
    #else
      /* By default, allow up to 256 distinct regions */
      #define MAX_ACTIVE_REGIONS 256
    #endif
  #endif

  struct node_active_region __initdata early_node_map[MAX_ACTIVE_REGIONS];
  int __initdata nr_nodemap_entries;
  unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
  unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
  #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
    unsigned long __initdata node_boundary_start_pfn[MAX_NUMNODES];
    unsigned long __initdata node_boundary_end_pfn[MAX_NUMNODES];
  #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */

#ifdef CONFIG_DEBUG_VM
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{
	int ret = 0;
	unsigned seq;
	unsigned long pfn = page_to_pfn(page);

	do {
		seq = zone_span_seqbegin(zone);
		if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
			ret = 1;
		else if (pfn < zone->zone_start_pfn)
			ret = 1;
	} while (zone_span_seqretry(zone, seq));

	return ret;
}

static int page_is_consistent(struct zone *zone, struct page *page)
{
#ifdef CONFIG_HOLES_IN_ZONE
	if (!pfn_valid(page_to_pfn(page)))
		return 0;
#endif
	if (zone != page_zone(page))
		return 0;

	return 1;
}

/*
 * Temporary debugging check for pages not lying within a given zone.
 */
static int bad_range(struct zone *zone, struct page *page)
{
	if (page_outside_zone_boundaries(zone, page))
		return 1;
	if (!page_is_consistent(zone, page))
		return 1;

	return 0;
}
#else
static inline int bad_range(struct zone *zone, struct page *page)
{
	return 0;
}
#endif

static void bad_page(struct page *page)
{
	printk(KERN_EMERG "Bad page state in process '%s'\n"
		KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
		KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
		KERN_EMERG "Backtrace:\n",
		current->comm, page, (int)(2*sizeof(unsigned long)),
		(unsigned long)page->flags, page->mapping,
		page_mapcount(page), page_count(page));
	dump_stack();
	page->flags &= ~(1 << PG_lru	|
			1 << PG_private |
			1 << PG_locked	|
			1 << PG_active	|
			1 << PG_dirty	|
			1 << PG_reclaim |
			1 << PG_slab	|
			1 << PG_swapcache |
			1 << PG_writeback |
			1 << PG_buddy );
	set_page_count(page, 0);
	reset_page_mapcount(page);
	page->mapping = NULL;
	add_taint(TAINT_BAD_PAGE);
}

/*
 * Higher-order pages are called "compound pages".  They are structured thusly:
 *
 * The first PAGE_SIZE page is called the "head page".
 *
 * The remaining PAGE_SIZE pages are called "tail pages".
 *
 * All pages have PG_compound set.  All pages have their ->private pointing at
 * the head page (even the head page has this).
 *
 * The first tail page's ->lru.next holds the address of the compound page's
 * put_page() function.  Its ->lru.prev holds the order of allocation.
 * This usage means that zero-order pages may not be compound.
 */

static void free_compound_page(struct page *page)
{
	__free_pages_ok(page, (unsigned long)page[1].lru.prev);
}

static void prep_compound_page(struct page *page, unsigned long order)
{
	int i;
	int nr_pages = 1 << order;

	page[1].lru.next = (void *)free_compound_page;	/* set dtor */
	page[1].lru.prev = (void *)order;
	for (i = 0; i < nr_pages; i++) {
		struct page *p = page + i;

		__SetPageCompound(p);
		set_page_private(p, (unsigned long)page);
	}
}

static void destroy_compound_page(struct page *page, unsigned long order)
{
	int i;
	int nr_pages = 1 << order;

	if (unlikely((unsigned long)page[1].lru.prev != order))
		bad_page(page);

	for (i = 0; i < nr_pages; i++) {
		struct page *p = page + i;

		if (unlikely(!PageCompound(p) |
				(page_private(p) != (unsigned long)page)))
			bad_page(page);
		__ClearPageCompound(p);
	}
}
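/*
 * The encoding used by prep_compound_page()/destroy_compound_page() above
 * stores the destructor in page[1].lru.next and the order in page[1].lru.prev.
 * A userspace sketch of that trick (fake_page and my_dtor are illustrative,
 * not the kernel's struct page):
 */
#if 0
#include <stdio.h>

struct fake_page {
	void *lru_next;			/* stands in for page[1].lru.next */
	void *lru_prev;			/* stands in for page[1].lru.prev */
};

static void my_dtor(void) { }

int main(void)
{
	struct fake_page pages[4];	/* an "order-2 compound page" */

	pages[1].lru_next = (void *)my_dtor;	/* set dtor */
	pages[1].lru_prev = (void *)2UL;	/* set order */
	printf("order=%lu dtor=%p\n",
	       (unsigned long)pages[1].lru_prev, pages[1].lru_next);
	return 0;
}
#endif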

static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
{
	int i;

	VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
	/*
	 * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
	 * and __GFP_HIGHMEM from hard or soft interrupt context.
	 */
	VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
	for (i = 0; i < (1 << order); i++)
		clear_highpage(page + i);
}

/*
 * function for dealing with page's order in buddy system.
 * zone->lock is already acquired when we use these.
 * So, we don't need atomic page->flags operations here.
 */
static inline unsigned long page_order(struct page *page)
{
	return page_private(page);
}

static inline void set_page_order(struct page *page, int order)
{
	set_page_private(page, order);
	__SetPageBuddy(page);
}

static inline void rmv_page_order(struct page *page)
{
	__ClearPageBuddy(page);
	set_page_private(page, 0);
}

/*
 * Locate the struct page for both the matching buddy in our
 * pair (buddy1) and the combined O(n+1) page they form (page).
 *
 * 1) Any buddy B1 will have an order O twin B2 which satisfies
 * the following equation:
 *     B2 = B1 ^ (1 << O)
 * For example, if the starting buddy (buddy2) is #8 its order
 * 1 buddy is #10:
 *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
 *
 * 2) Any buddy B will have an order O+1 parent P which
 * satisfies the following equation:
 *     P = B & ~(1 << O)
 *
 * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
 */
static inline struct page *
__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
{
	unsigned long buddy_idx = page_idx ^ (1 << order);

	return page + (buddy_idx - page_idx);
}

static inline unsigned long
__find_combined_index(unsigned long page_idx, unsigned int order)
{
	return (page_idx & ~(1 << order));
}
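/*
 * The two equations above are just an XOR and a mask.  A minimal userspace
 * check of them (find_buddy_idx() and combined_idx() below are demo names,
 * not kernel functions):
 */
#if 0
#include <stdio.h>

static unsigned long find_buddy_idx(unsigned long idx, unsigned int order)
{
	return idx ^ (1UL << order);		/* B2 = B1 ^ (1 << O) */
}

static unsigned long combined_idx(unsigned long idx, unsigned int order)
{
	return idx & ~(1UL << order);		/* P = B & ~(1 << O) */
}

int main(void)
{
	/* the example from the comment: page #8, order 1 -> buddy #10 */
	printf("buddy of 8 at order 1: %lu\n", find_buddy_idx(8, 1));	/* 10 */
	printf("buddy of 10 at order 1: %lu\n", find_buddy_idx(10, 1));	/* 8 */
	printf("parent of 10 at order 1: %lu\n", combined_idx(10, 1));	/* 8 */
	return 0;
}
#endif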

/*
 * This function checks whether a page is free && is the buddy.
 * We can coalesce a page and its buddy if
 * (a) the buddy is not in a hole &&
 * (b) the buddy is in the buddy system &&
 * (c) a page and its buddy have the same order &&
 * (d) a page and its buddy are in the same zone.
 *
 * For recording whether a page is in the buddy system, we use PG_buddy.
 * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
 *
 * For recording page's order, we use page_private(page).
 */
static inline int page_is_buddy(struct page *page, struct page *buddy,
								int order)
{
#ifdef CONFIG_HOLES_IN_ZONE
	if (!pfn_valid(page_to_pfn(buddy)))
		return 0;
#endif

	if (page_zone_id(page) != page_zone_id(buddy))
		return 0;

	if (PageBuddy(buddy) && page_order(buddy) == order) {
		BUG_ON(page_count(buddy) != 0);
		return 1;
	}
	return 0;
}

/*
 * Freeing function for a buddy system allocator.
 *
 * The concept of a buddy system is to maintain direct-mapped table
 * (containing bit values) for memory blocks of various "orders".
 * The bottom level table contains the map for the smallest allocatable
 * units of memory (here, pages), and each level above it describes
 * pairs of units from the levels below, hence, "buddies".
 * At a high level, all that happens here is marking the table entry
 * at the bottom level available, and propagating the changes upward
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep a list of pages, which are heads of continuous
 * free pages of length of (1 << order) and marked with PG_buddy. Page's
 * order is recorded in page_private(page) field.
 * So when we are allocating or freeing one, we can derive the state of the
 * other.  That is, if we allocate a small block, and both were
 * free, the remainder of the region must be split into blocks.
 * If a block is freed, and its buddy is also free, then this
 * triggers coalescing into a block of larger size.
 *
 * -- wli
 */
static inline void __free_one_page(struct page *page,
		struct zone *zone, unsigned int order)
{
	unsigned long page_idx;
	int order_size = 1 << order;

	if (unlikely(PageCompound(page)))
		destroy_compound_page(page, order);

	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);

	VM_BUG_ON(page_idx & (order_size - 1));
	VM_BUG_ON(bad_range(zone, page));

	zone->free_pages += order_size;
	while (order < MAX_ORDER-1) {
		unsigned long combined_idx;
		struct free_area *area;
		struct page *buddy;

		buddy = __page_find_buddy(page, page_idx, order);
		if (!page_is_buddy(page, buddy, order))
			break;		/* Move the buddy up one level. */

		list_del(&buddy->lru);
		area = zone->free_area + order;
		area->nr_free--;
		rmv_page_order(buddy);
		combined_idx = __find_combined_index(page_idx, order);
		page = page + (combined_idx - page_idx);
		page_idx = combined_idx;
		order++;
	}
	set_page_order(page, order);
	list_add(&page->lru, &zone->free_area[order].free_list);
	zone->free_area[order].nr_free++;
}
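/*
 * A trace of the merge loop above: freeing page 8 at order 0 while its
 * buddies happen to be free combines 8+9, then (8,9)+(10,11).  Userspace
 * sketch of just the index arithmetic (no real free lists):
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned long page_idx = 8;
	unsigned int order = 0;

	while (order < 2) {	/* pretend page_is_buddy() succeeds twice */
		unsigned long buddy_idx = page_idx ^ (1UL << order);
		unsigned long combined = page_idx & ~(1UL << order);

		printf("order %u: merge %lu with buddy %lu -> block at %lu\n",
		       order, page_idx, buddy_idx, combined);
		page_idx = combined;
		order++;
	}
	printf("result: order-%u block at index %lu\n", order, page_idx);
	return 0;
}
#endif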

static inline int free_pages_check(struct page *page)
{
	if (unlikely(page_mapcount(page) |
		(page->mapping != NULL)  |
		(page_count(page) != 0)  |
		(page->flags & (
			1 << PG_lru	|
			1 << PG_private |
			1 << PG_locked	|
			1 << PG_active	|
			1 << PG_reclaim	|
			1 << PG_slab	|
			1 << PG_swapcache |
			1 << PG_writeback |
			1 << PG_reserved |
			1 << PG_buddy ))))
		bad_page(page);
	if (PageDirty(page))
		__ClearPageDirty(page);
	/*
	 * For now, we report if PG_reserved was found set, but do not
	 * clear it, and do not free the page.  But we shall soon need
	 * to do more, for when the ZERO_PAGE count wraps negative.
	 */
	return PageReserved(page);
}

/*
 * Frees a list of pages.
 * Assumes all pages on list are in same zone, and of same order.
 * count is the number of pages to free.
 *
 * If the zone was previously in an "all pages pinned" state then look to
 * see if this freeing clears that state.
 *
 * And clear the zone's pages_scanned counter, to hold off the "all pages are
 * pinned" detection logic.
 */
static void free_pages_bulk(struct zone *zone, int count,
					struct list_head *list, int order)
{
	spin_lock(&zone->lock);
	zone->all_unreclaimable = 0;
	zone->pages_scanned = 0;
	while (count--) {
		struct page *page;

		VM_BUG_ON(list_empty(list));
		page = list_entry(list->prev, struct page, lru);
		/* have to delete it as __free_one_page list manipulates */
		list_del(&page->lru);
		__free_one_page(page, zone, order);
	}
	spin_unlock(&zone->lock);
}

static void free_one_page(struct zone *zone, struct page *page, int order)
{
	spin_lock(&zone->lock);
	zone->all_unreclaimable = 0;
	zone->pages_scanned = 0;
	__free_one_page(page, zone, order);
	spin_unlock(&zone->lock);
}

static void __free_pages_ok(struct page *page, unsigned int order)
{
	unsigned long flags;
	int i;
	int reserved = 0;

	for (i = 0 ; i < (1 << order) ; ++i)
		reserved += free_pages_check(page + i);
	if (reserved)
		return;

	if (!PageHighMem(page))
		debug_check_no_locks_freed(page_address(page), PAGE_SIZE << order);
	arch_free_page(page, order);
	kernel_map_pages(page, 1 << order, 0);

	local_irq_save(flags);
	__count_vm_events(PGFREE, 1 << order);
	free_one_page(page_zone(page), page, order);
	local_irq_restore(flags);
}

/*
 * permit the bootmem allocator to evade page validation on high-order frees
 */
void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
{
	if (order == 0) {
		__ClearPageReserved(page);
		set_page_count(page, 0);
		set_page_refcounted(page);
		__free_page(page);
	} else {
		int loop;

		prefetchw(page);
		for (loop = 0; loop < BITS_PER_LONG; loop++) {
			struct page *p = &page[loop];

			if (loop + 1 < BITS_PER_LONG)
				prefetchw(p + 1);
			__ClearPageReserved(p);
			set_page_count(p, 0);
		}
		set_page_refcounted(page);
		__free_pages(page, order);
	}
}

/*
 * The order of subdivision here is critical for the IO subsystem.
 * Please do not alter this order without good reasons and regression
 * testing. Specifically, as large blocks of memory are subdivided,
 * the order in which smaller blocks are delivered depends on the order
 * they're subdivided in this function. This is the primary factor
 * influencing the order in which pages are delivered to the IO
 * subsystem according to empirical testing, and this is also justified
 * by considering the behavior of a buddy system containing a single
 * large block of memory acted on by a series of small allocations.
 * This behavior is a critical factor in sglist merging's success.
 *
 * -- wli
 */
static inline void expand(struct zone *zone, struct page *page,
	int low, int high, struct free_area *area)
{
	unsigned long size = 1 << high;

	while (high > low) {
		area--;
		high--;
		size >>= 1;
		VM_BUG_ON(bad_range(zone, &page[size]));
		list_add(&page[size].lru, &area->free_list);
		area->nr_free++;
		set_page_order(&page[size], high);
	}
}
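/*
 * expand() above is the inverse of the coalescing in __free_one_page():
 * carving an order-0 page out of an order-3 block puts the upper halves
 * back on the order-2, order-1 and order-0 free lists.  A userspace trace
 * of the halving (illustrative; no real struct free_area):
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned int low = 0, high = 3;	/* wanted order 0, found order 3 */
	unsigned long size = 1UL << high;

	while (high > low) {
		high--;
		size >>= 1;
		printf("free list order %u gets %lu page(s) at offset %lu\n",
		       high, size, size);
	}
	printf("caller keeps page(s) [0..%lu]\n", (1UL << low) - 1);
	return 0;
}
#endif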

/*
 * This page is about to be returned from the page allocator
 */
static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
{
	if (unlikely(page_mapcount(page) |
		(page->mapping != NULL)  |
		(page_count(page) != 0)  |
		(page->flags & (
			1 << PG_lru	|
			1 << PG_private	|
			1 << PG_locked	|
			1 << PG_active	|
			1 << PG_dirty	|
			1 << PG_reclaim	|
			1 << PG_slab    |
			1 << PG_swapcache |
			1 << PG_writeback |
			1 << PG_reserved |
			1 << PG_buddy ))))
		bad_page(page);

	/*
	 * For now, we report if PG_reserved was found set, but do not
	 * clear it, and do not allocate the page: as a safety net.
	 */
	if (PageReserved(page))
		return 1;

	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
			1 << PG_referenced | 1 << PG_arch_1 |
			1 << PG_checked | 1 << PG_mappedtodisk);
	set_page_private(page, 0);
	set_page_refcounted(page);
	kernel_map_pages(page, 1 << order, 1);

	if (gfp_flags & __GFP_ZERO)
		prep_zero_page(page, order, gfp_flags);

	if (order && (gfp_flags & __GFP_COMP))
		prep_compound_page(page, order);

	return 0;
}

/*
 * Do the hard work of removing an element from the buddy allocator.
 * Call me with the zone->lock already held.
 */
static struct page *__rmqueue(struct zone *zone, unsigned int order)
{
	struct free_area * area;
	unsigned int current_order;
	struct page *page;

	for (current_order = order; current_order < MAX_ORDER; ++current_order) {
		area = zone->free_area + current_order;
		if (list_empty(&area->free_list))
			continue;

		page = list_entry(area->free_list.next, struct page, lru);
		list_del(&page->lru);
		rmv_page_order(page);
		area->nr_free--;
		zone->free_pages -= 1UL << order;
		expand(zone, page, order, current_order, area);
		return page;
	}

	return NULL;
}

/*
 * Obtain a specified number of elements from the buddy allocator, all under
 * a single hold of the lock, for efficiency.  Add them to the supplied list.
 * Returns the number of new pages which were placed at *list.
 */
static int rmqueue_bulk(struct zone *zone, unsigned int order,
			unsigned long count, struct list_head *list)
{
	int i;

	spin_lock(&zone->lock);
	for (i = 0; i < count; ++i) {
		struct page *page = __rmqueue(zone, order);
		if (unlikely(page == NULL))
			break;
		list_add_tail(&page->lru, list);
	}
	spin_unlock(&zone->lock);
	return i;
}

#ifdef CONFIG_NUMA
/*
 * Called from the slab reaper to drain pagesets on a particular node that
 * belongs to the currently executing processor.
 * Note that this function must be called with the thread pinned to
 * a single processor.
 */
void drain_node_pages(int nodeid)
{
	int i;
	enum zone_type z;
	unsigned long flags;

	for (z = 0; z < MAX_NR_ZONES; z++) {
		struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
		struct per_cpu_pageset *pset;

		if (!populated_zone(zone))
			continue;

		pset = zone_pcp(zone, smp_processor_id());
		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
			struct per_cpu_pages *pcp;

			pcp = &pset->pcp[i];
			if (pcp->count) {
				local_irq_save(flags);
				free_pages_bulk(zone, pcp->count, &pcp->list, 0);
				pcp->count = 0;
				local_irq_restore(flags);
			}
		}
	}
}
#endif

#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
static void __drain_pages(unsigned int cpu)
{
	unsigned long flags;
	struct zone *zone;
	int i;

	for_each_zone(zone) {
		struct per_cpu_pageset *pset;

		pset = zone_pcp(zone, cpu);
		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
			struct per_cpu_pages *pcp;

			pcp = &pset->pcp[i];
			local_irq_save(flags);
			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
			pcp->count = 0;
			local_irq_restore(flags);
		}
	}
}
#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_PM

void mark_free_pages(struct zone *zone)
{
	unsigned long pfn, max_zone_pfn;
	unsigned long flags;
	int order;
	struct list_head *curr;

	if (!zone->spanned_pages)
		return;

	spin_lock_irqsave(&zone->lock, flags);

	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
		if (pfn_valid(pfn)) {
			struct page *page = pfn_to_page(pfn);

			if (!PageNosave(page))
				ClearPageNosaveFree(page);
		}

	for (order = MAX_ORDER - 1; order >= 0; --order)
		list_for_each(curr, &zone->free_area[order].free_list) {
			unsigned long i;

			pfn = page_to_pfn(list_entry(curr, struct page, lru));
			for (i = 0; i < (1UL << order); i++)
				SetPageNosaveFree(pfn_to_page(pfn + i));
		}

	spin_unlock_irqrestore(&zone->lock, flags);
}

/*
 * Spill all of this CPU's per-cpu pages back into the buddy allocator.
 */
void drain_local_pages(void)
{
	unsigned long flags;

	local_irq_save(flags);
	__drain_pages(smp_processor_id());
	local_irq_restore(flags);
}
#endif /* CONFIG_PM */

/*
 * Free a 0-order page
 */
static void fastcall free_hot_cold_page(struct page *page, int cold)
{
	struct zone *zone = page_zone(page);
	struct per_cpu_pages *pcp;
	unsigned long flags;

	if (PageAnon(page))
		page->mapping = NULL;
	if (free_pages_check(page))
		return;

	if (!PageHighMem(page))
		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
	arch_free_page(page, 0);
	kernel_map_pages(page, 1, 0);

	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
	local_irq_save(flags);
	__count_vm_event(PGFREE);
	list_add(&page->lru, &pcp->list);
	pcp->count++;
	if (pcp->count >= pcp->high) {
		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
		pcp->count -= pcp->batch;
	}
	local_irq_restore(flags);
	put_cpu();
}

void fastcall free_hot_page(struct page *page)
{
	free_hot_cold_page(page, 0);
}

void fastcall free_cold_page(struct page *page)
{
	free_hot_cold_page(page, 1);
}
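/*
 * The pcp->high/pcp->batch pair above gives the per-CPU lists hysteresis:
 * frees accumulate until the list reaches 'high', then a whole 'batch' is
 * returned to the buddy lists in one locked call.  A toy userspace model
 * (the values 6 and 2 are illustrative, not the kernel's tuning):
 */
#if 0
#include <stdio.h>

int main(void)
{
	int count = 0, high = 6, batch = 2, freed;

	for (freed = 1; freed <= 8; freed++) {
		count++;			/* list_add + pcp->count++ */
		if (count >= high) {
			count -= batch;		/* free_pages_bulk(..., batch, ...) */
			printf("free #%d: drained %d, count now %d\n",
			       freed, batch, count);
		} else {
			printf("free #%d: cached on pcp list, count %d\n",
			       freed, count);
		}
	}
	return 0;
}
#endif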

/*
 * split_page takes a non-compound higher-order page, and splits it into
 * n (1<<order) sub-pages: page[0..n]
 * Each sub-page must be freed individually.
 *
 * Note: this is probably too low level an operation for use in drivers.
 * Please consult with lkml before using this in your driver.
 */
void split_page(struct page *page, unsigned int order)
{
	int i;

	VM_BUG_ON(PageCompound(page));
	VM_BUG_ON(!page_count(page));
	for (i = 1; i < (1 << order); i++)
		set_page_refcounted(page + i);
}
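/*
 * A sketch of a typical split_page() caller: allocate an order-2 block,
 * keep the first page and release the rest one by one.  The helper name
 * is hypothetical; alloc_pages()/__free_page() are the real interfaces:
 */
#if 0
static struct page *example_grab_one_of_four(void)
{
	struct page *page = alloc_pages(GFP_KERNEL, 2);
	int i;

	if (!page)
		return NULL;
	split_page(page, 2);		/* page[0..3] become independent */
	for (i = 1; i < 4; i++)
		__free_page(page + i);	/* each sub-page freed individually */
	return page;			/* caller later frees page[0] too */
}
#endif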

/*
 * Really, prep_compound_page() should be called from __rmqueue_bulk().  But
 * we cheat by calling it from here, in the order > 0 path.  Saves a branch
 * or two.
 */
static struct page *buffered_rmqueue(struct zonelist *zonelist,
			struct zone *zone, int order, gfp_t gfp_flags)
{
	unsigned long flags;
	struct page *page;
	int cold = !!(gfp_flags & __GFP_COLD);
	int cpu;

again:
	cpu = get_cpu();
	if (likely(order == 0)) {
		struct per_cpu_pages *pcp;

		pcp = &zone_pcp(zone, cpu)->pcp[cold];
		local_irq_save(flags);
		if (!pcp->count) {
			pcp->count += rmqueue_bulk(zone, 0,
						pcp->batch, &pcp->list);
			if (unlikely(!pcp->count))
				goto failed;
		}
		page = list_entry(pcp->list.next, struct page, lru);
		list_del(&page->lru);
		pcp->count--;
	} else {
		spin_lock_irqsave(&zone->lock, flags);
		page = __rmqueue(zone, order);
		spin_unlock(&zone->lock);
		if (!page)
			goto failed;
	}

	__count_zone_vm_events(PGALLOC, zone, 1 << order);
	zone_statistics(zonelist, zone);
	local_irq_restore(flags);
	put_cpu();

	VM_BUG_ON(bad_range(zone, page));
	if (prep_new_page(page, order, gfp_flags))
		goto again;
	return page;

failed:
	local_irq_restore(flags);
	put_cpu();
	return NULL;
}
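/*
 * The order-0 fast path above touches zone->lock only when the per-CPU
 * list is empty, and then refills pcp->batch pages with one bulk call.
 * A toy userspace model of that refill-then-pop sequence (batch value
 * illustrative):
 */
#if 0
#include <stdio.h>

int main(void)
{
	int count = 0, batch = 4, alloc;

	for (alloc = 1; alloc <= 6; alloc++) {
		if (count == 0) {
			count += batch;	/* rmqueue_bulk() under zone->lock */
			printf("alloc #%d: refilled %d pages\n", alloc, batch);
		}
		count--;		/* list_del + pcp->count-- */
		printf("alloc #%d: served from pcp list, %d left\n",
		       alloc, count);
	}
	return 0;
}
#endif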

#define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
#define ALLOC_WMARK_MIN		0x02 /* use pages_min watermark */
#define ALLOC_WMARK_LOW		0x04 /* use pages_low watermark */
#define ALLOC_WMARK_HIGH	0x08 /* use pages_high watermark */
#define ALLOC_HARDER		0x10 /* try to alloc harder */
#define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET		0x40 /* check for correct cpuset */
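/*
 * These bits are OR-ed together; per the comment in __alloc_pages() below,
 * a GFP_ATOMIC caller (__GFP_HIGH, no __GFP_WAIT) ends up with
 * ALLOC_WMARK_MIN | ALLOC_HARDER | ALLOC_HIGH.  A userspace check of what
 * that does to the watermark, mirroring zone_watermark_ok() (the 1024-page
 * pages_min value is illustrative):
 */
#if 0
#include <stdio.h>

#define ALLOC_WMARK_MIN	0x02
#define ALLOC_HARDER	0x10
#define ALLOC_HIGH	0x20

int main(void)
{
	int flags = ALLOC_WMARK_MIN | ALLOC_HARDER | ALLOC_HIGH;
	long min = 1024;		/* pretend zone->pages_min */

	if (flags & ALLOC_HIGH)
		min -= min / 2;		/* 512 */
	if (flags & ALLOC_HARDER)
		min -= min / 4;		/* 384 */
	printf("flags=0x%x: effective watermark %ld of 1024 pages\n",
	       flags, min);
	return 0;
}
#endif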

/*
 * Return 1 if free pages are above 'mark'. This takes into account the order
 * of the allocation.
 */
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
		      int classzone_idx, int alloc_flags)
{
	/* free_pages may go negative - that's OK */
	unsigned long min = mark;
	long free_pages = z->free_pages - (1 << order) + 1;
	int o;

	if (alloc_flags & ALLOC_HIGH)
		min -= min / 2;
	if (alloc_flags & ALLOC_HARDER)
		min -= min / 4;

	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
		return 0;
	for (o = 0; o < order; o++) {
		/* At the next order, this order's pages become unavailable */
		free_pages -= z->free_area[o].nr_free << o;

		/* Require fewer higher order pages to be free */
		min >>= 1;

		if (free_pages <= min)
			return 0;
	}
	return 1;
}
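/*
 * The loop above demands progressively fewer, but higher-order, free pages.
 * A standalone check with the same logic (lowmem_reserve omitted, nr_free[]
 * supplied directly; all numbers illustrative): 1000 free pages split into
 * order-0 chunks fail an order-3 request, while 480 well-formed free pages
 * pass it.
 */
#if 0
#include <stdio.h>

static int watermark_ok(long free_pages, long min, int order, const long *nr_free)
{
	int o;

	free_pages -= (1L << order) - 1;
	if (free_pages <= min)
		return 0;
	for (o = 0; o < order; o++) {
		free_pages -= nr_free[o] << o;	/* this order no longer counts */
		min >>= 1;			/* but fewer pages suffice */
		if (free_pages <= min)
			return 0;
	}
	return 1;
}

int main(void)
{
	long fragmented[4] = { 1000, 0, 0, 0 };	/* 1000 = total free pages */
	long mixed[4] = { 200, 50, 25, 10 };	/* 480 = weighted sum */

	printf("order-3, fragmented: %d\n", watermark_ok(1000, 100, 3, fragmented));
	printf("order-3, mixed:      %d\n", watermark_ok(480, 100, 3, mixed));
	return 0;
}
#endif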

/*
 * get_page_from_freelist goes through the zonelist trying to allocate
 * a page.
 */
static struct page *
get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist, int alloc_flags)
{
	struct zone **z = zonelist->zones;
	struct page *page = NULL;
	int classzone_idx = zone_idx(*z);
	struct zone *zone;

	/*
	 * Go through the zonelist once, looking for a zone with enough free.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	do {
		zone = *z;
		if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) &&
			zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
				break;
		if ((alloc_flags & ALLOC_CPUSET) &&
			!cpuset_zone_allowed(zone, gfp_mask))
			continue;

		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
			unsigned long mark;
			if (alloc_flags & ALLOC_WMARK_MIN)
				mark = zone->pages_min;
			else if (alloc_flags & ALLOC_WMARK_LOW)
				mark = zone->pages_low;
			else
				mark = zone->pages_high;
			if (!zone_watermark_ok(zone, order, mark,
				       classzone_idx, alloc_flags))
				if (!zone_reclaim_mode ||
				    !zone_reclaim(zone, gfp_mask, order))
					continue;
		}

		page = buffered_rmqueue(zonelist, zone, order, gfp_mask);
		if (page) {
			break;
		}
	} while (*(++z) != NULL);
	return page;
}

/*
 * This is the 'heart' of the zoned buddy allocator.
 */
struct page * fastcall
__alloc_pages(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist)
{
	const gfp_t wait = gfp_mask & __GFP_WAIT;
	struct zone **z;
	struct page *page;
	struct reclaim_state reclaim_state;
	struct task_struct *p = current;
	int do_retry;
	int alloc_flags;
	int did_some_progress;

	might_sleep_if(wait);

restart:
	z = zonelist->zones;  /* the list of zones suitable for gfp_mask */

	if (unlikely(*z == NULL)) {
		/* Should this ever happen?? */
		return NULL;
	}

	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
				zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
	if (page)
		goto got_pg;

	do {
		wakeup_kswapd(*z, order);
	} while (*(++z));

	/*
	 * OK, we're below the kswapd watermark and have kicked background
	 * reclaim. Now things get more complex, so set up alloc_flags according
	 * to how we want to proceed.
	 *
	 * The caller may dip into page reserves a bit more if the caller
	 * cannot run direct reclaim, or if the caller has realtime scheduling
	 * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
	 * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
	 */
	alloc_flags = ALLOC_WMARK_MIN;
	if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
		alloc_flags |= ALLOC_HARDER;
	if (gfp_mask & __GFP_HIGH)
		alloc_flags |= ALLOC_HIGH;
	if (wait)
		alloc_flags |= ALLOC_CPUSET;

	/*
	 * Go through the zonelist again. Let __GFP_HIGH and allocations
	 * coming from realtime tasks go deeper into reserves.
	 *
	 * This is the last chance, in general, before the goto nopage.
	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
	if (page)
		goto got_pg;

	/* This allocation should allow future memory freeing. */

	if (((p->flags & PF_MEMALLOC) ||
			unlikely(test_thread_flag(TIF_MEMDIE)))
			&& !in_interrupt()) {
		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
nofail_alloc:
			/* go through the zonelist yet again, ignoring mins */
			page = get_page_from_freelist(gfp_mask, order,
				zonelist, ALLOC_NO_WATERMARKS);
			if (page)
				goto got_pg;
			if (gfp_mask & __GFP_NOFAIL) {
				blk_congestion_wait(WRITE, HZ/50);
				goto nofail_alloc;
			}
		}
		goto nopage;
	}

	/* Atomic allocations - we can't balance anything */
	if (!wait)
		goto nopage;

rebalance:
	cond_resched();

	/* We now go into synchronous reclaim */
	cpuset_memory_pressure_bump();
	p->flags |= PF_MEMALLOC;
	reclaim_state.reclaimed_slab = 0;
	p->reclaim_state = &reclaim_state;

	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);

	p->reclaim_state = NULL;
	p->flags &= ~PF_MEMALLOC;

	cond_resched();

	if (likely(did_some_progress)) {
		page = get_page_from_freelist(gfp_mask, order,
						zonelist, alloc_flags);
		if (page)
			goto got_pg;
	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
		/*
		 * Go through the zonelist yet one more time, keep
		 * very high watermark here, this is only to catch
		 * a parallel oom killing, we must fail if we're still
		 * under heavy pressure.
		 */
		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
				zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
		if (page)
			goto got_pg;

		out_of_memory(zonelist, gfp_mask, order);
		goto restart;
	}

	/*
	 * Don't let big-order allocations loop unless the caller explicitly
	 * requests that.  Wait for some write requests to complete then retry.
	 *
	 * In this implementation, __GFP_REPEAT means __GFP_NOFAIL for order
	 * <= 3, but that may not be true in other implementations.
	 */
	do_retry = 0;
	if (!(gfp_mask & __GFP_NORETRY)) {
		if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
			do_retry = 1;
		if (gfp_mask & __GFP_NOFAIL)
			do_retry = 1;
	}
	if (do_retry) {
		blk_congestion_wait(WRITE, HZ/50);
		goto rebalance;
	}

nopage:
	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
		printk(KERN_WARNING "%s: page allocation failure."
			" order:%d, mode:0x%x\n",
			p->comm, order, gfp_mask);
		dump_stack();
		show_mem();
	}
got_pg:
	return page;
}

EXPORT_SYMBOL(__alloc_pages);

/*
 * Common helper functions.
 */
fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page * page;
	page = alloc_pages(gfp_mask, order);
	if (!page)
		return 0;
	return (unsigned long) page_address(page);
}

EXPORT_SYMBOL(__get_free_pages);
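/*
 * Usage of the helper above is symmetric with free_pages() further down:
 * the order passed when freeing must match the allocation.  A sketch
 * (example_use_four_pages() is a hypothetical caller):
 */
#if 0
static void example_use_four_pages(void)
{
	unsigned long buf = __get_free_pages(GFP_KERNEL, 2);	/* 2^2 pages */

	if (!buf)
		return;
	/* ... use the 4*PAGE_SIZE buffer at (void *)buf ... */
	free_pages(buf, 2);	/* order must match the allocation */
}
#endif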
dd0fc66fb [PATCH] gfp flags... |
1098 |
fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) |
1da177e4c Linux-2.6.12-rc2 |
1099 1100 1101 1102 1103 1104 1105 |
{ struct page * page; /* * get_zeroed_page() returns a 32-bit address, which cannot represent * a highmem page */ |
725d704ec [PATCH] mm: VM_BU... |
1106 |
VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); |
1da177e4c Linux-2.6.12-rc2 |
1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 |
page = alloc_pages(gfp_mask | __GFP_ZERO, 0); if (page) return (unsigned long) page_address(page); return 0; } EXPORT_SYMBOL(get_zeroed_page); void __pagevec_free(struct pagevec *pvec) { int i = pagevec_count(pvec); while (--i >= 0) free_hot_cold_page(pvec->pages[i], pvec->cold); } fastcall void __free_pages(struct page *page, unsigned int order) { |
b5810039a [PATCH] core remo... |
1126 |
if (put_page_testzero(page)) { |
1da177e4c Linux-2.6.12-rc2 |
1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 |
if (order == 0) free_hot_page(page); else __free_pages_ok(page, order); } } EXPORT_SYMBOL(__free_pages); fastcall void free_pages(unsigned long addr, unsigned int order) { if (addr != 0) { |
725d704ec [PATCH] mm: VM_BU... |
1139 |
VM_BUG_ON(!virt_addr_valid((void *)addr)); |
1da177e4c Linux-2.6.12-rc2 |
1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 |
__free_pages(virt_to_page((void *)addr), order); } } EXPORT_SYMBOL(free_pages); /* * Total amount of free (allocatable) RAM: */ unsigned int nr_free_pages(void) { unsigned int sum = 0; struct zone *zone; for_each_zone(zone) sum += zone->free_pages; return sum; } EXPORT_SYMBOL(nr_free_pages); #ifdef CONFIG_NUMA unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) { |
2f6726e54 [PATCH] Apply typ... |
1165 1166 |
unsigned int sum = 0; enum zone_type i; |
1da177e4c Linux-2.6.12-rc2 |
1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 |
for (i = 0; i < MAX_NR_ZONES; i++) sum += pgdat->node_zones[i].free_pages; return sum; } #endif static unsigned int nr_free_zone_pages(int offset) { |
e310fd432 [PATCH] Fix NUMA ... |
1177 1178 |
/* Just pick one node, since fallback list is circular */ pg_data_t *pgdat = NODE_DATA(numa_node_id()); |
1da177e4c Linux-2.6.12-rc2 |
1179 |
unsigned int sum = 0; |
e310fd432 [PATCH] Fix NUMA ... |
1180 1181 1182 |
struct zonelist *zonelist = pgdat->node_zonelists + offset; struct zone **zonep = zonelist->zones; struct zone *zone; |
1da177e4c Linux-2.6.12-rc2 |
1183 |
|
e310fd432 [PATCH] Fix NUMA ... |
1184 1185 1186 1187 1188 |
for (zone = *zonep++; zone; zone = *zonep++) { unsigned long size = zone->present_pages; unsigned long high = zone->pages_high; if (size > high) sum += size - high; |
1da177e4c Linux-2.6.12-rc2 |
1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 |
} return sum; } /* * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL */ unsigned int nr_free_buffer_pages(void) { |
af4ca457e [PATCH] gfp_t: in... |
1199 |
return nr_free_zone_pages(gfp_zone(GFP_USER)); |
1da177e4c Linux-2.6.12-rc2 |
1200 1201 1202 1203 1204 1205 1206 |
} /* * Amount of free RAM allocatable within all zones */ unsigned int nr_free_pagecache_pages(void) { |
af4ca457e [PATCH] gfp_t: in... |
1207 |
return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); |
1da177e4c Linux-2.6.12-rc2 |
1208 |
} |
08e0f6a97 [PATCH] Add NUMA_... |
1209 1210 |
static inline void show_node(struct zone *zone) |
1da177e4c Linux-2.6.12-rc2 |
1211 |
{ |
08e0f6a97 [PATCH] Add NUMA_... |
1212 1213 |
if (NUMA_BUILD) printk("Node %ld ", zone_to_nid(zone)); |
1da177e4c Linux-2.6.12-rc2 |
1214 |
} |
1da177e4c Linux-2.6.12-rc2 |
1215 |
|
1da177e4c Linux-2.6.12-rc2 |
1216 1217 1218 1219 1220 1221 |
void si_meminfo(struct sysinfo *val) { val->totalram = totalram_pages; val->sharedram = 0; val->freeram = nr_free_pages(); val->bufferram = nr_blockdev_pages(); |
1da177e4c Linux-2.6.12-rc2 |
1222 1223 |
val->totalhigh = totalhigh_pages; val->freehigh = nr_free_highpages(); |
1da177e4c Linux-2.6.12-rc2 |
1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 |
val->mem_unit = PAGE_SIZE; } EXPORT_SYMBOL(si_meminfo); #ifdef CONFIG_NUMA void si_meminfo_node(struct sysinfo *val, int nid) { pg_data_t *pgdat = NODE_DATA(nid); val->totalram = pgdat->node_present_pages; val->freeram = nr_free_pages_pgdat(pgdat); |
98d2b0ebd [PATCH] reduce MA... |
1236 |
#ifdef CONFIG_HIGHMEM |
1da177e4c Linux-2.6.12-rc2 |
1237 1238 |
val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; |
98d2b0ebd [PATCH] reduce MA... |
1239 1240 1241 1242 |
#else val->totalhigh = 0; val->freehigh = 0; #endif |
1da177e4c Linux-2.6.12-rc2 |
1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 |
val->mem_unit = PAGE_SIZE; } #endif #define K(x) ((x) << (PAGE_SHIFT-10)) /* * Show free area list (used inside shift_scroll-lock stuff) * We also calculate the percentage fragmentation. We do this by counting the * memory on each free list with the exception of the first item on the list. */ void show_free_areas(void) { |
c72419138 [PATCH] Condense ... |
1256 |
int cpu; |
1da177e4c Linux-2.6.12-rc2 |
1257 1258 1259 1260 1261 1262 |
unsigned long active; unsigned long inactive; unsigned long free; struct zone *zone; for_each_zone(zone) { |
c72419138 [PATCH] Condense ... |
1263 |
if (!populated_zone(zone)) |
1da177e4c Linux-2.6.12-rc2 |
1264 |
continue; |
c72419138 [PATCH] Condense ... |
1265 1266 1267 1268 |
show_node(zone); printk("%s per-cpu: ", zone->name); |
1da177e4c Linux-2.6.12-rc2 |
1269 |
|
6b482c677 [PATCH] Don't pri... |
1270 |
for_each_online_cpu(cpu) { |
1da177e4c Linux-2.6.12-rc2 |
1271 |
struct per_cpu_pageset *pageset; |
e7c8d5c99 [PATCH] node loca... |
1272 |
pageset = zone_pcp(zone, cpu); |
1da177e4c Linux-2.6.12-rc2 |
1273 |
|
c72419138 [PATCH] Condense ... |
1274 1275 1276 1277 1278 1279 1280 |
printk("CPU %4d: Hot: hi:%5d, btch:%4d usd:%4d " "Cold: hi:%5d, btch:%4d usd:%4d ", cpu, pageset->pcp[0].high, pageset->pcp[0].batch, pageset->pcp[0].count, pageset->pcp[1].high, pageset->pcp[1].batch, pageset->pcp[1].count); |
1da177e4c Linux-2.6.12-rc2 |
1281 1282 |
} } |
1da177e4c Linux-2.6.12-rc2 |
1283 |
get_zone_counts(&active, &inactive, &free); |
1da177e4c Linux-2.6.12-rc2 |
1284 1285 1286 1287 1288 |
printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu ", active, inactive, |
b1e7a8fd8 [PATCH] zoned vm ... |
1289 |
global_page_state(NR_FILE_DIRTY), |
ce866b34a [PATCH] zoned vm ... |
1290 |
global_page_state(NR_WRITEBACK), |
fd39fc856 [PATCH] zoned vm ... |
1291 |
global_page_state(NR_UNSTABLE_NFS), |
1da177e4c Linux-2.6.12-rc2 |
1292 |
nr_free_pages(), |
972d1a7b1 [PATCH] ZVC: Supp... |
1293 1294 |
global_page_state(NR_SLAB_RECLAIMABLE) + global_page_state(NR_SLAB_UNRECLAIMABLE), |
65ba55f50 [PATCH] zoned vm ... |
1295 |
global_page_state(NR_FILE_MAPPED), |
df849a152 [PATCH] zoned vm ... |
1296 |
global_page_state(NR_PAGETABLE)); |
1da177e4c Linux-2.6.12-rc2 |
1297 1298 1299 |
for_each_zone(zone) { int i; |
c72419138 [PATCH] Condense ... |
1300 1301 |
if (!populated_zone(zone)) continue; |
1da177e4c Linux-2.6.12-rc2 |
1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 |
show_node(zone); printk("%s" " free:%lukB" " min:%lukB" " low:%lukB" " high:%lukB" " active:%lukB" " inactive:%lukB" " present:%lukB" " pages_scanned:%lu" " all_unreclaimable? %s" " ", zone->name, K(zone->free_pages), K(zone->pages_min), K(zone->pages_low), K(zone->pages_high), K(zone->nr_active), K(zone->nr_inactive), K(zone->present_pages), zone->pages_scanned, (zone->all_unreclaimable ? "yes" : "no") ); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) printk(" %lu", zone->lowmem_reserve[i]); printk(" "); } for_each_zone(zone) { |
8f9de51a4 [PATCH] printk() ... |
1334 |
unsigned long nr[MAX_ORDER], flags, order, total = 0; |
1da177e4c Linux-2.6.12-rc2 |
1335 |
|
c72419138 [PATCH] Condense ... |
1336 1337 |
if (!populated_zone(zone)) continue; |
1da177e4c Linux-2.6.12-rc2 |
1338 1339 |
show_node(zone); printk("%s: ", zone->name); |
1da177e4c Linux-2.6.12-rc2 |
1340 1341 1342 |
spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { |
8f9de51a4 [PATCH] printk() ... |
1343 1344 |
nr[order] = zone->free_area[order].nr_free; total += nr[order] << order; |
1da177e4c Linux-2.6.12-rc2 |
1345 1346 |
} spin_unlock_irqrestore(&zone->lock, flags); |
8f9de51a4 [PATCH] printk() ... |
1347 1348 |
for (order = 0; order < MAX_ORDER; order++) printk("%lu*%lukB ", nr[order], K(1UL) << order); |
1da177e4c Linux-2.6.12-rc2 |
1349 1350 1351 1352 1353 1354 1355 1356 1357 |
printk("= %lukB ", K(total)); } show_swap_cache_info(); } /* * Builds allocation fallback zone lists. |
1a93205bd [PATCH] mm: simpl... |
1358 1359 |
* * Add all populated zones of a node to the zonelist. |
1da177e4c Linux-2.6.12-rc2 |
1360 |
*/ |
86356ab14 [PATCH] wait_tabl... |
1361 |
static int __meminit build_zonelists_node(pg_data_t *pgdat, |
2f6726e54 [PATCH] Apply typ... |
1362 |
struct zonelist *zonelist, int nr_zones, enum zone_type zone_type) |
1da177e4c Linux-2.6.12-rc2 |
1363 |
{ |
1a93205bd [PATCH] mm: simpl... |
1364 |
struct zone *zone; |
98d2b0ebd [PATCH] reduce MA... |
1365 |
BUG_ON(zone_type >= MAX_NR_ZONES); |
2f6726e54 [PATCH] Apply typ... |
1366 |
zone_type++; |
02a68a5eb [PATCH] Fix zone ... |
1367 1368 |
do { |
2f6726e54 [PATCH] Apply typ... |
1369 |
zone_type--; |
070f80326 [PATCH] build_zon... |
1370 |
zone = pgdat->node_zones + zone_type; |
1a93205bd [PATCH] mm: simpl... |
1371 |
if (populated_zone(zone)) { |
070f80326 [PATCH] build_zon... |
1372 1373 |
zonelist->zones[nr_zones++] = zone; check_highest_zone(zone_type); |
1da177e4c Linux-2.6.12-rc2 |
1374 |
} |
02a68a5eb [PATCH] Fix zone ... |
1375 |
|
2f6726e54 [PATCH] Apply typ... |
1376 |
} while (zone_type); |
070f80326 [PATCH] build_zon... |
1377 |
return nr_zones; |
1da177e4c Linux-2.6.12-rc2 |
1378 1379 1380 1381 |
} #ifdef CONFIG_NUMA #define MAX_NODE_LOAD (num_online_nodes()) |
86356ab14 [PATCH] wait_tabl... |
1382 |
static int __meminitdata node_load[MAX_NUMNODES]; |
1da177e4c Linux-2.6.12-rc2 |
1383 |
/** |
4dc3b16ba [PATCH] DocBook: ... |
1384 |
* find_next_best_node - find the next node that should appear in a given node's fallback list |
1da177e4c Linux-2.6.12-rc2 |
1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 |
* @node: node whose fallback list we're appending * @used_node_mask: nodemask_t of already used nodes * * We use a number of factors to determine which is the next node that should * appear on a given node's fallback list. The node should not have appeared * already in @node's fallback list, and it should be the next closest node * according to the distance array (which contains arbitrary distance values * from each node to each node in the system), and should also prefer nodes * with no CPUs, since presumably they'll have very little allocation pressure * on them otherwise. * It returns -1 if no node is found. */ |
86356ab14 [PATCH] wait_tabl... |
1397 |
static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask) |
1da177e4c Linux-2.6.12-rc2 |
1398 |
{ |
4cf808eb4 [PATCH] Handle ho... |
1399 |
int n, val; |
1da177e4c Linux-2.6.12-rc2 |
1400 1401 |
int min_val = INT_MAX; int best_node = -1; |
4cf808eb4 [PATCH] Handle ho... |
1402 1403 1404 1405 1406 |
/* Use the local node if we haven't already */ if (!node_isset(node, *used_node_mask)) { node_set(node, *used_node_mask); return node; } |
1da177e4c Linux-2.6.12-rc2 |
1407 |
|
4cf808eb4 [PATCH] Handle ho... |
1408 1409 |
for_each_online_node(n) { cpumask_t tmp; |
1da177e4c Linux-2.6.12-rc2 |
1410 1411 1412 1413 |
/* Don't want a node to appear more than once */ if (node_isset(n, *used_node_mask)) continue; |
1da177e4c Linux-2.6.12-rc2 |
1414 1415 |
/* Use the distance array to find the distance */ val = node_distance(node, n); |
4cf808eb4 [PATCH] Handle ho... |
1416 1417 |
/* Penalize nodes under us ("prefer the next node") */ val += (n < node); |
1da177e4c Linux-2.6.12-rc2 |
1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 |
/* Give preference to headless and unused nodes */ tmp = node_to_cpumask(n); if (!cpus_empty(tmp)) val += PENALTY_FOR_NODE_WITH_CPUS; /* Slight preference for less loaded node */ val *= (MAX_NODE_LOAD*MAX_NUMNODES); val += node_load[n]; if (val < min_val) { min_val = val; best_node = n; } } if (best_node >= 0) node_set(best_node, *used_node_mask); return best_node; } |
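To make the scoring concrete, here is a minimal userspace sketch of the same heuristic in plain C. The 4-node distance matrix, the has_cpus[] array standing in for node_to_cpumask(), and the penalty value of 1 are illustrative assumptions, not values taken from the kernel or any real machine.

#include <stdio.h>
#include <limits.h>

#define NODES 4
#define PENALTY_FOR_NODE_WITH_CPUS 1    /* assumed default */

static int dist[NODES][NODES] = {       /* made-up SLIT-style distances */
    {10, 20, 20, 30},
    {20, 10, 30, 20},
    {20, 30, 10, 20},
    {30, 20, 20, 10},
};
static int has_cpus[NODES] = {1, 1, 0, 0};  /* nodes 2 and 3 are headless */
static int node_load[NODES];

static int next_best(int node, int *used)
{
    int n, best = -1, min_val = INT_MAX;

    for (n = 0; n < NODES; n++) {
        if (used[n])
            continue;
        int val = dist[node][n];
        val += (n < node);                   /* prefer the next node */
        if (has_cpus[n])
            val += PENALTY_FOR_NODE_WITH_CPUS;
        val *= NODES * NODES;                /* MAX_NODE_LOAD * MAX_NUMNODES */
        val += node_load[n];                 /* slight preference when less loaded */
        if (val < min_val) {
            min_val = val;
            best = n;
        }
    }
    if (best >= 0)
        used[best] = 1;
    return best;
}

int main(void)
{
    int used[NODES] = {1, 0, 0, 0}, n;       /* local node 0 already used */

    printf("fallback order for node 0: 0");
    while ((n = next_best(0, used)) >= 0)
        printf(" %d", n);
    printf("\n");                            /* headless node 2 wins over node 1 */
    return 0;
}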
86356ab14 [PATCH] wait_tabl... |
1438 |
static void __meminit build_zonelists(pg_data_t *pgdat) |
1da177e4c Linux-2.6.12-rc2 |
1439 |
{ |
19655d348 [PATCH] linearly ... |
1440 1441 |
int j, node, local_node; enum zone_type i; |
1da177e4c Linux-2.6.12-rc2 |
1442 1443 1444 1445 1446 |
int prev_node, load; struct zonelist *zonelist; nodemask_t used_mask; /* initialize zonelists */ |
19655d348 [PATCH] linearly ... |
1447 |
for (i = 0; i < MAX_NR_ZONES; i++) { |
1da177e4c Linux-2.6.12-rc2 |
1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 |
zonelist = pgdat->node_zonelists + i; zonelist->zones[0] = NULL; } /* NUMA-aware ordering of nodes */ local_node = pgdat->node_id; load = num_online_nodes(); prev_node = local_node; nodes_clear(used_mask); while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { |
9eeff2395 [PATCH] Zone recl... |
1458 1459 1460 1461 1462 1463 1464 1465 |
int distance = node_distance(local_node, node); /* * If another node is sufficiently far away then it is better * to reclaim pages in a zone before going off node. */ if (distance > RECLAIM_DISTANCE) zone_reclaim_mode = 1; |
1da177e4c Linux-2.6.12-rc2 |
1466 1467 1468 1469 1470 |
/* * We don't want to pressure a particular node. * So we add a penalty to the first node in the same * distance group, making the selection round-robin. */ |
9eeff2395 [PATCH] Zone recl... |
1471 1472 |
if (distance != node_distance(local_node, prev_node)) |
1da177e4c Linux-2.6.12-rc2 |
1473 1474 1475 |
node_load[node] += load; prev_node = node; load--; |
19655d348 [PATCH] linearly ... |
1476 |
for (i = 0; i < MAX_NR_ZONES; i++) { |
1da177e4c Linux-2.6.12-rc2 |
1477 1478 |
zonelist = pgdat->node_zonelists + i; for (j = 0; zonelist->zones[j] != NULL; j++); |
19655d348 [PATCH] linearly ... |
1479 |
j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); |
1da177e4c Linux-2.6.12-rc2 |
1480 1481 1482 1483 1484 1485 |
zonelist->zones[j] = NULL; } } } #else /* CONFIG_NUMA */ |
86356ab14 [PATCH] wait_tabl... |
1486 |
static void __meminit build_zonelists(pg_data_t *pgdat) |
1da177e4c Linux-2.6.12-rc2 |
1487 |
{ |
19655d348 [PATCH] linearly ... |
1488 1489 |
int node, local_node; enum zone_type i,j; |
1da177e4c Linux-2.6.12-rc2 |
1490 1491 |
local_node = pgdat->node_id; |
19655d348 [PATCH] linearly ... |
1492 |
for (i = 0; i < MAX_NR_ZONES; i++) { |
1da177e4c Linux-2.6.12-rc2 |
1493 1494 1495 |
struct zonelist *zonelist; zonelist = pgdat->node_zonelists + i; |
19655d348 [PATCH] linearly ... |
1496 |
j = build_zonelists_node(pgdat, zonelist, 0, i); |
1da177e4c Linux-2.6.12-rc2 |
1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 |
/* * Now we build the zonelist so that it contains the zones * of all the other nodes. * We don't want to pressure a particular node, so when * building the zones for node N, we make sure that the * zones coming right after the local ones are those from * node N+1 (modulo N) */ for (node = local_node + 1; node < MAX_NUMNODES; node++) { if (!node_online(node)) continue; |
19655d348 [PATCH] linearly ... |
1508 |
j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); |
1da177e4c Linux-2.6.12-rc2 |
1509 1510 1511 1512 |
} for (node = 0; node < local_node; node++) { if (!node_online(node)) continue; |
19655d348 [PATCH] linearly ... |
1513 |
j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); |
1da177e4c Linux-2.6.12-rc2 |
1514 1515 1516 1517 1518 1519 1520 |
} zonelist->zones[j] = NULL; } } #endif /* CONFIG_NUMA */ |
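The !CONFIG_NUMA ordering above is just a rotation of node IDs. A standalone sketch, assuming five possible nodes that are all online, with node 2 as the local node:

#include <stdio.h>

#define MAX_NUMNODES 5

int main(void)
{
    int local_node = 2, node;

    printf("zonelist node order for node %d: %d", local_node, local_node);
    for (node = local_node + 1; node < MAX_NUMNODES; node++)
        printf(" %d", node);                 /* nodes after the local one */
    for (node = 0; node < local_node; node++)
        printf(" %d", node);                 /* then wrap around to 0..N-1 */
    printf("\n");                            /* prints: 2 3 4 0 1 */
    return 0;
}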
6811378e7 [PATCH] wait_tabl... |
1521 1522 |
/* return value is int just for stop_machine_run() */ static int __meminit __build_all_zonelists(void *dummy) |
1da177e4c Linux-2.6.12-rc2 |
1523 |
{ |
6811378e7 [PATCH] wait_tabl... |
1524 1525 1526 1527 1528 1529 1530 1531 1532 |
int nid; for_each_online_node(nid) build_zonelists(NODE_DATA(nid)); return 0; } void __meminit build_all_zonelists(void) { if (system_state == SYSTEM_BOOTING) { |
423b41d77 [PATCH] mm/page_a... |
1533 |
__build_all_zonelists(NULL); |
6811378e7 [PATCH] wait_tabl... |
1534 1535 1536 1537 1538 1539 1540 |
cpuset_init_current_mems_allowed(); } else { /* we have to stop all cpus to guarantee there is no user of zonelist */ stop_machine_run(__build_all_zonelists, NULL, NR_CPUS); /* cpuset refresh routine should be here */ } |
bd1e22b8e [PATCH] initialis... |
1541 1542 1543 1544 |
vm_total_pages = nr_free_pagecache_pages(); printk("Built %i zonelists. Total pages: %ld\n", num_online_nodes(), vm_total_pages); |
1da177e4c Linux-2.6.12-rc2 |
1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 |
} /* * Helper functions to size the waitqueue hash table. * Essentially these want to choose hash table sizes sufficiently * large so that collisions trying to wait on pages are rare. * But in fact, the number of active page waitqueues on typical * systems is ridiculously low, less than 200. So this is even * conservative, even though it seems large. * * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to * waitqueues, i.e. the size of the waitq table given the number of pages. */ #define PAGES_PER_WAITQUEUE 256 |
cca448fe9 [PATCH] wait_tabl... |
1559 |
#ifndef CONFIG_MEMORY_HOTPLUG |
02b694dea [PATCH] wait_tabl... |
1560 |
static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) |
1da177e4c Linux-2.6.12-rc2 |
1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 |
{ unsigned long size = 1; pages /= PAGES_PER_WAITQUEUE; while (size < pages) size <<= 1; /* * Once we have dozens or even hundreds of threads sleeping * on IO we've got bigger problems than wait queue collision. * Limit the size of the wait table to a reasonable size. */ size = min(size, 4096UL); return max(size, 4UL); } |
cca448fe9 [PATCH] wait_tabl... |
1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 |
#else /* * A zone's size might be changed by hot-add, so it is not possible to determine * a suitable size for its wait_table. So we use the maximum size now. * * The max wait table size = 4096 x sizeof(wait_queue_head_t), i.e.: * * i386 (preemption config) : 4096 x 16 = 64Kbyte. * ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte. * ia64, x86-64 (preemption) : 4096 x 24 = 96Kbyte. * * The maximum entries are prepared when a zone's memory is (512K + 256) pages * or more, by the traditional sizing above. It equals: * * i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte. * ia64(16K page size) : = ( 8G + 4M)byte. * powerpc (64K page size) : = (32G +16M)byte. */ static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) { return 4096UL; } #endif |
1da177e4c Linux-2.6.12-rc2 |
1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 |
/* * This is an integer logarithm so that shifts can be used later * to extract the more random high bits from the multiplicative * hash function before the remainder is taken. */ static inline unsigned long wait_table_bits(unsigned long size) { return ffz(~size); } #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) |
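The sizing helper and the integer logarithm are easy to check in userspace. A minimal sketch follows; __builtin_ctzl() stands in for the kernel's ffz(~size), and the zone sizes are made up:

#include <stdio.h>

#define PAGES_PER_WAITQUEUE 256

/* same math as the boot-time wait_table_hash_nr_entries() above */
static unsigned long nr_entries(unsigned long pages)
{
    unsigned long size = 1;

    pages /= PAGES_PER_WAITQUEUE;
    while (size < pages)
        size <<= 1;                   /* round up to a power of two */
    if (size > 4096UL)
        size = 4096UL;                /* min(size, 4096UL) */
    return size < 4UL ? 4UL : size;   /* max(size, 4UL) */
}

int main(void)
{
    unsigned long zone_pages[] = {1024, 131072, 1048576, 33554432};

    for (int i = 0; i < 4; i++) {
        unsigned long n = nr_entries(zone_pages[i]);
        /* for a power of two, ffz(~n) is just the bit index of n */
        printf("%8lu pages -> %4lu entries (%d bits)\n",
               zone_pages[i], n, __builtin_ctzl(n));
    }
    return 0;
}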
1da177e4c Linux-2.6.12-rc2 |
1613 1614 1615 1616 1617 |
/* * Initially all pages are reserved - free ones are freed * up by free_all_bootmem() once the early boot process is * done. Non-atomic initialization, single-pass. */ |
c09b42404 [PATCH] x86_64: a... |
1618 |
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, |
1da177e4c Linux-2.6.12-rc2 |
1619 1620 |
unsigned long start_pfn) { |
1da177e4c Linux-2.6.12-rc2 |
1621 |
struct page *page; |
29751f699 [PATCH] sparsemem... |
1622 1623 |
unsigned long end_pfn = start_pfn + size; unsigned long pfn; |
1da177e4c Linux-2.6.12-rc2 |
1624 |
|
cbe8dd4af [PATCH] memmap_in... |
1625 |
for (pfn = start_pfn; pfn < end_pfn; pfn++) { |
d41dee369 [PATCH] sparsemem... |
1626 1627 1628 1629 |
if (!early_pfn_valid(pfn)) continue; page = pfn_to_page(pfn); set_page_links(page, zone, nid, pfn); |
7835e98b2 [PATCH] remove se... |
1630 |
init_page_count(page); |
1da177e4c Linux-2.6.12-rc2 |
1631 1632 1633 1634 1635 1636 |
reset_page_mapcount(page); SetPageReserved(page); INIT_LIST_HEAD(&page->lru); #ifdef WANT_PAGE_VIRTUAL /* The shift won't overflow because ZONE_NORMAL is below 4G. */ if (!is_highmem_idx(zone)) |
3212c6be2 [PATCH] fix WANT_... |
1637 |
set_page_address(page, __va(pfn << PAGE_SHIFT)); |
1da177e4c Linux-2.6.12-rc2 |
1638 |
#endif |
1da177e4c Linux-2.6.12-rc2 |
1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 |
} } void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, unsigned long size) { int order; for (order = 0; order < MAX_ORDER ; order++) { INIT_LIST_HEAD(&zone->free_area[order].free_list); zone->free_area[order].nr_free = 0; } } |
d41dee369 [PATCH] sparsemem... |
1651 |
#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) |
2f1b62486 [PATCH] reduce MA... |
1652 1653 |
void zonetable_add(struct zone *zone, int nid, enum zone_type zid, unsigned long pfn, unsigned long size) |
d41dee369 [PATCH] sparsemem... |
1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 |
{ unsigned long snum = pfn_to_section_nr(pfn); unsigned long end = pfn_to_section_nr(pfn + size); if (FLAGS_HAS_NODE) zone_table[ZONETABLE_INDEX(nid, zid)] = zone; else for (; snum <= end; snum++) zone_table[ZONETABLE_INDEX(snum, zid)] = zone; } |
1da177e4c Linux-2.6.12-rc2 |
1664 1665 1666 1667 |
#ifndef __HAVE_ARCH_MEMMAP_INIT #define memmap_init(size, nid, zone, start_pfn) \ memmap_init_zone((size), (nid), (zone), (start_pfn)) #endif |
6292d9aaf [PATCH] __cpuinit... |
1668 |
static int __cpuinit zone_batchsize(struct zone *zone) |
e7c8d5c99 [PATCH] node loca... |
1669 1670 1671 1672 1673 |
{ int batch; /* * The per-cpu-pages pools are set to around 1000th of the |
ba56e91c9 [PATCH] mm: page_... |
1674 |
* size of the zone. But no more than 1/2 of a meg. |
e7c8d5c99 [PATCH] node loca... |
1675 1676 1677 1678 |
* * OK, so we don't know how big the cache is. So guess. */ batch = zone->present_pages / 1024; |
ba56e91c9 [PATCH] mm: page_... |
1679 1680 |
if (batch * PAGE_SIZE > 512 * 1024) batch = (512 * 1024) / PAGE_SIZE; |
e7c8d5c99 [PATCH] node loca... |
1681 1682 1683 1684 1685 |
batch /= 4; /* We effectively *= 4 below */ if (batch < 1) batch = 1; /* |
0ceaacc97 [PATCH] Fix up pe... |
1686 1687 1688 |
* Clamp the batch to a 2^n - 1 value. Having a power * of 2 value was found to be more likely to have * suboptimal cache aliasing properties in some cases. |
e7c8d5c99 [PATCH] node loca... |
1689 |
* |
0ceaacc97 [PATCH] Fix up pe... |
1690 1691 1692 1693 |
* For example if 2 tasks are alternately allocating * batches of pages, one task can end up with a lot * of pages of one half of the possible page colors * and the other with pages of the other colors. |
e7c8d5c99 [PATCH] node loca... |
1694 |
*/ |
0ceaacc97 [PATCH] Fix up pe... |
1695 |
batch = (1 << (fls(batch + batch/2)-1)) - 1; |
ba56e91c9 [PATCH] mm: page_... |
1696 |
|
e7c8d5c99 [PATCH] node loca... |
1697 1698 |
return batch; } |
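The batch computation can be replayed standalone. A sketch assuming 4K pages and a hand-rolled fls(); the zone sizes are illustrative:

#include <stdio.h>

#define PAGE_SIZE 4096UL                /* assumed page size */

static int fls_long(unsigned long x)    /* minimal fls() stand-in */
{
    int r = 0;
    while (x) { r++; x >>= 1; }
    return r;
}

static int zone_batchsize(unsigned long present_pages)
{
    int batch = present_pages / 1024;   /* ~1/1000th of the zone */

    if (batch * PAGE_SIZE > 512 * 1024) /* but no more than 512KB */
        batch = (512 * 1024) / PAGE_SIZE;
    batch /= 4;                         /* the pcp high marks multiply it back up */
    if (batch < 1)
        batch = 1;
    /* clamp to 2^n - 1 to avoid cache-aliasing pathologies */
    return (1 << (fls_long(batch + batch / 2) - 1)) - 1;
}

int main(void)
{
    unsigned long sizes[] = {4096, 65536, 262144, 4194304};

    /* tiny zones round down to 0; setup_pageset() clamps with max(1UL, batch) */
    for (int i = 0; i < 4; i++)
        printf("%8lu pages -> batch %d\n", sizes[i], zone_batchsize(sizes[i]));
    return 0;
}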
2caaad41e [PATCH] Reduce si... |
1699 1700 1701 |
inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) { struct per_cpu_pages *pcp; |
1c6fe9465 [PATCH] NUMA: bro... |
1702 |
memset(p, 0, sizeof(*p)); |
2caaad41e [PATCH] Reduce si... |
1703 1704 |
pcp = &p->pcp[0]; /* hot */ pcp->count = 0; |
2caaad41e [PATCH] Reduce si... |
1705 1706 1707 1708 1709 1710 |
pcp->high = 6 * batch; pcp->batch = max(1UL, 1 * batch); INIT_LIST_HEAD(&pcp->list); pcp = &p->pcp[1]; /* cold */ pcp->count = 0; |
2caaad41e [PATCH] Reduce si... |
1711 |
pcp->high = 2 * batch; |
e46a5e28c [PATCH] mm: set p... |
1712 |
pcp->batch = max(1UL, batch/2); |
2caaad41e [PATCH] Reduce si... |
1713 1714 |
INIT_LIST_HEAD(&pcp->list); } |
8ad4b1fb8 [PATCH] Make high... |
1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 |
/* * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist * to the value high for the pageset p. */ static void setup_pagelist_highmark(struct per_cpu_pageset *p, unsigned long high) { struct per_cpu_pages *pcp; pcp = &p->pcp[0]; /* hot list */ pcp->high = high; pcp->batch = max(1UL, high/4); if ((high/4) > (PAGE_SHIFT * 8)) pcp->batch = PAGE_SHIFT * 8; } |
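A short sketch of the per-cpu list watermarks these two functions produce, under assumed inputs (a batch of 31 from zone_batchsize(), 4K pages, and a made-up zone size and fraction):

#include <stdio.h>

#define PAGE_SHIFT 12                   /* assumed 4K pages */

int main(void)
{
    unsigned long batch = 31;

    /* setup_pageset(): hot list 6*batch; cold list 2*batch with half the batch */
    printf("hot:  high=%lu batch=%lu\n", 6 * batch, batch ? batch : 1);
    printf("cold: high=%lu batch=%lu\n", 2 * batch,
           batch / 2 ? batch / 2 : 1);

    /* setup_pagelist_highmark(): high = present_pages/fraction,
     * batch = high/4 capped at PAGE_SHIFT * 8 (96 here) */
    unsigned long high = 262144 / 8;    /* 1GB zone, fraction 8: made up */
    unsigned long b = high / 4;
    if (b > PAGE_SHIFT * 8)
        b = PAGE_SHIFT * 8;
    printf("highmark: high=%lu batch=%lu\n", high, b);
    return 0;
}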
e7c8d5c99 [PATCH] node loca... |
1731 1732 |
#ifdef CONFIG_NUMA /* |
2caaad41e [PATCH] Reduce si... |
1733 1734 1735 1736 1737 1738 1739 |
* Boot pageset table. One per cpu which is going to be used for all * zones and all nodes. The parameters will be set in such a way * that an item put on a list will immediately be handed over to * the buddy list. This is safe since pageset manipulation is done * with interrupts disabled. * * Some NUMA counter updates may also be caught by the boot pagesets. |
b7c84c6ad [PATCH] boot_page... |
1740 1741 1742 1743 1744 1745 1746 1747 |
* * The boot_pagesets must be kept even after bootup is complete for * unused processors and/or zones. They do play a role for bootstrapping * hotplugged processors. * * zoneinfo_show() and maybe other functions do * not check if the processor is online before following the pageset pointer. * Other parts of the kernel may not check if the zone is available. |
2caaad41e [PATCH] Reduce si... |
1748 |
*/ |
88a2a4ac6 [PATCH] percpu da... |
1749 |
static struct per_cpu_pageset boot_pageset[NR_CPUS]; |
2caaad41e [PATCH] Reduce si... |
1750 1751 1752 |
/* * Dynamically allocate memory for the |
e7c8d5c99 [PATCH] node loca... |
1753 1754 |
* per cpu pageset array in struct zone. */ |
6292d9aaf [PATCH] __cpuinit... |
1755 |
static int __cpuinit process_zones(int cpu) |
e7c8d5c99 [PATCH] node loca... |
1756 1757 |
{ struct zone *zone, *dzone; |
e7c8d5c99 [PATCH] node loca... |
1758 1759 |
for_each_zone(zone) { |
e7c8d5c99 [PATCH] node loca... |
1760 |
|
66a550308 [PATCH] Do not al... |
1761 1762 |
if (!populated_zone(zone)) continue; |
23316bc86 [PATCH] mm: clean... |
1763 |
zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), |
e7c8d5c99 [PATCH] node loca... |
1764 |
GFP_KERNEL, cpu_to_node(cpu)); |
23316bc86 [PATCH] mm: clean... |
1765 |
if (!zone_pcp(zone, cpu)) |
e7c8d5c99 [PATCH] node loca... |
1766 |
goto bad; |
e7c8d5c99 [PATCH] node loca... |
1767 |
|
23316bc86 [PATCH] mm: clean... |
1768 |
setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); |
8ad4b1fb8 [PATCH] Make high... |
1769 1770 1771 1772 |
if (percpu_pagelist_fraction) setup_pagelist_highmark(zone_pcp(zone, cpu), (zone->present_pages / percpu_pagelist_fraction)); |
e7c8d5c99 [PATCH] node loca... |
1773 1774 1775 1776 1777 1778 1779 |
} return 0; bad: for_each_zone(dzone) { if (dzone == zone) break; |
23316bc86 [PATCH] mm: clean... |
1780 1781 |
kfree(zone_pcp(dzone, cpu)); zone_pcp(dzone, cpu) = NULL; |
e7c8d5c99 [PATCH] node loca... |
1782 1783 1784 1785 1786 1787 |
} return -ENOMEM; } static inline void free_zone_pagesets(int cpu) { |
e7c8d5c99 [PATCH] node loca... |
1788 1789 1790 1791 |
struct zone *zone; for_each_zone(zone) { struct per_cpu_pageset *pset = zone_pcp(zone, cpu); |
f3ef9ead3 [PATCH] do not fr... |
1792 1793 1794 |
/* Free per_cpu_pageset if it is slab allocated */ if (pset != &boot_pageset[cpu]) kfree(pset); |
e7c8d5c99 [PATCH] node loca... |
1795 |
zone_pcp(zone, cpu) = NULL; |
e7c8d5c99 [PATCH] node loca... |
1796 |
} |
e7c8d5c99 [PATCH] node loca... |
1797 |
} |
9c7b216d2 [PATCH] cpu hotpl... |
1798 |
static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, |
e7c8d5c99 [PATCH] node loca... |
1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 |
unsigned long action, void *hcpu) { int cpu = (long)hcpu; int ret = NOTIFY_OK; switch (action) { case CPU_UP_PREPARE: if (process_zones(cpu)) ret = NOTIFY_BAD; break; |
b0d416932 [PATCH] x86_64: W... |
1810 |
case CPU_UP_CANCELED: |
e7c8d5c99 [PATCH] node loca... |
1811 1812 1813 |
case CPU_DEAD: free_zone_pagesets(cpu); break; |
e7c8d5c99 [PATCH] node loca... |
1814 1815 1816 1817 1818 |
default: break; } return ret; } |
74b85f379 [PATCH] cpu hotpl... |
1819 |
static struct notifier_block __cpuinitdata pageset_notifier = |
e7c8d5c99 [PATCH] node loca... |
1820 |
{ &pageset_cpuup_callback, NULL, 0 }; |
78d9955bb [PATCH] missing p... |
1821 |
void __init setup_per_cpu_pageset(void) |
e7c8d5c99 [PATCH] node loca... |
1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 |
{ int err; /* Initialize per_cpu_pageset for cpu 0. * A cpuup callback will do this for every cpu * as it comes online */ err = process_zones(smp_processor_id()); BUG_ON(err); register_cpu_notifier(&pageset_notifier); } #endif |
c09b42404 [PATCH] x86_64: a... |
1835 |
static __meminit |
cca448fe9 [PATCH] wait_tabl... |
1836 |
int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) |
ed8ece2ec [PATCH] memory ho... |
1837 1838 1839 |
{ int i; struct pglist_data *pgdat = zone->zone_pgdat; |
cca448fe9 [PATCH] wait_tabl... |
1840 |
size_t alloc_size; |
ed8ece2ec [PATCH] memory ho... |
1841 1842 1843 1844 1845 |
/* * The per-page waitqueue mechanism uses hashed waitqueues * per zone. */ |
02b694dea [PATCH] wait_tabl... |
1846 1847 1848 1849 |
zone->wait_table_hash_nr_entries = wait_table_hash_nr_entries(zone_size_pages); zone->wait_table_bits = wait_table_bits(zone->wait_table_hash_nr_entries); |
cca448fe9 [PATCH] wait_tabl... |
1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 |
alloc_size = zone->wait_table_hash_nr_entries * sizeof(wait_queue_head_t); if (system_state == SYSTEM_BOOTING) { zone->wait_table = (wait_queue_head_t *) alloc_bootmem_node(pgdat, alloc_size); } else { /* * This case means that a zone whose size was 0 gets new memory * via memory hot-add. * But it may be the case that a new node was hot-added. In * this case vmalloc() will not be able to use this new node's * memory - this wait_table must be initialized to use this new * node itself as well. * To use this new node's memory, further consideration will be * necessary. */ zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size); } if (!zone->wait_table) return -ENOMEM; |
ed8ece2ec [PATCH] memory ho... |
1871 |
|
02b694dea [PATCH] wait_tabl... |
1872 |
for (i = 0; i < zone->wait_table_hash_nr_entries; ++i) |
ed8ece2ec [PATCH] memory ho... |
1873 |
init_waitqueue_head(zone->wait_table + i); |
cca448fe9 [PATCH] wait_tabl... |
1874 1875 |
return 0; |
ed8ece2ec [PATCH] memory ho... |
1876 |
} |
c09b42404 [PATCH] x86_64: a... |
1877 |
static __meminit void zone_pcp_init(struct zone *zone) |
ed8ece2ec [PATCH] memory ho... |
1878 1879 1880 1881 1882 1883 1884 |
{ int cpu; unsigned long batch = zone_batchsize(zone); for (cpu = 0; cpu < NR_CPUS; cpu++) { #ifdef CONFIG_NUMA /* Early boot. Slab allocator not functional yet */ |
23316bc86 [PATCH] mm: clean... |
1885 |
zone_pcp(zone, cpu) = &boot_pageset[cpu]; |
ed8ece2ec [PATCH] memory ho... |
1886 1887 1888 1889 1890 |
setup_pageset(&boot_pageset[cpu],0); #else setup_pageset(zone_pcp(zone,cpu), batch); #endif } |
f5335c0f1 [PATCH] quieten z... |
1891 1892 1893 1894 |
if (zone->present_pages) printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", zone->name, zone->present_pages, batch); |
ed8ece2ec [PATCH] memory ho... |
1895 |
} |
718127cc3 [PATCH] wait_tabl... |
1896 1897 1898 |
__meminit int init_currently_empty_zone(struct zone *zone, unsigned long zone_start_pfn, unsigned long size) |
ed8ece2ec [PATCH] memory ho... |
1899 1900 |
{ struct pglist_data *pgdat = zone->zone_pgdat; |
cca448fe9 [PATCH] wait_tabl... |
1901 1902 1903 1904 |
int ret; ret = zone_wait_table_init(zone, size); if (ret) return ret; |
ed8ece2ec [PATCH] memory ho... |
1905 |
pgdat->nr_zones = zone_idx(zone) + 1; |
ed8ece2ec [PATCH] memory ho... |
1906 1907 1908 1909 1910 |
zone->zone_start_pfn = zone_start_pfn; memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn); zone_init_free_lists(pgdat, zone, zone->spanned_pages); |
718127cc3 [PATCH] wait_tabl... |
1911 1912 |
return 0; |
ed8ece2ec [PATCH] memory ho... |
1913 |
} |
c713216de [PATCH] Introduce... |
1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 |
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP /* * Basic iterator support. Return the first range of PFNs for a node * Note: nid == MAX_NUMNODES returns first region regardless of node */ static int __init first_active_region_index_in_nid(int nid) { int i; for (i = 0; i < nr_nodemap_entries; i++) if (nid == MAX_NUMNODES || early_node_map[i].nid == nid) return i; return -1; } /* * Basic iterator support. Return the next active range of PFNs for a node * Note: nid == MAX_NUMNODES returns next region regardless of node */ static int __init next_active_region_index_in_nid(int index, int nid) { for (index = index + 1; index < nr_nodemap_entries; index++) if (nid == MAX_NUMNODES || early_node_map[index].nid == nid) return index; return -1; } #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID /* * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. * Architectures may implement their own version but if add_active_range() * was used and there are no special requirements, this is a convenient * alternative */ int __init early_pfn_to_nid(unsigned long pfn) { int i; for (i = 0; i < nr_nodemap_entries; i++) { unsigned long start_pfn = early_node_map[i].start_pfn; unsigned long end_pfn = early_node_map[i].end_pfn; if (start_pfn <= pfn && pfn < end_pfn) return early_node_map[i].nid; } return 0; } #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ /* Basic iterator support to walk early_node_map[] */ #define for_each_active_range_index_in_nid(i, nid) \ for (i = first_active_region_index_in_nid(nid); i != -1; \ i = next_active_region_index_in_nid(i, nid)) /** * free_bootmem_with_active_regions - Call free_bootmem_node for each active range |
88ca3b94e [PATCH] page_allo... |
1973 1974 |
* @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node |
c713216de [PATCH] Introduce... |
1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 |
* * If an architecture guarantees that all ranges registered with * add_active_ranges() contain no holes and may be freed, this * function may be used instead of calling free_bootmem() manually. */ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn) { int i; for_each_active_range_index_in_nid(i, nid) { unsigned long size_pages = 0; unsigned long end_pfn = early_node_map[i].end_pfn; if (early_node_map[i].start_pfn >= max_low_pfn) continue; if (end_pfn > max_low_pfn) end_pfn = max_low_pfn; size_pages = end_pfn - early_node_map[i].start_pfn; free_bootmem_node(NODE_DATA(early_node_map[i].nid), PFN_PHYS(early_node_map[i].start_pfn), size_pages << PAGE_SHIFT); } } /** * sparse_memory_present_with_active_regions - Call memory_present for each active range |
88ca3b94e [PATCH] page_allo... |
2004 |
* @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. |
c713216de [PATCH] Introduce... |
2005 2006 2007 |
* * If an architecture guarantees that all ranges registered with * add_active_ranges() contain no holes and may be freed, this |
88ca3b94e [PATCH] page_allo... |
2008 |
* function may be used instead of calling memory_present() manually. |
c713216de [PATCH] Introduce... |
2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 |
*/ void __init sparse_memory_present_with_active_regions(int nid) { int i; for_each_active_range_index_in_nid(i, nid) memory_present(early_node_map[i].nid, early_node_map[i].start_pfn, early_node_map[i].end_pfn); } /** |
fb01439c5 [PATCH] Allow an ... |
2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 |
* push_node_boundaries - Push node boundaries to at least the requested boundary * @nid: The nid of the node to push the boundary for * @start_pfn: The start pfn of the node * @end_pfn: The end pfn of the node * * In reserve-based hot-add, mem_map is allocated that is unused until hotadd * time. Specifically, on x86_64, SRAT will report ranges that can potentially * be hotplugged even though no physical memory exists. This function allows * an arch to push out the node boundaries so mem_map is allocated that can * be used later. */ #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE void __init push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn) { printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n", nid, start_pfn, end_pfn); /* Initialise the boundary for this node if necessary */ if (node_boundary_end_pfn[nid] == 0) node_boundary_start_pfn[nid] = -1UL; /* Update the boundaries */ if (node_boundary_start_pfn[nid] > start_pfn) node_boundary_start_pfn[nid] = start_pfn; if (node_boundary_end_pfn[nid] < end_pfn) node_boundary_end_pfn[nid] = end_pfn; } /* If necessary, push the node boundary out for reserve hotadd */ static void __init account_node_boundary(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) { printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n", nid, *start_pfn, *end_pfn); /* Return if boundary information has not been provided */ if (node_boundary_end_pfn[nid] == 0) return; /* Check the boundaries and update if necessary */ if (node_boundary_start_pfn[nid] < *start_pfn) *start_pfn = node_boundary_start_pfn[nid]; if (node_boundary_end_pfn[nid] > *end_pfn) *end_pfn = node_boundary_end_pfn[nid]; } #else void __init push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn) {} static void __init account_node_boundary(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) {} #endif /** |
c713216de [PATCH] Introduce... |
2079 |
* get_pfn_range_for_nid - Return the start and end page frames for a node |
88ca3b94e [PATCH] page_allo... |
2080 2081 2082 |
* @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. * @start_pfn: Passed by reference. On return, it will have the node start_pfn. * @end_pfn: Passed by reference. On return, it will have the node end_pfn. |
c713216de [PATCH] Introduce... |
2083 2084 2085 2086 |
* * It returns the start and end page frame of a node based on information * provided by an arch calling add_active_range(). If called for a node * with no available memory, a warning is printed and the start and end |
88ca3b94e [PATCH] page_allo... |
2087 |
* PFNs will be 0. |
c713216de [PATCH] Introduce... |
2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 |
*/ void __init get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) { int i; *start_pfn = -1UL; *end_pfn = 0; for_each_active_range_index_in_nid(i, nid) { *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); } if (*start_pfn == -1UL) { printk(KERN_WARNING "Node %u active with no memory\n", nid); *start_pfn = 0; } |
fb01439c5 [PATCH] Allow an ... |
2106 2107 2108 |
/* Push the node boundaries out if requested */ account_node_boundary(nid, start_pfn, end_pfn); |
c713216de [PATCH] Introduce... |
2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 |
} /* * Return the number of pages a zone spans in a node, including holes * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node() */ unsigned long __init zone_spanned_pages_in_node(int nid, unsigned long zone_type, unsigned long *ignored) { unsigned long node_start_pfn, node_end_pfn; unsigned long zone_start_pfn, zone_end_pfn; /* Get the start and end of the node and zone */ get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; /* Check that this node has pages within the zone's required range */ if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn) return 0; /* Move the zone boundaries inside the node if necessary */ zone_end_pfn = min(zone_end_pfn, node_end_pfn); zone_start_pfn = max(zone_start_pfn, node_start_pfn); /* Return the spanned pages */ return zone_end_pfn - zone_start_pfn; } /* * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, |
88ca3b94e [PATCH] page_allo... |
2141 |
* then all holes in the requested range will be accounted for. |
c713216de [PATCH] Introduce... |
2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 |
*/ unsigned long __init __absent_pages_in_range(int nid, unsigned long range_start_pfn, unsigned long range_end_pfn) { int i = 0; unsigned long prev_end_pfn = 0, hole_pages = 0; unsigned long start_pfn; /* Find the end_pfn of the first active range of pfns in the node */ i = first_active_region_index_in_nid(nid); if (i == -1) return 0; |
9c7cd6877 [PATCH] Account f... |
2155 2156 2157 |
/* Account for ranges before physical memory on this node */ if (early_node_map[i].start_pfn > range_start_pfn) hole_pages = early_node_map[i].start_pfn - range_start_pfn; |
c713216de [PATCH] Introduce... |
2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 |
prev_end_pfn = early_node_map[i].start_pfn; /* Find all holes for the zone within the node */ for (; i != -1; i = next_active_region_index_in_nid(i, nid)) { /* No need to continue if prev_end_pfn is outside the zone */ if (prev_end_pfn >= range_end_pfn) break; /* Make sure the end of the zone is not within the hole */ start_pfn = min(early_node_map[i].start_pfn, range_end_pfn); prev_end_pfn = max(prev_end_pfn, range_start_pfn); /* Update the hole size count and move on */ if (start_pfn > range_start_pfn) { BUG_ON(prev_end_pfn > start_pfn); hole_pages += start_pfn - prev_end_pfn; } prev_end_pfn = early_node_map[i].end_pfn; } |
9c7cd6877 [PATCH] Account f... |
2178 2179 2180 2181 |
/* Account for ranges past physical memory on this node */ if (range_end_pfn > prev_end_pfn) hole_pages = range_end_pfn - max(range_start_pfn, prev_end_pfn); |
c713216de [PATCH] Introduce... |
2182 2183 2184 2185 2186 2187 2188 2189 |
return hole_pages; } /** * absent_pages_in_range - Return number of page frames in holes within a range * @start_pfn: The start PFN to start searching for holes * @end_pfn: The end PFN to stop searching for holes * |
88ca3b94e [PATCH] page_allo... |
2190 |
* It returns the number of page frames in memory holes within a range. |
c713216de [PATCH] Introduce... |
2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 |
*/ unsigned long __init absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn) { return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn); } /* Return the number of page frames in holes in a zone on a node */ unsigned long __init zone_absent_pages_in_node(int nid, unsigned long zone_type, unsigned long *ignored) { |
9c7cd6877 [PATCH] Account f... |
2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 |
unsigned long node_start_pfn, node_end_pfn; unsigned long zone_start_pfn, zone_end_pfn; get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type], node_start_pfn); zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type], node_end_pfn); return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); |
c713216de [PATCH] Introduce... |
2213 |
} |
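The hole accounting reduces to summing the gaps the active ranges leave inside the query window. A simplified userspace equivalent of __absent_pages_in_range(), with a made-up two-entry map:

#include <stdio.h>

struct range { unsigned long start, end; };

static struct range map[] = { {16, 256}, {512, 1024} };  /* sorted, invented */

static unsigned long absent(unsigned long rs, unsigned long re)
{
    unsigned long holes = 0, prev = rs;

    for (int i = 0; i < 2; i++) {
        unsigned long s = map[i].start < rs ? rs : map[i].start;
        unsigned long e = map[i].end > re ? re : map[i].end;

        if (s >= re)
            break;                /* sorted map: nothing further overlaps */
        if (s > prev)
            holes += s - prev;    /* gap before this range */
        if (e > prev)
            prev = e;
    }
    if (re > prev)
        holes += re - prev;       /* gap past the last range */
    return holes;
}

int main(void)
{
    /* holes: 0..16 (16) + 256..512 (256) + 1024..2048 (1024) = 1296 */
    printf("absent pages in [0,2048): %lu\n", absent(0, 2048));
    return 0;
}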
0e0b864e0 [PATCH] Account f... |
2214 |
|
c713216de [PATCH] Introduce... |
2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 |
#else static inline unsigned long zone_spanned_pages_in_node(int nid, unsigned long zone_type, unsigned long *zones_size) { return zones_size[zone_type]; } static inline unsigned long zone_absent_pages_in_node(int nid, unsigned long zone_type, unsigned long *zholes_size) { if (!zholes_size) return 0; return zholes_size[zone_type]; } |
0e0b864e0 [PATCH] Account f... |
2232 |
|
c713216de [PATCH] Introduce... |
2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 |
#endif static void __init calculate_node_totalpages(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { unsigned long realtotalpages, totalpages = 0; enum zone_type i; for (i = 0; i < MAX_NR_ZONES; i++) totalpages += zone_spanned_pages_in_node(pgdat->node_id, i, zones_size); pgdat->node_spanned_pages = totalpages; realtotalpages = totalpages; for (i = 0; i < MAX_NR_ZONES; i++) realtotalpages -= zone_absent_pages_in_node(pgdat->node_id, i, zholes_size); pgdat->node_present_pages = realtotalpages; printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); } |
1da177e4c Linux-2.6.12-rc2 |
2256 2257 2258 2259 2260 2261 |
/* * Set up the zone data structures: * - mark all pages reserved * - mark all memory queues empty * - clear the memory bitmaps */ |
86356ab14 [PATCH] wait_tabl... |
2262 |
static void __meminit free_area_init_core(struct pglist_data *pgdat, |
1da177e4c Linux-2.6.12-rc2 |
2263 2264 |
unsigned long *zones_size, unsigned long *zholes_size) { |
2f1b62486 [PATCH] reduce MA... |
2265 |
enum zone_type j; |
ed8ece2ec [PATCH] memory ho... |
2266 |
int nid = pgdat->node_id; |
1da177e4c Linux-2.6.12-rc2 |
2267 |
unsigned long zone_start_pfn = pgdat->node_start_pfn; |
718127cc3 [PATCH] wait_tabl... |
2268 |
int ret; |
1da177e4c Linux-2.6.12-rc2 |
2269 |
|
208d54e55 [PATCH] memory ho... |
2270 |
pgdat_resize_init(pgdat); |
1da177e4c Linux-2.6.12-rc2 |
2271 2272 2273 2274 2275 2276 |
pgdat->nr_zones = 0; init_waitqueue_head(&pgdat->kswapd_wait); pgdat->kswapd_max_order = 0; for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; |
0e0b864e0 [PATCH] Account f... |
2277 |
unsigned long size, realsize, memmap_pages; |
1da177e4c Linux-2.6.12-rc2 |
2278 |
|
c713216de [PATCH] Introduce... |
2279 2280 2281 |
size = zone_spanned_pages_in_node(nid, j, zones_size); realsize = size - zone_absent_pages_in_node(nid, j, zholes_size); |
1da177e4c Linux-2.6.12-rc2 |
2282 |
|
0e0b864e0 [PATCH] Account f... |
2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 |
/* * Adjust realsize so that it accounts for how much memory * is used by this zone for memmap. This affects the watermark * and per-cpu initialisations */ memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT; if (realsize >= memmap_pages) { realsize -= memmap_pages; printk(KERN_DEBUG " %s zone: %lu pages used for memmap\n", zone_names[j], memmap_pages); } else printk(KERN_WARNING " %s zone: %lu pages exceeds realsize %lu\n", zone_names[j], memmap_pages, realsize); /* Account for reserved DMA pages */ if (j == ZONE_DMA && realsize > dma_reserve) { realsize -= dma_reserve; printk(KERN_DEBUG " DMA zone: %lu pages reserved\n", dma_reserve); } |
98d2b0ebd [PATCH] reduce MA... |
2308 |
if (!is_highmem_idx(j)) |
1da177e4c Linux-2.6.12-rc2 |
2309 2310 2311 2312 2313 |
nr_kernel_pages += realsize; nr_all_pages += realsize; zone->spanned_pages = size; zone->present_pages = realsize; |
9614634fe [PATCH] ZVC/zone_... |
2314 |
#ifdef CONFIG_NUMA |
d5f541ed6 [PATCH] Add node ... |
2315 |
zone->node = nid; |
8417bba4b [PATCH] Replace m... |
2316 |
zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) |
9614634fe [PATCH] ZVC/zone_... |
2317 |
/ 100; |
0ff38490c [PATCH] zone_recl... |
2318 |
zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; |
9614634fe [PATCH] ZVC/zone_... |
2319 |
#endif |
1da177e4c Linux-2.6.12-rc2 |
2320 2321 2322 |
zone->name = zone_names[j]; spin_lock_init(&zone->lock); spin_lock_init(&zone->lru_lock); |
bdc8cb984 [PATCH] memory ho... |
2323 |
zone_seqlock_init(zone); |
1da177e4c Linux-2.6.12-rc2 |
2324 2325 2326 2327 |
zone->zone_pgdat = pgdat; zone->free_pages = 0; zone->temp_priority = zone->prev_priority = DEF_PRIORITY; |
ed8ece2ec [PATCH] memory ho... |
2328 |
zone_pcp_init(zone); |
1da177e4c Linux-2.6.12-rc2 |
2329 2330 2331 2332 2333 2334 |
INIT_LIST_HEAD(&zone->active_list); INIT_LIST_HEAD(&zone->inactive_list); zone->nr_scan_active = 0; zone->nr_scan_inactive = 0; zone->nr_active = 0; zone->nr_inactive = 0; |
2244b95a7 [PATCH] zoned vm ... |
2335 |
zap_zone_vm_stats(zone); |
53e9a6159 [PATCH] VM: zone ... |
2336 |
atomic_set(&zone->reclaim_in_progress, 0); |
1da177e4c Linux-2.6.12-rc2 |
2337 2338 |
if (!size) continue; |
d41dee369 [PATCH] sparsemem... |
2339 |
zonetable_add(zone, nid, j, zone_start_pfn, size); |
718127cc3 [PATCH] wait_tabl... |
2340 2341 |
ret = init_currently_empty_zone(zone, zone_start_pfn, size); BUG_ON(ret); |
1da177e4c Linux-2.6.12-rc2 |
2342 |
zone_start_pfn += size; |
1da177e4c Linux-2.6.12-rc2 |
2343 2344 2345 2346 2347 |
} } static void __init alloc_node_mem_map(struct pglist_data *pgdat) { |
1da177e4c Linux-2.6.12-rc2 |
2348 2349 2350 |
/* Skip empty nodes */ if (!pgdat->node_spanned_pages) return; |
d41dee369 [PATCH] sparsemem... |
2351 |
#ifdef CONFIG_FLAT_NODE_MEM_MAP |
1da177e4c Linux-2.6.12-rc2 |
2352 2353 |
/* ia64 gets its own node_mem_map, before this, without bootmem */ if (!pgdat->node_mem_map) { |
e984bb43f [PATCH] Align the... |
2354 |
unsigned long size, start, end; |
d41dee369 [PATCH] sparsemem... |
2355 |
struct page *map; |
e984bb43f [PATCH] Align the... |
2356 2357 2358 2359 2360 2361 2362 2363 2364 |
/* * The zone's endpoints aren't required to be MAX_ORDER * aligned but the node_mem_map endpoints must be in order * for the buddy allocator to function correctly. */ start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); end = pgdat->node_start_pfn + pgdat->node_spanned_pages; end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct page); |
6f167ec72 [PATCH] sparsemem... |
2365 2366 2367 |
map = alloc_remap(pgdat->node_id, size); if (!map) map = alloc_bootmem_node(pgdat, size); |
e984bb43f [PATCH] Align the... |
2368 |
pgdat->node_mem_map = map + (pgdat->node_start_pfn - start); |
1da177e4c Linux-2.6.12-rc2 |
2369 |
} |
d41dee369 [PATCH] sparsemem... |
2370 |
#ifdef CONFIG_FLATMEM |
1da177e4c Linux-2.6.12-rc2 |
2371 2372 2373 |
/* * With no DISCONTIG, the global mem_map is just set as node 0's */ |
c713216de [PATCH] Introduce... |
2374 |
if (pgdat == NODE_DATA(0)) { |
1da177e4c Linux-2.6.12-rc2 |
2375 |
mem_map = NODE_DATA(0)->node_mem_map; |
c713216de [PATCH] Introduce... |
2376 2377 2378 2379 2380 |
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP if (page_to_pfn(mem_map) != pgdat->node_start_pfn) mem_map -= pgdat->node_start_pfn; #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ } |
1da177e4c Linux-2.6.12-rc2 |
2381 |
#endif |
d41dee369 [PATCH] sparsemem... |
2382 |
#endif /* CONFIG_FLAT_NODE_MEM_MAP */ |
1da177e4c Linux-2.6.12-rc2 |
2383 |
} |
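The alignment arithmetic is easiest to see with numbers. A sketch assuming MAX_ORDER 11 (1024-page buddy blocks) and a 32-byte struct page, both illustrative values:

#include <stdio.h>

#define MAX_ORDER_NR_PAGES (1UL << 10)   /* assumed MAX_ORDER of 11 */
#define STRUCT_PAGE_SIZE   32UL          /* assumed sizeof(struct page) */
#define ALIGN_UP(x, a)     (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    unsigned long node_start_pfn = 70000, node_spanned = 500000; /* made up */
    unsigned long start = node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
    unsigned long end = ALIGN_UP(node_start_pfn + node_spanned,
                                 MAX_ORDER_NR_PAGES);
    unsigned long size = (end - start) * STRUCT_PAGE_SIZE;

    printf("node_mem_map covers PFNs %lu..%lu: %lu bytes of struct page\n",
           start, end, size);
    return 0;
}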
86356ab14 [PATCH] wait_tabl... |
2384 |
void __meminit free_area_init_node(int nid, struct pglist_data *pgdat, |
1da177e4c Linux-2.6.12-rc2 |
2385 2386 2387 2388 2389 |
unsigned long *zones_size, unsigned long node_start_pfn, unsigned long *zholes_size) { pgdat->node_id = nid; pgdat->node_start_pfn = node_start_pfn; |
c713216de [PATCH] Introduce... |
2390 |
calculate_node_totalpages(pgdat, zones_size, zholes_size); |
1da177e4c Linux-2.6.12-rc2 |
2391 2392 2393 2394 2395 |
alloc_node_mem_map(pgdat); free_area_init_core(pgdat, zones_size, zholes_size); } |
c713216de [PATCH] Introduce... |
2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 |
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP /** * add_active_range - Register a range of PFNs backed by physical memory * @nid: The node ID the range resides on * @start_pfn: The start PFN of the available physical memory * @end_pfn: The end PFN of the available physical memory * * These ranges are stored in an early_node_map[] and later used by * free_area_init_nodes() to calculate zone sizes and holes. If the * range spans a memory hole, it is up to the architecture to ensure * the memory is not freed by the bootmem allocator. If possible * the range being registered will be merged with existing ranges. */ void __init add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn) { int i; printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) " "%d entries of %d used\n", nid, start_pfn, end_pfn, nr_nodemap_entries, MAX_ACTIVE_REGIONS); /* Merge with existing active regions if possible */ for (i = 0; i < nr_nodemap_entries; i++) { if (early_node_map[i].nid != nid) continue; /* Skip if an existing region covers this new one */ if (start_pfn >= early_node_map[i].start_pfn && end_pfn <= early_node_map[i].end_pfn) return; /* Merge forward if suitable */ if (start_pfn <= early_node_map[i].end_pfn && end_pfn > early_node_map[i].end_pfn) { early_node_map[i].end_pfn = end_pfn; return; } /* Merge backward if suitable */ if (start_pfn < early_node_map[i].end_pfn && end_pfn >= early_node_map[i].start_pfn) { early_node_map[i].start_pfn = start_pfn; return; } } /* Check that early_node_map is large enough */ if (i >= MAX_ACTIVE_REGIONS) { printk(KERN_CRIT "More than %d memory regions, truncating\n", MAX_ACTIVE_REGIONS); return; } early_node_map[i].nid = nid; early_node_map[i].start_pfn = start_pfn; early_node_map[i].end_pfn = end_pfn; nr_nodemap_entries = i + 1; } /** * shrink_active_range - Shrink an existing registered range of PFNs * @nid: The node id the range is on that should be shrunk * @old_end_pfn: The old end PFN of the range * @new_end_pfn: The new PFN of the range * * i386 with NUMA uses alloc_remap() to store a node_mem_map on a local node. * The map is kept at the end of the physical page range that has already been * registered with add_active_range(). This function allows an arch to shrink * an existing registered range. */ void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn, unsigned long new_end_pfn) { int i; /* Find the old active region end and shrink */ for_each_active_range_index_in_nid(i, nid) if (early_node_map[i].end_pfn == old_end_pfn) { early_node_map[i].end_pfn = new_end_pfn; break; } } /** * remove_all_active_ranges - Remove all currently registered regions |
88ca3b94e [PATCH] page_allo... |
2485 |
* |
c713216de [PATCH] Introduce... |
2486 2487 2488 2489 |
* During discovery, it may be found that a table like SRAT is invalid * and an alternative discovery method must be used. This function removes * all currently registered regions. */ |
88ca3b94e [PATCH] page_allo... |
2490 |
void __init remove_all_active_ranges(void) |
c713216de [PATCH] Introduce... |
2491 2492 2493 |
{ memset(early_node_map, 0, sizeof(early_node_map)); nr_nodemap_entries = 0; |
fb01439c5 [PATCH] Allow an ... |
2494 2495 2496 2497 |
#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn)); memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn)); #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ |
c713216de [PATCH] Introduce... |
2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 |
} /* Compare two active node_active_regions */ static int __init cmp_node_active_region(const void *a, const void *b) { struct node_active_region *arange = (struct node_active_region *)a; struct node_active_region *brange = (struct node_active_region *)b; /* Done this way to avoid overflows */ if (arange->start_pfn > brange->start_pfn) return 1; if (arange->start_pfn < brange->start_pfn) return -1; return 0; } /* sort the node_map by start_pfn */ static void __init sort_node_map(void) { sort(early_node_map, (size_t)nr_nodemap_entries, sizeof(struct node_active_region), cmp_node_active_region, NULL); } /* Find the lowest pfn for a node. This depends on a sorted early_node_map */ unsigned long __init find_min_pfn_for_node(unsigned long nid) { int i; /* Assuming a sorted map, the first range found has the starting pfn */ for_each_active_range_index_in_nid(i, nid) return early_node_map[i].start_pfn; printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid); return 0; } /** * find_min_pfn_with_active_regions - Find the minimum PFN registered * * It returns the minimum PFN based on information provided via |
88ca3b94e [PATCH] page_allo... |
2541 |
* add_active_range(). |
c713216de [PATCH] Introduce... |
2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 |
*/ unsigned long __init find_min_pfn_with_active_regions(void) { return find_min_pfn_for_node(MAX_NUMNODES); } /** * find_max_pfn_with_active_regions - Find the maximum PFN registered * * It returns the maximum PFN based on information provided via |
88ca3b94e [PATCH] page_allo... |
2552 |
* add_active_range(). |
c713216de [PATCH] Introduce... |
2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 |
*/ unsigned long __init find_max_pfn_with_active_regions(void) { int i; unsigned long max_pfn = 0; for (i = 0; i < nr_nodemap_entries; i++) max_pfn = max(max_pfn, early_node_map[i].end_pfn); return max_pfn; } /** * free_area_init_nodes - Initialise all pg_data_t and zone data |
88ca3b94e [PATCH] page_allo... |
2567 |
* @max_zone_pfn: an array of max PFNs for each zone |
c713216de [PATCH] Introduce... |
2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 |
* * This will call free_area_init_node() for each active node in the system. * Using the page ranges provided by add_active_range(), the size of each * zone in each node and their holes is calculated. If the maximum PFN * between two adjacent zones matches, it is assumed that the zone is empty. * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed * that arch_max_dma32_pfn has no pages. It is also assumed that a zone * starts where the previous one ended. For example, ZONE_DMA32 starts * at arch_max_dma_pfn. */ void __init free_area_init_nodes(unsigned long *max_zone_pfn) { unsigned long nid; enum zone_type i; /* Record where the zone boundaries are */ memset(arch_zone_lowest_possible_pfn, 0, sizeof(arch_zone_lowest_possible_pfn)); memset(arch_zone_highest_possible_pfn, 0, sizeof(arch_zone_highest_possible_pfn)); arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions(); arch_zone_highest_possible_pfn[0] = max_zone_pfn[0]; for (i = 1; i < MAX_NR_ZONES; i++) { arch_zone_lowest_possible_pfn[i] = arch_zone_highest_possible_pfn[i-1]; arch_zone_highest_possible_pfn[i] = max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]); } /* Regions in the early_node_map can be in any order */ sort_node_map(); /* Print out the zone ranges */ printk("Zone PFN ranges:\n"); for (i = 0; i < MAX_NR_ZONES; i++) printk(" %-8s %8lu -> %8lu\n", zone_names[i], arch_zone_lowest_possible_pfn[i], arch_zone_highest_possible_pfn[i]); /* Print out the early_node_map[] */ printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); for (i = 0; i < nr_nodemap_entries; i++) printk(" %3d: %8lu -> %8lu\n", early_node_map[i].nid, early_node_map[i].start_pfn, early_node_map[i].end_pfn); /* Initialise every node */ for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); free_area_init_node(nid, pgdat, NULL, find_min_pfn_for_node(nid), NULL); } } #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ |
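How max_zone_pfn[] becomes per-zone [low, high) PFN ranges can be sketched standalone; the zone names and PFN values below are invented for illustration:

#include <stdio.h>

#define MAX_NR_ZONES 3

int main(void)
{
    const char *names[MAX_NR_ZONES] = {"DMA", "Normal", "HighMem"};
    unsigned long max_zone_pfn[MAX_NR_ZONES] = {4096, 229376, 262144};
    unsigned long lo[MAX_NR_ZONES], hi[MAX_NR_ZONES];

    lo[0] = 0;          /* stands in for find_min_pfn_with_active_regions() */
    hi[0] = max_zone_pfn[0];
    for (int i = 1; i < MAX_NR_ZONES; i++) {
        lo[i] = hi[i - 1];                 /* each zone starts where the */
        hi[i] = max_zone_pfn[i] > lo[i]    /* previous one ended */
                ? max_zone_pfn[i] : lo[i];
    }
    for (int i = 0; i < MAX_NR_ZONES; i++)
        printf("%-8s %8lu -> %8lu%s\n", names[i], lo[i], hi[i],
               lo[i] == hi[i] ? "  (empty)" : "");
    return 0;
}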
0e0b864e0 [PATCH] Account f... |
2627 |
/** |
88ca3b94e [PATCH] page_allo... |
2628 2629 |
* set_dma_reserve - set the specified number of pages reserved in the first zone * @new_dma_reserve: The number of pages to mark reserved |
0e0b864e0 [PATCH] Account f... |
2630 2631 2632 2633 |
* * The per-cpu batchsize and zone watermarks are determined by present_pages. * In the DMA zone, a significant percentage may be consumed by kernel image * and other unfreeable allocations which can skew the watermarks badly. This |
88ca3b94e [PATCH] page_allo... |
2634 2635 2636 |
* function may optionally be used to account for unfreeable pages in the * first zone (e.g., ZONE_DMA). The effect will be lower watermarks and * smaller per-cpu batchsize. |
0e0b864e0 [PATCH] Account f... |
2637 2638 2639 2640 2641 |
*/ void __init set_dma_reserve(unsigned long new_dma_reserve) { dma_reserve = new_dma_reserve; } |
93b7504e3 [PATCH] Introduce... |
2642 |
#ifndef CONFIG_NEED_MULTIPLE_NODES |
1da177e4c Linux-2.6.12-rc2 |
2643 2644 2645 2646 |
static bootmem_data_t contig_bootmem_data; struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data }; EXPORT_SYMBOL(contig_page_data); |
93b7504e3 [PATCH] Introduce... |
2647 |
#endif |
1da177e4c Linux-2.6.12-rc2 |
2648 2649 2650 |
void __init free_area_init(unsigned long *zones_size) { |
93b7504e3 [PATCH] Introduce... |
2651 |
free_area_init_node(0, NODE_DATA(0), zones_size, |
1da177e4c Linux-2.6.12-rc2 |
2652 2653 |
__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } |
1da177e4c Linux-2.6.12-rc2 |
2654 |
|
1da177e4c Linux-2.6.12-rc2 |
2655 2656 2657 2658 2659 |
#ifdef CONFIG_HOTPLUG_CPU static int page_alloc_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { int cpu = (unsigned long)hcpu; |
1da177e4c Linux-2.6.12-rc2 |
2660 2661 |
if (action == CPU_DEAD) { |
1da177e4c Linux-2.6.12-rc2 |
2662 2663 |
local_irq_disable(); __drain_pages(cpu); |
f8891e5e1 [PATCH] Light wei... |
2664 |
vm_events_fold_cpu(cpu); |
1da177e4c Linux-2.6.12-rc2 |
2665 |
local_irq_enable(); |
2244b95a7 [PATCH] zoned vm ... |
2666 |
refresh_cpu_vm_stats(cpu); |
1da177e4c Linux-2.6.12-rc2 |
2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 |
} return NOTIFY_OK; } #endif /* CONFIG_HOTPLUG_CPU */ void __init page_alloc_init(void) { hotcpu_notifier(page_alloc_cpu_notify, 0); } /* |
cb45b0e96 [PATCH] overcommi... |
2678 2679 2680 2681 2682 2683 2684 |
* calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio * or min_free_kbytes changes. */ static void calculate_totalreserve_pages(void) { struct pglist_data *pgdat; unsigned long reserve_pages = 0; |
2f6726e54 [PATCH] Apply typ... |
2685 |
enum zone_type i, j; |
cb45b0e96 [PATCH] overcommi... |
2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 |
for_each_online_pgdat(pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; unsigned long max = 0; /* Find valid and maximum lowmem_reserve in the zone */ for (j = i; j < MAX_NR_ZONES; j++) { if (zone->lowmem_reserve[j] > max) max = zone->lowmem_reserve[j]; } /* we treat pages_high as reserved pages. */ max += zone->pages_high; if (max > zone->present_pages) max = zone->present_pages; reserve_pages += max; } } totalreserve_pages = reserve_pages; } /* |
1da177e4c Linux-2.6.12-rc2 |
2710 2711 2712 2713 2714 2715 2716 2717 |
* setup_per_zone_lowmem_reserve - called whenever * sysctl_lower_zone_reserve_ratio changes. Ensures that each zone * has a correct pages reserved value, so an adequate number of * pages are left in the zone after a successful __alloc_pages(). */ static void setup_per_zone_lowmem_reserve(void) { struct pglist_data *pgdat; |
2f6726e54 [PATCH] Apply typ... |
2718 |
enum zone_type j, idx; |
1da177e4c Linux-2.6.12-rc2 |
2719 |
|
ec936fc56 [PATCH] for_each_... |
2720 |
for_each_online_pgdat(pgdat) { |
1da177e4c Linux-2.6.12-rc2 |
		for (j = 0; j < MAX_NR_ZONES; j++) {
			struct zone *zone = pgdat->node_zones + j;
			unsigned long present_pages = zone->present_pages;

			zone->lowmem_reserve[j] = 0;
2f6726e54 [PATCH] Apply typ... |

			idx = j;
			while (idx) {
1da177e4c Linux-2.6.12-rc2 |
				struct zone *lower_zone;
2f6726e54 [PATCH] Apply typ... |
				idx--;
1da177e4c Linux-2.6.12-rc2 |
				if (sysctl_lowmem_reserve_ratio[idx] < 1)
					sysctl_lowmem_reserve_ratio[idx] = 1;

				lower_zone = pgdat->node_zones + idx;
				lower_zone->lowmem_reserve[j] = present_pages /
					sysctl_lowmem_reserve_ratio[idx];
				present_pages += lower_zone->present_pages;
			}
		}
	}
cb45b0e96 [PATCH] overcommi... |

	/* update totalreserve_pages */
	calculate_totalreserve_pages();
1da177e4c Linux-2.6.12-rc2 |
} |
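To make the reserve arithmetic above concrete, here is a small standalone userspace sketch (assumed zone sizes and the default ratios of 256 and 32; none of these numbers come from this file) that mirrors the loop:

/*
 * Standalone sketch of setup_per_zone_lowmem_reserve()'s arithmetic,
 * with hypothetical zones: DMA = 4096 pages, NORMAL = 196608 pages,
 * HIGHMEM = 61440 pages.  Compile with any C compiler and run.
 */
#include <stdio.h>

#define NR_ZONES 3

int main(void)
{
	unsigned long present[NR_ZONES] = { 4096, 196608, 61440 };
	unsigned long reserve[NR_ZONES][NR_ZONES] = { { 0 } };
	int ratio[NR_ZONES - 1] = { 256, 32 };
	int j, idx;

	for (j = 0; j < NR_ZONES; j++) {
		/* pages accumulates zone j plus every zone walked over */
		unsigned long pages = present[j];

		for (idx = j - 1; idx >= 0; idx--) {
			reserve[idx][j] = pages / ratio[idx];
			pages += present[idx];
		}
	}

	/*
	 * e.g. DMA reserves (HIGHMEM + NORMAL) / 256 pages against
	 * HIGHMEM-capable allocations: (61440 + 196608) / 256 = 1008.
	 */
	for (idx = 0; idx < NR_ZONES; idx++)
		for (j = idx + 1; j < NR_ZONES; j++)
			printf("zone %d reserves %lu pages from zone-%d requests\n",
			       idx, reserve[idx][j], j);
	return 0;
}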
88ca3b94e [PATCH] page_allo... |

/**
 * setup_per_zone_pages_min - called when min_free_kbytes changes.
 *
 * Ensures that the pages_{min,low,high} values for each zone are set correctly
 * with respect to min_free_kbytes.
1da177e4c Linux-2.6.12-rc2 |
 */
3947be196 [PATCH] memory ho... |
void setup_per_zone_pages_min(void) |
1da177e4c Linux-2.6.12-rc2 |
{
	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
	unsigned long lowmem_pages = 0;
	struct zone *zone;
	unsigned long flags;

	/* Calculate total number of !ZONE_HIGHMEM pages */
	for_each_zone(zone) {
		if (!is_highmem(zone))
			lowmem_pages += zone->present_pages;
	}

	for_each_zone(zone) {
ac924c603 [PATCH] setup_per... |
		u64 tmp;
1da177e4c Linux-2.6.12-rc2 |
		spin_lock_irqsave(&zone->lru_lock, flags);
ac924c603 [PATCH] setup_per... |
		tmp = (u64)pages_min * zone->present_pages;
		do_div(tmp, lowmem_pages);
1da177e4c Linux-2.6.12-rc2 |
		if (is_highmem(zone)) {
			/*
669ed1752 [PATCH] mm: highm... |
			 * __GFP_HIGH and PF_MEMALLOC allocations usually don't
			 * need highmem pages, so cap pages_min to a small
			 * value here.
			 *
			 * The (pages_high - pages_low) and (pages_low - pages_min)
			 * deltas control asynchronous page reclaim, and so
			 * should not be capped for highmem.
1da177e4c Linux-2.6.12-rc2 |
			 */
			int min_pages;

			min_pages = zone->present_pages / 1024;
			if (min_pages < SWAP_CLUSTER_MAX)
				min_pages = SWAP_CLUSTER_MAX;
			if (min_pages > 128)
				min_pages = 128;
			zone->pages_min = min_pages;
		} else {
669ed1752 [PATCH] mm: highm... |
			/*
			 * If it's a lowmem zone, reserve a number of pages
1da177e4c Linux-2.6.12-rc2 |
			 * proportionate to the zone's size.
			 */
669ed1752 [PATCH] mm: highm... |
			zone->pages_min = tmp;
1da177e4c Linux-2.6.12-rc2 |
		}
ac924c603 [PATCH] setup_per... |
		zone->pages_low = zone->pages_min + (tmp >> 2);
		zone->pages_high = zone->pages_min + (tmp >> 1);
1da177e4c Linux-2.6.12-rc2 |
		spin_unlock_irqrestore(&zone->lru_lock, flags);
	}
cb45b0e96 [PATCH] overcommi... |

	/* update totalreserve_pages */
	calculate_totalreserve_pages();
1da177e4c Linux-2.6.12-rc2 |
}

/*
 * Initialise min_free_kbytes.
 *
 * For small machines we want it small (128k min).  For large machines
 * we want it large (64MB max).  But it is not linear, because network
 * bandwidth does not increase linearly with machine size.  We use
 *
 *	min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy:
 *	min_free_kbytes = sqrt(lowmem_kbytes * 16)
 *
 * which yields
 *
 * 16MB:	512k
 * 32MB:	724k
 * 64MB:	1024k
 * 128MB:	1448k
 * 256MB:	2048k
 * 512MB:	2896k
 * 1024MB:	4096k
 * 2048MB:	5792k
 * 4096MB:	8192k
 * 8192MB:	11584k
 * 16384MB:	16384k
 */
static int __init init_per_zone_pages_min(void)
{
	unsigned long lowmem_kbytes;

	lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10);

	min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
	if (min_free_kbytes < 128)
		min_free_kbytes = 128;
	if (min_free_kbytes > 65536)
		min_free_kbytes = 65536;
	setup_per_zone_pages_min();
	setup_per_zone_lowmem_reserve();
	return 0;
}
module_init(init_per_zone_pages_min)

/*
 * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
 *	that we can call two helper functions whenever min_free_kbytes
 *	changes.
 */
int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec(table, write, file, buffer, length, ppos);
	setup_per_zone_pages_min();
	return 0;
}
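The sqrt scaling and the derived watermarks are easy to sanity-check outside the kernel. A minimal userspace sketch, assuming a hypothetical machine whose lowmem is a single 784MB zone with 4KB pages (so PAGE_SHIFT is 12); int_sqrt() is approximated here with libm's sqrt():

/*
 * Reproduce min_free_kbytes and the pages_{min,low,high} derivation
 * for one hypothetical lowmem zone.  Link with -lm.
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
	unsigned long lowmem_kbytes = 784UL * 1024;	/* hypothetical */
	unsigned long min_free_kbytes, pages_min, tmp;

	min_free_kbytes = (unsigned long)sqrt(lowmem_kbytes * 16.0);
	if (min_free_kbytes < 128)
		min_free_kbytes = 128;
	if (min_free_kbytes > 65536)
		min_free_kbytes = 65536;

	/* a single zone owning all of lowmem gets the whole allotment */
	pages_min = min_free_kbytes >> (12 - 10);	/* KB -> 4KB pages */
	tmp = pages_min;

	printf("min_free_kbytes=%lu pages_min=%lu pages_low=%lu pages_high=%lu\n",
	       min_free_kbytes, pages_min,
	       pages_min + (tmp >> 2),	/* min + 25% -> low */
	       pages_min + (tmp >> 1));	/* min + 50% -> high */
	return 0;
}

With these inputs the sketch prints min_free_kbytes=3583, i.e. roughly the 3584k one would interpolate from the table above, and a pages_min of 895 pages for the zone.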
9614634fe [PATCH] ZVC/zone_... |
#ifdef CONFIG_NUMA
int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
	struct zone *zone;
	int rc;

	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
	if (rc)
		return rc;

	for_each_zone(zone)
8417bba4b [PATCH] Replace m... |
		zone->min_unmapped_pages = (zone->present_pages *
9614634fe [PATCH] ZVC/zone_... |
				sysctl_min_unmapped_ratio) / 100;
	return 0;
}
0ff38490c [PATCH] zone_recl... |

int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
	struct zone *zone;
	int rc;

	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
	if (rc)
		return rc;

	for_each_zone(zone)
		zone->min_slab_pages = (zone->present_pages *
				sysctl_min_slab_ratio) / 100;
	return 0;
}
9614634fe [PATCH] ZVC/zone_... |
#endif |
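For a concrete sense of scale (hypothetical numbers, not defaults taken from this file): on a zone with 100,000 present pages, writing 5 to one of these sysctls sets the corresponding per-zone threshold to 100,000 * 5 / 100 = 5,000 pages.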
1da177e4c Linux-2.6.12-rc2 |

/*
 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
 *	proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()
 *	whenever sysctl_lowmem_reserve_ratio changes.
 *
 * The reserve ratio has no relation to the pages_min watermarks; it is
 * only meaningful in relation to the boot-time zone sizes.
 */
int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec_minmax(table, write, file, buffer, length, ppos);
	setup_per_zone_lowmem_reserve();
	return 0;
}
8ad4b1fb8 [PATCH] Make high... |

/*
 * percpu_pagelist_fraction - changes pcp->high for each zone on each cpu.
 * It is the fraction of a zone's total pages that a hot per-cpu pagelist
 * may hold before it is flushed back to the buddy allocator.
 */
int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
	struct zone *zone;
	unsigned int cpu;
	int ret;

	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
	if (!write || (ret == -EINVAL))
		return ret;
	for_each_zone(zone) {
		for_each_online_cpu(cpu) {
			unsigned long high;

			high = zone->present_pages / percpu_pagelist_fraction;
			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
		}
	}
	return 0;
}
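As a worked example with hypothetical sizes: on a zone with 262,144 present pages, writing 8 to this sysctl caps each online CPU's hot pagelist at 262,144 / 8 = 32,768 pages; a larger fraction means smaller per-cpu caches and more frequent trips back to the buddy allocator.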
f034b5d4e [XFRM]: Dynamic x... |
int hashdist = HASHDIST_DEFAULT; |
1da177e4c Linux-2.6.12-rc2 |
#ifdef CONFIG_NUMA
static int __init set_hashdist(char *str)
{
	if (!str)
		return 0;
	hashdist = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("hashdist=", set_hashdist);
#endif

/*
 * allocate a large system hash table from bootmem
 * - it is assumed that the hash table must contain an exact power-of-2
 *   quantity of entries
 * - limit is the number of hash buckets, not the total allocation size
 */
void *__init alloc_large_system_hash(const char *tablename,
				     unsigned long bucketsize,
				     unsigned long numentries,
				     int scale,
				     int flags,
				     unsigned int *_hash_shift,
				     unsigned int *_hash_mask,
				     unsigned long limit)
{
	unsigned long long max = limit;
	unsigned long log2qty, size;
	void *table = NULL;

	/* allow the kernel cmdline to have a say */
	if (!numentries) {
		/* round applicable memory size up to nearest megabyte */
		numentries = (flags & HASH_HIGHMEM) ? nr_all_pages :
			nr_kernel_pages;
		numentries += (1UL << (20 - PAGE_SHIFT)) - 1;
		numentries >>= 20 - PAGE_SHIFT;
		numentries <<= 20 - PAGE_SHIFT;

		/* limit to 1 bucket per 2^scale bytes of low memory */
		if (scale > PAGE_SHIFT)
			numentries >>= (scale - PAGE_SHIFT);
		else
			numentries <<= (PAGE_SHIFT - scale);
	}
6e692ed37 [PATCH] fix alloc... |
	numentries = roundup_pow_of_two(numentries);
1da177e4c Linux-2.6.12-rc2 |

	/* limit allocation size to 1/16 total memory by default */
	if (max == 0) {
		max = ((unsigned long long)nr_all_pages << PAGE_SHIFT) >> 4;
		do_div(max, bucketsize);
	}

	if (numentries > max)
		numentries = max;

	log2qty = long_log2(numentries);

	do {
		size = bucketsize << log2qty;
		if (flags & HASH_EARLY)
			table = alloc_bootmem(size);
		else if (hashdist)
			table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
		else {
			unsigned long order;
			for (order = 0; ((1UL << order) << PAGE_SHIFT) < size;
			     order++)
				;
			table = (void *) __get_free_pages(GFP_ATOMIC, order);
		}
	} while (!table && size > PAGE_SIZE && --log2qty);

	if (!table)
		panic("Failed to allocate %s hash table\n", tablename);

	printk("%s hash table entries: %d (order: %d, %lu bytes)\n",
	       tablename,
	       (1U << log2qty),
	       long_log2(size) - PAGE_SHIFT,
	       size);

	if (_hash_shift)
		*_hash_shift = log2qty;
	if (_hash_mask)
		*_hash_mask = (1 << log2qty) - 1;

	return table;
}
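A sketch of a typical boot-time caller, modeled loosely on how subsystems size their lookup tables; the table name, bucket type, and scale value are illustrative, not from this file:

/*
 * Hypothetical caller: let the allocator size the table from memory
 * (numentries == 0), one bucket per 2^14 bytes of lowmem, allocated
 * from bootmem via HASH_EARLY, default 1/16-of-RAM cap (limit == 0).
 */
static struct hlist_head *example_hashtable;
static unsigned int example_hash_shift;
static unsigned int example_hash_mask;

static void __init example_hash_init(void)
{
	example_hashtable = alloc_large_system_hash("Example-cache",
						    sizeof(struct hlist_head),
						    0,
						    14,
						    HASH_EARLY,
						    &example_hash_shift,
						    &example_hash_mask,
						    0);
}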
a117e66ed [PATCH] unify pfn... |

#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
a117e66ed [PATCH] unify pfn... |
struct page *pfn_to_page(unsigned long pfn)
{
67de64821 [PATCH] squash du... |
	return __pfn_to_page(pfn);
a117e66ed [PATCH] unify pfn... |
}
unsigned long page_to_pfn(struct page *page)
{
67de64821 [PATCH] squash du... |
	return __page_to_pfn(page);
a117e66ed [PATCH] unify pfn... |
} |
a117e66ed [PATCH] unify pfn... |
EXPORT_SYMBOL(pfn_to_page);
EXPORT_SYMBOL(page_to_pfn);
#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */