include/linux/mmzone.h

#ifndef _LINUX_MMZONE_H
#define _LINUX_MMZONE_H

#ifndef __ASSEMBLY__
#ifndef __GENERATING_BOUNDS_H

#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/numa.h>
#include <linux/init.h>
#include <linux/seqlock.h>
#include <linux/nodemask.h>
#include <linux/pageblock-flags.h>
#include <linux/page-flags-layout.h>
#include <linux/atomic.h>
#include <asm/page.h>

/* Free memory management - zoned buddy allocator. */
#ifndef CONFIG_FORCE_MAX_ZONEORDER
#define MAX_ORDER 11
#else
#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
#endif
#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
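/*
 * Editor's note: a quick sanity check of the arithmetic above. With the
 * default MAX_ORDER of 11, the largest buddy block has order MAX_ORDER - 1,
 * so MAX_ORDER_NR_PAGES = 1 << 10 = 1024 pages; assuming 4 KiB pages, that
 * is a 4 MiB maximum contiguous allocation from the buddy allocator.
 */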
/*
 * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
 * costly to service. That is between allocation orders which should
 * coalesce naturally under reasonable reclaim pressure and those which
 * will not.
 */
#define PAGE_ALLOC_COSTLY_ORDER 3

enum {
        MIGRATE_UNMOVABLE,
        MIGRATE_MOVABLE,
        MIGRATE_RECLAIMABLE,
        MIGRATE_PCPTYPES,       /* the number of types on the pcp lists */
        MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
        /*
         * MIGRATE_CMA migration type is designed to mimic the way
         * ZONE_MOVABLE works. Only movable pages can be allocated
         * from MIGRATE_CMA pageblocks, and the page allocator never
         * implicitly changes the migration type of a MIGRATE_CMA pageblock.
         *
         * The way to use it is to change the migratetype of a range of
         * pageblocks to MIGRATE_CMA, which can be done by the
         * __free_pageblock_cma() function. What is important though
         * is that a range of pageblocks must be aligned to
         * MAX_ORDER_NR_PAGES should the biggest page be bigger than
         * a single pageblock.
         */
        MIGRATE_CMA,
#endif
#ifdef CONFIG_MEMORY_ISOLATION
        MIGRATE_ISOLATE,        /* can't allocate from here */
#endif
        MIGRATE_TYPES
};

/* In mm/page_alloc.c; keep in sync also with show_migration_types() there */
extern char * const migratetype_names[MIGRATE_TYPES];

#ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
# define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
#else
# define is_migrate_cma(migratetype) false
# define is_migrate_cma_page(_page) false
#endif

#define for_each_migratetype_order(order, type) \
        for (order = 0; order < MAX_ORDER; order++) \
                for (type = 0; type < MIGRATE_TYPES; type++)

extern int page_group_by_mobility_disabled;

#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)

#define get_pageblock_migratetype(page)                         \
        get_pfnblock_flags_mask(page, page_to_pfn(page),        \
                        PB_migrate_end, MIGRATETYPE_MASK)

struct free_area {
        struct list_head free_list[MIGRATE_TYPES];
        unsigned long nr_free;
};

struct pglist_data;
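/*
 * Editor's sketch (not part of the original header): the buddy allocator
 * keeps one struct free_area per order, each holding one free list per
 * migratetype. Walking every (order, migratetype) pair looks like this,
 * assuming a struct zone pointer (defined further below) named "zone":
 *
 *      unsigned int order, type;
 *      unsigned long free_blocks = 0;
 *
 *      for_each_migratetype_order(order, type) {
 *              struct free_area *area = &zone->free_area[order];
 *              struct page *page;
 *
 *              list_for_each_entry(page, &area->free_list[type], lru)
 *                      free_blocks++;
 *      }
 */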
/*
 * zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
 * So add a wild amount of padding here to ensure that they fall into separate
 * cachelines. There are very few zone structures in the machine, so space
 * consumption is not a concern here.
 */
#if defined(CONFIG_SMP)
struct zone_padding {
        char x[0];
} ____cacheline_internodealigned_in_smp;
#define ZONE_PADDING(name)      struct zone_padding name;
#else
#define ZONE_PADDING(name)
#endif
enum zone_stat_item {
        /* First 128 byte cacheline (assuming 64 bit words) */
        NR_FREE_PAGES,
        NR_ZONE_LRU_BASE,       /* Used only for compaction and reclaim retry */
        NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
        NR_ZONE_ACTIVE_ANON,
        NR_ZONE_INACTIVE_FILE,
        NR_ZONE_ACTIVE_FILE,
        NR_ZONE_UNEVICTABLE,
        NR_ZONE_WRITE_PENDING,  /* Count of dirty, writeback and unstable pages */
        NR_MLOCK,               /* mlock()ed pages found and moved off LRU */
        NR_SLAB_RECLAIMABLE,
        NR_SLAB_UNRECLAIMABLE,
        NR_PAGETABLE,           /* used for pagetables */
        NR_KERNEL_STACK_KB,     /* measured in KiB */
        /* Second 128 byte cacheline */
        NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
        NR_ZSPAGES,             /* allocated in zsmalloc */
#endif
#ifdef CONFIG_NUMA
        NUMA_HIT,               /* allocated in intended node */
        NUMA_MISS,              /* allocated in non intended node */
        NUMA_FOREIGN,           /* was intended here, hit elsewhere */
        NUMA_INTERLEAVE_HIT,    /* interleaver preferred this zone */
        NUMA_LOCAL,             /* allocation from local node */
        NUMA_OTHER,             /* allocation from other node */
#endif
        NR_FREE_CMA_PAGES,
        NR_VM_ZONE_STAT_ITEMS
};
enum node_stat_item {
        NR_LRU_BASE,
        NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
        NR_ACTIVE_ANON,         /*  "     "     "   "       "         */
        NR_INACTIVE_FILE,       /*  "     "     "   "       "         */
        NR_ACTIVE_FILE,         /*  "     "     "   "       "         */
        NR_UNEVICTABLE,         /*  "     "     "   "       "         */
        NR_ISOLATED_ANON,       /* Temporary isolated pages from anon lru */
        NR_ISOLATED_FILE,       /* Temporary isolated pages from file lru */
        NR_PAGES_SCANNED,       /* pages scanned since last reclaim */
        WORKINGSET_REFAULT,
        WORKINGSET_ACTIVATE,
        WORKINGSET_NODERECLAIM,
        NR_ANON_MAPPED,         /* Mapped anonymous pages */
        NR_FILE_MAPPED,         /* pagecache pages mapped into pagetables.
                                   only modified from process context */
        NR_FILE_PAGES,
        NR_FILE_DIRTY,
        NR_WRITEBACK,
        NR_WRITEBACK_TEMP,      /* Writeback using temporary buffers */
        NR_SHMEM,               /* shmem pages (includes tmpfs/GEM pages) */
        NR_SHMEM_THPS,
        NR_SHMEM_PMDMAPPED,
        NR_ANON_THPS,
        NR_UNSTABLE_NFS,        /* NFS unstable pages */
        NR_VMSCAN_WRITE,
        NR_VMSCAN_IMMEDIATE,    /* Prioritise for reclaim when writeback ends */
        NR_DIRTIED,             /* page dirtyings since bootup */
        NR_WRITTEN,             /* page writings since bootup */
        NR_VM_NODE_STAT_ITEMS
};
/*
 * We do arithmetic on the LRU lists in various places in the code,
 * so it is important to keep the active lists LRU_ACTIVE higher in
 * the array than the corresponding inactive lists, and to keep
 * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
 *
 * This has to be kept in sync with the statistics in zone_stat_item
 * above and the descriptions in vmstat_text in mm/vmstat.c
 */
#define LRU_BASE 0
#define LRU_ACTIVE 1
#define LRU_FILE 2

enum lru_list {
        LRU_INACTIVE_ANON = LRU_BASE,
        LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
        LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
        LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
        LRU_UNEVICTABLE,
        NR_LRU_LISTS
};

#define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)

#define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)

static inline int is_file_lru(enum lru_list lru)
{
        return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
}

static inline int is_active_lru(enum lru_list lru)
{
        return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
}
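/*
 * Editor's sketch (not part of the original header): the LRU arithmetic the
 * comment above refers to composes a list index from the two offsets, e.g.
 * LRU_ACTIVE_FILE == LRU_BASE + LRU_FILE + LRU_ACTIVE == 3. Code along the
 * lines of mm/'s page_lru() can therefore build the index directly:
 *
 *      enum lru_list lru = LRU_BASE;
 *
 *      if (page_is_file_backed)        // hypothetical flag, for illustration
 *              lru += LRU_FILE;
 *      if (page_is_active)             // hypothetical flag, for illustration
 *              lru += LRU_ACTIVE;
 */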
struct zone_reclaim_stat {
        /*
         * The pageout code in vmscan.c keeps track of how many of the
         * mem/swap backed and file backed pages are referenced.
         * The higher the rotated/scanned ratio, the more valuable
         * that cache is.
         *
         * The anon LRU stats live in [0], file LRU stats in [1]
         */
        unsigned long recent_rotated[2];
        unsigned long recent_scanned[2];
};

struct lruvec {
        struct list_head lists[NR_LRU_LISTS];
        struct zone_reclaim_stat reclaim_stat;
        /* Evictions & activations on the inactive file list */
        atomic_long_t inactive_age;
#ifdef CONFIG_MEMCG
        struct pglist_data *pgdat;
#endif
};
/* Mask used at gathering information at once (see memcontrol.c) */
#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
#define LRU_ALL      ((1 << NR_LRU_LISTS) - 1)

/* Isolate clean file */
#define ISOLATE_CLEAN           ((__force isolate_mode_t)0x1)
/* Isolate unmapped file */
#define ISOLATE_UNMAPPED        ((__force isolate_mode_t)0x2)
/* Isolate for asynchronous migration */
#define ISOLATE_ASYNC_MIGRATE   ((__force isolate_mode_t)0x4)
/* Isolate unevictable pages */
#define ISOLATE_UNEVICTABLE     ((__force isolate_mode_t)0x8)

/* LRU Isolation modes. */
typedef unsigned __bitwise__ isolate_mode_t;

enum zone_watermarks {
        WMARK_MIN,
        WMARK_LOW,
        WMARK_HIGH,
        NR_WMARK
};

#define min_wmark_pages(z) (z->watermark[WMARK_MIN])
#define low_wmark_pages(z) (z->watermark[WMARK_LOW])
#define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
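/*
 * Editor's sketch (not part of the original header): the three watermarks
 * order as min <= low <= high per zone. A reclaim-style check, assuming a
 * populated struct zone pointer "z" and zone_page_state() from
 * <linux/vmstat.h>, might read:
 *
 *      if (zone_page_state(z, NR_FREE_PAGES) < low_wmark_pages(z))
 *              ; // below low: kswapd should be woken to reclaim
 *      if (zone_page_state(z, NR_FREE_PAGES) < min_wmark_pages(z))
 *              ; // below min: only atomic/high-priority allocs may dip here
 */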
struct per_cpu_pages {
        int count;              /* number of pages in the list */
        int high;               /* high watermark, emptying needed */
        int batch;              /* chunk size for buddy add/remove */

        /* Lists of pages, one per migrate type stored on the pcp-lists */
        struct list_head lists[MIGRATE_PCPTYPES];
};

struct per_cpu_pageset {
        struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
        s8 expire;
#endif
#ifdef CONFIG_SMP
        s8 stat_threshold;
        s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
#endif
};

struct per_cpu_nodestat {
        s8 stat_threshold;
        s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
};

#endif /* !__GENERATING_BOUNDS_H */
enum zone_type {
#ifdef CONFIG_ZONE_DMA
        /*
         * ZONE_DMA is used when there are devices that are not able
         * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
         * carve out the portion of memory that is needed for these devices.
         * The range is arch specific.
         *
         * Some examples
         *
         * Architecture         Limit
         * ---------------------------
         * parisc, ia64, sparc  <4G
         * s390                 <2G
         * arm                  Various
         * alpha                Unlimited or 0-16MB.
         *
         * i386, x86_64 and multiple other arches
         *                      <16M.
         */
        ZONE_DMA,
#endif
#ifdef CONFIG_ZONE_DMA32
        /*
         * x86_64 needs two ZONE_DMAs because it supports devices that are
         * only able to do DMA to the lower 16M but also 32 bit devices that
         * can only do DMA areas below 4G.
         */
        ZONE_DMA32,
#endif
        /*
         * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
         * performed on pages in ZONE_NORMAL if the DMA devices support
         * transfers to all addressable memory.
         */
        ZONE_NORMAL,
#ifdef CONFIG_HIGHMEM
        /*
         * A memory area that is only addressable by the kernel through
         * mapping portions into its own address space. This is for example
         * used by i386 to allow the kernel to address the memory beyond
         * 900MB. The kernel will set up special mappings (page
         * table entries on i386) for each page that the kernel needs to
         * access.
         */
        ZONE_HIGHMEM,
#endif
        ZONE_MOVABLE,
#ifdef CONFIG_ZONE_DEVICE
        ZONE_DEVICE,
#endif
        __MAX_NR_ZONES
};

#ifndef __GENERATING_BOUNDS_H
struct zone {
        /* Read-mostly fields */

        /* zone watermarks, access with *_wmark_pages(zone) macros */
        unsigned long watermark[NR_WMARK];

        unsigned long nr_reserved_highatomic;

        /*
         * We don't know if the memory that we're going to allocate will be
         * freeable or/and it will be released eventually, so to avoid totally
         * wasting several GB of ram we must reserve some of the lower zone
         * memory (otherwise we risk running OOM on the lower zones despite
         * there being tons of freeable ram on the higher zones). This array is
         * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
         * changes.
         */
        long lowmem_reserve[MAX_NR_ZONES];

#ifdef CONFIG_NUMA
        int node;
#endif
        struct pglist_data *zone_pgdat;
        struct per_cpu_pageset __percpu *pageset;

#ifndef CONFIG_SPARSEMEM
        /*
         * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
         * In SPARSEMEM, this map is stored in struct mem_section
         */
        unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

        /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
        unsigned long zone_start_pfn;

        /*
         * spanned_pages is the total pages spanned by the zone, including
         * holes, which is calculated as:
         *      spanned_pages = zone_end_pfn - zone_start_pfn;
         *
         * present_pages is physical pages existing within the zone, which
         * is calculated as:
         *      present_pages = spanned_pages - absent_pages(pages in holes);
         *
         * managed_pages is present pages managed by the buddy system, which
         * is calculated as (reserved_pages includes pages allocated by the
         * bootmem allocator):
         *      managed_pages = present_pages - reserved_pages;
         *
         * So present_pages may be used by memory hotplug or memory power
         * management logic to figure out unmanaged pages by checking
         * (present_pages - managed_pages). And managed_pages should be used
         * by page allocator and vm scanner to calculate all kinds of watermarks
         * and thresholds.
         *
         * Locking rules:
         *
         * zone_start_pfn and spanned_pages are protected by span_seqlock.
         * It is a seqlock because it has to be read outside of zone->lock,
         * and it is done in the main allocator path. But, it is written
         * quite infrequently.
         *
         * The span_seq lock is declared along with zone->lock because it is
         * frequently read in proximity to zone->lock. It's good to
         * give them a chance of being in the same cacheline.
         *
         * Write access to present_pages at runtime should be protected by
         * mem_hotplug_begin/end(). Any reader who can't tolerate drift of
         * present_pages should use get_online_mems() to get a stable value.
         *
         * Read access to managed_pages should be safe because it's unsigned
         * long. Write access to zone->managed_pages and totalram_pages are
         * protected by managed_page_count_lock at runtime. Ideally only
         * adjust_managed_page_count() should be used instead of directly
         * touching zone->managed_pages and totalram_pages.
         */
        unsigned long managed_pages;
        unsigned long spanned_pages;
        unsigned long present_pages;

        const char *name;
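        /*
         * Editor's note: a worked example of the relations above. A zone
         * spanning pfns 0x1000-0x9000 has spanned_pages = 0x8000; if 0x800
         * of those pfns fall in holes, present_pages = 0x7800; and if the
         * boot allocator permanently reserved 0x300 pages, the buddy
         * allocator manages managed_pages = 0x7500 of them. (Figures are
         * illustrative, not from the original source.)
         */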
#ifdef CONFIG_MEMORY_ISOLATION
        /*
         * Number of isolated pageblock. It is used to solve incorrect
         * freepage counting problem due to racy retrieving migratetype
         * of pageblock. Protected by zone->lock.
         */
        unsigned long nr_isolate_pageblock;
#endif

#ifdef CONFIG_MEMORY_HOTPLUG
        /* see spanned/present_pages for more description */
        seqlock_t span_seqlock;
#endif

        int initialized;

        /* Write-intensive fields used from the page allocator */
        ZONE_PADDING(_pad1_)

        /* free areas of different sizes */
        struct free_area free_area[MAX_ORDER];

        /* zone flags, see below */
        unsigned long flags;

        /* Primarily protects free_area */
        spinlock_t lock;

        /* Write-intensive fields used by compaction and vmstats. */
        ZONE_PADDING(_pad2_)

        /*
         * When free pages are below this point, additional steps are taken
         * when reading the number of free pages to avoid per-cpu counter
         * drift allowing watermarks to be breached
         */
        unsigned long percpu_drift_mark;

#if defined CONFIG_COMPACTION || defined CONFIG_CMA
        /* pfn where compaction free scanner should start */
        unsigned long compact_cached_free_pfn;
        /* pfn where async and sync compaction migration scanner should start */
        unsigned long compact_cached_migrate_pfn[2];
#endif
#ifdef CONFIG_COMPACTION
        /*
         * On compaction failure, 1<<compact_defer_shift compactions
         * are skipped before trying again. The number attempted since
         * last failure is tracked with compact_considered.
         */
        unsigned int compact_considered;
        unsigned int compact_defer_shift;
        int compact_order_failed;
#endif

#if defined CONFIG_COMPACTION || defined CONFIG_CMA
        /* Set to true when the PG_migrate_skip bits should be cleared */
        bool compact_blockskip_flush;
#endif

        bool contiguous;

        ZONE_PADDING(_pad3_)
        /* Zone statistics */
        atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;
enum pgdat_flags {
        PGDAT_CONGESTED,        /* pgdat has many dirty pages backed by
                                 * a congested BDI
                                 */
        PGDAT_DIRTY,            /* reclaim scanning has recently found
                                 * many dirty file pages at the tail
                                 * of the LRU.
                                 */
        PGDAT_WRITEBACK,        /* reclaim scanning has recently found
                                 * many pages under writeback
                                 */
        PGDAT_RECLAIM_LOCKED,   /* prevents concurrent reclaim */
};
static inline unsigned long zone_end_pfn(const struct zone *zone)
{
        return zone->zone_start_pfn + zone->spanned_pages;
}

static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
{
        return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
}

static inline bool zone_is_initialized(struct zone *zone)
{
        return zone->initialized;
}

static inline bool zone_is_empty(struct zone *zone)
{
        return zone->spanned_pages == 0;
}
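/*
 * Editor's sketch (not part of the original header): zone_end_pfn() is
 * exclusive, so the helpers above compose naturally. For instance, a
 * clamped walk of the pfns a zone shares with a [start, end) range:
 *
 *      unsigned long pfn = max(start, zone->zone_start_pfn);
 *      unsigned long last = min(end, zone_end_pfn(zone));
 *
 *      for (; pfn < last; pfn++)
 *              ; // every pfn here satisfies zone_spans_pfn(zone, pfn)
 */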
/*
 * The "priority" of VM scanning is how much of the queues we will scan in one
 * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
 * queues ("queue_length >> 12") during an aging round.
 */
#define DEF_PRIORITY 12

/* Maximum number of zones on a zonelist */
#define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES)

enum {
        ZONELIST_FALLBACK,      /* zonelist with fallback */
#ifdef CONFIG_NUMA
        /*
         * The NUMA zonelists are doubled because we need zonelists that
         * restrict the allocations to a single node for __GFP_THISNODE.
         */
        ZONELIST_NOFALLBACK,    /* zonelist without fallback (__GFP_THISNODE) */
#endif
        MAX_ZONELISTS
};

/*
 * This struct contains information about a zone in a zonelist. It is stored
 * here to avoid dereferences into large structures and lookups of tables
 */
struct zoneref {
        struct zone *zone;      /* Pointer to actual zone */
        int zone_idx;           /* zone_idx(zoneref->zone) */
};

/*
 * One allocation request operates on a zonelist. A zonelist
 * is a list of zones, the first one is the 'goal' of the
 * allocation, the other zones are fallback zones, in decreasing
 * priority.
 *
 * To speed the reading of the zonelist, the zonerefs contain the zone index
 * of the entry being read. Helper functions to access information given
 * a struct zoneref are
 *
 * zonelist_zone()      - Return the struct zone * for an entry in _zonerefs
 * zonelist_zone_idx()  - Return the index of the zone for an entry
 * zonelist_node_idx()  - Return the index of the node for an entry
 */
struct zonelist {
        struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
};
#ifndef CONFIG_DISCONTIGMEM
/* The array of struct pages - for discontigmem use pgdat->lmem_map */
extern struct page *mem_map;
#endif

/*
 * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
 * (mostly NUMA machines?) to denote a higher-level memory zone than the
 * zone denotes.
 *
 * On NUMA machines, each NUMA node would have a pg_data_t to describe
 * its memory layout.
 *
 * Memory statistics and page replacement data structures are maintained on a
 * per-zone basis.
 */
struct bootmem_data;
typedef struct pglist_data {
        struct zone node_zones[MAX_NR_ZONES];
        struct zonelist node_zonelists[MAX_ZONELISTS];
        int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
        struct page *node_mem_map;
#ifdef CONFIG_PAGE_EXTENSION
        struct page_ext *node_page_ext;
#endif
#endif
#ifndef CONFIG_NO_BOOTMEM
        struct bootmem_data *bdata;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
        /*
         * Must be held any time you expect node_start_pfn, node_present_pages
         * or node_spanned_pages stay constant. Holding this will also
         * guarantee that any pfn_valid() stays that way.
         *
         * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
         * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG.
         *
         * Nests above zone->lock and zone->span_seqlock
         */
        spinlock_t node_size_lock;
#endif
        unsigned long node_start_pfn;
        unsigned long node_present_pages; /* total number of physical pages */
        unsigned long node_spanned_pages; /* total size of physical page
                                             range, including holes */
        int node_id;
        wait_queue_head_t kswapd_wait;
        wait_queue_head_t pfmemalloc_wait;
        struct task_struct *kswapd;     /* Protected by
                                           mem_hotplug_begin/end() */
        int kswapd_order;
        enum zone_type kswapd_classzone_idx;

#ifdef CONFIG_COMPACTION
        int kcompactd_max_order;
        enum zone_type kcompactd_classzone_idx;
        wait_queue_head_t kcompactd_wait;
        struct task_struct *kcompactd;
#endif
#ifdef CONFIG_NUMA_BALANCING
        /* Lock serializing the migrate rate limiting window */
        spinlock_t numabalancing_migrate_lock;

        /* Rate limiting time interval */
        unsigned long numabalancing_migrate_next_window;

        /* Number of pages migrated during the rate limiting time interval */
        unsigned long numabalancing_migrate_nr_pages;
#endif
        /*
         * This is a per-node reserve of pages that are not available
         * to userspace allocations.
         */
        unsigned long totalreserve_pages;

#ifdef CONFIG_NUMA
        /*
         * zone reclaim becomes active if more unmapped pages exist.
         */
        unsigned long min_unmapped_pages;
        unsigned long min_slab_pages;
#endif /* CONFIG_NUMA */

        /* Write-intensive fields used by page reclaim */
        ZONE_PADDING(_pad1_)
        spinlock_t lru_lock;

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
        /*
         * If memory initialisation on large machines is deferred then this
         * is the first PFN that needs to be initialised.
         */
        unsigned long first_deferred_pfn;
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        spinlock_t split_queue_lock;
        struct list_head split_queue;
        unsigned long split_queue_len;
#endif

        /* Fields commonly accessed by the page reclaim scanner */
        struct lruvec lruvec;

        /*
         * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
         * this node's LRU. Maintained by the pageout code.
         */
        unsigned int inactive_ratio;

        unsigned long flags;

        ZONE_PADDING(_pad2_)

        /* Per-node vmstats */
        struct per_cpu_nodestat __percpu *per_cpu_nodestats;
        atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
} pg_data_t;

#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
#ifdef CONFIG_FLAT_NODE_MEM_MAP
#define pgdat_page_nr(pgdat, pagenr)    ((pgdat)->node_mem_map + (pagenr))
#else
#define pgdat_page_nr(pgdat, pagenr)    pfn_to_page((pgdat)->node_start_pfn + (pagenr))
#endif
#define nid_page_nr(nid, pagenr)        pgdat_page_nr(NODE_DATA(nid),(pagenr))

#define node_start_pfn(nid)     (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))

static inline spinlock_t *zone_lru_lock(struct zone *zone)
{
        return &zone->zone_pgdat->lru_lock;
}

static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
{
        return &pgdat->lruvec;
}

static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
        return pgdat->node_start_pfn + pgdat->node_spanned_pages;
}

static inline bool pgdat_is_empty(pg_data_t *pgdat)
{
        return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
}
static inline int zone_id(const struct zone *zone)
{
        struct pglist_data *pgdat = zone->zone_pgdat;

        return zone - pgdat->node_zones;
}

#ifdef CONFIG_ZONE_DEVICE
static inline bool is_dev_zone(const struct zone *zone)
{
        return zone_id(zone) == ZONE_DEVICE;
}
#else
static inline bool is_dev_zone(const struct zone *zone)
{
        return false;
}
#endif
#include <linux/memory_hotplug.h>

extern struct mutex zonelists_mutex;
void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                         int classzone_idx, unsigned int alloc_flags,
                         long free_pages);
bool zone_watermark_ok(struct zone *z, unsigned int order,
                unsigned long mark, int classzone_idx,
                unsigned int alloc_flags);
bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
                unsigned long mark, int classzone_idx);
enum memmap_context {
        MEMMAP_EARLY,
        MEMMAP_HOTPLUG,
};
extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
                                     unsigned long size);
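/*
 * Editor's sketch (not part of the original header): how an allocator-side
 * caller typically combines the declarations above. Before falling into
 * direct reclaim, check the low watermark and poke kswapd, assuming "zone",
 * "order" and "classzone_idx" are in scope:
 *
 *      if (!zone_watermark_ok(zone, order, low_wmark_pages(zone),
 *                             classzone_idx, 0))
 *              wakeup_kswapd(zone, order, classzone_idx);
 */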
extern void lruvec_init(struct lruvec *lruvec);

static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
{
#ifdef CONFIG_MEMCG
        return lruvec->pgdat;
#else
        return container_of(lruvec, struct pglist_data, lruvec);
#endif
}

extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru);

#ifdef CONFIG_HAVE_MEMORY_PRESENT
void memory_present(int nid, unsigned long start, unsigned long end);
#else
static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
#endif

#ifdef CONFIG_HAVE_MEMORYLESS_NODES
int local_memory_node(int node_id);
#else
static inline int local_memory_node(int node_id) { return node_id; };
#endif

#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
#endif

/*
 * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
 */
#define zone_idx(zone)          ((zone) - (zone)->zone_pgdat->node_zones)

/*
 * Returns true if a zone has pages managed by the buddy allocator.
 * All the reclaim decisions have to use this function rather than
 * populated_zone(). If the whole zone is reserved then we can easily
 * end up with populated_zone() && !managed_zone().
 */
static inline bool managed_zone(struct zone *zone)
{
        return zone->managed_pages;
}

/* Returns true if a zone has memory */
static inline bool populated_zone(struct zone *zone)
{
        return zone->present_pages;
}
extern int movable_zone;

#ifdef CONFIG_HIGHMEM
static inline int zone_movable_is_highmem(void)
{
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        return movable_zone == ZONE_HIGHMEM;
#else
        return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
#endif
}
#endif

static inline int is_highmem_idx(enum zone_type idx)
{
#ifdef CONFIG_HIGHMEM
        return (idx == ZONE_HIGHMEM ||
                (idx == ZONE_MOVABLE && zone_movable_is_highmem()));
#else
        return 0;
#endif
}

/**
 * is_highmem - helper function to quickly check if a struct zone is a
 *              highmem zone or not. This is an attempt to keep references
 *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
 * @zone - pointer to struct zone variable
 */
static inline int is_highmem(struct zone *zone)
{
#ifdef CONFIG_HIGHMEM
        return is_highmem_idx(zone_idx(zone));
#else
        return 0;
#endif
}
/* These two functions are used to setup the per zone pages min values */
struct ctl_table;
int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);

extern int numa_zonelist_order_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
extern char numa_zonelist_order[];
#define NUMA_ZONELIST_ORDER_LEN 16      /* string buffer size */

#ifndef CONFIG_NEED_MULTIPLE_NODES

extern struct pglist_data contig_page_data;
#define NODE_DATA(nid)          (&contig_page_data)
#define NODE_MEM_MAP(nid)       mem_map

#else /* CONFIG_NEED_MULTIPLE_NODES */

#include <asm/mmzone.h>

#endif /* !CONFIG_NEED_MULTIPLE_NODES */
extern struct pglist_data *first_online_pgdat(void);
extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
extern struct zone *next_zone(struct zone *zone);

/**
 * for_each_online_pgdat - helper macro to iterate over all online nodes
 * @pgdat - pointer to a pg_data_t variable
 */
#define for_each_online_pgdat(pgdat)                    \
        for (pgdat = first_online_pgdat();              \
             pgdat;                                     \
             pgdat = next_online_pgdat(pgdat))
/**
 * for_each_zone - helper macro to iterate over all memory zones
 * @zone - pointer to struct zone variable
 *
 * The user only needs to declare the zone variable, for_each_zone
 * fills it in.
 */
#define for_each_zone(zone)                             \
        for (zone = (first_online_pgdat())->node_zones; \
             zone;                                      \
             zone = next_zone(zone))

#define for_each_populated_zone(zone)                   \
        for (zone = (first_online_pgdat())->node_zones; \
             zone;                                      \
             zone = next_zone(zone))                    \
                if (!populated_zone(zone))              \
                        ; /* do nothing */              \
                else
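/*
 * Editor's sketch (not part of the original header): summing managed pages
 * across every populated zone in the system, using the iterator above:
 *
 *      struct zone *zone;
 *      unsigned long managed = 0;
 *
 *      for_each_populated_zone(zone)
 *              managed += zone->managed_pages;
 */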
static inline struct zone *zonelist_zone(struct zoneref *zoneref)
{
        return zoneref->zone;
}

static inline int zonelist_zone_idx(struct zoneref *zoneref)
{
        return zoneref->zone_idx;
}

static inline int zonelist_node_idx(struct zoneref *zoneref)
{
#ifdef CONFIG_NUMA
        /* zone_to_nid not available in this context */
        return zoneref->zone->node;
#else
        return 0;
#endif /* CONFIG_NUMA */
}

struct zoneref *__next_zones_zonelist(struct zoneref *z,
                                        enum zone_type highest_zoneidx,
                                        nodemask_t *nodes);

/**
 * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
 * @z - The cursor used as a starting point for the search
 * @highest_zoneidx - The zone index of the highest zone to return
 * @nodes - An optional nodemask to filter the zonelist with
 *
 * This function returns the next zone at or below a given zone index that is
 * within the allowed nodemask using a cursor as the starting point for the
 * search. The zoneref returned is a cursor that represents the current zone
 * being examined. It should be advanced by one before calling
 * next_zones_zonelist again.
 */
static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
                                        enum zone_type highest_zoneidx,
                                        nodemask_t *nodes)
{
        if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx))
                return z;
        return __next_zones_zonelist(z, highest_zoneidx, nodes);
}

/**
 * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
 * @zonelist - The zonelist to search for a suitable zone
 * @highest_zoneidx - The zone index of the highest zone to return
 * @nodes - An optional nodemask to filter the zonelist with
 *
 * This function returns the first zone at or below a given zone index that is
 * within the allowed nodemask. The zoneref returned is a cursor that can be
 * used to iterate the zonelist with next_zones_zonelist by advancing it by
 * one before calling.
 */
static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
                                        enum zone_type highest_zoneidx,
                                        nodemask_t *nodes)
{
        return next_zones_zonelist(zonelist->_zonerefs,
                                                        highest_zoneidx, nodes);
}

/**
 * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
 * @zone - The current zone in the iterator
 * @z - The current pointer within zonelist->zones being iterated
 * @zlist - The zonelist being iterated
 * @highidx - The zone index of the highest zone to return
 * @nodemask - Nodemask allowed by the allocator
 *
 * This iterator iterates through all zones at or below a given zone index and
 * within a given nodemask
 */
#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
        for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z);       \
                zone;                                                   \
                z = next_zones_zonelist(++z, highidx, nodemask),        \
                        zone = zonelist_zone(z))

#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
        for (zone = z->zone;                                            \
                zone;                                                   \
                z = next_zones_zonelist(++z, highidx, nodemask),        \
                        zone = zonelist_zone(z))

/**
 * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
 * @zone - The current zone in the iterator
 * @z - The current pointer within zonelist->zones being iterated
 * @zlist - The zonelist being iterated
 * @highidx - The zone index of the highest zone to return
 *
 * This iterator iterates through all zones at or below a given zone index.
 */
#define for_each_zone_zonelist(zone, z, zlist, highidx) \
        for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
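/*
 * Editor's sketch (not part of the original header): the canonical use of
 * these iterators is the allocator's fallback walk. Assuming a zonelist
 * obtained from a node's pg_data_t and a highest allowed zone index:
 *
 *      struct zonelist *zlist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
 *      struct zoneref *z;
 *      struct zone *zone;
 *
 *      for_each_zone_zonelist(zone, z, zlist, ZONE_NORMAL) {
 *              // zones arrive in decreasing preference, none above
 *              // ZONE_NORMAL; try to satisfy the allocation from "zone"
 *      }
 */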
#ifdef CONFIG_SPARSEMEM
#include <asm/sparsemem.h>
#endif

#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
        !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
static inline unsigned long early_pfn_to_nid(unsigned long pfn)
{
        return 0;
}
#endif

#ifdef CONFIG_FLATMEM
#define pfn_to_nid(pfn)         (0)
#endif

#ifdef CONFIG_SPARSEMEM

/*
 * SECTION_SHIFT        #bits space required to store a section #
 *
 * PA_SECTION_SHIFT     physical address to/from section number
 * PFN_SECTION_SHIFT    pfn to/from section number
 */
#define PA_SECTION_SHIFT        (SECTION_SIZE_BITS)
#define PFN_SECTION_SHIFT       (SECTION_SIZE_BITS - PAGE_SHIFT)

#define NR_MEM_SECTIONS         (1UL << SECTIONS_SHIFT)

#define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
#define PAGE_SECTION_MASK       (~(PAGES_PER_SECTION-1))

#define SECTION_BLOCKFLAGS_BITS \
        ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)

#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
#error Allocator MAX_ORDER exceeds SECTION_SIZE
#endif

#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
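/*
 * Editor's note: concrete numbers for the shifts above. On x86_64, for
 * instance, SECTION_SIZE_BITS is 27 and PAGE_SHIFT is 12, so
 * PFN_SECTION_SHIFT = 15 and PAGES_PER_SECTION = 1UL << 15 = 32768 pages,
 * i.e. each mem_section covers 128 MiB; pfn_to_section_nr(pfn) is then
 * simply pfn >> 15.
 */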
#define SECTION_ALIGN_UP(pfn)   (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)

struct page;
struct page_ext;
struct mem_section {
        /*
         * This is, logically, a pointer to an array of struct
         * pages. However, it is stored with some other magic.
         * (see sparse.c::sparse_init_one_section())
         *
         * Additionally during early boot we encode node id of
         * the location of the section here to guide allocation.
         * (see sparse.c::memory_present())
         *
         * Making it a UL at least makes someone do a cast
         * before using it wrong.
         */
        unsigned long section_mem_map;

        /* See declaration of similar field in struct zone */
        unsigned long *pageblock_flags;
#ifdef CONFIG_PAGE_EXTENSION
        /*
         * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
         * section. (see page_ext.h about this.)
         */
        struct page_ext *page_ext;
        unsigned long pad;
#endif
        /*
         * WARNING: mem_section must be a power-of-2 in size for the
         * calculation and use of SECTION_ROOT_MASK to make sense.
         */
};
#ifdef CONFIG_SPARSEMEM_EXTREME
#define SECTIONS_PER_ROOT       (PAGE_SIZE / sizeof (struct mem_section))
#else
#define SECTIONS_PER_ROOT       1
#endif

#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT)
#define NR_SECTION_ROOTS        DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT)
#define SECTION_ROOT_MASK       (SECTIONS_PER_ROOT - 1)

#ifdef CONFIG_SPARSEMEM_EXTREME
extern struct mem_section *mem_section[NR_SECTION_ROOTS];
#else
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif

static inline struct mem_section *__nr_to_section(unsigned long nr)
{
        if (!mem_section[SECTION_NR_TO_ROOT(nr)])
                return NULL;
        return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
extern int __section_nr(struct mem_section* ms);
extern unsigned long usemap_size(void);

/*
 * We use the lower bits of the mem_map pointer to store
 * a little bit of information. There should be at least
 * 3 bits here due to 32-bit alignment.
 */
#define SECTION_MARKED_PRESENT  (1UL<<0)
#define SECTION_HAS_MEM_MAP     (1UL<<1)
#define SECTION_MAP_LAST_BIT    (1UL<<2)
#define SECTION_MAP_MASK        (~(SECTION_MAP_LAST_BIT-1))
#define SECTION_NID_SHIFT       2

static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
        unsigned long map = section->section_mem_map;
        map &= SECTION_MAP_MASK;
        return (struct page *)map;
}
static inline int present_section(struct mem_section *section)
{
        return (section && (section->section_mem_map & SECTION_MARKED_PRESENT));
}

static inline int present_section_nr(unsigned long nr)
{
        return present_section(__nr_to_section(nr));
}

static inline int valid_section(struct mem_section *section)
{
        return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
}

static inline int valid_section_nr(unsigned long nr)
{
        return valid_section(__nr_to_section(nr));
}

static inline struct mem_section *__pfn_to_section(unsigned long pfn)
{
        return __nr_to_section(pfn_to_section_nr(pfn));
}
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
static inline int pfn_valid(unsigned long pfn)
{
        if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
                return 0;
        return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
}
#endif
static inline int pfn_present(unsigned long pfn)
{
        if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
                return 0;
        return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
}

/*
 * These are _only_ used during initialisation, therefore they
 * can use __initdata ... They could have names to indicate
 * this restriction.
 */
#ifdef CONFIG_NUMA
#define pfn_to_nid(pfn)                                                 \
({                                                                      \
        unsigned long __pfn_to_nid_pfn = (pfn);                         \
        page_to_nid(pfn_to_page(__pfn_to_nid_pfn));                     \
})
#else
#define pfn_to_nid(pfn)         (0)
#endif

#define early_pfn_valid(pfn)    pfn_valid(pfn)
void sparse_init(void);
#else
#define sparse_init()   do {} while (0)
#define sparse_index_init(_sec, _nid)  do {} while (0)
#endif /* CONFIG_SPARSEMEM */

/*
 * During memory init memblocks map pfns to nids. The search is expensive and
 * this caches recent lookups. The implementation of __early_pfn_to_nid
 * may treat start/end as pfns or sections.
 */
struct mminit_pfnnid_cache {
        unsigned long last_start;
        unsigned long last_end;
        int last_nid;
};

#ifndef early_pfn_valid
#define early_pfn_valid(pfn)    (1)
#endif
void memory_present(int nid, unsigned long start, unsigned long end);
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);

/*
 * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
 * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
 * pfn_valid_within() should be used in this case; we optimise this away
 * when we have no holes within a MAX_ORDER_NR_PAGES block.
 */
#ifdef CONFIG_HOLES_IN_ZONE
#define pfn_valid_within(pfn) pfn_valid(pfn)
#else
#define pfn_valid_within(pfn) (1)
#endif
#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
/*
 * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
 * associated with it or not. In FLATMEM, it is expected that holes always
 * have valid memmap as long as there are valid PFNs either side of the hole.
 * In SPARSEMEM, it is assumed that a valid section has a memmap for the
 * entire section.
 *
 * However, ARM, and maybe other embedded architectures in the future,
 * free memmap backing holes to save memory on the assumption the memmap is
 * never used. The page_zone linkages are then broken even though pfn_valid()
 * returns true. A walker of the full memmap must then do this additional
 * check to ensure the memmap they are looking at is sane by making sure
 * the zone and PFN linkages are still valid. This is expensive, but walkers
 * of the full memmap are extremely rare.
 */
bool memmap_valid_within(unsigned long pfn,
                                        struct page *page, struct zone *zone);
#else
static inline bool memmap_valid_within(unsigned long pfn,
                                        struct page *page, struct zone *zone)
{
        return true;
}
#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */

#endif /* !__GENERATING_BOUNDS_H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */