Commit 79442ed189acb8b949662676e750eda173c06f9b
Committed by: Linus Torvalds
1 parent: 1402899e43
Exists in: smarc-imx_3.14.28_1.0.0_ga and 1 other branch
mm/memblock.c: introduce bottom-up allocation mode
The Linux kernel cannot migrate pages that the kernel itself uses, so kernel pages cannot be hot-removed and the kernel must not be given hotpluggable memory. The ACPI SRAT (System Resource Affinity Table) contains the memory hotplug information, but before SRAT is parsed, memblock has already started allocating memory for the kernel, so we need to prevent memblock from doing that.

In a memory hotplug system, any NUMA node the kernel resides in should be non-hotpluggable, and on a modern server each node typically has at least 16GB of memory, so memory near the kernel image is very likely non-hotpluggable. The basic idea is therefore to allocate memory upward, starting from the end of the kernel image. Since not much memory is allocated before SRAT is parsed, those allocations will very likely land in the same node as the kernel image.

memblock currently allocates memory only top-down, so this patch introduces a new bottom-up allocation mode. Later patches that use this allocation direction will limit the start address to above the kernel image.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Toshi Kani <toshi.kani@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Thomas Renninger <trenn@suse.de>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 108 additions and 3 deletions
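Before the diff, here is a minimal sketch of how boot code might drive the new toggle. Only memblock_set_bottom_up() and memblock_bottom_up() come from this patch; the surrounding function names (example_early_setup, example_parse_srat) are illustrative placeholders, not code from this series.

/*
 * Hedged sketch: enable bottom-up allocation early in boot, before the
 * memory hotplug info (SRAT) is known, then return to the normal
 * top-down mode once it has been parsed.
 */
#include <linux/memblock.h>

void __init example_early_setup(void)
{
	/* Before SRAT is parsed, keep allocations just above the kernel image. */
	memblock_set_bottom_up(true);

	/* ... early memblock allocations happen here, near the kernel ... */

	/* example_parse_srat(); -- once hotplug info is known ... */

	/* ... the usual top-down allocation mode is safe again. */
	memblock_set_bottom_up(false);
}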
include/linux/memblock.h
1 | #ifndef _LINUX_MEMBLOCK_H | 1 | #ifndef _LINUX_MEMBLOCK_H |
2 | #define _LINUX_MEMBLOCK_H | 2 | #define _LINUX_MEMBLOCK_H |
3 | #ifdef __KERNEL__ | 3 | #ifdef __KERNEL__ |
4 | 4 | ||
5 | #ifdef CONFIG_HAVE_MEMBLOCK | 5 | #ifdef CONFIG_HAVE_MEMBLOCK |
6 | /* | 6 | /* |
7 | * Logical memory blocks. | 7 | * Logical memory blocks. |
8 | * | 8 | * |
9 | * Copyright (C) 2001 Peter Bergner, IBM Corp. | 9 | * Copyright (C) 2001 Peter Bergner, IBM Corp. |
10 | * | 10 | * |
11 | * This program is free software; you can redistribute it and/or | 11 | * This program is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU General Public License | 12 | * modify it under the terms of the GNU General Public License |
13 | * as published by the Free Software Foundation; either version | 13 | * as published by the Free Software Foundation; either version |
14 | * 2 of the License, or (at your option) any later version. | 14 | * 2 of the License, or (at your option) any later version. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
19 | 19 | ||
20 | #define INIT_MEMBLOCK_REGIONS 128 | 20 | #define INIT_MEMBLOCK_REGIONS 128 |
21 | 21 | ||
22 | struct memblock_region { | 22 | struct memblock_region { |
23 | phys_addr_t base; | 23 | phys_addr_t base; |
24 | phys_addr_t size; | 24 | phys_addr_t size; |
25 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 25 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
26 | int nid; | 26 | int nid; |
27 | #endif | 27 | #endif |
28 | }; | 28 | }; |
29 | 29 | ||
30 | struct memblock_type { | 30 | struct memblock_type { |
31 | unsigned long cnt; /* number of regions */ | 31 | unsigned long cnt; /* number of regions */ |
32 | unsigned long max; /* size of the allocated array */ | 32 | unsigned long max; /* size of the allocated array */ |
33 | phys_addr_t total_size; /* size of all regions */ | 33 | phys_addr_t total_size; /* size of all regions */ |
34 | struct memblock_region *regions; | 34 | struct memblock_region *regions; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | struct memblock { | 37 | struct memblock { |
38 | bool bottom_up; /* is bottom up direction? */ | ||
38 | phys_addr_t current_limit; | 39 | phys_addr_t current_limit; |
39 | struct memblock_type memory; | 40 | struct memblock_type memory; |
40 | struct memblock_type reserved; | 41 | struct memblock_type reserved; |
41 | }; | 42 | }; |
42 | 43 | ||
43 | extern struct memblock memblock; | 44 | extern struct memblock memblock; |
44 | extern int memblock_debug; | 45 | extern int memblock_debug; |
45 | 46 | ||
46 | #define memblock_dbg(fmt, ...) \ | 47 | #define memblock_dbg(fmt, ...) \ |
47 | if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) | 48 | if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) |
48 | 49 | ||
49 | phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, | 50 | phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, |
50 | phys_addr_t size, phys_addr_t align, int nid); | 51 | phys_addr_t size, phys_addr_t align, int nid); |
51 | phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, | 52 | phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, |
52 | phys_addr_t size, phys_addr_t align); | 53 | phys_addr_t size, phys_addr_t align); |
53 | phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); | 54 | phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); |
54 | void memblock_allow_resize(void); | 55 | void memblock_allow_resize(void); |
55 | int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); | 56 | int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); |
56 | int memblock_add(phys_addr_t base, phys_addr_t size); | 57 | int memblock_add(phys_addr_t base, phys_addr_t size); |
57 | int memblock_remove(phys_addr_t base, phys_addr_t size); | 58 | int memblock_remove(phys_addr_t base, phys_addr_t size); |
58 | int memblock_free(phys_addr_t base, phys_addr_t size); | 59 | int memblock_free(phys_addr_t base, phys_addr_t size); |
59 | int memblock_reserve(phys_addr_t base, phys_addr_t size); | 60 | int memblock_reserve(phys_addr_t base, phys_addr_t size); |
60 | void memblock_trim_memory(phys_addr_t align); | 61 | void memblock_trim_memory(phys_addr_t align); |
61 | 62 | ||
62 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 63 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
63 | int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, | 64 | int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, |
64 | unsigned long *end_pfn); | 65 | unsigned long *end_pfn); |
65 | void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, | 66 | void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, |
66 | unsigned long *out_end_pfn, int *out_nid); | 67 | unsigned long *out_end_pfn, int *out_nid); |
67 | 68 | ||
68 | /** | 69 | /** |
69 | * for_each_mem_pfn_range - early memory pfn range iterator | 70 | * for_each_mem_pfn_range - early memory pfn range iterator |
70 | * @i: an integer used as loop variable | 71 | * @i: an integer used as loop variable |
71 | * @nid: node selector, %MAX_NUMNODES for all nodes | 72 | * @nid: node selector, %MAX_NUMNODES for all nodes |
72 | * @p_start: ptr to ulong for start pfn of the range, can be %NULL | 73 | * @p_start: ptr to ulong for start pfn of the range, can be %NULL |
73 | * @p_end: ptr to ulong for end pfn of the range, can be %NULL | 74 | * @p_end: ptr to ulong for end pfn of the range, can be %NULL |
74 | * @p_nid: ptr to int for nid of the range, can be %NULL | 75 | * @p_nid: ptr to int for nid of the range, can be %NULL |
75 | * | 76 | * |
76 | * Walks over configured memory ranges. | 77 | * Walks over configured memory ranges. |
77 | */ | 78 | */ |
78 | #define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ | 79 | #define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ |
79 | for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ | 80 | for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ |
80 | i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) | 81 | i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) |
81 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 82 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
82 | 83 | ||
83 | void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, | 84 | void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, |
84 | phys_addr_t *out_end, int *out_nid); | 85 | phys_addr_t *out_end, int *out_nid); |
85 | 86 | ||
86 | /** | 87 | /** |
87 | * for_each_free_mem_range - iterate through free memblock areas | 88 | * for_each_free_mem_range - iterate through free memblock areas |
88 | * @i: u64 used as loop variable | 89 | * @i: u64 used as loop variable |
89 | * @nid: node selector, %MAX_NUMNODES for all nodes | 90 | * @nid: node selector, %MAX_NUMNODES for all nodes |
90 | * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL | 91 | * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL |
91 | * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL | 92 | * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL |
92 | * @p_nid: ptr to int for nid of the range, can be %NULL | 93 | * @p_nid: ptr to int for nid of the range, can be %NULL |
93 | * | 94 | * |
94 | * Walks over free (memory && !reserved) areas of memblock. Available as | 95 | * Walks over free (memory && !reserved) areas of memblock. Available as |
95 | * soon as memblock is initialized. | 96 | * soon as memblock is initialized. |
96 | */ | 97 | */ |
97 | #define for_each_free_mem_range(i, nid, p_start, p_end, p_nid) \ | 98 | #define for_each_free_mem_range(i, nid, p_start, p_end, p_nid) \ |
98 | for (i = 0, \ | 99 | for (i = 0, \ |
99 | __next_free_mem_range(&i, nid, p_start, p_end, p_nid); \ | 100 | __next_free_mem_range(&i, nid, p_start, p_end, p_nid); \ |
100 | i != (u64)ULLONG_MAX; \ | 101 | i != (u64)ULLONG_MAX; \ |
101 | __next_free_mem_range(&i, nid, p_start, p_end, p_nid)) | 102 | __next_free_mem_range(&i, nid, p_start, p_end, p_nid)) |
102 | 103 | ||
103 | void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start, | 104 | void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start, |
104 | phys_addr_t *out_end, int *out_nid); | 105 | phys_addr_t *out_end, int *out_nid); |
105 | 106 | ||
106 | /** | 107 | /** |
107 | * for_each_free_mem_range_reverse - rev-iterate through free memblock areas | 108 | * for_each_free_mem_range_reverse - rev-iterate through free memblock areas |
108 | * @i: u64 used as loop variable | 109 | * @i: u64 used as loop variable |
109 | * @nid: node selector, %MAX_NUMNODES for all nodes | 110 | * @nid: node selector, %MAX_NUMNODES for all nodes |
110 | * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL | 111 | * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL |
111 | * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL | 112 | * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL |
112 | * @p_nid: ptr to int for nid of the range, can be %NULL | 113 | * @p_nid: ptr to int for nid of the range, can be %NULL |
113 | * | 114 | * |
114 | * Walks over free (memory && !reserved) areas of memblock in reverse | 115 | * Walks over free (memory && !reserved) areas of memblock in reverse |
115 | * order. Available as soon as memblock is initialized. | 116 | * order. Available as soon as memblock is initialized. |
116 | */ | 117 | */ |
117 | #define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid) \ | 118 | #define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid) \ |
118 | for (i = (u64)ULLONG_MAX, \ | 119 | for (i = (u64)ULLONG_MAX, \ |
119 | __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid); \ | 120 | __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid); \ |
120 | i != (u64)ULLONG_MAX; \ | 121 | i != (u64)ULLONG_MAX; \ |
121 | __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid)) | 122 | __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid)) |
122 | 123 | ||
123 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 124 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
124 | int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); | 125 | int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); |
125 | 126 | ||
126 | static inline void memblock_set_region_node(struct memblock_region *r, int nid) | 127 | static inline void memblock_set_region_node(struct memblock_region *r, int nid) |
127 | { | 128 | { |
128 | r->nid = nid; | 129 | r->nid = nid; |
129 | } | 130 | } |
130 | 131 | ||
131 | static inline int memblock_get_region_node(const struct memblock_region *r) | 132 | static inline int memblock_get_region_node(const struct memblock_region *r) |
132 | { | 133 | { |
133 | return r->nid; | 134 | return r->nid; |
134 | } | 135 | } |
135 | #else | 136 | #else |
136 | static inline void memblock_set_region_node(struct memblock_region *r, int nid) | 137 | static inline void memblock_set_region_node(struct memblock_region *r, int nid) |
137 | { | 138 | { |
138 | } | 139 | } |
139 | 140 | ||
140 | static inline int memblock_get_region_node(const struct memblock_region *r) | 141 | static inline int memblock_get_region_node(const struct memblock_region *r) |
141 | { | 142 | { |
142 | return 0; | 143 | return 0; |
143 | } | 144 | } |
144 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 145 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
145 | 146 | ||
146 | phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); | 147 | phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); |
147 | phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); | 148 | phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); |
148 | 149 | ||
149 | phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); | 150 | phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); |
151 | |||
152 | #ifdef CONFIG_MOVABLE_NODE | ||
153 | /* | ||
154 | * Set the allocation direction to bottom-up or top-down. | ||
155 | */ | ||
156 | static inline void memblock_set_bottom_up(bool enable) | ||
157 | { | ||
158 | memblock.bottom_up = enable; | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * Check if the allocation direction is bottom-up or not. | ||
163 | * if this is true, that said, memblock will allocate memory | ||
164 | * in bottom-up direction. | ||
165 | */ | ||
166 | static inline bool memblock_bottom_up(void) | ||
167 | { | ||
168 | return memblock.bottom_up; | ||
169 | } | ||
170 | #else | ||
171 | static inline void memblock_set_bottom_up(bool enable) {} | ||
172 | static inline bool memblock_bottom_up(void) { return false; } | ||
173 | #endif | ||
150 | 174 | ||
151 | /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ | 175 | /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ |
152 | #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) | 176 | #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) |
153 | #define MEMBLOCK_ALLOC_ACCESSIBLE 0 | 177 | #define MEMBLOCK_ALLOC_ACCESSIBLE 0 |
154 | 178 | ||
155 | phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, | 179 | phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, |
156 | phys_addr_t max_addr); | 180 | phys_addr_t max_addr); |
157 | phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, | 181 | phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, |
158 | phys_addr_t max_addr); | 182 | phys_addr_t max_addr); |
159 | phys_addr_t memblock_phys_mem_size(void); | 183 | phys_addr_t memblock_phys_mem_size(void); |
160 | phys_addr_t memblock_mem_size(unsigned long limit_pfn); | 184 | phys_addr_t memblock_mem_size(unsigned long limit_pfn); |
161 | phys_addr_t memblock_start_of_DRAM(void); | 185 | phys_addr_t memblock_start_of_DRAM(void); |
162 | phys_addr_t memblock_end_of_DRAM(void); | 186 | phys_addr_t memblock_end_of_DRAM(void); |
163 | void memblock_enforce_memory_limit(phys_addr_t memory_limit); | 187 | void memblock_enforce_memory_limit(phys_addr_t memory_limit); |
164 | int memblock_is_memory(phys_addr_t addr); | 188 | int memblock_is_memory(phys_addr_t addr); |
165 | int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); | 189 | int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); |
166 | int memblock_is_reserved(phys_addr_t addr); | 190 | int memblock_is_reserved(phys_addr_t addr); |
167 | int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); | 191 | int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); |
168 | 192 | ||
169 | extern void __memblock_dump_all(void); | 193 | extern void __memblock_dump_all(void); |
170 | 194 | ||
171 | static inline void memblock_dump_all(void) | 195 | static inline void memblock_dump_all(void) |
172 | { | 196 | { |
173 | if (memblock_debug) | 197 | if (memblock_debug) |
174 | __memblock_dump_all(); | 198 | __memblock_dump_all(); |
175 | } | 199 | } |
176 | 200 | ||
177 | /** | 201 | /** |
178 | * memblock_set_current_limit - Set the current allocation limit to allow | 202 | * memblock_set_current_limit - Set the current allocation limit to allow |
179 | * limiting allocations to what is currently | 203 | * limiting allocations to what is currently |
180 | * accessible during boot | 204 | * accessible during boot |
181 | * @limit: New limit value (physical address) | 205 | * @limit: New limit value (physical address) |
182 | */ | 206 | */ |
183 | void memblock_set_current_limit(phys_addr_t limit); | 207 | void memblock_set_current_limit(phys_addr_t limit); |
184 | 208 | ||
185 | 209 | ||
186 | /* | 210 | /* |
187 | * pfn conversion functions | 211 | * pfn conversion functions |
188 | * | 212 | * |
189 | * While the memory MEMBLOCKs should always be page aligned, the reserved | 213 | * While the memory MEMBLOCKs should always be page aligned, the reserved |
190 | * MEMBLOCKs may not be. This accessor attempt to provide a very clear | 214 | * MEMBLOCKs may not be. This accessor attempt to provide a very clear |
191 | * idea of what they return for such non aligned MEMBLOCKs. | 215 | * idea of what they return for such non aligned MEMBLOCKs. |
192 | */ | 216 | */ |
193 | 217 | ||
194 | /** | 218 | /** |
195 | * memblock_region_memory_base_pfn - Return the lowest pfn intersecting with the memory region | 219 | * memblock_region_memory_base_pfn - Return the lowest pfn intersecting with the memory region |
196 | * @reg: memblock_region structure | 220 | * @reg: memblock_region structure |
197 | */ | 221 | */ |
198 | static inline unsigned long memblock_region_memory_base_pfn(const struct memblock_region *reg) | 222 | static inline unsigned long memblock_region_memory_base_pfn(const struct memblock_region *reg) |
199 | { | 223 | { |
200 | return PFN_UP(reg->base); | 224 | return PFN_UP(reg->base); |
201 | } | 225 | } |
202 | 226 | ||
203 | /** | 227 | /** |
204 | * memblock_region_memory_end_pfn - Return the end_pfn this region | 228 | * memblock_region_memory_end_pfn - Return the end_pfn this region |
205 | * @reg: memblock_region structure | 229 | * @reg: memblock_region structure |
206 | */ | 230 | */ |
207 | static inline unsigned long memblock_region_memory_end_pfn(const struct memblock_region *reg) | 231 | static inline unsigned long memblock_region_memory_end_pfn(const struct memblock_region *reg) |
208 | { | 232 | { |
209 | return PFN_DOWN(reg->base + reg->size); | 233 | return PFN_DOWN(reg->base + reg->size); |
210 | } | 234 | } |
211 | 235 | ||
212 | /** | 236 | /** |
213 | * memblock_region_reserved_base_pfn - Return the lowest pfn intersecting with the reserved region | 237 | * memblock_region_reserved_base_pfn - Return the lowest pfn intersecting with the reserved region |
214 | * @reg: memblock_region structure | 238 | * @reg: memblock_region structure |
215 | */ | 239 | */ |
216 | static inline unsigned long memblock_region_reserved_base_pfn(const struct memblock_region *reg) | 240 | static inline unsigned long memblock_region_reserved_base_pfn(const struct memblock_region *reg) |
217 | { | 241 | { |
218 | return PFN_DOWN(reg->base); | 242 | return PFN_DOWN(reg->base); |
219 | } | 243 | } |
220 | 244 | ||
221 | /** | 245 | /** |
222 | * memblock_region_reserved_end_pfn - Return the end_pfn this region | 246 | * memblock_region_reserved_end_pfn - Return the end_pfn this region |
223 | * @reg: memblock_region structure | 247 | * @reg: memblock_region structure |
224 | */ | 248 | */ |
225 | static inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg) | 249 | static inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg) |
226 | { | 250 | { |
227 | return PFN_UP(reg->base + reg->size); | 251 | return PFN_UP(reg->base + reg->size); |
228 | } | 252 | } |
229 | 253 | ||
230 | #define for_each_memblock(memblock_type, region) \ | 254 | #define for_each_memblock(memblock_type, region) \ |
231 | for (region = memblock.memblock_type.regions; \ | 255 | for (region = memblock.memblock_type.regions; \ |
232 | region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ | 256 | region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ |
233 | region++) | 257 | region++) |
234 | 258 | ||
235 | 259 | ||
236 | #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK | 260 | #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK |
237 | #define __init_memblock __meminit | 261 | #define __init_memblock __meminit |
238 | #define __initdata_memblock __meminitdata | 262 | #define __initdata_memblock __meminitdata |
239 | #else | 263 | #else |
240 | #define __init_memblock | 264 | #define __init_memblock |
241 | #define __initdata_memblock | 265 | #define __initdata_memblock |
242 | #endif | 266 | #endif |
243 | 267 | ||
244 | #else | 268 | #else |
245 | static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) | 269 | static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) |
246 | { | 270 | { |
247 | return 0; | 271 | return 0; |
248 | } | 272 | } |
249 | 273 | ||
250 | #endif /* CONFIG_HAVE_MEMBLOCK */ | 274 | #endif /* CONFIG_HAVE_MEMBLOCK */ |
251 | 275 | ||
252 | #endif /* __KERNEL__ */ | 276 | #endif /* __KERNEL__ */ |
253 | 277 | ||
254 | #endif /* _LINUX_MEMBLOCK_H */ | 278 | #endif /* _LINUX_MEMBLOCK_H */ |
255 | 279 |
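The helpers added above only record the allocation direction. Per the commit message, later patches in the series limit bottom-up allocations to addresses above the kernel image. A hedged sketch of that idea follows: __memblock_find_range_bottom_up() and example_find_in_range() are hypothetical, for illustration only; memblock_bottom_up(), memblock_find_in_range() and __pa_symbol() are the real interfaces touched by this commit, and _end is the standard linker symbol for the end of the kernel image.

#include <linux/memblock.h>
#include <asm/sections.h>	/* for the _end linker symbol */

/* hypothetical helper, assumed to scan candidate ranges from low to high */
static phys_addr_t __memblock_find_range_bottom_up(phys_addr_t start,
						   phys_addr_t end,
						   phys_addr_t size,
						   phys_addr_t align);

static phys_addr_t __init example_find_in_range(phys_addr_t start,
						phys_addr_t end,
						phys_addr_t size,
						phys_addr_t align)
{
	if (memblock_bottom_up()) {
		phys_addr_t kernel_end = __pa_symbol(_end);
		/* never hand out memory below the end of the kernel image */
		phys_addr_t bottom_up_start = start > kernel_end ? start : kernel_end;
		phys_addr_t ret;

		ret = __memblock_find_range_bottom_up(bottom_up_start, end,
						      size, align);
		if (ret)
			return ret;
		/* fall back to the existing top-down search on failure */
	}

	return memblock_find_in_range(start, end, size, align);
}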
include/linux/mm.h
1 | #ifndef _LINUX_MM_H | 1 | #ifndef _LINUX_MM_H |
2 | #define _LINUX_MM_H | 2 | #define _LINUX_MM_H |
3 | 3 | ||
4 | #include <linux/errno.h> | 4 | #include <linux/errno.h> |
5 | 5 | ||
6 | #ifdef __KERNEL__ | 6 | #ifdef __KERNEL__ |
7 | 7 | ||
8 | #include <linux/gfp.h> | 8 | #include <linux/gfp.h> |
9 | #include <linux/bug.h> | 9 | #include <linux/bug.h> |
10 | #include <linux/list.h> | 10 | #include <linux/list.h> |
11 | #include <linux/mmzone.h> | 11 | #include <linux/mmzone.h> |
12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
13 | #include <linux/atomic.h> | 13 | #include <linux/atomic.h> |
14 | #include <linux/debug_locks.h> | 14 | #include <linux/debug_locks.h> |
15 | #include <linux/mm_types.h> | 15 | #include <linux/mm_types.h> |
16 | #include <linux/range.h> | 16 | #include <linux/range.h> |
17 | #include <linux/pfn.h> | 17 | #include <linux/pfn.h> |
18 | #include <linux/bit_spinlock.h> | 18 | #include <linux/bit_spinlock.h> |
19 | #include <linux/shrinker.h> | 19 | #include <linux/shrinker.h> |
20 | 20 | ||
21 | struct mempolicy; | 21 | struct mempolicy; |
22 | struct anon_vma; | 22 | struct anon_vma; |
23 | struct anon_vma_chain; | 23 | struct anon_vma_chain; |
24 | struct file_ra_state; | 24 | struct file_ra_state; |
25 | struct user_struct; | 25 | struct user_struct; |
26 | struct writeback_control; | 26 | struct writeback_control; |
27 | 27 | ||
28 | #ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ | 28 | #ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ |
29 | extern unsigned long max_mapnr; | 29 | extern unsigned long max_mapnr; |
30 | 30 | ||
31 | static inline void set_max_mapnr(unsigned long limit) | 31 | static inline void set_max_mapnr(unsigned long limit) |
32 | { | 32 | { |
33 | max_mapnr = limit; | 33 | max_mapnr = limit; |
34 | } | 34 | } |
35 | #else | 35 | #else |
36 | static inline void set_max_mapnr(unsigned long limit) { } | 36 | static inline void set_max_mapnr(unsigned long limit) { } |
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | extern unsigned long totalram_pages; | 39 | extern unsigned long totalram_pages; |
40 | extern void * high_memory; | 40 | extern void * high_memory; |
41 | extern int page_cluster; | 41 | extern int page_cluster; |
42 | 42 | ||
43 | #ifdef CONFIG_SYSCTL | 43 | #ifdef CONFIG_SYSCTL |
44 | extern int sysctl_legacy_va_layout; | 44 | extern int sysctl_legacy_va_layout; |
45 | #else | 45 | #else |
46 | #define sysctl_legacy_va_layout 0 | 46 | #define sysctl_legacy_va_layout 0 |
47 | #endif | 47 | #endif |
48 | 48 | ||
49 | #include <asm/page.h> | 49 | #include <asm/page.h> |
50 | #include <asm/pgtable.h> | 50 | #include <asm/pgtable.h> |
51 | #include <asm/processor.h> | 51 | #include <asm/processor.h> |
52 | 52 | ||
53 | #ifndef __pa_symbol | ||
54 | #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) | ||
55 | #endif | ||
56 | |||
53 | extern unsigned long sysctl_user_reserve_kbytes; | 57 | extern unsigned long sysctl_user_reserve_kbytes; |
54 | extern unsigned long sysctl_admin_reserve_kbytes; | 58 | extern unsigned long sysctl_admin_reserve_kbytes; |
55 | 59 | ||
56 | #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) | 60 | #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) |
57 | 61 | ||
58 | /* to align the pointer to the (next) page boundary */ | 62 | /* to align the pointer to the (next) page boundary */ |
59 | #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) | 63 | #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) |
60 | 64 | ||
61 | /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ | 65 | /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ |
62 | #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE) | 66 | #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE) |
63 | 67 | ||
64 | /* | 68 | /* |
65 | * Linux kernel virtual memory manager primitives. | 69 | * Linux kernel virtual memory manager primitives. |
66 | * The idea being to have a "virtual" mm in the same way | 70 | * The idea being to have a "virtual" mm in the same way |
67 | * we have a virtual fs - giving a cleaner interface to the | 71 | * we have a virtual fs - giving a cleaner interface to the |
68 | * mm details, and allowing different kinds of memory mappings | 72 | * mm details, and allowing different kinds of memory mappings |
69 | * (from shared memory to executable loading to arbitrary | 73 | * (from shared memory to executable loading to arbitrary |
70 | * mmap() functions). | 74 | * mmap() functions). |
71 | */ | 75 | */ |
72 | 76 | ||
73 | extern struct kmem_cache *vm_area_cachep; | 77 | extern struct kmem_cache *vm_area_cachep; |
74 | 78 | ||
75 | #ifndef CONFIG_MMU | 79 | #ifndef CONFIG_MMU |
76 | extern struct rb_root nommu_region_tree; | 80 | extern struct rb_root nommu_region_tree; |
77 | extern struct rw_semaphore nommu_region_sem; | 81 | extern struct rw_semaphore nommu_region_sem; |
78 | 82 | ||
79 | extern unsigned int kobjsize(const void *objp); | 83 | extern unsigned int kobjsize(const void *objp); |
80 | #endif | 84 | #endif |
81 | 85 | ||
82 | /* | 86 | /* |
83 | * vm_flags in vm_area_struct, see mm_types.h. | 87 | * vm_flags in vm_area_struct, see mm_types.h. |
84 | */ | 88 | */ |
85 | #define VM_NONE 0x00000000 | 89 | #define VM_NONE 0x00000000 |
86 | 90 | ||
87 | #define VM_READ 0x00000001 /* currently active flags */ | 91 | #define VM_READ 0x00000001 /* currently active flags */ |
88 | #define VM_WRITE 0x00000002 | 92 | #define VM_WRITE 0x00000002 |
89 | #define VM_EXEC 0x00000004 | 93 | #define VM_EXEC 0x00000004 |
90 | #define VM_SHARED 0x00000008 | 94 | #define VM_SHARED 0x00000008 |
91 | 95 | ||
92 | /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ | 96 | /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ |
93 | #define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ | 97 | #define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ |
94 | #define VM_MAYWRITE 0x00000020 | 98 | #define VM_MAYWRITE 0x00000020 |
95 | #define VM_MAYEXEC 0x00000040 | 99 | #define VM_MAYEXEC 0x00000040 |
96 | #define VM_MAYSHARE 0x00000080 | 100 | #define VM_MAYSHARE 0x00000080 |
97 | 101 | ||
98 | #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ | 102 | #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ |
99 | #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ | 103 | #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ |
100 | #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ | 104 | #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ |
101 | 105 | ||
102 | #define VM_LOCKED 0x00002000 | 106 | #define VM_LOCKED 0x00002000 |
103 | #define VM_IO 0x00004000 /* Memory mapped I/O or similar */ | 107 | #define VM_IO 0x00004000 /* Memory mapped I/O or similar */ |
104 | 108 | ||
105 | /* Used by sys_madvise() */ | 109 | /* Used by sys_madvise() */ |
106 | #define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ | 110 | #define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ |
107 | #define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ | 111 | #define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ |
108 | 112 | ||
109 | #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ | 113 | #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ |
110 | #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ | 114 | #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ |
111 | #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ | 115 | #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ |
112 | #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ | 116 | #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ |
113 | #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ | 117 | #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ |
114 | #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ | 118 | #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ |
115 | #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ | 119 | #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ |
116 | #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ | 120 | #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ |
117 | 121 | ||
118 | #ifdef CONFIG_MEM_SOFT_DIRTY | 122 | #ifdef CONFIG_MEM_SOFT_DIRTY |
119 | # define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */ | 123 | # define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */ |
120 | #else | 124 | #else |
121 | # define VM_SOFTDIRTY 0 | 125 | # define VM_SOFTDIRTY 0 |
122 | #endif | 126 | #endif |
123 | 127 | ||
124 | #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ | 128 | #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ |
125 | #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ | 129 | #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ |
126 | #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ | 130 | #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ |
127 | #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ | 131 | #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ |
128 | 132 | ||
129 | #if defined(CONFIG_X86) | 133 | #if defined(CONFIG_X86) |
130 | # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ | 134 | # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ |
131 | #elif defined(CONFIG_PPC) | 135 | #elif defined(CONFIG_PPC) |
132 | # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ | 136 | # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ |
133 | #elif defined(CONFIG_PARISC) | 137 | #elif defined(CONFIG_PARISC) |
134 | # define VM_GROWSUP VM_ARCH_1 | 138 | # define VM_GROWSUP VM_ARCH_1 |
135 | #elif defined(CONFIG_METAG) | 139 | #elif defined(CONFIG_METAG) |
136 | # define VM_GROWSUP VM_ARCH_1 | 140 | # define VM_GROWSUP VM_ARCH_1 |
137 | #elif defined(CONFIG_IA64) | 141 | #elif defined(CONFIG_IA64) |
138 | # define VM_GROWSUP VM_ARCH_1 | 142 | # define VM_GROWSUP VM_ARCH_1 |
139 | #elif !defined(CONFIG_MMU) | 143 | #elif !defined(CONFIG_MMU) |
140 | # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ | 144 | # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ |
141 | #endif | 145 | #endif |
142 | 146 | ||
143 | #ifndef VM_GROWSUP | 147 | #ifndef VM_GROWSUP |
144 | # define VM_GROWSUP VM_NONE | 148 | # define VM_GROWSUP VM_NONE |
145 | #endif | 149 | #endif |
146 | 150 | ||
147 | /* Bits set in the VMA until the stack is in its final location */ | 151 | /* Bits set in the VMA until the stack is in its final location */ |
148 | #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) | 152 | #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) |
149 | 153 | ||
150 | #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ | 154 | #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ |
151 | #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS | 155 | #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS |
152 | #endif | 156 | #endif |
153 | 157 | ||
154 | #ifdef CONFIG_STACK_GROWSUP | 158 | #ifdef CONFIG_STACK_GROWSUP |
155 | #define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) | 159 | #define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) |
156 | #else | 160 | #else |
157 | #define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) | 161 | #define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) |
158 | #endif | 162 | #endif |
159 | 163 | ||
160 | /* | 164 | /* |
161 | * Special vmas that are non-mergable, non-mlock()able. | 165 | * Special vmas that are non-mergable, non-mlock()able. |
162 | * Note: mm/huge_memory.c VM_NO_THP depends on this definition. | 166 | * Note: mm/huge_memory.c VM_NO_THP depends on this definition. |
163 | */ | 167 | */ |
164 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP) | 168 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP) |
165 | 169 | ||
166 | /* | 170 | /* |
167 | * mapping from the currently active vm_flags protection bits (the | 171 | * mapping from the currently active vm_flags protection bits (the |
168 | * low four bits) to a page protection mask.. | 172 | * low four bits) to a page protection mask.. |
169 | */ | 173 | */ |
170 | extern pgprot_t protection_map[16]; | 174 | extern pgprot_t protection_map[16]; |
171 | 175 | ||
172 | #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ | 176 | #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ |
173 | #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ | 177 | #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ |
174 | #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ | 178 | #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ |
175 | #define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ | 179 | #define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ |
176 | #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ | 180 | #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ |
177 | #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ | 181 | #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ |
178 | #define FAULT_FLAG_TRIED 0x40 /* second try */ | 182 | #define FAULT_FLAG_TRIED 0x40 /* second try */ |
179 | #define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */ | 183 | #define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */ |
180 | 184 | ||
181 | /* | 185 | /* |
182 | * vm_fault is filled by the the pagefault handler and passed to the vma's | 186 | * vm_fault is filled by the the pagefault handler and passed to the vma's |
183 | * ->fault function. The vma's ->fault is responsible for returning a bitmask | 187 | * ->fault function. The vma's ->fault is responsible for returning a bitmask |
184 | * of VM_FAULT_xxx flags that give details about how the fault was handled. | 188 | * of VM_FAULT_xxx flags that give details about how the fault was handled. |
185 | * | 189 | * |
186 | * pgoff should be used in favour of virtual_address, if possible. If pgoff | 190 | * pgoff should be used in favour of virtual_address, if possible. If pgoff |
187 | * is used, one may implement ->remap_pages to get nonlinear mapping support. | 191 | * is used, one may implement ->remap_pages to get nonlinear mapping support. |
188 | */ | 192 | */ |
189 | struct vm_fault { | 193 | struct vm_fault { |
190 | unsigned int flags; /* FAULT_FLAG_xxx flags */ | 194 | unsigned int flags; /* FAULT_FLAG_xxx flags */ |
191 | pgoff_t pgoff; /* Logical page offset based on vma */ | 195 | pgoff_t pgoff; /* Logical page offset based on vma */ |
192 | void __user *virtual_address; /* Faulting virtual address */ | 196 | void __user *virtual_address; /* Faulting virtual address */ |
193 | 197 | ||
194 | struct page *page; /* ->fault handlers should return a | 198 | struct page *page; /* ->fault handlers should return a |
195 | * page here, unless VM_FAULT_NOPAGE | 199 | * page here, unless VM_FAULT_NOPAGE |
196 | * is set (which is also implied by | 200 | * is set (which is also implied by |
197 | * VM_FAULT_ERROR). | 201 | * VM_FAULT_ERROR). |
198 | */ | 202 | */ |
199 | }; | 203 | }; |
200 | 204 | ||
201 | /* | 205 | /* |
202 | * These are the virtual MM functions - opening of an area, closing and | 206 | * These are the virtual MM functions - opening of an area, closing and |
203 | * unmapping it (needed to keep files on disk up-to-date etc), pointer | 207 | * unmapping it (needed to keep files on disk up-to-date etc), pointer |
204 | * to the functions called when a no-page or a wp-page exception occurs. | 208 | * to the functions called when a no-page or a wp-page exception occurs. |
205 | */ | 209 | */ |
206 | struct vm_operations_struct { | 210 | struct vm_operations_struct { |
207 | void (*open)(struct vm_area_struct * area); | 211 | void (*open)(struct vm_area_struct * area); |
208 | void (*close)(struct vm_area_struct * area); | 212 | void (*close)(struct vm_area_struct * area); |
209 | int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); | 213 | int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); |
210 | 214 | ||
211 | /* notification that a previously read-only page is about to become | 215 | /* notification that a previously read-only page is about to become |
212 | * writable, if an error is returned it will cause a SIGBUS */ | 216 | * writable, if an error is returned it will cause a SIGBUS */ |
213 | int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); | 217 | int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); |
214 | 218 | ||
215 | /* called by access_process_vm when get_user_pages() fails, typically | 219 | /* called by access_process_vm when get_user_pages() fails, typically |
216 | * for use by special VMAs that can switch between memory and hardware | 220 | * for use by special VMAs that can switch between memory and hardware |
217 | */ | 221 | */ |
218 | int (*access)(struct vm_area_struct *vma, unsigned long addr, | 222 | int (*access)(struct vm_area_struct *vma, unsigned long addr, |
219 | void *buf, int len, int write); | 223 | void *buf, int len, int write); |
220 | #ifdef CONFIG_NUMA | 224 | #ifdef CONFIG_NUMA |
221 | /* | 225 | /* |
222 | * set_policy() op must add a reference to any non-NULL @new mempolicy | 226 | * set_policy() op must add a reference to any non-NULL @new mempolicy |
223 | * to hold the policy upon return. Caller should pass NULL @new to | 227 | * to hold the policy upon return. Caller should pass NULL @new to |
224 | * remove a policy and fall back to surrounding context--i.e. do not | 228 | * remove a policy and fall back to surrounding context--i.e. do not |
225 | * install a MPOL_DEFAULT policy, nor the task or system default | 229 | * install a MPOL_DEFAULT policy, nor the task or system default |
226 | * mempolicy. | 230 | * mempolicy. |
227 | */ | 231 | */ |
228 | int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); | 232 | int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); |
229 | 233 | ||
230 | /* | 234 | /* |
231 | * get_policy() op must add reference [mpol_get()] to any policy at | 235 | * get_policy() op must add reference [mpol_get()] to any policy at |
232 | * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure | 236 | * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure |
233 | * in mm/mempolicy.c will do this automatically. | 237 | * in mm/mempolicy.c will do this automatically. |
234 | * get_policy() must NOT add a ref if the policy at (vma,addr) is not | 238 | * get_policy() must NOT add a ref if the policy at (vma,addr) is not |
235 | * marked as MPOL_SHARED. vma policies are protected by the mmap_sem. | 239 | * marked as MPOL_SHARED. vma policies are protected by the mmap_sem. |
236 | * If no [shared/vma] mempolicy exists at the addr, get_policy() op | 240 | * If no [shared/vma] mempolicy exists at the addr, get_policy() op |
237 | * must return NULL--i.e., do not "fallback" to task or system default | 241 | * must return NULL--i.e., do not "fallback" to task or system default |
238 | * policy. | 242 | * policy. |
239 | */ | 243 | */ |
240 | struct mempolicy *(*get_policy)(struct vm_area_struct *vma, | 244 | struct mempolicy *(*get_policy)(struct vm_area_struct *vma, |
241 | unsigned long addr); | 245 | unsigned long addr); |
242 | int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from, | 246 | int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from, |
243 | const nodemask_t *to, unsigned long flags); | 247 | const nodemask_t *to, unsigned long flags); |
244 | #endif | 248 | #endif |
245 | /* called by sys_remap_file_pages() to populate non-linear mapping */ | 249 | /* called by sys_remap_file_pages() to populate non-linear mapping */ |
246 | int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, | 250 | int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, |
247 | unsigned long size, pgoff_t pgoff); | 251 | unsigned long size, pgoff_t pgoff); |
248 | }; | 252 | }; |
249 | 253 | ||
250 | struct mmu_gather; | 254 | struct mmu_gather; |
251 | struct inode; | 255 | struct inode; |
252 | 256 | ||
253 | #define page_private(page) ((page)->private) | 257 | #define page_private(page) ((page)->private) |
254 | #define set_page_private(page, v) ((page)->private = (v)) | 258 | #define set_page_private(page, v) ((page)->private = (v)) |
255 | 259 | ||
256 | /* It's valid only if the page is free path or free_list */ | 260 | /* It's valid only if the page is free path or free_list */ |
257 | static inline void set_freepage_migratetype(struct page *page, int migratetype) | 261 | static inline void set_freepage_migratetype(struct page *page, int migratetype) |
258 | { | 262 | { |
259 | page->index = migratetype; | 263 | page->index = migratetype; |
260 | } | 264 | } |
261 | 265 | ||
262 | /* It's valid only if the page is free path or free_list */ | 266 | /* It's valid only if the page is free path or free_list */ |
263 | static inline int get_freepage_migratetype(struct page *page) | 267 | static inline int get_freepage_migratetype(struct page *page) |
264 | { | 268 | { |
265 | return page->index; | 269 | return page->index; |
266 | } | 270 | } |
267 | 271 | ||
268 | /* | 272 | /* |
269 | * FIXME: take this include out, include page-flags.h in | 273 | * FIXME: take this include out, include page-flags.h in |
270 | * files which need it (119 of them) | 274 | * files which need it (119 of them) |
271 | */ | 275 | */ |
272 | #include <linux/page-flags.h> | 276 | #include <linux/page-flags.h> |
273 | #include <linux/huge_mm.h> | 277 | #include <linux/huge_mm.h> |
274 | 278 | ||
275 | /* | 279 | /* |
276 | * Methods to modify the page usage count. | 280 | * Methods to modify the page usage count. |
277 | * | 281 | * |
278 | * What counts for a page usage: | 282 | * What counts for a page usage: |
279 | * - cache mapping (page->mapping) | 283 | * - cache mapping (page->mapping) |
280 | * - private data (page->private) | 284 | * - private data (page->private) |
281 | * - page mapped in a task's page tables, each mapping | 285 | * - page mapped in a task's page tables, each mapping |
282 | * is counted separately | 286 | * is counted separately |
283 | * | 287 | * |
284 | * Also, many kernel routines increase the page count before a critical | 288 | * Also, many kernel routines increase the page count before a critical |
285 | * routine so they can be sure the page doesn't go away from under them. | 289 | * routine so they can be sure the page doesn't go away from under them. |
286 | */ | 290 | */ |
287 | 291 | ||
288 | /* | 292 | /* |
289 | * Drop a ref, return true if the refcount fell to zero (the page has no users) | 293 | * Drop a ref, return true if the refcount fell to zero (the page has no users) |
290 | */ | 294 | */ |
291 | static inline int put_page_testzero(struct page *page) | 295 | static inline int put_page_testzero(struct page *page) |
292 | { | 296 | { |
293 | VM_BUG_ON(atomic_read(&page->_count) == 0); | 297 | VM_BUG_ON(atomic_read(&page->_count) == 0); |
294 | return atomic_dec_and_test(&page->_count); | 298 | return atomic_dec_and_test(&page->_count); |
295 | } | 299 | } |
296 | 300 | ||
297 | /* | 301 | /* |
298 | * Try to grab a ref unless the page has a refcount of zero, return false if | 302 | * Try to grab a ref unless the page has a refcount of zero, return false if |
299 | * that is the case. | 303 | * that is the case. |
300 | * This can be called when MMU is off so it must not access | 304 | * This can be called when MMU is off so it must not access |
301 | * any of the virtual mappings. | 305 | * any of the virtual mappings. |
302 | */ | 306 | */ |
303 | static inline int get_page_unless_zero(struct page *page) | 307 | static inline int get_page_unless_zero(struct page *page) |
304 | { | 308 | { |
305 | return atomic_inc_not_zero(&page->_count); | 309 | return atomic_inc_not_zero(&page->_count); |
306 | } | 310 | } |
307 | 311 | ||
308 | /* | 312 | /* |
309 | * Try to drop a ref unless the page has a refcount of one, return false if | 313 | * Try to drop a ref unless the page has a refcount of one, return false if |
310 | * that is the case. | 314 | * that is the case. |
311 | * This is to make sure that the refcount won't become zero after this drop. | 315 | * This is to make sure that the refcount won't become zero after this drop. |
312 | * This can be called when MMU is off so it must not access | 316 | * This can be called when MMU is off so it must not access |
313 | * any of the virtual mappings. | 317 | * any of the virtual mappings. |
314 | */ | 318 | */ |
315 | static inline int put_page_unless_one(struct page *page) | 319 | static inline int put_page_unless_one(struct page *page) |
316 | { | 320 | { |
317 | return atomic_add_unless(&page->_count, -1, 1); | 321 | return atomic_add_unless(&page->_count, -1, 1); |
318 | } | 322 | } |
319 | 323 | ||
320 | extern int page_is_ram(unsigned long pfn); | 324 | extern int page_is_ram(unsigned long pfn); |
321 | 325 | ||
322 | /* Support for virtually mapped pages */ | 326 | /* Support for virtually mapped pages */ |
323 | struct page *vmalloc_to_page(const void *addr); | 327 | struct page *vmalloc_to_page(const void *addr); |
324 | unsigned long vmalloc_to_pfn(const void *addr); | 328 | unsigned long vmalloc_to_pfn(const void *addr); |
325 | 329 | ||
326 | /* | 330 | /* |
327 | * Determine if an address is within the vmalloc range | 331 | * Determine if an address is within the vmalloc range |
328 | * | 332 | * |
329 | * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there | 333 | * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there |
330 | * is no special casing required. | 334 | * is no special casing required. |
331 | */ | 335 | */ |
332 | static inline int is_vmalloc_addr(const void *x) | 336 | static inline int is_vmalloc_addr(const void *x) |
333 | { | 337 | { |
334 | #ifdef CONFIG_MMU | 338 | #ifdef CONFIG_MMU |
335 | unsigned long addr = (unsigned long)x; | 339 | unsigned long addr = (unsigned long)x; |
336 | 340 | ||
337 | return addr >= VMALLOC_START && addr < VMALLOC_END; | 341 | return addr >= VMALLOC_START && addr < VMALLOC_END; |
338 | #else | 342 | #else |
339 | return 0; | 343 | return 0; |
340 | #endif | 344 | #endif |
341 | } | 345 | } |
342 | #ifdef CONFIG_MMU | 346 | #ifdef CONFIG_MMU |
343 | extern int is_vmalloc_or_module_addr(const void *x); | 347 | extern int is_vmalloc_or_module_addr(const void *x); |
344 | #else | 348 | #else |
345 | static inline int is_vmalloc_or_module_addr(const void *x) | 349 | static inline int is_vmalloc_or_module_addr(const void *x) |
346 | { | 350 | { |
347 | return 0; | 351 | return 0; |
348 | } | 352 | } |
349 | #endif | 353 | #endif |
350 | 354 | ||
351 | static inline void compound_lock(struct page *page) | 355 | static inline void compound_lock(struct page *page) |
352 | { | 356 | { |
353 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 357 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
354 | VM_BUG_ON(PageSlab(page)); | 358 | VM_BUG_ON(PageSlab(page)); |
355 | bit_spin_lock(PG_compound_lock, &page->flags); | 359 | bit_spin_lock(PG_compound_lock, &page->flags); |
356 | #endif | 360 | #endif |
357 | } | 361 | } |
358 | 362 | ||
359 | static inline void compound_unlock(struct page *page) | 363 | static inline void compound_unlock(struct page *page) |
360 | { | 364 | { |
361 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 365 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
362 | VM_BUG_ON(PageSlab(page)); | 366 | VM_BUG_ON(PageSlab(page)); |
363 | bit_spin_unlock(PG_compound_lock, &page->flags); | 367 | bit_spin_unlock(PG_compound_lock, &page->flags); |
364 | #endif | 368 | #endif |
365 | } | 369 | } |
366 | 370 | ||
367 | static inline unsigned long compound_lock_irqsave(struct page *page) | 371 | static inline unsigned long compound_lock_irqsave(struct page *page) |
368 | { | 372 | { |
369 | unsigned long uninitialized_var(flags); | 373 | unsigned long uninitialized_var(flags); |
370 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 374 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
371 | local_irq_save(flags); | 375 | local_irq_save(flags); |
372 | compound_lock(page); | 376 | compound_lock(page); |
373 | #endif | 377 | #endif |
374 | return flags; | 378 | return flags; |
375 | } | 379 | } |
376 | 380 | ||
377 | static inline void compound_unlock_irqrestore(struct page *page, | 381 | static inline void compound_unlock_irqrestore(struct page *page, |
378 | unsigned long flags) | 382 | unsigned long flags) |
379 | { | 383 | { |
380 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 384 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
381 | compound_unlock(page); | 385 | compound_unlock(page); |
382 | local_irq_restore(flags); | 386 | local_irq_restore(flags); |
383 | #endif | 387 | #endif |
384 | } | 388 | } |
385 | 389 | ||
386 | static inline struct page *compound_head(struct page *page) | 390 | static inline struct page *compound_head(struct page *page) |
387 | { | 391 | { |
388 | if (unlikely(PageTail(page))) | 392 | if (unlikely(PageTail(page))) |
389 | return page->first_page; | 393 | return page->first_page; |
390 | return page; | 394 | return page; |
391 | } | 395 | } |
392 | 396 | ||
393 | /* | 397 | /* |
394 | * The atomic page->_mapcount, starts from -1: so that transitions | 398 | * The atomic page->_mapcount, starts from -1: so that transitions |
395 | * both from it and to it can be tracked, using atomic_inc_and_test | 399 | * both from it and to it can be tracked, using atomic_inc_and_test |
396 | * and atomic_add_negative(-1). | 400 | * and atomic_add_negative(-1). |
397 | */ | 401 | */ |
398 | static inline void page_mapcount_reset(struct page *page) | 402 | static inline void page_mapcount_reset(struct page *page) |
399 | { | 403 | { |
400 | atomic_set(&(page)->_mapcount, -1); | 404 | atomic_set(&(page)->_mapcount, -1); |
401 | } | 405 | } |
402 | 406 | ||
403 | static inline int page_mapcount(struct page *page) | 407 | static inline int page_mapcount(struct page *page) |
404 | { | 408 | { |
405 | return atomic_read(&(page)->_mapcount) + 1; | 409 | return atomic_read(&(page)->_mapcount) + 1; |
406 | } | 410 | } |
407 | 411 | ||
408 | static inline int page_count(struct page *page) | 412 | static inline int page_count(struct page *page) |
409 | { | 413 | { |
410 | return atomic_read(&compound_head(page)->_count); | 414 | return atomic_read(&compound_head(page)->_count); |
411 | } | 415 | } |
412 | 416 | ||
413 | static inline void get_huge_page_tail(struct page *page) | 417 | static inline void get_huge_page_tail(struct page *page) |
414 | { | 418 | { |
415 | /* | 419 | /* |
416 | * __split_huge_page_refcount() cannot run | 420 | * __split_huge_page_refcount() cannot run |
417 | * from under us. | 421 | * from under us. |
418 | */ | 422 | */ |
419 | VM_BUG_ON(page_mapcount(page) < 0); | 423 | VM_BUG_ON(page_mapcount(page) < 0); |
420 | VM_BUG_ON(atomic_read(&page->_count) != 0); | 424 | VM_BUG_ON(atomic_read(&page->_count) != 0); |
421 | atomic_inc(&page->_mapcount); | 425 | atomic_inc(&page->_mapcount); |
422 | } | 426 | } |
423 | 427 | ||
424 | extern bool __get_page_tail(struct page *page); | 428 | extern bool __get_page_tail(struct page *page); |
425 | 429 | ||
426 | static inline void get_page(struct page *page) | 430 | static inline void get_page(struct page *page) |
427 | { | 431 | { |
428 | if (unlikely(PageTail(page))) | 432 | if (unlikely(PageTail(page))) |
429 | if (likely(__get_page_tail(page))) | 433 | if (likely(__get_page_tail(page))) |
430 | return; | 434 | return; |
431 | /* | 435 | /* |
432 | * Getting a normal page or the head of a compound page | 436 | * Getting a normal page or the head of a compound page |
433 | * requires to already have an elevated page->_count. | 437 | * requires to already have an elevated page->_count. |
434 | */ | 438 | */ |
435 | VM_BUG_ON(atomic_read(&page->_count) <= 0); | 439 | VM_BUG_ON(atomic_read(&page->_count) <= 0); |
436 | atomic_inc(&page->_count); | 440 | atomic_inc(&page->_count); |
437 | } | 441 | } |
438 | 442 | ||
439 | static inline struct page *virt_to_head_page(const void *x) | 443 | static inline struct page *virt_to_head_page(const void *x) |
440 | { | 444 | { |
441 | struct page *page = virt_to_page(x); | 445 | struct page *page = virt_to_page(x); |
442 | return compound_head(page); | 446 | return compound_head(page); |
443 | } | 447 | } |
444 | 448 | ||
445 | /* | 449 | /* |
446 | * Setup the page count before being freed into the page allocator for | 450 | * Setup the page count before being freed into the page allocator for |
447 | * the first time (boot or memory hotplug) | 451 | * the first time (boot or memory hotplug) |
448 | */ | 452 | */ |
449 | static inline void init_page_count(struct page *page) | 453 | static inline void init_page_count(struct page *page) |
450 | { | 454 | { |
451 | atomic_set(&page->_count, 1); | 455 | atomic_set(&page->_count, 1); |
452 | } | 456 | } |
453 | 457 | ||
454 | /* | 458 | /* |
455 | * PageBuddy() indicates that the page is free and in the buddy system | 459 | * PageBuddy() indicates that the page is free and in the buddy system
456 | * (see mm/page_alloc.c). | 460 | * (see mm/page_alloc.c). |
457 | * | 461 | * |
458 | * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to | 462 | * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to |
459 | * -2 so that an underflow of the page_mapcount() won't be mistaken | 463 | * -2 so that an underflow of the page_mapcount() won't be mistaken |
460 | * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very | 464 | * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very |
461 | * efficiently by most CPU architectures. | 465 | * efficiently by most CPU architectures. |
462 | */ | 466 | */ |
463 | #define PAGE_BUDDY_MAPCOUNT_VALUE (-128) | 467 | #define PAGE_BUDDY_MAPCOUNT_VALUE (-128) |
464 | 468 | ||
465 | static inline int PageBuddy(struct page *page) | 469 | static inline int PageBuddy(struct page *page) |
466 | { | 470 | { |
467 | return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE; | 471 | return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE; |
468 | } | 472 | } |
469 | 473 | ||
470 | static inline void __SetPageBuddy(struct page *page) | 474 | static inline void __SetPageBuddy(struct page *page) |
471 | { | 475 | { |
472 | VM_BUG_ON(atomic_read(&page->_mapcount) != -1); | 476 | VM_BUG_ON(atomic_read(&page->_mapcount) != -1); |
473 | atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE); | 477 | atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE); |
474 | } | 478 | } |
475 | 479 | ||
476 | static inline void __ClearPageBuddy(struct page *page) | 480 | static inline void __ClearPageBuddy(struct page *page) |
477 | { | 481 | { |
478 | VM_BUG_ON(!PageBuddy(page)); | 482 | VM_BUG_ON(!PageBuddy(page)); |
479 | atomic_set(&page->_mapcount, -1); | 483 | atomic_set(&page->_mapcount, -1); |
480 | } | 484 | } |
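
A page sitting in the buddy allocator has no user mappings, so its _mapcount slot can double as a type tag: -128 marks "free, owned by the buddy system", safely far from the ordinary unmapped value of -1. A standalone sketch of the same trick; the fake_* names mirror the helpers above but are stand-ins for illustration:

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

#define FAKE_BUDDY_MAPCOUNT_VALUE (-128)

struct fake_page { atomic_int _mapcount; };

static int fake_PageBuddy(struct fake_page *p)
{
	return atomic_load(&p->_mapcount) == FAKE_BUDDY_MAPCOUNT_VALUE;
}

static void fake_SetPageBuddy(struct fake_page *p)
{
	assert(atomic_load(&p->_mapcount) == -1);   /* page must be unmapped */
	atomic_store(&p->_mapcount, FAKE_BUDDY_MAPCOUNT_VALUE);
}

static void fake_ClearPageBuddy(struct fake_page *p)
{
	assert(fake_PageBuddy(p));
	atomic_store(&p->_mapcount, -1);
}

int main(void)
{
	struct fake_page p;

	atomic_init(&p._mapcount, -1);              /* free, unmapped page */
	fake_SetPageBuddy(&p);
	printf("in buddy system? %d\n", fake_PageBuddy(&p));   /* 1 */
	fake_ClearPageBuddy(&p);
	printf("in buddy system? %d\n", fake_PageBuddy(&p));   /* 0 */
	return 0;
}
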
481 | 485 | ||
482 | void put_page(struct page *page); | 486 | void put_page(struct page *page); |
483 | void put_pages_list(struct list_head *pages); | 487 | void put_pages_list(struct list_head *pages); |
484 | 488 | ||
485 | void split_page(struct page *page, unsigned int order); | 489 | void split_page(struct page *page, unsigned int order); |
486 | int split_free_page(struct page *page); | 490 | int split_free_page(struct page *page); |
487 | 491 | ||
488 | /* | 492 | /* |
489 | * Compound pages have a destructor function. Provide a | 493 | * Compound pages have a destructor function. Provide a |
490 | * prototype for that function and accessor functions. | 494 | * prototype for that function and accessor functions. |
491 | * These are _only_ valid on the head of a PG_compound page. | 495 | * These are _only_ valid on the head of a PG_compound page. |
492 | */ | 496 | */ |
493 | typedef void compound_page_dtor(struct page *); | 497 | typedef void compound_page_dtor(struct page *); |
494 | 498 | ||
495 | static inline void set_compound_page_dtor(struct page *page, | 499 | static inline void set_compound_page_dtor(struct page *page, |
496 | compound_page_dtor *dtor) | 500 | compound_page_dtor *dtor) |
497 | { | 501 | { |
498 | page[1].lru.next = (void *)dtor; | 502 | page[1].lru.next = (void *)dtor; |
499 | } | 503 | } |
500 | 504 | ||
501 | static inline compound_page_dtor *get_compound_page_dtor(struct page *page) | 505 | static inline compound_page_dtor *get_compound_page_dtor(struct page *page) |
502 | { | 506 | { |
503 | return (compound_page_dtor *)page[1].lru.next; | 507 | return (compound_page_dtor *)page[1].lru.next; |
504 | } | 508 | } |
505 | 509 | ||
506 | static inline int compound_order(struct page *page) | 510 | static inline int compound_order(struct page *page) |
507 | { | 511 | { |
508 | if (!PageHead(page)) | 512 | if (!PageHead(page)) |
509 | return 0; | 513 | return 0; |
510 | return (unsigned long)page[1].lru.prev; | 514 | return (unsigned long)page[1].lru.prev; |
511 | } | 515 | } |
512 | 516 | ||
513 | static inline void set_compound_order(struct page *page, unsigned long order) | 517 | static inline void set_compound_order(struct page *page, unsigned long order) |
514 | { | 518 | { |
515 | page[1].lru.prev = (void *)order; | 519 | page[1].lru.prev = (void *)order; |
516 | } | 520 | } |
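
Compound pages keep their metadata out of the head page: the destructor pointer and the order are stashed in the otherwise unused lru fields of the first tail page, page[1]. A rough userspace sketch of that storage trick, with invented fake_* types standing in for struct page:

#include <stdio.h>

/* Pared-down stand-ins: the real struct page packs these into unions. */
struct fake_list_head { void *next, *prev; };
struct fake_page {
	int head;                    /* stand-in for PageHead()          */
	struct fake_list_head lru;   /* reused for dtor/order in page[1] */
};

typedef void fake_dtor_t(struct fake_page *);

static void fake_set_compound_order(struct fake_page *page, unsigned long order)
{
	page[1].lru.prev = (void *)order;   /* order hides in the first tail page */
}

static unsigned long fake_compound_order(struct fake_page *page)
{
	if (!page->head)
		return 0;
	return (unsigned long)page[1].lru.prev;
}

static void fake_compound_dtor(struct fake_page *page)
{
	printf("freeing order-%lu compound page\n", fake_compound_order(page));
}

int main(void)
{
	struct fake_page pages[2] = { { .head = 1 }, { 0 } };

	fake_set_compound_order(pages, 4);   /* a 2^4-page compound page */
	/* Function pointer stored through void*, mirroring the kernel's cast;
	 * this relies on common-platform behaviour, not strict ISO C. */
	pages[1].lru.next = (void *)fake_compound_dtor;

	((fake_dtor_t *)pages[1].lru.next)(pages);
	return 0;
}
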
517 | 521 | ||
518 | #ifdef CONFIG_MMU | 522 | #ifdef CONFIG_MMU |
519 | /* | 523 | /* |
520 | * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when | 524 | * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when |
521 | * servicing faults for write access. In the normal case, we always want | 525 | * servicing faults for write access. In the normal case, we always want
522 | * pte_mkwrite. But get_user_pages can cause write faults for mappings | 526 | * pte_mkwrite. But get_user_pages can cause write faults for mappings |
523 | * that do not have writing enabled, when used by access_process_vm. | 527 | * that do not have writing enabled, when used by access_process_vm. |
524 | */ | 528 | */ |
525 | static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) | 529 | static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) |
526 | { | 530 | { |
527 | if (likely(vma->vm_flags & VM_WRITE)) | 531 | if (likely(vma->vm_flags & VM_WRITE)) |
528 | pte = pte_mkwrite(pte); | 532 | pte = pte_mkwrite(pte); |
529 | return pte; | 533 | return pte; |
530 | } | 534 | } |
531 | #endif | 535 | #endif |
532 | 536 | ||
533 | /* | 537 | /* |
534 | * Multiple processes may "see" the same page. E.g. for untouched | 538 | * Multiple processes may "see" the same page. E.g. for untouched |
535 | * mappings of /dev/null, all processes see the same page full of | 539 | * mappings of /dev/null, all processes see the same page full of |
536 | * zeroes, and text pages of executables and shared libraries have | 540 | * zeroes, and text pages of executables and shared libraries have |
537 | * only one copy in memory, at most, normally. | 541 | * only one copy in memory, at most, normally. |
538 | * | 542 | * |
539 | * For the non-reserved pages, page_count(page) denotes a reference count. | 543 | * For the non-reserved pages, page_count(page) denotes a reference count. |
540 | * page_count() == 0 means the page is free. page->lru is then used for | 544 | * page_count() == 0 means the page is free. page->lru is then used for |
541 | * freelist management in the buddy allocator. | 545 | * freelist management in the buddy allocator. |
542 | * page_count() > 0 means the page has been allocated. | 546 | * page_count() > 0 means the page has been allocated. |
543 | * | 547 | * |
544 | * Pages are allocated by the slab allocator in order to provide memory | 548 | * Pages are allocated by the slab allocator in order to provide memory |
545 | * to kmalloc and kmem_cache_alloc. In this case, the management of the | 549 | * to kmalloc and kmem_cache_alloc. In this case, the management of the |
546 | * page, and the fields in 'struct page' are the responsibility of mm/slab.c | 550 | * page, and the fields in 'struct page' are the responsibility of mm/slab.c |
547 | * unless a particular usage is carefully commented. (the responsibility of | 551 | * unless a particular usage is carefully commented. (the responsibility of |
548 | * freeing the kmalloc memory is the caller's, of course). | 552 | * freeing the kmalloc memory is the caller's, of course). |
549 | * | 553 | * |
550 | * A page may be used by anyone else who does a __get_free_page(). | 554 | * A page may be used by anyone else who does a __get_free_page(). |
551 | * In this case, page_count still tracks the references, and should only | 555 | * In this case, page_count still tracks the references, and should only |
552 | * be used through the normal accessor functions. The top bits of page->flags | 556 | * be used through the normal accessor functions. The top bits of page->flags |
553 | * and page->virtual store page management information, but all other fields | 557 | * and page->virtual store page management information, but all other fields |
554 | * are unused and could be used privately, carefully. The management of this | 558 | * are unused and could be used privately, carefully. The management of this |
555 | * page is the responsibility of the one who allocated it, and those who have | 559 | * page is the responsibility of the one who allocated it, and those who have |
556 | * subsequently been given references to it. | 560 | * subsequently been given references to it. |
557 | * | 561 | * |
558 | * The other pages (we may call them "pagecache pages") are completely | 562 | * The other pages (we may call them "pagecache pages") are completely |
559 | * managed by the Linux memory manager: I/O, buffers, swapping etc. | 563 | * managed by the Linux memory manager: I/O, buffers, swapping etc. |
560 | * The following discussion applies only to them. | 564 | * The following discussion applies only to them. |
561 | * | 565 | * |
562 | * A pagecache page contains an opaque `private' member, which belongs to the | 566 | * A pagecache page contains an opaque `private' member, which belongs to the |
563 | * page's address_space. Usually, this is the address of a circular list of | 567 | * page's address_space. Usually, this is the address of a circular list of |
564 | * the page's disk buffers. PG_private must be set to tell the VM to call | 568 | * the page's disk buffers. PG_private must be set to tell the VM to call |
565 | * into the filesystem to release these pages. | 569 | * into the filesystem to release these pages. |
566 | * | 570 | * |
567 | * A page may belong to an inode's memory mapping. In this case, page->mapping | 571 | * A page may belong to an inode's memory mapping. In this case, page->mapping |
568 | * is the pointer to the inode, and page->index is the file offset of the page, | 572 | * is the pointer to the inode, and page->index is the file offset of the page, |
569 | * in units of PAGE_CACHE_SIZE. | 573 | * in units of PAGE_CACHE_SIZE. |
570 | * | 574 | * |
571 | * If pagecache pages are not associated with an inode, they are said to be | 575 | * If pagecache pages are not associated with an inode, they are said to be |
572 | * anonymous pages. These may become associated with the swapcache, and in that | 576 | * anonymous pages. These may become associated with the swapcache, and in that |
573 | * case PG_swapcache is set, and page->private is an offset into the swapcache. | 577 | * case PG_swapcache is set, and page->private is an offset into the swapcache. |
574 | * | 578 | * |
575 | * In either case (swapcache or inode backed), the pagecache itself holds one | 579 | * In either case (swapcache or inode backed), the pagecache itself holds one |
576 | * reference to the page. Setting PG_private should also increment the | 580 | * reference to the page. Setting PG_private should also increment the |
577 | * refcount. Each user mapping also holds a reference to the page. | 581 | * refcount. Each user mapping also holds a reference to the page.
578 | * | 582 | * |
579 | * The pagecache pages are stored in a per-mapping radix tree, which is | 583 | * The pagecache pages are stored in a per-mapping radix tree, which is |
580 | * rooted at mapping->page_tree, and indexed by offset. | 584 | * rooted at mapping->page_tree, and indexed by offset. |
581 | * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space | 585 | * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space |
582 | * lists, we instead now tag pages as dirty/writeback in the radix tree. | 586 | * lists, we instead now tag pages as dirty/writeback in the radix tree. |
583 | * | 587 | * |
584 | * All pagecache pages may be subject to I/O: | 588 | * All pagecache pages may be subject to I/O: |
585 | * - inode pages may need to be read from disk, | 589 | * - inode pages may need to be read from disk, |
586 | * - inode pages which have been modified and are MAP_SHARED may need | 590 | * - inode pages which have been modified and are MAP_SHARED may need |
587 | * to be written back to the inode on disk, | 591 | * to be written back to the inode on disk, |
588 | * - anonymous pages (including MAP_PRIVATE file mappings) which have been | 592 | * - anonymous pages (including MAP_PRIVATE file mappings) which have been |
589 | * modified may need to be swapped out to swap space and (later) to be read | 593 | * modified may need to be swapped out to swap space and (later) to be read |
590 | * back into memory. | 594 | * back into memory. |
591 | */ | 595 | */ |
592 | 596 | ||
593 | /* | 597 | /* |
594 | * The zone field is never updated after free_area_init_core() | 598 | * The zone field is never updated after free_area_init_core() |
595 | * sets it, so none of the operations on it need to be atomic. | 599 | * sets it, so none of the operations on it need to be atomic. |
596 | */ | 600 | */ |
597 | 601 | ||
598 | /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ | 602 | /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ |
599 | #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) | 603 | #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) |
600 | #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) | 604 | #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) |
601 | #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) | 605 | #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) |
602 | #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) | 606 | #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) |
603 | 607 | ||
604 | /* | 608 | /* |
605 | * Define the bit shifts to access each section. For non-existent | 609 | * Define the bit shifts to access each section. For non-existent |
606 | * sections we define the shift as 0; that plus a 0 mask ensures | 610 | * sections we define the shift as 0; that plus a 0 mask ensures |
607 | * the compiler will optimise away references to them. | 611 | * the compiler will optimise away references to them.
608 | */ | 612 | */ |
609 | #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) | 613 | #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) |
610 | #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) | 614 | #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) |
611 | #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) | 615 | #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) |
612 | #define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) | 616 | #define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) |
613 | 617 | ||
614 | /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ | 618 | /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ |
615 | #ifdef NODE_NOT_IN_PAGE_FLAGS | 619 | #ifdef NODE_NOT_IN_PAGE_FLAGS |
616 | #define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) | 620 | #define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) |
617 | #define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF)? \ | 621 | #define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF)? \ |
618 | SECTIONS_PGOFF : ZONES_PGOFF) | 622 | SECTIONS_PGOFF : ZONES_PGOFF) |
619 | #else | 623 | #else |
620 | #define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) | 624 | #define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) |
621 | #define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF)? \ | 625 | #define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF)? \ |
622 | NODES_PGOFF : ZONES_PGOFF) | 626 | NODES_PGOFF : ZONES_PGOFF) |
623 | #endif | 627 | #endif |
624 | 628 | ||
625 | #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) | 629 | #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) |
626 | 630 | ||
627 | #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS | 631 | #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS |
628 | #error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS | 632 | #error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS |
629 | #endif | 633 | #endif |
630 | 634 | ||
631 | #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) | 635 | #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) |
632 | #define NODES_MASK ((1UL << NODES_WIDTH) - 1) | 636 | #define NODES_MASK ((1UL << NODES_WIDTH) - 1) |
633 | #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) | 637 | #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) |
634 | #define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1) | 638 | #define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1) |
635 | #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) | 639 | #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) |
636 | 640 | ||
637 | static inline enum zone_type page_zonenum(const struct page *page) | 641 | static inline enum zone_type page_zonenum(const struct page *page) |
638 | { | 642 | { |
639 | return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; | 643 | return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; |
640 | } | 644 | } |
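
The macros above carve the single page->flags word into fields allocated downward from the top bit, with a shift and a mask per field. A standalone sketch of the same pack/unpack arithmetic; the DEMO_* widths are made up for the example, the real ones come from the memory model configuration:

#include <stdio.h>

/* Example widths, chosen only for the demo. */
#define DEMO_NODES_WIDTH  6
#define DEMO_ZONES_WIDTH  2

/* Fields are allocated downwards from the top bit, as in the header above. */
#define DEMO_NODES_PGOFF  ((sizeof(unsigned long) * 8) - DEMO_NODES_WIDTH)
#define DEMO_ZONES_PGOFF  (DEMO_NODES_PGOFF - DEMO_ZONES_WIDTH)

#define DEMO_NODES_MASK   ((1UL << DEMO_NODES_WIDTH) - 1)
#define DEMO_ZONES_MASK   ((1UL << DEMO_ZONES_WIDTH) - 1)

static unsigned long set_field(unsigned long flags, unsigned long val,
			       unsigned long mask, unsigned int shift)
{
	flags &= ~(mask << shift);        /* clear the old field */
	flags |= (val & mask) << shift;   /* insert the new one  */
	return flags;
}

static unsigned long get_field(unsigned long flags,
			       unsigned long mask, unsigned int shift)
{
	return (flags >> shift) & mask;
}

int main(void)
{
	unsigned long flags = 0;

	flags = set_field(flags, 3, DEMO_NODES_MASK, DEMO_NODES_PGOFF);
	flags = set_field(flags, 2, DEMO_ZONES_MASK, DEMO_ZONES_PGOFF);

	printf("node=%lu zone=%lu flags=%#lx\n",
	       get_field(flags, DEMO_NODES_MASK, DEMO_NODES_PGOFF),
	       get_field(flags, DEMO_ZONES_MASK, DEMO_ZONES_PGOFF),
	       flags);
	return 0;
}
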
641 | 645 | ||
642 | #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) | 646 | #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) |
643 | #define SECTION_IN_PAGE_FLAGS | 647 | #define SECTION_IN_PAGE_FLAGS |
644 | #endif | 648 | #endif |
645 | 649 | ||
646 | /* | 650 | /* |
647 | * The identification function is mainly used by the buddy allocator for | 651 | * The identification function is mainly used by the buddy allocator for |
648 | * determining if two pages could be buddies. We are not really identifying | 652 | * determining if two pages could be buddies. We are not really identifying |
649 | * the zone since we could be using the section number id if we do not have | 653 | * the zone since we could be using the section number id if we do not have |
650 | * node id available in page flags. | 654 | * node id available in page flags. |
651 | * We only guarantee that it will return the same value for two combinable | 655 | * We only guarantee that it will return the same value for two combinable |
652 | * pages in a zone. | 656 | * pages in a zone. |
653 | */ | 657 | */ |
654 | static inline int page_zone_id(struct page *page) | 658 | static inline int page_zone_id(struct page *page) |
655 | { | 659 | { |
656 | return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; | 660 | return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; |
657 | } | 661 | } |
658 | 662 | ||
659 | static inline int zone_to_nid(struct zone *zone) | 663 | static inline int zone_to_nid(struct zone *zone) |
660 | { | 664 | { |
661 | #ifdef CONFIG_NUMA | 665 | #ifdef CONFIG_NUMA |
662 | return zone->node; | 666 | return zone->node; |
663 | #else | 667 | #else |
664 | return 0; | 668 | return 0; |
665 | #endif | 669 | #endif |
666 | } | 670 | } |
667 | 671 | ||
668 | #ifdef NODE_NOT_IN_PAGE_FLAGS | 672 | #ifdef NODE_NOT_IN_PAGE_FLAGS |
669 | extern int page_to_nid(const struct page *page); | 673 | extern int page_to_nid(const struct page *page); |
670 | #else | 674 | #else |
671 | static inline int page_to_nid(const struct page *page) | 675 | static inline int page_to_nid(const struct page *page) |
672 | { | 676 | { |
673 | return (page->flags >> NODES_PGSHIFT) & NODES_MASK; | 677 | return (page->flags >> NODES_PGSHIFT) & NODES_MASK; |
674 | } | 678 | } |
675 | #endif | 679 | #endif |
676 | 680 | ||
677 | #ifdef CONFIG_NUMA_BALANCING | 681 | #ifdef CONFIG_NUMA_BALANCING |
678 | static inline int cpu_pid_to_cpupid(int cpu, int pid) | 682 | static inline int cpu_pid_to_cpupid(int cpu, int pid) |
679 | { | 683 | { |
680 | return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK); | 684 | return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK); |
681 | } | 685 | } |
682 | 686 | ||
683 | static inline int cpupid_to_pid(int cpupid) | 687 | static inline int cpupid_to_pid(int cpupid) |
684 | { | 688 | { |
685 | return cpupid & LAST__PID_MASK; | 689 | return cpupid & LAST__PID_MASK; |
686 | } | 690 | } |
687 | 691 | ||
688 | static inline int cpupid_to_cpu(int cpupid) | 692 | static inline int cpupid_to_cpu(int cpupid) |
689 | { | 693 | { |
690 | return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK; | 694 | return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK; |
691 | } | 695 | } |
692 | 696 | ||
693 | static inline int cpupid_to_nid(int cpupid) | 697 | static inline int cpupid_to_nid(int cpupid) |
694 | { | 698 | { |
695 | return cpu_to_node(cpupid_to_cpu(cpupid)); | 699 | return cpu_to_node(cpupid_to_cpu(cpupid)); |
696 | } | 700 | } |
697 | 701 | ||
698 | static inline bool cpupid_pid_unset(int cpupid) | 702 | static inline bool cpupid_pid_unset(int cpupid) |
699 | { | 703 | { |
700 | return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK); | 704 | return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK); |
701 | } | 705 | } |
702 | 706 | ||
703 | static inline bool cpupid_cpu_unset(int cpupid) | 707 | static inline bool cpupid_cpu_unset(int cpupid) |
704 | { | 708 | { |
705 | return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK); | 709 | return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK); |
706 | } | 710 | } |
707 | 711 | ||
708 | static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) | 712 | static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) |
709 | { | 713 | { |
710 | return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid); | 714 | return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid); |
711 | } | 715 | } |
712 | 716 | ||
713 | #define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) | 717 | #define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) |
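
A cpupid packs the last CPU and the low bits of the last PID that touched a page into one small integer: the pid sits in the low LAST__PID_SHIFT bits and the cpu above it, with all-ones meaning "unset". A standalone round-trip sketch using assumed DEMO_* shift and mask values rather than the real LAST__* ones:

#include <stdio.h>

/* Assumed layout for the demo: 8 pid bits, 8 cpu bits. */
#define DEMO_PID_SHIFT 8
#define DEMO_PID_MASK  ((1 << DEMO_PID_SHIFT) - 1)
#define DEMO_CPU_MASK  ((1 << 8) - 1)

static int demo_cpu_pid_to_cpupid(int cpu, int pid)
{
	return ((cpu & DEMO_CPU_MASK) << DEMO_PID_SHIFT) | (pid & DEMO_PID_MASK);
}

static int demo_cpupid_to_pid(int cpupid) { return cpupid & DEMO_PID_MASK; }

static int demo_cpupid_to_cpu(int cpupid)
{
	return (cpupid >> DEMO_PID_SHIFT) & DEMO_CPU_MASK;
}

int main(void)
{
	int cpupid = demo_cpu_pid_to_cpupid(3, 1234);

	/* Only the low 8 bits of the pid survive: 1234 & 0xff == 210. */
	printf("cpu=%d pid(low bits)=%d\n",
	       demo_cpupid_to_cpu(cpupid), demo_cpupid_to_pid(cpupid));

	/* "-1 in every field" is the unset value, as in cpupid_pid_unset(). */
	int unset = demo_cpu_pid_to_cpupid(-1, -1);
	printf("pid unset? %d\n",
	       demo_cpupid_to_pid(unset) == (-1 & DEMO_PID_MASK));
	return 0;
}
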
714 | #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS | 718 | #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS |
715 | static inline int page_cpupid_xchg_last(struct page *page, int cpupid) | 719 | static inline int page_cpupid_xchg_last(struct page *page, int cpupid) |
716 | { | 720 | { |
717 | return xchg(&page->_last_cpupid, cpupid); | 721 | return xchg(&page->_last_cpupid, cpupid); |
718 | } | 722 | } |
719 | 723 | ||
720 | static inline int page_cpupid_last(struct page *page) | 724 | static inline int page_cpupid_last(struct page *page) |
721 | { | 725 | { |
722 | return page->_last_cpupid; | 726 | return page->_last_cpupid; |
723 | } | 727 | } |
724 | static inline void page_cpupid_reset_last(struct page *page) | 728 | static inline void page_cpupid_reset_last(struct page *page) |
725 | { | 729 | { |
726 | page->_last_cpupid = -1; | 730 | page->_last_cpupid = -1; |
727 | } | 731 | } |
728 | #else | 732 | #else |
729 | static inline int page_cpupid_last(struct page *page) | 733 | static inline int page_cpupid_last(struct page *page) |
730 | { | 734 | { |
731 | return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; | 735 | return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; |
732 | } | 736 | } |
733 | 737 | ||
734 | extern int page_cpupid_xchg_last(struct page *page, int cpupid); | 738 | extern int page_cpupid_xchg_last(struct page *page, int cpupid); |
735 | 739 | ||
736 | static inline void page_cpupid_reset_last(struct page *page) | 740 | static inline void page_cpupid_reset_last(struct page *page) |
737 | { | 741 | { |
738 | int cpupid = (1 << LAST_CPUPID_SHIFT) - 1; | 742 | int cpupid = (1 << LAST_CPUPID_SHIFT) - 1; |
739 | 743 | ||
740 | page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT); | 744 | page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT); |
741 | page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT; | 745 | page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT; |
742 | } | 746 | } |
743 | #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ | 747 | #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ |
744 | #else /* !CONFIG_NUMA_BALANCING */ | 748 | #else /* !CONFIG_NUMA_BALANCING */ |
745 | static inline int page_cpupid_xchg_last(struct page *page, int cpupid) | 749 | static inline int page_cpupid_xchg_last(struct page *page, int cpupid) |
746 | { | 750 | { |
747 | return page_to_nid(page); /* XXX */ | 751 | return page_to_nid(page); /* XXX */ |
748 | } | 752 | } |
749 | 753 | ||
750 | static inline int page_cpupid_last(struct page *page) | 754 | static inline int page_cpupid_last(struct page *page) |
751 | { | 755 | { |
752 | return page_to_nid(page); /* XXX */ | 756 | return page_to_nid(page); /* XXX */ |
753 | } | 757 | } |
754 | 758 | ||
755 | static inline int cpupid_to_nid(int cpupid) | 759 | static inline int cpupid_to_nid(int cpupid) |
756 | { | 760 | { |
757 | return -1; | 761 | return -1; |
758 | } | 762 | } |
759 | 763 | ||
760 | static inline int cpupid_to_pid(int cpupid) | 764 | static inline int cpupid_to_pid(int cpupid) |
761 | { | 765 | { |
762 | return -1; | 766 | return -1; |
763 | } | 767 | } |
764 | 768 | ||
765 | static inline int cpupid_to_cpu(int cpupid) | 769 | static inline int cpupid_to_cpu(int cpupid) |
766 | { | 770 | { |
767 | return -1; | 771 | return -1; |
768 | } | 772 | } |
769 | 773 | ||
770 | static inline int cpu_pid_to_cpupid(int nid, int pid) | 774 | static inline int cpu_pid_to_cpupid(int nid, int pid) |
771 | { | 775 | { |
772 | return -1; | 776 | return -1; |
773 | } | 777 | } |
774 | 778 | ||
775 | static inline bool cpupid_pid_unset(int cpupid) | 779 | static inline bool cpupid_pid_unset(int cpupid) |
776 | { | 780 | { |
777 | return 1; | 781 | return 1; |
778 | } | 782 | } |
779 | 783 | ||
780 | static inline void page_cpupid_reset_last(struct page *page) | 784 | static inline void page_cpupid_reset_last(struct page *page) |
781 | { | 785 | { |
782 | } | 786 | } |
783 | 787 | ||
784 | static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) | 788 | static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) |
785 | { | 789 | { |
786 | return false; | 790 | return false; |
787 | } | 791 | } |
788 | #endif /* CONFIG_NUMA_BALANCING */ | 792 | #endif /* CONFIG_NUMA_BALANCING */ |
789 | 793 | ||
790 | static inline struct zone *page_zone(const struct page *page) | 794 | static inline struct zone *page_zone(const struct page *page) |
791 | { | 795 | { |
792 | return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; | 796 | return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; |
793 | } | 797 | } |
794 | 798 | ||
795 | #ifdef SECTION_IN_PAGE_FLAGS | 799 | #ifdef SECTION_IN_PAGE_FLAGS |
796 | static inline void set_page_section(struct page *page, unsigned long section) | 800 | static inline void set_page_section(struct page *page, unsigned long section) |
797 | { | 801 | { |
798 | page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); | 802 | page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); |
799 | page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; | 803 | page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; |
800 | } | 804 | } |
801 | 805 | ||
802 | static inline unsigned long page_to_section(const struct page *page) | 806 | static inline unsigned long page_to_section(const struct page *page) |
803 | { | 807 | { |
804 | return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; | 808 | return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; |
805 | } | 809 | } |
806 | #endif | 810 | #endif |
807 | 811 | ||
808 | static inline void set_page_zone(struct page *page, enum zone_type zone) | 812 | static inline void set_page_zone(struct page *page, enum zone_type zone) |
809 | { | 813 | { |
810 | page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); | 814 | page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); |
811 | page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; | 815 | page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; |
812 | } | 816 | } |
813 | 817 | ||
814 | static inline void set_page_node(struct page *page, unsigned long node) | 818 | static inline void set_page_node(struct page *page, unsigned long node) |
815 | { | 819 | { |
816 | page->flags &= ~(NODES_MASK << NODES_PGSHIFT); | 820 | page->flags &= ~(NODES_MASK << NODES_PGSHIFT); |
817 | page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; | 821 | page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; |
818 | } | 822 | } |
819 | 823 | ||
820 | static inline void set_page_links(struct page *page, enum zone_type zone, | 824 | static inline void set_page_links(struct page *page, enum zone_type zone, |
821 | unsigned long node, unsigned long pfn) | 825 | unsigned long node, unsigned long pfn) |
822 | { | 826 | { |
823 | set_page_zone(page, zone); | 827 | set_page_zone(page, zone); |
824 | set_page_node(page, node); | 828 | set_page_node(page, node); |
825 | #ifdef SECTION_IN_PAGE_FLAGS | 829 | #ifdef SECTION_IN_PAGE_FLAGS |
826 | set_page_section(page, pfn_to_section_nr(pfn)); | 830 | set_page_section(page, pfn_to_section_nr(pfn)); |
827 | #endif | 831 | #endif |
828 | } | 832 | } |
829 | 833 | ||
830 | /* | 834 | /* |
831 | * Some inline functions in vmstat.h depend on page_zone() | 835 | * Some inline functions in vmstat.h depend on page_zone() |
832 | */ | 836 | */ |
833 | #include <linux/vmstat.h> | 837 | #include <linux/vmstat.h> |
834 | 838 | ||
835 | static __always_inline void *lowmem_page_address(const struct page *page) | 839 | static __always_inline void *lowmem_page_address(const struct page *page) |
836 | { | 840 | { |
837 | return __va(PFN_PHYS(page_to_pfn(page))); | 841 | return __va(PFN_PHYS(page_to_pfn(page))); |
838 | } | 842 | } |
839 | 843 | ||
840 | #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) | 844 | #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) |
841 | #define HASHED_PAGE_VIRTUAL | 845 | #define HASHED_PAGE_VIRTUAL |
842 | #endif | 846 | #endif |
843 | 847 | ||
844 | #if defined(WANT_PAGE_VIRTUAL) | 848 | #if defined(WANT_PAGE_VIRTUAL) |
845 | #define page_address(page) ((page)->virtual) | 849 | #define page_address(page) ((page)->virtual) |
846 | #define set_page_address(page, address) \ | 850 | #define set_page_address(page, address) \ |
847 | do { \ | 851 | do { \ |
848 | (page)->virtual = (address); \ | 852 | (page)->virtual = (address); \ |
849 | } while(0) | 853 | } while(0) |
850 | #define page_address_init() do { } while(0) | 854 | #define page_address_init() do { } while(0) |
851 | #endif | 855 | #endif |
852 | 856 | ||
853 | #if defined(HASHED_PAGE_VIRTUAL) | 857 | #if defined(HASHED_PAGE_VIRTUAL) |
854 | void *page_address(const struct page *page); | 858 | void *page_address(const struct page *page); |
855 | void set_page_address(struct page *page, void *virtual); | 859 | void set_page_address(struct page *page, void *virtual); |
856 | void page_address_init(void); | 860 | void page_address_init(void); |
857 | #endif | 861 | #endif |
858 | 862 | ||
859 | #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) | 863 | #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) |
860 | #define page_address(page) lowmem_page_address(page) | 864 | #define page_address(page) lowmem_page_address(page) |
861 | #define set_page_address(page, address) do { } while(0) | 865 | #define set_page_address(page, address) do { } while(0) |
862 | #define page_address_init() do { } while(0) | 866 | #define page_address_init() do { } while(0) |
863 | #endif | 867 | #endif |
864 | 868 | ||
865 | /* | 869 | /* |
866 | * On an anonymous page mapped into a user virtual memory area, | 870 | * On an anonymous page mapped into a user virtual memory area, |
867 | * page->mapping points to its anon_vma, not to a struct address_space; | 871 | * page->mapping points to its anon_vma, not to a struct address_space; |
868 | * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. | 872 | * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. |
869 | * | 873 | * |
870 | * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, | 874 | * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, |
871 | * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit; | 875 | * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit; |
872 | * and then page->mapping points, not to an anon_vma, but to a private | 876 | * and then page->mapping points, not to an anon_vma, but to a private |
873 | * structure which KSM associates with that merged page. See ksm.h. | 877 | * structure which KSM associates with that merged page. See ksm.h. |
874 | * | 878 | * |
875 | * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used. | 879 | * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used. |
876 | * | 880 | * |
877 | * Please note that, confusingly, "page_mapping" refers to the inode | 881 | * Please note that, confusingly, "page_mapping" refers to the inode |
878 | * address_space which maps the page from disk; whereas "page_mapped" | 882 | * address_space which maps the page from disk; whereas "page_mapped" |
879 | * refers to user virtual address space into which the page is mapped. | 883 | * refers to user virtual address space into which the page is mapped. |
880 | */ | 884 | */ |
881 | #define PAGE_MAPPING_ANON 1 | 885 | #define PAGE_MAPPING_ANON 1 |
882 | #define PAGE_MAPPING_KSM 2 | 886 | #define PAGE_MAPPING_KSM 2 |
883 | #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) | 887 | #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) |
884 | 888 | ||
885 | extern struct address_space *page_mapping(struct page *page); | 889 | extern struct address_space *page_mapping(struct page *page); |
886 | 890 | ||
887 | /* Neutral page->mapping pointer to address_space or anon_vma or other */ | 891 | /* Neutral page->mapping pointer to address_space or anon_vma or other */ |
888 | static inline void *page_rmapping(struct page *page) | 892 | static inline void *page_rmapping(struct page *page) |
889 | { | 893 | { |
890 | return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); | 894 | return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); |
891 | } | 895 | } |
892 | 896 | ||
893 | extern struct address_space *__page_file_mapping(struct page *); | 897 | extern struct address_space *__page_file_mapping(struct page *); |
894 | 898 | ||
895 | static inline | 899 | static inline |
896 | struct address_space *page_file_mapping(struct page *page) | 900 | struct address_space *page_file_mapping(struct page *page) |
897 | { | 901 | { |
898 | if (unlikely(PageSwapCache(page))) | 902 | if (unlikely(PageSwapCache(page))) |
899 | return __page_file_mapping(page); | 903 | return __page_file_mapping(page); |
900 | 904 | ||
901 | return page->mapping; | 905 | return page->mapping; |
902 | } | 906 | } |
903 | 907 | ||
904 | static inline int PageAnon(struct page *page) | 908 | static inline int PageAnon(struct page *page) |
905 | { | 909 | { |
906 | return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; | 910 | return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; |
907 | } | 911 | } |
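
page->mapping is effectively a tagged pointer: address_space and anon_vma objects are word aligned, so the two low bits are free to carry PAGE_MAPPING_ANON and PAGE_MAPPING_KSM, and page_rmapping() masks them off to recover the raw pointer. A small userspace sketch of the tagging; the DEMO_* constants and struct are stand-ins for illustration:

#include <stdio.h>

#define DEMO_MAPPING_ANON  1
#define DEMO_MAPPING_FLAGS 3

struct demo_anon_vma { int dummy; };   /* stand-in, word aligned */

int main(void)
{
	static struct demo_anon_vma av;
	void *mapping;

	/* Store the anon_vma with the ANON tag in the low bit. */
	mapping = (void *)((unsigned long)&av | DEMO_MAPPING_ANON);

	int is_anon = ((unsigned long)mapping & DEMO_MAPPING_ANON) != 0;
	void *raw   = (void *)((unsigned long)mapping &
			       ~(unsigned long)DEMO_MAPPING_FLAGS);

	printf("anon? %d  pointer recovered? %d\n", is_anon, raw == (void *)&av);
	return 0;
}
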
908 | 912 | ||
909 | /* | 913 | /* |
910 | * Return the pagecache index of the passed page. Regular pagecache pages | 914 | * Return the pagecache index of the passed page. Regular pagecache pages |
911 | * use ->index whereas swapcache pages use ->private | 915 | * use ->index whereas swapcache pages use ->private |
912 | */ | 916 | */ |
913 | static inline pgoff_t page_index(struct page *page) | 917 | static inline pgoff_t page_index(struct page *page) |
914 | { | 918 | { |
915 | if (unlikely(PageSwapCache(page))) | 919 | if (unlikely(PageSwapCache(page))) |
916 | return page_private(page); | 920 | return page_private(page); |
917 | return page->index; | 921 | return page->index; |
918 | } | 922 | } |
919 | 923 | ||
920 | extern pgoff_t __page_file_index(struct page *page); | 924 | extern pgoff_t __page_file_index(struct page *page); |
921 | 925 | ||
922 | /* | 926 | /* |
923 | * Return the file index of the page. Regular pagecache pages use ->index | 927 | * Return the file index of the page. Regular pagecache pages use ->index |
924 | * whereas swapcache pages use swp_offset(->private) | 928 | * whereas swapcache pages use swp_offset(->private) |
925 | */ | 929 | */ |
926 | static inline pgoff_t page_file_index(struct page *page) | 930 | static inline pgoff_t page_file_index(struct page *page) |
927 | { | 931 | { |
928 | if (unlikely(PageSwapCache(page))) | 932 | if (unlikely(PageSwapCache(page))) |
929 | return __page_file_index(page); | 933 | return __page_file_index(page); |
930 | 934 | ||
931 | return page->index; | 935 | return page->index; |
932 | } | 936 | } |
933 | 937 | ||
934 | /* | 938 | /* |
935 | * Return true if this page is mapped into pagetables. | 939 | * Return true if this page is mapped into pagetables. |
936 | */ | 940 | */ |
937 | static inline int page_mapped(struct page *page) | 941 | static inline int page_mapped(struct page *page) |
938 | { | 942 | { |
939 | return atomic_read(&(page)->_mapcount) >= 0; | 943 | return atomic_read(&(page)->_mapcount) >= 0; |
940 | } | 944 | } |
941 | 945 | ||
942 | /* | 946 | /* |
943 | * Different kinds of faults, as returned by handle_mm_fault(). | 947 | * Different kinds of faults, as returned by handle_mm_fault(). |
944 | * Used to decide whether a process gets delivered SIGBUS or | 948 | * Used to decide whether a process gets delivered SIGBUS or |
945 | * just gets major/minor fault counters bumped up. | 949 | * just gets major/minor fault counters bumped up. |
946 | */ | 950 | */ |
947 | 951 | ||
948 | #define VM_FAULT_MINOR 0 /* For backwards compat. Remove me quickly. */ | 952 | #define VM_FAULT_MINOR 0 /* For backwards compat. Remove me quickly. */ |
949 | 953 | ||
950 | #define VM_FAULT_OOM 0x0001 | 954 | #define VM_FAULT_OOM 0x0001 |
951 | #define VM_FAULT_SIGBUS 0x0002 | 955 | #define VM_FAULT_SIGBUS 0x0002 |
952 | #define VM_FAULT_MAJOR 0x0004 | 956 | #define VM_FAULT_MAJOR 0x0004 |
953 | #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ | 957 | #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ |
954 | #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ | 958 | #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ |
955 | #define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */ | 959 | #define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */ |
956 | 960 | ||
957 | #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ | 961 | #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ |
958 | #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ | 962 | #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ |
959 | #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ | 963 | #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ |
960 | #define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ | 964 | #define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ |
961 | 965 | ||
962 | #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ | 966 | #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ |
963 | 967 | ||
964 | #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ | 968 | #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ |
965 | VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE) | 969 | VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE) |
966 | 970 | ||
967 | /* Encode hstate index for a hwpoisoned large page */ | 971 | /* Encode hstate index for a hwpoisoned large page */ |
968 | #define VM_FAULT_SET_HINDEX(x) ((x) << 12) | 972 | #define VM_FAULT_SET_HINDEX(x) ((x) << 12) |
969 | #define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf) | 973 | #define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf) |
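
For a poisoned huge page the hstate index rides along inside the fault code itself, in bits 12-15, so one int can report both the error class and the huge page size involved. A tiny standalone round trip built from the constants above (they are plain integers, so the snippet compiles on its own):

#include <stdio.h>

#define VM_FAULT_HWPOISON_LARGE 0x0020
#define VM_FAULT_SET_HINDEX(x)  ((x) << 12)
#define VM_FAULT_GET_HINDEX(x)  (((x) >> 12) & 0xf)

int main(void)
{
	/* Report a poisoned large page of hstate index 2. */
	int ret = VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(2);

	if (ret & VM_FAULT_HWPOISON_LARGE)
		printf("hwpoisoned large page, hstate index %d\n",
		       VM_FAULT_GET_HINDEX(ret));
	return 0;
}
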
970 | 974 | ||
971 | /* | 975 | /* |
972 | * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. | 976 | * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. |
973 | */ | 977 | */ |
974 | extern void pagefault_out_of_memory(void); | 978 | extern void pagefault_out_of_memory(void); |
975 | 979 | ||
976 | #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) | 980 | #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) |
977 | 981 | ||
978 | /* | 982 | /* |
979 | * Flags passed to show_mem() and show_free_areas() to suppress output in | 983 | * Flags passed to show_mem() and show_free_areas() to suppress output in |
980 | * various contexts. | 984 | * various contexts. |
981 | */ | 985 | */ |
982 | #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ | 986 | #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ |
983 | #define SHOW_MEM_FILTER_PAGE_COUNT (0x0002u) /* page type count */ | 987 | #define SHOW_MEM_FILTER_PAGE_COUNT (0x0002u) /* page type count */ |
984 | 988 | ||
985 | extern void show_free_areas(unsigned int flags); | 989 | extern void show_free_areas(unsigned int flags); |
986 | extern bool skip_free_areas_node(unsigned int flags, int nid); | 990 | extern bool skip_free_areas_node(unsigned int flags, int nid); |
987 | 991 | ||
988 | int shmem_zero_setup(struct vm_area_struct *); | 992 | int shmem_zero_setup(struct vm_area_struct *); |
989 | 993 | ||
990 | extern int can_do_mlock(void); | 994 | extern int can_do_mlock(void); |
991 | extern int user_shm_lock(size_t, struct user_struct *); | 995 | extern int user_shm_lock(size_t, struct user_struct *); |
992 | extern void user_shm_unlock(size_t, struct user_struct *); | 996 | extern void user_shm_unlock(size_t, struct user_struct *); |
993 | 997 | ||
994 | /* | 998 | /* |
995 | * Parameter block passed down to zap_pte_range in exceptional cases. | 999 | * Parameter block passed down to zap_pte_range in exceptional cases. |
996 | */ | 1000 | */ |
997 | struct zap_details { | 1001 | struct zap_details { |
998 | struct vm_area_struct *nonlinear_vma; /* Check page->index if set */ | 1002 | struct vm_area_struct *nonlinear_vma; /* Check page->index if set */ |
999 | struct address_space *check_mapping; /* Check page->mapping if set */ | 1003 | struct address_space *check_mapping; /* Check page->mapping if set */ |
1000 | pgoff_t first_index; /* Lowest page->index to unmap */ | 1004 | pgoff_t first_index; /* Lowest page->index to unmap */ |
1001 | pgoff_t last_index; /* Highest page->index to unmap */ | 1005 | pgoff_t last_index; /* Highest page->index to unmap */ |
1002 | }; | 1006 | }; |
1003 | 1007 | ||
1004 | struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, | 1008 | struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, |
1005 | pte_t pte); | 1009 | pte_t pte); |
1006 | 1010 | ||
1007 | int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, | 1011 | int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, |
1008 | unsigned long size); | 1012 | unsigned long size); |
1009 | void zap_page_range(struct vm_area_struct *vma, unsigned long address, | 1013 | void zap_page_range(struct vm_area_struct *vma, unsigned long address, |
1010 | unsigned long size, struct zap_details *); | 1014 | unsigned long size, struct zap_details *); |
1011 | void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, | 1015 | void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, |
1012 | unsigned long start, unsigned long end); | 1016 | unsigned long start, unsigned long end); |
1013 | 1017 | ||
1014 | /** | 1018 | /** |
1015 | * mm_walk - callbacks for walk_page_range | 1019 | * mm_walk - callbacks for walk_page_range |
1016 | * @pgd_entry: if set, called for each non-empty PGD (top-level) entry | 1020 | * @pgd_entry: if set, called for each non-empty PGD (top-level) entry |
1017 | * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry | 1021 | * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry |
1018 | * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry | 1022 | * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry |
1019 | * this handler is required to be able to handle | 1023 | * this handler is required to be able to handle |
1020 | * pmd_trans_huge() pmds. They may simply choose to | 1024 | * pmd_trans_huge() pmds. They may simply choose to |
1021 | * split_huge_page() instead of handling it explicitly. | 1025 | * split_huge_page() instead of handling it explicitly. |
1022 | * @pte_entry: if set, called for each non-empty PTE (4th-level) entry | 1026 | * @pte_entry: if set, called for each non-empty PTE (4th-level) entry |
1023 | * @pte_hole: if set, called for each hole at all levels | 1027 | * @pte_hole: if set, called for each hole at all levels |
1024 | * @hugetlb_entry: if set, called for each hugetlb entry | 1028 | * @hugetlb_entry: if set, called for each hugetlb entry |
1025 | * *Caution*: The caller must hold mmap_sem if @hugetlb_entry | 1029 | * *Caution*: The caller must hold mmap_sem if @hugetlb_entry
1026 | * is used. | 1030 | * is used. |
1027 | * | 1031 | * |
1028 | * (see walk_page_range for more details) | 1032 | * (see walk_page_range for more details) |
1029 | */ | 1033 | */ |
1030 | struct mm_walk { | 1034 | struct mm_walk { |
1031 | int (*pgd_entry)(pgd_t *pgd, unsigned long addr, | 1035 | int (*pgd_entry)(pgd_t *pgd, unsigned long addr, |
1032 | unsigned long next, struct mm_walk *walk); | 1036 | unsigned long next, struct mm_walk *walk); |
1033 | int (*pud_entry)(pud_t *pud, unsigned long addr, | 1037 | int (*pud_entry)(pud_t *pud, unsigned long addr, |
1034 | unsigned long next, struct mm_walk *walk); | 1038 | unsigned long next, struct mm_walk *walk); |
1035 | int (*pmd_entry)(pmd_t *pmd, unsigned long addr, | 1039 | int (*pmd_entry)(pmd_t *pmd, unsigned long addr, |
1036 | unsigned long next, struct mm_walk *walk); | 1040 | unsigned long next, struct mm_walk *walk); |
1037 | int (*pte_entry)(pte_t *pte, unsigned long addr, | 1041 | int (*pte_entry)(pte_t *pte, unsigned long addr, |
1038 | unsigned long next, struct mm_walk *walk); | 1042 | unsigned long next, struct mm_walk *walk); |
1039 | int (*pte_hole)(unsigned long addr, unsigned long next, | 1043 | int (*pte_hole)(unsigned long addr, unsigned long next, |
1040 | struct mm_walk *walk); | 1044 | struct mm_walk *walk); |
1041 | int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, | 1045 | int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, |
1042 | unsigned long addr, unsigned long next, | 1046 | unsigned long addr, unsigned long next, |
1043 | struct mm_walk *walk); | 1047 | struct mm_walk *walk); |
1044 | struct mm_struct *mm; | 1048 | struct mm_struct *mm; |
1045 | void *private; | 1049 | void *private; |
1046 | }; | 1050 | }; |
1047 | 1051 | ||
1048 | int walk_page_range(unsigned long addr, unsigned long end, | 1052 | int walk_page_range(unsigned long addr, unsigned long end, |
1049 | struct mm_walk *walk); | 1053 | struct mm_walk *walk); |
1050 | void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, | 1054 | void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, |
1051 | unsigned long end, unsigned long floor, unsigned long ceiling); | 1055 | unsigned long end, unsigned long floor, unsigned long ceiling); |
1052 | int copy_page_range(struct mm_struct *dst, struct mm_struct *src, | 1056 | int copy_page_range(struct mm_struct *dst, struct mm_struct *src, |
1053 | struct vm_area_struct *vma); | 1057 | struct vm_area_struct *vma); |
1054 | void unmap_mapping_range(struct address_space *mapping, | 1058 | void unmap_mapping_range(struct address_space *mapping, |
1055 | loff_t const holebegin, loff_t const holelen, int even_cows); | 1059 | loff_t const holebegin, loff_t const holelen, int even_cows); |
1056 | int follow_pfn(struct vm_area_struct *vma, unsigned long address, | 1060 | int follow_pfn(struct vm_area_struct *vma, unsigned long address, |
1057 | unsigned long *pfn); | 1061 | unsigned long *pfn); |
1058 | int follow_phys(struct vm_area_struct *vma, unsigned long address, | 1062 | int follow_phys(struct vm_area_struct *vma, unsigned long address, |
1059 | unsigned int flags, unsigned long *prot, resource_size_t *phys); | 1063 | unsigned int flags, unsigned long *prot, resource_size_t *phys); |
1060 | int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, | 1064 | int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, |
1061 | void *buf, int len, int write); | 1065 | void *buf, int len, int write); |
1062 | 1066 | ||
1063 | static inline void unmap_shared_mapping_range(struct address_space *mapping, | 1067 | static inline void unmap_shared_mapping_range(struct address_space *mapping, |
1064 | loff_t const holebegin, loff_t const holelen) | 1068 | loff_t const holebegin, loff_t const holelen) |
1065 | { | 1069 | { |
1066 | unmap_mapping_range(mapping, holebegin, holelen, 0); | 1070 | unmap_mapping_range(mapping, holebegin, holelen, 0); |
1067 | } | 1071 | } |
1068 | 1072 | ||
1069 | extern void truncate_pagecache(struct inode *inode, loff_t new); | 1073 | extern void truncate_pagecache(struct inode *inode, loff_t new); |
1070 | extern void truncate_setsize(struct inode *inode, loff_t newsize); | 1074 | extern void truncate_setsize(struct inode *inode, loff_t newsize); |
1071 | void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); | 1075 | void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); |
1072 | int truncate_inode_page(struct address_space *mapping, struct page *page); | 1076 | int truncate_inode_page(struct address_space *mapping, struct page *page); |
1073 | int generic_error_remove_page(struct address_space *mapping, struct page *page); | 1077 | int generic_error_remove_page(struct address_space *mapping, struct page *page); |
1074 | int invalidate_inode_page(struct page *page); | 1078 | int invalidate_inode_page(struct page *page); |
1075 | 1079 | ||
1076 | #ifdef CONFIG_MMU | 1080 | #ifdef CONFIG_MMU |
1077 | extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 1081 | extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
1078 | unsigned long address, unsigned int flags); | 1082 | unsigned long address, unsigned int flags); |
1079 | extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, | 1083 | extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, |
1080 | unsigned long address, unsigned int fault_flags); | 1084 | unsigned long address, unsigned int fault_flags); |
1081 | #else | 1085 | #else |
1082 | static inline int handle_mm_fault(struct mm_struct *mm, | 1086 | static inline int handle_mm_fault(struct mm_struct *mm, |
1083 | struct vm_area_struct *vma, unsigned long address, | 1087 | struct vm_area_struct *vma, unsigned long address, |
1084 | unsigned int flags) | 1088 | unsigned int flags) |
1085 | { | 1089 | { |
1086 | /* should never happen if there's no MMU */ | 1090 | /* should never happen if there's no MMU */ |
1087 | BUG(); | 1091 | BUG(); |
1088 | return VM_FAULT_SIGBUS; | 1092 | return VM_FAULT_SIGBUS; |
1089 | } | 1093 | } |
1090 | static inline int fixup_user_fault(struct task_struct *tsk, | 1094 | static inline int fixup_user_fault(struct task_struct *tsk, |
1091 | struct mm_struct *mm, unsigned long address, | 1095 | struct mm_struct *mm, unsigned long address, |
1092 | unsigned int fault_flags) | 1096 | unsigned int fault_flags) |
1093 | { | 1097 | { |
1094 | /* should never happen if there's no MMU */ | 1098 | /* should never happen if there's no MMU */ |
1095 | BUG(); | 1099 | BUG(); |
1096 | return -EFAULT; | 1100 | return -EFAULT; |
1097 | } | 1101 | } |
1098 | #endif | 1102 | #endif |
1099 | 1103 | ||
1100 | extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); | 1104 | extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); |
1101 | extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, | 1105 | extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, |
1102 | void *buf, int len, int write); | 1106 | void *buf, int len, int write); |
1103 | 1107 | ||
1104 | long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1108 | long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
1105 | unsigned long start, unsigned long nr_pages, | 1109 | unsigned long start, unsigned long nr_pages, |
1106 | unsigned int foll_flags, struct page **pages, | 1110 | unsigned int foll_flags, struct page **pages, |
1107 | struct vm_area_struct **vmas, int *nonblocking); | 1111 | struct vm_area_struct **vmas, int *nonblocking); |
1108 | long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1112 | long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
1109 | unsigned long start, unsigned long nr_pages, | 1113 | unsigned long start, unsigned long nr_pages, |
1110 | int write, int force, struct page **pages, | 1114 | int write, int force, struct page **pages, |
1111 | struct vm_area_struct **vmas); | 1115 | struct vm_area_struct **vmas); |
1112 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | 1116 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, |
1113 | struct page **pages); | 1117 | struct page **pages); |
1114 | struct kvec; | 1118 | struct kvec; |
1115 | int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, | 1119 | int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, |
1116 | struct page **pages); | 1120 | struct page **pages); |
1117 | int get_kernel_page(unsigned long start, int write, struct page **pages); | 1121 | int get_kernel_page(unsigned long start, int write, struct page **pages); |
1118 | struct page *get_dump_page(unsigned long addr); | 1122 | struct page *get_dump_page(unsigned long addr); |
1119 | 1123 | ||
1120 | extern int try_to_release_page(struct page * page, gfp_t gfp_mask); | 1124 | extern int try_to_release_page(struct page * page, gfp_t gfp_mask); |
1121 | extern void do_invalidatepage(struct page *page, unsigned int offset, | 1125 | extern void do_invalidatepage(struct page *page, unsigned int offset, |
1122 | unsigned int length); | 1126 | unsigned int length); |
1123 | 1127 | ||
1124 | int __set_page_dirty_nobuffers(struct page *page); | 1128 | int __set_page_dirty_nobuffers(struct page *page); |
1125 | int __set_page_dirty_no_writeback(struct page *page); | 1129 | int __set_page_dirty_no_writeback(struct page *page); |
1126 | int redirty_page_for_writepage(struct writeback_control *wbc, | 1130 | int redirty_page_for_writepage(struct writeback_control *wbc, |
1127 | struct page *page); | 1131 | struct page *page); |
1128 | void account_page_dirtied(struct page *page, struct address_space *mapping); | 1132 | void account_page_dirtied(struct page *page, struct address_space *mapping); |
1129 | void account_page_writeback(struct page *page); | 1133 | void account_page_writeback(struct page *page); |
1130 | int set_page_dirty(struct page *page); | 1134 | int set_page_dirty(struct page *page); |
1131 | int set_page_dirty_lock(struct page *page); | 1135 | int set_page_dirty_lock(struct page *page); |
1132 | int clear_page_dirty_for_io(struct page *page); | 1136 | int clear_page_dirty_for_io(struct page *page); |
1133 | 1137 | ||
1134 | /* Is the vma a continuation of the stack vma above it? */ | 1138 | /* Is the vma a continuation of the stack vma above it? */ |
1135 | static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) | 1139 | static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) |
1136 | { | 1140 | { |
1137 | return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); | 1141 | return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); |
1138 | } | 1142 | } |
1139 | 1143 | ||
1140 | static inline int stack_guard_page_start(struct vm_area_struct *vma, | 1144 | static inline int stack_guard_page_start(struct vm_area_struct *vma, |
1141 | unsigned long addr) | 1145 | unsigned long addr) |
1142 | { | 1146 | { |
1143 | return (vma->vm_flags & VM_GROWSDOWN) && | 1147 | return (vma->vm_flags & VM_GROWSDOWN) && |
1144 | (vma->vm_start == addr) && | 1148 | (vma->vm_start == addr) && |
1145 | !vma_growsdown(vma->vm_prev, addr); | 1149 | !vma_growsdown(vma->vm_prev, addr); |
1146 | } | 1150 | } |
1147 | 1151 | ||
1148 | /* Is the vma a continuation of the stack vma below it? */ | 1152 | /* Is the vma a continuation of the stack vma below it? */ |
1149 | static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) | 1153 | static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) |
1150 | { | 1154 | { |
1151 | return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); | 1155 | return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); |
1152 | } | 1156 | } |
1153 | 1157 | ||
1154 | static inline int stack_guard_page_end(struct vm_area_struct *vma, | 1158 | static inline int stack_guard_page_end(struct vm_area_struct *vma, |
1155 | unsigned long addr) | 1159 | unsigned long addr) |
1156 | { | 1160 | { |
1157 | return (vma->vm_flags & VM_GROWSUP) && | 1161 | return (vma->vm_flags & VM_GROWSUP) && |
1158 | (vma->vm_end == addr) && | 1162 | (vma->vm_end == addr) && |
1159 | !vma_growsup(vma->vm_next, addr); | 1163 | !vma_growsup(vma->vm_next, addr); |
1160 | } | 1164 | } |
1161 | 1165 | ||
1162 | extern pid_t | 1166 | extern pid_t |
1163 | vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group); | 1167 | vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group); |
1164 | 1168 | ||
1165 | extern unsigned long move_page_tables(struct vm_area_struct *vma, | 1169 | extern unsigned long move_page_tables(struct vm_area_struct *vma, |
1166 | unsigned long old_addr, struct vm_area_struct *new_vma, | 1170 | unsigned long old_addr, struct vm_area_struct *new_vma, |
1167 | unsigned long new_addr, unsigned long len, | 1171 | unsigned long new_addr, unsigned long len, |
1168 | bool need_rmap_locks); | 1172 | bool need_rmap_locks); |
1169 | extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, | 1173 | extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, |
1170 | unsigned long end, pgprot_t newprot, | 1174 | unsigned long end, pgprot_t newprot, |
1171 | int dirty_accountable, int prot_numa); | 1175 | int dirty_accountable, int prot_numa); |
1172 | extern int mprotect_fixup(struct vm_area_struct *vma, | 1176 | extern int mprotect_fixup(struct vm_area_struct *vma, |
1173 | struct vm_area_struct **pprev, unsigned long start, | 1177 | struct vm_area_struct **pprev, unsigned long start, |
1174 | unsigned long end, unsigned long newflags); | 1178 | unsigned long end, unsigned long newflags); |
1175 | 1179 | ||
1176 | /* | 1180 | /* |
1177 | * Doesn't attempt to fault pages in and may return fewer pages than requested. | 1181 | * Doesn't attempt to fault pages in and may return fewer pages than requested. |
1178 | */ | 1182 | */ |
1179 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | 1183 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, |
1180 | struct page **pages); | 1184 | struct page **pages); |
1181 | /* | 1185 | /* |
1182 | * per-process(per-mm_struct) statistics. | 1186 | * per-process(per-mm_struct) statistics. |
1183 | */ | 1187 | */ |
1184 | static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) | 1188 | static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) |
1185 | { | 1189 | { |
1186 | long val = atomic_long_read(&mm->rss_stat.count[member]); | 1190 | long val = atomic_long_read(&mm->rss_stat.count[member]); |
1187 | 1191 | ||
1188 | #ifdef SPLIT_RSS_COUNTING | 1192 | #ifdef SPLIT_RSS_COUNTING |
1189 | /* | 1193 | /* |
1190 | * The counter is updated asynchronously and may temporarily go negative. | 1194 | * The counter is updated asynchronously and may temporarily go negative. |
1191 | * But a negative value is never what users expect to see. | 1195 | * But a negative value is never what users expect to see. |
1192 | */ | 1196 | */ |
1193 | if (val < 0) | 1197 | if (val < 0) |
1194 | val = 0; | 1198 | val = 0; |
1195 | #endif | 1199 | #endif |
1196 | return (unsigned long)val; | 1200 | return (unsigned long)val; |
1197 | } | 1201 | } |
1198 | 1202 | ||
1199 | static inline void add_mm_counter(struct mm_struct *mm, int member, long value) | 1203 | static inline void add_mm_counter(struct mm_struct *mm, int member, long value) |
1200 | { | 1204 | { |
1201 | atomic_long_add(value, &mm->rss_stat.count[member]); | 1205 | atomic_long_add(value, &mm->rss_stat.count[member]); |
1202 | } | 1206 | } |
1203 | 1207 | ||
1204 | static inline void inc_mm_counter(struct mm_struct *mm, int member) | 1208 | static inline void inc_mm_counter(struct mm_struct *mm, int member) |
1205 | { | 1209 | { |
1206 | atomic_long_inc(&mm->rss_stat.count[member]); | 1210 | atomic_long_inc(&mm->rss_stat.count[member]); |
1207 | } | 1211 | } |
1208 | 1212 | ||
1209 | static inline void dec_mm_counter(struct mm_struct *mm, int member) | 1213 | static inline void dec_mm_counter(struct mm_struct *mm, int member) |
1210 | { | 1214 | { |
1211 | atomic_long_dec(&mm->rss_stat.count[member]); | 1215 | atomic_long_dec(&mm->rss_stat.count[member]); |
1212 | } | 1216 | } |
1213 | 1217 | ||
1214 | static inline unsigned long get_mm_rss(struct mm_struct *mm) | 1218 | static inline unsigned long get_mm_rss(struct mm_struct *mm) |
1215 | { | 1219 | { |
1216 | return get_mm_counter(mm, MM_FILEPAGES) + | 1220 | return get_mm_counter(mm, MM_FILEPAGES) + |
1217 | get_mm_counter(mm, MM_ANONPAGES); | 1221 | get_mm_counter(mm, MM_ANONPAGES); |
1218 | } | 1222 | } |
1219 | 1223 | ||
1220 | static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) | 1224 | static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) |
1221 | { | 1225 | { |
1222 | return max(mm->hiwater_rss, get_mm_rss(mm)); | 1226 | return max(mm->hiwater_rss, get_mm_rss(mm)); |
1223 | } | 1227 | } |
1224 | 1228 | ||
1225 | static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) | 1229 | static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) |
1226 | { | 1230 | { |
1227 | return max(mm->hiwater_vm, mm->total_vm); | 1231 | return max(mm->hiwater_vm, mm->total_vm); |
1228 | } | 1232 | } |
1229 | 1233 | ||
1230 | static inline void update_hiwater_rss(struct mm_struct *mm) | 1234 | static inline void update_hiwater_rss(struct mm_struct *mm) |
1231 | { | 1235 | { |
1232 | unsigned long _rss = get_mm_rss(mm); | 1236 | unsigned long _rss = get_mm_rss(mm); |
1233 | 1237 | ||
1234 | if ((mm)->hiwater_rss < _rss) | 1238 | if ((mm)->hiwater_rss < _rss) |
1235 | (mm)->hiwater_rss = _rss; | 1239 | (mm)->hiwater_rss = _rss; |
1236 | } | 1240 | } |
1237 | 1241 | ||
1238 | static inline void update_hiwater_vm(struct mm_struct *mm) | 1242 | static inline void update_hiwater_vm(struct mm_struct *mm) |
1239 | { | 1243 | { |
1240 | if (mm->hiwater_vm < mm->total_vm) | 1244 | if (mm->hiwater_vm < mm->total_vm) |
1241 | mm->hiwater_vm = mm->total_vm; | 1245 | mm->hiwater_vm = mm->total_vm; |
1242 | } | 1246 | } |
1243 | 1247 | ||
1244 | static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, | 1248 | static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, |
1245 | struct mm_struct *mm) | 1249 | struct mm_struct *mm) |
1246 | { | 1250 | { |
1247 | unsigned long hiwater_rss = get_mm_hiwater_rss(mm); | 1251 | unsigned long hiwater_rss = get_mm_hiwater_rss(mm); |
1248 | 1252 | ||
1249 | if (*maxrss < hiwater_rss) | 1253 | if (*maxrss < hiwater_rss) |
1250 | *maxrss = hiwater_rss; | 1254 | *maxrss = hiwater_rss; |
1251 | } | 1255 | } |
1252 | 1256 | ||
1253 | #if defined(SPLIT_RSS_COUNTING) | 1257 | #if defined(SPLIT_RSS_COUNTING) |
1254 | void sync_mm_rss(struct mm_struct *mm); | 1258 | void sync_mm_rss(struct mm_struct *mm); |
1255 | #else | 1259 | #else |
1256 | static inline void sync_mm_rss(struct mm_struct *mm) | 1260 | static inline void sync_mm_rss(struct mm_struct *mm) |
1257 | { | 1261 | { |
1258 | } | 1262 | } |
1259 | #endif | 1263 | #endif |
1260 | 1264 | ||
1261 | int vma_wants_writenotify(struct vm_area_struct *vma); | 1265 | int vma_wants_writenotify(struct vm_area_struct *vma); |
1262 | 1266 | ||
1263 | extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, | 1267 | extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, |
1264 | spinlock_t **ptl); | 1268 | spinlock_t **ptl); |
1265 | static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, | 1269 | static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, |
1266 | spinlock_t **ptl) | 1270 | spinlock_t **ptl) |
1267 | { | 1271 | { |
1268 | pte_t *ptep; | 1272 | pte_t *ptep; |
1269 | __cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl)); | 1273 | __cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl)); |
1270 | return ptep; | 1274 | return ptep; |
1271 | } | 1275 | } |
1272 | 1276 | ||
1273 | #ifdef __PAGETABLE_PUD_FOLDED | 1277 | #ifdef __PAGETABLE_PUD_FOLDED |
1274 | static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, | 1278 | static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, |
1275 | unsigned long address) | 1279 | unsigned long address) |
1276 | { | 1280 | { |
1277 | return 0; | 1281 | return 0; |
1278 | } | 1282 | } |
1279 | #else | 1283 | #else |
1280 | int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); | 1284 | int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); |
1281 | #endif | 1285 | #endif |
1282 | 1286 | ||
1283 | #ifdef __PAGETABLE_PMD_FOLDED | 1287 | #ifdef __PAGETABLE_PMD_FOLDED |
1284 | static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, | 1288 | static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, |
1285 | unsigned long address) | 1289 | unsigned long address) |
1286 | { | 1290 | { |
1287 | return 0; | 1291 | return 0; |
1288 | } | 1292 | } |
1289 | #else | 1293 | #else |
1290 | int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); | 1294 | int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); |
1291 | #endif | 1295 | #endif |
1292 | 1296 | ||
1293 | int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, | 1297 | int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, |
1294 | pmd_t *pmd, unsigned long address); | 1298 | pmd_t *pmd, unsigned long address); |
1295 | int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); | 1299 | int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); |
1296 | 1300 | ||
1297 | /* | 1301 | /* |
1298 | * The following ifdef is needed to get the 4level-fixup.h header to work. | 1302 | * The following ifdef is needed to get the 4level-fixup.h header to work. |
1299 | * Remove it when 4level-fixup.h has been removed. | 1303 | * Remove it when 4level-fixup.h has been removed. |
1300 | */ | 1304 | */ |
1301 | #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) | 1305 | #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) |
1302 | static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) | 1306 | static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) |
1303 | { | 1307 | { |
1304 | return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? | 1308 | return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? |
1305 | NULL: pud_offset(pgd, address); | 1309 | NULL: pud_offset(pgd, address); |
1306 | } | 1310 | } |
1307 | 1311 | ||
1308 | static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) | 1312 | static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) |
1309 | { | 1313 | { |
1310 | return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? | 1314 | return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? |
1311 | NULL: pmd_offset(pud, address); | 1315 | NULL: pmd_offset(pud, address); |
1312 | } | 1316 | } |
1313 | #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ | 1317 | #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ |
1314 | 1318 | ||
1315 | #if USE_SPLIT_PTLOCKS | 1319 | #if USE_SPLIT_PTLOCKS |
1316 | /* | 1320 | /* |
1317 | * We tuck a spinlock to guard each pagetable page into its struct page, | 1321 | * We tuck a spinlock to guard each pagetable page into its struct page, |
1318 | * at page->private, with BUILD_BUG_ON to make sure that this will not | 1322 | * at page->private, with BUILD_BUG_ON to make sure that this will not |
1319 | * overflow into the next struct page (as it might with DEBUG_SPINLOCK). | 1323 | * overflow into the next struct page (as it might with DEBUG_SPINLOCK). |
1320 | * When freeing, reset page->mapping so free_pages_check won't complain. | 1324 | * When freeing, reset page->mapping so free_pages_check won't complain. |
1321 | */ | 1325 | */ |
1322 | #define __pte_lockptr(page) &((page)->ptl) | 1326 | #define __pte_lockptr(page) &((page)->ptl) |
1323 | #define pte_lock_init(_page) do { \ | 1327 | #define pte_lock_init(_page) do { \ |
1324 | spin_lock_init(__pte_lockptr(_page)); \ | 1328 | spin_lock_init(__pte_lockptr(_page)); \ |
1325 | } while (0) | 1329 | } while (0) |
1326 | #define pte_lock_deinit(page) ((page)->mapping = NULL) | 1330 | #define pte_lock_deinit(page) ((page)->mapping = NULL) |
1327 | #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) | 1331 | #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) |
1328 | #else /* !USE_SPLIT_PTLOCKS */ | 1332 | #else /* !USE_SPLIT_PTLOCKS */ |
1329 | /* | 1333 | /* |
1330 | * We use mm->page_table_lock to guard all pagetable pages of the mm. | 1334 | * We use mm->page_table_lock to guard all pagetable pages of the mm. |
1331 | */ | 1335 | */ |
1332 | #define pte_lock_init(page) do {} while (0) | 1336 | #define pte_lock_init(page) do {} while (0) |
1333 | #define pte_lock_deinit(page) do {} while (0) | 1337 | #define pte_lock_deinit(page) do {} while (0) |
1334 | #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) | 1338 | #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) |
1335 | #endif /* USE_SPLIT_PTLOCKS */ | 1339 | #endif /* USE_SPLIT_PTLOCKS */ |
1336 | 1340 | ||
1337 | static inline void pgtable_page_ctor(struct page *page) | 1341 | static inline void pgtable_page_ctor(struct page *page) |
1338 | { | 1342 | { |
1339 | pte_lock_init(page); | 1343 | pte_lock_init(page); |
1340 | inc_zone_page_state(page, NR_PAGETABLE); | 1344 | inc_zone_page_state(page, NR_PAGETABLE); |
1341 | } | 1345 | } |
1342 | 1346 | ||
1343 | static inline void pgtable_page_dtor(struct page *page) | 1347 | static inline void pgtable_page_dtor(struct page *page) |
1344 | { | 1348 | { |
1345 | pte_lock_deinit(page); | 1349 | pte_lock_deinit(page); |
1346 | dec_zone_page_state(page, NR_PAGETABLE); | 1350 | dec_zone_page_state(page, NR_PAGETABLE); |
1347 | } | 1351 | } |
1348 | 1352 | ||
1349 | #define pte_offset_map_lock(mm, pmd, address, ptlp) \ | 1353 | #define pte_offset_map_lock(mm, pmd, address, ptlp) \ |
1350 | ({ \ | 1354 | ({ \ |
1351 | spinlock_t *__ptl = pte_lockptr(mm, pmd); \ | 1355 | spinlock_t *__ptl = pte_lockptr(mm, pmd); \ |
1352 | pte_t *__pte = pte_offset_map(pmd, address); \ | 1356 | pte_t *__pte = pte_offset_map(pmd, address); \ |
1353 | *(ptlp) = __ptl; \ | 1357 | *(ptlp) = __ptl; \ |
1354 | spin_lock(__ptl); \ | 1358 | spin_lock(__ptl); \ |
1355 | __pte; \ | 1359 | __pte; \ |
1356 | }) | 1360 | }) |
1357 | 1361 | ||
1358 | #define pte_unmap_unlock(pte, ptl) do { \ | 1362 | #define pte_unmap_unlock(pte, ptl) do { \ |
1359 | spin_unlock(ptl); \ | 1363 | spin_unlock(ptl); \ |
1360 | pte_unmap(pte); \ | 1364 | pte_unmap(pte); \ |
1361 | } while (0) | 1365 | } while (0) |
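Editor's note: the pte_offset_map_lock()/pte_unmap_unlock() pair above is the usual way to examine a single PTE under its page-table lock. A minimal sketch, not part of this diff and assuming <linux/mm.h> is already included; the helper name is invented:

static int example_pte_is_present(struct mm_struct *mm, pmd_t *pmd,
				  unsigned long addr)
{
	spinlock_t *ptl;
	pte_t *pte;
	int ret;

	/* Map the PTE page and take the lock returned through ptl. */
	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	ret = pte_present(*pte);
	/* Always pair with pte_unmap_unlock(): unlock, then unmap. */
	pte_unmap_unlock(pte, ptl);
	return ret;
}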
1362 | 1366 | ||
1363 | #define pte_alloc_map(mm, vma, pmd, address) \ | 1367 | #define pte_alloc_map(mm, vma, pmd, address) \ |
1364 | ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma, \ | 1368 | ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma, \ |
1365 | pmd, address))? \ | 1369 | pmd, address))? \ |
1366 | NULL: pte_offset_map(pmd, address)) | 1370 | NULL: pte_offset_map(pmd, address)) |
1367 | 1371 | ||
1368 | #define pte_alloc_map_lock(mm, pmd, address, ptlp) \ | 1372 | #define pte_alloc_map_lock(mm, pmd, address, ptlp) \ |
1369 | ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL, \ | 1373 | ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL, \ |
1370 | pmd, address))? \ | 1374 | pmd, address))? \ |
1371 | NULL: pte_offset_map_lock(mm, pmd, address, ptlp)) | 1375 | NULL: pte_offset_map_lock(mm, pmd, address, ptlp)) |
1372 | 1376 | ||
1373 | #define pte_alloc_kernel(pmd, address) \ | 1377 | #define pte_alloc_kernel(pmd, address) \ |
1374 | ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ | 1378 | ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ |
1375 | NULL: pte_offset_kernel(pmd, address)) | 1379 | NULL: pte_offset_kernel(pmd, address)) |
1376 | 1380 | ||
1377 | extern void free_area_init(unsigned long * zones_size); | 1381 | extern void free_area_init(unsigned long * zones_size); |
1378 | extern void free_area_init_node(int nid, unsigned long * zones_size, | 1382 | extern void free_area_init_node(int nid, unsigned long * zones_size, |
1379 | unsigned long zone_start_pfn, unsigned long *zholes_size); | 1383 | unsigned long zone_start_pfn, unsigned long *zholes_size); |
1380 | extern void free_initmem(void); | 1384 | extern void free_initmem(void); |
1381 | 1385 | ||
1382 | /* | 1386 | /* |
1383 | * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) | 1387 | * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) |
1384 | * into the buddy system. The freed pages will be poisoned with the byte | 1388 | * into the buddy system. The freed pages will be poisoned with the byte |
1385 | * pattern "poison" if that value lies within the range [0, UCHAR_MAX]. | 1389 | * pattern "poison" if that value lies within the range [0, UCHAR_MAX]. |
1386 | * Returns the number of pages freed into the buddy system. | 1390 | * Returns the number of pages freed into the buddy system. |
1387 | */ | 1391 | */ |
1388 | extern unsigned long free_reserved_area(void *start, void *end, | 1392 | extern unsigned long free_reserved_area(void *start, void *end, |
1389 | int poison, char *s); | 1393 | int poison, char *s); |
1390 | 1394 | ||
1391 | #ifdef CONFIG_HIGHMEM | 1395 | #ifdef CONFIG_HIGHMEM |
1392 | /* | 1396 | /* |
1393 | * Free a highmem page into the buddy system, adjusting totalhigh_pages | 1397 | * Free a highmem page into the buddy system, adjusting totalhigh_pages |
1394 | * and totalram_pages. | 1398 | * and totalram_pages. |
1395 | */ | 1399 | */ |
1396 | extern void free_highmem_page(struct page *page); | 1400 | extern void free_highmem_page(struct page *page); |
1397 | #endif | 1401 | #endif |
1398 | 1402 | ||
1399 | extern void adjust_managed_page_count(struct page *page, long count); | 1403 | extern void adjust_managed_page_count(struct page *page, long count); |
1400 | extern void mem_init_print_info(const char *str); | 1404 | extern void mem_init_print_info(const char *str); |
1401 | 1405 | ||
1402 | /* Free the reserved page into the buddy system, so it gets managed. */ | 1406 | /* Free the reserved page into the buddy system, so it gets managed. */ |
1403 | static inline void __free_reserved_page(struct page *page) | 1407 | static inline void __free_reserved_page(struct page *page) |
1404 | { | 1408 | { |
1405 | ClearPageReserved(page); | 1409 | ClearPageReserved(page); |
1406 | init_page_count(page); | 1410 | init_page_count(page); |
1407 | __free_page(page); | 1411 | __free_page(page); |
1408 | } | 1412 | } |
1409 | 1413 | ||
1410 | static inline void free_reserved_page(struct page *page) | 1414 | static inline void free_reserved_page(struct page *page) |
1411 | { | 1415 | { |
1412 | __free_reserved_page(page); | 1416 | __free_reserved_page(page); |
1413 | adjust_managed_page_count(page, 1); | 1417 | adjust_managed_page_count(page, 1); |
1414 | } | 1418 | } |
1415 | 1419 | ||
1416 | static inline void mark_page_reserved(struct page *page) | 1420 | static inline void mark_page_reserved(struct page *page) |
1417 | { | 1421 | { |
1418 | SetPageReserved(page); | 1422 | SetPageReserved(page); |
1419 | adjust_managed_page_count(page, -1); | 1423 | adjust_managed_page_count(page, -1); |
1420 | } | 1424 | } |
1421 | 1425 | ||
1422 | /* | 1426 | /* |
1423 | * Default method to free all the __init memory into the buddy system. | 1427 | * Default method to free all the __init memory into the buddy system. |
1424 | * The freed pages will be poisoned with the byte pattern "poison" if that | 1428 | * The freed pages will be poisoned with the byte pattern "poison" if that |
1425 | * value lies within the range [0, UCHAR_MAX]. | 1429 | * value lies within the range [0, UCHAR_MAX]. |
1426 | * Returns the number of pages freed into the buddy system. | 1430 | * Returns the number of pages freed into the buddy system. |
1427 | */ | 1431 | */ |
1428 | static inline unsigned long free_initmem_default(int poison) | 1432 | static inline unsigned long free_initmem_default(int poison) |
1429 | { | 1433 | { |
1430 | extern char __init_begin[], __init_end[]; | 1434 | extern char __init_begin[], __init_end[]; |
1431 | 1435 | ||
1432 | return free_reserved_area(&__init_begin, &__init_end, | 1436 | return free_reserved_area(&__init_begin, &__init_end, |
1433 | poison, "unused kernel"); | 1437 | poison, "unused kernel"); |
1434 | } | 1438 | } |
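Editor's sketch (not part of this diff) of how an architecture's free_initmem() commonly wraps the helper above; since the poison value only takes effect inside [0, UCHAR_MAX], passing -1 is the idiomatic way to skip poisoning:

void __init free_initmem(void)
{
	/* -1 is outside [0, UCHAR_MAX], so the freed init pages are not poisoned. */
	free_initmem_default(-1);
}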
1435 | 1439 | ||
1436 | static inline unsigned long get_num_physpages(void) | 1440 | static inline unsigned long get_num_physpages(void) |
1437 | { | 1441 | { |
1438 | int nid; | 1442 | int nid; |
1439 | unsigned long phys_pages = 0; | 1443 | unsigned long phys_pages = 0; |
1440 | 1444 | ||
1441 | for_each_online_node(nid) | 1445 | for_each_online_node(nid) |
1442 | phys_pages += node_present_pages(nid); | 1446 | phys_pages += node_present_pages(nid); |
1443 | 1447 | ||
1444 | return phys_pages; | 1448 | return phys_pages; |
1445 | } | 1449 | } |
1446 | 1450 | ||
1447 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 1451 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
1448 | /* | 1452 | /* |
1449 | * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its | 1453 | * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its |
1450 | * zones, allocate the backing mem_map and account for memory holes in a more | 1454 | * zones, allocate the backing mem_map and account for memory holes in a more |
1451 | * architecture independent manner. This is a substitute for creating the | 1455 | * architecture independent manner. This is a substitute for creating the |
1452 | * zone_sizes[] and zholes_size[] arrays and passing them to | 1456 | * zone_sizes[] and zholes_size[] arrays and passing them to |
1453 | * free_area_init_node() | 1457 | * free_area_init_node() |
1454 | * | 1458 | * |
1455 | * An architecture is expected to register the ranges of page frames backed | 1459 | * An architecture is expected to register the ranges of page frames backed |
1456 | * by physical memory with memblock_add[_node]() before calling | 1460 | * by physical memory with memblock_add[_node]() before calling |
1457 | * free_area_init_nodes(), passing in the PFN each zone ends at. For basic | 1461 | * free_area_init_nodes(), passing in the PFN each zone ends at. For basic |
1458 | * usage, an architecture is expected to do something like: | 1462 | * usage, an architecture is expected to do something like: |
1459 | * | 1463 | * |
1460 | * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, | 1464 | * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, |
1461 | * max_highmem_pfn}; | 1465 | * max_highmem_pfn}; |
1462 | * for_each_valid_physical_page_range() | 1466 | * for_each_valid_physical_page_range() |
1463 | * memblock_add_node(base, size, nid) | 1467 | * memblock_add_node(base, size, nid) |
1464 | * free_area_init_nodes(max_zone_pfns); | 1468 | * free_area_init_nodes(max_zone_pfns); |
1465 | * | 1469 | * |
1466 | * free_bootmem_with_active_regions() calls free_bootmem_node() for each | 1470 | * free_bootmem_with_active_regions() calls free_bootmem_node() for each |
1467 | * registered physical page range. Similarly | 1471 | * registered physical page range. Similarly |
1468 | * sparse_memory_present_with_active_regions() calls memory_present() for | 1472 | * sparse_memory_present_with_active_regions() calls memory_present() for |
1469 | * each range when SPARSEMEM is enabled. | 1473 | * each range when SPARSEMEM is enabled. |
1470 | * | 1474 | * |
1471 | * See mm/page_alloc.c for more information on each function exposed by | 1475 | * See mm/page_alloc.c for more information on each function exposed by |
1472 | * CONFIG_HAVE_MEMBLOCK_NODE_MAP. | 1476 | * CONFIG_HAVE_MEMBLOCK_NODE_MAP. |
1473 | */ | 1477 | */ |
1474 | extern void free_area_init_nodes(unsigned long *max_zone_pfn); | 1478 | extern void free_area_init_nodes(unsigned long *max_zone_pfn); |
1475 | unsigned long node_map_pfn_alignment(void); | 1479 | unsigned long node_map_pfn_alignment(void); |
1476 | unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, | 1480 | unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, |
1477 | unsigned long end_pfn); | 1481 | unsigned long end_pfn); |
1478 | extern unsigned long absent_pages_in_range(unsigned long start_pfn, | 1482 | extern unsigned long absent_pages_in_range(unsigned long start_pfn, |
1479 | unsigned long end_pfn); | 1483 | unsigned long end_pfn); |
1480 | extern void get_pfn_range_for_nid(unsigned int nid, | 1484 | extern void get_pfn_range_for_nid(unsigned int nid, |
1481 | unsigned long *start_pfn, unsigned long *end_pfn); | 1485 | unsigned long *start_pfn, unsigned long *end_pfn); |
1482 | extern unsigned long find_min_pfn_with_active_regions(void); | 1486 | extern unsigned long find_min_pfn_with_active_regions(void); |
1483 | extern void free_bootmem_with_active_regions(int nid, | 1487 | extern void free_bootmem_with_active_regions(int nid, |
1484 | unsigned long max_low_pfn); | 1488 | unsigned long max_low_pfn); |
1485 | extern void sparse_memory_present_with_active_regions(int nid); | 1489 | extern void sparse_memory_present_with_active_regions(int nid); |
1486 | 1490 | ||
1487 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 1491 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
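Editor's sketch of the registration sequence described in the comment above. This is illustrative only: the base address, size, and function name are invented, and real code would take the ranges from firmware and typically register one range per node:

#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/sizes.h>

static void __init example_zone_setup(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0 };

	/* One 256MB range on node 0; a real platform reads this from firmware. */
	memblock_add_node(0x80000000, SZ_256M, 0);

	/* Tell the core where ZONE_NORMAL ends and let it build the zones. */
	max_zone_pfns[ZONE_NORMAL] = PFN_DOWN(0x80000000 + SZ_256M);
	free_area_init_nodes(max_zone_pfns);
}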
1488 | 1492 | ||
1489 | #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ | 1493 | #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ |
1490 | !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) | 1494 | !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) |
1491 | static inline int __early_pfn_to_nid(unsigned long pfn) | 1495 | static inline int __early_pfn_to_nid(unsigned long pfn) |
1492 | { | 1496 | { |
1493 | return 0; | 1497 | return 0; |
1494 | } | 1498 | } |
1495 | #else | 1499 | #else |
1496 | /* please see mm/page_alloc.c */ | 1500 | /* please see mm/page_alloc.c */ |
1497 | extern int __meminit early_pfn_to_nid(unsigned long pfn); | 1501 | extern int __meminit early_pfn_to_nid(unsigned long pfn); |
1498 | #ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID | 1502 | #ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID |
1499 | /* there is a per-arch backend function. */ | 1503 | /* there is a per-arch backend function. */ |
1500 | extern int __meminit __early_pfn_to_nid(unsigned long pfn); | 1504 | extern int __meminit __early_pfn_to_nid(unsigned long pfn); |
1501 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ | 1505 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ |
1502 | #endif | 1506 | #endif |
1503 | 1507 | ||
1504 | extern void set_dma_reserve(unsigned long new_dma_reserve); | 1508 | extern void set_dma_reserve(unsigned long new_dma_reserve); |
1505 | extern void memmap_init_zone(unsigned long, int, unsigned long, | 1509 | extern void memmap_init_zone(unsigned long, int, unsigned long, |
1506 | unsigned long, enum memmap_context); | 1510 | unsigned long, enum memmap_context); |
1507 | extern void setup_per_zone_wmarks(void); | 1511 | extern void setup_per_zone_wmarks(void); |
1508 | extern int __meminit init_per_zone_wmark_min(void); | 1512 | extern int __meminit init_per_zone_wmark_min(void); |
1509 | extern void mem_init(void); | 1513 | extern void mem_init(void); |
1510 | extern void __init mmap_init(void); | 1514 | extern void __init mmap_init(void); |
1511 | extern void show_mem(unsigned int flags); | 1515 | extern void show_mem(unsigned int flags); |
1512 | extern void si_meminfo(struct sysinfo * val); | 1516 | extern void si_meminfo(struct sysinfo * val); |
1513 | extern void si_meminfo_node(struct sysinfo *val, int nid); | 1517 | extern void si_meminfo_node(struct sysinfo *val, int nid); |
1514 | 1518 | ||
1515 | extern __printf(3, 4) | 1519 | extern __printf(3, 4) |
1516 | void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); | 1520 | void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); |
1517 | 1521 | ||
1518 | extern void setup_per_cpu_pageset(void); | 1522 | extern void setup_per_cpu_pageset(void); |
1519 | 1523 | ||
1520 | extern void zone_pcp_update(struct zone *zone); | 1524 | extern void zone_pcp_update(struct zone *zone); |
1521 | extern void zone_pcp_reset(struct zone *zone); | 1525 | extern void zone_pcp_reset(struct zone *zone); |
1522 | 1526 | ||
1523 | /* page_alloc.c */ | 1527 | /* page_alloc.c */ |
1524 | extern int min_free_kbytes; | 1528 | extern int min_free_kbytes; |
1525 | 1529 | ||
1526 | /* nommu.c */ | 1530 | /* nommu.c */ |
1527 | extern atomic_long_t mmap_pages_allocated; | 1531 | extern atomic_long_t mmap_pages_allocated; |
1528 | extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); | 1532 | extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); |
1529 | 1533 | ||
1530 | /* interval_tree.c */ | 1534 | /* interval_tree.c */ |
1531 | void vma_interval_tree_insert(struct vm_area_struct *node, | 1535 | void vma_interval_tree_insert(struct vm_area_struct *node, |
1532 | struct rb_root *root); | 1536 | struct rb_root *root); |
1533 | void vma_interval_tree_insert_after(struct vm_area_struct *node, | 1537 | void vma_interval_tree_insert_after(struct vm_area_struct *node, |
1534 | struct vm_area_struct *prev, | 1538 | struct vm_area_struct *prev, |
1535 | struct rb_root *root); | 1539 | struct rb_root *root); |
1536 | void vma_interval_tree_remove(struct vm_area_struct *node, | 1540 | void vma_interval_tree_remove(struct vm_area_struct *node, |
1537 | struct rb_root *root); | 1541 | struct rb_root *root); |
1538 | struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root, | 1542 | struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root, |
1539 | unsigned long start, unsigned long last); | 1543 | unsigned long start, unsigned long last); |
1540 | struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, | 1544 | struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, |
1541 | unsigned long start, unsigned long last); | 1545 | unsigned long start, unsigned long last); |
1542 | 1546 | ||
1543 | #define vma_interval_tree_foreach(vma, root, start, last) \ | 1547 | #define vma_interval_tree_foreach(vma, root, start, last) \ |
1544 | for (vma = vma_interval_tree_iter_first(root, start, last); \ | 1548 | for (vma = vma_interval_tree_iter_first(root, start, last); \ |
1545 | vma; vma = vma_interval_tree_iter_next(vma, start, last)) | 1549 | vma; vma = vma_interval_tree_iter_next(vma, start, last)) |
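Illustrative use of the iterator above (not part of this diff): walking every VMA that maps a given range of a file, in units of page offsets. The helper name is invented and callers are normally expected to hold the mapping's i_mmap lock while iterating:

static void example_walk_mappers(struct address_space *mapping,
				 pgoff_t first, pgoff_t last)
{
	struct vm_area_struct *vma;

	/* Visits each VMA whose file range overlaps [first, last]. */
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first, last)
		pr_debug("vma %p maps pgoff %lu-%lu\n", vma, first, last);
}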
1546 | 1550 | ||
1547 | static inline void vma_nonlinear_insert(struct vm_area_struct *vma, | 1551 | static inline void vma_nonlinear_insert(struct vm_area_struct *vma, |
1548 | struct list_head *list) | 1552 | struct list_head *list) |
1549 | { | 1553 | { |
1550 | list_add_tail(&vma->shared.nonlinear, list); | 1554 | list_add_tail(&vma->shared.nonlinear, list); |
1551 | } | 1555 | } |
1552 | 1556 | ||
1553 | void anon_vma_interval_tree_insert(struct anon_vma_chain *node, | 1557 | void anon_vma_interval_tree_insert(struct anon_vma_chain *node, |
1554 | struct rb_root *root); | 1558 | struct rb_root *root); |
1555 | void anon_vma_interval_tree_remove(struct anon_vma_chain *node, | 1559 | void anon_vma_interval_tree_remove(struct anon_vma_chain *node, |
1556 | struct rb_root *root); | 1560 | struct rb_root *root); |
1557 | struct anon_vma_chain *anon_vma_interval_tree_iter_first( | 1561 | struct anon_vma_chain *anon_vma_interval_tree_iter_first( |
1558 | struct rb_root *root, unsigned long start, unsigned long last); | 1562 | struct rb_root *root, unsigned long start, unsigned long last); |
1559 | struct anon_vma_chain *anon_vma_interval_tree_iter_next( | 1563 | struct anon_vma_chain *anon_vma_interval_tree_iter_next( |
1560 | struct anon_vma_chain *node, unsigned long start, unsigned long last); | 1564 | struct anon_vma_chain *node, unsigned long start, unsigned long last); |
1561 | #ifdef CONFIG_DEBUG_VM_RB | 1565 | #ifdef CONFIG_DEBUG_VM_RB |
1562 | void anon_vma_interval_tree_verify(struct anon_vma_chain *node); | 1566 | void anon_vma_interval_tree_verify(struct anon_vma_chain *node); |
1563 | #endif | 1567 | #endif |
1564 | 1568 | ||
1565 | #define anon_vma_interval_tree_foreach(avc, root, start, last) \ | 1569 | #define anon_vma_interval_tree_foreach(avc, root, start, last) \ |
1566 | for (avc = anon_vma_interval_tree_iter_first(root, start, last); \ | 1570 | for (avc = anon_vma_interval_tree_iter_first(root, start, last); \ |
1567 | avc; avc = anon_vma_interval_tree_iter_next(avc, start, last)) | 1571 | avc; avc = anon_vma_interval_tree_iter_next(avc, start, last)) |
1568 | 1572 | ||
1569 | /* mmap.c */ | 1573 | /* mmap.c */ |
1570 | extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); | 1574 | extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); |
1571 | extern int vma_adjust(struct vm_area_struct *vma, unsigned long start, | 1575 | extern int vma_adjust(struct vm_area_struct *vma, unsigned long start, |
1572 | unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); | 1576 | unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); |
1573 | extern struct vm_area_struct *vma_merge(struct mm_struct *, | 1577 | extern struct vm_area_struct *vma_merge(struct mm_struct *, |
1574 | struct vm_area_struct *prev, unsigned long addr, unsigned long end, | 1578 | struct vm_area_struct *prev, unsigned long addr, unsigned long end, |
1575 | unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, | 1579 | unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, |
1576 | struct mempolicy *); | 1580 | struct mempolicy *); |
1577 | extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); | 1581 | extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); |
1578 | extern int split_vma(struct mm_struct *, | 1582 | extern int split_vma(struct mm_struct *, |
1579 | struct vm_area_struct *, unsigned long addr, int new_below); | 1583 | struct vm_area_struct *, unsigned long addr, int new_below); |
1580 | extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); | 1584 | extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); |
1581 | extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, | 1585 | extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, |
1582 | struct rb_node **, struct rb_node *); | 1586 | struct rb_node **, struct rb_node *); |
1583 | extern void unlink_file_vma(struct vm_area_struct *); | 1587 | extern void unlink_file_vma(struct vm_area_struct *); |
1584 | extern struct vm_area_struct *copy_vma(struct vm_area_struct **, | 1588 | extern struct vm_area_struct *copy_vma(struct vm_area_struct **, |
1585 | unsigned long addr, unsigned long len, pgoff_t pgoff, | 1589 | unsigned long addr, unsigned long len, pgoff_t pgoff, |
1586 | bool *need_rmap_locks); | 1590 | bool *need_rmap_locks); |
1587 | extern void exit_mmap(struct mm_struct *); | 1591 | extern void exit_mmap(struct mm_struct *); |
1588 | 1592 | ||
1589 | extern int mm_take_all_locks(struct mm_struct *mm); | 1593 | extern int mm_take_all_locks(struct mm_struct *mm); |
1590 | extern void mm_drop_all_locks(struct mm_struct *mm); | 1594 | extern void mm_drop_all_locks(struct mm_struct *mm); |
1591 | 1595 | ||
1592 | extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); | 1596 | extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); |
1593 | extern struct file *get_mm_exe_file(struct mm_struct *mm); | 1597 | extern struct file *get_mm_exe_file(struct mm_struct *mm); |
1594 | 1598 | ||
1595 | extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); | 1599 | extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); |
1596 | extern int install_special_mapping(struct mm_struct *mm, | 1600 | extern int install_special_mapping(struct mm_struct *mm, |
1597 | unsigned long addr, unsigned long len, | 1601 | unsigned long addr, unsigned long len, |
1598 | unsigned long flags, struct page **pages); | 1602 | unsigned long flags, struct page **pages); |
1599 | 1603 | ||
1600 | extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); | 1604 | extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); |
1601 | 1605 | ||
1602 | extern unsigned long mmap_region(struct file *file, unsigned long addr, | 1606 | extern unsigned long mmap_region(struct file *file, unsigned long addr, |
1603 | unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); | 1607 | unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); |
1604 | extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | 1608 | extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, |
1605 | unsigned long len, unsigned long prot, unsigned long flags, | 1609 | unsigned long len, unsigned long prot, unsigned long flags, |
1606 | unsigned long pgoff, unsigned long *populate); | 1610 | unsigned long pgoff, unsigned long *populate); |
1607 | extern int do_munmap(struct mm_struct *, unsigned long, size_t); | 1611 | extern int do_munmap(struct mm_struct *, unsigned long, size_t); |
1608 | 1612 | ||
1609 | #ifdef CONFIG_MMU | 1613 | #ifdef CONFIG_MMU |
1610 | extern int __mm_populate(unsigned long addr, unsigned long len, | 1614 | extern int __mm_populate(unsigned long addr, unsigned long len, |
1611 | int ignore_errors); | 1615 | int ignore_errors); |
1612 | static inline void mm_populate(unsigned long addr, unsigned long len) | 1616 | static inline void mm_populate(unsigned long addr, unsigned long len) |
1613 | { | 1617 | { |
1614 | /* Ignore errors */ | 1618 | /* Ignore errors */ |
1615 | (void) __mm_populate(addr, len, 1); | 1619 | (void) __mm_populate(addr, len, 1); |
1616 | } | 1620 | } |
1617 | #else | 1621 | #else |
1618 | static inline void mm_populate(unsigned long addr, unsigned long len) {} | 1622 | static inline void mm_populate(unsigned long addr, unsigned long len) {} |
1619 | #endif | 1623 | #endif |
1620 | 1624 | ||
1621 | /* These take the mm semaphore themselves */ | 1625 | /* These take the mm semaphore themselves */ |
1622 | extern unsigned long vm_brk(unsigned long, unsigned long); | 1626 | extern unsigned long vm_brk(unsigned long, unsigned long); |
1623 | extern int vm_munmap(unsigned long, size_t); | 1627 | extern int vm_munmap(unsigned long, size_t); |
1624 | extern unsigned long vm_mmap(struct file *, unsigned long, | 1628 | extern unsigned long vm_mmap(struct file *, unsigned long, |
1625 | unsigned long, unsigned long, | 1629 | unsigned long, unsigned long, |
1626 | unsigned long, unsigned long); | 1630 | unsigned long, unsigned long); |
1627 | 1631 | ||
1628 | struct vm_unmapped_area_info { | 1632 | struct vm_unmapped_area_info { |
1629 | #define VM_UNMAPPED_AREA_TOPDOWN 1 | 1633 | #define VM_UNMAPPED_AREA_TOPDOWN 1 |
1630 | unsigned long flags; | 1634 | unsigned long flags; |
1631 | unsigned long length; | 1635 | unsigned long length; |
1632 | unsigned long low_limit; | 1636 | unsigned long low_limit; |
1633 | unsigned long high_limit; | 1637 | unsigned long high_limit; |
1634 | unsigned long align_mask; | 1638 | unsigned long align_mask; |
1635 | unsigned long align_offset; | 1639 | unsigned long align_offset; |
1636 | }; | 1640 | }; |
1637 | 1641 | ||
1638 | extern unsigned long unmapped_area(struct vm_unmapped_area_info *info); | 1642 | extern unsigned long unmapped_area(struct vm_unmapped_area_info *info); |
1639 | extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info); | 1643 | extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info); |
1640 | 1644 | ||
1641 | /* | 1645 | /* |
1642 | * Search for an unmapped address range. | 1646 | * Search for an unmapped address range. |
1643 | * | 1647 | * |
1644 | * We are looking for a range that: | 1648 | * We are looking for a range that: |
1645 | * - does not intersect with any VMA; | 1649 | * - does not intersect with any VMA; |
1646 | * - is contained within the [low_limit, high_limit) interval; | 1650 | * - is contained within the [low_limit, high_limit) interval; |
1647 | * - is at least the desired size; | 1651 | * - is at least the desired size; |
1648 | * - satisfies (begin_addr & align_mask) == (align_offset & align_mask) | 1652 | * - satisfies (begin_addr & align_mask) == (align_offset & align_mask) |
1649 | */ | 1653 | */ |
1650 | static inline unsigned long | 1654 | static inline unsigned long |
1651 | vm_unmapped_area(struct vm_unmapped_area_info *info) | 1655 | vm_unmapped_area(struct vm_unmapped_area_info *info) |
1652 | { | 1656 | { |
1653 | if (!(info->flags & VM_UNMAPPED_AREA_TOPDOWN)) | 1657 | if (!(info->flags & VM_UNMAPPED_AREA_TOPDOWN)) |
1654 | return unmapped_area(info); | 1658 | return unmapped_area(info); |
1655 | else | 1659 | else |
1656 | return unmapped_area_topdown(info); | 1660 | return unmapped_area_topdown(info); |
1657 | } | 1661 | } |
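For illustration, this is roughly how a bottom-up allocator such as a default arch_get_unmapped_area() fills in the descriptor above; the helper name is invented and the limits shown (mmap_base, TASK_SIZE) are typical choices rather than requirements:

static unsigned long example_get_unmapped_area(struct mm_struct *mm,
					       unsigned long len)
{
	struct vm_unmapped_area_info info;

	info.flags = 0;		/* bottom-up; set VM_UNMAPPED_AREA_TOPDOWN otherwise */
	info.length = len;
	info.low_limit = mm->mmap_base;
	info.high_limit = TASK_SIZE;
	info.align_mask = 0;	/* no extra alignment constraint */
	info.align_offset = 0;
	return vm_unmapped_area(&info);
}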
1658 | 1662 | ||
1659 | /* truncate.c */ | 1663 | /* truncate.c */ |
1660 | extern void truncate_inode_pages(struct address_space *, loff_t); | 1664 | extern void truncate_inode_pages(struct address_space *, loff_t); |
1661 | extern void truncate_inode_pages_range(struct address_space *, | 1665 | extern void truncate_inode_pages_range(struct address_space *, |
1662 | loff_t lstart, loff_t lend); | 1666 | loff_t lstart, loff_t lend); |
1663 | 1667 | ||
1664 | /* generic vm_area_ops exported for stackable file systems */ | 1668 | /* generic vm_area_ops exported for stackable file systems */ |
1665 | extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); | 1669 | extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); |
1666 | extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1670 | extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1667 | 1671 | ||
1668 | /* mm/page-writeback.c */ | 1672 | /* mm/page-writeback.c */ |
1669 | int write_one_page(struct page *page, int wait); | 1673 | int write_one_page(struct page *page, int wait); |
1670 | void task_dirty_inc(struct task_struct *tsk); | 1674 | void task_dirty_inc(struct task_struct *tsk); |
1671 | 1675 | ||
1672 | /* readahead.c */ | 1676 | /* readahead.c */ |
1673 | #define VM_MAX_READAHEAD 128 /* kbytes */ | 1677 | #define VM_MAX_READAHEAD 128 /* kbytes */ |
1674 | #define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ | 1678 | #define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ |
1675 | 1679 | ||
1676 | int force_page_cache_readahead(struct address_space *mapping, struct file *filp, | 1680 | int force_page_cache_readahead(struct address_space *mapping, struct file *filp, |
1677 | pgoff_t offset, unsigned long nr_to_read); | 1681 | pgoff_t offset, unsigned long nr_to_read); |
1678 | 1682 | ||
1679 | void page_cache_sync_readahead(struct address_space *mapping, | 1683 | void page_cache_sync_readahead(struct address_space *mapping, |
1680 | struct file_ra_state *ra, | 1684 | struct file_ra_state *ra, |
1681 | struct file *filp, | 1685 | struct file *filp, |
1682 | pgoff_t offset, | 1686 | pgoff_t offset, |
1683 | unsigned long size); | 1687 | unsigned long size); |
1684 | 1688 | ||
1685 | void page_cache_async_readahead(struct address_space *mapping, | 1689 | void page_cache_async_readahead(struct address_space *mapping, |
1686 | struct file_ra_state *ra, | 1690 | struct file_ra_state *ra, |
1687 | struct file *filp, | 1691 | struct file *filp, |
1688 | struct page *pg, | 1692 | struct page *pg, |
1689 | pgoff_t offset, | 1693 | pgoff_t offset, |
1690 | unsigned long size); | 1694 | unsigned long size); |
1691 | 1695 | ||
1692 | unsigned long max_sane_readahead(unsigned long nr); | 1696 | unsigned long max_sane_readahead(unsigned long nr); |
1693 | unsigned long ra_submit(struct file_ra_state *ra, | 1697 | unsigned long ra_submit(struct file_ra_state *ra, |
1694 | struct address_space *mapping, | 1698 | struct address_space *mapping, |
1695 | struct file *filp); | 1699 | struct file *filp); |
1696 | 1700 | ||
1697 | /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ | 1701 | /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ |
1698 | extern int expand_stack(struct vm_area_struct *vma, unsigned long address); | 1702 | extern int expand_stack(struct vm_area_struct *vma, unsigned long address); |
1699 | 1703 | ||
1700 | /* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ | 1704 | /* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ |
1701 | extern int expand_downwards(struct vm_area_struct *vma, | 1705 | extern int expand_downwards(struct vm_area_struct *vma, |
1702 | unsigned long address); | 1706 | unsigned long address); |
1703 | #if VM_GROWSUP | 1707 | #if VM_GROWSUP |
1704 | extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); | 1708 | extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); |
1705 | #else | 1709 | #else |
1706 | #define expand_upwards(vma, address) do { } while (0) | 1710 | #define expand_upwards(vma, address) do { } while (0) |
1707 | #endif | 1711 | #endif |
1708 | 1712 | ||
1709 | /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ | 1713 | /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ |
1710 | extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); | 1714 | extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); |
1711 | extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, | 1715 | extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, |
1712 | struct vm_area_struct **pprev); | 1716 | struct vm_area_struct **pprev); |
1713 | 1717 | ||
1714 | /* Look up the first VMA which intersects the interval start_addr..end_addr-1, | 1718 | /* Look up the first VMA which intersects the interval start_addr..end_addr-1, |
1715 | NULL if none. Assume start_addr < end_addr. */ | 1719 | NULL if none. Assume start_addr < end_addr. */ |
1716 | static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) | 1720 | static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) |
1717 | { | 1721 | { |
1718 | struct vm_area_struct * vma = find_vma(mm,start_addr); | 1722 | struct vm_area_struct * vma = find_vma(mm,start_addr); |
1719 | 1723 | ||
1720 | if (vma && end_addr <= vma->vm_start) | 1724 | if (vma && end_addr <= vma->vm_start) |
1721 | vma = NULL; | 1725 | vma = NULL; |
1722 | return vma; | 1726 | return vma; |
1723 | } | 1727 | } |
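One point worth spelling out for find_vma(): it returns the first VMA that ends above addr, which may also start above addr. A small sketch of the usual check (helper name invented; the caller is expected to hold mmap_sem):

static int example_addr_is_mapped(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma = find_vma(mm, addr);

	/* find_vma() may return a VMA lying entirely above addr. */
	return vma && addr >= vma->vm_start;
}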
1724 | 1728 | ||
1725 | static inline unsigned long vma_pages(struct vm_area_struct *vma) | 1729 | static inline unsigned long vma_pages(struct vm_area_struct *vma) |
1726 | { | 1730 | { |
1727 | return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | 1731 | return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; |
1728 | } | 1732 | } |
1729 | 1733 | ||
1730 | /* Look up the first VMA which exactly matches the interval vm_start ... vm_end */ | 1734 | /* Look up the first VMA which exactly matches the interval vm_start ... vm_end */ |
1731 | static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, | 1735 | static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, |
1732 | unsigned long vm_start, unsigned long vm_end) | 1736 | unsigned long vm_start, unsigned long vm_end) |
1733 | { | 1737 | { |
1734 | struct vm_area_struct *vma = find_vma(mm, vm_start); | 1738 | struct vm_area_struct *vma = find_vma(mm, vm_start); |
1735 | 1739 | ||
1736 | if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end)) | 1740 | if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end)) |
1737 | vma = NULL; | 1741 | vma = NULL; |
1738 | 1742 | ||
1739 | return vma; | 1743 | return vma; |
1740 | } | 1744 | } |
1741 | 1745 | ||
1742 | #ifdef CONFIG_MMU | 1746 | #ifdef CONFIG_MMU |
1743 | pgprot_t vm_get_page_prot(unsigned long vm_flags); | 1747 | pgprot_t vm_get_page_prot(unsigned long vm_flags); |
1744 | #else | 1748 | #else |
1745 | static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) | 1749 | static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) |
1746 | { | 1750 | { |
1747 | return __pgprot(0); | 1751 | return __pgprot(0); |
1748 | } | 1752 | } |
1749 | #endif | 1753 | #endif |
1750 | 1754 | ||
1751 | #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE | 1755 | #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE |
1752 | unsigned long change_prot_numa(struct vm_area_struct *vma, | 1756 | unsigned long change_prot_numa(struct vm_area_struct *vma, |
1753 | unsigned long start, unsigned long end); | 1757 | unsigned long start, unsigned long end); |
1754 | #endif | 1758 | #endif |
1755 | 1759 | ||
1756 | struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); | 1760 | struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); |
1757 | int remap_pfn_range(struct vm_area_struct *, unsigned long addr, | 1761 | int remap_pfn_range(struct vm_area_struct *, unsigned long addr, |
1758 | unsigned long pfn, unsigned long size, pgprot_t); | 1762 | unsigned long pfn, unsigned long size, pgprot_t); |
1759 | int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); | 1763 | int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); |
1760 | int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, | 1764 | int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
1761 | unsigned long pfn); | 1765 | unsigned long pfn); |
1762 | int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, | 1766 | int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
1763 | unsigned long pfn); | 1767 | unsigned long pfn); |
1764 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); | 1768 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); |
1765 | 1769 | ||
1766 | 1770 | ||
1767 | struct page *follow_page_mask(struct vm_area_struct *vma, | 1771 | struct page *follow_page_mask(struct vm_area_struct *vma, |
1768 | unsigned long address, unsigned int foll_flags, | 1772 | unsigned long address, unsigned int foll_flags, |
1769 | unsigned int *page_mask); | 1773 | unsigned int *page_mask); |
1770 | 1774 | ||
1771 | static inline struct page *follow_page(struct vm_area_struct *vma, | 1775 | static inline struct page *follow_page(struct vm_area_struct *vma, |
1772 | unsigned long address, unsigned int foll_flags) | 1776 | unsigned long address, unsigned int foll_flags) |
1773 | { | 1777 | { |
1774 | unsigned int unused_page_mask; | 1778 | unsigned int unused_page_mask; |
1775 | return follow_page_mask(vma, address, foll_flags, &unused_page_mask); | 1779 | return follow_page_mask(vma, address, foll_flags, &unused_page_mask); |
1776 | } | 1780 | } |
1777 | 1781 | ||
1778 | #define FOLL_WRITE 0x01 /* check pte is writable */ | 1782 | #define FOLL_WRITE 0x01 /* check pte is writable */ |
1779 | #define FOLL_TOUCH 0x02 /* mark page accessed */ | 1783 | #define FOLL_TOUCH 0x02 /* mark page accessed */ |
1780 | #define FOLL_GET 0x04 /* do get_page on page */ | 1784 | #define FOLL_GET 0x04 /* do get_page on page */ |
1781 | #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ | 1785 | #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ |
1782 | #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ | 1786 | #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ |
1783 | #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO | 1787 | #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO |
1784 | * and return without waiting upon it */ | 1788 | * and return without waiting upon it */ |
1785 | #define FOLL_MLOCK 0x40 /* mark page as mlocked */ | 1789 | #define FOLL_MLOCK 0x40 /* mark page as mlocked */ |
1786 | #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ | 1790 | #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ |
1787 | #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ | 1791 | #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ |
1788 | #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ | 1792 | #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ |
1789 | #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ | 1793 | #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ |
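A hedged example of combining the flags above with follow_page(): take a reference on the page backing addr. The helper name is invented; the caller holds mmap_sem and must later drop the reference with put_page():

static struct page *example_grab_page(struct vm_area_struct *vma,
				      unsigned long addr)
{
	/* FOLL_GET takes a reference, FOLL_TOUCH marks the page accessed. */
	struct page *page = follow_page(vma, addr, FOLL_GET | FOLL_TOUCH);

	if (IS_ERR_OR_NULL(page))
		return NULL;
	return page;
}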
1790 | 1794 | ||
1791 | typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, | 1795 | typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, |
1792 | void *data); | 1796 | void *data); |
1793 | extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, | 1797 | extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, |
1794 | unsigned long size, pte_fn_t fn, void *data); | 1798 | unsigned long size, pte_fn_t fn, void *data); |
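A sketch of the callback contract for apply_to_page_range(): the pte_fn_t is invoked once per PTE slot in the range, and a nonzero return value aborts the walk. The names below are invented and the example simply counts present PTEs in a kernel virtual range:

static int example_count_pte(pte_t *pte, pgtable_t token,
			     unsigned long addr, void *data)
{
	if (pte_present(*pte))
		(*(unsigned long *)data)++;
	return 0;	/* nonzero would stop the walk and be returned to the caller */
}

static unsigned long example_count_present(unsigned long start, unsigned long size)
{
	unsigned long count = 0;

	apply_to_page_range(&init_mm, start, size, example_count_pte, &count);
	return count;
}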
1795 | 1799 | ||
1796 | #ifdef CONFIG_PROC_FS | 1800 | #ifdef CONFIG_PROC_FS |
1797 | void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); | 1801 | void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); |
1798 | #else | 1802 | #else |
1799 | static inline void vm_stat_account(struct mm_struct *mm, | 1803 | static inline void vm_stat_account(struct mm_struct *mm, |
1800 | unsigned long flags, struct file *file, long pages) | 1804 | unsigned long flags, struct file *file, long pages) |
1801 | { | 1805 | { |
1802 | mm->total_vm += pages; | 1806 | mm->total_vm += pages; |
1803 | } | 1807 | } |
1804 | #endif /* CONFIG_PROC_FS */ | 1808 | #endif /* CONFIG_PROC_FS */ |
1805 | 1809 | ||
1806 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1810 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1807 | extern void kernel_map_pages(struct page *page, int numpages, int enable); | 1811 | extern void kernel_map_pages(struct page *page, int numpages, int enable); |
1808 | #ifdef CONFIG_HIBERNATION | 1812 | #ifdef CONFIG_HIBERNATION |
1809 | extern bool kernel_page_present(struct page *page); | 1813 | extern bool kernel_page_present(struct page *page); |
1810 | #endif /* CONFIG_HIBERNATION */ | 1814 | #endif /* CONFIG_HIBERNATION */ |
1811 | #else | 1815 | #else |
1812 | static inline void | 1816 | static inline void |
1813 | kernel_map_pages(struct page *page, int numpages, int enable) {} | 1817 | kernel_map_pages(struct page *page, int numpages, int enable) {} |
1814 | #ifdef CONFIG_HIBERNATION | 1818 | #ifdef CONFIG_HIBERNATION |
1815 | static inline bool kernel_page_present(struct page *page) { return true; } | 1819 | static inline bool kernel_page_present(struct page *page) { return true; } |
1816 | #endif /* CONFIG_HIBERNATION */ | 1820 | #endif /* CONFIG_HIBERNATION */ |
1817 | #endif | 1821 | #endif |
1818 | 1822 | ||
1819 | extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); | 1823 | extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); |
1820 | #ifdef __HAVE_ARCH_GATE_AREA | 1824 | #ifdef __HAVE_ARCH_GATE_AREA |
1821 | int in_gate_area_no_mm(unsigned long addr); | 1825 | int in_gate_area_no_mm(unsigned long addr); |
1822 | int in_gate_area(struct mm_struct *mm, unsigned long addr); | 1826 | int in_gate_area(struct mm_struct *mm, unsigned long addr); |
1823 | #else | 1827 | #else |
1824 | int in_gate_area_no_mm(unsigned long addr); | 1828 | int in_gate_area_no_mm(unsigned long addr); |
1825 | #define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);}) | 1829 | #define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);}) |
1826 | #endif /* __HAVE_ARCH_GATE_AREA */ | 1830 | #endif /* __HAVE_ARCH_GATE_AREA */ |
1827 | 1831 | ||
1828 | #ifdef CONFIG_SYSCTL | 1832 | #ifdef CONFIG_SYSCTL |
1829 | extern int sysctl_drop_caches; | 1833 | extern int sysctl_drop_caches; |
1830 | int drop_caches_sysctl_handler(struct ctl_table *, int, | 1834 | int drop_caches_sysctl_handler(struct ctl_table *, int, |
1831 | void __user *, size_t *, loff_t *); | 1835 | void __user *, size_t *, loff_t *); |
1832 | #endif | 1836 | #endif |
1833 | 1837 | ||
1834 | unsigned long shrink_slab(struct shrink_control *shrink, | 1838 | unsigned long shrink_slab(struct shrink_control *shrink, |
1835 | unsigned long nr_pages_scanned, | 1839 | unsigned long nr_pages_scanned, |
1836 | unsigned long lru_pages); | 1840 | unsigned long lru_pages); |
1837 | 1841 | ||
1838 | #ifndef CONFIG_MMU | 1842 | #ifndef CONFIG_MMU |
1839 | #define randomize_va_space 0 | 1843 | #define randomize_va_space 0 |
1840 | #else | 1844 | #else |
1841 | extern int randomize_va_space; | 1845 | extern int randomize_va_space; |
1842 | #endif | 1846 | #endif |
1843 | 1847 | ||
1844 | const char * arch_vma_name(struct vm_area_struct *vma); | 1848 | const char * arch_vma_name(struct vm_area_struct *vma); |
1845 | void print_vma_addr(char *prefix, unsigned long rip); | 1849 | void print_vma_addr(char *prefix, unsigned long rip); |
1846 | 1850 | ||
1847 | void sparse_mem_maps_populate_node(struct page **map_map, | 1851 | void sparse_mem_maps_populate_node(struct page **map_map, |
1848 | unsigned long pnum_begin, | 1852 | unsigned long pnum_begin, |
1849 | unsigned long pnum_end, | 1853 | unsigned long pnum_end, |
1850 | unsigned long map_count, | 1854 | unsigned long map_count, |
1851 | int nodeid); | 1855 | int nodeid); |
1852 | 1856 | ||
1853 | struct page *sparse_mem_map_populate(unsigned long pnum, int nid); | 1857 | struct page *sparse_mem_map_populate(unsigned long pnum, int nid); |
1854 | pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); | 1858 | pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); |
1855 | pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); | 1859 | pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); |
1856 | pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); | 1860 | pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); |
1857 | pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); | 1861 | pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); |
1858 | void *vmemmap_alloc_block(unsigned long size, int node); | 1862 | void *vmemmap_alloc_block(unsigned long size, int node); |
1859 | void *vmemmap_alloc_block_buf(unsigned long size, int node); | 1863 | void *vmemmap_alloc_block_buf(unsigned long size, int node); |
1860 | void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); | 1864 | void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); |
1861 | int vmemmap_populate_basepages(unsigned long start, unsigned long end, | 1865 | int vmemmap_populate_basepages(unsigned long start, unsigned long end, |
1862 | int node); | 1866 | int node); |
1863 | int vmemmap_populate(unsigned long start, unsigned long end, int node); | 1867 | int vmemmap_populate(unsigned long start, unsigned long end, int node); |
1864 | void vmemmap_populate_print_last(void); | 1868 | void vmemmap_populate_print_last(void); |
1865 | #ifdef CONFIG_MEMORY_HOTPLUG | 1869 | #ifdef CONFIG_MEMORY_HOTPLUG |
1866 | void vmemmap_free(unsigned long start, unsigned long end); | 1870 | void vmemmap_free(unsigned long start, unsigned long end); |
1867 | #endif | 1871 | #endif |
1868 | void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, | 1872 | void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, |
1869 | unsigned long size); | 1873 | unsigned long size); |
1870 | 1874 | ||
1871 | enum mf_flags { | 1875 | enum mf_flags { |
1872 | MF_COUNT_INCREASED = 1 << 0, | 1876 | MF_COUNT_INCREASED = 1 << 0, |
1873 | MF_ACTION_REQUIRED = 1 << 1, | 1877 | MF_ACTION_REQUIRED = 1 << 1, |
1874 | MF_MUST_KILL = 1 << 2, | 1878 | MF_MUST_KILL = 1 << 2, |
1875 | MF_SOFT_OFFLINE = 1 << 3, | 1879 | MF_SOFT_OFFLINE = 1 << 3, |
1876 | }; | 1880 | }; |
1877 | extern int memory_failure(unsigned long pfn, int trapno, int flags); | 1881 | extern int memory_failure(unsigned long pfn, int trapno, int flags); |
1878 | extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); | 1882 | extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); |
1879 | extern int unpoison_memory(unsigned long pfn); | 1883 | extern int unpoison_memory(unsigned long pfn); |
1880 | extern int sysctl_memory_failure_early_kill; | 1884 | extern int sysctl_memory_failure_early_kill; |
1881 | extern int sysctl_memory_failure_recovery; | 1885 | extern int sysctl_memory_failure_recovery; |
1882 | extern void shake_page(struct page *p, int access); | 1886 | extern void shake_page(struct page *p, int access); |
1883 | extern atomic_long_t num_poisoned_pages; | 1887 | extern atomic_long_t num_poisoned_pages; |
1884 | extern int soft_offline_page(struct page *page, int flags); | 1888 | extern int soft_offline_page(struct page *page, int flags); |
1885 | 1889 | ||
1886 | extern void dump_page(struct page *page); | 1890 | extern void dump_page(struct page *page); |
1887 | 1891 | ||
1888 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) | 1892 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) |
1889 | extern void clear_huge_page(struct page *page, | 1893 | extern void clear_huge_page(struct page *page, |
1890 | unsigned long addr, | 1894 | unsigned long addr, |
1891 | unsigned int pages_per_huge_page); | 1895 | unsigned int pages_per_huge_page); |
1892 | extern void copy_user_huge_page(struct page *dst, struct page *src, | 1896 | extern void copy_user_huge_page(struct page *dst, struct page *src, |
1893 | unsigned long addr, struct vm_area_struct *vma, | 1897 | unsigned long addr, struct vm_area_struct *vma, |
1894 | unsigned int pages_per_huge_page); | 1898 | unsigned int pages_per_huge_page); |
1895 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ | 1899 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ |
1896 | 1900 | ||
1897 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1901 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1898 | extern unsigned int _debug_guardpage_minorder; | 1902 | extern unsigned int _debug_guardpage_minorder; |
1899 | 1903 | ||
1900 | static inline unsigned int debug_guardpage_minorder(void) | 1904 | static inline unsigned int debug_guardpage_minorder(void) |
1901 | { | 1905 | { |
1902 | return _debug_guardpage_minorder; | 1906 | return _debug_guardpage_minorder; |
1903 | } | 1907 | } |
1904 | 1908 | ||
1905 | static inline bool page_is_guard(struct page *page) | 1909 | static inline bool page_is_guard(struct page *page) |
1906 | { | 1910 | { |
1907 | return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags); | 1911 | return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags); |
1908 | } | 1912 | } |
1909 | #else | 1913 | #else |
1910 | static inline unsigned int debug_guardpage_minorder(void) { return 0; } | 1914 | static inline unsigned int debug_guardpage_minorder(void) { return 0; } |
1911 | static inline bool page_is_guard(struct page *page) { return false; } | 1915 | static inline bool page_is_guard(struct page *page) { return false; } |
1912 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | 1916 | #endif /* CONFIG_DEBUG_PAGEALLOC */ |
1913 | 1917 | ||
1914 | #if MAX_NUMNODES > 1 | 1918 | #if MAX_NUMNODES > 1 |
1915 | void __init setup_nr_node_ids(void); | 1919 | void __init setup_nr_node_ids(void); |
1916 | #else | 1920 | #else |
1917 | static inline void setup_nr_node_ids(void) {} | 1921 | static inline void setup_nr_node_ids(void) {} |
1918 | #endif | 1922 | #endif |
1919 | 1923 | ||
1920 | #endif /* __KERNEL__ */ | 1924 | #endif /* __KERNEL__ */ |
1921 | #endif /* _LINUX_MM_H */ | 1925 | #endif /* _LINUX_MM_H */ |
1922 | 1926 |
mm/memblock.c
1 | /* | 1 | /* |
2 | * Procedures for maintaining information about logical memory blocks. | 2 | * Procedures for maintaining information about logical memory blocks. |
3 | * | 3 | * |
4 | * Peter Bergner, IBM Corp. June 2001. | 4 | * Peter Bergner, IBM Corp. June 2001. |
5 | * Copyright (C) 2001 Peter Bergner. | 5 | * Copyright (C) 2001 Peter Bergner. |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or | 7 | * This program is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU General Public License | 8 | * modify it under the terms of the GNU General Public License |
9 | * as published by the Free Software Foundation; either version | 9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/bitops.h> | 16 | #include <linux/bitops.h> |
17 | #include <linux/poison.h> | 17 | #include <linux/poison.h> |
18 | #include <linux/pfn.h> | 18 | #include <linux/pfn.h> |
19 | #include <linux/debugfs.h> | 19 | #include <linux/debugfs.h> |
20 | #include <linux/seq_file.h> | 20 | #include <linux/seq_file.h> |
21 | #include <linux/memblock.h> | 21 | #include <linux/memblock.h> |
22 | 22 | ||
23 | #include <asm-generic/sections.h> | ||
24 | |||
23 | static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; | 25 | static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; |
24 | static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; | 26 | static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; |
25 | 27 | ||
26 | struct memblock memblock __initdata_memblock = { | 28 | struct memblock memblock __initdata_memblock = { |
27 | .memory.regions = memblock_memory_init_regions, | 29 | .memory.regions = memblock_memory_init_regions, |
28 | .memory.cnt = 1, /* empty dummy entry */ | 30 | .memory.cnt = 1, /* empty dummy entry */ |
29 | .memory.max = INIT_MEMBLOCK_REGIONS, | 31 | .memory.max = INIT_MEMBLOCK_REGIONS, |
30 | 32 | ||
31 | .reserved.regions = memblock_reserved_init_regions, | 33 | .reserved.regions = memblock_reserved_init_regions, |
32 | .reserved.cnt = 1, /* empty dummy entry */ | 34 | .reserved.cnt = 1, /* empty dummy entry */ |
33 | .reserved.max = INIT_MEMBLOCK_REGIONS, | 35 | .reserved.max = INIT_MEMBLOCK_REGIONS, |
34 | 36 | ||
37 | .bottom_up = false, | ||
35 | .current_limit = MEMBLOCK_ALLOC_ANYWHERE, | 38 | .current_limit = MEMBLOCK_ALLOC_ANYWHERE, |
36 | }; | 39 | }; |
37 | 40 | ||
38 | int memblock_debug __initdata_memblock; | 41 | int memblock_debug __initdata_memblock; |
39 | static int memblock_can_resize __initdata_memblock; | 42 | static int memblock_can_resize __initdata_memblock; |
40 | static int memblock_memory_in_slab __initdata_memblock = 0; | 43 | static int memblock_memory_in_slab __initdata_memblock = 0; |
41 | static int memblock_reserved_in_slab __initdata_memblock = 0; | 44 | static int memblock_reserved_in_slab __initdata_memblock = 0; |
42 | 45 | ||
43 | /* inline so we don't get a warning when pr_debug is compiled out */ | 46 | /* inline so we don't get a warning when pr_debug is compiled out */ |
44 | static __init_memblock const char * | 47 | static __init_memblock const char * |
45 | memblock_type_name(struct memblock_type *type) | 48 | memblock_type_name(struct memblock_type *type) |
46 | { | 49 | { |
47 | if (type == &memblock.memory) | 50 | if (type == &memblock.memory) |
48 | return "memory"; | 51 | return "memory"; |
49 | else if (type == &memblock.reserved) | 52 | else if (type == &memblock.reserved) |
50 | return "reserved"; | 53 | return "reserved"; |
51 | else | 54 | else |
52 | return "unknown"; | 55 | return "unknown"; |
53 | } | 56 | } |
54 | 57 | ||
55 | /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ | 58 | /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ |
56 | static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) | 59 | static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) |
57 | { | 60 | { |
58 | return *size = min(*size, (phys_addr_t)ULLONG_MAX - base); | 61 | return *size = min(*size, (phys_addr_t)ULLONG_MAX - base); |
59 | } | 62 | } |
60 | 63 | ||
61 | /* | 64 | /* |
62 | * Address comparison utilities | 65 | * Address comparison utilities |
63 | */ | 66 | */ |
64 | static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, | 67 | static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, |
65 | phys_addr_t base2, phys_addr_t size2) | 68 | phys_addr_t base2, phys_addr_t size2) |
66 | { | 69 | { |
67 | return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); | 70 | return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); |
68 | } | 71 | } |
69 | 72 | ||
70 | static long __init_memblock memblock_overlaps_region(struct memblock_type *type, | 73 | static long __init_memblock memblock_overlaps_region(struct memblock_type *type, |
71 | phys_addr_t base, phys_addr_t size) | 74 | phys_addr_t base, phys_addr_t size) |
72 | { | 75 | { |
73 | unsigned long i; | 76 | unsigned long i; |
74 | 77 | ||
75 | for (i = 0; i < type->cnt; i++) { | 78 | for (i = 0; i < type->cnt; i++) { |
76 | phys_addr_t rgnbase = type->regions[i].base; | 79 | phys_addr_t rgnbase = type->regions[i].base; |
77 | phys_addr_t rgnsize = type->regions[i].size; | 80 | phys_addr_t rgnsize = type->regions[i].size; |
78 | if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) | 81 | if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) |
79 | break; | 82 | break; |
80 | } | 83 | } |
81 | 84 | ||
82 | return (i < type->cnt) ? i : -1; | 85 | return (i < type->cnt) ? i : -1; |
83 | } | 86 | } |
84 | 87 | ||
88 | /* | ||
89 | * __memblock_find_range_bottom_up - find free area utility in bottom-up | ||
90 | * @start: start of candidate range | ||
91 | * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} | ||
92 | * @size: size of free area to find | ||
93 | * @align: alignment of free area to find | ||
94 | * @nid: nid of the free area to find, %MAX_NUMNODES for any node | ||
95 | * | ||
96 | * Utility called from memblock_find_in_range_node(), find free area bottom-up. | ||
97 | * | ||
98 | * RETURNS: | ||
99 | * Found address on success, 0 on failure. | ||
100 | */ | ||
101 | static phys_addr_t __init_memblock | ||
102 | __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, | ||
103 | phys_addr_t size, phys_addr_t align, int nid) | ||
104 | { | ||
105 | phys_addr_t this_start, this_end, cand; | ||
106 | u64 i; | ||
107 | |||
108 | for_each_free_mem_range(i, nid, &this_start, &this_end, NULL) { | ||
109 | this_start = clamp(this_start, start, end); | ||
110 | this_end = clamp(this_end, start, end); | ||
111 | |||
112 | cand = round_up(this_start, align); | ||
113 | if (cand < this_end && this_end - cand >= size) | ||
114 | return cand; | ||
115 | } | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
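A quick worked example of the candidate computation above, using hypothetical numbers: if a free range [0x1000, 0x9000) is clamped to [start, end) = [0x2000, 0x8000) and the request is size = 0x3000 with align = 0x1000, then cand = round_up(0x2000, 0x1000) = 0x2000 and this_end - cand = 0x6000 >= size, so 0x2000 is returned. With the same inputs, the top-down helper below would instead return round_down(0x8000 - 0x3000, 0x1000) = 0x5000, i.e. the highest rather than the lowest fitting address.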
85 | /** | 120 | /** |
86 | * __memblock_find_range_top_down - find free area utility, in top-down | 121 | * __memblock_find_range_top_down - find free area utility, in top-down |
87 | * @start: start of candidate range | 122 | * @start: start of candidate range |
88 | * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} | 123 | * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} |
89 | * @size: size of free area to find | 124 | * @size: size of free area to find |
90 | * @align: alignment of free area to find | 125 | * @align: alignment of free area to find |
91 | * @nid: nid of the free area to find, %MAX_NUMNODES for any node | 126 | * @nid: nid of the free area to find, %MAX_NUMNODES for any node |
92 | * | 127 | * |
93 | * Utility called from memblock_find_in_range_node(), find free area top-down. | 128 | * Utility called from memblock_find_in_range_node(), find free area top-down. |
94 | * | 129 | * |
95 | * RETURNS: | 130 | * RETURNS: |
96 | * Found address on success, %0 on failure. | 131 | * Found address on success, 0 on failure. |
97 | */ | 132 | */ |
98 | static phys_addr_t __init_memblock | 133 | static phys_addr_t __init_memblock |
99 | __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, | 134 | __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, |
100 | phys_addr_t size, phys_addr_t align, int nid) | 135 | phys_addr_t size, phys_addr_t align, int nid) |
101 | { | 136 | { |
102 | phys_addr_t this_start, this_end, cand; | 137 | phys_addr_t this_start, this_end, cand; |
103 | u64 i; | 138 | u64 i; |
104 | 139 | ||
105 | for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) { | 140 | for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) { |
106 | this_start = clamp(this_start, start, end); | 141 | this_start = clamp(this_start, start, end); |
107 | this_end = clamp(this_end, start, end); | 142 | this_end = clamp(this_end, start, end); |
108 | 143 | ||
109 | if (this_end < size) | 144 | if (this_end < size) |
110 | continue; | 145 | continue; |
111 | 146 | ||
112 | cand = round_down(this_end - size, align); | 147 | cand = round_down(this_end - size, align); |
113 | if (cand >= this_start) | 148 | if (cand >= this_start) |
114 | return cand; | 149 | return cand; |
115 | } | 150 | } |
116 | 151 | ||
117 | return 0; | 152 | return 0; |
118 | } | 153 | } |
119 | 154 | ||
120 | /** | 155 | /** |
121 | * memblock_find_in_range_node - find free area in given range and node | 156 | * memblock_find_in_range_node - find free area in given range and node |
122 | * @start: start of candidate range | 157 | * @start: start of candidate range |
123 | * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} | 158 | * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} |
124 | * @size: size of free area to find | 159 | * @size: size of free area to find |
125 | * @align: alignment of free area to find | 160 | * @align: alignment of free area to find |
126 | * @nid: nid of the free area to find, %MAX_NUMNODES for any node | 161 | * @nid: nid of the free area to find, %MAX_NUMNODES for any node |
127 | * | 162 | * |
128 | * Find @size free area aligned to @align in the specified range and node. | 163 | * Find @size free area aligned to @align in the specified range and node. |
129 | * | 164 | * |
165 | * When the allocation direction is bottom-up, @start should be greater | ||
166 | * than the end of the kernel image. Otherwise, it will be trimmed. The | ||
167 | * reason is that we want bottom-up allocations to stay close to the | ||
168 | * kernel image, so that the allocated memory is highly likely to reside | ||
169 | * in the same node as the kernel. | ||
170 | * | ||
171 | * If bottom-up allocation fails, the search falls back to top-down. | ||
172 | * | ||
130 | * RETURNS: | 173 | * RETURNS: |
131 | * Found address on success, %0 on failure. | 174 | * Found address on success, 0 on failure. |
132 | */ | 175 | */ |
133 | phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, | 176 | phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, |
134 | phys_addr_t end, phys_addr_t size, | 177 | phys_addr_t end, phys_addr_t size, |
135 | phys_addr_t align, int nid) | 178 | phys_addr_t align, int nid) |
136 | { | 179 | { |
180 | int ret; | ||
181 | phys_addr_t kernel_end; | ||
182 | |||
137 | /* pump up @end */ | 183 | /* pump up @end */ |
138 | if (end == MEMBLOCK_ALLOC_ACCESSIBLE) | 184 | if (end == MEMBLOCK_ALLOC_ACCESSIBLE) |
139 | end = memblock.current_limit; | 185 | end = memblock.current_limit; |
140 | 186 | ||
141 | /* avoid allocating the first page */ | 187 | /* avoid allocating the first page */ |
142 | start = max_t(phys_addr_t, start, PAGE_SIZE); | 188 | start = max_t(phys_addr_t, start, PAGE_SIZE); |
143 | end = max(start, end); | 189 | end = max(start, end); |
190 | kernel_end = __pa_symbol(_end); | ||
144 | 191 | ||
192 | /* | ||
193 | * try bottom-up allocation only when bottom-up mode | ||
194 | * is set and @end is above the kernel image. | ||
195 | */ | ||
196 | if (memblock_bottom_up() && end > kernel_end) { | ||
197 | phys_addr_t bottom_up_start; | ||
198 | |||
199 | /* make sure we will allocate above the kernel */ | ||
200 | bottom_up_start = max(start, kernel_end); | ||
201 | |||
202 | /* ok, try bottom-up allocation first */ | ||
203 | ret = __memblock_find_range_bottom_up(bottom_up_start, end, | ||
204 | size, align, nid); | ||
205 | if (ret) | ||
206 | return ret; | ||
207 | |||
208 | /* | ||
209 | * we always limit bottom-up allocation above the kernel, | ||
210 | * but top-down allocation doesn't have this limit, so | ||
211 | * retrying top-down allocation may succeed when bottom-up | ||
212 | * allocation fails. | ||
213 | * | ||
214 | * bottom-up allocation is expected to fail very rarely, | ||
215 | * so we use WARN_ONCE() here to get the stack trace if | ||
216 | * such a failure ever happens. | ||
217 | */ | ||
218 | WARN_ONCE(1, "memblock: bottom-up allocation failed, " | ||
219 | "memory hotunplug may be affected\n"); | ||
220 | } | ||
221 | |||
145 | return __memblock_find_range_top_down(start, end, size, align, nid); | 222 | return __memblock_find_range_top_down(start, end, size, align, nid); |
146 | } | 223 | } |
147 | 224 | ||
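To put the new direction flag in context, here is a minimal sketch of how architecture setup code could drive it around SRAT parsing. memblock_set_bottom_up() and memblock_bottom_up() are the accessor helpers this patch adds to include/linux/memblock.h; the wrapper function and the exact call sites below are hypothetical, for illustration only.

/*
 * Illustrative sketch only -- not part of this patch.  Keep early
 * allocations just above the kernel image until the SRAT (and thus the
 * memory hotplug info) has been parsed, then restore the default
 * top-down behaviour.
 */
static void __init sketch_early_mem_setup(void)
{
	phys_addr_t addr;

	memblock_set_bottom_up(true);		/* before SRAT is parsed */

	/* lands above __pa_symbol(_end) when possible, per the code above */
	addr = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	pr_debug("early allocation at %pa\n", &addr);

	/* ... parse SRAT, mark hotpluggable ranges ... */

	memblock_set_bottom_up(false);		/* back to top-down */
}

Note that callers never have to handle the direction themselves: as memblock_find_in_range_node() above shows, a failed bottom-up search triggers WARN_ONCE() and then retries top-down.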
148 | /** | 225 | /** |
149 | * memblock_find_in_range - find free area in given range | 226 | * memblock_find_in_range - find free area in given range |
150 | * @start: start of candidate range | 227 | * @start: start of candidate range |
151 | * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} | 228 | * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} |
152 | * @size: size of free area to find | 229 | * @size: size of free area to find |
153 | * @align: alignment of free area to find | 230 | * @align: alignment of free area to find |
154 | * | 231 | * |
155 | * Find @size free area aligned to @align in the specified range. | 232 | * Find @size free area aligned to @align in the specified range. |
156 | * | 233 | * |
157 | * RETURNS: | 234 | * RETURNS: |
158 | * Found address on success, %0 on failure. | 235 | * Found address on success, 0 on failure. |
159 | */ | 236 | */ |
160 | phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, | 237 | phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, |
161 | phys_addr_t end, phys_addr_t size, | 238 | phys_addr_t end, phys_addr_t size, |
162 | phys_addr_t align) | 239 | phys_addr_t align) |
163 | { | 240 | { |
164 | return memblock_find_in_range_node(start, end, size, align, | 241 | return memblock_find_in_range_node(start, end, size, align, |
165 | MAX_NUMNODES); | 242 | MAX_NUMNODES); |
166 | } | 243 | } |
167 | 244 | ||
168 | static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) | 245 | static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) |
169 | { | 246 | { |
170 | type->total_size -= type->regions[r].size; | 247 | type->total_size -= type->regions[r].size; |
171 | memmove(&type->regions[r], &type->regions[r + 1], | 248 | memmove(&type->regions[r], &type->regions[r + 1], |
172 | (type->cnt - (r + 1)) * sizeof(type->regions[r])); | 249 | (type->cnt - (r + 1)) * sizeof(type->regions[r])); |
173 | type->cnt--; | 250 | type->cnt--; |
174 | 251 | ||
175 | /* Special case for empty arrays */ | 252 | /* Special case for empty arrays */ |
176 | if (type->cnt == 0) { | 253 | if (type->cnt == 0) { |
177 | WARN_ON(type->total_size != 0); | 254 | WARN_ON(type->total_size != 0); |
178 | type->cnt = 1; | 255 | type->cnt = 1; |
179 | type->regions[0].base = 0; | 256 | type->regions[0].base = 0; |
180 | type->regions[0].size = 0; | 257 | type->regions[0].size = 0; |
181 | memblock_set_region_node(&type->regions[0], MAX_NUMNODES); | 258 | memblock_set_region_node(&type->regions[0], MAX_NUMNODES); |
182 | } | 259 | } |
183 | } | 260 | } |
184 | 261 | ||
185 | phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( | 262 | phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( |
186 | phys_addr_t *addr) | 263 | phys_addr_t *addr) |
187 | { | 264 | { |
188 | if (memblock.reserved.regions == memblock_reserved_init_regions) | 265 | if (memblock.reserved.regions == memblock_reserved_init_regions) |
189 | return 0; | 266 | return 0; |
190 | 267 | ||
191 | *addr = __pa(memblock.reserved.regions); | 268 | *addr = __pa(memblock.reserved.regions); |
192 | 269 | ||
193 | return PAGE_ALIGN(sizeof(struct memblock_region) * | 270 | return PAGE_ALIGN(sizeof(struct memblock_region) * |
194 | memblock.reserved.max); | 271 | memblock.reserved.max); |
195 | } | 272 | } |
196 | 273 | ||
197 | /** | 274 | /** |
198 | * memblock_double_array - double the size of the memblock regions array | 275 | * memblock_double_array - double the size of the memblock regions array |
199 | * @type: memblock type of the regions array being doubled | 276 | * @type: memblock type of the regions array being doubled |
200 | * @new_area_start: starting address of memory range to avoid overlap with | 277 | * @new_area_start: starting address of memory range to avoid overlap with |
201 | * @new_area_size: size of memory range to avoid overlap with | 278 | * @new_area_size: size of memory range to avoid overlap with |
202 | * | 279 | * |
203 | * Double the size of the @type regions array. If memblock is being used to | 280 | * Double the size of the @type regions array. If memblock is being used to |
204 | * allocate memory for a new reserved regions array and there is a previously | 281 | * allocate memory for a new reserved regions array and there is a previously |
205 | * allocated memory range [@new_area_start,@new_area_start+@new_area_size] | 282 | * allocated memory range [@new_area_start,@new_area_start+@new_area_size] |
206 | * waiting to be reserved, ensure the memory used by the new array does | 283 | * waiting to be reserved, ensure the memory used by the new array does |
207 | * not overlap. | 284 | * not overlap. |
208 | * | 285 | * |
209 | * RETURNS: | 286 | * RETURNS: |
210 | * 0 on success, -1 on failure. | 287 | * 0 on success, -1 on failure. |
211 | */ | 288 | */ |
212 | static int __init_memblock memblock_double_array(struct memblock_type *type, | 289 | static int __init_memblock memblock_double_array(struct memblock_type *type, |
213 | phys_addr_t new_area_start, | 290 | phys_addr_t new_area_start, |
214 | phys_addr_t new_area_size) | 291 | phys_addr_t new_area_size) |
215 | { | 292 | { |
216 | struct memblock_region *new_array, *old_array; | 293 | struct memblock_region *new_array, *old_array; |
217 | phys_addr_t old_alloc_size, new_alloc_size; | 294 | phys_addr_t old_alloc_size, new_alloc_size; |
218 | phys_addr_t old_size, new_size, addr; | 295 | phys_addr_t old_size, new_size, addr; |
219 | int use_slab = slab_is_available(); | 296 | int use_slab = slab_is_available(); |
220 | int *in_slab; | 297 | int *in_slab; |
221 | 298 | ||
222 | /* We don't allow resizing until we know about the reserved regions | 299 | /* We don't allow resizing until we know about the reserved regions |
223 | * of memory that aren't suitable for allocation | 300 | * of memory that aren't suitable for allocation |
224 | */ | 301 | */ |
225 | if (!memblock_can_resize) | 302 | if (!memblock_can_resize) |
226 | return -1; | 303 | return -1; |
227 | 304 | ||
228 | /* Calculate new doubled size */ | 305 | /* Calculate new doubled size */ |
229 | old_size = type->max * sizeof(struct memblock_region); | 306 | old_size = type->max * sizeof(struct memblock_region); |
230 | new_size = old_size << 1; | 307 | new_size = old_size << 1; |
231 | /* | 308 | /* |
232 | * We need to allocate the new one aligned to PAGE_SIZE, | 309 | * We need to allocate the new one aligned to PAGE_SIZE, |
233 | * so we can free it completely later. | 310 | * so we can free it completely later. |
234 | */ | 311 | */ |
235 | old_alloc_size = PAGE_ALIGN(old_size); | 312 | old_alloc_size = PAGE_ALIGN(old_size); |
236 | new_alloc_size = PAGE_ALIGN(new_size); | 313 | new_alloc_size = PAGE_ALIGN(new_size); |
237 | 314 | ||
238 | /* Retrieve the slab flag */ | 315 | /* Retrieve the slab flag */ |
239 | if (type == &memblock.memory) | 316 | if (type == &memblock.memory) |
240 | in_slab = &memblock_memory_in_slab; | 317 | in_slab = &memblock_memory_in_slab; |
241 | else | 318 | else |
242 | in_slab = &memblock_reserved_in_slab; | 319 | in_slab = &memblock_reserved_in_slab; |
243 | 320 | ||
244 | /* Try to find some space for it. | 321 | /* Try to find some space for it. |
245 | * | 322 | * |
246 | * WARNING: We assume that either slab_is_available() and we use it or | 323 | * WARNING: We assume that either slab_is_available() and we use it or |
247 | * we use MEMBLOCK for allocations. That means that this is unsafe to | 324 | * we use MEMBLOCK for allocations. That means that this is unsafe to |
248 | * use when bootmem is currently active (unless bootmem itself is | 325 | * use when bootmem is currently active (unless bootmem itself is |
249 | * implemented on top of MEMBLOCK which isn't the case yet) | 326 | * implemented on top of MEMBLOCK which isn't the case yet) |
250 | * | 327 | * |
251 | * This should however not be an issue for now, as we currently only | 328 | * This should however not be an issue for now, as we currently only |
252 | * call into MEMBLOCK while it's still active, or much later when slab | 329 | * call into MEMBLOCK while it's still active, or much later when slab |
253 | * is active for memory hotplug operations | 330 | * is active for memory hotplug operations |
254 | */ | 331 | */ |
255 | if (use_slab) { | 332 | if (use_slab) { |
256 | new_array = kmalloc(new_size, GFP_KERNEL); | 333 | new_array = kmalloc(new_size, GFP_KERNEL); |
257 | addr = new_array ? __pa(new_array) : 0; | 334 | addr = new_array ? __pa(new_array) : 0; |
258 | } else { | 335 | } else { |
259 | /* only exclude range when trying to double reserved.regions */ | 336 | /* only exclude range when trying to double reserved.regions */ |
260 | if (type != &memblock.reserved) | 337 | if (type != &memblock.reserved) |
261 | new_area_start = new_area_size = 0; | 338 | new_area_start = new_area_size = 0; |
262 | 339 | ||
263 | addr = memblock_find_in_range(new_area_start + new_area_size, | 340 | addr = memblock_find_in_range(new_area_start + new_area_size, |
264 | memblock.current_limit, | 341 | memblock.current_limit, |
265 | new_alloc_size, PAGE_SIZE); | 342 | new_alloc_size, PAGE_SIZE); |
266 | if (!addr && new_area_size) | 343 | if (!addr && new_area_size) |
267 | addr = memblock_find_in_range(0, | 344 | addr = memblock_find_in_range(0, |
268 | min(new_area_start, memblock.current_limit), | 345 | min(new_area_start, memblock.current_limit), |
269 | new_alloc_size, PAGE_SIZE); | 346 | new_alloc_size, PAGE_SIZE); |
270 | 347 | ||
271 | new_array = addr ? __va(addr) : NULL; | 348 | new_array = addr ? __va(addr) : NULL; |
272 | } | 349 | } |
273 | if (!addr) { | 350 | if (!addr) { |
274 | pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", | 351 | pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", |
275 | memblock_type_name(type), type->max, type->max * 2); | 352 | memblock_type_name(type), type->max, type->max * 2); |
276 | return -1; | 353 | return -1; |
277 | } | 354 | } |
278 | 355 | ||
279 | memblock_dbg("memblock: %s is doubled to %ld at [%#010llx-%#010llx]", | 356 | memblock_dbg("memblock: %s is doubled to %ld at [%#010llx-%#010llx]", |
280 | memblock_type_name(type), type->max * 2, (u64)addr, | 357 | memblock_type_name(type), type->max * 2, (u64)addr, |
281 | (u64)addr + new_size - 1); | 358 | (u64)addr + new_size - 1); |
282 | 359 | ||
283 | /* | 360 | /* |
284 | * Found space, we now need to move the array over before we add the | 361 | * Found space, we now need to move the array over before we add the |
285 | * reserved region since it may be our reserved array itself that is | 362 | * reserved region since it may be our reserved array itself that is |
286 | * full. | 363 | * full. |
287 | */ | 364 | */ |
288 | memcpy(new_array, type->regions, old_size); | 365 | memcpy(new_array, type->regions, old_size); |
289 | memset(new_array + type->max, 0, old_size); | 366 | memset(new_array + type->max, 0, old_size); |
290 | old_array = type->regions; | 367 | old_array = type->regions; |
291 | type->regions = new_array; | 368 | type->regions = new_array; |
292 | type->max <<= 1; | 369 | type->max <<= 1; |
293 | 370 | ||
294 | /* Free old array. We needn't free it if the array is the static one */ | 371 | /* Free old array. We needn't free it if the array is the static one */ |
295 | if (*in_slab) | 372 | if (*in_slab) |
296 | kfree(old_array); | 373 | kfree(old_array); |
297 | else if (old_array != memblock_memory_init_regions && | 374 | else if (old_array != memblock_memory_init_regions && |
298 | old_array != memblock_reserved_init_regions) | 375 | old_array != memblock_reserved_init_regions) |
299 | memblock_free(__pa(old_array), old_alloc_size); | 376 | memblock_free(__pa(old_array), old_alloc_size); |
300 | 377 | ||
301 | /* | 378 | /* |
302 | * Reserve the new array if that comes from the memblock. Otherwise, we | 379 | * Reserve the new array if that comes from the memblock. Otherwise, we |
303 | * needn't do it | 380 | * needn't do it |
304 | */ | 381 | */ |
305 | if (!use_slab) | 382 | if (!use_slab) |
306 | BUG_ON(memblock_reserve(addr, new_alloc_size)); | 383 | BUG_ON(memblock_reserve(addr, new_alloc_size)); |
307 | 384 | ||
308 | /* Update slab flag */ | 385 | /* Update slab flag */ |
309 | *in_slab = use_slab; | 386 | *in_slab = use_slab; |
310 | 387 | ||
311 | return 0; | 388 | return 0; |
312 | } | 389 | } |
313 | 390 | ||
314 | /** | 391 | /** |
315 | * memblock_merge_regions - merge neighboring compatible regions | 392 | * memblock_merge_regions - merge neighboring compatible regions |
316 | * @type: memblock type to scan | 393 | * @type: memblock type to scan |
317 | * | 394 | * |
318 | * Scan @type and merge neighboring compatible regions. | 395 | * Scan @type and merge neighboring compatible regions. |
319 | */ | 396 | */ |
320 | static void __init_memblock memblock_merge_regions(struct memblock_type *type) | 397 | static void __init_memblock memblock_merge_regions(struct memblock_type *type) |
321 | { | 398 | { |
322 | int i = 0; | 399 | int i = 0; |
323 | 400 | ||
324 | /* cnt never goes below 1 */ | 401 | /* cnt never goes below 1 */ |
325 | while (i < type->cnt - 1) { | 402 | while (i < type->cnt - 1) { |
326 | struct memblock_region *this = &type->regions[i]; | 403 | struct memblock_region *this = &type->regions[i]; |
327 | struct memblock_region *next = &type->regions[i + 1]; | 404 | struct memblock_region *next = &type->regions[i + 1]; |
328 | 405 | ||
329 | if (this->base + this->size != next->base || | 406 | if (this->base + this->size != next->base || |
330 | memblock_get_region_node(this) != | 407 | memblock_get_region_node(this) != |
331 | memblock_get_region_node(next)) { | 408 | memblock_get_region_node(next)) { |
332 | BUG_ON(this->base + this->size > next->base); | 409 | BUG_ON(this->base + this->size > next->base); |
333 | i++; | 410 | i++; |
334 | continue; | 411 | continue; |
335 | } | 412 | } |
336 | 413 | ||
337 | this->size += next->size; | 414 | this->size += next->size; |
338 | /* move forward from next + 1, index of which is i + 2 */ | 415 | /* move forward from next + 1, index of which is i + 2 */ |
339 | memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); | 416 | memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); |
340 | type->cnt--; | 417 | type->cnt--; |
341 | } | 418 | } |
342 | } | 419 | } |
343 | 420 | ||
344 | /** | 421 | /** |
345 | * memblock_insert_region - insert new memblock region | 422 | * memblock_insert_region - insert new memblock region |
346 | * @type: memblock type to insert into | 423 | * @type: memblock type to insert into |
347 | * @idx: index for the insertion point | 424 | * @idx: index for the insertion point |
348 | * @base: base address of the new region | 425 | * @base: base address of the new region |
349 | * @size: size of the new region | 426 | * @size: size of the new region |
350 | * @nid: node id of the new region | 427 | * @nid: node id of the new region |
351 | * | 428 | * |
352 | * Insert new memblock region [@base,@base+@size) into @type at @idx. | 429 | * Insert new memblock region [@base,@base+@size) into @type at @idx. |
353 | * @type must already have extra room to accommodate the new region. | 430 | * @type must already have extra room to accommodate the new region. |
354 | */ | 431 | */ |
355 | static void __init_memblock memblock_insert_region(struct memblock_type *type, | 432 | static void __init_memblock memblock_insert_region(struct memblock_type *type, |
356 | int idx, phys_addr_t base, | 433 | int idx, phys_addr_t base, |
357 | phys_addr_t size, int nid) | 434 | phys_addr_t size, int nid) |
358 | { | 435 | { |
359 | struct memblock_region *rgn = &type->regions[idx]; | 436 | struct memblock_region *rgn = &type->regions[idx]; |
360 | 437 | ||
361 | BUG_ON(type->cnt >= type->max); | 438 | BUG_ON(type->cnt >= type->max); |
362 | memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); | 439 | memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); |
363 | rgn->base = base; | 440 | rgn->base = base; |
364 | rgn->size = size; | 441 | rgn->size = size; |
365 | memblock_set_region_node(rgn, nid); | 442 | memblock_set_region_node(rgn, nid); |
366 | type->cnt++; | 443 | type->cnt++; |
367 | type->total_size += size; | 444 | type->total_size += size; |
368 | } | 445 | } |
369 | 446 | ||
370 | /** | 447 | /** |
371 | * memblock_add_region - add new memblock region | 448 | * memblock_add_region - add new memblock region |
372 | * @type: memblock type to add new region into | 449 | * @type: memblock type to add new region into |
373 | * @base: base address of the new region | 450 | * @base: base address of the new region |
374 | * @size: size of the new region | 451 | * @size: size of the new region |
375 | * @nid: nid of the new region | 452 | * @nid: nid of the new region |
376 | * | 453 | * |
377 | * Add new memblock region [@base,@base+@size) into @type. The new region | 454 | * Add new memblock region [@base,@base+@size) into @type. The new region |
378 | * is allowed to overlap with existing ones - overlaps don't affect already | 455 | * is allowed to overlap with existing ones - overlaps don't affect already |
379 | * existing regions. @type is guaranteed to be minimal (all neighbouring | 456 | * existing regions. @type is guaranteed to be minimal (all neighbouring |
380 | * compatible regions are merged) after the addition. | 457 | * compatible regions are merged) after the addition. |
381 | * | 458 | * |
382 | * RETURNS: | 459 | * RETURNS: |
383 | * 0 on success, -errno on failure. | 460 | * 0 on success, -errno on failure. |
384 | */ | 461 | */ |
385 | static int __init_memblock memblock_add_region(struct memblock_type *type, | 462 | static int __init_memblock memblock_add_region(struct memblock_type *type, |
386 | phys_addr_t base, phys_addr_t size, int nid) | 463 | phys_addr_t base, phys_addr_t size, int nid) |
387 | { | 464 | { |
388 | bool insert = false; | 465 | bool insert = false; |
389 | phys_addr_t obase = base; | 466 | phys_addr_t obase = base; |
390 | phys_addr_t end = base + memblock_cap_size(base, &size); | 467 | phys_addr_t end = base + memblock_cap_size(base, &size); |
391 | int i, nr_new; | 468 | int i, nr_new; |
392 | 469 | ||
393 | if (!size) | 470 | if (!size) |
394 | return 0; | 471 | return 0; |
395 | 472 | ||
396 | /* special case for empty array */ | 473 | /* special case for empty array */ |
397 | if (type->regions[0].size == 0) { | 474 | if (type->regions[0].size == 0) { |
398 | WARN_ON(type->cnt != 1 || type->total_size); | 475 | WARN_ON(type->cnt != 1 || type->total_size); |
399 | type->regions[0].base = base; | 476 | type->regions[0].base = base; |
400 | type->regions[0].size = size; | 477 | type->regions[0].size = size; |
401 | memblock_set_region_node(&type->regions[0], nid); | 478 | memblock_set_region_node(&type->regions[0], nid); |
402 | type->total_size = size; | 479 | type->total_size = size; |
403 | return 0; | 480 | return 0; |
404 | } | 481 | } |
405 | repeat: | 482 | repeat: |
406 | /* | 483 | /* |
407 | * The following is executed twice. Once with %false @insert and | 484 | * The following is executed twice. Once with %false @insert and |
408 | * then with %true. The first counts the number of regions needed | 485 | * then with %true. The first counts the number of regions needed |
409 | * to accommodate the new area. The second actually inserts them. | 486 | * to accommodate the new area. The second actually inserts them. |
410 | */ | 487 | */ |
411 | base = obase; | 488 | base = obase; |
412 | nr_new = 0; | 489 | nr_new = 0; |
413 | 490 | ||
414 | for (i = 0; i < type->cnt; i++) { | 491 | for (i = 0; i < type->cnt; i++) { |
415 | struct memblock_region *rgn = &type->regions[i]; | 492 | struct memblock_region *rgn = &type->regions[i]; |
416 | phys_addr_t rbase = rgn->base; | 493 | phys_addr_t rbase = rgn->base; |
417 | phys_addr_t rend = rbase + rgn->size; | 494 | phys_addr_t rend = rbase + rgn->size; |
418 | 495 | ||
419 | if (rbase >= end) | 496 | if (rbase >= end) |
420 | break; | 497 | break; |
421 | if (rend <= base) | 498 | if (rend <= base) |
422 | continue; | 499 | continue; |
423 | /* | 500 | /* |
424 | * @rgn overlaps. If it separates the lower part of new | 501 | * @rgn overlaps. If it separates the lower part of new |
425 | * area, insert that portion. | 502 | * area, insert that portion. |
426 | */ | 503 | */ |
427 | if (rbase > base) { | 504 | if (rbase > base) { |
428 | nr_new++; | 505 | nr_new++; |
429 | if (insert) | 506 | if (insert) |
430 | memblock_insert_region(type, i++, base, | 507 | memblock_insert_region(type, i++, base, |
431 | rbase - base, nid); | 508 | rbase - base, nid); |
432 | } | 509 | } |
433 | /* area below @rend is dealt with, forget about it */ | 510 | /* area below @rend is dealt with, forget about it */ |
434 | base = min(rend, end); | 511 | base = min(rend, end); |
435 | } | 512 | } |
436 | 513 | ||
437 | /* insert the remaining portion */ | 514 | /* insert the remaining portion */ |
438 | if (base < end) { | 515 | if (base < end) { |
439 | nr_new++; | 516 | nr_new++; |
440 | if (insert) | 517 | if (insert) |
441 | memblock_insert_region(type, i, base, end - base, nid); | 518 | memblock_insert_region(type, i, base, end - base, nid); |
442 | } | 519 | } |
443 | 520 | ||
444 | /* | 521 | /* |
445 | * If this was the first round, resize array and repeat for actual | 522 | * If this was the first round, resize array and repeat for actual |
446 | * insertions; otherwise, merge and return. | 523 | * insertions; otherwise, merge and return. |
447 | */ | 524 | */ |
448 | if (!insert) { | 525 | if (!insert) { |
449 | while (type->cnt + nr_new > type->max) | 526 | while (type->cnt + nr_new > type->max) |
450 | if (memblock_double_array(type, obase, size) < 0) | 527 | if (memblock_double_array(type, obase, size) < 0) |
451 | return -ENOMEM; | 528 | return -ENOMEM; |
452 | insert = true; | 529 | insert = true; |
453 | goto repeat; | 530 | goto repeat; |
454 | } else { | 531 | } else { |
455 | memblock_merge_regions(type); | 532 | memblock_merge_regions(type); |
456 | return 0; | 533 | return 0; |
457 | } | 534 | } |
458 | } | 535 | } |
459 | 536 | ||
460 | int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, | 537 | int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, |
461 | int nid) | 538 | int nid) |
462 | { | 539 | { |
463 | return memblock_add_region(&memblock.memory, base, size, nid); | 540 | return memblock_add_region(&memblock.memory, base, size, nid); |
464 | } | 541 | } |
465 | 542 | ||
466 | int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) | 543 | int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) |
467 | { | 544 | { |
468 | return memblock_add_region(&memblock.memory, base, size, MAX_NUMNODES); | 545 | return memblock_add_region(&memblock.memory, base, size, MAX_NUMNODES); |
469 | } | 546 | } |
470 | 547 | ||
471 | /** | 548 | /** |
472 | * memblock_isolate_range - isolate given range into disjoint memblocks | 549 | * memblock_isolate_range - isolate given range into disjoint memblocks |
473 | * @type: memblock type to isolate range for | 550 | * @type: memblock type to isolate range for |
474 | * @base: base of range to isolate | 551 | * @base: base of range to isolate |
475 | * @size: size of range to isolate | 552 | * @size: size of range to isolate |
476 | * @start_rgn: out parameter for the start of isolated region | 553 | * @start_rgn: out parameter for the start of isolated region |
477 | * @end_rgn: out parameter for the end of isolated region | 554 | * @end_rgn: out parameter for the end of isolated region |
478 | * | 555 | * |
479 | * Walk @type and ensure that regions don't cross the boundaries defined by | 556 | * Walk @type and ensure that regions don't cross the boundaries defined by |
480 | * [@base,@base+@size). Crossing regions are split at the boundaries, | 557 | * [@base,@base+@size). Crossing regions are split at the boundaries, |
481 | * which may create at most two more regions. The index of the first | 558 | * which may create at most two more regions. The index of the first |
482 | * region inside the range is returned in *@start_rgn and end in *@end_rgn. | 559 | * region inside the range is returned in *@start_rgn and end in *@end_rgn. |
483 | * | 560 | * |
484 | * RETURNS: | 561 | * RETURNS: |
485 | * 0 on success, -errno on failure. | 562 | * 0 on success, -errno on failure. |
486 | */ | 563 | */ |
487 | static int __init_memblock memblock_isolate_range(struct memblock_type *type, | 564 | static int __init_memblock memblock_isolate_range(struct memblock_type *type, |
488 | phys_addr_t base, phys_addr_t size, | 565 | phys_addr_t base, phys_addr_t size, |
489 | int *start_rgn, int *end_rgn) | 566 | int *start_rgn, int *end_rgn) |
490 | { | 567 | { |
491 | phys_addr_t end = base + memblock_cap_size(base, &size); | 568 | phys_addr_t end = base + memblock_cap_size(base, &size); |
492 | int i; | 569 | int i; |
493 | 570 | ||
494 | *start_rgn = *end_rgn = 0; | 571 | *start_rgn = *end_rgn = 0; |
495 | 572 | ||
496 | if (!size) | 573 | if (!size) |
497 | return 0; | 574 | return 0; |
498 | 575 | ||
499 | /* we'll create at most two more regions */ | 576 | /* we'll create at most two more regions */ |
500 | while (type->cnt + 2 > type->max) | 577 | while (type->cnt + 2 > type->max) |
501 | if (memblock_double_array(type, base, size) < 0) | 578 | if (memblock_double_array(type, base, size) < 0) |
502 | return -ENOMEM; | 579 | return -ENOMEM; |
503 | 580 | ||
504 | for (i = 0; i < type->cnt; i++) { | 581 | for (i = 0; i < type->cnt; i++) { |
505 | struct memblock_region *rgn = &type->regions[i]; | 582 | struct memblock_region *rgn = &type->regions[i]; |
506 | phys_addr_t rbase = rgn->base; | 583 | phys_addr_t rbase = rgn->base; |
507 | phys_addr_t rend = rbase + rgn->size; | 584 | phys_addr_t rend = rbase + rgn->size; |
508 | 585 | ||
509 | if (rbase >= end) | 586 | if (rbase >= end) |
510 | break; | 587 | break; |
511 | if (rend <= base) | 588 | if (rend <= base) |
512 | continue; | 589 | continue; |
513 | 590 | ||
514 | if (rbase < base) { | 591 | if (rbase < base) { |
515 | /* | 592 | /* |
516 | * @rgn intersects from below. Split and continue | 593 | * @rgn intersects from below. Split and continue |
517 | * to process the next region - the new top half. | 594 | * to process the next region - the new top half. |
518 | */ | 595 | */ |
519 | rgn->base = base; | 596 | rgn->base = base; |
520 | rgn->size -= base - rbase; | 597 | rgn->size -= base - rbase; |
521 | type->total_size -= base - rbase; | 598 | type->total_size -= base - rbase; |
522 | memblock_insert_region(type, i, rbase, base - rbase, | 599 | memblock_insert_region(type, i, rbase, base - rbase, |
523 | memblock_get_region_node(rgn)); | 600 | memblock_get_region_node(rgn)); |
524 | } else if (rend > end) { | 601 | } else if (rend > end) { |
525 | /* | 602 | /* |
526 | * @rgn intersects from above. Split and redo the | 603 | * @rgn intersects from above. Split and redo the |
527 | * current region - the new bottom half. | 604 | * current region - the new bottom half. |
528 | */ | 605 | */ |
529 | rgn->base = end; | 606 | rgn->base = end; |
530 | rgn->size -= end - rbase; | 607 | rgn->size -= end - rbase; |
531 | type->total_size -= end - rbase; | 608 | type->total_size -= end - rbase; |
532 | memblock_insert_region(type, i--, rbase, end - rbase, | 609 | memblock_insert_region(type, i--, rbase, end - rbase, |
533 | memblock_get_region_node(rgn)); | 610 | memblock_get_region_node(rgn)); |
534 | } else { | 611 | } else { |
535 | /* @rgn is fully contained, record it */ | 612 | /* @rgn is fully contained, record it */ |
536 | if (!*end_rgn) | 613 | if (!*end_rgn) |
537 | *start_rgn = i; | 614 | *start_rgn = i; |
538 | *end_rgn = i + 1; | 615 | *end_rgn = i + 1; |
539 | } | 616 | } |
540 | } | 617 | } |
541 | 618 | ||
542 | return 0; | 619 | return 0; |
543 | } | 620 | } |
544 | 621 | ||
545 | static int __init_memblock __memblock_remove(struct memblock_type *type, | 622 | static int __init_memblock __memblock_remove(struct memblock_type *type, |
546 | phys_addr_t base, phys_addr_t size) | 623 | phys_addr_t base, phys_addr_t size) |
547 | { | 624 | { |
548 | int start_rgn, end_rgn; | 625 | int start_rgn, end_rgn; |
549 | int i, ret; | 626 | int i, ret; |
550 | 627 | ||
551 | ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); | 628 | ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); |
552 | if (ret) | 629 | if (ret) |
553 | return ret; | 630 | return ret; |
554 | 631 | ||
555 | for (i = end_rgn - 1; i >= start_rgn; i--) | 632 | for (i = end_rgn - 1; i >= start_rgn; i--) |
556 | memblock_remove_region(type, i); | 633 | memblock_remove_region(type, i); |
557 | return 0; | 634 | return 0; |
558 | } | 635 | } |
559 | 636 | ||
560 | int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) | 637 | int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) |
561 | { | 638 | { |
562 | return __memblock_remove(&memblock.memory, base, size); | 639 | return __memblock_remove(&memblock.memory, base, size); |
563 | } | 640 | } |
564 | 641 | ||
565 | int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) | 642 | int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) |
566 | { | 643 | { |
567 | memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n", | 644 | memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n", |
568 | (unsigned long long)base, | 645 | (unsigned long long)base, |
569 | (unsigned long long)base + size, | 646 | (unsigned long long)base + size, |
570 | (void *)_RET_IP_); | 647 | (void *)_RET_IP_); |
571 | 648 | ||
572 | return __memblock_remove(&memblock.reserved, base, size); | 649 | return __memblock_remove(&memblock.reserved, base, size); |
573 | } | 650 | } |
574 | 651 | ||
575 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) | 652 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) |
576 | { | 653 | { |
577 | struct memblock_type *_rgn = &memblock.reserved; | 654 | struct memblock_type *_rgn = &memblock.reserved; |
578 | 655 | ||
579 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n", | 656 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n", |
580 | (unsigned long long)base, | 657 | (unsigned long long)base, |
581 | (unsigned long long)base + size, | 658 | (unsigned long long)base + size, |
582 | (void *)_RET_IP_); | 659 | (void *)_RET_IP_); |
583 | 660 | ||
584 | return memblock_add_region(_rgn, base, size, MAX_NUMNODES); | 661 | return memblock_add_region(_rgn, base, size, MAX_NUMNODES); |
585 | } | 662 | } |
586 | 663 | ||
587 | /** | 664 | /** |
588 | * __next_free_mem_range - next function for for_each_free_mem_range() | 665 | * __next_free_mem_range - next function for for_each_free_mem_range() |
589 | * @idx: pointer to u64 loop variable | 666 | * @idx: pointer to u64 loop variable |
590 | * @nid: node selector, %MAX_NUMNODES for all nodes | 667 | * @nid: node selector, %MAX_NUMNODES for all nodes |
591 | * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL | 668 | * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL |
592 | * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL | 669 | * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL |
593 | * @out_nid: ptr to int for nid of the range, can be %NULL | 670 | * @out_nid: ptr to int for nid of the range, can be %NULL |
594 | * | 671 | * |
595 | * Find the first free area from *@idx which matches @nid, fill the out | 672 | * Find the first free area from *@idx which matches @nid, fill the out |
596 | * parameters, and update *@idx for the next iteration. The lower 32bit of | 673 | * parameters, and update *@idx for the next iteration. The lower 32bit of |
597 | * *@idx contains index into memory region and the upper 32bit indexes the | 674 | * *@idx contains index into memory region and the upper 32bit indexes the |
598 | * areas before each reserved region. For example, if reserved regions | 675 | * areas before each reserved region. For example, if reserved regions |
599 | * look like the following, | 676 | * look like the following, |
600 | * | 677 | * |
601 | * 0:[0-16), 1:[32-48), 2:[128-130) | 678 | * 0:[0-16), 1:[32-48), 2:[128-130) |
602 | * | 679 | * |
603 | * The upper 32bit indexes the following regions. | 680 | * The upper 32bit indexes the following regions. |
604 | * | 681 | * |
605 | * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX) | 682 | * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX) |
606 | * | 683 | * |
607 | * As both region arrays are sorted, the function advances the two indices | 684 | * As both region arrays are sorted, the function advances the two indices |
608 | * in lockstep and returns each intersection. | 685 | * in lockstep and returns each intersection. |
609 | */ | 686 | */ |
610 | void __init_memblock __next_free_mem_range(u64 *idx, int nid, | 687 | void __init_memblock __next_free_mem_range(u64 *idx, int nid, |
611 | phys_addr_t *out_start, | 688 | phys_addr_t *out_start, |
612 | phys_addr_t *out_end, int *out_nid) | 689 | phys_addr_t *out_end, int *out_nid) |
613 | { | 690 | { |
614 | struct memblock_type *mem = &memblock.memory; | 691 | struct memblock_type *mem = &memblock.memory; |
615 | struct memblock_type *rsv = &memblock.reserved; | 692 | struct memblock_type *rsv = &memblock.reserved; |
616 | int mi = *idx & 0xffffffff; | 693 | int mi = *idx & 0xffffffff; |
617 | int ri = *idx >> 32; | 694 | int ri = *idx >> 32; |
618 | 695 | ||
619 | for ( ; mi < mem->cnt; mi++) { | 696 | for ( ; mi < mem->cnt; mi++) { |
620 | struct memblock_region *m = &mem->regions[mi]; | 697 | struct memblock_region *m = &mem->regions[mi]; |
621 | phys_addr_t m_start = m->base; | 698 | phys_addr_t m_start = m->base; |
622 | phys_addr_t m_end = m->base + m->size; | 699 | phys_addr_t m_end = m->base + m->size; |
623 | 700 | ||
624 | /* only memory regions are associated with nodes, check it */ | 701 | /* only memory regions are associated with nodes, check it */ |
625 | if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m)) | 702 | if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m)) |
626 | continue; | 703 | continue; |
627 | 704 | ||
628 | /* scan areas before each reservation for intersection */ | 705 | /* scan areas before each reservation for intersection */ |
629 | for ( ; ri < rsv->cnt + 1; ri++) { | 706 | for ( ; ri < rsv->cnt + 1; ri++) { |
630 | struct memblock_region *r = &rsv->regions[ri]; | 707 | struct memblock_region *r = &rsv->regions[ri]; |
631 | phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0; | 708 | phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0; |
632 | phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX; | 709 | phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX; |
633 | 710 | ||
634 | /* if ri advanced past mi, break out to advance mi */ | 711 | /* if ri advanced past mi, break out to advance mi */ |
635 | if (r_start >= m_end) | 712 | if (r_start >= m_end) |
636 | break; | 713 | break; |
637 | /* if the two regions intersect, we're done */ | 714 | /* if the two regions intersect, we're done */ |
638 | if (m_start < r_end) { | 715 | if (m_start < r_end) { |
639 | if (out_start) | 716 | if (out_start) |
640 | *out_start = max(m_start, r_start); | 717 | *out_start = max(m_start, r_start); |
641 | if (out_end) | 718 | if (out_end) |
642 | *out_end = min(m_end, r_end); | 719 | *out_end = min(m_end, r_end); |
643 | if (out_nid) | 720 | if (out_nid) |
644 | *out_nid = memblock_get_region_node(m); | 721 | *out_nid = memblock_get_region_node(m); |
645 | /* | 722 | /* |
646 | * The region which ends first is advanced | 723 | * The region which ends first is advanced |
647 | * for the next iteration. | 724 | * for the next iteration. |
648 | */ | 725 | */ |
649 | if (m_end <= r_end) | 726 | if (m_end <= r_end) |
650 | mi++; | 727 | mi++; |
651 | else | 728 | else |
652 | ri++; | 729 | ri++; |
653 | *idx = (u32)mi | (u64)ri << 32; | 730 | *idx = (u32)mi | (u64)ri << 32; |
654 | return; | 731 | return; |
655 | } | 732 | } |
656 | } | 733 | } |
657 | } | 734 | } |
658 | 735 | ||
659 | /* signal end of iteration */ | 736 | /* signal end of iteration */ |
660 | *idx = ULLONG_MAX; | 737 | *idx = ULLONG_MAX; |
661 | } | 738 | } |
662 | 739 | ||
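For reference, this iterator is normally driven through the for_each_free_mem_range() helper in include/linux/memblock.h rather than called directly. A minimal sketch of a caller that sums all free memory visible to memblock (count_free_bytes is an illustrative name, not an existing kernel function):

#include <linux/memblock.h>

static phys_addr_t __init count_free_bytes(void)
{
	phys_addr_t start, end, total = 0;
	u64 i;

	/* Visit every [start, end) range that is memory but not reserved. */
	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
		total += end - start;

	return total;
}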
663 | /** | 740 | /** |
664 | * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse() | 741 | * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse() |
665 | * @idx: pointer to u64 loop variable | 742 | * @idx: pointer to u64 loop variable |
666 | * @nid: node selector, %MAX_NUMNODES for all nodes | 743 | * @nid: node selector, %MAX_NUMNODES for all nodes |
667 | * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL | 744 | * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL |
668 | * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL | 745 | * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL |
669 | * @out_nid: ptr to int for nid of the range, can be %NULL | 746 | * @out_nid: ptr to int for nid of the range, can be %NULL |
670 | * | 747 | * |
671 | * Reverse of __next_free_mem_range(). | 748 | * Reverse of __next_free_mem_range(). |
672 | */ | 749 | */ |
673 | void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid, | 750 | void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid, |
674 | phys_addr_t *out_start, | 751 | phys_addr_t *out_start, |
675 | phys_addr_t *out_end, int *out_nid) | 752 | phys_addr_t *out_end, int *out_nid) |
676 | { | 753 | { |
677 | struct memblock_type *mem = &memblock.memory; | 754 | struct memblock_type *mem = &memblock.memory; |
678 | struct memblock_type *rsv = &memblock.reserved; | 755 | struct memblock_type *rsv = &memblock.reserved; |
679 | int mi = *idx & 0xffffffff; | 756 | int mi = *idx & 0xffffffff; |
680 | int ri = *idx >> 32; | 757 | int ri = *idx >> 32; |
681 | 758 | ||
682 | if (*idx == (u64)ULLONG_MAX) { | 759 | if (*idx == (u64)ULLONG_MAX) { |
683 | mi = mem->cnt - 1; | 760 | mi = mem->cnt - 1; |
684 | ri = rsv->cnt; | 761 | ri = rsv->cnt; |
685 | } | 762 | } |
686 | 763 | ||
687 | for ( ; mi >= 0; mi--) { | 764 | for ( ; mi >= 0; mi--) { |
688 | struct memblock_region *m = &mem->regions[mi]; | 765 | struct memblock_region *m = &mem->regions[mi]; |
689 | phys_addr_t m_start = m->base; | 766 | phys_addr_t m_start = m->base; |
690 | phys_addr_t m_end = m->base + m->size; | 767 | phys_addr_t m_end = m->base + m->size; |
691 | 768 | ||
692 | /* only memory regions are associated with nodes, check it */ | 769 | /* only memory regions are associated with nodes, check it */ |
693 | if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m)) | 770 | if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m)) |
694 | continue; | 771 | continue; |
695 | 772 | ||
696 | /* scan areas before each reservation for intersection */ | 773 | /* scan areas before each reservation for intersection */ |
697 | for ( ; ri >= 0; ri--) { | 774 | for ( ; ri >= 0; ri--) { |
698 | struct memblock_region *r = &rsv->regions[ri]; | 775 | struct memblock_region *r = &rsv->regions[ri]; |
699 | phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0; | 776 | phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0; |
700 | phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX; | 777 | phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX; |
701 | 778 | ||
702 | /* if ri advanced past mi, break out to advance mi */ | 779 | /* if ri advanced past mi, break out to advance mi */ |
703 | if (r_end <= m_start) | 780 | if (r_end <= m_start) |
704 | break; | 781 | break; |
705 | /* if the two regions intersect, we're done */ | 782 | /* if the two regions intersect, we're done */ |
706 | if (m_end > r_start) { | 783 | if (m_end > r_start) { |
707 | if (out_start) | 784 | if (out_start) |
708 | *out_start = max(m_start, r_start); | 785 | *out_start = max(m_start, r_start); |
709 | if (out_end) | 786 | if (out_end) |
710 | *out_end = min(m_end, r_end); | 787 | *out_end = min(m_end, r_end); |
711 | if (out_nid) | 788 | if (out_nid) |
712 | *out_nid = memblock_get_region_node(m); | 789 | *out_nid = memblock_get_region_node(m); |
713 | 790 | ||
714 | if (m_start >= r_start) | 791 | if (m_start >= r_start) |
715 | mi--; | 792 | mi--; |
716 | else | 793 | else |
717 | ri--; | 794 | ri--; |
718 | *idx = (u32)mi | (u64)ri << 32; | 795 | *idx = (u32)mi | (u64)ri << 32; |
719 | return; | 796 | return; |
720 | } | 797 | } |
721 | } | 798 | } |
722 | } | 799 | } |
723 | 800 | ||
724 | *idx = ULLONG_MAX; | 801 | *idx = ULLONG_MAX; |
725 | } | 802 | } |
726 | 803 | ||
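The reverse iterator is what the default top-down allocator builds on: memblock_find_in_range_node() walks free ranges from high to low addresses via for_each_free_mem_range_reverse(). A simplified sketch of that search, assuming @align is a power of two (find_top_down is an illustrative name):

#include <linux/kernel.h>
#include <linux/memblock.h>

/* Return the highest @align-aligned base that fits @size, or 0 on failure. */
static phys_addr_t __init find_top_down(phys_addr_t size, phys_addr_t align)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range_reverse(i, MAX_NUMNODES, &this_start,
					&this_end, NULL) {
		if (this_end < size)
			continue;
		cand = round_down(this_end - size, align);
		if (cand >= this_start)
			return cand;
	}
	return 0;
}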
727 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 804 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
728 | /* | 805 | /* |
729 | * Common iterator interface used to define for_each_mem_pfn_range(). | 806 | * Common iterator interface used to define for_each_mem_pfn_range(). |
730 | */ | 807 | */ |
731 | void __init_memblock __next_mem_pfn_range(int *idx, int nid, | 808 | void __init_memblock __next_mem_pfn_range(int *idx, int nid, |
732 | unsigned long *out_start_pfn, | 809 | unsigned long *out_start_pfn, |
733 | unsigned long *out_end_pfn, int *out_nid) | 810 | unsigned long *out_end_pfn, int *out_nid) |
734 | { | 811 | { |
735 | struct memblock_type *type = &memblock.memory; | 812 | struct memblock_type *type = &memblock.memory; |
736 | struct memblock_region *r; | 813 | struct memblock_region *r; |
737 | 814 | ||
738 | while (++*idx < type->cnt) { | 815 | while (++*idx < type->cnt) { |
739 | r = &type->regions[*idx]; | 816 | r = &type->regions[*idx]; |
740 | 817 | ||
741 | if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) | 818 | if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) |
742 | continue; | 819 | continue; |
743 | if (nid == MAX_NUMNODES || nid == r->nid) | 820 | if (nid == MAX_NUMNODES || nid == r->nid) |
744 | break; | 821 | break; |
745 | } | 822 | } |
746 | if (*idx >= type->cnt) { | 823 | if (*idx >= type->cnt) { |
747 | *idx = -1; | 824 | *idx = -1; |
748 | return; | 825 | return; |
749 | } | 826 | } |
750 | 827 | ||
751 | if (out_start_pfn) | 828 | if (out_start_pfn) |
752 | *out_start_pfn = PFN_UP(r->base); | 829 | *out_start_pfn = PFN_UP(r->base); |
753 | if (out_end_pfn) | 830 | if (out_end_pfn) |
754 | *out_end_pfn = PFN_DOWN(r->base + r->size); | 831 | *out_end_pfn = PFN_DOWN(r->base + r->size); |
755 | if (out_nid) | 832 | if (out_nid) |
756 | *out_nid = r->nid; | 833 | *out_nid = r->nid; |
757 | } | 834 | } |
758 | 835 | ||
759 | /** | 836 | /** |
760 | * memblock_set_node - set node ID on memblock regions | 837 | * memblock_set_node - set node ID on memblock regions |
761 | * @base: base of area to set node ID for | 838 | * @base: base of area to set node ID for |
762 | * @size: size of area to set node ID for | 839 | * @size: size of area to set node ID for |
763 | * @nid: node ID to set | 840 | * @nid: node ID to set |
764 | * | 841 | * |
765 | * Set the nid of memblock memory regions in [@base,@base+@size) to @nid. | 842 | * Set the nid of memblock memory regions in [@base,@base+@size) to @nid. |
766 | * Regions which cross the area boundaries are split as necessary. | 843 | * Regions which cross the area boundaries are split as necessary. |
767 | * | 844 | * |
768 | * RETURNS: | 845 | * RETURNS: |
769 | * 0 on success, -errno on failure. | 846 | * 0 on success, -errno on failure. |
770 | */ | 847 | */ |
771 | int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, | 848 | int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, |
772 | int nid) | 849 | int nid) |
773 | { | 850 | { |
774 | struct memblock_type *type = &memblock.memory; | 851 | struct memblock_type *type = &memblock.memory; |
775 | int start_rgn, end_rgn; | 852 | int start_rgn, end_rgn; |
776 | int i, ret; | 853 | int i, ret; |
777 | 854 | ||
778 | ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); | 855 | ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); |
779 | if (ret) | 856 | if (ret) |
780 | return ret; | 857 | return ret; |
781 | 858 | ||
782 | for (i = start_rgn; i < end_rgn; i++) | 859 | for (i = start_rgn; i < end_rgn; i++) |
783 | memblock_set_region_node(&type->regions[i], nid); | 860 | memblock_set_region_node(&type->regions[i], nid); |
784 | 861 | ||
785 | memblock_merge_regions(type); | 862 | memblock_merge_regions(type); |
786 | return 0; | 863 | return 0; |
787 | } | 864 | } |
788 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 865 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
789 | 866 | ||
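memblock_set_node() is typically called while parsing the firmware affinity information, once the physical span of each node is known. A hedged example with made-up values (mark_node1_range and the 4GB..8GB window are illustrative only):

#include <linux/memblock.h>
#include <linux/printk.h>

static void __init mark_node1_range(void)
{
	phys_addr_t base = 0x100000000ULL;	/* 4 GB, illustrative */
	phys_addr_t size = 0x100000000ULL;	/* 4 GB, illustrative */

	/* Attribute [4GB, 8GB) to node 1; regions crossing the edges are split. */
	if (memblock_set_node(base, size, 1))
		pr_warn("memblock: failed to set node for [%pa, +%pa)\n",
			&base, &size);
}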
790 | static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, | 867 | static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, |
791 | phys_addr_t align, phys_addr_t max_addr, | 868 | phys_addr_t align, phys_addr_t max_addr, |
792 | int nid) | 869 | int nid) |
793 | { | 870 | { |
794 | phys_addr_t found; | 871 | phys_addr_t found; |
795 | 872 | ||
796 | if (WARN_ON(!align)) | 873 | if (WARN_ON(!align)) |
797 | align = __alignof__(long long); | 874 | align = __alignof__(long long); |
798 | 875 | ||
799 | /* align @size to avoid excessive fragmentation on reserved array */ | 876 | /* align @size to avoid excessive fragmentation on reserved array */ |
800 | size = round_up(size, align); | 877 | size = round_up(size, align); |
801 | 878 | ||
802 | found = memblock_find_in_range_node(0, max_addr, size, align, nid); | 879 | found = memblock_find_in_range_node(0, max_addr, size, align, nid); |
803 | if (found && !memblock_reserve(found, size)) | 880 | if (found && !memblock_reserve(found, size)) |
804 | return found; | 881 | return found; |
805 | 882 | ||
806 | return 0; | 883 | return 0; |
807 | } | 884 | } |
808 | 885 | ||
809 | phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) | 886 | phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) |
810 | { | 887 | { |
811 | return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid); | 888 | return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid); |
812 | } | 889 | } |
813 | 890 | ||
814 | phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) | 891 | phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) |
815 | { | 892 | { |
816 | return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES); | 893 | return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES); |
817 | } | 894 | } |
818 | 895 | ||
819 | phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) | 896 | phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) |
820 | { | 897 | { |
821 | phys_addr_t alloc; | 898 | phys_addr_t alloc; |
822 | 899 | ||
823 | alloc = __memblock_alloc_base(size, align, max_addr); | 900 | alloc = __memblock_alloc_base(size, align, max_addr); |
824 | 901 | ||
825 | if (alloc == 0) | 902 | if (alloc == 0) |
826 | panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", | 903 | panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", |
827 | (unsigned long long) size, (unsigned long long) max_addr); | 904 | (unsigned long long) size, (unsigned long long) max_addr); |
828 | 905 | ||
829 | return alloc; | 906 | return alloc; |
830 | } | 907 | } |
831 | 908 | ||
832 | phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) | 909 | phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) |
833 | { | 910 | { |
834 | return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); | 911 | return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); |
835 | } | 912 | } |
836 | 913 | ||
837 | phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) | 914 | phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) |
838 | { | 915 | { |
839 | phys_addr_t res = memblock_alloc_nid(size, align, nid); | 916 | phys_addr_t res = memblock_alloc_nid(size, align, nid); |
840 | 917 | ||
841 | if (res) | 918 | if (res) |
842 | return res; | 919 | return res; |
843 | return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); | 920 | return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); |
844 | } | 921 | } |
845 | 922 | ||
846 | 923 | ||
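The wrappers above layer a simple policy over memblock_alloc_base_nid(): try the preferred node first, fall back to any accessible memory, and panic only in the memblock_alloc_base() path. A brief usage sketch for an early, preferably node-local allocation (alloc_early_table, this_nid and table_size are illustrative):

#include <linux/memblock.h>

static phys_addr_t __init alloc_early_table(int this_nid, phys_addr_t table_size)
{
	/*
	 * Prefer memory on @this_nid; on failure this falls back to any
	 * accessible memory and panics only if that also fails, so the
	 * returned address is always a valid, already reserved range.
	 */
	return memblock_alloc_try_nid(table_size, PAGE_SIZE, this_nid);
}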
847 | /* | 924 | /* |
848 | * Remaining API functions | 925 | * Remaining API functions |
849 | */ | 926 | */ |
850 | 927 | ||
851 | phys_addr_t __init memblock_phys_mem_size(void) | 928 | phys_addr_t __init memblock_phys_mem_size(void) |
852 | { | 929 | { |
853 | return memblock.memory.total_size; | 930 | return memblock.memory.total_size; |
854 | } | 931 | } |
855 | 932 | ||
856 | phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) | 933 | phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) |
857 | { | 934 | { |
858 | unsigned long pages = 0; | 935 | unsigned long pages = 0; |
859 | struct memblock_region *r; | 936 | struct memblock_region *r; |
860 | unsigned long start_pfn, end_pfn; | 937 | unsigned long start_pfn, end_pfn; |
861 | 938 | ||
862 | for_each_memblock(memory, r) { | 939 | for_each_memblock(memory, r) { |
863 | start_pfn = memblock_region_memory_base_pfn(r); | 940 | start_pfn = memblock_region_memory_base_pfn(r); |
864 | end_pfn = memblock_region_memory_end_pfn(r); | 941 | end_pfn = memblock_region_memory_end_pfn(r); |
865 | start_pfn = min_t(unsigned long, start_pfn, limit_pfn); | 942 | start_pfn = min_t(unsigned long, start_pfn, limit_pfn); |
866 | end_pfn = min_t(unsigned long, end_pfn, limit_pfn); | 943 | end_pfn = min_t(unsigned long, end_pfn, limit_pfn); |
867 | pages += end_pfn - start_pfn; | 944 | pages += end_pfn - start_pfn; |
868 | } | 945 | } |
869 | 946 | ||
870 | return (phys_addr_t)pages << PAGE_SHIFT; | 947 | return (phys_addr_t)pages << PAGE_SHIFT; |
871 | } | 948 | } |
872 | 949 | ||
873 | /* lowest address */ | 950 | /* lowest address */ |
874 | phys_addr_t __init_memblock memblock_start_of_DRAM(void) | 951 | phys_addr_t __init_memblock memblock_start_of_DRAM(void) |
875 | { | 952 | { |
876 | return memblock.memory.regions[0].base; | 953 | return memblock.memory.regions[0].base; |
877 | } | 954 | } |
878 | 955 | ||
879 | phys_addr_t __init_memblock memblock_end_of_DRAM(void) | 956 | phys_addr_t __init_memblock memblock_end_of_DRAM(void) |
880 | { | 957 | { |
881 | int idx = memblock.memory.cnt - 1; | 958 | int idx = memblock.memory.cnt - 1; |
882 | 959 | ||
883 | return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); | 960 | return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); |
884 | } | 961 | } |
885 | 962 | ||
886 | void __init memblock_enforce_memory_limit(phys_addr_t limit) | 963 | void __init memblock_enforce_memory_limit(phys_addr_t limit) |
887 | { | 964 | { |
888 | unsigned long i; | 965 | unsigned long i; |
889 | phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; | 966 | phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; |
890 | 967 | ||
891 | if (!limit) | 968 | if (!limit) |
892 | return; | 969 | return; |
893 | 970 | ||
894 | /* find out max address */ | 971 | /* find out max address */ |
895 | for (i = 0; i < memblock.memory.cnt; i++) { | 972 | for (i = 0; i < memblock.memory.cnt; i++) { |
896 | struct memblock_region *r = &memblock.memory.regions[i]; | 973 | struct memblock_region *r = &memblock.memory.regions[i]; |
897 | 974 | ||
898 | if (limit <= r->size) { | 975 | if (limit <= r->size) { |
899 | max_addr = r->base + limit; | 976 | max_addr = r->base + limit; |
900 | break; | 977 | break; |
901 | } | 978 | } |
902 | limit -= r->size; | 979 | limit -= r->size; |
903 | } | 980 | } |
904 | 981 | ||
905 | /* truncate both memory and reserved regions */ | 982 | /* truncate both memory and reserved regions */ |
906 | __memblock_remove(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX); | 983 | __memblock_remove(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX); |
907 | __memblock_remove(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX); | 984 | __memblock_remove(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX); |
908 | } | 985 | } |
909 | 986 | ||
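This is the hook behind the mem= boot option on several architectures: the limit is an amount of memory, not an address, so the loop above first converts it into a cut-off address and then truncates both the memory and reserved arrays. A hedged sketch of an early_param handler wiring it up (modelled on, but not copied from, the arm variant):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/memblock.h>

static int __init early_mem(char *p)
{
	if (!p)
		return -EINVAL;

	/* "mem=512M" keeps only the first 512 MB of RAM. */
	memblock_enforce_memory_limit(memparse(p, &p));
	return 0;
}
early_param("mem", early_mem);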
910 | static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) | 987 | static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) |
911 | { | 988 | { |
912 | unsigned int left = 0, right = type->cnt; | 989 | unsigned int left = 0, right = type->cnt; |
913 | 990 | ||
914 | do { | 991 | do { |
915 | unsigned int mid = (right + left) / 2; | 992 | unsigned int mid = (right + left) / 2; |
916 | 993 | ||
917 | if (addr < type->regions[mid].base) | 994 | if (addr < type->regions[mid].base) |
918 | right = mid; | 995 | right = mid; |
919 | else if (addr >= (type->regions[mid].base + | 996 | else if (addr >= (type->regions[mid].base + |
920 | type->regions[mid].size)) | 997 | type->regions[mid].size)) |
921 | left = mid + 1; | 998 | left = mid + 1; |
922 | else | 999 | else |
923 | return mid; | 1000 | return mid; |
924 | } while (left < right); | 1001 | } while (left < right); |
925 | return -1; | 1002 | return -1; |
926 | } | 1003 | } |
927 | 1004 | ||
928 | int __init memblock_is_reserved(phys_addr_t addr) | 1005 | int __init memblock_is_reserved(phys_addr_t addr) |
929 | { | 1006 | { |
930 | return memblock_search(&memblock.reserved, addr) != -1; | 1007 | return memblock_search(&memblock.reserved, addr) != -1; |
931 | } | 1008 | } |
932 | 1009 | ||
933 | int __init_memblock memblock_is_memory(phys_addr_t addr) | 1010 | int __init_memblock memblock_is_memory(phys_addr_t addr) |
934 | { | 1011 | { |
935 | return memblock_search(&memblock.memory, addr) != -1; | 1012 | return memblock_search(&memblock.memory, addr) != -1; |
936 | } | 1013 | } |
937 | 1014 | ||
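memblock_search() is a plain binary search over the sorted, non-overlapping region array, so both of these predicates cost O(log n). For illustration, an early-boot sanity check on an address before treating it as usable RAM might read (addr_is_usable_ram is an illustrative name):

#include <linux/memblock.h>

static bool __init addr_is_usable_ram(phys_addr_t addr)
{
	/* Covered by a memory region and not already handed out/reserved. */
	return memblock_is_memory(addr) && !memblock_is_reserved(addr);
}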
938 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 1015 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
939 | int __init_memblock memblock_search_pfn_nid(unsigned long pfn, | 1016 | int __init_memblock memblock_search_pfn_nid(unsigned long pfn, |
940 | unsigned long *start_pfn, unsigned long *end_pfn) | 1017 | unsigned long *start_pfn, unsigned long *end_pfn) |
941 | { | 1018 | { |
942 | struct memblock_type *type = &memblock.memory; | 1019 | struct memblock_type *type = &memblock.memory; |
943 | int mid = memblock_search(type, (phys_addr_t)pfn << PAGE_SHIFT); | 1020 | int mid = memblock_search(type, (phys_addr_t)pfn << PAGE_SHIFT); |
944 | 1021 | ||
945 | if (mid == -1) | 1022 | if (mid == -1) |
946 | return -1; | 1023 | return -1; |
947 | 1024 | ||
948 | *start_pfn = type->regions[mid].base >> PAGE_SHIFT; | 1025 | *start_pfn = type->regions[mid].base >> PAGE_SHIFT; |
949 | *end_pfn = (type->regions[mid].base + type->regions[mid].size) | 1026 | *end_pfn = (type->regions[mid].base + type->regions[mid].size) |
950 | >> PAGE_SHIFT; | 1027 | >> PAGE_SHIFT; |
951 | 1028 | ||
952 | return type->regions[mid].nid; | 1029 | return type->regions[mid].nid; |
953 | } | 1030 | } |
954 | #endif | 1031 | #endif |
955 | 1032 | ||
956 | /** | 1033 | /** |
957 | * memblock_is_region_memory - check if a region is a subset of memory | 1034 | * memblock_is_region_memory - check if a region is a subset of memory |
958 | * @base: base of region to check | 1035 | * @base: base of region to check |
959 | * @size: size of region to check | 1036 | * @size: size of region to check |
960 | * | 1037 | * |
961 | * Check if the region [@base, @base+@size) is a subset of a memory block. | 1038 | * Check if the region [@base, @base+@size) is a subset of a memory block. |
962 | * | 1039 | * |
963 | * RETURNS: | 1040 | * RETURNS: |
964 | * 0 if false, non-zero if true | 1041 | * 0 if false, non-zero if true |
965 | */ | 1042 | */ |
966 | int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) | 1043 | int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) |
967 | { | 1044 | { |
968 | int idx = memblock_search(&memblock.memory, base); | 1045 | int idx = memblock_search(&memblock.memory, base); |
969 | phys_addr_t end = base + memblock_cap_size(base, &size); | 1046 | phys_addr_t end = base + memblock_cap_size(base, &size); |
970 | 1047 | ||
971 | if (idx == -1) | 1048 | if (idx == -1) |
972 | return 0; | 1049 | return 0; |
973 | return memblock.memory.regions[idx].base <= base && | 1050 | return memblock.memory.regions[idx].base <= base && |
974 | (memblock.memory.regions[idx].base + | 1051 | (memblock.memory.regions[idx].base + |
975 | memblock.memory.regions[idx].size) >= end; | 1052 | memblock.memory.regions[idx].size) >= end; |
976 | } | 1053 | } |
977 | 1054 | ||
978 | /** | 1055 | /** |
979 | * memblock_is_region_reserved - check if a region intersects reserved memory | 1056 | * memblock_is_region_reserved - check if a region intersects reserved memory |
980 | * @base: base of region to check | 1057 | * @base: base of region to check |
981 | * @size: size of region to check | 1058 | * @size: size of region to check |
982 | * | 1059 | * |
983 | * Check if the region [@base, @base+@size) intersects a reserved memory block. | 1060 | * Check if the region [@base, @base+@size) intersects a reserved memory block. |
984 | * | 1061 | * |
985 | * RETURNS: | 1062 | * RETURNS: |
986 | * 0 if false, non-zero if true | 1063 | * 0 if false, non-zero if true |
987 | */ | 1064 | */ |
988 | int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) | 1065 | int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) |
989 | { | 1066 | { |
990 | memblock_cap_size(base, &size); | 1067 | memblock_cap_size(base, &size); |
991 | return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; | 1068 | return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; |
992 | } | 1069 | } |
993 | 1070 | ||
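Note the asymmetry spelled out in the two kernel-doc blocks above: the _memory check requires the whole range to lie inside a single memory region, while the _reserved check triggers on any overlap. A short sketch that validates a candidate range before reserving it (claim_range is an illustrative name):

#include <linux/memblock.h>

static int __init claim_range(phys_addr_t base, phys_addr_t size)
{
	if (!memblock_is_region_memory(base, size))
		return -EINVAL;		/* not fully backed by RAM */
	if (memblock_is_region_reserved(base, size))
		return -EBUSY;		/* overlaps an existing reservation */

	return memblock_reserve(base, size);
}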
994 | void __init_memblock memblock_trim_memory(phys_addr_t align) | 1071 | void __init_memblock memblock_trim_memory(phys_addr_t align) |
995 | { | 1072 | { |
996 | int i; | 1073 | int i; |
997 | phys_addr_t start, end, orig_start, orig_end; | 1074 | phys_addr_t start, end, orig_start, orig_end; |
998 | struct memblock_type *mem = &memblock.memory; | 1075 | struct memblock_type *mem = &memblock.memory; |
999 | 1076 | ||
1000 | for (i = 0; i < mem->cnt; i++) { | 1077 | for (i = 0; i < mem->cnt; i++) { |
1001 | orig_start = mem->regions[i].base; | 1078 | orig_start = mem->regions[i].base; |
1002 | orig_end = mem->regions[i].base + mem->regions[i].size; | 1079 | orig_end = mem->regions[i].base + mem->regions[i].size; |
1003 | start = round_up(orig_start, align); | 1080 | start = round_up(orig_start, align); |
1004 | end = round_down(orig_end, align); | 1081 | end = round_down(orig_end, align); |
1005 | 1082 | ||
1006 | if (start == orig_start && end == orig_end) | 1083 | if (start == orig_start && end == orig_end) |
1007 | continue; | 1084 | continue; |
1008 | 1085 | ||
1009 | if (start < end) { | 1086 | if (start < end) { |
1010 | mem->regions[i].base = start; | 1087 | mem->regions[i].base = start; |
1011 | mem->regions[i].size = end - start; | 1088 | mem->regions[i].size = end - start; |
1012 | } else { | 1089 | } else { |
1013 | memblock_remove_region(mem, i); | 1090 | memblock_remove_region(mem, i); |
1014 | i--; | 1091 | i--; |
1015 | } | 1092 | } |
1016 | } | 1093 | } |
1017 | } | 1094 | } |
1018 | 1095 | ||
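Architectures call memblock_trim_memory() late in early boot to drop partial pages at the edges of firmware-reported ranges; trimming to PAGE_SIZE is the common case. A minimal sketch (drop_partial_pages is an illustrative wrapper):

#include <linux/memblock.h>

static void __init drop_partial_pages(void)
{
	/* Shrink every memory region to whole, PAGE_SIZE-aligned pages. */
	memblock_trim_memory(PAGE_SIZE);
}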
1019 | void __init_memblock memblock_set_current_limit(phys_addr_t limit) | 1096 | void __init_memblock memblock_set_current_limit(phys_addr_t limit) |
1020 | { | 1097 | { |
1021 | memblock.current_limit = limit; | 1098 | memblock.current_limit = limit; |
1022 | } | 1099 | } |
1023 | 1100 | ||
1024 | static void __init_memblock memblock_dump(struct memblock_type *type, char *name) | 1101 | static void __init_memblock memblock_dump(struct memblock_type *type, char *name) |
1025 | { | 1102 | { |
1026 | unsigned long long base, size; | 1103 | unsigned long long base, size; |
1027 | int i; | 1104 | int i; |
1028 | 1105 | ||
1029 | pr_info(" %s.cnt = 0x%lx\n", name, type->cnt); | 1106 | pr_info(" %s.cnt = 0x%lx\n", name, type->cnt); |
1030 | 1107 | ||
1031 | for (i = 0; i < type->cnt; i++) { | 1108 | for (i = 0; i < type->cnt; i++) { |
1032 | struct memblock_region *rgn = &type->regions[i]; | 1109 | struct memblock_region *rgn = &type->regions[i]; |
1033 | char nid_buf[32] = ""; | 1110 | char nid_buf[32] = ""; |
1034 | 1111 | ||
1035 | base = rgn->base; | 1112 | base = rgn->base; |
1036 | size = rgn->size; | 1113 | size = rgn->size; |
1037 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 1114 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
1038 | if (memblock_get_region_node(rgn) != MAX_NUMNODES) | 1115 | if (memblock_get_region_node(rgn) != MAX_NUMNODES) |
1039 | snprintf(nid_buf, sizeof(nid_buf), " on node %d", | 1116 | snprintf(nid_buf, sizeof(nid_buf), " on node %d", |
1040 | memblock_get_region_node(rgn)); | 1117 | memblock_get_region_node(rgn)); |
1041 | #endif | 1118 | #endif |
1042 | pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s\n", | 1119 | pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s\n", |
1043 | name, i, base, base + size - 1, size, nid_buf); | 1120 | name, i, base, base + size - 1, size, nid_buf); |
1044 | } | 1121 | } |
1045 | } | 1122 | } |
1046 | 1123 | ||
1047 | void __init_memblock __memblock_dump_all(void) | 1124 | void __init_memblock __memblock_dump_all(void) |
1048 | { | 1125 | { |
1049 | pr_info("MEMBLOCK configuration:\n"); | 1126 | pr_info("MEMBLOCK configuration:\n"); |
1050 | pr_info(" memory size = %#llx reserved size = %#llx\n", | 1127 | pr_info(" memory size = %#llx reserved size = %#llx\n", |
1051 | (unsigned long long)memblock.memory.total_size, | 1128 | (unsigned long long)memblock.memory.total_size, |
1052 | (unsigned long long)memblock.reserved.total_size); | 1129 | (unsigned long long)memblock.reserved.total_size); |
1053 | 1130 | ||
1054 | memblock_dump(&memblock.memory, "memory"); | 1131 | memblock_dump(&memblock.memory, "memory"); |
1055 | memblock_dump(&memblock.reserved, "reserved"); | 1132 | memblock_dump(&memblock.reserved, "reserved"); |
1056 | } | 1133 | } |
1057 | 1134 | ||
1058 | void __init memblock_allow_resize(void) | 1135 | void __init memblock_allow_resize(void) |
1059 | { | 1136 | { |
1060 | memblock_can_resize = 1; | 1137 | memblock_can_resize = 1; |
1061 | } | 1138 | } |
1062 | 1139 | ||
1063 | static int __init early_memblock(char *p) | 1140 | static int __init early_memblock(char *p) |
1064 | { | 1141 | { |
1065 | if (p && strstr(p, "debug")) | 1142 | if (p && strstr(p, "debug")) |
1066 | memblock_debug = 1; | 1143 | memblock_debug = 1; |
1067 | return 0; | 1144 | return 0; |
1068 | } | 1145 | } |
1069 | early_param("memblock", early_memblock); | 1146 | early_param("memblock", early_memblock); |
1070 | 1147 | ||
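The early parameter above is what makes booting with memblock=debug useful: it sets memblock_debug, which enables the memblock_dbg() tracing macro from include/linux/memblock.h. A hedged sketch of how new memblock code would typically emit such traces (trace_and_reserve is an illustrative name):

#include <linux/memblock.h>

static void __init trace_and_reserve(phys_addr_t base, phys_addr_t size)
{
	/* Printed only when the kernel was booted with memblock=debug. */
	memblock_dbg("example: reserving [%#016llx-%#016llx]\n",
		     (unsigned long long)base,
		     (unsigned long long)(base + size - 1));

	memblock_reserve(base, size);
}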
1071 | #if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK) | 1148 | #if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK) |
1072 | 1149 | ||
1073 | static int memblock_debug_show(struct seq_file *m, void *private) | 1150 | static int memblock_debug_show(struct seq_file *m, void *private) |
1074 | { | 1151 | { |
1075 | struct memblock_type *type = m->private; | 1152 | struct memblock_type *type = m->private; |
1076 | struct memblock_region *reg; | 1153 | struct memblock_region *reg; |
1077 | int i; | 1154 | int i; |
1078 | 1155 | ||
1079 | for (i = 0; i < type->cnt; i++) { | 1156 | for (i = 0; i < type->cnt; i++) { |
1080 | reg = &type->regions[i]; | 1157 | reg = &type->regions[i]; |
1081 | seq_printf(m, "%4d: ", i); | 1158 | seq_printf(m, "%4d: ", i); |
1082 | if (sizeof(phys_addr_t) == 4) | 1159 | if (sizeof(phys_addr_t) == 4) |
1083 | seq_printf(m, "0x%08lx..0x%08lx\n", | 1160 | seq_printf(m, "0x%08lx..0x%08lx\n", |
1084 | (unsigned long)reg->base, | 1161 | (unsigned long)reg->base, |
1085 | (unsigned long)(reg->base + reg->size - 1)); | 1162 | (unsigned long)(reg->base + reg->size - 1)); |
1086 | else | 1163 | else |
1087 | seq_printf(m, "0x%016llx..0x%016llx\n", | 1164 | seq_printf(m, "0x%016llx..0x%016llx\n", |
1088 | (unsigned long long)reg->base, | 1165 | (unsigned long long)reg->base, |
1089 | (unsigned long long)(reg->base + reg->size - 1)); | 1166 | (unsigned long long)(reg->base + reg->size - 1)); |
1090 | 1167 | ||
1091 | } | 1168 | } |
1092 | return 0; | 1169 | return 0; |
1093 | } | 1170 | } |
1094 | 1171 | ||
1095 | static int memblock_debug_open(struct inode *inode, struct file *file) | 1172 | static int memblock_debug_open(struct inode *inode, struct file *file) |
1096 | { | 1173 | { |
1097 | return single_open(file, memblock_debug_show, inode->i_private); | 1174 | return single_open(file, memblock_debug_show, inode->i_private); |
1098 | } | 1175 | } |
1099 | 1176 | ||
1100 | static const struct file_operations memblock_debug_fops = { | 1177 | static const struct file_operations memblock_debug_fops = { |
1101 | .open = memblock_debug_open, | 1178 | .open = memblock_debug_open, |
1102 | .read = seq_read, | 1179 | .read = seq_read, |
1103 | .llseek = seq_lseek, | 1180 | .llseek = seq_lseek, |
1104 | .release = single_release, | 1181 | .release = single_release, |
1105 | }; | 1182 | }; |
1106 | 1183 | ||
1107 | static int __init memblock_init_debugfs(void) | 1184 | static int __init memblock_init_debugfs(void) |
1108 | { | 1185 | { |
1109 | struct dentry *root = debugfs_create_dir("memblock", NULL); | 1186 | struct dentry *root = debugfs_create_dir("memblock", NULL); |
1110 | if (!root) | 1187 | if (!root) |
1111 | return -ENXIO; | 1188 | return -ENXIO; |
1112 | debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops); | 1189 | debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops); |
1113 | debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops); | 1190 | debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops); |
1114 | 1191 | ||
1115 | return 0; | 1192 | return 0; |
1116 | } | 1193 | } |
1117 | __initcall(memblock_init_debugfs); | 1194 | __initcall(memblock_init_debugfs); |
1118 | 1195 | ||
1119 | #endif /* CONFIG_DEBUG_FS */ | 1196 | #endif /* CONFIG_DEBUG_FS */ |
1120 | 1197 |