Commit e303297e6c3a7b847c4731eb14006ca6b435ecca
Committed by Linus Torvalds
1 parent 2672391169
Exists in master and in 4 other branches
mm: extended batches for generic mmu_gather
Instead of using a single batch (the small on-stack, or an allocated page), try and extend the batch every time it runs out and only flush once either the extend fails or we're done.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Requested-by: Nick Piggin <npiggin@kernel.dk>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
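In practice the scheme works like this: __tlb_remove_page() keeps appending pages to the active batch, tlb_next_batch() chains another page-sized batch when the active one fills up, and only when that opportunistic GFP_NOWAIT allocation fails is the caller told (return value 0) to flush early. A rough caller-side sketch of that protocol follows, assuming the interfaces from the include/asm-generic/tlb.h hunks below; zap_range_sketch() and zap_one_pte() are invented names for illustration, not functions from this commit:

	/*
	 * Illustrative only -- not part of this commit.  zap_range_sketch()
	 * and zap_one_pte() are made-up names; the tlb_* calls are the
	 * interfaces touched by this patch in include/asm-generic/tlb.h.
	 */
	static void zap_range_sketch(struct mm_struct *mm,
				     unsigned long start, unsigned long end)
	{
		struct mmu_gather tlb;
		unsigned long addr;

		tlb_gather_mmu(&tlb, mm, 0);	/* 0: not a full-mm teardown */

		for (addr = start; addr < end; addr += PAGE_SIZE) {
			/* hypothetical helper clearing one PTE, returning its page */
			struct page *page = zap_one_pte(mm, addr);

			if (!page)
				continue;
			/*
			 * Queue the page in the active batch; tlb_next_batch()
			 * chains a fresh batch when the current one is full.
			 * A return of 0 means no further batch could be
			 * allocated (GFP_NOWAIT | __GFP_NOWARN failed), so
			 * flush now instead of after every few pages.
			 */
			if (!__tlb_remove_page(&tlb, page))
				tlb_flush_mmu(&tlb);
		}

		/* final flush; also frees the chained batch pages */
		tlb_finish_mmu(&tlb, start, end);
	}

The early-flush branch mirrors what the tlb_remove_page() wrapper does with __tlb_remove_page()'s return value. With each chained batch sized to one page, MAX_GATHER_BATCH works out to roughly (4096 - 16) / 8 = 510 page pointers per batch on a typical 64-bit configuration with 4 KiB pages, versus the 8-entry on-stack MMU_GATHER_BUNDLE, so in the common case the whole unmap is covered by a single flush at the end.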
Showing 2 changed files with 84 additions and 47 deletions
include/asm-generic/tlb.h
... | ... | @@ -19,16 +19,6 @@ |
19 | 19 | #include <asm/pgalloc.h> |
20 | 20 | #include <asm/tlbflush.h> |
21 | 21 | |
22 | -/* | |
23 | - * For UP we don't need to worry about TLB flush | |
24 | - * and page free order so much.. | |
25 | - */ | |
26 | -#ifdef CONFIG_SMP | |
27 | - #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U) | |
28 | -#else | |
29 | - #define tlb_fast_mode(tlb) 1 | |
30 | -#endif | |
31 | - | |
32 | 22 | #ifdef CONFIG_HAVE_RCU_TABLE_FREE |
33 | 23 | /* |
34 | 24 | * Semi RCU freeing of the page directories. |
... | ... | @@ -78,6 +68,16 @@ |
78 | 68 | */ |
79 | 69 | #define MMU_GATHER_BUNDLE 8 |
80 | 70 | |
71 | +struct mmu_gather_batch { | |
72 | + struct mmu_gather_batch *next; | |
73 | + unsigned int nr; | |
74 | + unsigned int max; | |
75 | + struct page *pages[0]; | |
76 | +}; | |
77 | + | |
78 | +#define MAX_GATHER_BATCH \ | |
79 | + ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *)) | |
80 | + | |
81 | 81 | /* struct mmu_gather is an opaque type used by the mm code for passing around |
82 | 82 | * any data needed by arch specific code for tlb_remove_page. |
83 | 83 | */ |
84 | 84 | struct mmu_gather { |
85 | 85 | struct mm_struct *mm; |
... | ... | @@ -86,22 +86,48 @@ |
86 | 86 | #ifdef CONFIG_HAVE_RCU_TABLE_FREE |
87 | 87 | struct mmu_table_batch *batch; |
88 | 88 | #endif |
89 | - unsigned int nr; /* set to ~0U means fast mode */ | |
90 | - unsigned int max; /* nr < max */ | |
91 | - unsigned int need_flush;/* Really unmapped some ptes? */ | |
92 | - unsigned int fullmm; /* non-zero means full mm flush */ | |
93 | - struct page **pages; | |
94 | - struct page *local[MMU_GATHER_BUNDLE]; | |
89 | + unsigned int need_flush : 1, /* Did free PTEs */ | |
90 | + fast_mode : 1; /* No batching */ | |
91 | + | |
92 | + unsigned int fullmm; | |
93 | + | |
94 | + struct mmu_gather_batch *active; | |
95 | + struct mmu_gather_batch local; | |
96 | + struct page *__pages[MMU_GATHER_BUNDLE]; | |
95 | 97 | }; |
96 | 98 | |
97 | -static inline void __tlb_alloc_page(struct mmu_gather *tlb) | |
99 | +/* | |
100 | + * For UP we don't need to worry about TLB flush | |
101 | + * and page free order so much.. | |
102 | + */ | |
103 | +#ifdef CONFIG_SMP | |
104 | + #define tlb_fast_mode(tlb) (tlb->fast_mode) | |
105 | +#else | |
106 | + #define tlb_fast_mode(tlb) 1 | |
107 | +#endif | |
108 | + | |
109 | +static inline int tlb_next_batch(struct mmu_gather *tlb) | |
98 | 110 | { |
99 | - unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); | |
111 | + struct mmu_gather_batch *batch; | |
100 | 112 | |
101 | - if (addr) { | |
102 | - tlb->pages = (void *)addr; | |
103 | - tlb->max = PAGE_SIZE / sizeof(struct page *); | |
113 | + batch = tlb->active; | |
114 | + if (batch->next) { | |
115 | + tlb->active = batch->next; | |
116 | + return 1; | |
104 | 117 | } |
118 | + | |
119 | + batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); | |
120 | + if (!batch) | |
121 | + return 0; | |
122 | + | |
123 | + batch->next = NULL; | |
124 | + batch->nr = 0; | |
125 | + batch->max = MAX_GATHER_BATCH; | |
126 | + | |
127 | + tlb->active->next = batch; | |
128 | + tlb->active = batch; | |
129 | + | |
130 | + return 1; | |
105 | 131 | } |
106 | 132 | |
107 | 133 | /* tlb_gather_mmu |
108 | 134 | |
... | ... | @@ -114,17 +140,14 @@ |
114 | 140 | { |
115 | 141 | tlb->mm = mm; |
116 | 142 | |
117 | - tlb->max = ARRAY_SIZE(tlb->local); | |
118 | - tlb->pages = tlb->local; | |
143 | + tlb->fullmm = fullmm; | |
144 | + tlb->need_flush = 0; | |
145 | + tlb->fast_mode = (num_possible_cpus() == 1); | |
146 | + tlb->local.next = NULL; | |
147 | + tlb->local.nr = 0; | |
148 | + tlb->local.max = ARRAY_SIZE(tlb->__pages); | |
149 | + tlb->active = &tlb->local; | |
119 | 150 | |
120 | - if (num_online_cpus() > 1) { | |
121 | - tlb->nr = 0; | |
122 | - __tlb_alloc_page(tlb); | |
123 | - } else /* Use fast mode if only one CPU is online */ | |
124 | - tlb->nr = ~0U; | |
125 | - | |
126 | - tlb->fullmm = fullmm; | |
127 | - | |
128 | 151 | #ifdef CONFIG_HAVE_RCU_TABLE_FREE |
129 | 152 | tlb->batch = NULL; |
130 | 153 | #endif |
... | ... | @@ -133,6 +156,8 @@ |
133 | 156 | static inline void |
134 | 157 | tlb_flush_mmu(struct mmu_gather *tlb) |
135 | 158 | { |
159 | + struct mmu_gather_batch *batch; | |
160 | + | |
136 | 161 | if (!tlb->need_flush) |
137 | 162 | return; |
138 | 163 | tlb->need_flush = 0; |
139 | 164 | |
... | ... | @@ -140,17 +165,15 @@ |
140 | 165 | #ifdef CONFIG_HAVE_RCU_TABLE_FREE |
141 | 166 | tlb_table_flush(tlb); |
142 | 167 | #endif |
143 | - if (!tlb_fast_mode(tlb)) { | |
144 | - free_pages_and_swap_cache(tlb->pages, tlb->nr); | |
145 | - tlb->nr = 0; | |
146 | - /* | |
147 | - * If we are using the local on-stack array of pages for MMU | |
148 | - * gather, try allocating an off-stack array again as we have | |
149 | - * recently freed pages. | |
150 | - */ | |
151 | - if (tlb->pages == tlb->local) | |
152 | - __tlb_alloc_page(tlb); | |
168 | + | |
169 | + if (tlb_fast_mode(tlb)) | |
170 | + return; | |
171 | + | |
172 | + for (batch = &tlb->local; batch; batch = batch->next) { | |
173 | + free_pages_and_swap_cache(batch->pages, batch->nr); | |
174 | + batch->nr = 0; | |
153 | 175 | } |
176 | + tlb->active = &tlb->local; | |
154 | 177 | } |
155 | 178 | |
156 | 179 | /* tlb_finish_mmu |
157 | 180 | |
... | ... | @@ -160,13 +183,18 @@ |
160 | 183 | static inline void |
161 | 184 | tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) |
162 | 185 | { |
186 | + struct mmu_gather_batch *batch, *next; | |
187 | + | |
163 | 188 | tlb_flush_mmu(tlb); |
164 | 189 | |
165 | 190 | /* keep the page table cache within bounds */ |
166 | 191 | check_pgt_cache(); |
167 | 192 | |
168 | - if (tlb->pages != tlb->local) | |
169 | - free_pages((unsigned long)tlb->pages, 0); | |
193 | + for (batch = tlb->local.next; batch; batch = next) { | |
194 | + next = batch->next; | |
195 | + free_pages((unsigned long)batch, 0); | |
196 | + } | |
197 | + tlb->local.next = NULL; | |
170 | 198 | } |
171 | 199 | |
172 | 200 | /* __tlb_remove_page |
173 | 201 | |
174 | 202 | |
175 | 203 | |
... | ... | @@ -177,15 +205,24 @@ |
177 | 205 | */ |
178 | 206 | static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) |
179 | 207 | { |
208 | + struct mmu_gather_batch *batch; | |
209 | + | |
180 | 210 | tlb->need_flush = 1; |
211 | + | |
181 | 212 | if (tlb_fast_mode(tlb)) { |
182 | 213 | free_page_and_swap_cache(page); |
183 | 214 | return 1; /* avoid calling tlb_flush_mmu() */ |
184 | 215 | } |
185 | - tlb->pages[tlb->nr++] = page; | |
186 | - VM_BUG_ON(tlb->nr > tlb->max); | |
187 | 216 | |
188 | - return tlb->max - tlb->nr; | |
217 | + batch = tlb->active; | |
218 | + batch->pages[batch->nr++] = page; | |
219 | + VM_BUG_ON(batch->nr > batch->max); | |
220 | + if (batch->nr == batch->max) { | |
221 | + if (!tlb_next_batch(tlb)) | |
222 | + return 0; | |
223 | + } | |
224 | + | |
225 | + return batch->max - batch->nr; | |
189 | 226 | } |
190 | 227 | |
191 | 228 | /* tlb_remove_page |
mm/memory.c