Commit e303297e6c3a7b847c4731eb14006ca6b435ecca

Authored by Peter Zijlstra
Committed by Linus Torvalds
1 parent 2672391169

mm: extended batches for generic mmu_gather

Instead of using a single batch (either the small on-stack one or an
allocated page), try to extend the batch every time it runs out, and only
flush once either the extension fails or we're done.
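
The shape of this is easiest to see outside the kernel. Below is a minimal
user-space sketch of the same extend-or-flush pattern: a chain of page-sized
batches that grows on demand and is only drained when growing it fails or the
caller is finished. All names in the sketch (struct batch, struct gather,
gather_add(), ...) are invented for illustration; malloc() stands in for
__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0), the release callback stands
in for free_pages_and_swap_cache(), and the kernel's extra on-stack bundle
(tlb->local / __pages[]) is left out to keep the sketch short.

#include <stdlib.h>

struct batch {
	struct batch	*next;
	unsigned int	nr;
	unsigned int	max;
	void		*items[];	/* rest of the one-page allocation */
};

struct gather {
	struct batch	*first;		/* head of the batch chain */
	struct batch	*active;	/* batch currently being filled */
};

#define BATCH_BYTES	4096		/* one page per batch, as in the patch */
#define BATCH_MAX	((BATCH_BYTES - sizeof(struct batch)) / sizeof(void *))

static struct batch *alloc_batch(void)
{
	struct batch *b = malloc(BATCH_BYTES);

	if (b) {
		b->next = NULL;
		b->nr = 0;
		b->max = BATCH_MAX;
	}
	return b;
}

static int gather_init(struct gather *g)
{
	g->first = g->active = alloc_batch();
	return g->first != NULL;
}

/* Step to an already-chained batch if one exists, else try to grow the chain. */
static int next_batch(struct gather *g)
{
	if (!g->active->next) {
		g->active->next = alloc_batch();
		if (!g->active->next)
			return 0;
	}
	g->active = g->active->next;
	return 1;
}

/*
 * Queue one item.  Returns 0 when the active batch is full and the chain
 * could not be extended, which is the caller's cue to flush -- the same
 * contract as __tlb_remove_page() below.
 */
static int gather_add(struct gather *g, void *item)
{
	struct batch *b = g->active;

	b->items[b->nr++] = item;
	if (b->nr == b->max)
		return next_batch(g);
	return 1;
}

/* Release everything queued so far and rewind, keeping the batches around. */
static void gather_flush(struct gather *g, void (*release)(void *))
{
	struct batch *b;
	unsigned int i;

	for (b = g->first; b; b = b->next) {
		for (i = 0; i < b->nr; i++)
			release(b->items[i]);
		b->nr = 0;
	}
	g->active = g->first;
}

Because extra batches are allocated with a no-sleep allocation, a failure is
harmless: it simply forces an earlier flush, which is exactly the old
single-batch behaviour.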

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Requested-by: Nick Piggin <npiggin@kernel.dk>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 84 additions and 47 deletions

include/asm-generic/tlb.h
... ... @@ -19,16 +19,6 @@
19 19 #include <asm/pgalloc.h>
20 20 #include <asm/tlbflush.h>
21 21  
22   -/*
23   - * For UP we don't need to worry about TLB flush
24   - * and page free order so much..
25   - */
26   -#ifdef CONFIG_SMP
27   - #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
28   -#else
29   - #define tlb_fast_mode(tlb) 1
30   -#endif
31   -
32 22 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
33 23 /*
34 24 * Semi RCU freeing of the page directories.
... ... @@ -78,6 +68,16 @@
78 68 */
79 69 #define MMU_GATHER_BUNDLE 8
80 70  
  71 +struct mmu_gather_batch {
  72 + struct mmu_gather_batch *next;
  73 + unsigned int nr;
  74 + unsigned int max;
  75 + struct page *pages[0];
  76 +};
  77 +
  78 +#define MAX_GATHER_BATCH \
  79 + ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
  80 +
81 81 /* struct mmu_gather is an opaque type used by the mm code for passing around
82 82 * any data needed by arch specific code for tlb_remove_page.
83 83 */
... ... @@ -86,22 +86,48 @@
86 86 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
87 87 struct mmu_table_batch *batch;
88 88 #endif
89   - unsigned int nr; /* set to ~0U means fast mode */
90   - unsigned int max; /* nr < max */
91   - unsigned int need_flush;/* Really unmapped some ptes? */
92   - unsigned int fullmm; /* non-zero means full mm flush */
93   - struct page **pages;
94   - struct page *local[MMU_GATHER_BUNDLE];
  89 + unsigned int need_flush : 1, /* Did free PTEs */
  90 + fast_mode : 1; /* No batching */
  91 +
  92 + unsigned int fullmm;
  93 +
  94 + struct mmu_gather_batch *active;
  95 + struct mmu_gather_batch local;
  96 + struct page *__pages[MMU_GATHER_BUNDLE];
95 97 };
96 98  
97   -static inline void __tlb_alloc_page(struct mmu_gather *tlb)
  99 +/*
  100 + * For UP we don't need to worry about TLB flush
  101 + * and page free order so much..
  102 + */
  103 +#ifdef CONFIG_SMP
  104 + #define tlb_fast_mode(tlb) (tlb->fast_mode)
  105 +#else
  106 + #define tlb_fast_mode(tlb) 1
  107 +#endif
  108 +
  109 +static inline int tlb_next_batch(struct mmu_gather *tlb)
98 110 {
99   - unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
  111 + struct mmu_gather_batch *batch;
100 112  
101   - if (addr) {
102   - tlb->pages = (void *)addr;
103   - tlb->max = PAGE_SIZE / sizeof(struct page *);
  113 + batch = tlb->active;
  114 + if (batch->next) {
  115 + tlb->active = batch->next;
  116 + return 1;
104 117 }
  118 +
  119 + batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
  120 + if (!batch)
  121 + return 0;
  122 +
  123 + batch->next = NULL;
  124 + batch->nr = 0;
  125 + batch->max = MAX_GATHER_BATCH;
  126 +
  127 + tlb->active->next = batch;
  128 + tlb->active = batch;
  129 +
  130 + return 1;
105 131 }
106 132  
107 133 /* tlb_gather_mmu
108 134  
... ... @@ -114,17 +140,14 @@
114 140 {
115 141 tlb->mm = mm;
116 142  
117   - tlb->max = ARRAY_SIZE(tlb->local);
118   - tlb->pages = tlb->local;
  143 + tlb->fullmm = fullmm;
  144 + tlb->need_flush = 0;
  145 + tlb->fast_mode = (num_possible_cpus() == 1);
  146 + tlb->local.next = NULL;
  147 + tlb->local.nr = 0;
  148 + tlb->local.max = ARRAY_SIZE(tlb->__pages);
  149 + tlb->active = &tlb->local;
119 150  
120   - if (num_online_cpus() > 1) {
121   - tlb->nr = 0;
122   - __tlb_alloc_page(tlb);
123   - } else /* Use fast mode if only one CPU is online */
124   - tlb->nr = ~0U;
125   -
126   - tlb->fullmm = fullmm;
127   -
128 151 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
129 152 tlb->batch = NULL;
130 153 #endif
... ... @@ -133,6 +156,8 @@
133 156 static inline void
134 157 tlb_flush_mmu(struct mmu_gather *tlb)
135 158 {
  159 + struct mmu_gather_batch *batch;
  160 +
136 161 if (!tlb->need_flush)
137 162 return;
138 163 tlb->need_flush = 0;
139 164  
... ... @@ -140,17 +165,15 @@
140 165 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
141 166 tlb_table_flush(tlb);
142 167 #endif
143   - if (!tlb_fast_mode(tlb)) {
144   - free_pages_and_swap_cache(tlb->pages, tlb->nr);
145   - tlb->nr = 0;
146   - /*
147   - * If we are using the local on-stack array of pages for MMU
148   - * gather, try allocating an off-stack array again as we have
149   - * recently freed pages.
150   - */
151   - if (tlb->pages == tlb->local)
152   - __tlb_alloc_page(tlb);
  168 +
  169 + if (tlb_fast_mode(tlb))
  170 + return;
  171 +
  172 + for (batch = &tlb->local; batch; batch = batch->next) {
  173 + free_pages_and_swap_cache(batch->pages, batch->nr);
  174 + batch->nr = 0;
153 175 }
  176 + tlb->active = &tlb->local;
154 177 }
155 178  
156 179 /* tlb_finish_mmu
157 180  
... ... @@ -160,13 +183,18 @@
160 183 static inline void
161 184 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
162 185 {
  186 + struct mmu_gather_batch *batch, *next;
  187 +
163 188 tlb_flush_mmu(tlb);
164 189  
165 190 /* keep the page table cache within bounds */
166 191 check_pgt_cache();
167 192  
168   - if (tlb->pages != tlb->local)
169   - free_pages((unsigned long)tlb->pages, 0);
  193 + for (batch = tlb->local.next; batch; batch = next) {
  194 + next = batch->next;
  195 + free_pages((unsigned long)batch, 0);
  196 + }
  197 + tlb->local.next = NULL;
170 198 }
171 199  
172 200 /* __tlb_remove_page
173 201  
174 202  
175 203  
... ... @@ -177,15 +205,24 @@
177 205 */
178 206 static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
179 207 {
  208 + struct mmu_gather_batch *batch;
  209 +
180 210 tlb->need_flush = 1;
  211 +
181 212 if (tlb_fast_mode(tlb)) {
182 213 free_page_and_swap_cache(page);
183 214 return 1; /* avoid calling tlb_flush_mmu() */
184 215 }
185   - tlb->pages[tlb->nr++] = page;
186   - VM_BUG_ON(tlb->nr > tlb->max);
187 216  
188   - return tlb->max - tlb->nr;
  217 + batch = tlb->active;
  218 + batch->pages[batch->nr++] = page;
  219 + VM_BUG_ON(batch->nr > batch->max);
  220 + if (batch->nr == batch->max) {
  221 + if (!tlb_next_batch(tlb))
  222 + return 0;
  223 + }
  224 +
  225 + return batch->max - batch->nr;
189 226 }
190 227  
191 228 /* tlb_remove_page
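
For reference, the caller side of __tlb_remove_page() sits just below this
hunk: tlb_remove_page() (unchanged context, so not shown in the diff) flushes
whenever __tlb_remove_page() returns 0, i.e. whenever the active batch filled
up and the chain could not be grown. In terms of the hypothetical sketch in
the commit message above, the driving code would look roughly like this:

/*
 * Rough analogue of tlb_remove_page() for the sketch above; the names and
 * the release callback are illustrative, not kernel API.
 */
static void gather_remove(struct gather *g, void *item, void (*release)(void *))
{
	if (!gather_add(g, item))	/* batch full and chain could not grow */
		gather_flush(g, release);
}

Flushing only when the no-sleep allocation fails (or at the final
tlb_finish_mmu()) lets the unmap path free far more pages per TLB flush than
the old single page-sized array, without ever sleeping for memory while page
tables are being torn down.
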
mm/memory.c
... ... @@ -994,8 +994,8 @@
994 994 spinlock_t *ptl;
995 995 int rss[NR_MM_COUNTERS];
996 996  
997   - init_rss_vec(rss);
998 997 again:
  998 + init_rss_vec(rss);
999 999 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
1000 1000 arch_enter_lazy_mmu_mode();
1001 1001 do {