Commit ac6c9e2bed093c4b60e313674fb7aec4f264c3d4

Authored by Linus Torvalds

Merge branch 'safe-dirty-tlb-flush'

This merges the patch to fix a possible loss of the dirty bit on munmap() or
madvise(DONTNEED).  If there are concurrent writers on other CPUs that
still have the unmapped/unneeded page in their TLBs, their writes to the
page could get lost if a third CPU raced with the TLB flush and did a
page_mkclean() before the page was fully written.

Admittedly, if you munmap() or madvise(DONTNEED) an area _while_ another
thread is still busy writing to it, you deserve all the lost writes you
could get.  But we kernel people hold ourselves to higher quality
standards than "crazy people deserve to lose", because, well, we've seen
people do all kinds of crazy things.

So let's get it right, just because we can, and then we don't have to
worry about it.

* safe-dirty-tlb-flush:
  mm: split 'tlb_flush_mmu()' into tlb flushing and memory freeing parts
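
For illustration only (editor's sketch, not part of the commit): the stand-alone C model below uses made-up *_model() helpers rather than real kernel APIs to show the ordering that the tlb_flush_mmu() split enables in zap_pte_range() -- the TLB invalidation now happens while the page-table lock (ptl) is still held, and only the batched page freeing is deferred until after the lock is dropped.

/* Hypothetical stand-alone model of the new ordering; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct mmu_gather_model {
	int nr_batched;			/* pages gathered for later freeing */
};

/* Stand-in for tlb_flush_mmu_tlbonly(): shoot down stale TLB entries only. */
static void flush_tlb_only_model(struct mmu_gather_model *tlb)
{
	(void)tlb;
	printf("flush TLB range: no CPU can keep writing through a stale entry\n");
}

/* Stand-in for tlb_flush_mmu_free(): hand the gathered pages back. */
static void free_batched_pages_model(struct mmu_gather_model *tlb)
{
	printf("free %d batched pages\n", tlb->nr_batched);
	tlb->nr_batched = 0;
}

int main(void)
{
	struct mmu_gather_model tlb = { .nr_batched = 3 };
	bool force_flush = true;	/* e.g. a dirty pte was zapped */

	/* ... ptes cleared and pages batched while holding the pte lock ... */

	if (force_flush)
		flush_tlb_only_model(&tlb);	/* still holding ptl */

	/* pte_unmap_unlock(): the pte lock is dropped here */

	if (force_flush)
		free_batched_pages_model(&tlb);	/* fine to do without ptl */

	return 0;
}

The intent, per the commit message and the mm/memory.c hunks below, is that the page is not handed back to the allocator until every stale (possibly dirty and writable) TLB entry has been invalidated under the ptl, so a racing page_mkclean() can no longer see a clean pte while another CPU still has unwritten data in flight.
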

6 files changed:

arch/arm/include/asm/tlb.h
@@ -98,13 +98,23 @@
 	}
 }
 
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
 	tlb_flush(tlb);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
 	free_pages_and_swap_cache(tlb->pages, tlb->nr);
 	tlb->nr = 0;
 	if (tlb->pages == tlb->local)
 		__tlb_alloc_page(tlb);
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
 }
 
 static inline void
arch/ia64/include/asm/tlb.h
@@ -91,18 +91,9 @@
 #define RR_RID_MASK	0x00000000ffffff00L
 #define RR_TO_RID(val)	((val >> 8) & 0xffffff)
 
-/*
- * Flush the TLB for address range START to END and, if not in fast mode, release the
- * freed pages that where gathered up to this point.
- */
 static inline void
-ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
+ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	unsigned long i;
-	unsigned int nr;
-
-	if (!tlb->need_flush)
-		return;
 	tlb->need_flush = 0;
 
 	if (tlb->fullmm) {
@@ -135,6 +126,14 @@
 		flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
 	}
 
+}
+
+static inline void
+ia64_tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	unsigned long i;
+	unsigned int nr;
+
 	/* lastly, release the freed pages */
 	nr = tlb->nr;
 
@@ -144,6 +143,19 @@
 		free_page_and_swap_cache(tlb->pages[i]);
 }
 
+/*
+ * Flush the TLB for address range START to END and, if not in fast mode, release the
+ * freed pages that where gathered up to this point.
+ */
+static inline void
+ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	if (!tlb->need_flush)
+		return;
+	ia64_tlb_flush_mmu_tlbonly(tlb, start, end);
+	ia64_tlb_flush_mmu_free(tlb);
+}
+
 static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 {
 	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
@@ -204,6 +216,16 @@
 	VM_BUG_ON(tlb->nr > tlb->max);
 
 	return tlb->max - tlb->nr;
+}
+
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu_tlbonly(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu_free(tlb);
 }
 
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
arch/s390/include/asm/tlb.h
@@ -59,10 +59,21 @@
 	tlb->batch = NULL;
 }
 
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
 	__tlb_flush_mm_lazy(tlb->mm);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
 	tlb_table_flush(tlb);
+}
+
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
 }
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
arch/sh/include/asm/tlb.h
@@ -86,6 +86,14 @@
 	}
 }
 
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+}
+
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
 }
arch/um/include/asm/tlb.h
@@ -59,13 +59,25 @@
 			       unsigned long end);
 
 static inline void
+tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+	flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
+}
+
+static inline void
+tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	init_tlb_gather(tlb);
+}
+
+static inline void
 tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	if (!tlb->need_flush)
 		return;
 
-	flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
-	init_tlb_gather(tlb);
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
 }
 
 /* tlb_finish_mmu
mm/memory.c
@@ -232,18 +232,19 @@
 #endif
 }
 
-void tlb_flush_mmu(struct mmu_gather *tlb)
+static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
-	struct mmu_gather_batch *batch;
-
-	if (!tlb->need_flush)
-		return;
 	tlb->need_flush = 0;
 	tlb_flush(tlb);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
 #endif
+}
 
+static void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	struct mmu_gather_batch *batch;
+
 	for (batch = &tlb->local; batch; batch = batch->next) {
 		free_pages_and_swap_cache(batch->pages, batch->nr);
 		batch->nr = 0;
@@ -251,6 +252,14 @@
 	tlb->active = &tlb->local;
 }
 
+void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	if (!tlb->need_flush)
+		return;
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
+}
+
 /* tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
@@ -1127,8 +1136,10 @@
 			if (PageAnon(page))
 				rss[MM_ANONPAGES]--;
 			else {
-				if (pte_dirty(ptent))
+				if (pte_dirty(ptent)) {
+					force_flush = 1;
 					set_page_dirty(page);
+				}
 				if (pte_young(ptent) &&
 				    likely(!(vma->vm_flags & VM_SEQ_READ)))
 					mark_page_accessed(page);
@@ -1137,9 +1148,10 @@
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			force_flush = !__tlb_remove_page(tlb, page);
-			if (force_flush)
+			if (unlikely(!__tlb_remove_page(tlb, page))) {
+				force_flush = 1;
 				break;
+			}
 			continue;
 		}
 		/*
@@ -1174,18 +1186,11 @@
 
 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
-	pte_unmap_unlock(start_pte, ptl);
 
-	/*
-	 * mmu_gather ran out of room to batch pages, we break out of
-	 * the PTE lock to avoid doing the potential expensive TLB invalidate
-	 * and page-free while holding it.
-	 */
+	/* Do the actual TLB flush before dropping ptl */
 	if (force_flush) {
 		unsigned long old_end;
 
-		force_flush = 0;
-
 		/*
 		 * Flush the TLB just for the previous segment,
 		 * then update the range to be the remaining
@@ -1193,11 +1198,21 @@
 		 */
 		old_end = tlb->end;
 		tlb->end = addr;
-
-		tlb_flush_mmu(tlb);
-
+		tlb_flush_mmu_tlbonly(tlb);
 		tlb->start = addr;
 		tlb->end = old_end;
+	}
+	pte_unmap_unlock(start_pte, ptl);
+
+	/*
+	 * If we forced a TLB flush (either due to running out of
+	 * batch buffers or because we needed to flush dirty TLB
+	 * entries before releasing the ptl), free the batched
+	 * memory too. Restart if we didn't do everything.
+	 */
+	if (force_flush) {
+		force_flush = 0;
+		tlb_flush_mmu_free(tlb);
 
 		if (addr != end)
 			goto again;