Commit acf92b485cccf028177f46918e045c0c4e80ee10
Committed by
Al Viro
1 parent
095760730c
Exists in
master
and in
6 other branches
vmscan: shrinker->nr updates race and go wrong
shrink_slab() allows shrinkers to be called in parallel so the struct shrinker can be updated concurrently. It does not provide any exclusio for such updates, so we can get the shrinker->nr value increasing or decreasing incorrectly. As a result, when a shrinker repeatedly returns a value of -1 (e.g. a VFS shrinker called w/ GFP_NOFS), the shrinker->nr goes haywire, sometimes updating with the scan count that wasn't used, sometimes losing it altogether. Worse is when a shrinker does work and that update is lost due to racy updates, which means the shrinker will do the work again! Fix this by making the total_scan calculations independent of shrinker->nr, and making the shrinker->nr updates atomic w.r.t. to other updates via cmpxchg loops. Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Showing 1 changed file with 32 additions and 13 deletions Side-by-side Diff
mm/vmscan.c
... | ... | @@ -251,17 +251,29 @@ |
251 | 251 | unsigned long total_scan; |
252 | 252 | unsigned long max_pass; |
253 | 253 | int shrink_ret = 0; |
254 | + long nr; | |
255 | + long new_nr; | |
254 | 256 | |
257 | + /* | |
258 | + * copy the current shrinker scan count into a local variable | |
259 | + * and zero it so that other concurrent shrinker invocations | |
260 | + * don't also do this scanning work. | |
261 | + */ | |
262 | + do { | |
263 | + nr = shrinker->nr; | |
264 | + } while (cmpxchg(&shrinker->nr, nr, 0) != nr); | |
265 | + | |
266 | + total_scan = nr; | |
255 | 267 | max_pass = do_shrinker_shrink(shrinker, shrink, 0); |
256 | 268 | delta = (4 * nr_pages_scanned) / shrinker->seeks; |
257 | 269 | delta *= max_pass; |
258 | 270 | do_div(delta, lru_pages + 1); |
259 | - shrinker->nr += delta; | |
260 | - if (shrinker->nr < 0) { | |
271 | + total_scan += delta; | |
272 | + if (total_scan < 0) { | |
261 | 273 | printk(KERN_ERR "shrink_slab: %pF negative objects to " |
262 | 274 | "delete nr=%ld\n", |
263 | - shrinker->shrink, shrinker->nr); | |
264 | - shrinker->nr = max_pass; | |
275 | + shrinker->shrink, total_scan); | |
276 | + total_scan = max_pass; | |
265 | 277 | } |
266 | 278 | |
267 | 279 | /* |
268 | 280 | |
... | ... | @@ -269,13 +281,10 @@ |
269 | 281 | * never try to free more than twice the estimate number of |
270 | 282 | * freeable entries. |
271 | 283 | */ |
272 | - if (shrinker->nr > max_pass * 2) | |
273 | - shrinker->nr = max_pass * 2; | |
284 | + if (total_scan > max_pass * 2) | |
285 | + total_scan = max_pass * 2; | |
274 | 286 | |
275 | - total_scan = shrinker->nr; | |
276 | - shrinker->nr = 0; | |
277 | - | |
278 | - trace_mm_shrink_slab_start(shrinker, shrink, total_scan, | |
287 | + trace_mm_shrink_slab_start(shrinker, shrink, nr, | |
279 | 288 | nr_pages_scanned, lru_pages, |
280 | 289 | max_pass, delta, total_scan); |
281 | 290 | |
... | ... | @@ -296,9 +305,19 @@ |
296 | 305 | cond_resched(); |
297 | 306 | } |
298 | 307 | |
299 | - shrinker->nr += total_scan; | |
300 | - trace_mm_shrink_slab_end(shrinker, shrink_ret, total_scan, | |
301 | - shrinker->nr); | |
308 | + /* | |
309 | + * move the unused scan count back into the shrinker in a | |
310 | + * manner that handles concurrent updates. If we exhausted the | |
311 | + * scan, there is no need to do an update. | |
312 | + */ | |
313 | + do { | |
314 | + nr = shrinker->nr; | |
315 | + new_nr = total_scan + nr; | |
316 | + if (total_scan <= 0) | |
317 | + break; | |
318 | + } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr); | |
319 | + | |
320 | + trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); | |
302 | 321 | } |
303 | 322 | up_read(&shrinker_rwsem); |
304 | 323 | out: |