Commit acf92b485cccf028177f46918e045c0c4e80ee10

Authored by Dave Chinner
Committed by Al Viro
1 parent 095760730c

vmscan: shrinker->nr updates race and go wrong

shrink_slab() allows shrinkers to be called in parallel so the
struct shrinker can be updated concurrently. It does not provide any
exclusio for such updates, so we can get the shrinker->nr value
increasing or decreasing incorrectly.

As a result, when a shrinker repeatedly returns a value of -1 (e.g.
a VFS shrinker called w/ GFP_NOFS), the shrinker->nr goes haywire,
sometimes updating with the scan count that wasn't used, sometimes
losing it altogether. Worse is when a shrinker does work and that
update is lost due to racy updates, which means the shrinker will do
the work again!

Fix this by making the total_scan calculations independent of
shrinker->nr, and making the shrinker->nr updates atomic w.r.t. to
other updates via cmpxchg loops.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 1 changed file with 32 additions and 13 deletions Side-by-side Diff

... ... @@ -251,17 +251,29 @@
251 251 unsigned long total_scan;
252 252 unsigned long max_pass;
253 253 int shrink_ret = 0;
  254 + long nr;
  255 + long new_nr;
254 256  
  257 + /*
  258 + * copy the current shrinker scan count into a local variable
  259 + * and zero it so that other concurrent shrinker invocations
  260 + * don't also do this scanning work.
  261 + */
  262 + do {
  263 + nr = shrinker->nr;
  264 + } while (cmpxchg(&shrinker->nr, nr, 0) != nr);
  265 +
  266 + total_scan = nr;
255 267 max_pass = do_shrinker_shrink(shrinker, shrink, 0);
256 268 delta = (4 * nr_pages_scanned) / shrinker->seeks;
257 269 delta *= max_pass;
258 270 do_div(delta, lru_pages + 1);
259   - shrinker->nr += delta;
260   - if (shrinker->nr < 0) {
  271 + total_scan += delta;
  272 + if (total_scan < 0) {
261 273 printk(KERN_ERR "shrink_slab: %pF negative objects to "
262 274 "delete nr=%ld\n",
263   - shrinker->shrink, shrinker->nr);
264   - shrinker->nr = max_pass;
  275 + shrinker->shrink, total_scan);
  276 + total_scan = max_pass;
265 277 }
266 278  
267 279 /*
268 280  
... ... @@ -269,13 +281,10 @@
269 281 * never try to free more than twice the estimate number of
270 282 * freeable entries.
271 283 */
272   - if (shrinker->nr > max_pass * 2)
273   - shrinker->nr = max_pass * 2;
  284 + if (total_scan > max_pass * 2)
  285 + total_scan = max_pass * 2;
274 286  
275   - total_scan = shrinker->nr;
276   - shrinker->nr = 0;
277   -
278   - trace_mm_shrink_slab_start(shrinker, shrink, total_scan,
  287 + trace_mm_shrink_slab_start(shrinker, shrink, nr,
279 288 nr_pages_scanned, lru_pages,
280 289 max_pass, delta, total_scan);
281 290  
... ... @@ -296,9 +305,19 @@
296 305 cond_resched();
297 306 }
298 307  
299   - shrinker->nr += total_scan;
300   - trace_mm_shrink_slab_end(shrinker, shrink_ret, total_scan,
301   - shrinker->nr);
  308 + /*
  309 + * move the unused scan count back into the shrinker in a
  310 + * manner that handles concurrent updates. If we exhausted the
  311 + * scan, there is no need to do an update.
  312 + */
  313 + do {
  314 + nr = shrinker->nr;
  315 + new_nr = total_scan + nr;
  316 + if (total_scan <= 0)
  317 + break;
  318 + } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
  319 +
  320 + trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
302 321 }
303 322 up_read(&shrinker_rwsem);
304 323 out: