Commit 833af8427be4b217b5bc522f61afdbd3f1d282c2

Authored by Tejun Heo
1 parent 4a6cc4bd32

percpu: restructure pcpu_extend_area_map() to fix bugs and improve readability

pcpu_extend_area_map() had the following two bugs.

* It should return 1 if pcpu_lock was dropped and reacquired, but it
  returned 0.  This could lead to an oops if free_percpu() races with
  area map extension.

* pcpu_mem_free() was called under pcpu_lock.  pcpu_mem_free() might
  end up calling vfree(), which isn't IRQ-safe.  This could lead to
  deadlock through lock order inversion via IRQ (see the sketch below).
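
The second bug is a classic IRQ lock order inversion.  A minimal
sketch of the scenario, using hypothetical locks (lock_a stands in
for pcpu_lock, lock_b for the IRQ-unsafe lock vfree() may take;
illustrative only, not code from the tree):

  static DEFINE_SPINLOCK(lock_a);  /* IRQ-safe usage, like pcpu_lock */
  static DEFINE_SPINLOCK(lock_b);  /* IRQ-unsafe usage, like vfree()'s lock */

  static void buggy_path(void)
  {
          unsigned long flags;

          spin_lock_irqsave(&lock_a, flags);
          /* BUG: IRQ-unsafe lock acquired under an IRQ-safe one */
          spin_lock(&lock_b);
          spin_unlock(&lock_b);
          spin_unlock_irqrestore(&lock_a, flags);
  }

  static void benign_path(void)  /* runs with IRQs enabled */
  {
          spin_lock(&lock_b);
          /*
           * If an IRQ fires here and its handler takes lock_a while
           * another CPU is in buggy_path() holding lock_a and spinning
           * on lock_b, neither side can proceed: this CPU can't release
           * lock_b until the handler returns, and the handler can't get
           * lock_a.  AB-BA deadlock.
           */
          spin_unlock(&lock_b);
  }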

In addition, Linus pointed out that the temporary lock dropping and
the subtle three-way return value of pcpu_extend_area_map() were very
ugly and suggested splitting the function in two: pcpu_need_to_extend()
and pcpu_extend_area_map().

This patch restructures pcpu_extend_area_map() as suggested and fixes
the two bugs.
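
With the split, the caller in pcpu_alloc() simply re-checks after
every lock drop; the loop below is the pattern the hunks further down
introduce for the reserved chunk:

  while ((new_alloc = pcpu_need_to_extend(chunk))) {
          spin_unlock_irqrestore(&pcpu_lock, flags);
          if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
                  err = "failed to extend area map of reserved chunk";
                  goto fail_unlock_mutex;
          }
          spin_lock_irqsave(&pcpu_lock, flags);
          /* pcpu_lock was dropped; chunk state must be re-checked */
  }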

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>

Showing 1 changed file with 81 additions and 40 deletions

... ... @@ -355,62 +355,86 @@
355 355 }
356 356  
357 357 /**
358   - * pcpu_extend_area_map - extend area map for allocation
359   - * @chunk: target chunk
  358 + * pcpu_need_to_extend - determine whether chunk area map needs to be extended
  359 + * @chunk: chunk of interest
360 360 *
361   - * Extend area map of @chunk so that it can accomodate an allocation.
362   - * A single allocation can split an area into three areas, so this
363   - * function makes sure that @chunk->map has at least two extra slots.
  361 + * Determine whether area map of @chunk needs to be extended to
  362 + * accommodate a new allocation.
364 363 *
365 364 * CONTEXT:
366   - * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired
367   - * if area map is extended.
  365 + * pcpu_lock.
368 366 *
369 367 * RETURNS:
370   - * 0 if noop, 1 if successfully extended, -errno on failure.
  368 + * New target map allocation length if extension is necessary, 0
  369 + * otherwise.
371 370 */
372   -static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags)
  371 +static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
373 372 {
374 373 int new_alloc;
375   - int *new;
376   - size_t size;
377 374  
378   - /* has enough? */
379 375 if (chunk->map_alloc >= chunk->map_used + 2)
380 376 return 0;
381 377  
382   - spin_unlock_irqrestore(&pcpu_lock, *flags);
383   -
384 378 new_alloc = PCPU_DFL_MAP_ALLOC;
385 379 while (new_alloc < chunk->map_used + 2)
386 380 new_alloc *= 2;
387 381  
388   - new = pcpu_mem_alloc(new_alloc * sizeof(new[0]));
389   - if (!new) {
390   - spin_lock_irqsave(&pcpu_lock, *flags);
  382 + return new_alloc;
  383 +}
  384 +
  385 +/**
  386 + * pcpu_extend_area_map - extend area map of a chunk
  387 + * @chunk: chunk of interest
  388 + * @new_alloc: new target allocation length of the area map
  389 + *
  390 + * Extend area map of @chunk to have @new_alloc entries.
  391 + *
  392 + * CONTEXT:
  393 + * Does GFP_KERNEL allocation. Grabs and releases pcpu_lock.
  394 + *
  395 + * RETURNS:
  396 + * 0 on success, -errno on failure.
  397 + */
  398 +static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
  399 +{
  400 + int *old = NULL, *new = NULL;
  401 + size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
  402 + unsigned long flags;
  403 +
  404 + new = pcpu_mem_alloc(new_size);
  405 + if (!new)
391 406 return -ENOMEM;
392   - }
393 407  
394   - /*
395   - * Acquire pcpu_lock and switch to new area map. Only free
396   - * could have happened inbetween, so map_used couldn't have
397   - * grown.
398   - */
399   - spin_lock_irqsave(&pcpu_lock, *flags);
400   - BUG_ON(new_alloc < chunk->map_used + 2);
  408 + /* acquire pcpu_lock and switch to new area map */
  409 + spin_lock_irqsave(&pcpu_lock, flags);
401 410  
402   - size = chunk->map_alloc * sizeof(chunk->map[0]);
403   - memcpy(new, chunk->map, size);
  411 + if (new_alloc <= chunk->map_alloc)
  412 + goto out_unlock;
404 413  
  414 + old_size = chunk->map_alloc * sizeof(chunk->map[0]);
  415 + memcpy(new, chunk->map, old_size);
  416 +
405 417 /*
406 418 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
407 419 * one of the first chunks and still using static map.
408 420 */
409 421 if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
410   - pcpu_mem_free(chunk->map, size);
  422 + old = chunk->map;
411 423  
412 424 chunk->map_alloc = new_alloc;
413 425 chunk->map = new;
  426 + new = NULL;
  427 +
  428 +out_unlock:
  429 + spin_unlock_irqrestore(&pcpu_lock, flags);
  430 +
  431 + /*
  432 + * pcpu_mem_free() might end up calling vfree() which uses
  433 + * IRQ-unsafe lock and thus can't be called under pcpu_lock.
  434 + */
  435 + pcpu_mem_free(old, old_size);
  436 + pcpu_mem_free(new, new_size);
  437 +
414 438 return 0;
415 439 }
416 440  
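
The new pcpu_extend_area_map() above follows a common idiom: allocate
before taking the lock, swap pointers under the lock, and defer every
free until after the unlock.  A generic sketch of the same pattern,
with a hypothetical structure and names not from this patch:

  struct foo {
          spinlock_t lock;
          void *buf;
          size_t size;
  };

  static int grow_buffer(struct foo *f, size_t new_size)
  {
          void *new = kmalloc(new_size, GFP_KERNEL);  /* no lock held */
          void *old = NULL;
          unsigned long flags;

          if (!new)
                  return -ENOMEM;

          spin_lock_irqsave(&f->lock, flags);
          if (f->size < new_size) {
                  memcpy(new, f->buf, f->size);
                  old = f->buf;           /* free after unlock */
                  f->buf = new;
                  f->size = new_size;
                  new = NULL;             /* ownership transferred */
          }
          spin_unlock_irqrestore(&f->lock, flags);

          /* both frees run with the spinlock released and IRQs restored */
          kfree(old);                     /* kfree(NULL) is a no-op */
          kfree(new);
          return 0;
  }

Transferring ownership by NULLing the local pointer lets a single pair
of unconditional free calls handle both the success case and the
already-extended case, exactly as the pcpu_mem_free() pair does above.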
... ... @@ -1049,7 +1073,7 @@
1049 1073 static int warn_limit = 10;
1050 1074 struct pcpu_chunk *chunk;
1051 1075 const char *err;
1052   - int slot, off;
  1076 + int slot, off, new_alloc;
1053 1077 unsigned long flags;
1054 1078  
1055 1079 if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
1056 1080  
1057 1081  
... ... @@ -1064,14 +1088,25 @@
1064 1088 /* serve reserved allocations from the reserved chunk if available */
1065 1089 if (reserved && pcpu_reserved_chunk) {
1066 1090 chunk = pcpu_reserved_chunk;
1067   - if (size > chunk->contig_hint ||
1068   - pcpu_extend_area_map(chunk, &flags) < 0) {
1069   - err = "failed to extend area map of reserved chunk";
  1091 +
  1092 + if (size > chunk->contig_hint) {
  1093 + err = "alloc from reserved chunk failed";
1070 1094 goto fail_unlock;
1071 1095 }
  1096 +
  1097 + while ((new_alloc = pcpu_need_to_extend(chunk))) {
  1098 + spin_unlock_irqrestore(&pcpu_lock, flags);
  1099 + if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
  1100 + err = "failed to extend area map of reserved chunk";
  1101 + goto fail_unlock_mutex;
  1102 + }
  1103 + spin_lock_irqsave(&pcpu_lock, flags);
  1104 + }
  1105 +
1072 1106 off = pcpu_alloc_area(chunk, size, align);
1073 1107 if (off >= 0)
1074 1108 goto area_found;
  1109 +
1075 1110 err = "alloc from reserved chunk failed";
1076 1111 goto fail_unlock;
1077 1112 }
... ... @@ -1083,14 +1118,20 @@
1083 1118 if (size > chunk->contig_hint)
1084 1119 continue;
1085 1120  
1086   - switch (pcpu_extend_area_map(chunk, &flags)) {
1087   - case 0:
1088   - break;
1089   - case 1:
1090   - goto restart; /* pcpu_lock dropped, restart */
1091   - default:
1092   - err = "failed to extend area map";
1093   - goto fail_unlock;
  1121 + new_alloc = pcpu_need_to_extend(chunk);
  1122 + if (new_alloc) {
  1123 + spin_unlock_irqrestore(&pcpu_lock, flags);
  1124 + if (pcpu_extend_area_map(chunk,
  1125 + new_alloc) < 0) {
  1126 + err = "failed to extend area map";
  1127 + goto fail_unlock_mutex;
  1128 + }
  1129 + spin_lock_irqsave(&pcpu_lock, flags);
  1130 + /*
  1131 + * pcpu_lock has been dropped, need to
  1132 + * restart cpu_slot list walking.
  1133 + */
  1134 + goto restart;
1094 1135 }
1095 1136  
1096 1137 off = pcpu_alloc_area(chunk, size, align);