Commit 723ad1d90b5663ab623bb3bfba3e4ee7101795d7
Committed by
Tejun Heo
1 parent
706c16f237
Exists in
master
and in
13 other branches
percpu: store offsets instead of lengths in ->map[]
Current code keeps +-length for each area in chunk->map[]. It has several unpleasant consequences: * even if we know that first 50 areas are all in use, allocation still needs to go through all those areas just to sum their sizes, just to get the offset of a free one. * freeing needs to find the array entry referring to the area in question; again, it needs to sum the sizes until we reach the offset we are interested in. Note that offsets are monotonic, so simple binary search would do here. New data representation: array of <offset,in-use flag> pairs. Each pair is represented by one int - we use offset|1 for <offset, in use> and offset for <offset, free> (we make sure that all offsets are even). In the end we put a sentry entry - <total size, in use>. The first entry is <0, flag>; it would be possible to store together the flag for Nth area and offset for N+1st, but that leads to much hairier code. In other words, where the old variant would have 4, -8, -4, 4, -12, 100 (4 bytes free, 8 in use, 4 in use, 4 free, 12 in use, 100 free) we store <0,0>, <4,1>, <12,1>, <16,0>, <20,1>, <32,0>, <132,1> i.e. 0, 5, 13, 16, 21, 32, 133 This commit switches to new data representation and takes care of a couple of low-hanging fruits in pcpu_free_area() - one is the switch to binary search, another is not doing two memmove() when one would do. Speeding the alloc side up (by keeping track of how many areas in the beginning are known to be all in use) also becomes possible - that'll be done in the next commit. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Tejun Heo <tj@kernel.org>
Showing 1 changed file with 81 additions and 55 deletions Side-by-side Diff
mm/percpu.c
... | ... | @@ -102,7 +102,7 @@ |
102 | 102 | int free_size; /* free bytes in the chunk */ |
103 | 103 | int contig_hint; /* max contiguous size hint */ |
104 | 104 | void *base_addr; /* base address of this chunk */ |
105 | - int map_used; /* # of map entries used */ | |
105 | + int map_used; /* # of map entries used before the sentry */ | |
106 | 106 | int map_alloc; /* # of map entries allocated */ |
107 | 107 | int *map; /* allocation map */ |
108 | 108 | void *data; /* chunk data */ |
109 | 109 | |
... | ... | @@ -356,11 +356,11 @@ |
356 | 356 | { |
357 | 357 | int new_alloc; |
358 | 358 | |
359 | - if (chunk->map_alloc >= chunk->map_used + 2) | |
359 | + if (chunk->map_alloc >= chunk->map_used + 3) | |
360 | 360 | return 0; |
361 | 361 | |
362 | 362 | new_alloc = PCPU_DFL_MAP_ALLOC; |
363 | - while (new_alloc < chunk->map_used + 2) | |
363 | + while (new_alloc < chunk->map_used + 3) | |
364 | 364 | new_alloc *= 2; |
365 | 365 | |
366 | 366 | return new_alloc; |
367 | 367 | |
368 | 368 | |
369 | 369 | |
370 | 370 | |
371 | 371 | |
372 | 372 | |
... | ... | @@ -441,20 +441,23 @@ |
441 | 441 | int oslot = pcpu_chunk_slot(chunk); |
442 | 442 | int max_contig = 0; |
443 | 443 | int i, off; |
444 | + int *p; | |
444 | 445 | |
445 | - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { | |
446 | - bool is_last = i + 1 == chunk->map_used; | |
446 | + for (i = 0, p = chunk->map; i < chunk->map_used; i++, p++) { | |
447 | 447 | int head, tail; |
448 | + int this_size; | |
448 | 449 | |
450 | + off = *p; | |
451 | + if (off & 1) | |
452 | + continue; | |
453 | + | |
449 | 454 | /* extra for alignment requirement */ |
450 | 455 | head = ALIGN(off, align) - off; |
451 | - BUG_ON(i == 0 && head != 0); | |
452 | 456 | |
453 | - if (chunk->map[i] < 0) | |
457 | + this_size = (p[1] & ~1) - off; | |
458 | + if (this_size < head + size) { | |
459 | + max_contig = max(this_size, max_contig); | |
454 | 460 | continue; |
455 | - if (chunk->map[i] < head + size) { | |
456 | - max_contig = max(chunk->map[i], max_contig); | |
457 | - continue; | |
458 | 461 | } |
459 | 462 | |
460 | 463 | /* |
461 | 464 | |
462 | 465 | |
463 | 466 | |
464 | 467 | |
465 | 468 | |
466 | 469 | |
467 | 470 | |
468 | 471 | |
... | ... | @@ -463,55 +466,50 @@ |
463 | 466 | * than sizeof(int), which is very small but isn't too |
464 | 467 | * uncommon for percpu allocations. |
465 | 468 | */ |
466 | - if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) { | |
467 | - if (chunk->map[i - 1] > 0) | |
468 | - chunk->map[i - 1] += head; | |
469 | - else { | |
470 | - chunk->map[i - 1] -= head; | |
469 | + if (head && (head < sizeof(int) || !(p[-1] & 1))) { | |
470 | + if (p[-1] & 1) | |
471 | 471 | chunk->free_size -= head; |
472 | - } | |
473 | - chunk->map[i] -= head; | |
474 | - off += head; | |
472 | + *p = off += head; | |
473 | + this_size -= head; | |
475 | 474 | head = 0; |
476 | 475 | } |
477 | 476 | |
478 | 477 | /* if tail is small, just keep it around */ |
479 | - tail = chunk->map[i] - head - size; | |
480 | - if (tail < sizeof(int)) | |
478 | + tail = this_size - head - size; | |
479 | + if (tail < sizeof(int)) { | |
481 | 480 | tail = 0; |
481 | + size = this_size - head; | |
482 | + } | |
482 | 483 | |
483 | 484 | /* split if warranted */ |
484 | 485 | if (head || tail) { |
485 | 486 | int nr_extra = !!head + !!tail; |
486 | 487 | |
487 | 488 | /* insert new subblocks */ |
488 | - memmove(&chunk->map[i + nr_extra], &chunk->map[i], | |
489 | + memmove(p + nr_extra + 1, p + 1, | |
489 | 490 | sizeof(chunk->map[0]) * (chunk->map_used - i)); |
490 | 491 | chunk->map_used += nr_extra; |
491 | 492 | |
492 | 493 | if (head) { |
493 | - chunk->map[i + 1] = chunk->map[i] - head; | |
494 | - chunk->map[i] = head; | |
495 | - off += head; | |
496 | - i++; | |
494 | + *++p = off += head; | |
495 | + ++i; | |
497 | 496 | max_contig = max(head, max_contig); |
498 | 497 | } |
499 | 498 | if (tail) { |
500 | - chunk->map[i] -= tail; | |
501 | - chunk->map[i + 1] = tail; | |
499 | + p[1] = off + size; | |
502 | 500 | max_contig = max(tail, max_contig); |
503 | 501 | } |
504 | 502 | } |
505 | 503 | |
506 | 504 | /* update hint and mark allocated */ |
507 | - if (is_last) | |
505 | + if (i + 1 == chunk->map_used) | |
508 | 506 | chunk->contig_hint = max_contig; /* fully scanned */ |
509 | 507 | else |
510 | 508 | chunk->contig_hint = max(chunk->contig_hint, |
511 | 509 | max_contig); |
512 | 510 | |
513 | - chunk->free_size -= chunk->map[i]; | |
514 | - chunk->map[i] = -chunk->map[i]; | |
511 | + chunk->free_size -= size; | |
512 | + *p |= 1; | |
515 | 513 | |
516 | 514 | pcpu_chunk_relocate(chunk, oslot); |
517 | 515 | return off; |
518 | 516 | |
519 | 517 | |
520 | 518 | |
521 | 519 | |
522 | 520 | |
523 | 521 | |
524 | 522 | |
525 | 523 | |
... | ... | @@ -539,34 +537,47 @@ |
539 | 537 | static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) |
540 | 538 | { |
541 | 539 | int oslot = pcpu_chunk_slot(chunk); |
542 | - int i, off; | |
540 | + int off = 0; | |
541 | + unsigned i, j; | |
542 | + int to_free = 0; | |
543 | + int *p; | |
543 | 544 | |
544 | - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) | |
545 | - if (off == freeme) | |
546 | - break; | |
545 | + freeme |= 1; /* we are searching for <given offset, in use> pair */ | |
546 | + | |
547 | + i = 0; | |
548 | + j = chunk->map_used; | |
549 | + while (i != j) { | |
550 | + unsigned k = (i + j) / 2; | |
551 | + off = chunk->map[k]; | |
552 | + if (off < freeme) | |
553 | + i = k + 1; | |
554 | + else if (off > freeme) | |
555 | + j = k; | |
556 | + else | |
557 | + i = j = k; | |
558 | + } | |
547 | 559 | BUG_ON(off != freeme); |
548 | - BUG_ON(chunk->map[i] > 0); | |
549 | 560 | |
550 | - chunk->map[i] = -chunk->map[i]; | |
551 | - chunk->free_size += chunk->map[i]; | |
561 | + p = chunk->map + i; | |
562 | + *p = off &= ~1; | |
563 | + chunk->free_size += (p[1] & ~1) - off; | |
552 | 564 | |
565 | + /* merge with next? */ | |
566 | + if (!(p[1] & 1)) | |
567 | + to_free++; | |
553 | 568 | /* merge with previous? */ |
554 | - if (i > 0 && chunk->map[i - 1] >= 0) { | |
555 | - chunk->map[i - 1] += chunk->map[i]; | |
556 | - chunk->map_used--; | |
557 | - memmove(&chunk->map[i], &chunk->map[i + 1], | |
558 | - (chunk->map_used - i) * sizeof(chunk->map[0])); | |
569 | + if (i > 0 && !(p[-1] & 1)) { | |
570 | + to_free++; | |
559 | 571 | i--; |
572 | + p--; | |
560 | 573 | } |
561 | - /* merge with next? */ | |
562 | - if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) { | |
563 | - chunk->map[i] += chunk->map[i + 1]; | |
564 | - chunk->map_used--; | |
565 | - memmove(&chunk->map[i + 1], &chunk->map[i + 2], | |
566 | - (chunk->map_used - (i + 1)) * sizeof(chunk->map[0])); | |
574 | + if (to_free) { | |
575 | + chunk->map_used -= to_free; | |
576 | + memmove(p + 1, p + 1 + to_free, | |
577 | + (chunk->map_used - i) * sizeof(chunk->map[0])); | |
567 | 578 | } |
568 | 579 | |
569 | - chunk->contig_hint = max(chunk->map[i], chunk->contig_hint); | |
580 | + chunk->contig_hint = max(chunk->map[i + 1] - chunk->map[i] - 1, chunk->contig_hint); | |
570 | 581 | pcpu_chunk_relocate(chunk, oslot); |
571 | 582 | } |
572 | 583 | |
... | ... | @@ -586,7 +597,9 @@ |
586 | 597 | } |
587 | 598 | |
588 | 599 | chunk->map_alloc = PCPU_DFL_MAP_ALLOC; |
589 | - chunk->map[chunk->map_used++] = pcpu_unit_size; | |
600 | + chunk->map[0] = 0; | |
601 | + chunk->map[1] = pcpu_unit_size | 1; | |
602 | + chunk->map_used = 1; | |
590 | 603 | |
591 | 604 | INIT_LIST_HEAD(&chunk->list); |
592 | 605 | chunk->free_size = pcpu_unit_size; |
... | ... | @@ -682,6 +695,13 @@ |
682 | 695 | unsigned long flags; |
683 | 696 | void __percpu *ptr; |
684 | 697 | |
698 | + /* | |
699 | + * We want the lowest bit of offset available for in-use/free | |
700 | + * indicator. | |
701 | + */ | |
702 | + if (unlikely(align < 2)) | |
703 | + align = 2; | |
704 | + | |
685 | 705 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { |
686 | 706 | WARN(true, "illegal size (%zu) or align (%zu) for " |
687 | 707 | "percpu allocation\n", size, align); |
688 | 708 | |
... | ... | @@ -1312,9 +1332,13 @@ |
1312 | 1332 | } |
1313 | 1333 | schunk->contig_hint = schunk->free_size; |
1314 | 1334 | |
1315 | - schunk->map[schunk->map_used++] = -ai->static_size; | |
1335 | + schunk->map[0] = 1; | |
1336 | + schunk->map[1] = ai->static_size; | |
1337 | + schunk->map_used = 1; | |
1316 | 1338 | if (schunk->free_size) |
1317 | - schunk->map[schunk->map_used++] = schunk->free_size; | |
1339 | + schunk->map[++schunk->map_used] = 1 | (ai->static_size + schunk->free_size); | |
1340 | + else | |
1341 | + schunk->map[1] |= 1; | |
1318 | 1342 | |
1319 | 1343 | /* init dynamic chunk if necessary */ |
1320 | 1344 | if (dyn_size) { |
... | ... | @@ -1327,8 +1351,10 @@ |
1327 | 1351 | bitmap_fill(dchunk->populated, pcpu_unit_pages); |
1328 | 1352 | |
1329 | 1353 | dchunk->contig_hint = dchunk->free_size = dyn_size; |
1330 | - dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; | |
1331 | - dchunk->map[dchunk->map_used++] = dchunk->free_size; | |
1354 | + dchunk->map[0] = 1; | |
1355 | + dchunk->map[1] = pcpu_reserved_chunk_limit; | |
1356 | + dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1; | |
1357 | + dchunk->map_used = 2; | |
1332 | 1358 | } |
1333 | 1359 | |
1334 | 1360 | /* link the first chunk in */ |