Commit 723ad1d90b5663ab623bb3bfba3e4ee7101795d7

Authored by Al Viro
Committed by Tejun Heo
1 parent 706c16f237

percpu: store offsets instead of lengths in ->map[]

Current code keeps +-length for each area in chunk->map[].  It has
several unpleasant consequences:
	* even if we know that first 50 areas are all in use, allocation
still needs to go through all those areas just to sum their sizes, just
to get the offset of free one.
	* freeing needs to find the array entry referring to the area
in question; again, the need to sum the sizes until we reach the offset
we are interested in.  Note that offsets are monotonic, so a simple
binary search would do here.

	New data representation: array of <offset,in-use flag> pairs.
Each pair is represented by one int - we use offset|1 for <offset, in use>
and offset for <offset, free> (we make sure that all offsets are even).
In the end we put a sentry entry - <total size, in use>.  The first
entry is <0, flag>; it would be possible to store together the flag
for Nth area and offset for N+1st, but that leads to much hairier code.

In other words, where the old variant would have
	4, -8, -4, 4, -12, 100
(4 bytes free, 8 in use, 4 in use, 4 free, 12 in use, 100 free) we store
	<0,0>, <4,1>, <12,1>, <16,0>, <20,1>, <32,0>, <132,1>
i.e.
	0, 5, 13, 16, 21, 32, 133

This commit switches to the new data representation and takes care of a
couple of low-hanging fruits in pcpu_free_area() - one is the switch to
binary search, another is not doing two memmove() when one would do.  Speeding
the alloc side up (by keeping track of how many areas in the beginning are
known to be all in use) also becomes possible - that'll be done in the next
commit.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Tejun Heo <tj@kernel.org>

Showing 1 changed file with 81 additions and 55 deletions Side-by-side Diff

... ... @@ -102,7 +102,7 @@
102 102 int free_size; /* free bytes in the chunk */
103 103 int contig_hint; /* max contiguous size hint */
104 104 void *base_addr; /* base address of this chunk */
105   - int map_used; /* # of map entries used */
  105 + int map_used; /* # of map entries used before the sentry */
106 106 int map_alloc; /* # of map entries allocated */
107 107 int *map; /* allocation map */
108 108 void *data; /* chunk data */
109 109  
... ... @@ -356,11 +356,11 @@
356 356 {
357 357 int new_alloc;
358 358  
359   - if (chunk->map_alloc >= chunk->map_used + 2)
  359 + if (chunk->map_alloc >= chunk->map_used + 3)
360 360 return 0;
361 361  
362 362 new_alloc = PCPU_DFL_MAP_ALLOC;
363   - while (new_alloc < chunk->map_used + 2)
  363 + while (new_alloc < chunk->map_used + 3)
364 364 new_alloc *= 2;
365 365  
366 366 return new_alloc;
367 367  
368 368  
369 369  
370 370  
371 371  
372 372  
... ... @@ -441,20 +441,23 @@
441 441 int oslot = pcpu_chunk_slot(chunk);
442 442 int max_contig = 0;
443 443 int i, off;
  444 + int *p;
444 445  
445   - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
446   - bool is_last = i + 1 == chunk->map_used;
  446 + for (i = 0, p = chunk->map; i < chunk->map_used; i++, p++) {
447 447 int head, tail;
  448 + int this_size;
448 449  
  450 + off = *p;
  451 + if (off & 1)
  452 + continue;
  453 +
449 454 /* extra for alignment requirement */
450 455 head = ALIGN(off, align) - off;
451   - BUG_ON(i == 0 && head != 0);
452 456  
453   - if (chunk->map[i] < 0)
  457 + this_size = (p[1] & ~1) - off;
  458 + if (this_size < head + size) {
  459 + max_contig = max(this_size, max_contig);
454 460 continue;
455   - if (chunk->map[i] < head + size) {
456   - max_contig = max(chunk->map[i], max_contig);
457   - continue;
458 461 }
459 462  
460 463 /*
461 464  
462 465  
463 466  
464 467  
465 468  
466 469  
467 470  
468 471  
... ... @@ -463,55 +466,50 @@
463 466 * than sizeof(int), which is very small but isn't too
464 467 * uncommon for percpu allocations.
465 468 */
466   - if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
467   - if (chunk->map[i - 1] > 0)
468   - chunk->map[i - 1] += head;
469   - else {
470   - chunk->map[i - 1] -= head;
  469 + if (head && (head < sizeof(int) || !(p[-1] & 1))) {
  470 + if (p[-1] & 1)
471 471 chunk->free_size -= head;
472   - }
473   - chunk->map[i] -= head;
474   - off += head;
  472 + *p = off += head;
  473 + this_size -= head;
475 474 head = 0;
476 475 }
477 476  
478 477 /* if tail is small, just keep it around */
479   - tail = chunk->map[i] - head - size;
480   - if (tail < sizeof(int))
  478 + tail = this_size - head - size;
  479 + if (tail < sizeof(int)) {
481 480 tail = 0;
  481 + size = this_size - head;
  482 + }
482 483  
483 484 /* split if warranted */
484 485 if (head || tail) {
485 486 int nr_extra = !!head + !!tail;
486 487  
487 488 /* insert new subblocks */
488   - memmove(&chunk->map[i + nr_extra], &chunk->map[i],
  489 + memmove(p + nr_extra + 1, p + 1,
489 490 sizeof(chunk->map[0]) * (chunk->map_used - i));
490 491 chunk->map_used += nr_extra;
491 492  
492 493 if (head) {
493   - chunk->map[i + 1] = chunk->map[i] - head;
494   - chunk->map[i] = head;
495   - off += head;
496   - i++;
  494 + *++p = off += head;
  495 + ++i;
497 496 max_contig = max(head, max_contig);
498 497 }
499 498 if (tail) {
500   - chunk->map[i] -= tail;
501   - chunk->map[i + 1] = tail;
  499 + p[1] = off + size;
502 500 max_contig = max(tail, max_contig);
503 501 }
504 502 }
505 503  
506 504 /* update hint and mark allocated */
507   - if (is_last)
  505 + if (i + 1 == chunk->map_used)
508 506 chunk->contig_hint = max_contig; /* fully scanned */
509 507 else
510 508 chunk->contig_hint = max(chunk->contig_hint,
511 509 max_contig);
512 510  
513   - chunk->free_size -= chunk->map[i];
514   - chunk->map[i] = -chunk->map[i];
  511 + chunk->free_size -= size;
  512 + *p |= 1;
515 513  
516 514 pcpu_chunk_relocate(chunk, oslot);
517 515 return off;
518 516  
519 517  
520 518  
521 519  
522 520  
523 521  
524 522  
525 523  
... ... @@ -539,34 +537,47 @@
539 537 static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
540 538 {
541 539 int oslot = pcpu_chunk_slot(chunk);
542   - int i, off;
  540 + int off = 0;
  541 + unsigned i, j;
  542 + int to_free = 0;
  543 + int *p;
543 544  
544   - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++]))
545   - if (off == freeme)
546   - break;
  545 + freeme |= 1; /* we are searching for <given offset, in use> pair */
  546 +
  547 + i = 0;
  548 + j = chunk->map_used;
  549 + while (i != j) {
  550 + unsigned k = (i + j) / 2;
  551 + off = chunk->map[k];
  552 + if (off < freeme)
  553 + i = k + 1;
  554 + else if (off > freeme)
  555 + j = k;
  556 + else
  557 + i = j = k;
  558 + }
547 559 BUG_ON(off != freeme);
548   - BUG_ON(chunk->map[i] > 0);
549 560  
550   - chunk->map[i] = -chunk->map[i];
551   - chunk->free_size += chunk->map[i];
  561 + p = chunk->map + i;
  562 + *p = off &= ~1;
  563 + chunk->free_size += (p[1] & ~1) - off;
552 564  
  565 + /* merge with next? */
  566 + if (!(p[1] & 1))
  567 + to_free++;
553 568 /* merge with previous? */
554   - if (i > 0 && chunk->map[i - 1] >= 0) {
555   - chunk->map[i - 1] += chunk->map[i];
556   - chunk->map_used--;
557   - memmove(&chunk->map[i], &chunk->map[i + 1],
558   - (chunk->map_used - i) * sizeof(chunk->map[0]));
  569 + if (i > 0 && !(p[-1] & 1)) {
  570 + to_free++;
559 571 i--;
  572 + p--;
560 573 }
561   - /* merge with next? */
562   - if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) {
563   - chunk->map[i] += chunk->map[i + 1];
564   - chunk->map_used--;
565   - memmove(&chunk->map[i + 1], &chunk->map[i + 2],
566   - (chunk->map_used - (i + 1)) * sizeof(chunk->map[0]));
  574 + if (to_free) {
  575 + chunk->map_used -= to_free;
  576 + memmove(p + 1, p + 1 + to_free,
  577 + (chunk->map_used - i) * sizeof(chunk->map[0]));
567 578 }
568 579  
569   - chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
  580 + chunk->contig_hint = max(chunk->map[i + 1] - chunk->map[i] - 1, chunk->contig_hint);
570 581 pcpu_chunk_relocate(chunk, oslot);
571 582 }
572 583  
... ... @@ -586,7 +597,9 @@
586 597 }
587 598  
588 599 chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
589   - chunk->map[chunk->map_used++] = pcpu_unit_size;
  600 + chunk->map[0] = 0;
  601 + chunk->map[1] = pcpu_unit_size | 1;
  602 + chunk->map_used = 1;
590 603  
591 604 INIT_LIST_HEAD(&chunk->list);
592 605 chunk->free_size = pcpu_unit_size;
... ... @@ -682,6 +695,13 @@
682 695 unsigned long flags;
683 696 void __percpu *ptr;
684 697  
  698 + /*
  699 + * We want the lowest bit of offset available for in-use/free
  700 + * indicator.
  701 + */
  702 + if (unlikely(align < 2))
  703 + align = 2;
  704 +
685 705 if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
686 706 WARN(true, "illegal size (%zu) or align (%zu) for "
687 707 "percpu allocation\n", size, align);
688 708  
... ... @@ -1312,9 +1332,13 @@
1312 1332 }
1313 1333 schunk->contig_hint = schunk->free_size;
1314 1334  
1315   - schunk->map[schunk->map_used++] = -ai->static_size;
  1335 + schunk->map[0] = 1;
  1336 + schunk->map[1] = ai->static_size;
  1337 + schunk->map_used = 1;
1316 1338 if (schunk->free_size)
1317   - schunk->map[schunk->map_used++] = schunk->free_size;
  1339 + schunk->map[++schunk->map_used] = 1 | (ai->static_size + schunk->free_size);
  1340 + else
  1341 + schunk->map[1] |= 1;
1318 1342  
1319 1343 /* init dynamic chunk if necessary */
1320 1344 if (dyn_size) {
... ... @@ -1327,8 +1351,10 @@
1327 1351 bitmap_fill(dchunk->populated, pcpu_unit_pages);
1328 1352  
1329 1353 dchunk->contig_hint = dchunk->free_size = dyn_size;
1330   - dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
1331   - dchunk->map[dchunk->map_used++] = dchunk->free_size;
  1354 + dchunk->map[0] = 1;
  1355 + dchunk->map[1] = pcpu_reserved_chunk_limit;
  1356 + dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1;
  1357 + dchunk->map_used = 2;
1332 1358 }
1333 1359  
1334 1360 /* link the first chunk in */