Commit 64970b68d2b3ed32b964b0b30b1b98518fde388e

Authored by Alexander van Heukelum
Committed by Ingo Molnar
1 parent 60b6783a04

x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps

This moves an optimization for searching constant-sized small
bitmaps from x86_64-specific to generic code.

On an i386 defconfig (the x86#testing one), the size of vmlinux hardly
changes with this applied. I have observed only four places where this
optimization avoids a call into find_next_bit:

In the functions return_unused_surplus_pages, alloc_fresh_huge_page,
and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap.
In __next_cpu a call is avoided for a 32-bit bitmap. That's it.

On x86_64, 52 locations are optimized with a minimal increase in
code size:

Current #testing defconfig:
	146 x bsf, 27 x find_next_*bit
   text    data     bss     dec     hex filename
   5392637  846592  724424 6963653  6a41c5 vmlinux

After removing the x86_64 specific optimization for find_next_*bit:
	94 x bsf, 79 x find_next_*bit
   text    data     bss     dec     hex filename
   5392358  846592  724424 6963374  6a40ae vmlinux

After this patch (making the optimization generic):
	146 x bsf, 27 x find_next_*bit
   text    data     bss     dec     hex filename
   5392396  846592  724424 6963412  6a40d4 vmlinux

[ tglx@linutronix.de: build fixes ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 5 changed files with 88 additions and 32 deletions Side-by-side Diff

include/asm-generic/bitops/find.h
1 1 #ifndef _ASM_GENERIC_BITOPS_FIND_H_
2 2 #define _ASM_GENERIC_BITOPS_FIND_H_
3 3  
  4 +#ifndef CONFIG_GENERIC_FIND_NEXT_BIT
4 5 extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
5 6 size, unsigned long offset);
6 7  
7 8 extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned
8 9 long size, unsigned long offset);
  10 +#endif
9 11  
10 12 #define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
11 13 #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
include/asm-x86/bitops.h
... ... @@ -306,12 +306,6 @@
306 306 #undef BIT_ADDR
307 307 #undef ADDR
308 308  
309   -unsigned long find_next_bit(const unsigned long *addr,
310   - unsigned long size, unsigned long offset);
311   -unsigned long find_next_zero_bit(const unsigned long *addr,
312   - unsigned long size, unsigned long offset);
313   -
314   -
315 309 #ifdef CONFIG_X86_32
316 310 # include "bitops_32.h"
317 311 #else
include/asm-x86/bitops_64.h
... ... @@ -15,16 +15,6 @@
15 15 return val;
16 16 }
17 17  
18   -#define find_next_bit(addr,size,off) \
19   -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
20   - ((off) + (__scanbit((*(unsigned long *)addr) >> (off),(size)-(off)))) : \
21   - find_next_bit(addr,size,off)))
22   -
23   -#define find_next_zero_bit(addr,size,off) \
24   -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
25   - ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off)),(size)-(off)))) : \
26   - find_next_zero_bit(addr,size,off)))
27   -
28 18 #define find_first_bit(addr, size) \
29 19 ((__builtin_constant_p((size)) && (size) <= BITS_PER_LONG \
30 20 ? (__scanbit(*(unsigned long *)(addr), (size))) \
include/linux/bitops.h
... ... @@ -112,5 +112,82 @@
112 112 return fls64(l);
113 113 }
114 114  
  115 +#ifdef __KERNEL__
  116 +#ifdef CONFIG_GENERIC_FIND_NEXT_BIT
  117 +extern unsigned long __find_next_bit(const unsigned long *addr,
  118 + unsigned long size, unsigned long offset);
  119 +
  120 +/**
  121 + * find_next_bit - find the next set bit in a memory region
  122 + * @addr: The address to base the search on
  123 + * @offset: The bitnumber to start searching at
  124 + * @size: The bitmap size in bits
  125 + */
  126 +static __always_inline unsigned long
  127 +find_next_bit(const unsigned long *addr, unsigned long size,
  128 + unsigned long offset)
  129 +{
  130 + unsigned long value;
  131 +
  132 + /* Avoid a function call if the bitmap size is a constant */
  133 + /* and not bigger than BITS_PER_LONG. */
  134 +
  135 + /* insert a sentinel so that __ffs returns size if there */
  136 + /* are no set bits in the bitmap */
  137 + if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
  138 + value = (*addr) & ((~0ul) << offset);
  139 + value |= (1ul << size);
  140 + return __ffs(value);
  141 + }
  142 +
  143 + /* the result of __ffs(0) is undefined, so it needs to be */
  144 + /* handled separately */
  145 + if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
  146 + value = (*addr) & ((~0ul) << offset);
  147 + return (value == 0) ? BITS_PER_LONG : __ffs(value);
  148 + }
  149 +
  150 + /* size is not constant or too big */
  151 + return __find_next_bit(addr, size, offset);
  152 +}
  153 +
  154 +extern unsigned long __find_next_zero_bit(const unsigned long *addr,
  155 + unsigned long size, unsigned long offset);
  156 +
  157 +/**
  158 + * find_next_zero_bit - find the next cleared bit in a memory region
  159 + * @addr: The address to base the search on
  160 + * @offset: The bitnumber to start searching at
  161 + * @size: The bitmap size in bits
  162 + */
  163 +static __always_inline unsigned long
  164 +find_next_zero_bit(const unsigned long *addr, unsigned long size,
  165 + unsigned long offset)
  166 +{
  167 + unsigned long value;
  168 +
  169 + /* Avoid a function call if the bitmap size is a constant */
  170 + /* and not bigger than BITS_PER_LONG. */
  171 +
  172 + /* insert a sentinel so that __ffs returns size if there */
  173 + /* are no set bits in the bitmap */
  174 + if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
  175 + value = (~(*addr)) & ((~0ul) << offset);
  176 + value |= (1ul << size);
  177 + return __ffs(value);
  178 + }
  179 +
  180 + /* the result of __ffs(0) is undefined, so it needs to be */
  181 + /* handled separately */
  182 + if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
  183 + value = (~(*addr)) & ((~0ul) << offset);
  184 + return (value == 0) ? BITS_PER_LONG : __ffs(value);
  185 + }
  186 +
  187 + /* size is not constant or too big */
  188 + return __find_next_zero_bit(addr, size, offset);
  189 +}
  190 +#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */
  191 +#endif /* __KERNEL__ */
115 192 #endif
lib/find_next_bit.c
... ... @@ -15,17 +15,12 @@
15 15 #include <asm/byteorder.h>
16 16  
17 17 #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
18   -#undef find_next_bit
19   -#undef find_next_zero_bit
20 18  
21   -/**
22   - * find_next_bit - find the next set bit in a memory region
23   - * @addr: The address to base the search on
24   - * @offset: The bitnumber to start searching at
25   - * @size: The maximum size to search
  19 +/*
  20 + * Find the next set bit in a memory region.
26 21 */
27   -unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
28   - unsigned long offset)
  22 +unsigned long __find_next_bit(const unsigned long *addr,
  23 + unsigned long size, unsigned long offset)
29 24 {
30 25 const unsigned long *p = addr + BITOP_WORD(offset);
31 26 unsigned long result = offset & ~(BITS_PER_LONG-1);
32 27  
33 28  
... ... @@ -62,15 +57,14 @@
62 57 found_middle:
63 58 return result + __ffs(tmp);
64 59 }
  60 +EXPORT_SYMBOL(__find_next_bit);
65 61  
66   -EXPORT_SYMBOL(find_next_bit);
67   -
68 62 /*
69 63 * This implementation of find_{first,next}_zero_bit was stolen from
70 64 * Linus' asm-alpha/bitops.h.
71 65 */
72   -unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
73   - unsigned long offset)
  66 +unsigned long __find_next_zero_bit(const unsigned long *addr,
  67 + unsigned long size, unsigned long offset)
74 68 {
75 69 const unsigned long *p = addr + BITOP_WORD(offset);
76 70 unsigned long result = offset & ~(BITS_PER_LONG-1);
... ... @@ -107,8 +101,7 @@
107 101 found_middle:
108 102 return result + ffz(tmp);
109 103 }
110   -
111   -EXPORT_SYMBOL(find_next_zero_bit);
  104 +EXPORT_SYMBOL(__find_next_zero_bit);
112 105  
113 106 #ifdef __BIG_ENDIAN
114 107