Blame view
include/linux/reciprocal_div.h
3.28 KB
b24413180 License cleanup: ... |
1 |
/* SPDX-License-Identifier: GPL-2.0 */ |
6a2d7a955 [PATCH] SLAB: use... |
2 3 4 5 6 7 |
#ifndef _LINUX_RECIPROCAL_DIV_H #define _LINUX_RECIPROCAL_DIV_H #include <linux/types.h> /* |
809fa972f reciprocal_divide... |
8 9 10 |
* This algorithm is based on the paper "Division by Invariant * Integers Using Multiplication" by Torbjörn Granlund and Peter * L. Montgomery. |
6a2d7a955 [PATCH] SLAB: use... |
11 |
* |
809fa972f reciprocal_divide... |
12 13 14 |
* The assembler implementation from Agner Fog, which this code is * based on, can be found here: * http://www.agner.org/optimize/asmlib.zip |
6a2d7a955 [PATCH] SLAB: use... |
15 |
* |
809fa972f reciprocal_divide... |
16 17 18 19 20 |
* This optimization for A/B is helpful if the divisor B is mostly * runtime invariant. The reciprocal of B is calculated in the * slow-path with reciprocal_value(). The fast-path can then just use * a much faster multiplication operation with a variable dividend A * to calculate the division A/B. |
6a2d7a955 [PATCH] SLAB: use... |
21 |
*/ |
809fa972f reciprocal_divide... |
22 23 24 25 |
struct reciprocal_value { u32 m; u8 sh1, sh2; }; |
6a2d7a955 [PATCH] SLAB: use... |
26 |
|
06ae48269 lib: reciprocal_d... |
27 28 29 |
/* "reciprocal_value" and "reciprocal_divide" together implement the basic * version of the algorithm described in Figure 4.1 of the paper. */ |
809fa972f reciprocal_divide... |
30 |
struct reciprocal_value reciprocal_value(u32 d); |
6a2d7a955 [PATCH] SLAB: use... |
31 |
|
809fa972f reciprocal_divide... |
32 |
static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) |
6a2d7a955 [PATCH] SLAB: use... |
33 |
{ |
809fa972f reciprocal_divide... |
34 35 |
u32 t = (u32)(((u64)a * R.m) >> 32); return (t + ((a - t) >> R.sh1)) >> R.sh2; |
6a2d7a955 [PATCH] SLAB: use... |
36 |
} |
809fa972f reciprocal_divide... |
37 |
|
06ae48269 lib: reciprocal_d... |
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
struct reciprocal_value_adv { u32 m; u8 sh, exp; bool is_wide_m; }; /* "reciprocal_value_adv" implements the advanced version of the algorithm * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The * exception case could be easily handled before calling "reciprocal_value_adv". * * The advanced version requires more complex calculation to get the reciprocal * multiplier and other control variables, but then could reduce the required * emulation operations. * * It makes no sense to use this advanced version for host divide emulation, * those extra complexities for calculating multiplier etc could completely * waive our saving on emulation operations. * * However, it makes sense to use it for JIT divide code generation for which * we are willing to trade performance of JITed code with that of host. As shown * by the following pseudo code, the required emulation operations could go down * from 6 (the basic version) to 3 or 4. * * To use the result of "reciprocal_value_adv", suppose we want to calculate * n/d, the pseudo C code will be: * * struct reciprocal_value_adv rvalue; * u8 pre_shift, exp; * * // handle exception case. * if (d >= (1U << 31)) { * result = n >= d; * return; * } * * rvalue = reciprocal_value_adv(d, 32) * exp = rvalue.exp; * if (rvalue.is_wide_m && !(d & 1)) { * // floor(log2(d & (2^32 -d))) * pre_shift = fls(d & -d) - 1; * rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); * } else { * pre_shift = 0; * } * * // code generation starts. * if (imm == 1U << exp) { * result = n >> exp; * } else if (rvalue.is_wide_m) { * // pre_shift must be zero when reached here. * t = (n * rvalue.m) >> 32; * result = n - t; * result >>= 1; * result += t; * result >>= rvalue.sh - 1; * } else { * if (pre_shift) * result = n >> pre_shift; * result = ((u64)result * rvalue.m) >> 32; * result >>= rvalue.sh; * } */ struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); |
809fa972f reciprocal_divide... |
102 |
#endif /* _LINUX_RECIPROCAL_DIV_H */ |