Eric Lee / smarc-fsl-linux-kernel

Blame view

include/linux/reciprocal_div.h 3.28 KB

b24413180 Greg Kroah-Hartman License cleanup: ...	1	/* SPDX-License-Identifier: GPL-2.0 */
6a2d7a955 Eric Dumazet [PATCH] SLAB: use...	2 3 4 5 6 7	#ifndef _LINUX_RECIPROCAL_DIV_H #define _LINUX_RECIPROCAL_DIV_H #include <linux/types.h> /*
809fa972f Hannes Frederic Sowa reciprocal_divide...	8 9 10	* This algorithm is based on the paper "Division by Invariant * Integers Using Multiplication" by Torbjörn Granlund and Peter * L. Montgomery.
6a2d7a955 Eric Dumazet [PATCH] SLAB: use...	11	*
809fa972f Hannes Frederic Sowa reciprocal_divide...	12 13 14	* The assembler implementation from Agner Fog, which this code is * based on, can be found here: * http://www.agner.org/optimize/asmlib.zip
6a2d7a955 Eric Dumazet [PATCH] SLAB: use...	15	*
809fa972f Hannes Frederic Sowa reciprocal_divide...	16 17 18 19 20	* This optimization for A/B is helpful if the divisor B is mostly * runtime invariant. The reciprocal of B is calculated in the * slow-path with reciprocal_value(). The fast-path can then just use * a much faster multiplication operation with a variable dividend A * to calculate the division A/B.
6a2d7a955 Eric Dumazet [PATCH] SLAB: use...	21	*/
809fa972f Hannes Frederic Sowa reciprocal_divide...	22 23 24 25	struct reciprocal_value { u32 m; u8 sh1, sh2; };
6a2d7a955 Eric Dumazet [PATCH] SLAB: use...	26
06ae48269 Jiong Wang lib: reciprocal_d...	27 28 29	/* "reciprocal_value" and "reciprocal_divide" together implement the basic * version of the algorithm described in Figure 4.1 of the paper. */
809fa972f Hannes Frederic Sowa reciprocal_divide...	30	struct reciprocal_value reciprocal_value(u32 d);
6a2d7a955 Eric Dumazet [PATCH] SLAB: use...	31
809fa972f Hannes Frederic Sowa reciprocal_divide...	32	static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
6a2d7a955 Eric Dumazet [PATCH] SLAB: use...	33	{
809fa972f Hannes Frederic Sowa reciprocal_divide...	34 35	u32 t = (u32)(((u64)a * R.m) >> 32); return (t + ((a - t) >> R.sh1)) >> R.sh2;
6a2d7a955 Eric Dumazet [PATCH] SLAB: use...	36	}
809fa972f Hannes Frederic Sowa reciprocal_divide...	37
/*
 * Result of reciprocal_value_adv(). The usage sketch in the comment below
 * shows how each field drives code generation.
 */
struct reciprocal_value_adv {
	u32 m;		/* multiplier */
	u8 sh;		/* right-shift count applied after the multiply */
	u8 exp;		/* the sketch emits a plain shift when d == 1U << exp */
	bool is_wide_m;	/* selects the longer sub/shift/add sequence */
};

/*
 * "reciprocal_value_adv" implements the advanced version of the algorithm
 * described in Figure 4.2 of the paper, except when "divisor > (1U << 31)",
 * whose ceil(log2(d)) result will be 32, which then requires a u128 divide on
 * the host. The exception case can easily be handled before calling
 * "reciprocal_value_adv".
 *
 * The advanced version requires a more complex calculation to get the
 * reciprocal multiplier and other control variables, but can then reduce the
 * number of emulation operations required.
 *
 * It makes no sense to use this advanced version for host divide emulation:
 * the extra complexity of calculating the multiplier etc. could completely
 * cancel out the saving on emulation operations.
 *
 * However, it makes sense to use it for JIT divide code generation, for which
 * we are willing to trade performance of JITed code with that of the host. As
 * shown by the following pseudo code, the required emulation operations could
 * go down from 6 (the basic version) to 3 or 4.
 *
 * To use the result of "reciprocal_value_adv", suppose we want to calculate
 * n/d. The pseudo C code will be:
 *
 *   struct reciprocal_value_adv rvalue;
 *   u8 pre_shift, exp;
 *
 *   // handle exception case.
 *   if (d >= (1U << 31)) {
 *     result = n >= d;
 *     return;
 *   }
 *
 *   rvalue = reciprocal_value_adv(d, 32);
 *   exp = rvalue.exp;
 *   if (rvalue.is_wide_m && !(d & 1)) {
 *     // floor(log2(d & (2^32 - d)))
 *     pre_shift = fls(d & -d) - 1;
 *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
 *   } else {
 *     pre_shift = 0;
 *   }
 *
 *   // code generation starts.
 *   if (d == 1U << exp) {
 *     result = n >> exp;
 *   } else if (rvalue.is_wide_m) {
 *     // pre_shift must be zero when reached here.
 *     t = ((u64)n * rvalue.m) >> 32;
 *     result = n - t;
 *     result >>= 1;
 *     result += t;
 *     result >>= rvalue.sh - 1;
 *   } else {
 *     result = n;
 *     if (pre_shift)
 *       result >>= pre_shift;
 *     result = ((u64)result * rvalue.m) >> 32;
 *     result >>= rvalue.sh;
 *   }
 */
struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
809fa972f Hannes Frederic Sowa reciprocal_divide...	102	#endif /* _LINUX_RECIPROCAL_DIV_H */