Blame view

include/linux/reciprocal_div.h 3.28 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  /* SPDX-License-Identifier: GPL-2.0 */
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
2
3
4
5
6
7
  #ifndef _LINUX_RECIPROCAL_DIV_H
  #define _LINUX_RECIPROCAL_DIV_H
  
  #include <linux/types.h>
  
  /*
809fa972f   Hannes Frederic Sowa   reciprocal_divide...
8
9
10
   * This algorithm is based on the paper "Division by Invariant
   * Integers Using Multiplication" by Torbjörn Granlund and Peter
   * L. Montgomery.
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
11
   *
809fa972f   Hannes Frederic Sowa   reciprocal_divide...
12
13
14
   * The assembler implementation from Agner Fog, which this code is
   * based on, can be found here:
   * http://www.agner.org/optimize/asmlib.zip
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
15
   *
809fa972f   Hannes Frederic Sowa   reciprocal_divide...
16
17
18
19
20
   * This optimization for A/B is helpful if the divisor B is mostly
   * runtime invariant. The reciprocal of B is calculated in the
   * slow-path with reciprocal_value(). The fast-path can then just use
   * a much faster multiplication operation with a variable dividend A
   * to calculate the division A/B.
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
21
   */
809fa972f   Hannes Frederic Sowa   reciprocal_divide...
22
23
24
25
  /*
   * Precomputed reciprocal of a divisor: produced by reciprocal_value() and
   * consumed by reciprocal_divide().
   */
  struct reciprocal_value {
  	u32 m;		/* multiplier; only the high 32 bits of a * m are kept */
  	u8 sh1;		/* first right-shift used by reciprocal_divide() */
  	u8 sh2;		/* final right-shift used by reciprocal_divide() */
  };
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
26

06ae48269   Jiong Wang   lib: reciprocal_d...
27
28
29
  /* "reciprocal_value" and "reciprocal_divide" together implement the basic
   * version of the algorithm described in Figure 4.1 of the paper.
   */
809fa972f   Hannes Frederic Sowa   reciprocal_divide...
30
  /*
   * Slow path: calculate the reciprocal of the divisor @d once, so that the
   * result can be handed to reciprocal_divide() for every subsequent
   * division by @d.
   */
  struct reciprocal_value reciprocal_value(u32 d);
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
31

809fa972f   Hannes Frederic Sowa   reciprocal_divide...
32
  /*
   * Fast path of the division A/B: @a is the dividend and @R is the value
   * obtained from reciprocal_value() for the divisor. The quotient is
   * calculated with one widening multiplication plus shifts and adds, so no
   * divide instruction is needed.
   */
  static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
  {
  	/* Widen to 64 bits so the high half of the product is not lost. */
  	u64 product = (u64)a * R.m;
  	u32 hi = (u32)(product >> 32);
  	u32 rest = a - hi;

  	rest >>= R.sh1;
  	return (hi + rest) >> R.sh2;
  }
809fa972f   Hannes Frederic Sowa   reciprocal_divide...
37

06ae48269   Jiong Wang   lib: reciprocal_d...
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
  /*
   * Result of reciprocal_value_adv(). How each member is meant to be consumed
   * is shown by the usage pseudo code that accompanies the declaration of
   * reciprocal_value_adv().
   */
  struct reciprocal_value_adv {
  	u32 m;			/* reciprocal multiplier */
  	u8 sh;			/* right-shift applied after the multiplication */
  	u8 exp;			/* compared as "d == 1U << exp" to spot a power-of-two divisor */
  	bool is_wide_m;		/* when set, the longer sub/shift/add sequence is required */
  };
  
  /* "reciprocal_value_adv" implements the advanced version of the algorithm
   * described in Figure 4.2 of the paper, except when "divisor > (1U << 31)":
   * there ceil(log2(d)) is 32, which would require a u128 divide on the host.
   * That exception case can easily be handled before calling
   * "reciprocal_value_adv".
   *
   * The advanced version requires a more complex calculation to get the
   * reciprocal multiplier and the other control variables, but in return it can
   * reduce the number of emulation operations required.
   *
   * It makes no sense to use this advanced version for host divide emulation:
   * the extra complexity of calculating the multiplier etc. could completely
   * cancel out the saving on emulation operations.
   *
   * However, it makes sense to use it for JIT divide code generation, for which
   * we are willing to trade performance of JITed code with that of host. As shown
   * by the following pseudo code, the required emulation operations could go down
   * from 6 (the basic version) to 3 or 4.
   *
   * In the pseudo code below, "prec" is passed as 32 for a full u32 dividend and
   * as "32 - pre_shift" once the dividend is pre-shifted (presumably the number
   * of significant bits of the dividend -- confirm against the implementation).
   *
   * To use the result of "reciprocal_value_adv", suppose we want to calculate
   * n/d, the pseudo C code will be:
   *
   *   struct reciprocal_value_adv rvalue;
   *   u8 pre_shift, exp;
   *
   *   // handle exception case.
   *   if (d >= (1U << 31)) {
   *     result = n >= d;
   *     return;
   *   }
   *
   *   rvalue = reciprocal_value_adv(d, 32);
   *   exp = rvalue.exp;
   *   if (rvalue.is_wide_m && !(d & 1)) {
   *     // floor(log2(d & (2^32 - d)))
   *     pre_shift = fls(d & -d) - 1;
   *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
   *   } else {
   *     pre_shift = 0;
   *   }
   *
   *   // code generation starts.
   *   if (d == 1U << exp) {
   *     // the divisor is a power of two: a single shift is enough.
   *     result = n >> exp;
   *   } else if (rvalue.is_wide_m) {
   *     // pre_shift must be zero when reached here.
   *     t = ((u64)n * rvalue.m) >> 32;
   *     result = n - t;
   *     result >>= 1;
   *     result += t;
   *     result >>= rvalue.sh - 1;
   *   } else {
   *     result = n;
   *     if (pre_shift)
   *       result >>= pre_shift;
   *     result = ((u64)result * rvalue.m) >> 32;
   *     result >>= rvalue.sh;
   *   }
   */
  struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
809fa972f   Hannes Frederic Sowa   reciprocal_divide...
102
  #endif /* _LINUX_RECIPROCAL_DIV_H */