  #ifndef _ASM_GENERIC_DIV64_H
  #define _ASM_GENERIC_DIV64_H
  /*
   * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
   * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h
   *
   * Optimization for constant divisors on 32-bit machines:
   * Copyright (C) 2006-2015 Nicolas Pitre
   *
   * The semantics of do_div() are:
   *
   * u32 do_div(u64 *n, u32 base)
   * {
   *	u32 remainder = *n % base;
   *	*n = *n / base;
   *	return remainder;
   * }
   *
   * NOTE: macro parameter n is evaluated multiple times,
   *       beware of side effects!
   */
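
/*
 * Illustrative usage (example values only, not part of this header):
 *
 *	u64 bytes = 1000000ULL;
 *	u32 rem = do_div(bytes, 1024);
 *
 * afterwards bytes == 976 and rem == 576.
 */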
  
  #include <linux/types.h>
  #include <linux/compiler.h>
  
  #if BITS_PER_LONG == 64
  
  # define do_div(n,base) ({					\
  	u32 __base = (base);				\
  	u32 __rem;						\
  	__rem = ((u64)(n)) % __base;			\
  	(n) = ((u64)(n)) / __base;				\
  	__rem;							\
   })
  
  #elif BITS_PER_LONG == 32
  
  #include <linux/log2.h>
  
  /*
   * If the divisor happens to be constant, we determine the appropriate
   * inverse at compile time to turn the division into a few inline
   * multiplications which ought to be much faster. And yet only if compiling
   * with a sufficiently recent gcc version to perform proper 64-bit constant
   * propagation.
   *
   * (It is unfortunate that gcc doesn't perform all this internally.)
   */
  
  #ifndef __div64_const32_is_OK
  #define __div64_const32_is_OK (__GNUC__ >= 4)
  #endif
  
  #define __div64_const32(n, ___b)					\
  ({									\
  	/*								\
  	 * Multiplication by reciprocal of b: n / b = n * (p / b) / p	\
  	 *								\
  	 * We rely on the fact that most of this code gets optimized	\
  	 * away at compile time due to constant propagation and only	\
  	 * a few multiplication instructions should remain.		\
  	 * Hence this monstrous macro (static inline doesn't always	\
  	 * do the trick here).						\
  	 */								\
  	u64 ___res, ___x, ___t, ___m, ___n = (n);			\
  	u32 ___p, ___bias;						\
  									\
  	/* determine MSB of b */					\
  	___p = 1 << ilog2(___b);					\
  									\
  	/* compute m = ((p << 64) + b - 1) / b */			\
  	___m = (~0ULL / ___b) * ___p;					\
  	___m += (((~0ULL % ___b + 1) * ___p) + ___b - 1) / ___b;	\
  									\
  	/* one less than the dividend with highest result */		\
  	___x = ~0ULL / ___b * ___b - 1;					\
  									\
  	/* test our ___m with res = m * x / (p << 64) */		\
  	___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32;	\
  	___t = ___res += (___m & 0xffffffff) * (___x >> 32);		\
  	___res += (___x & 0xffffffff) * (___m >> 32);			\
  	___t = (___res < ___t) ? (1ULL << 32) : 0;			\
  	___res = (___res >> 32) + ___t;					\
  	___res += (___m >> 32) * (___x >> 32);				\
  	___res /= ___p;							\
  									\
  	/* Now sanitize and optimize what we've got. */			\
  	if (~0ULL % (___b / (___b & -___b)) == 0) {			\
  		/* special case, can be simplified to ... */		\
  		___n /= (___b & -___b);					\
  		___m = ~0ULL / (___b / (___b & -___b));			\
  		___p = 1;						\
  		___bias = 1;						\
  	} else if (___res != ___x / ___b) {				\
  		/*							\
  		 * We can't get away without a bias to compensate	\
  		 * for bit truncation errors.  To avoid it we'd need an	\
  		 * additional bit to represent m which would overflow	\
  		 * a 64-bit variable.					\
  		 *							\
  		 * Instead we do m = p / b and n / b = (n * m + m) / p.	\
  		 */							\
  		___bias = 1;						\
  		/* Compute m = (p << 64) / b */				\
  		___m = (~0ULL / ___b) * ___p;				\
  		___m += ((~0ULL % ___b + 1) * ___p) / ___b;		\
  	} else {							\
  		/*							\
  		 * Reduce m / p, and try to clear bit 31 of m when	\
  		 * possible, otherwise that'll need extra overflow	\
  		 * handling later.					\
  		 */							\
  		u32 ___bits = -(___m & -___m);			\
  		___bits |= ___m >> 32;					\
  		___bits = (~___bits) << 1;				\
  		/*							\
		 * If ___bits == 0 then setting bit 31 is unavoidable.	\
  		 * Simply apply the maximum possible reduction in that	\
  		 * case. Otherwise the MSB of ___bits indicates the	\
  		 * best reduction we should apply.			\
  		 */							\
  		if (!___bits) {						\
  			___p /= (___m & -___m);				\
  			___m /= (___m & -___m);				\
  		} else {						\
  			___p >>= ilog2(___bits);			\
  			___m >>= ilog2(___bits);			\
  		}							\
  		/* No bias needed. */					\
  		___bias = 0;						\
  	}								\
  									\
  	/*								\
  	 * Now we have a combination of 2 conditions:			\
  	 *								\
  	 * 1) whether or not we need to apply a bias, and		\
  	 *								\
  	 * 2) whether or not there might be an overflow in the cross	\
  	 *    product determined by (___m & ((1 << 63) | (1 << 31))).	\
  	 *								\
  	 * Select the best way to do (m_bias + m * n) / (1 << 64).	\
  	 * From now on there will be actual runtime code generated.	\
  	 */								\
  	___res = __arch_xprod_64(___m, ___n, ___bias);			\
  									\
  	___res /= ___p;							\
  })
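
/*
 * Worked example (illustrative): for ___b == 10 the special case above
 * triggers, because ~0ULL is divisible by 10 / (10 & -10) == 5. The
 * macro then reduces to ___n /= 2, ___m = ~0ULL / 5 == 0x3333333333333333,
 * ___p == 1 and ___bias == 1, so n / 10 is evaluated as
 * (___m + ___m * (n / 2)) >> 64 with no runtime division at all.
 */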
  
  #ifndef __arch_xprod_64
  /*
   * Default C implementation for __arch_xprod_64()
   *
   * Prototype: u64 __arch_xprod_64(const u64 m, u64 n, bool bias)
   * Semantic:  retval = ((bias ? m : 0) + m * n) >> 64
   *
   * The product is a 128-bit value, scaled down to 64 bits.
   * Assuming constant propagation to optimize away unused conditional code.
   * Architectures may provide their own optimized assembly implementation.
   */
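/*
 * The default implementation below uses the schoolbook 32x32 split
 *
 *	m * n = ((u64)m_hi * n_hi << 64)
 *	      + (((u64)m_hi * n_lo + (u64)m_lo * n_hi) << 32)
 *	      + (u64)m_lo * n_lo
 *
 * so only the carries out of the intermediate 64-bit sums need
 * explicit handling.
 */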
  static inline u64 __arch_xprod_64(const u64 m, u64 n, bool bias)
  {
  	u32 m_lo = m;
  	u32 m_hi = m >> 32;
  	u32 n_lo = n;
  	u32 n_hi = n >> 32;
  	u64 res, tmp;
  
  	if (!bias) {
  		res = ((u64)m_lo * n_lo) >> 32;
  	} else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
  		/* there can't be any overflow here */
  		res = (m + (u64)m_lo * n_lo) >> 32;
  	} else {
  		res = m + (u64)m_lo * n_lo;
  		tmp = (res < m) ? (1ULL << 32) : 0;
  		res = (res >> 32) + tmp;
  	}
  
  	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
  		/* there can't be any overflow here */
  		res += (u64)m_lo * n_hi;
  		res += (u64)m_hi * n_lo;
  		res >>= 32;
  	} else {
  		tmp = res += (u64)m_lo * n_hi;
  		res += (u64)m_hi * n_lo;
  		tmp = (res < tmp) ? (1ULL << 32) : 0;
  		res = (res >> 32) + tmp;
  	}

  	res += (u64)m_hi * n_hi;
  
  	return res;
  }
  #endif
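
/*
 * A minimal host-side sanity check of the semantics above (hypothetical
 * test code, assuming a compiler providing unsigned __int128):
 *
 *	u64 ref = (u64)((((unsigned __int128)m * n) +
 *			 (bias ? m : 0)) >> 64);
 *	assert(ref == __arch_xprod_64(m, n, bias));
 */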
  
  #ifndef __div64_32
  extern u32 __div64_32(u64 *dividend, u32 divisor);
  #endif
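
/*
 * __div64_32() divides *dividend in place and returns the remainder,
 * matching the do_div() semantics above; it is the out-of-line
 * fallback used when the divisor is not a compile-time constant.
 * Architectures may override it with an optimized version.
 */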
  
/* The unnecessary pointer compare is there
 * to check for type safety (n must be 64-bit)
 */
  # define do_div(n,base) ({				\
  	u32 __base = (base);			\
  	u32 __rem;					\
  	(void)(((typeof((n)) *)0) == ((u64 *)0));	\
  	if (__builtin_constant_p(__base) &&		\
  	    is_power_of_2(__base)) {			\
  		__rem = (n) & (__base - 1);		\
  		(n) >>= ilog2(__base);			\
  	} else if (__div64_const32_is_OK &&		\
  		   __builtin_constant_p(__base) &&	\
  		   __base != 0) {			\
  		u32 __res_lo, __n_lo = (n);	\
  		(n) = __div64_const32(n, __base);	\
  		/* the remainder can be computed with 32-bit regs */ \
  		__res_lo = (n);				\
  		__rem = __n_lo - __res_lo * __base;	\
  	} else if (likely(((n) >> 32) == 0)) {		\
  		__rem = (u32)(n) % __base;		\
  		(n) = (u32)(n) / __base;		\
  	} else 						\
  		__rem = __div64_32(&(n), __base);	\
  	__rem;						\
   })
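
/*
 * Illustrative expansions (approximate, assuming a constant-folding
 * compiler):
 *
 *	do_div(n, 16);	// __rem = n & 15; n >>= 4;
 *	do_div(n, 10);	// inline multiplications via __div64_const32()
 *	do_div(n, var);	// __div64_32(&n, var), unless n fits in 32 bits
 */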
  #else /* BITS_PER_LONG == ?? */
  
  # error do_div() does not yet support the C64
  
  #endif /* BITS_PER_LONG */
/* Wrapper for do_div(). Doesn't modify the dividend and returns
 * the quotient, not the remainder.
   */
  static inline u64 lldiv(u64 dividend, u32 divisor)
  {
  	u64 __res = dividend;
  	do_div(__res, divisor);
	return __res;
  }
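
/*
 * Example (illustrative): unlike do_div(), lldiv() leaves its argument
 * untouched:
 *
 *	u64 ns = 1000000123ULL;
 *	u64 ms = lldiv(ns, 1000000);	// ms == 1000, ns unchanged
 */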
  #endif /* _ASM_GENERIC_DIV64_H */