Commit 0342e335ba887817ed401e77be324e064ea7031e

Authored by Peng Fan
Committed by Tom Rini
1 parent 6823e6fe66

lib: div64: sync with Linux

Sync with Linux commit ad0376eb1483b ("Merge tag 'edac_for_4.11_2'").

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Cc: Tom Rini <trini@konsulko.com>

Showing 3 changed files with 508 additions and 10 deletions

... ... @@ -4,13 +4,16 @@
4 4 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
5 5 * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h
6 6 *
  7 + * Optimization for constant divisors on 32-bit machines:
  8 + * Copyright (C) 2006-2015 Nicolas Pitre
  9 + *
7 10 * The semantics of do_div() are:
8 11 *
9 12 * uint32_t do_div(uint64_t *n, uint32_t base)
10 13 * {
11   - * uint32_t remainder = *n % base;
12   - * *n = *n / base;
13   - * return remainder;
  14 + * uint32_t remainder = *n % base;
  15 + * *n = *n / base;
  16 + * return remainder;
14 17 * }
15 18 *
16 19 * NOTE: macro parameter n is evaluated multiple times,
17 20  
18 21  
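For reference, a minimal usage sketch of the semantics spelled out above (a hypothetical helper, not part of the commit; assumes U-Boot's <div64.h> is on the include path). do_div() updates the 64-bit dividend in place and returns the 32-bit remainder:

    /* Hypothetical example: split a 64-bit tick count into seconds plus
     * leftover ticks. do_div() modifies n in place and returns the
     * remainder as a uint32_t.
     */
    #include <div64.h>

    static void ticks_to_secs(uint64_t ticks, uint32_t rate,
                              uint64_t *secs, uint32_t *rem)
    {
            uint64_t n = ticks;

            *rem = do_div(n, rate);         /* n now holds ticks / rate */
            *secs = n;
    }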
... ... @@ -18,8 +21,182 @@
18 21 */
19 22  
20 23 #include <linux/types.h>
  24 +#include <linux/compiler.h>
21 25  
  26 +#if BITS_PER_LONG == 64
  27 +
  28 +# define do_div(n,base) ({ \
  29 + uint32_t __base = (base); \
  30 + uint32_t __rem; \
  31 + __rem = ((uint64_t)(n)) % __base; \
  32 + (n) = ((uint64_t)(n)) / __base; \
  33 + __rem; \
  34 + })
  35 +
  36 +#elif BITS_PER_LONG == 32
  37 +
  38 +#include <linux/log2.h>
  39 +
  40 +/*
  41 + * If the divisor happens to be constant, we determine the appropriate
  42 + * inverse at compile time to turn the division into a few inline
  43 + * multiplications which ought to be much faster. And yet only if compiling
  44 + * with a sufficiently recent gcc version to perform proper 64-bit constant
  45 + * propagation.
  46 + *
  47 + * (It is unfortunate that gcc doesn't perform all this internally.)
  48 + */
  49 +
  50 +#ifndef __div64_const32_is_OK
  51 +#define __div64_const32_is_OK (__GNUC__ >= 4)
  52 +#endif
  53 +
  54 +#define __div64_const32(n, ___b) \
  55 +({ \
  56 + /* \
  57 + * Multiplication by reciprocal of b: n / b = n * (p / b) / p \
  58 + * \
  59 + * We rely on the fact that most of this code gets optimized \
  60 + * away at compile time due to constant propagation and only \
  61 + * a few multiplication instructions should remain. \
  62 + * Hence this monstrous macro (static inline doesn't always \
  63 + * do the trick here). \
  64 + */ \
  65 + uint64_t ___res, ___x, ___t, ___m, ___n = (n); \
  66 + uint32_t ___p, ___bias; \
  67 + \
  68 + /* determine MSB of b */ \
  69 + ___p = 1 << ilog2(___b); \
  70 + \
  71 + /* compute m = ((p << 64) + b - 1) / b */ \
  72 + ___m = (~0ULL / ___b) * ___p; \
  73 + ___m += (((~0ULL % ___b + 1) * ___p) + ___b - 1) / ___b; \
  74 + \
  75 + /* one less than the dividend with highest result */ \
  76 + ___x = ~0ULL / ___b * ___b - 1; \
  77 + \
  78 + /* test our ___m with res = m * x / (p << 64) */ \
  79 + ___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32; \
  80 + ___t = ___res += (___m & 0xffffffff) * (___x >> 32); \
  81 + ___res += (___x & 0xffffffff) * (___m >> 32); \
  82 + ___t = (___res < ___t) ? (1ULL << 32) : 0; \
  83 + ___res = (___res >> 32) + ___t; \
  84 + ___res += (___m >> 32) * (___x >> 32); \
  85 + ___res /= ___p; \
  86 + \
  87 + /* Now sanitize and optimize what we've got. */ \
  88 + if (~0ULL % (___b / (___b & -___b)) == 0) { \
  89 + /* special case, can be simplified to ... */ \
  90 + ___n /= (___b & -___b); \
  91 + ___m = ~0ULL / (___b / (___b & -___b)); \
  92 + ___p = 1; \
  93 + ___bias = 1; \
  94 + } else if (___res != ___x / ___b) { \
  95 + /* \
  96 + * We can't get away without a bias to compensate \
  97 + * for bit truncation errors. To avoid it we'd need an \
  98 + * additional bit to represent m which would overflow \
  99 + * a 64-bit variable. \
  100 + * \
  101 + * Instead we do m = p / b and n / b = (n * m + m) / p. \
  102 + */ \
  103 + ___bias = 1; \
  104 + /* Compute m = (p << 64) / b */ \
  105 + ___m = (~0ULL / ___b) * ___p; \
  106 + ___m += ((~0ULL % ___b + 1) * ___p) / ___b; \
  107 + } else { \
  108 + /* \
  109 + * Reduce m / p, and try to clear bit 31 of m when \
  110 + * possible, otherwise that'll need extra overflow \
  111 + * handling later. \
  112 + */ \
  113 + uint32_t ___bits = -(___m & -___m); \
  114 + ___bits |= ___m >> 32; \
  115 + ___bits = (~___bits) << 1; \
  116 + /* \
  117 + * If ___bits == 0 then setting bit 31 is unavoidable. \
  118 + * Simply apply the maximum possible reduction in that \
  119 + * case. Otherwise the MSB of ___bits indicates the \
  120 + * best reduction we should apply. \
  121 + */ \
  122 + if (!___bits) { \
  123 + ___p /= (___m & -___m); \
  124 + ___m /= (___m & -___m); \
  125 + } else { \
  126 + ___p >>= ilog2(___bits); \
  127 + ___m >>= ilog2(___bits); \
  128 + } \
  129 + /* No bias needed. */ \
  130 + ___bias = 0; \
  131 + } \
  132 + \
  133 + /* \
  134 + * Now we have a combination of 2 conditions: \
  135 + * \
  136 + * 1) whether or not we need to apply a bias, and \
  137 + * \
  138 + * 2) whether or not there might be an overflow in the cross \
  139 + * product determined by (___m & ((1 << 63) | (1 << 31))). \
  140 + * \
  141 + * Select the best way to do (m_bias + m * n) / (1 << 64). \
  142 + * From now on there will be actual runtime code generated. \
  143 + */ \
  144 + ___res = __arch_xprod_64(___m, ___n, ___bias); \
  145 + \
  146 + ___res /= ___p; \
  147 +})
  148 +
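A standalone illustration of the reciprocal idea behind __div64_const32(), offered as a sketch rather than a reimplementation: for a 32-bit dividend, m = ceil(2^64 / b) fits in 64 bits (for b >= 2) and a single widening multiply recovers the exact quotient. The macro above extends the same idea to 64-bit dividends using only 32-bit multiplies, which is what forces the bias and overflow handling. The helper name and test values below are made up, and the sketch assumes a compiler with unsigned __int128:

    #include <assert.h>
    #include <stdint.h>

    /* q = n / b via multiplication by the precomputed reciprocal
     * m = ceil(2^64 / b); exact for any 32-bit n when b >= 2.
     */
    static uint32_t div_by_const(uint32_t n, uint32_t b)
    {
            uint64_t m = ~0ULL / b + 1;     /* ceil(2^64 / b) */

            return (uint32_t)(((unsigned __int128)m * n) >> 64);
    }

    int main(void)
    {
            uint32_t n;

            for (n = 0; n < 100000; n++)
                    assert(div_by_const(n, 1000) == n / 1000);
            assert(div_by_const(0xffffffffu, 7) == 0xffffffffu / 7);
            return 0;
    }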
  149 +#ifndef __arch_xprod_64
  150 +/*
  151 + * Default C implementation for __arch_xprod_64()
  152 + *
  153 + * Prototype: uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
  154 + * Semantic: retval = ((bias ? m : 0) + m * n) >> 64
  155 + *
  156 + * The product is a 128-bit value, scaled down to 64 bits.
  157 + * Assuming constant propagation to optimize away unused conditional code.
  158 + * Architectures may provide their own optimized assembly implementation.
  159 + */
  160 +static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
  161 +{
  162 + uint32_t m_lo = m;
  163 + uint32_t m_hi = m >> 32;
  164 + uint32_t n_lo = n;
  165 + uint32_t n_hi = n >> 32;
  166 + uint64_t res, tmp;
  167 +
  168 + if (!bias) {
  169 + res = ((uint64_t)m_lo * n_lo) >> 32;
  170 + } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
  171 + /* there can't be any overflow here */
  172 + res = (m + (uint64_t)m_lo * n_lo) >> 32;
  173 + } else {
  174 + res = m + (uint64_t)m_lo * n_lo;
  175 + tmp = (res < m) ? (1ULL << 32) : 0;
  176 + res = (res >> 32) + tmp;
  177 + }
  178 +
  179 + if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
  180 + /* there can't be any overflow here */
  181 + res += (uint64_t)m_lo * n_hi;
  182 + res += (uint64_t)m_hi * n_lo;
  183 + res >>= 32;
  184 + } else {
  185 + tmp = res += (uint64_t)m_lo * n_hi;
  186 + res += (uint64_t)m_hi * n_lo;
  187 + tmp = (res < tmp) ? (1ULL << 32) : 0;
  188 + res = (res >> 32) + tmp;
  189 + }
  190 +
  191 + res += (uint64_t)m_hi * n_hi;
  192 +
  193 + return res;
  194 +}
  195 +#endif
  196 +
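On toolchains that provide a 64x64->128 multiply, the stated semantic can be cross-checked against a direct formulation. A hedged reference sketch, not part of the commit, assuming unsigned __int128 support:

    #include <stdbool.h>
    #include <stdint.h>

    /* Reference for __arch_xprod_64(): ((bias ? m : 0) + m * n) >> 64,
     * computed on the full 128-bit product.
     */
    static uint64_t xprod_64_ref(uint64_t m, uint64_t n, bool bias)
    {
            unsigned __int128 acc = (unsigned __int128)m * n;

            if (bias)
                    acc += m;

            return (uint64_t)(acc >> 64);
    }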
  197 +#ifndef __div64_32
22 198 extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
  199 +#endif
23 200  
24 201 /* The unnecessary pointer compare is there
25 202 * to check for type safety (n must be 64bit)
26 203  
27 204  
... ... @@ -28,13 +205,31 @@
28 205 uint32_t __base = (base); \
29 206 uint32_t __rem; \
30 207 (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
31   - if (((n) >> 32) == 0) { \
  208 + if (__builtin_constant_p(__base) && \
  209 + is_power_of_2(__base)) { \
  210 + __rem = (n) & (__base - 1); \
  211 + (n) >>= ilog2(__base); \
  212 + } else if (__div64_const32_is_OK && \
  213 + __builtin_constant_p(__base) && \
  214 + __base != 0) { \
  215 + uint32_t __res_lo, __n_lo = (n); \
  216 + (n) = __div64_const32(n, __base); \
  217 + /* the remainder can be computed with 32-bit regs */ \
  218 + __res_lo = (n); \
  219 + __rem = __n_lo - __res_lo * __base; \
  220 + } else if (likely(((n) >> 32) == 0)) { \
32 221 __rem = (uint32_t)(n) % __base; \
33 222 (n) = (uint32_t)(n) / __base; \
34   - } else \
  223 + } else \
35 224 __rem = __div64_32(&(n), __base); \
36 225 __rem; \
37 226 })
  227 +
  228 +#else /* BITS_PER_LONG == ?? */
  229 +
  230 +# error do_div() does not yet support the C64
  231 +
  232 +#endif /* BITS_PER_LONG */
38 233  
39 234 /* Wrapper for do_div(). Doesn't modify dividend and returns
40 235 * the result, not remainder.
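One non-obvious step in the constant-divisor path above is recovering the remainder with 32-bit arithmetic only (__rem = __n_lo - __res_lo * __base). It works because the true remainder is smaller than the 32-bit base, so the identity rem = n - (n / base) * base can be evaluated modulo 2^32. A standalone sketch, with illustrative names and values, not part of the commit:

    #include <assert.h>
    #include <stdint.h>

    /* rem = n - quot * base computed on the low 32 bits only; the
     * subtraction wraps modulo 2^32 but the result is still exact
     * because rem < base < 2^32.
     */
    static uint32_t rem_from_low_bits(uint64_t n, uint64_t quot, uint32_t base)
    {
            return (uint32_t)n - (uint32_t)quot * base;
    }

    int main(void)
    {
            uint64_t n = 0x0123456789abcdefULL;
            uint32_t base = 1000;

            assert(rem_from_low_bits(n, n / base, base) == n % base);
            return 0;
    }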
include/linux/math64.h
1 1 #ifndef _LINUX_MATH64_H
2 2 #define _LINUX_MATH64_H
3 3  
  4 +#include <div64.h>
  5 +#include <linux/bitops.h>
4 6 #include <linux/types.h>
5 7  
6 8 #if BITS_PER_LONG == 64
7 9  
  10 +#define div64_long(x, y) div64_s64((x), (y))
  11 +#define div64_ul(x, y) div64_u64((x), (y))
  12 +
8 13 /**
9 14 * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder
10 15 *
... ... @@ -27,6 +32,15 @@
27 32 }
28 33  
29 34 /**
  35 + * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
  36 + */
  37 +static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
  38 +{
  39 + *remainder = dividend % divisor;
  40 + return dividend / divisor;
  41 +}
  42 +
  43 +/**
30 44 * div64_u64 - unsigned 64bit divide with 64bit divisor
31 45 */
32 46 static inline u64 div64_u64(u64 dividend, u64 divisor)
33 47  
... ... @@ -34,8 +48,19 @@
34 48 return dividend / divisor;
35 49 }
36 50  
  51 +/**
  52 + * div64_s64 - signed 64bit divide with 64bit divisor
  53 + */
  54 +static inline s64 div64_s64(s64 dividend, s64 divisor)
  55 +{
  56 + return dividend / divisor;
  57 +}
  58 +
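On 64-bit builds these helpers reduce to plain / and %, but calling them keeps code portable to 32-bit targets where the out-of-line versions take over. A hypothetical usage sketch (function name and constant are illustrative; assumes <linux/math64.h> as in this tree):

    #include <linux/math64.h>

    /* Split a nanosecond count into whole seconds and leftover ns. */
    static u64 ns_to_secs(u64 ns, u64 *rem_ns)
    {
            return div64_u64_rem(ns, 1000000000ULL, rem_ns);
    }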
37 59 #elif BITS_PER_LONG == 32
38 60  
  61 +#define div64_long(x, y) div_s64((x), (y))
  62 +#define div64_ul(x, y) div_u64((x), (y))
  63 +
39 64 #ifndef div_u64_rem
40 65 static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
41 66 {
42 67  
... ... @@ -48,10 +73,18 @@
48 73 extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder);
49 74 #endif
50 75  
  76 +#ifndef div64_u64_rem
  77 +extern u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder);
  78 +#endif
  79 +
51 80 #ifndef div64_u64
52 81 extern u64 div64_u64(u64 dividend, u64 divisor);
53 82 #endif
54 83  
  84 +#ifndef div64_s64
  85 +extern s64 div64_s64(s64 dividend, s64 divisor);
  86 +#endif
  87 +
55 88 #endif /* BITS_PER_LONG */
56 89  
57 90 /**
... ... @@ -81,6 +114,145 @@
81 114 #endif
82 115  
83 116 u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder);
  117 +
  118 +static __always_inline u32
  119 +__iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
  120 +{
  121 + u32 ret = 0;
  122 +
  123 + while (dividend >= divisor) {
  124 + /* The following asm() prevents the compiler from
  125 + optimising this loop into a modulo operation. */
  126 + asm("" : "+rm"(dividend));
  127 +
  128 + dividend -= divisor;
  129 + ret++;
  130 + }
  131 +
  132 + *remainder = dividend;
  133 +
  134 + return ret;
  135 +}
  136 +
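__iter_div_u64_rem() above trades a full division for repeated subtraction, so it only pays off when the quotient is known to be small. A hypothetical caller, with illustrative names, not part of the commit:

    #include <linux/math64.h>

    /* Convert a short delay into whole timer periods; cost grows
     * linearly with the returned quotient, so keep delays small.
     */
    static u32 delay_to_periods(u64 delay_ns, u32 period_ns, u64 *leftover_ns)
    {
            return __iter_div_u64_rem(delay_ns, period_ns, leftover_ns);
    }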
  137 +#ifndef mul_u32_u32
  138 +/*
  139 + * Many a GCC version messes this up and generates a 64x64 mult :-(
  140 + */
  141 +static inline u64 mul_u32_u32(u32 a, u32 b)
  142 +{
  143 + return (u64)a * b;
  144 +}
  145 +#endif
  146 +
  147 +#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
  148 +
  149 +#ifndef mul_u64_u32_shr
  150 +static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
  151 +{
  152 + return (u64)(((unsigned __int128)a * mul) >> shift);
  153 +}
  154 +#endif /* mul_u64_u32_shr */
  155 +
  156 +#ifndef mul_u64_u64_shr
  157 +static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
  158 +{
  159 + return (u64)(((unsigned __int128)a * mul) >> shift);
  160 +}
  161 +#endif /* mul_u64_u64_shr */
  162 +
  163 +#else
  164 +
  165 +#ifndef mul_u64_u32_shr
  166 +static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
  167 +{
  168 + u32 ah, al;
  169 + u64 ret;
  170 +
  171 + al = a;
  172 + ah = a >> 32;
  173 +
  174 + ret = mul_u32_u32(al, mul) >> shift;
  175 + if (ah)
  176 + ret += mul_u32_u32(ah, mul) << (32 - shift);
  177 +
  178 + return ret;
  179 +}
  180 +#endif /* mul_u64_u32_shr */
  181 +
  182 +#ifndef mul_u64_u64_shr
  183 +static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
  184 +{
  185 + union {
  186 + u64 ll;
  187 + struct {
  188 +#ifdef __BIG_ENDIAN
  189 + u32 high, low;
  190 +#else
  191 + u32 low, high;
  192 +#endif
  193 + } l;
  194 + } rl, rm, rn, rh, a0, b0;
  195 + u64 c;
  196 +
  197 + a0.ll = a;
  198 + b0.ll = b;
  199 +
  200 + rl.ll = mul_u32_u32(a0.l.low, b0.l.low);
  201 + rm.ll = mul_u32_u32(a0.l.low, b0.l.high);
  202 + rn.ll = mul_u32_u32(a0.l.high, b0.l.low);
  203 + rh.ll = mul_u32_u32(a0.l.high, b0.l.high);
  204 +
  205 + /*
  206 + * Each of these lines computes a 64-bit intermediate result into "c",
  207 + * starting at bits 32-95. The low 32-bits go into the result of the
  208 + * multiplication, the high 32-bits are carried into the next step.
  209 + */
  210 + rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low;
  211 + rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low;
  212 + rh.l.high = (c >> 32) + rh.l.high;
  213 +
  214 + /*
  215 + * The 128-bit result of the multiplication is in rl.ll and rh.ll,
  216 + * shift it right and throw away the high part of the result.
  217 + */
  218 + if (shift == 0)
  219 + return rl.ll;
  220 + if (shift < 64)
  221 + return (rl.ll >> shift) | (rh.ll << (64 - shift));
  222 + return rh.ll >> (shift & 63);
  223 +}
  224 +#endif /* mul_u64_u64_shr */
  225 +
  226 +#endif
  227 +
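mul_u64_u32_shr() is the primitive behind the usual clocksource-style scaled conversion ns = (cycles * mult) >> shift. A hypothetical usage sketch (the mult/shift pair would come from the clock driver; everything here is illustrative):

    #include <linux/math64.h>

    /* Scaled cycles-to-nanoseconds conversion without a 64-bit divide. */
    static u64 cycles_to_ns(u64 cycles, u32 mult, u32 shift)
    {
            return mul_u64_u32_shr(cycles, mult, shift);
    }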
  228 +#ifndef mul_u64_u32_div
  229 +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
  230 +{
  231 + union {
  232 + u64 ll;
  233 + struct {
  234 +#ifdef __BIG_ENDIAN
  235 + u32 high, low;
  236 +#else
  237 + u32 low, high;
  238 +#endif
  239 + } l;
  240 + } u, rl, rh;
  241 +
  242 + u.ll = a;
  243 + rl.ll = mul_u32_u32(u.l.low, mul);
  244 + rh.ll = mul_u32_u32(u.l.high, mul) + rl.l.high;
  245 +
  246 + /* Bits 32-63 of the result will be in rh.l.low. */
  247 + rl.l.high = do_div(rh.ll, divisor);
  248 +
  249 + /* Bits 0-31 of the result will be in rl.l.low. */
  250 + do_div(rl.ll, divisor);
  251 +
  252 + rl.l.high = rh.l.low;
  253 + return rl.ll;
  254 +}
  255 +#endif /* mul_u64_u32_div */
84 256  
85 257 #endif /* _LINUX_MATH64_H */
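mul_u64_u32_div() computes a * mul / divisor with a 96-bit intermediate so the product cannot overflow, provided the final quotient itself still fits in 64 bits. A hypothetical usage sketch with illustrative names, not part of the commit:

    #include <linux/math64.h>

    /* Rescale a 64-bit tick count from one clock rate to another.
     * Assumes the rescaled value still fits in 64 bits.
     */
    static u64 rescale_ticks(u64 ticks, u32 to_rate, u32 from_rate)
    {
            return mul_u64_u32_div(ticks, to_rate, from_rate);
    }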
... ... @@ -13,14 +13,19 @@
13 13 *
14 14 * Code generated for this function might be very inefficient
15 15 * for some CPUs. __div64_32() can be overridden by linking arch-specific
16   - * assembly versions such as arch/powerpc/lib/div64.S and arch/sh/lib/div64.S.
  16 + * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S
  17 + * or by defining a preprocessor macro in arch/include/asm/div64.h.
17 18 */
18 19  
19   -#include <div64.h>
20   -#include <linux/types.h>
21   -#include <linux/compiler.h>
  20 +#include <linux/compat.h>
  21 +#include <linux/kernel.h>
  22 +#include <linux/math64.h>
22 23  
23   -uint32_t notrace __div64_32(uint64_t *n, uint32_t base)
  24 +/* Not needed on 64bit architectures */
  25 +#if BITS_PER_LONG == 32
  26 +
  27 +#ifndef __div64_32
  28 +uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
24 29 {
25 30 uint64_t rem = *n;
26 31 uint64_t b = base;
... ... @@ -52,4 +57,130 @@
52 57 *n = res;
53 58 return rem;
54 59 }
  60 +EXPORT_SYMBOL(__div64_32);
  61 +#endif
  62 +
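The generic __div64_32(), whose body is largely elided by the hunk above, is plain binary restoring division. A standalone sketch of the idea only; the in-tree version additionally reduces the high 32 bits with a native 32-bit divide before entering the loop:

    #include <stdint.h>

    /* Shift-and-subtract sketch: scale the divisor up to just below the
     * dividend, then peel off one quotient bit per iteration.
     */
    static uint32_t div64_32_sketch(uint64_t *n, uint32_t base)
    {
            uint64_t rem = *n, b = base, res = 0, d = 1;

            while (b < rem && !(b & (1ULL << 63))) {
                    b <<= 1;
                    d <<= 1;
            }

            do {
                    if (rem >= b) {
                            rem -= b;
                            res += d;
                    }
                    b >>= 1;
                    d >>= 1;
            } while (d);

            *n = res;
            return (uint32_t)rem;
    }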
  63 +#ifndef div_s64_rem
  64 +s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
  65 +{
  66 + u64 quotient;
  67 +
  68 + if (dividend < 0) {
  69 + quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
  70 + *remainder = -*remainder;
  71 + if (divisor > 0)
  72 + quotient = -quotient;
  73 + } else {
  74 + quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
  75 + if (divisor < 0)
  76 + quotient = -quotient;
  77 + }
  78 + return quotient;
  79 +}
  80 +EXPORT_SYMBOL(div_s64_rem);
  81 +#endif
  82 +
  83 +/**
  84 + * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
  85 + * @dividend: 64bit dividend
  86 + * @divisor: 64bit divisor
  87 + * @remainder: 64bit remainder
  88 + *
  89 + * This implementation is comparable to the algorithm used by div64_u64.
  90 + * But this operation, which includes math for calculating the remainder,
  91 + * is kept distinct to avoid slowing down the div64_u64 operation on 32bit
  92 + * systems.
  93 + */
  94 +#ifndef div64_u64_rem
  95 +u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
  96 +{
  97 + u32 high = divisor >> 32;
  98 + u64 quot;
  99 +
  100 + if (high == 0) {
  101 + u32 rem32;
  102 + quot = div_u64_rem(dividend, divisor, &rem32);
  103 + *remainder = rem32;
  104 + } else {
  105 + int n = 1 + fls(high);
  106 + quot = div_u64(dividend >> n, divisor >> n);
  107 +
  108 + if (quot != 0)
  109 + quot--;
  110 +
  111 + *remainder = dividend - quot * divisor;
  112 + if (*remainder >= divisor) {
  113 + quot++;
  114 + *remainder -= divisor;
  115 + }
  116 + }
  117 +
  118 + return quot;
  119 +}
  120 +EXPORT_SYMBOL(div64_u64_rem);
  121 +#endif
  122 +
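A hypothetical caller of div64_u64_rem(), with illustrative names and not part of the commit: computing an average interval from 64-bit totals while keeping the leftover.

    #include <linux/math64.h>

    static u64 avg_interval_ns(u64 total_ns, u64 events, u64 *leftover_ns)
    {
            if (!events) {
                    *leftover_ns = 0;
                    return 0;
            }
            return div64_u64_rem(total_ns, events, leftover_ns);
    }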
  123 +/**
  124 + * div64_u64 - unsigned 64bit divide with 64bit divisor
  125 + * @dividend: 64bit dividend
  126 + * @divisor: 64bit divisor
  127 + *
  128 + * This implementation is a modified version of the algorithm proposed
  129 + * by the book 'Hacker's Delight'. The original source and full proof
  130 + * can be found here and is available for use without restriction.
  131 + *
  132 + * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
  133 + */
  134 +#ifndef div64_u64
  135 +u64 div64_u64(u64 dividend, u64 divisor)
  136 +{
  137 + u32 high = divisor >> 32;
  138 + u64 quot;
  139 +
  140 + if (high == 0) {
  141 + quot = div_u64(dividend, divisor);
  142 + } else {
  143 + int n = 1 + fls(high);
  144 + quot = div_u64(dividend >> n, divisor >> n);
  145 +
  146 + if (quot != 0)
  147 + quot--;
  148 + if ((dividend - quot * divisor) >= divisor)
  149 + quot++;
  150 + }
  151 +
  152 + return quot;
  153 +}
  154 +EXPORT_SYMBOL(div64_u64);
  155 +#endif
  156 +
  157 +/**
  158 + * div64_s64 - signed 64bit divide with 64bit divisor
  159 + * @dividend: 64bit dividend
  160 + * @divisor: 64bit divisor
  161 + */
  162 +#ifndef div64_s64
  163 +s64 div64_s64(s64 dividend, s64 divisor)
  164 +{
  165 + s64 quot, t;
  166 +
  167 + quot = div64_u64(abs(dividend), abs(divisor));
  168 + t = (dividend ^ divisor) >> 63;
  169 +
  170 + return (quot ^ t) - t;
  171 +}
  172 +EXPORT_SYMBOL(div64_s64);
  173 +#endif
  174 +
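The final (quot ^ t) - t in div64_s64() is a branchless conditional negate: the arithmetic right shift makes t either 0 (signs equal) or -1 (signs differ), and x ^ -1 followed by subtracting -1 is exactly two's-complement negation. A standalone illustration, not part of the commit:

    #include <assert.h>
    #include <stdint.h>

    /* Negate quot when a and b have differing signs. Relies on >> of a
     * negative value being an arithmetic shift, as the kernel code does.
     */
    static int64_t conditional_negate(int64_t quot, int64_t a, int64_t b)
    {
            int64_t t = (a ^ b) >> 63;      /* 0 or -1 */

            return (quot ^ t) - t;
    }

    int main(void)
    {
            assert(conditional_negate(5, 10, 2) == 5);      /* same signs */
            assert(conditional_negate(5, -10, 2) == -5);    /* differing signs */
            return 0;
    }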
  175 +#endif /* BITS_PER_LONG == 32 */
  176 +
  177 +/*
  178 + * Iterative div/mod for use when dividend is not expected to be much
  179 + * bigger than divisor.
  180 + */
  181 +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
  182 +{
  183 + return __iter_div_u64_rem(dividend, divisor, remainder);
  184 +}
  185 +EXPORT_SYMBOL(iter_div_u64_rem);