Commit 9eb2d627190a8afe4b9276b24615a9559504fa60

Authored by Stephen Hemminger
Committed by David S. Miller
1 parent 89b3d9aaf4

[TCP] cubic: use Newton-Raphson

Replace cube root algorithim with a faster version using Newton-Raphson.
Surprisingly, doing the scaled div64_64 is faster than a true 64 bit
division on 64 bit CPU's.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 1 changed file with 39 additions and 54 deletions Side-by-side Diff

net/ipv4/tcp_cubic.c
... ... @@ -52,6 +52,7 @@
52 52 module_param(tcp_friendliness, int, 0644);
53 53 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
54 54  
  55 +#include <asm/div64.h>
55 56  
56 57 /* BIC TCP Parameters */
57 58 struct bictcp {
58 59  
59 60  
60 61  
61 62  
62 63  
63 64  
64 65  
... ... @@ -93,67 +94,51 @@
93 94 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
94 95 }
95 96  
96   -/* 65536 times the cubic root */
97   -static const u64 cubic_table[8]
98   - = {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367};
99   -
100   -/*
101   - * calculate the cubic root of x
102   - * the basic idea is that x can be expressed as i*8^j
103   - * so cubic_root(x) = cubic_root(i)*2^j
104   - * in the following code, x is i, and y is 2^j
105   - * because of integer calculation, there are errors in calculation
106   - * so finally use binary search to find out the exact solution
107   - */
108   -static u32 cubic_root(u64 x)
  97 +/* 64bit divisor, dividend and result. dynamic precision */
  98 +static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
109 99 {
110   - u64 y, app, target, start, end, mid, start_diff, end_diff;
  100 + u_int32_t d = divisor;
111 101  
112   - if (x == 0)
113   - return 0;
  102 + if (divisor > 0xffffffffULL) {
  103 + unsigned int shift = fls(divisor >> 32);
114 104  
115   - target = x;
  105 + d = divisor >> shift;
  106 + dividend >>= shift;
  107 + }
116 108  
117   - /* first estimate lower and upper bound */
118   - y = 1;
119   - while (x >= 8){
120   - x = (x >> 3);
121   - y = (y << 1);
122   - }
123   - start = (y*cubic_table[x])>>16;
124   - if (x==7)
125   - end = (y<<1);
126   - else
127   - end = (y*cubic_table[x+1]+65535)>>16;
  109 + /* avoid 64 bit division if possible */
  110 + if (dividend >> 32)
  111 + do_div(dividend, d);
  112 + else
  113 + dividend = (uint32_t) dividend / d;
128 114  
129   - /* binary search for more accurate one */
130   - while (start < end-1) {
131   - mid = (start+end) >> 1;
132   - app = mid*mid*mid;
133   - if (app < target)
134   - start = mid;
135   - else if (app > target)
136   - end = mid;
137   - else
138   - return mid;
139   - }
  115 + return dividend;
  116 +}
140 117  
141   - /* find the most accurate one from start and end */
142   - app = start*start*start;
143   - if (app < target)
144   - start_diff = target - app;
145   - else
146   - start_diff = app - target;
147   - app = end*end*end;
148   - if (app < target)
149   - end_diff = target - app;
150   - else
151   - end_diff = app - target;
  118 +/*
  119 + * calculate the cubic root of x using Newton-Raphson
  120 + */
  121 +static u32 cubic_root(u64 a)
  122 +{
  123 + u32 x, x1;
152 124  
153   - if (start_diff < end_diff)
154   - return (u32)start;
155   - else
156   - return (u32)end;
  125 + /* Initial estimate is based on:
  126 + * cbrt(x) = exp(log(x) / 3)
  127 + */
  128 + x = 1u << (fls64(a)/3);
  129 +
  130 + /*
  131 + * Iteration based on:
  132 + * 2
  133 + * x = ( 2 * x + a / x ) / 3
  134 + * k+1 k k
  135 + */
  136 + do {
  137 + x1 = x;
  138 + x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
  139 + } while (abs(x1 - x) > 1);
  140 +
  141 + return x;
157 142 }
158 143  
159 144 /*