Commit 9eb2d627190a8afe4b9276b24615a9559504fa60
Committed by
David S. Miller
1 parent
89b3d9aaf4
Exists in
master
and in
7 other branches
[TCP] cubic: use Newton-Raphson
Replace cube root algorithim with a faster version using Newton-Raphson. Surprisingly, doing the scaled div64_64 is faster than a true 64 bit division on 64 bit CPU's. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 1 changed file with 39 additions and 54 deletions Side-by-side Diff
net/ipv4/tcp_cubic.c
... | ... | @@ -52,6 +52,7 @@ |
52 | 52 | module_param(tcp_friendliness, int, 0644); |
53 | 53 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); |
54 | 54 | |
55 | +#include <asm/div64.h> | |
55 | 56 | |
56 | 57 | /* BIC TCP Parameters */ |
57 | 58 | struct bictcp { |
58 | 59 | |
59 | 60 | |
60 | 61 | |
61 | 62 | |
62 | 63 | |
63 | 64 | |
64 | 65 | |
... | ... | @@ -93,67 +94,51 @@ |
93 | 94 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
94 | 95 | } |
95 | 96 | |
96 | -/* 65536 times the cubic root */ | |
97 | -static const u64 cubic_table[8] | |
98 | - = {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367}; | |
99 | - | |
100 | -/* | |
101 | - * calculate the cubic root of x | |
102 | - * the basic idea is that x can be expressed as i*8^j | |
103 | - * so cubic_root(x) = cubic_root(i)*2^j | |
104 | - * in the following code, x is i, and y is 2^j | |
105 | - * because of integer calculation, there are errors in calculation | |
106 | - * so finally use binary search to find out the exact solution | |
107 | - */ | |
108 | -static u32 cubic_root(u64 x) | |
97 | +/* 64bit divisor, dividend and result. dynamic precision */ | |
98 | +static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor) | |
109 | 99 | { |
110 | - u64 y, app, target, start, end, mid, start_diff, end_diff; | |
100 | + u_int32_t d = divisor; | |
111 | 101 | |
112 | - if (x == 0) | |
113 | - return 0; | |
102 | + if (divisor > 0xffffffffULL) { | |
103 | + unsigned int shift = fls(divisor >> 32); | |
114 | 104 | |
115 | - target = x; | |
105 | + d = divisor >> shift; | |
106 | + dividend >>= shift; | |
107 | + } | |
116 | 108 | |
117 | - /* first estimate lower and upper bound */ | |
118 | - y = 1; | |
119 | - while (x >= 8){ | |
120 | - x = (x >> 3); | |
121 | - y = (y << 1); | |
122 | - } | |
123 | - start = (y*cubic_table[x])>>16; | |
124 | - if (x==7) | |
125 | - end = (y<<1); | |
126 | - else | |
127 | - end = (y*cubic_table[x+1]+65535)>>16; | |
109 | + /* avoid 64 bit division if possible */ | |
110 | + if (dividend >> 32) | |
111 | + do_div(dividend, d); | |
112 | + else | |
113 | + dividend = (uint32_t) dividend / d; | |
128 | 114 | |
129 | - /* binary search for more accurate one */ | |
130 | - while (start < end-1) { | |
131 | - mid = (start+end) >> 1; | |
132 | - app = mid*mid*mid; | |
133 | - if (app < target) | |
134 | - start = mid; | |
135 | - else if (app > target) | |
136 | - end = mid; | |
137 | - else | |
138 | - return mid; | |
139 | - } | |
115 | + return dividend; | |
116 | +} | |
140 | 117 | |
141 | - /* find the most accurate one from start and end */ | |
142 | - app = start*start*start; | |
143 | - if (app < target) | |
144 | - start_diff = target - app; | |
145 | - else | |
146 | - start_diff = app - target; | |
147 | - app = end*end*end; | |
148 | - if (app < target) | |
149 | - end_diff = target - app; | |
150 | - else | |
151 | - end_diff = app - target; | |
118 | +/* | |
119 | + * calculate the cubic root of x using Newton-Raphson | |
120 | + */ | |
121 | +static u32 cubic_root(u64 a) | |
122 | +{ | |
123 | + u32 x, x1; | |
152 | 124 | |
153 | - if (start_diff < end_diff) | |
154 | - return (u32)start; | |
155 | - else | |
156 | - return (u32)end; | |
125 | + /* Initial estimate is based on: | |
126 | + * cbrt(x) = exp(log(x) / 3) | |
127 | + */ | |
128 | + x = 1u << (fls64(a)/3); | |
129 | + | |
130 | + /* | |
131 | + * Iteration based on: | |
132 | + * 2 | |
133 | + * x = ( 2 * x + a / x ) / 3 | |
134 | + * k+1 k k | |
135 | + */ | |
136 | + do { | |
137 | + x1 = x; | |
138 | + x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3; | |
139 | + } while (abs(x1 - x) > 1); | |
140 | + | |
141 | + return x; | |
157 | 142 | } |
158 | 143 | |
159 | 144 | /* |