[TCP] cubic: use Newton-Raphson

Replace cube root algorithim with a faster version using Newton-Raphson. Surprisingly, doing the scaled div64_64 is faster than a true 64 bit division on 64 bit CPU's. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>

[TCP] cubic: use Newton-Raphson
Replace cube root algorithim with a faster version using Newton-Raphson. Surprisingly, doing the scaled div64_64 is faster than a true 64 bit division on 64 bit CPU's. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Stephen Hemminger · David S. Miller
1 parent 89b3d9aaf4
Showing 1 changed file with 39 additions and 54 deletions Side-by-side Diff
net/ipv4/tcp_cubic.c
@@ -52,6 +52,7 @@
 module_param(tcp_friendliness, int, 0644);
 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
  
+#include <asm/div64.h>
  
 /* BIC TCP Parameters */
 struct bictcp {
  
  
  
  
  
  
  
@@ -93,67 +94,51 @@
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
  
-/* 65536 times the cubic root */
-static const u64 cubic_table[8]
-	= {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367};
-
-/*
- * calculate the cubic root of x
- * the basic idea is that x can be expressed as i*8^j
- * so cubic_root(x) = cubic_root(i)*2^j
- *  in the following code, x is i, and y is 2^j
- *  because of integer calculation, there are errors in calculation
- *  so finally use binary search to find out the exact solution
- */
-static u32 cubic_root(u64 x)
+/* 64bit divisor, dividend and result. dynamic precision */
+static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
 {
-        u64 y, app, target, start, end, mid, start_diff, end_diff;
+	u_int32_t d = divisor;
  
-        if (x == 0)
-                return 0;
+	if (divisor > 0xffffffffULL) {
+		unsigned int shift = fls(divisor >> 32);
  
-        target = x;
+		d = divisor >> shift;
+		dividend >>= shift;
+	}
  
-        /* first estimate lower and upper bound */
-        y = 1;
-        while (x >= 8){
-                x = (x >> 3);
-                y = (y << 1);
-        }
-        start = (y*cubic_table[x])>>16;
-        if (x==7)
-                end = (y<<1);
-        else
-                end = (y*cubic_table[x+1]+65535)>>16;
+	/* avoid 64 bit division if possible */
+	if (dividend >> 32)
+		do_div(dividend, d);
+	else
+		dividend = (uint32_t) dividend / d;
  
-        /* binary search for more accurate one */
-        while (start < end-1) {
-                mid = (start+end) >> 1;
-                app = mid*mid*mid;
-                if (app < target)
-                        start = mid;
-                else if (app > target)
-                        end = mid;
-                else
-                        return mid;
-        }
+	return dividend;
+}
  
-        /* find the most accurate one from start and end */
-        app = start*start*start;
-        if (app < target)
-                start_diff = target - app;
-        else
-                start_diff = app - target;
-        app = end*end*end;
-        if (app < target)
-                end_diff = target - app;
-        else
-                end_diff = app - target;
+/*
+ * calculate the cubic root of x using Newton-Raphson
+ */
+static u32 cubic_root(u64 a)
+{
+	u32 x, x1;
  
-        if (start_diff < end_diff)
-                return (u32)start;
-        else
-                return (u32)end;
+	/* Initial estimate is based on:
+	 * cbrt(x) = exp(log(x) / 3)
+	 */
+	x = 1u << (fls64(a)/3);
+
+	/*
+	 * Iteration based on:
+	 *                         2
+	 * x    = ( 2 * x  +  a / x  ) / 3
+	 *  k+1          k         k
+	 */
+	do {
+		x1 = x;
+		x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
+	} while (abs(x1 - x) > 1);
+
+	return x;
 }
  
 /*
...	...	@@ -52,6 +52,7 @@
52	52	module_param(tcp_friendliness, int, 0644);
53	53	MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
54	54
	55	+#include <asm/div64.h>
55	56
56	57	/* BIC TCP Parameters */
57	58	struct bictcp {
58	59
59	60
60	61
61	62
62	63
63	64
64	65
...	...	@@ -93,67 +94,51 @@
93	94	tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
94	95	}
95	96
96		-/* 65536 times the cubic root */
97		-static const u64 cubic_table[8]
98		- = {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367};
99		-
100		-/*
101		- * calculate the cubic root of x
102		- * the basic idea is that x can be expressed as i*8^j
103		- * so cubic_root(x) = cubic_root(i)*2^j
104		- * in the following code, x is i, and y is 2^j
105		- * because of integer calculation, there are errors in calculation
106		- * so finally use binary search to find out the exact solution
107		- */
108		-static u32 cubic_root(u64 x)
	97	+/* 64bit divisor, dividend and result. dynamic precision */
	98	+static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
109	99	{
110		- u64 y, app, target, start, end, mid, start_diff, end_diff;
	100	+ u_int32_t d = divisor;
111	101
112		- if (x == 0)
113		- return 0;
	102	+ if (divisor > 0xffffffffULL) {
	103	+ unsigned int shift = fls(divisor >> 32);
114	104
115		- target = x;
	105	+ d = divisor >> shift;
	106	+ dividend >>= shift;
	107	+ }
116	108
117		- /* first estimate lower and upper bound */
118		- y = 1;
119		- while (x >= 8){
120		- x = (x >> 3);
121		- y = (y << 1);
122		- }
123		- start = (y*cubic_table[x])>>16;
124		- if (x==7)
125		- end = (y<<1);
126		- else
127		- end = (y*cubic_table[x+1]+65535)>>16;
	109	+ /* avoid 64 bit division if possible */
	110	+ if (dividend >> 32)
	111	+ do_div(dividend, d);
	112	+ else
	113	+ dividend = (uint32_t) dividend / d;
128	114
129		- /* binary search for more accurate one */
130		- while (start < end-1) {
131		- mid = (start+end) >> 1;
132		- app = midmidmid;
133		- if (app < target)
134		- start = mid;
135		- else if (app > target)
136		- end = mid;
137		- else
138		- return mid;
139		- }
	115	+ return dividend;
	116	+}
140	117
141		- /* find the most accurate one from start and end */
142		- app = startstartstart;
143		- if (app < target)
144		- start_diff = target - app;
145		- else
146		- start_diff = app - target;
147		- app = endendend;
148		- if (app < target)
149		- end_diff = target - app;
150		- else
151		- end_diff = app - target;
	118	+/*
	119	+ * calculate the cubic root of x using Newton-Raphson
	120	+ */
	121	+static u32 cubic_root(u64 a)
	122	+{
	123	+ u32 x, x1;
152	124
153		- if (start_diff < end_diff)
154		- return (u32)start;
155		- else
156		- return (u32)end;
	125	+ /* Initial estimate is based on:
	126	+ * cbrt(x) = exp(log(x) / 3)
	127	+ */
	128	+ x = 1u << (fls64(a)/3);
	129	+
	130	+ /*
	131	+ * Iteration based on:
	132	+ * 2
	133	+ * x = ( 2 * x + a / x ) / 3
	134	+ * k+1 k k
	135	+ */
	136	+ do {
	137	+ x1 = x;
	138	+ x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
	139	+ } while (abs(x1 - x) > 1);
	140	+
	141	+ return x;
157	142	}
158	143
159	144	/*