Commit ca81a62198e39ad9155f12725c269fcc2a9f1f8b

Authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

This updates the sha512 fix so that it doesn't cause excessive stack
usage on i386.  This is done by reverting to the original code and
avoiding the duplication of W by moving its initialisation into the loop.

As the underlying code is in fact the one that we have used for years,
I'm pushing this now instead of postponing to the next cycle.

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: sha512 - Avoid stack bloat on i386
  crypto: sha512 - Use binary and instead of modulus
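
Both patches show up in the single diff below.  As an aid to reading it,
here is a minimal standalone sketch of why a 16-word circular buffer is
enough: when schedule word t is computed, slot t & 15 still holds
W[t - 16], so the standard SHA-512 recurrence can be applied in place and
the schedule can be refreshed inside the round loop instead of being
expanded up front.  The helper schedule_word() and the written-out
ror64/s0/s1 definitions are illustrative only, not taken from the patch.

#include <stdint.h>

/* SHA-512 small sigma functions, written out here so the sketch is
 * self-contained (rotation/shift amounts per FIPS 180); the kernel file
 * defines its own equivalents. */
static inline uint64_t ror64(uint64_t x, unsigned int n)
{
	return (x >> n) | (x << (64 - n));
}
#define s0(x) (ror64((x), 1) ^ ror64((x), 8) ^ ((x) >> 7))
#define s1(x) (ror64((x), 19) ^ ror64((x), 61) ^ ((x) >> 6))

/* Hypothetical helper, not from the patch: produce schedule word W[t]
 * using only a 16-entry circular buffer.  The caller loads the message
 * block into W[0..15] first; for t >= 16, slot t & 15 still contains
 * W[t - 16], so the in-place update below yields
 * W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16],
 * which is exactly the recurrence BLEND_OP computes in the diff. */
static uint64_t schedule_word(uint64_t W[16], int t)
{
	if (t >= 16)
		W[t & 15] += s1(W[(t - 2) & 15]) + W[(t - 7) & 15] +
			     s0(W[(t - 15) & 15]);
	return W[t & 15];
}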


crypto/sha512_generic.c
@@ -78,7 +78,7 @@
 
 static inline void BLEND_OP(int I, u64 *W)
 {
-	W[I % 16] += s1(W[(I-2) % 16]) + W[(I-7) % 16] + s0(W[(I-15) % 16]);
+	W[I & 15] += s1(W[(I-2) & 15]) + W[(I-7) & 15] + s0(W[(I-15) & 15]);
 }
 
 static void
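
An aside on the hunk above: I is a signed int, so a compiler cannot in
general turn I % 16 into a mask (the two differ for negative values) and
may emit a division-style sequence.  For the non-negative indices
BLEND_OP is actually called with (16..79, so I-2, I-7 and I-15 are all
at least 1), "& 15" selects the same slot.  A tiny standalone check of
that equivalence, not kernel code:

#include <assert.h>

int main(void)
{
	int x;

	/* every index BLEND_OP touches lies in this range */
	for (x = 0; x < 80; x++)
		assert((x % 16) == (x & 15));
	return 0;
}
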
@@ -89,46 +89,42 @@
 	int i;
 	u64 W[16];
 
-	/* load the input */
-	for (i = 0; i < 16; i++)
-		LOAD_OP(i, W, input);
-
 	/* load the state into our registers */
 	a=state[0]; b=state[1]; c=state[2]; d=state[3];
 	e=state[4]; f=state[5]; g=state[6]; h=state[7];
 
-#define SHA512_0_15(i, a, b, c, d, e, f, g, h)			\
-	t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[i];	\
-	t2 = e0(a) + Maj(a, b, c);				\
-	d += t1;						\
-	h = t1 + t2
+	/* now iterate */
+	for (i=0; i<80; i+=8) {
+		if (!(i & 8)) {
+			int j;
 
-#define SHA512_16_79(i, a, b, c, d, e, f, g, h)			\
-	BLEND_OP(i, W);						\
-	t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[(i)%16]; \
-	t2 = e0(a) + Maj(a, b, c);				\
-	d += t1;						\
-	h = t1 + t2
+			if (i < 16) {
+				/* load the input */
+				for (j = 0; j < 16; j++)
+					LOAD_OP(i + j, W, input);
+			} else {
+				for (j = 0; j < 16; j++) {
+					BLEND_OP(i + j, W);
+				}
+			}
+		}
 
-	for (i = 0; i < 16; i += 8) {
-		SHA512_0_15(i, a, b, c, d, e, f, g, h);
-		SHA512_0_15(i + 1, h, a, b, c, d, e, f, g);
-		SHA512_0_15(i + 2, g, h, a, b, c, d, e, f);
-		SHA512_0_15(i + 3, f, g, h, a, b, c, d, e);
-		SHA512_0_15(i + 4, e, f, g, h, a, b, c, d);
-		SHA512_0_15(i + 5, d, e, f, g, h, a, b, c);
-		SHA512_0_15(i + 6, c, d, e, f, g, h, a, b);
-		SHA512_0_15(i + 7, b, c, d, e, f, g, h, a);
-	}
-	for (i = 16; i < 80; i += 8) {
-		SHA512_16_79(i, a, b, c, d, e, f, g, h);
-		SHA512_16_79(i + 1, h, a, b, c, d, e, f, g);
-		SHA512_16_79(i + 2, g, h, a, b, c, d, e, f);
-		SHA512_16_79(i + 3, f, g, h, a, b, c, d, e);
-		SHA512_16_79(i + 4, e, f, g, h, a, b, c, d);
-		SHA512_16_79(i + 5, d, e, f, g, h, a, b, c);
-		SHA512_16_79(i + 6, c, d, e, f, g, h, a, b);
-		SHA512_16_79(i + 7, b, c, d, e, f, g, h, a);
+		t1 = h + e1(e) + Ch(e,f,g) + sha512_K[i  ] + W[(i & 15)];
+		t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
+		t1 = g + e1(d) + Ch(d,e,f) + sha512_K[i+1] + W[(i & 15) + 1];
+		t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
+		t1 = f + e1(c) + Ch(c,d,e) + sha512_K[i+2] + W[(i & 15) + 2];
+		t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
+		t1 = e + e1(b) + Ch(b,c,d) + sha512_K[i+3] + W[(i & 15) + 3];
+		t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
+		t1 = d + e1(a) + Ch(a,b,c) + sha512_K[i+4] + W[(i & 15) + 4];
+		t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
+		t1 = c + e1(h) + Ch(h,a,b) + sha512_K[i+5] + W[(i & 15) + 5];
+		t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
+		t1 = b + e1(g) + Ch(g,h,a) + sha512_K[i+6] + W[(i & 15) + 6];
+		t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
+		t1 = a + e1(f) + Ch(f,g,h) + sha512_K[i+7] + W[(i & 15) + 7];
+		t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
 	}
 
 	state[0] += a; state[1] += b; state[2] += c; state[3] += d;
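
One non-obvious detail of the new loop: it unrolls eight rounds per
iteration (i += 8) but only needs to refill the sixteen-word window every
other iteration, which is what the !(i & 8) test selects.  A quick
standalone check, not kernel code, that the refresh fires exactly before
rounds 0, 16, 32, 48 and 64:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	int i;

	for (i = 0; i < 80; i += 8) {
		if (!(i & 8)) {
			/* bit 3 of i is clear only at multiples of 16 here */
			assert(i % 16 == 0);
			printf("refresh W[] before round %d\n", i);
		}
	}
	return 0;
}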