Commit ca81a62198e39ad9155f12725c269fcc2a9f1f8b

Authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

This updates the sha512 fix so that it doesn't cause excessive stack
usage on i386.  This is done by reverting to the original code and
avoiding the duplication of W by moving its initialisation into the loop.

As the underlying code is in fact the one that we have used for years,
I'm pushing this now instead of postponing to the next cycle.

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: sha512 - Avoid stack bloat on i386
  crypto: sha512 - Use binary and instead of modulus
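
Both patches show up in the single diff below.  As an aid to reading it,
here is a minimal standalone sketch of why a 16-word circular buffer is
enough: when schedule word t is computed, slot t & 15 still holds
W[t - 16], so the standard SHA-512 recurrence can be applied in place and
the schedule can be refreshed inside the round loop instead of being
expanded up front.  The helper schedule_word() and the written-out
ror64/s0/s1 definitions are illustrative only, not taken from the patch.

#include <stdint.h>

/* SHA-512 small sigma functions, written out here so the sketch is
 * self-contained (rotation/shift amounts per FIPS 180); the kernel file
 * defines its own equivalents. */
static inline uint64_t ror64(uint64_t x, unsigned int n)
{
	return (x >> n) | (x << (64 - n));
}
#define s0(x) (ror64((x), 1) ^ ror64((x), 8) ^ ((x) >> 7))
#define s1(x) (ror64((x), 19) ^ ror64((x), 61) ^ ((x) >> 6))

/* Hypothetical helper, not from the patch: produce schedule word W[t]
 * using only a 16-entry circular buffer.  The caller loads the message
 * block into W[0..15] first; for t >= 16, slot t & 15 still contains
 * W[t - 16], so the in-place update below yields
 * W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16],
 * which is exactly the recurrence BLEND_OP computes in the diff. */
static uint64_t schedule_word(uint64_t W[16], int t)
{
	if (t >= 16)
		W[t & 15] += s1(W[(t - 2) & 15]) + W[(t - 7) & 15] +
			     s0(W[(t - 15) & 15]);
	return W[t & 15];
}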


crypto/sha512_generic.c
@@ -78,7 +78,7 @@
 
 static inline void BLEND_OP(int I, u64 *W)
 {
-	W[I % 16] += s1(W[(I-2) % 16]) + W[(I-7) % 16] + s0(W[(I-15) % 16]);
+	W[I & 15] += s1(W[(I-2) & 15]) + W[(I-7) & 15] + s0(W[(I-15) & 15]);
 }
 
 static void
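
An aside on the hunk above: I is a signed int, so a compiler cannot in
general turn I % 16 into a mask (the two differ for negative values) and
may emit a division-style sequence.  For the non-negative indices
BLEND_OP is actually called with (16..79, so I-2, I-7 and I-15 are all
at least 1), "& 15" selects the same slot.  A tiny standalone check of
that equivalence, not kernel code:

#include <assert.h>

int main(void)
{
	int x;

	/* every index BLEND_OP touches lies in this range */
	for (x = 0; x < 80; x++)
		assert((x % 16) == (x & 15));
	return 0;
}
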
@@ -89,46 +89,42 @@
 	int i;
 	u64 W[16];
 
-	/* load the input */
-	for (i = 0; i < 16; i++)
-		LOAD_OP(i, W, input);
-
 	/* load the state into our registers */
 	a=state[0]; b=state[1]; c=state[2]; d=state[3];
 	e=state[4]; f=state[5]; g=state[6]; h=state[7];
 
-#define SHA512_0_15(i, a, b, c, d, e, f, g, h)			\
-	t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[i];	\
-	t2 = e0(a) + Maj(a, b, c);				\
-	d += t1;						\
-	h = t1 + t2
+	/* now iterate */
+	for (i=0; i<80; i+=8) {
+		if (!(i & 8)) {
+			int j;
 
-#define SHA512_16_79(i, a, b, c, d, e, f, g, h)			\
-	BLEND_OP(i, W);						\
-	t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[(i)%16]; \
-	t2 = e0(a) + Maj(a, b, c);				\
-	d += t1;						\
-	h = t1 + t2
+			if (i < 16) {
+				/* load the input */
+				for (j = 0; j < 16; j++)
+					LOAD_OP(i + j, W, input);
+			} else {
+				for (j = 0; j < 16; j++) {
+					BLEND_OP(i + j, W);
+				}
+			}
+		}
 
-	for (i = 0; i < 16; i += 8) {
-		SHA512_0_15(i, a, b, c, d, e, f, g, h);
-		SHA512_0_15(i + 1, h, a, b, c, d, e, f, g);
-		SHA512_0_15(i + 2, g, h, a, b, c, d, e, f);
-		SHA512_0_15(i + 3, f, g, h, a, b, c, d, e);
-		SHA512_0_15(i + 4, e, f, g, h, a, b, c, d);
-		SHA512_0_15(i + 5, d, e, f, g, h, a, b, c);
-		SHA512_0_15(i + 6, c, d, e, f, g, h, a, b);
-		SHA512_0_15(i + 7, b, c, d, e, f, g, h, a);
-	}
-	for (i = 16; i < 80; i += 8) {
-		SHA512_16_79(i, a, b, c, d, e, f, g, h);
-		SHA512_16_79(i + 1, h, a, b, c, d, e, f, g);
-		SHA512_16_79(i + 2, g, h, a, b, c, d, e, f);
-		SHA512_16_79(i + 3, f, g, h, a, b, c, d, e);
-		SHA512_16_79(i + 4, e, f, g, h, a, b, c, d);
-		SHA512_16_79(i + 5, d, e, f, g, h, a, b, c);
-		SHA512_16_79(i + 6, c, d, e, f, g, h, a, b);
-		SHA512_16_79(i + 7, b, c, d, e, f, g, h, a);
+		t1 = h + e1(e) + Ch(e,f,g) + sha512_K[i  ] + W[(i & 15)];
+		t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
+		t1 = g + e1(d) + Ch(d,e,f) + sha512_K[i+1] + W[(i & 15) + 1];
+		t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
+		t1 = f + e1(c) + Ch(c,d,e) + sha512_K[i+2] + W[(i & 15) + 2];
+		t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
+		t1 = e + e1(b) + Ch(b,c,d) + sha512_K[i+3] + W[(i & 15) + 3];
+		t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
+		t1 = d + e1(a) + Ch(a,b,c) + sha512_K[i+4] + W[(i & 15) + 4];
+		t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
+		t1 = c + e1(h) + Ch(h,a,b) + sha512_K[i+5] + W[(i & 15) + 5];
+		t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
+		t1 = b + e1(g) + Ch(g,h,a) + sha512_K[i+6] + W[(i & 15) + 6];
+		t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
+		t1 = a + e1(f) + Ch(f,g,h) + sha512_K[i+7] + W[(i & 15) + 7];
+		t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
 	}
 
 	state[0] += a; state[1] += b; state[2] += c; state[3] += d;
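
One non-obvious detail of the new loop: it unrolls eight rounds per
iteration (i += 8) but only needs to refill the sixteen-word window every
other iteration, which is what the !(i & 8) test selects.  A quick
standalone check, not kernel code, that the refresh fires exactly before
rounds 0, 16, 32, 48 and 64:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	int i;

	for (i = 0; i < 80; i += 8) {
		if (!(i & 8)) {
			/* bit 3 of i is clear only at multiples of 16 here */
			assert(i % 16 == 0);
			printf("refresh W[] before round %d\n", i);
		}
	}
	return 0;
}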