Commit 087e791172c5df20d02c27de99ecba0efc7dc7ac
Committed by
Greg Kroah-Hartman
1 parent
553aeac6c8
crypto: aesni - fix "by8" variant for 128 bit keys
commit 0b1e95b2fa0934c3a08db483979c70d3b287f50e upstream. The "by8" counter mode optimization is broken for 128-bit keys with input data longer than 128 bytes. It uses the wrong key material for en- and decryption. The key registers xkey0, xkey4, xkey8 and xkey12 need to be preserved in case we're handling more than 128 bytes of input data -- they won't get reloaded after the initial load. They must therefore be (a) loaded on the first iteration and (b) preserved for the later ones. The implementation for 128-bit keys complies with neither (a) nor (b). Fix this by bringing the implementation back to its original source, correctly loading the key registers and preserving their values by *not* re-using the registers for other purposes. Kudos to James for reporting the issue and providing a test case showing the discrepancies. Reported-by: James Yonan <james@openvpn.net> Cc: Chandramouli Narayanan <mouli@linux.intel.com> Signed-off-by: Mathias Krause <minipli@googlemail.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Showing 1 changed file with 35 additions and 11 deletions Side-by-side Diff
arch/x86/crypto/aes_ctrby8_avx-x86_64.S
... | ... | @@ -208,7 +208,7 @@ |
208 | 208 | |
209 | 209 | .if (klen == KEY_128) |
210 | 210 | .if (load_keys) |
211 | - vmovdqa 3*16(p_keys), xkeyA | |
211 | + vmovdqa 3*16(p_keys), xkey4 | |
212 | 212 | .endif |
213 | 213 | .else |
214 | 214 | vmovdqa 3*16(p_keys), xkeyA |
... | ... | @@ -224,7 +224,7 @@ |
224 | 224 | add $(16*by), p_in |
225 | 225 | |
226 | 226 | .if (klen == KEY_128) |
227 | - vmovdqa 4*16(p_keys), xkey4 | |
227 | + vmovdqa 4*16(p_keys), xkeyB | |
228 | 228 | .else |
229 | 229 | .if (load_keys) |
230 | 230 | vmovdqa 4*16(p_keys), xkey4 |
... | ... | @@ -234,7 +234,12 @@ |
234 | 234 | .set i, 0 |
235 | 235 | .rept by |
236 | 236 | club XDATA, i |
237 | - vaesenc xkeyA, var_xdata, var_xdata /* key 3 */ | |
237 | + /* key 3 */ | |
238 | + .if (klen == KEY_128) | |
239 | + vaesenc xkey4, var_xdata, var_xdata | |
240 | + .else | |
241 | + vaesenc xkeyA, var_xdata, var_xdata | |
242 | + .endif | |
238 | 243 | .set i, (i +1) |
239 | 244 | .endr |
240 | 245 | |
241 | 246 | |
... | ... | @@ -243,13 +248,18 @@ |
243 | 248 | .set i, 0 |
244 | 249 | .rept by |
245 | 250 | club XDATA, i |
246 | - vaesenc xkey4, var_xdata, var_xdata /* key 4 */ | |
251 | + /* key 4 */ | |
252 | + .if (klen == KEY_128) | |
253 | + vaesenc xkeyB, var_xdata, var_xdata | |
254 | + .else | |
255 | + vaesenc xkey4, var_xdata, var_xdata | |
256 | + .endif | |
247 | 257 | .set i, (i +1) |
248 | 258 | .endr |
249 | 259 | |
250 | 260 | .if (klen == KEY_128) |
251 | 261 | .if (load_keys) |
252 | - vmovdqa 6*16(p_keys), xkeyB | |
262 | + vmovdqa 6*16(p_keys), xkey8 | |
253 | 263 | .endif |
254 | 264 | .else |
255 | 265 | vmovdqa 6*16(p_keys), xkeyB |
256 | 266 | |
... | ... | @@ -267,12 +277,17 @@ |
267 | 277 | .set i, 0 |
268 | 278 | .rept by |
269 | 279 | club XDATA, i |
270 | - vaesenc xkeyB, var_xdata, var_xdata /* key 6 */ | |
280 | + /* key 6 */ | |
281 | + .if (klen == KEY_128) | |
282 | + vaesenc xkey8, var_xdata, var_xdata | |
283 | + .else | |
284 | + vaesenc xkeyB, var_xdata, var_xdata | |
285 | + .endif | |
271 | 286 | .set i, (i +1) |
272 | 287 | .endr |
273 | 288 | |
274 | 289 | .if (klen == KEY_128) |
275 | - vmovdqa 8*16(p_keys), xkey8 | |
290 | + vmovdqa 8*16(p_keys), xkeyB | |
276 | 291 | .else |
277 | 292 | .if (load_keys) |
278 | 293 | vmovdqa 8*16(p_keys), xkey8 |
... | ... | @@ -288,7 +303,7 @@ |
288 | 303 | |
289 | 304 | .if (klen == KEY_128) |
290 | 305 | .if (load_keys) |
291 | - vmovdqa 9*16(p_keys), xkeyA | |
306 | + vmovdqa 9*16(p_keys), xkey12 | |
292 | 307 | .endif |
293 | 308 | .else |
294 | 309 | vmovdqa 9*16(p_keys), xkeyA |
... | ... | @@ -297,7 +312,12 @@ |
297 | 312 | .set i, 0 |
298 | 313 | .rept by |
299 | 314 | club XDATA, i |
300 | - vaesenc xkey8, var_xdata, var_xdata /* key 8 */ | |
315 | + /* key 8 */ | |
316 | + .if (klen == KEY_128) | |
317 | + vaesenc xkeyB, var_xdata, var_xdata | |
318 | + .else | |
319 | + vaesenc xkey8, var_xdata, var_xdata | |
320 | + .endif | |
301 | 321 | .set i, (i +1) |
302 | 322 | .endr |
303 | 323 | |
... | ... | @@ -306,7 +326,12 @@ |
306 | 326 | .set i, 0 |
307 | 327 | .rept by |
308 | 328 | club XDATA, i |
309 | - vaesenc xkeyA, var_xdata, var_xdata /* key 9 */ | |
329 | + /* key 9 */ | |
330 | + .if (klen == KEY_128) | |
331 | + vaesenc xkey12, var_xdata, var_xdata | |
332 | + .else | |
333 | + vaesenc xkeyA, var_xdata, var_xdata | |
334 | + .endif | |
310 | 335 | .set i, (i +1) |
311 | 336 | .endr |
312 | 337 | |
... | ... | @@ -412,7 +437,6 @@ |
412 | 437 | /* main body of aes ctr load */ |
413 | 438 | |
414 | 439 | .macro do_aes_ctrmain key_len |
415 | - | |
416 | 440 | cmp $16, num_bytes |
417 | 441 | jb .Ldo_return2\key_len |
418 | 442 |